From e0fa4107c2e8921f5d019909d69bf163a7e31738 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 23 Jun 2014 11:33:22 +0000 Subject: [PATCH 001/206] Add some code for an experimental fts5 module. Does not work yet. FossilOrigin-Name: 1e0648dcf283d4f1f6159db4d2433b6cc635992e --- ext/fts3/fts3.h | 1 + ext/fts5/fts5.c | 425 ++++++ ext/fts5/fts5Int.h | 310 +++++ ext/fts5/fts5_config.c | 318 +++++ ext/fts5/fts5_expr.c | 670 +++++++++ ext/fts5/fts5_index.c | 2939 +++++++++++++++++++++++++++++++++++++++ ext/fts5/fts5_storage.c | 411 ++++++ main.mk | 33 + manifest | 27 +- manifest.uuid | 2 +- src/main.c | 1 + test/fts5aa.test | 248 ++++ test/fts5ea.test | 84 ++ 13 files changed, 5460 insertions(+), 9 deletions(-) create mode 100644 ext/fts5/fts5.c create mode 100644 ext/fts5/fts5Int.h create mode 100644 ext/fts5/fts5_config.c create mode 100644 ext/fts5/fts5_expr.c create mode 100644 ext/fts5/fts5_index.c create mode 100644 ext/fts5/fts5_storage.c create mode 100644 test/fts5aa.test create mode 100644 test/fts5ea.test diff --git a/ext/fts3/fts3.h b/ext/fts3/fts3.h index c1aa8caf09..e99457eebd 100644 --- a/ext/fts3/fts3.h +++ b/ext/fts3/fts3.h @@ -20,6 +20,7 @@ extern "C" { #endif /* __cplusplus */ int sqlite3Fts3Init(sqlite3 *db); +int sqlite3Fts5Init(sqlite3 *db); #ifdef __cplusplus } /* extern "C" */ diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c new file mode 100644 index 0000000000..7a6c361068 --- /dev/null +++ b/ext/fts5/fts5.c @@ -0,0 +1,425 @@ +/* +** 2014 Jun 09 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is an SQLite module implementing full-text search. 
+*/ + +#include "fts5Int.h" + +typedef struct Fts5Table Fts5Table; + +struct Fts5Table { + sqlite3_vtab base; /* Base class used by SQLite core */ + Fts5Config *pConfig; /* Virtual table configuration */ + Fts5Index *pIndex; /* Full-text index */ + Fts5Storage *pStorage; /* Document store */ +}; + +/* +** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy +** argument is non-zero, also attempt to delete the shadow tables from the +** database (bDestroy is passed through to the index and storage close +** routines). +** +** If pTab is NULL this is a no-op that returns SQLITE_OK. Otherwise the +** index and storage objects are closed, the configuration object and pTab +** itself are freed, and the first error code returned by either close +** routine (or SQLITE_OK if both succeed) is returned. +*/ +static int fts5FreeVtab(Fts5Table *pTab, int bDestroy){ + int rc = SQLITE_OK; + if( pTab ){ + int rc2; + /* Always run both close routines; only the first error is kept in rc */ + rc2 = sqlite3Fts5IndexClose(pTab->pIndex, bDestroy); + if( rc==SQLITE_OK ) rc = rc2; + rc2 = sqlite3Fts5StorageClose(pTab->pStorage, bDestroy); + if( rc==SQLITE_OK ) rc = rc2; + sqlite3Fts5ConfigFree(pTab->pConfig); + sqlite3_free(pTab); + } + return rc; +} + +/* +** The xDisconnect() virtual table method. Releases the handle with +** bDestroy==0. +*/ +static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ + return fts5FreeVtab((Fts5Table*)pVtab, 0); +} + +/* +** The xDestroy() virtual table method. Releases the handle with +** bDestroy==1. +*/ +static int fts5DestroyMethod(sqlite3_vtab *pVtab){ + return fts5FreeVtab((Fts5Table*)pVtab, 1); +} + +/* +** This function is the implementation of both the xConnect and xCreate +** methods of the FTS3 virtual table. +** +** The argv[] array contains the following: +** +** argv[0] -> module name ("fts5") +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> "column name" and other module argument fields. 
+*/ +static int fts5InitVtab( + int bCreate, /* True for xCreate, false for xConnect */ + sqlite3 *db, /* The SQLite database connection */ + void *pAux, /* Hash table containing tokenizers */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ + char **pzErr /* Write any error message here */ +){ + int rc; /* Return code */ + Fts5Config *pConfig; /* Results of parsing argc/argv */ + Fts5Table *pTab = 0; /* New virtual table object */ + + /* Parse the arguments */ + rc = sqlite3Fts5ConfigParse(db, argc, (const char**)argv, &pConfig, pzErr); + assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); + + /* Allocate the new vtab object */ + if( rc==SQLITE_OK ){ + pTab = (Fts5Table*)sqlite3_malloc(sizeof(Fts5Table)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pTab, 0, sizeof(Fts5Table)); + pTab->pConfig = pConfig; + } + } + + /* Open the index sub-system */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->pIndex, pzErr); + } + + /* Open the storage sub-system */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5StorageOpen( + pConfig, pTab->pIndex, bCreate, &pTab->pStorage, pzErr + ); + } + + /* Call sqlite3_declare_vtab() */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ConfigDeclareVtab(pConfig); + } + + if( rc!=SQLITE_OK ){ + fts5FreeVtab(pTab, 0); + pTab = 0; + } + *ppVTab = (sqlite3_vtab*)pTab; + return rc; +} + +/* +** The xConnect() and xCreate() methods for the virtual table. All the +** work is done in function fts5InitVtab(). 
+*/ +static int fts5ConnectMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); +} +/* xCreate: same as xConnect except that bCreate=1 is passed to fts5InitVtab() */ +static int fts5CreateMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); +} + +/* +** Implementation of the xBestIndex method for FTS5 tables. There +** are three possible strategies, in order of preference: +** +** 1. Direct lookup by rowid or docid. +** 2. Full-text search using a MATCH operator on a non-docid column. +** 3. Linear scan of %_content table. +** +** NOTE: not implemented yet. This stub returns SQLITE_OK without reading +** or modifying pInfo. +*/ +static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ + return SQLITE_OK; +} + +/* +** Implementation of xOpen method. +** +** NOTE: stub. No cursor is allocated and *ppCsr is not written. +*/ +static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + return SQLITE_OK; +} + +/* +** Close the cursor. For additional information see the documentation +** on the xClose method of the virtual table interface. +** +** NOTE: stub. Nothing is released yet. +*/ +static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ + return SQLITE_OK; +} + + +/* +** Advance the cursor to the next row in the table that matches the +** search criteria. +** +** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned +** even if we reach end-of-file. The fts5EofMethod() will be called +** subsequently to determine whether or not an EOF was hit. 
+*/ +static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ + /* Stub: the cursor is not advanced yet */ + return SQLITE_OK; +} + +/* +** This is the xFilter interface for the virtual table. See +** the virtual table xFilter method documentation for additional +** information. +** +** NOTE: stub. All arguments are currently ignored. +*/ +static int fts5FilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + return SQLITE_OK; +} + +/* +** This is the xEof method of the virtual table. SQLite calls this +** routine to find out if it has reached the end of a result set. +** +** NOTE: stub. Always returns 1 (true). +*/ +static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ + return 1; +} + +/* +** This is the xRowid method. The SQLite core calls this routine to +** retrieve the rowid for the current row of the result set. fts5 +** exposes %_content.docid as the rowid for the virtual table. The +** rowid should be written to *pRowid. +** +** NOTE: stub. *pRowid is not written yet. +*/ +static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ + return SQLITE_OK; +} + +/* +** This is the xColumn method, called by SQLite to request a value from +** the row that the supplied cursor currently points to. +** +** NOTE: stub. No result is set on pCtx. +*/ +static int fts5ColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + return SQLITE_OK; +} + +/* +** This function is called to handle an FTS INSERT command. In other words, +** an INSERT statement of the form: +** +** INSERT INTO fts(fts) VALUES($pVal) +** +** Argument pVal is the value assigned to column "fts" by the INSERT +** statement. This function returns SQLITE_OK if successful, or an SQLite +** error code if an error occurs. 
+*/ +static int fts5SpecialCommand(Fts5Table *pTab, sqlite3_value *pVal){ + const char *z = sqlite3_value_text(pVal); + int n = sqlite3_value_bytes(pVal); + int rc = SQLITE_ERROR; + + if( 0==sqlite3_stricmp("integrity-check", z) ){ + rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); + }else + + if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){ + int pgsz = atoi(&z[5]); + if( pgsz<32 ) pgsz = 32; + sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); + rc = SQLITE_OK; + } + + return rc; +} + +/* +** This function is the implementation of the xUpdate callback used by +** FTS3 virtual tables. It is invoked by SQLite each time a row is to be +** inserted, updated or deleted. +*/ +static int fts5UpdateMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Size of argument array */ + sqlite3_value **apVal, /* Array of arguments */ + sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ +){ + Fts5Table *pTab = (Fts5Table*)pVtab; + Fts5Config *pConfig = pTab->pConfig; + int eType0; /* value_type() of apVal[0] */ + int eConflict; /* ON CONFLICT for this DML */ + int rc = SQLITE_OK; /* Return code */ + + assert( nArg==1 || nArg==(2 + pConfig->nCol + 1) ); + + if( SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ + return fts5SpecialCommand(pTab, apVal[2 + pConfig->nCol]); + } + + eType0 = sqlite3_value_type(apVal[0]); + eConflict = sqlite3_vtab_on_conflict(pConfig->db); + + assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); + if( eType0==SQLITE_INTEGER ){ + i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); + } + + if( rc==SQLITE_OK && nArg>1 ){ + rc = sqlite3Fts5StorageInsert(pTab->pStorage, apVal, eConflict, pRowid); + } + + return rc; +} + +/* +** Implementation of xSync() method. 
+*/ +static int fts5SyncMethod(sqlite3_vtab *pVtab){ + int rc; + Fts5Table *pTab = (Fts5Table*)pVtab; + rc = sqlite3Fts5IndexSync(pTab->pIndex); + return rc; +} + +/* +** Implementation of xBegin() method. Currently a no-op. +*/ +static int fts5BeginMethod(sqlite3_vtab *pVtab){ + return SQLITE_OK; +} + +/* +** Implementation of xCommit() method. This is a no-op. The contents of +** the pending-terms hash-table have already been flushed into the database +** by fts5SyncMethod(). +*/ +static int fts5CommitMethod(sqlite3_vtab *pVtab){ + return SQLITE_OK; +} + +/* +** Implementation of xRollback(). Discard the contents of the pending-terms +** hash-table. Any changes made to the database are reverted by SQLite. +** The work is delegated to sqlite3Fts5IndexRollback(). +*/ +static int fts5RollbackMethod(sqlite3_vtab *pVtab){ + Fts5Table *pTab = (Fts5Table*)pVtab; + int rc; + rc = sqlite3Fts5IndexRollback(pTab->pIndex); + return rc; +} + +/* +** This routine implements the xFindFunction method for the FTS5 +** virtual table. +** +** NOTE: stub. No function is ever found: *pxFunc and *ppArg are left +** untouched and 0 is returned. +*/ +static int fts5FindFunctionMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + int nArg, /* Number of SQL function arguments */ + const char *zName, /* Name of SQL function */ + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ + void **ppArg /* Unused */ +){ + /* No function of the specified name was found. Return 0. */ + return 0; +} + +/* +** Implementation of FTS5 xRename method. Rename an fts5 table. +** +** NOTE: stub. No work is performed yet; SQLITE_OK is always returned. +*/ +static int fts5RenameMethod( + sqlite3_vtab *pVtab, /* Virtual table handle */ + const char *zName /* New name of table */ +){ + int rc = SQLITE_OK; + return rc; +} + +/* +** The xSavepoint() method. +** +** Flush the contents of the pending-terms table to disk. +** +** NOTE: the flush described above is not implemented yet. This stub +** returns SQLITE_OK without doing anything. +*/ +static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ + int rc = SQLITE_OK; + return rc; +} + +/* +** The xRelease() method. +** +** This is a no-op. +*/ +static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ + return SQLITE_OK; +} + +/* +** The xRollbackTo() method. 
+** +** Discard the contents of the pending terms table. +*/ +static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ + /* Stub: nothing is discarded yet */ + return SQLITE_OK; +} + +/* +** Method table for the fts5 virtual table module. It is registered under +** the name "fts5" by sqlite3Fts5Init() below. +*/ +static const sqlite3_module fts5Module = { + /* iVersion */ 2, + /* xCreate */ fts5CreateMethod, + /* xConnect */ fts5ConnectMethod, + /* xBestIndex */ fts5BestIndexMethod, + /* xDisconnect */ fts5DisconnectMethod, + /* xDestroy */ fts5DestroyMethod, + /* xOpen */ fts5OpenMethod, + /* xClose */ fts5CloseMethod, + /* xFilter */ fts5FilterMethod, + /* xNext */ fts5NextMethod, + /* xEof */ fts5EofMethod, + /* xColumn */ fts5ColumnMethod, + /* xRowid */ fts5RowidMethod, + /* xUpdate */ fts5UpdateMethod, + /* xBegin */ fts5BeginMethod, + /* xSync */ fts5SyncMethod, + /* xCommit */ fts5CommitMethod, + /* xRollback */ fts5RollbackMethod, + /* xFindFunction */ fts5FindFunctionMethod, + /* xRename */ fts5RenameMethod, + /* xSavepoint */ fts5SavepointMethod, + /* xRelease */ fts5ReleaseMethod, + /* xRollbackTo */ fts5RollbackToMethod, +}; + +/* +** Register the fts5 module with database handle db, then run the index +** and expression initializers. The module is registered with no client +** data pointer. Returns SQLITE_OK, or the error code of the first step +** that fails (later steps are then skipped). +*/ +int sqlite3Fts5Init(sqlite3 *db){ + int rc; + rc = sqlite3_create_module_v2(db, "fts5", &fts5Module, 0, 0); + if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(db); + return rc; +} + diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h new file mode 100644 index 0000000000..5329c207c8 --- /dev/null +++ b/ext/fts5/fts5Int.h @@ -0,0 +1,310 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ +#ifndef _FTS5INT_H +#define _FTS5INT_H + +#include "sqliteInt.h" +#include "fts3_tokenizer.h" + + +/* +** Maximum number of prefix indexes on single FTS5 table. This must be +** less than 32. 
 If it is set to anything large than that, an #error +** directive in fts5_index.c will cause the build to fail. +*/ +#define FTS5_MAX_PREFIX_INDEXES 31 + +/* NOTE(review): presumably the default distance for NEAR queries - confirm +** against fts5_expr.c. */ +#define FTS5_DEFAULT_NEARDIST 10 + +/************************************************************************** +** Interface to code in fts5_config.c. fts5_config.c contains code +** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. +*/ + +typedef struct Fts5Config Fts5Config; + +/* +** An instance of the following structure encodes all information that can +** be gleaned from the CREATE VIRTUAL TABLE statement. +** +** Instances are allocated by sqlite3Fts5ConfigParse() and released by +** sqlite3Fts5ConfigFree(). +*/ +struct Fts5Config { + sqlite3 *db; /* Database handle */ + char *zDb; /* Database holding FTS index (e.g. "main") */ + char *zName; /* Name of FTS index */ + int nCol; /* Number of columns */ + char **azCol; /* Column names */ + int nPrefix; /* Number of prefix indexes */ + int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ + sqlite3_tokenizer *pTokenizer; /* Tokenizer instance for this table */ +}; + +int sqlite3Fts5ConfigParse(sqlite3*, int, const char**, Fts5Config**, char**); +void sqlite3Fts5ConfigFree(Fts5Config*); + +int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); + +int sqlite3Fts5Tokenize( + Fts5Config *pConfig, /* FTS5 Configuration object */ + const char *pText, int nText, /* Text to tokenize */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ +); + +void sqlite3Fts5Dequote(char *z); + +/* +** End of interface to code in fts5_config.c. +**************************************************************************/ + +/************************************************************************** +** Interface to code in fts5_index.c. fts5_index.c contains code +** to access the data stored in the %_data table. +*/ + +typedef struct Fts5Index Fts5Index; +typedef struct Fts5IndexIter Fts5IndexIter; + +/* +** Values used as part of the flags argument passed to IndexQuery(). 
+*/ +#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ +#define FTS5INDEX_QUERY_ASC 0x0002 /* Docs in ascending rowid order */ +#define FTS5INDEX_QUERY_MATCH 0x0004 /* Use the iMatch arg to Next() */ +#define FTS5INDEX_QUERY_DELETE 0x0008 /* Visit delete markers */ + +/* +** Create/destroy an Fts5Index object. +*/ +int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); +int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy); + +/* +** Usage sketch for the query and iterator functions declared below: +** +** for( +** pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0); +** 0==sqlite3Fts5IterEof(pIter); +** sqlite3Fts5IterNext(pIter, iMatch) +** ){ +** i64 iDocid = sqlite3Fts5IterDocid(pIter); +** } +*/ + +/* +** Open a new iterator to iterate through all docids that match the +** specified token or token prefix. +*/ +Fts5IndexIter *sqlite3Fts5IndexQuery( + Fts5Index *p, /* FTS index to query */ + const char *pToken, int nToken, /* Token (or prefix) to query for */ + int flags /* Mask of FTS5INDEX_QUERY_X flags */ +); + +/* +** Docid list iteration. +*/ +int sqlite3Fts5IterEof(Fts5IndexIter*); +void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch); +int sqlite3Fts5IterSeek(Fts5IndexIter*, i64 iDocid); +i64 sqlite3Fts5IterDocid(Fts5IndexIter*); + +/* +** Position list iteration. +** +** for( +** iPos=sqlite3Fts5IterFirstPos(pIter, iCol); +** iPos>=0; +** iPos=sqlite3Fts5IterNextPos(pIter) +** ){ +** // token appears at position iPos of column iCol of the current document +** } +*/ +int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol); +int sqlite3Fts5IterNextPos(Fts5IndexIter*); + +/* +** Close an iterator opened by sqlite3Fts5IndexQuery(). +*/ +void sqlite3Fts5IterClose(Fts5IndexIter*); + +/* +** Insert or remove data to or from the index. Each time a document is +** added to or removed from the index, this function is called one or more +** times. +** +** For an insert, it must be called once for each token in the new document. 
+** If the operation is a delete, it must be called (at least) once for each +** unique token in the document with an iCol value less than zero. The iPos +** argument is ignored for a delete. +*/ +void sqlite3Fts5IndexWrite( + Fts5Index *p, /* Index to write to */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +); + +/* +** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to +** document iDocid. +*/ +void sqlite3Fts5IndexBeginWrite( + Fts5Index *p, /* Index to write to */ + i64 iDocid /* Docid to add or remove data from */ +); + +/* +** Flush any data stored in the in-memory hash tables to the database. +** +** This is called whenever (a) the main transaction is committed or (b) a +** new sub-transaction is opened. +*/ +void sqlite3Fts5IndexFlush(Fts5Index *p); + +/* +** Invoked from the xSync method of the virtual table (fts5SyncMethod() in +** fts5.c). The return value is passed back to SQLite as the xSync result. +*/ +int sqlite3Fts5IndexSync(Fts5Index *p); + +/* +** Discard any data stored in the in-memory hash tables. Do not write it +** to the database. Additionally, assume that the contents of the %_data +** table may have changed on disk. So any in-memory caches of %_data +** records must be invalidated. +** +** This is called (a) whenever a main or sub-transaction is rolled back, +** and (b) whenever the read transaction is closed. +*/ +int sqlite3Fts5IndexRollback(Fts5Index *p); + +/* +** Retrieve and clear the current error code, respectively. +*/ +int sqlite3Fts5IndexErrcode(Fts5Index*); +void sqlite3Fts5IndexReset(Fts5Index*); + +/* +** Get (bSet==0) or set (bSet!=0) the "averages" record. +*/ +void sqlite3Fts5IndexAverages(Fts5Index *p, int bSet, int nAvg, int *aAvg); + +/* +** Functions called by the storage module as part of integrity-check. 
+*/ +u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int); +int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum); + +/* Called during startup to register a UDF with SQLite */ +int sqlite3Fts5IndexInit(sqlite3*); + +/* +** Set the index page size. Invoked for the pgsz=N special INSERT command +** by fts5SpecialCommand() in fts5.c, which never passes a value below 32. +*/ +void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); + +/* +** End of interface to code in fts5_index.c. +**************************************************************************/ + +/************************************************************************** +** Interface to code in fts5_storage.c. fts5_storage.c contains +** code to access the data stored in the %_content and %_docsize tables. +*/ +typedef struct Fts5Storage Fts5Storage; + +int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); +int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy); + +int sqlite3Fts5DropTable(Fts5Config*, const char *zPost); +int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, char **pzErr); + +int sqlite3Fts5StorageDelete(Fts5Storage *p, i64); +int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*); + +/* Invoked for the integrity-check special INSERT command (see fts5.c) */ +int sqlite3Fts5StorageIntegrity(Fts5Storage *p); + +/* +** End of interface to code in fts5_storage.c. +**************************************************************************/ + + +/************************************************************************** +** Interface to code in fts5_expr.c. 
+*/ +typedef struct Fts5Expr Fts5Expr; +typedef struct Fts5Parse Fts5Parse; +typedef struct Fts5Token Fts5Token; +typedef struct Fts5ExprPhrase Fts5ExprPhrase; +typedef struct Fts5ExprNearset Fts5ExprNearset; + +/* +** A single token of a query expression. The tokenizer in fts5_expr.c +** (fts5ExprGetToken()) sets p to point into the expression text being +** scanned, so the token is a view of that buffer and not a copy. +*/ +struct Fts5Token { + const char *p; /* Token text (not NULL terminated) */ + int n; /* Size of buffer p in bytes */ +}; + +int sqlite3Fts5ExprNew( + Fts5Config *pConfig, + Fts5Index *pIdx, + const char *zExpr, + Fts5Expr **ppNew, + char **pzErr +); + +int sqlite3Fts5ExprFirst(Fts5Expr *p); +int sqlite3Fts5ExprNext(Fts5Expr *p); +int sqlite3Fts5ExprEof(Fts5Expr *p); +i64 sqlite3Fts5ExprRowid(Fts5Expr *p); + +void sqlite3Fts5ExprFree(Fts5Expr *p); + +// int sqlite3Fts5IterFirstPos(Fts5Expr*, int iCol, int *piPos); +// int sqlite3Fts5IterNextPos(Fts5Expr*, int *piPos); + +/* Called during startup to register a UDF with SQLite */ +int sqlite3Fts5ExprInit(sqlite3*); + +/******************************************* +** The fts5_expr.c API above this point is used by the other hand-written +** C code in this module. The interfaces below this point are called by +** the parser code in fts5parse.y. */ + +void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); + +Fts5Expr *sqlite3Fts5ParseExpr( + Fts5Parse *pParse, + int eType, + Fts5Expr *pLeft, + Fts5Expr *pRight, + Fts5ExprNearset *pNear +); + +Fts5ExprPhrase *sqlite3Fts5ParseTerm( + Fts5Parse *pParse, + Fts5ExprPhrase *pPhrase, + Fts5Token *pToken, + int bPrefix +); + +Fts5ExprNearset *sqlite3Fts5ParseNearset( + Fts5Parse*, + Fts5ExprNearset*, + Fts5ExprPhrase* +); + +void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); +void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); + +void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); +void sqlite3Fts5ParseSetColumn(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p); +void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); + + +/* +** End of interface to code in fts5_expr.c. 
+**************************************************************************/ + +#endif diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c new file mode 100644 index 0000000000..bbcbc5e0e5 --- /dev/null +++ b/ext/fts5/fts5_config.c @@ -0,0 +1,318 @@ +/* +** 2014 Jun 09 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is an SQLite module implementing full-text search. +*/ + +#include "fts5Int.h" + +/* +** Convert an SQL-style quoted string into a normal string by removing +** the quote characters. The conversion is done in-place. If the +** input does not begin with a quote character, then this routine +** is a no-op. +** +** Examples: +** +** "abc" becomes abc +** 'xyz' becomes xyz +** [pqr] becomes pqr +** `mno` becomes mno +** +** Inside the string, two consecutive close-quote characters are an escaped +** quote and are copied to the output as a single character. Copying stops +** at the first unescaped close-quote; anything after it is dropped. +** +** z must not be NULL (z[0] is read unconditionally). The ALWAYS() in the +** loop records the expectation that the close-quote is found before the +** nul-terminator - presumably callers only pass well-formed input; confirm +** at the call sites. +*/ +void sqlite3Fts5Dequote(char *z){ + char quote; /* Quote character (if any ) */ + + quote = z[0]; + if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ + int iIn = 1; /* Index of next byte to read from input */ + int iOut = 0; /* Index of next byte to write to output */ + + /* If the first byte was a '[', then the close-quote character is a ']' */ + if( quote=='[' ) quote = ']'; + + while( ALWAYS(z[iIn]) ){ + if( z[iIn]==quote ){ + if( z[iIn+1]!=quote ) break; + z[iOut++] = quote; + iIn += 2; + }else{ + z[iOut++] = z[iIn++]; + } + } + z[iOut] = '\0'; + } +} + +/* +** Parse the "special" CREATE VIRTUAL TABLE directive and update +** configuration object pConfig as appropriate. +** +** If successful, object pConfig is updated and SQLITE_OK returned. If +** an error occurs, an SQLite error code is returned and an error message +** may be left in *pzErr. 
It is the responsibility of the caller to +** eventually free any such error message using sqlite3_free(). +*/ +static int fts5ConfigParseSpecial( + Fts5Config *pConfig, /* Configuration object to update */ + char *zCmd, /* Special command to parse */ + char *zArg, /* Argument to parse */ + char **pzErr /* OUT: Error message */ +){ + if( sqlite3_stricmp(zCmd, "prefix")==0 ){ + char *p; + if( pConfig->aPrefix ){ + *pzErr = sqlite3_mprintf("multiple prefix=... directives"); + return SQLITE_ERROR; + } + pConfig->aPrefix = sqlite3_malloc(sizeof(int) * FTS5_MAX_PREFIX_INDEXES); + p = zArg; + while( p[0] ){ + int nPre = 0; + while( p[0]==' ' ) p++; + while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ + nPre = nPre*10 + (p[0] - '0'); + p++; + } + while( p[0]==' ' ) p++; + if( p[0]==',' ){ + p++; + }else if( p[0] ){ + *pzErr = sqlite3_mprintf("malformed prefix=... directive"); + return SQLITE_ERROR; + } + if( nPre==0 || nPre>=1000 ){ + *pzErr = sqlite3_mprintf("prefix length out of range: %d", nPre); + return SQLITE_ERROR; + } + pConfig->aPrefix[pConfig->nPrefix] = nPre; + pConfig->nPrefix++; + } + return SQLITE_OK; + } + + *pzErr = sqlite3_mprintf("unrecognized directive: \"%s\"", zCmd); + return SQLITE_ERROR; +} + +/* +** Duplicate the string passed as the only argument into a buffer allocated +** by sqlite3_malloc(). +** +** Return 0 if an OOM error is encountered. +*/ +static char *fts5Strdup(const char *z){ + return sqlite3_mprintf("%s", z); +} + +void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module**); + +/* +** Allocate an instance of the default tokenizer ("simple") at +** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error +** code if an error occurs. 
+*/ +static int fts5ConfigDefaultTokenizer(Fts5Config *pConfig){ + sqlite3_tokenizer_module *pMod; /* Tokenizer module "simple" */ + sqlite3_tokenizer *pTokenizer; /* Tokenizer instance */ + int rc; /* Return code */ + + /* Fetch the module object, then create an instance with no arguments */ + sqlite3Fts3SimpleTokenizerModule(&pMod); + rc = pMod->xCreate(0, 0, &pTokenizer); + if( rc==SQLITE_OK ){ + /* Record the module on the instance. Other code in this file reaches + ** the module methods through pTokenizer->pModule (xDestroy in + ** sqlite3Fts5ConfigFree(), xOpen/xNext/xClose in sqlite3Fts5Tokenize()). */ + pTokenizer->pModule = pMod; + pConfig->pTokenizer = pTokenizer; + } + + return rc; +} + +/* +** Arguments nArg/azArg contain the string arguments passed to the xCreate +** or xConnect method of the virtual table. This function attempts to +** allocate an instance of Fts5Config containing the results of parsing +** those arguments. +** +** If successful, SQLITE_OK is returned and *ppOut is set to point to the +** new Fts5Config object. If an error occurs, an SQLite error code is +** returned, *ppOut is set to NULL and an error message may be left in +** *pzErr. It is the responsibility of the caller to eventually free any +** such error message using sqlite3_free(). 
+*/ +int sqlite3Fts5ConfigParse( + sqlite3 *db, + int nArg, /* Number of arguments */ + const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ + Fts5Config **ppOut, /* OUT: Results of parse */ + char **pzErr /* OUT: Error message */ +){ + int rc = SQLITE_OK; /* Return code */ + Fts5Config *pRet; /* New object to return */ + + /* *ppOut is written before the OOM check, so it is NULL on that path */ + *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); + if( pRet==0 ) return SQLITE_NOMEM; + memset(pRet, 0, sizeof(Fts5Config)); + pRet->db = db; + + /* azArg[1] and azArg[2] are copied as the database and table names. + ** azCol[] gets one slot per argument. */ + pRet->azCol = (char**)sqlite3_malloc(sizeof(char*) * nArg); + pRet->zDb = fts5Strdup(azArg[1]); + pRet->zName = fts5Strdup(azArg[2]); + if( pRet->azCol==0 || pRet->zDb==0 || pRet->zName==0 ){ + rc = SQLITE_NOMEM; + }else{ + int i; + /* NOTE(review): the loop below is corrupt in this copy of the patch. + ** Text has been lost between the loop condition and the assignment to + ** azCol[]. Restore it from the upstream commit before relying on it. */ + for(i=3; rc==SQLITE_OK && iazCol[pRet->nCol++] = zDup; + } + } + } + } + + /* Fall back to the default tokenizer if none has been configured */ + if( rc==SQLITE_OK && pRet->pTokenizer==0 ){ + rc = fts5ConfigDefaultTokenizer(pRet); + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts5ConfigFree(pRet); + *ppOut = 0; + } + return rc; +} + +/* +** Free the configuration object passed as the only argument. +** +** Passing NULL is a harmless no-op. Otherwise the tokenizer (if any) is +** destroyed through its module xDestroy method, then zDb, zName, the +** azCol[] entries, azCol, aPrefix and finally the object itself are freed. +*/ +void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ + if( pConfig ){ + int i; + if( pConfig->pTokenizer ){ + pConfig->pTokenizer->pModule->xDestroy(pConfig->pTokenizer); + } + sqlite3_free(pConfig->zDb); + sqlite3_free(pConfig->zName); + /* NOTE(review): the loop condition below is corrupt in this copy of the + ** patch; it presumably compared i against pConfig->nCol. */ + for(i=0; inCol; i++){ + sqlite3_free(pConfig->azCol[i]); + } + sqlite3_free(pConfig->azCol); + sqlite3_free(pConfig->aPrefix); + sqlite3_free(pConfig); + } +} + +/* +** Call sqlite3_declare_vtab() based on the contents of the configuration +** object passed as the only argument. Return SQLITE_OK if successful, or +** an SQLite error code if an error occurs. 
+*/ +int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ + int i; + int rc; + char *zSql; + char *zOld; + + /* Build the CREATE TABLE statement piece by piece. zSql becomes NULL as + ** soon as any sqlite3_mprintf() call fails, which skips the later steps. */ + zSql = (char*)sqlite3_mprintf("CREATE TABLE x("); + /* NOTE(review): the loop condition below is corrupt in this copy of the + ** patch; it presumably compared i against pConfig->nCol. */ + for(i=0; zSql && inCol; i++){ + zOld = zSql; + zSql = sqlite3_mprintf("%s%s%Q", zOld, (i==0?"":", "), pConfig->azCol[i]); + sqlite3_free(zOld); + } + + /* Append a HIDDEN column named after the table itself. + ** NOTE(review): the separator is added unconditionally, so a table with + ** zero columns would produce malformed SQL - confirm whether that case + ** can be reached. */ + if( zSql ){ + zOld = zSql; + zSql = sqlite3_mprintf("%s, %Q HIDDEN)", zOld, pConfig->zName); + sqlite3_free(zOld); + } + + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_declare_vtab(pConfig->db, zSql); + sqlite3_free(zSql); + } + + return rc; +} + +/* +** Tokenize the text passed via the second and third arguments. +** +** The callback is invoked once for each token in the input text. The +** arguments passed to it are, in order: +** +** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() +** const char *pToken // Pointer to buffer containing token +** int nToken // Size of token in bytes +** int iStart // Byte offset of start of token within input text +** int iEnd // Byte offset of end of token within input text +** int iPos // Position of token in input (first token is 0) +** +** If the callback returns a non-zero value the tokenization is abandoned +** and no further callbacks are issued. +** +** This function returns SQLITE_OK if successful or an SQLite error code +** if an error occurs. If the tokenization was abandoned early because +** the callback returned SQLITE_DONE, this is not an error and this function +** still returns SQLITE_OK. Or, if the tokenization was abandoned early +** because the callback returned another non-zero value, it is assumed +** to be an SQLite error code and returned to the caller. 
+*/ +int sqlite3Fts5Tokenize( + Fts5Config *pConfig, /* FTS5 Configuration object */ + const char *pText, int nText, /* Text to tokenize */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ +){ + const sqlite3_tokenizer_module *pMod = pConfig->pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr = 0; + int rc; + + rc = pMod->xOpen(pConfig->pTokenizer, pText, nText, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( rc==SQLITE_OK ){ + const char *pToken; /* Pointer to token buffer */ + int nToken; /* Size of token in bytes */ + int iStart, iEnd, iPos; /* Start, end and position of token */ + pCsr->pTokenizer = pConfig->pTokenizer; + /* The loop ends when either xNext() or the xToken() callback returns + ** non-zero. SQLITE_DONE from either source is mapped to SQLITE_OK + ** below (presumably xNext() returns it at end of input - confirm in + ** fts3_tokenizer.h). Any other value is returned as an error. */ + for(rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos); + rc==SQLITE_OK; + rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos) + ){ + if( (rc = xToken(pCtx, pToken, nToken, iStart, iEnd, iPos)) ) break; + } + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + /* The cursor is closed on both the success and the error path */ + pMod->xClose(pCsr); + } + return rc; +} + + diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c new file mode 100644 index 0000000000..36dc60a069 --- /dev/null +++ b/ext/fts5/fts5_expr.c @@ -0,0 +1,670 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ + +#include "fts5Int.h" +#include "fts5parse.h" + +/* +** All token types in the generated fts5parse.h file are greater than 0. +*/ +#define FTS5_EOF 0 + +typedef struct Fts5ExprTerm Fts5ExprTerm; + +/* +** Functions generated by lemon from fts5parse.y. 
+*/
+/* Create a parser instance. mallocProc is the allocator it should use. */
+void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(size_t));
+/* Destroy a parser instance. freeProc must match the allocator above. */
+void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
+/* Feed a single token (type and value) to the parser. */
+void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
+
+/*
+** A node in an expression tree.
+**
+** eType:
+**   Expression node type. Always one of:
+**
+**       FTS5_AND                 (pLeft, pRight valid)
+**       FTS5_OR                  (pLeft, pRight valid)
+**       FTS5_NOT                 (pLeft, pRight valid)
+**       FTS5_STRING              (pNear valid)
+**
+** Members that are not valid for a given node type are NULL.
+*/
+struct Fts5Expr {
+  int eType;                      /* Node type */
+  Fts5Expr *pLeft;                /* Left hand child node */
+  Fts5Expr *pRight;               /* Right hand child node */
+  Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */
+};
+
+/*
+** An instance of the following structure represents a single search term
+** or term prefix.
+*/
+struct Fts5ExprTerm {
+  int bPrefix;                    /* True for a prefix term */
+  char *zTerm;                    /* nul-terminated term */
+};
+
+/*
+** A phrase. One or more terms that must appear in a contiguous sequence
+** within a document for it to match.
+**
+** The aTerm[] array is a trailing variable-length member. It lives in the
+** same allocation as the structure header.
+*/
+struct Fts5ExprPhrase {
+  int nTerm;                      /* Number of entries in aTerm[] */
+  Fts5ExprTerm aTerm[0];          /* Terms that make up this phrase */
+};
+
+/*
+** One or more phrases that must appear within a certain token distance of
+** each other within each matching document.
+**
+** As with Fts5ExprPhrase, the apPhrase[] array is a trailing
+** variable-length member stored in the same allocation as the header.
+*/
+struct Fts5ExprNearset {
+  int nNear;                      /* NEAR parameter */
+  int iCol;                       /* Column to search (-1 -> all columns) */
+  int nPhrase;                    /* Number of entries in aPhrase[] array */
+  Fts5ExprPhrase *apPhrase[0];    /* Array of phrase pointers */
+};
+
+
+/*
+** Parse context.
+*/ +struct Fts5Parse { + Fts5Config *pConfig; + char *zErr; + int rc; + Fts5Expr *pExpr; /* Result of a successful parse */ +}; + +void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ + if( pParse->rc==SQLITE_OK ){ + va_list ap; + va_start(ap, zFmt); + pParse->zErr = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + pParse->rc = SQLITE_ERROR; + } +} + +static int fts5ExprIsspace(char t){ + return t==' ' || t=='\t' || t=='\n' || t=='\r'; +} + +static int fts5ExprIstoken(char t){ + return fts5ExprIsspace(t)==0 && t!='\0' + && t!=':' && t!='(' && t!=')' + && t!=',' && t!='+' && t!='*'; +} + +/* +** Read the first token from the nul-terminated string at *pz. +*/ +static int fts5ExprGetToken( + Fts5Parse *pParse, + const char **pz, /* IN/OUT: Pointer into buffer */ + Fts5Token *pToken +){ + const char *z = *pz; + int tok; + + /* Skip past any whitespace */ + while( fts5ExprIsspace(*z) ) z++; + + pToken->p = z; + pToken->n = 1; + switch( *z ){ + case '(': tok = FTS5_LP; break; + case ')': tok = FTS5_RP; break; + case ':': tok = FTS5_COLON; break; + case ',': tok = FTS5_COMMA; break; + case '+': tok = FTS5_PLUS; break; + case '*': tok = FTS5_STAR; break; + case '\0': tok = FTS5_EOF; break; + + case '"': { + const char *z2; + tok = FTS5_STRING; + + for(z2=&z[1]; 1; z2++){ + if( z2[0]=='"' ){ + z2++; + if( z2[0]!='"' ) break; + } + if( z2[0]=='\0' ){ + sqlite3Fts5ParseError(pParse, "unterminated string"); + return FTS5_EOF; + } + } + pToken->n = (z2 - z); + break; + } + + default: { + const char *z2; + tok = FTS5_STRING; + for(z2=&z[1]; fts5ExprIstoken(*z2); z2++); + pToken->n = (z2 - z); + if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; + if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; + if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; + break; + } + } + + *pz = &pToken->p[pToken->n]; + return tok; +} + +static void *fts5ParseAlloc(size_t t){ return sqlite3_malloc((int)t); } +static void 
fts5ParseFree(void *p){ sqlite3_free(p); } + +int sqlite3Fts5ExprNew( + Fts5Config *pConfig, + Fts5Index *pIdx, + const char *zExpr, /* Expression text */ + Fts5Expr **ppNew, + char **pzErr +){ + Fts5Parse sParse; + Fts5Token token; + const char *z = zExpr; + int t; /* Next token type */ + void *pEngine; + + *ppNew = 0; + *pzErr = 0; + memset(&sParse, 0, sizeof(sParse)); + pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); + if( pEngine==0 ) return SQLITE_NOMEM; + sParse.pConfig = pConfig; + + do { + t = fts5ExprGetToken(&sParse, &z, &token); + sqlite3Fts5Parser(pEngine, t, token, &sParse); + }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); + sqlite3Fts5ParserFree(pEngine, fts5ParseFree); + + assert( sParse.pExpr==0 || (sParse.rc==SQLITE_OK && sParse.zErr==0) ); + *ppNew = sParse.pExpr; + *pzErr = sParse.zErr; + return sParse.rc; +} + +/* +** Free the object passed as the only argument. +*/ +void sqlite3Fts5ExprFree(Fts5Expr *p){ + if( p ){ + sqlite3Fts5ExprFree(p->pLeft); + sqlite3Fts5ExprFree(p->pRight); + sqlite3Fts5ParseNearsetFree(p->pNear); + sqlite3_free(p); + } +} + +/* +** Argument pIn points to a buffer of nIn bytes. This function allocates +** and returns a new buffer populated with a copy of (pIn/nIn) with a +** nul-terminator byte appended to it. +** +** It is the responsibility of the caller to eventually free the returned +** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. +*/ +static char *fts5Strdup(const char *pIn, int nIn){ + char *zRet = (char*)sqlite3_malloc(nIn+1); + if( zRet ){ + memcpy(zRet, pIn, nIn); + zRet[nIn] = '\0'; + } + return zRet; +} + +static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ + *pz = sqlite3_mprintf("%.*s", pToken->n, pToken->p); + if( *pz==0 ) return SQLITE_NOMEM; + return SQLITE_OK; +} + +/* +** Free the phrase object passed as the only argument. 
+*/ +static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ + if( pPhrase ){ + int i; + for(i=0; inTerm; i++){ + sqlite3_free(pPhrase->aTerm[i].zTerm); + } + sqlite3_free(pPhrase); + } +} + +/* +** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated +** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is +** appended to it and the results returned. +** +** If an OOM error occurs, both the pNear and pPhrase objects are freed and +** NULL returned. +*/ +Fts5ExprNearset *sqlite3Fts5ParseNearset( + Fts5Parse *pParse, /* Parse context */ + Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ + Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ +){ + const int SZALLOC = 8; + Fts5ExprNearset *pRet = 0; + + if( pParse->rc==SQLITE_OK ){ + if( pNear==0 ){ + int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); + pRet = sqlite3_malloc(nByte); + if( pRet==0 ){ + pParse->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + pRet->iCol = -1; + } + }else if( (pNear->nPhrase % SZALLOC)==0 ){ + int nNew = pRet->nPhrase + SZALLOC; + int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); + + pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte); + if( pRet==0 ){ + pParse->rc = SQLITE_NOMEM; + } + }else{ + pRet = pNear; + } + } + + if( pRet==0 ){ + assert( pParse->rc!=SQLITE_OK ); + sqlite3Fts5ParseNearsetFree(pNear); + sqlite3Fts5ParsePhraseFree(pPhrase); + }else{ + pRet->apPhrase[pRet->nPhrase++] = pPhrase; + } + return pRet; +} + +typedef struct TokenCtx TokenCtx; +struct TokenCtx { + Fts5ExprPhrase *pPhrase; +}; + +/* +** Callback for tokenizing terms used by ParseTerm(). 
+*/ +static int fts5ParseTokenize( + void *pContext, /* Pointer to Fts5InsertCtx object */ + const char *pToken, /* Buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Start offset of token */ + int iEnd, /* End offset of token */ + int iPos /* Position offset of token */ +){ + const int SZALLOC = 8; + TokenCtx *pCtx = (TokenCtx*)pContext; + Fts5ExprPhrase *pPhrase = pCtx->pPhrase; + Fts5ExprTerm *pTerm; + + if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ + Fts5ExprPhrase *pNew; + int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); + + pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew + ); + if( pNew==0 ) return SQLITE_NOMEM; + pCtx->pPhrase = pPhrase = pNew; + pNew->nTerm = nNew - SZALLOC; + } + + pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; + pTerm->bPrefix = 0; + + pTerm->zTerm = fts5Strdup(pToken, nToken); + return pTerm->zTerm ? SQLITE_OK : SQLITE_NOMEM; +} + + +/* +** Free the phrase object passed as the only argument. +*/ +void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ + fts5ExprPhraseFree(pPhrase); +} + +/* +** Free the phrase object passed as the second argument. +*/ +void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ + if( pNear ){ + int i; + for(i=0; inPhrase; i++){ + fts5ExprPhraseFree(pNear->apPhrase[i]); + } + sqlite3_free(pNear); + } +} + +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p){ + assert( pParse->pExpr==0 ); + pParse->pExpr = p; +} + +/* +** This function is called by the parser to process a string token. The +** string may or may not be quoted. In any case it is tokenized and a +** phrase object consisting of all tokens returned. 
+*/ +Fts5ExprPhrase *sqlite3Fts5ParseTerm( + Fts5Parse *pParse, /* Parse context */ + Fts5ExprPhrase *pPhrase, /* Phrase to append to */ + Fts5Token *pToken, /* String to tokenize */ + int bPrefix /* True if there is a trailing "*" */ +){ + Fts5Config *pConfig = pParse->pConfig; + TokenCtx sCtx; /* Context object passed to callback */ + int rc; /* Tokenize return code */ + char *z = 0; + + pParse->rc = fts5ParseStringFromToken(pToken, &z); + if( z==0 ) return 0; + sqlite3Fts5Dequote(z); + + memset(&sCtx, 0, sizeof(TokenCtx)); + sCtx.pPhrase = pPhrase; + rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize); + if( rc ){ + pParse->rc = rc; + fts5ExprPhraseFree(sCtx.pPhrase); + sCtx.pPhrase = 0; + }else if( sCtx.pPhrase->nTerm>0 ){ + sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; + } + + sqlite3_free(z); + return sCtx.pPhrase; +} + +void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ + if( pParse->rc==SQLITE_OK ){ + if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ + sqlite3Fts5ParseError( + pParse, "syntax error near \"%.*s\"", pTok->n, pTok->p + ); + } + } +} + +void sqlite3Fts5ParseSetDistance( + Fts5Parse *pParse, + Fts5ExprNearset *pNear, + Fts5Token *p +){ + int nNear = 0; + int i; + if( p->n ){ + for(i=0; in; i++){ + char c = (char)p->p[i]; + if( c<'0' || c>'9' ){ + sqlite3Fts5ParseError( + pParse, "expected integer, got \"%.*s\"", p->n, p->p + ); + return; + } + nNear = nNear * 10 + (p->p[i] - '0'); + } + }else{ + nNear = FTS5_DEFAULT_NEARDIST; + } + pNear->nNear = nNear; +} + +void sqlite3Fts5ParseSetColumn( + Fts5Parse *pParse, + Fts5ExprNearset *pNear, + Fts5Token *p +){ + char *z = 0; + int rc = fts5ParseStringFromToken(p, &z); + if( rc==SQLITE_OK ){ + Fts5Config *pConfig = pParse->pConfig; + int i; + for(i=0; inCol; i++){ + if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){ + pNear->iCol = i; + break; + } + } + if( i==pConfig->nCol ){ + sqlite3Fts5ParseError(pParse, "no such column: %s", z); + } + sqlite3_free(z); 
+ }else{ + pParse->rc = rc; + } +} + +/* +** Allocate and return a new expression object. If anything goes wrong (i.e. +** OOM error), leave an error code in pParse and return NULL. +*/ +Fts5Expr *sqlite3Fts5ParseExpr( + Fts5Parse *pParse, /* Parse context */ + int eType, /* FTS5_STRING, AND, OR or NOT */ + Fts5Expr *pLeft, /* Left hand child expression */ + Fts5Expr *pRight, /* Right hand child expression */ + Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ +){ + Fts5Expr *pRet = 0; + + if( pParse->rc==SQLITE_OK ){ + assert( (eType!=FTS5_STRING && pLeft && pRight && !pNear) + || (eType==FTS5_STRING && !pLeft && !pRight && pNear) + ); + pRet = (Fts5Expr*)sqlite3_malloc(sizeof(Fts5Expr)); + if( pRet==0 ){ + pParse->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, sizeof(*pRet)); + pRet->eType = eType; + pRet->pLeft = pLeft; + pRet->pRight = pRight; + pRet->pNear = pNear; + } + } + + if( pRet==0 ){ + assert( pParse->rc!=SQLITE_OK ); + sqlite3Fts5ExprFree(pLeft); + sqlite3Fts5ExprFree(pRight); + sqlite3Fts5ParseNearsetFree(pNear); + } + return pRet; +} + +static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ + char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2); + if( zQuoted ){ + int i = 0; + char *zIn = pTerm->zTerm; + zQuoted[i++] = '"'; + while( *zIn ){ + if( *zIn=='"' ) zQuoted[i++] = '"'; + zQuoted[i++] = *zIn++; + } + zQuoted[i++] = '"'; + if( pTerm->bPrefix ){ + zQuoted[i++] = ' '; + zQuoted[i++] = '*'; + } + zQuoted[i++] = '\0'; + } + return zQuoted; +} + +static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ + char *zNew; + va_list ap; + va_start(ap, zFmt); + zNew = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + if( zApp ){ + char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew); + sqlite3_free(zNew); + zNew = zNew2; + } + sqlite3_free(zApp); + return zNew; +} + +static char *fts5ExprPrint(Fts5Config *pConfig, Fts5Expr *pExpr){ + char *zRet = 0; + if( pExpr->eType==FTS5_STRING ){ + Fts5ExprNearset *pNear = pExpr->pNear; + 
int i; + int iTerm; + + if( pNear->iCol>=0 ){ + zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[pNear->iCol]); + if( zRet==0 ) return 0; + } + + if( pNear->nPhrase>1 ){ + zRet = fts5PrintfAppend(zRet, "NEAR("); + if( zRet==0 ) return 0; + } + + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( i!=0 ){ + zRet = fts5PrintfAppend(zRet, " "); + if( zRet==0 ) return 0; + } + for(iTerm=0; iTermnTerm; iTerm++){ + char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); + if( zTerm ){ + zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); + sqlite3_free(zTerm); + } + if( zTerm==0 || zRet==0 ){ + sqlite3_free(zRet); + return 0; + } + } + } + + if( pNear->nPhrase>1 ){ + zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); + if( zRet==0 ) return 0; + } + + }else{ + char *zOp = 0; + char *z1 = 0; + char *z2 = 0; + switch( pExpr->eType ){ + case FTS5_AND: zOp = "AND"; break; + case FTS5_NOT: zOp = "NOT"; break; + case FTS5_OR: zOp = "OR"; break; + default: assert( 0 ); + } + + z1 = fts5ExprPrint(pConfig, pExpr->pLeft); + z2 = fts5ExprPrint(pConfig, pExpr->pRight); + if( z1 && z2 ){ + int b1 = pExpr->pLeft->eType!=FTS5_STRING; + int b2 = pExpr->pRight->eType!=FTS5_STRING; + zRet = sqlite3_mprintf("%s%s%s %s %s%s%s", + b1 ? "(" : "", z1, b1 ? ")" : "", + zOp, + b2 ? "(" : "", z2, b2 ? ")" : "" + ); + } + sqlite3_free(z1); + sqlite3_free(z2); + } + + return zRet; +} + +/* +** The implementation of user-defined scalar function fts5_expr(). 
+*/ +static void fts5ExprFunction( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + sqlite3 *db = sqlite3_context_db_handle(pCtx); + const char *zExpr = 0; + char *zErr = 0; + Fts5Expr *pExpr = 0; + int rc; + int i; + + const char **azConfig; /* Array of arguments for Fts5Config */ + int nConfig; /* Size of azConfig[] */ + Fts5Config *pConfig = 0; + + nConfig = nArg + 2; + azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); + if( azConfig==0 ){ + sqlite3_result_error_nomem(pCtx); + return; + } + azConfig[0] = 0; + azConfig[1] = "main"; + azConfig[2] = "tbl"; + for(i=1; i 0) +** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.) +** + first leaf page number (often 1) +** + final leaf page number +** +** 2. The Averages Record: +** +** A single record within the %_data table. The data is a list of varints. +** The first value is the number of rows in the index. Then, for each column +** from left to right, the total number of tokens in the column for all +** rows of the table. +** +** 3. Segment leaves: +** +** TERM DOCLIST FORMAT: +** +** Most of each segment leaf is taken up by term/doclist data. 
The
+**   general format of the term/doclist data is:
+**
+**     varint : size of first term
+**     blob:    first term data
+**     doclist: first doclist
+**     zero-or-more {
+**       varint:  number of bytes in common with previous term
+**       varint:  number of bytes of new term data (nNew)
+**       blob:    nNew bytes of new term data
+**       doclist: next doclist
+**     }
+**
+**   doclist format:
+**
+**       varint:  first rowid
+**       poslist: first poslist
+**       zero-or-more {
+**         varint:  rowid delta (always > 0)
+**         poslist: next poslist
+**       }
+**       0x00 byte
+**
+**   poslist format:
+**
+**       collist: collist for column 0
+**       zero-or-more {
+**         0x01 byte
+**         varint: column number (I)
+**         collist: collist for column I
+**       }
+**       0x00 byte
+**
+**   collist format:
+**
+**       varint: first offset + 2
+**       zero-or-more {
+**         varint: offset delta + 2
+**       }
+**
+**   PAGINATION
+**
+**   The format described above is only accurate if the entire term/doclist
+**   data fits on a single leaf page. If this is not the case, the format
+**   is changed in two ways:
+**
+**     + if the first rowid on a page occurs before the first term, it
+**       is stored as a literal value:
+**
+**         varint:  first rowid
+**
+**     + the first term on each page is stored in the same way as the
+**       very first term of the segment:
+**
+**         varint : size of first term
+**         blob:    first term data
+**
+**   Each leaf page begins with:
+**
+**     + 2-byte unsigned containing offset to first rowid (or 0).
+**     + 2-byte unsigned containing offset to first term (or 0).
+**
+**   Followed by term/doclist data.
+**
+** 4. Segment interior nodes:
+**
+**   The interior nodes turn the list of leaves into a b+tree.
+**
+**   Each interior node begins with a varint - the page number of the left
+**   most child node. Following this, for each leaf page except the first,
+**   the interior nodes contain:
+**
+**     a) If the leaf page contains at least one term, then a term-prefix that
+**        is greater than all previous terms, and less than or equal to the
+**        first term on the leaf page.
+** +** b) If the leaf page no terms, a record indicating how many consecutive +** leaves contain no terms, and whether or not there is an associated +** by-rowid index record. +** +** By definition, there is never more than one type (b) record in a row. +** Type (b) records only ever appear on height=1 pages - immediate parents +** of leaves. Only type (a) records are pushed to higher levels. +** +** Term format: +** +** * Number of bytes in common with previous term plus 2, as a varint. +** * Number of bytes of new term data, as a varint. +** * new term data. +** +** No-term format: +** +** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there +** is an associated index-by-rowid record. +** * the number of zero-term leaves as a varint. +** +** 5. Segment doclist indexes: +** +** A list of varints - the first docid on each page (starting with the +** second) of the doclist. First element in the list is a literal docid. +** Each docid thereafter is a (negative) delta. +*/ + +/* +** Rowids for the averages and structure records in the %_data table. +*/ +#define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ +#define FTS5_STRUCTURE_ROWID(iIdx) (10 + (iIdx)) /* For structure records */ + +/* +** Macros determining the rowids used by segment nodes. All nodes in all +** segments for all indexes (the regular FTS index and any prefix indexes) +** are stored in the %_data table with large positive rowids. +** +** The %_data table may contain up to (1< ((1<=? AND id<=?" */ +}; + +/* +** Buffer object for the incremental building of string data. +*/ +struct Fts5Buffer { + u8 *p; + int n; + int nSpace; +}; + +/* +** A single record read from the %_data table. +*/ +struct Fts5Data { + u8 *p; /* Pointer to buffer containing record */ + int n; /* Size of record in bytes */ + int nRef; /* Ref count */ +}; + +/* +** Before it is flushed to a level-0 segment, term data is collected in +** the hash tables in the Fts5Index.aHash[] array. 
Hash table keys are
+** terms (or, for prefix indexes, term prefixes) and values are instances
+** of type Fts5PendingDoclist.
+**
+** Each Fts5PendingDoclist owns a linked list of Fts5PendingPoslist
+** objects, one for each rowid.
+*/
+struct Fts5PendingDoclist {
+  u8 *pTerm;                      /* Term for this entry */
+  int nTerm;                      /* Bytes of data at pTerm */
+  Fts5PendingPoslist *pPoslist;   /* Linked list of position lists */
+  /* NOTE(review): there is no member named "pPending". The two comments
+  ** below presumably refer to the list headed by pPoslist - confirm. */
+  int iCol;                       /* Column for last entry in pPending */
+  int iPos;                       /* Pos value for last entry in pPending */
+  Fts5PendingDoclist *pNext;      /* Used during merge sort */
+};
+struct Fts5PendingPoslist {
+  i64 iRowid;                     /* Rowid for this doclist entry */
+  Fts5Buffer buf;                 /* Current doclist contents */
+  Fts5PendingPoslist *pNext;      /* Previous poslist for same term */
+};
+
+/*
+** The contents of the "structure" record for each index are represented
+** in memory using an Fts5Structure object, which uses instances of the
+** other Fts5StructureXXX types as components.
+**
+** An Fts5Structure contains nLevel Fts5StructureLevel objects. Each
+** level has an array of nSeg Fts5StructureSegment objects.
+*/
+struct Fts5StructureSegment {
+  int iSegid;                     /* Segment id */
+  int nHeight;                    /* Height of segment b-tree */
+  int pgnoFirst;                  /* First leaf page number in segment */
+  int pgnoLast;                   /* Last leaf page number in segment */
+};
+struct Fts5StructureLevel {
+  int nMerge;                     /* Number of segments in incr-merge */
+  int nSeg;                       /* Total number of segments on level */
+  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
+};
+struct Fts5Structure {
+  u64 nWriteCounter;              /* Total leaves written to level 0 */
+  int nLevel;                     /* Number of levels in this index */
+  /* Trailing variable-length array, in the same allocation as the header */
+  Fts5StructureLevel aLevel[0];   /* Array of nLevel level objects */
+};
+
+/*
+** An object of type Fts5SegWriter is used to write to segments.
+*/
+/* State for a single page under construction. The Fts5SegWriter.aWriter[]
+** array consists of objects of this type. */
+struct Fts5PageWriter {
+  int pgno;                       /* Page number for this page */
+  Fts5Buffer buf;                 /* Buffer containing page data */
+  Fts5Buffer term;                /* Buffer containing previous term on page */
+};
+
+/* State for writing one segment (iSegid) of one index (iIdx). */
+struct Fts5SegWriter {
+  int iIdx;                       /* Index to write to */
+  int iSegid;                     /* Segid to write to */
+  int nWriter;                    /* Number of entries in aWriter */
+  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
+  i64 iPrevRowid;                 /* Previous docid written to current leaf */
+  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
+  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
+  int nLeafWritten;               /* Number of leaf pages written */
+  int nEmpty;                     /* Number of contiguous term-less nodes */
+};
+
+/*
+** Object for iterating through the merged results of one or more segments,
+** visiting each term/docid pair in the merged data.
+**
+** nSeg is always a power of two greater than or equal to the number of
+** segments that this object is merging data from. Both the aSeg[] and
+** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
+** with zeroed objects - these are handled as if they were iterators opened
+** on empty segments.
+**
+** The result of comparing segments aSeg[N] and aSeg[N+1], where N is an
+** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
+** comparison in this context is the index of the iterator that currently
+** points to the smaller term/rowid combination. Iterators at EOF are
+** considered to be greater than all other iterators.
+**
+** aFirst[1] contains the index in aSeg[] of the iterator that points to
+** the smallest key overall. aFirst[0] is unused.
+*/
+struct Fts5MultiSegIter {
+  int nSeg;                       /* Size of aSeg[] array */
+  Fts5SegIter *aSeg;              /* Array of segment iterators */
+  u16 *aFirst;                    /* Current merge state (see above) */
+};
+
+/*
+** Object for iterating through a single segment, visiting each term/docid
+** pair in the segment.
+**
+** pSeg:
+**   The segment to iterate through.
+**
+** iLeafPgno:
+**   Current leaf page number within segment.
+**
+** iLeafOffset:
+**   Byte offset within the current leaf that is one byte past the end of the
+**   rowid field of the current entry. Usually this is the first byte of
+**   the position list data. The exception is if the rowid for the current
+**   entry is the last thing on the leaf page.
+**
+** pLeaf:
+**   Buffer containing current leaf page data. Set to NULL at EOF.
+**
+** iTermLeafPgno, iTermLeafOffset:
+**   Leaf page number containing the last term read from the segment. And
+**   the offset immediately following the term data.
+*/
+struct Fts5SegIter {
+  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
+  /* NOTE(review): the original comment on iIdx duplicated the one on
+  ** iLeafOffset. Elsewhere in this file a member named iIdx identifies the
+  ** index a segment belongs to, which is presumably the case here too. */
+  int iIdx;                       /* Index that pSeg belongs to (see note) */
+  int iLeafPgno;                  /* Current leaf page number */
+  Fts5Data *pLeaf;                /* Current leaf data */
+  int iLeafOffset;                /* Byte offset within current leaf */
+
+  int iTermLeafPgno;              /* Leaf containing the last term read */
+  int iTermLeafOffset;            /* Offset immediately following that term */
+
+  /* Variables populated based on current entry. */
+  Fts5Buffer term;                /* Current term */
+  i64 iRowid;                     /* Current rowid */
+};
+
+/*
+** Object for iterating through a single position list.
+*/
+struct Fts5PosIter {
+  Fts5Data *pLeaf;                /* Current leaf data. NULL -> EOF. */
+  i64 iLeafRowid;                 /* Absolute rowid of current leaf */
+  int iLeafOffset;                /* Current offset within leaf */
+
+  int iCol;                       /* Presumably column of current position */
+  int iPos;                       /* Presumably offset of current position */
+};
+
+/*
+** Object for iterating through the contents of a single internal node in
+** memory.
+*/
+struct Fts5NodeIter {
+  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except,
+  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
+  const u8 *aData;
+  int nData;
+  int iOff;
+
+  /* Output variables */
+  Fts5Buffer term;
+  int nEmpty;
+  int iChild;
+};
+
+/*
+** An Fts5BtreeIter object is used to iterate through all entries in the
+** b-tree hierarchy belonging to a single fts5 segment. In this case the
+** "b-tree hierarchy" is all b-tree nodes except leaves.
Each entry in the +** b-tree hierarchy consists of the following: +** +** iLeaf: The page number of the leaf page the entry points to. +** +** term: A split-key that all terms on leaf page $leaf must be greater +** than or equal to. The "term" associated with the first b-tree +** hierarchy entry (the one that points to leaf page 1) is always +** an empty string. +** +** nEmpty: The number of empty (termless) leaf pages that immediately +** following iLeaf. +** +** The Fts5BtreeIter object is only used as part of the integrity-check code. +*/ +struct Fts5BtreeIterLevel { + Fts5NodeIter s; /* Iterator for the current node */ + Fts5Data *pData; /* Data for the current node */ +}; +struct Fts5BtreeIter { + Fts5Index *p; /* FTS5 backend object */ + Fts5StructureSegment *pSeg; /* Iterate through this segment's b-tree */ + int iIdx; /* Index pSeg belongs to */ + int nLvl; /* Size of aLvl[] array */ + Fts5BtreeIterLevel *aLvl; /* Level for each tier of b-tree */ + + /* Output variables */ + Fts5Buffer term; /* Current term */ + int iLeaf; /* Leaf containing terms >= current term */ + int nEmpty; /* Number of "empty" leaves following iLeaf */ + int bEof; /* Set to true at EOF */ +}; + +static void fts5PutU16(u8 *aOut, u16 iVal){ + aOut[0] = (iVal>>8); + aOut[1] = (iVal&0xFF); +} + +static u16 fts5GetU16(const u8 *aIn){ + return ((u16)aIn[0] << 8) + aIn[1]; +} + +/* +** Allocate and return a buffer at least nByte bytes in size. +** +** If an OOM error is encountered, return NULL and set the error code in +** the Fts5Index handle passed as the first argument. 
+*/ +static void *fts5IdxMalloc(Fts5Index *p, int nByte){ + void *pRet; + assert( p->rc==SQLITE_OK ); + pRet = sqlite3_malloc(nByte); + if( pRet==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + } + return pRet; +} + + +static int fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ + /* A no-op if an error has already occurred */ + if( *pRc ) return 1; + + if( (pBuf->n + nByte) > pBuf->nSpace ){ + u8 *pNew; + int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; + while( nNew<(pBuf->n + nByte) ){ + nNew = nNew * 2; + } + pNew = sqlite3_realloc(pBuf->p, nNew); + if( pNew==0 ){ + *pRc = SQLITE_NOMEM; + return 1; + }else{ + pBuf->nSpace = nNew; + pBuf->p = pNew; + } + } + return 0; +} + +/* +** Encode value iVal as an SQLite varint and append it to the buffer object +** pBuf. If an OOM error occurs, set the error code in p. +*/ +static void fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ + if( fts5BufferGrow(pRc, pBuf, 9) ) return; + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iVal); +} + +/* +** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set +** the error code in p. If an error has already occurred when this function +** is called, it is a no-op. +*/ +static void fts5BufferAppendBlob( + int *pRc, + Fts5Buffer *pBuf, + int nData, + const u8 *pData +){ + if( fts5BufferGrow(pRc, pBuf, nData) ) return; + memcpy(&pBuf->p[pBuf->n], pData, nData); + pBuf->n += nData; +} + +/* +** Append the nul-terminated string zStr to the buffer pBuf. This function +** ensures that the byte following the buffer data is set to 0x00, even +** though this byte is not included in the pBuf->n count. +*/ +static void fts5BufferAppendString( + int *pRc, + Fts5Buffer *pBuf, + const char *zStr +){ + int nStr = strlen(zStr); + if( fts5BufferGrow(pRc, pBuf, nStr+1) ) return; + fts5BufferAppendBlob(pRc, pBuf, nStr, (const u8*)zStr); + if( *pRc==SQLITE_OK ) pBuf->p[pBuf->n] = 0x00; +} + +/* +** Argument zFmt is a printf() style format string. 
This function performs +** the printf() style processing, then appends the results to buffer pBuf. +** +** Like fts5BufferAppendString(), this function ensures that the byte +** following the buffer data is set to 0x00, even though this byte is not +** included in the pBuf->n count. +*/ +static void fts5BufferAppendPrintf( + int *pRc, + Fts5Buffer *pBuf, + char *zFmt, ... +){ + if( *pRc==SQLITE_OK ){ + char *zTmp; + va_list ap; + va_start(ap, zFmt); + zTmp = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + + if( zTmp==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + fts5BufferAppendString(pRc, pBuf, zTmp); + sqlite3_free(zTmp); + } + } +} + +/* +** Free any buffer allocated by pBuf. Zero the structure before returning. +*/ +static void fts5BufferFree(Fts5Buffer *pBuf){ + sqlite3_free(pBuf->p); + memset(pBuf, 0, sizeof(Fts5Buffer)); +} + +/* +** Zero the contents of the buffer object. But do not free the associated +** memory allocation. +*/ +static void fts5BufferZero(Fts5Buffer *pBuf){ + pBuf->n = 0; +} + +/* +** Set the buffer to contain nData/pData. If an OOM error occurs, leave an +** the error code in p. If an error has already occurred when this function +** is called, it is a no-op. +*/ +static void fts5BufferSet( + int *pRc, + Fts5Buffer *pBuf, + int nData, + const u8 *pData +){ + pBuf->n = 0; + fts5BufferAppendBlob(pRc, pBuf, nData, pData); +} + +/* +** Compare the contents of the two buffers using memcmp(). If one buffer +** is a prefix of the other, it is considered the lesser. +** +** Return -ve if pLeft is smaller than pRight, 0 if they are equal or +** +ve if pRight is smaller than pLeft. In other words: +** +** res = *pLeft - *pRight +*/ +static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ + int nCmp = MIN(pLeft->n, pRight->n); + int res = memcmp(pLeft->p, pRight->p, nCmp); + return (res==0 ? (pLeft->n - pRight->n) : res); +} + + +/* +** Close the read-only blob handle, if it is open. 
+*/ +static void fts5CloseReader(Fts5Index *p){ + if( p->pReader ){ + sqlite3_blob_close(p->pReader); + p->pReader = 0; + } +} + +static Fts5Data *fts5DataReadOrBuffer( + Fts5Index *p, + Fts5Buffer *pBuf, + i64 iRowid +){ + Fts5Data *pRet = 0; + if( p->rc==SQLITE_OK ){ + int rc; + + /* If the blob handle is not yet open, open and seek it. Otherwise, use + ** the blob_reopen() API to reseek the existing blob handle. */ + if( p->pReader==0 ){ + Fts5Config *pConfig = p->pConfig; + rc = sqlite3_blob_open(pConfig->db, + pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader + ); + }else{ + rc = sqlite3_blob_reopen(p->pReader, iRowid); + } + + if( rc==SQLITE_OK ){ + int nByte = sqlite3_blob_bytes(p->pReader); + if( pBuf ){ + fts5BufferZero(pBuf); + fts5BufferGrow(&rc, pBuf, nByte); + rc = sqlite3_blob_read(p->pReader, pBuf->p, nByte, 0); + if( rc==SQLITE_OK ) pBuf->n = nByte; + }else{ + pRet = (Fts5Data*)fts5IdxMalloc(p, sizeof(Fts5Data) + nByte); + if( !pRet ) return 0; + + pRet->n = nByte; + pRet->p = (u8*)&pRet[1]; + pRet->nRef = 1; + rc = sqlite3_blob_read(p->pReader, pRet->p, nByte, 0); + if( rc!=SQLITE_OK ){ + sqlite3_free(pRet); + pRet = 0; + } + } + } + p->rc = rc; + } + + return pRet; +} + +/* +** Retrieve a record from the %_data table. +** +** If an error occurs, NULL is returned and an error left in the +** Fts5Index object. +*/ +static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ + Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); + assert( (pRet==0)==(p->rc!=SQLITE_OK) ); +assert( pRet ); + return pRet; +} + +/* +** Read a record from the %_data table into the buffer supplied as the +** second argument. +** +** If an error occurs, an error is left in the Fts5Index object. If an +** error has already occurred when this function is called, it is a +** no-op. 
+*/
+static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){
+  /* With a buffer supplied no object is returned, so the return value
+  ** is deliberately discarded. */
+  (void)fts5DataReadOrBuffer(p, pBuf, iRowid);
+}
+
+/*
+** Release a reference to data record returned by an earlier call to
+** fts5DataRead(). The record is freed when the last reference is 
+** released. Passing NULL is a harmless no-op.
+*/
+static void fts5DataRelease(Fts5Data *pData){
+  if( pData ){
+    pData->nRef--;
+    if( pData->nRef==0 ) sqlite3_free(pData);
+  }
+}
+
+/*
+** Take an additional reference to data record pData. Each reference must
+** eventually be dropped using fts5DataRelease(). pData must not be NULL.
+*/
+static void fts5DataReference(Fts5Data *pData){
+  pData->nRef++;
+}
+
+/*
+** INSERT OR REPLACE a record into the %_data table.
+**
+** If an error occurs, an error code is left in p->rc. If an error has
+** already occurred when this function is called, it is a no-op.
+*/
+static void fts5DataWrite(Fts5Index *p, i64 iRowid, u8 *pData, int nData){
+  if( p->rc!=SQLITE_OK ) return;
+
+  /* The REPLACE statement is prepared the first time it is required and
+  ** then kept in p->pWriter for later calls. */
+  if( p->pWriter==0 ){
+    int rc;
+    Fts5Config *pConfig = p->pConfig;
+    char *zSql = sqlite3_mprintf(
+        "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl
+    );
+    if( zSql==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0);
+      sqlite3_free(zSql);
+    }
+    if( rc!=SQLITE_OK ){
+      p->rc = rc;
+      return;
+    }
+  }
+
+  /* The blob is bound with SQLITE_STATIC, so no copy of pData is made.
+  ** The result of sqlite3_step() is not inspected directly; the value 
+  ** returned by sqlite3_reset() is what ends up in p->rc. */
+  sqlite3_bind_int64(p->pWriter, 1, iRowid);
+  sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
+  sqlite3_step(p->pWriter);
+  p->rc = sqlite3_reset(p->pWriter);
+}
+
+/*
+** Execute the following SQL:
+**
+**     DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
+**
+** If an error occurs, an error code is left in p->rc. If an error has
+** already occurred when this function is called, it is a no-op.
+*/
+static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
+  if( p->rc!=SQLITE_OK ) return;
+
+  /* As for fts5DataWrite(), the statement is prepared on first use and
+  ** kept in p->pDeleter. */
+  if( p->pDeleter==0 ){
+    int rc;
+    Fts5Config *pConfig = p->pConfig;
+    char *zSql = sqlite3_mprintf(
+        "DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl
+    );
+    if( zSql==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0);
+      sqlite3_free(zSql);
+    }
+    if( rc!=SQLITE_OK ){
+      p->rc = rc;
+      return;
+    }
+  }
+
+  /* Both ends of the rowid range are inclusive */
+  sqlite3_bind_int64(p->pDeleter, 1, iFirst);
+  sqlite3_bind_int64(p->pDeleter, 2, iLast);
+  sqlite3_step(p->pDeleter);
+  p->rc = sqlite3_reset(p->pDeleter);
+}
+
+/*
+** Close the sqlite3_blob handle used to read records from the %_data table.
+** And discard any cached reads. This function is called at the end of
+** a read transaction or when any sub-transaction is rolled back.
+*/
+static void fts5DataReset(Fts5Index *p){
+  fts5CloseReader(p);
+}
+
+/*
+** Remove all records associated with segment iSegid in index iIdx.
+*/
+static void fts5DataRemoveSegment(Fts5Index *p, int iIdx, int iSegid){
+  /* The rowids of a segment form a contiguous range that ends just before
+  ** the first rowid of segment (iSegid+1). */
+  i64 iFirst = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, 0);
+  i64 iLast = FTS5_SEGMENT_ROWID(iIdx, iSegid+1, 0, 0)-1;
+  fts5DataDelete(p, iFirst, iLast);
+}
+
+/*
+** Deserialize and return the structure record currently stored in serialized
+** form within buffer pData/nData.
+**
+** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
+** are over-allocated by one slot. This allows the structure contents
+** to be more easily edited.
+**
+** If an error occurs, *ppOut is set to NULL and an SQLite error code
+** returned. Otherwise, *ppOut is set to point to the new object and
+** SQLITE_OK returned.
+**
+** The record is read from the database and so cannot be trusted. If the
+** level or segment counts it contains are inconsistent with each other
+** or with the size of the record, FTS5_CORRUPT is returned.
+*/
+static int fts5StructureDecode(
+  const u8 *pData,                /* Buffer containing serialized structure */
+  int nData,                      /* Size of buffer pData in bytes */
+  Fts5Structure **ppOut           /* OUT: Deserialized object */
+){
+  int rc = SQLITE_OK;
+  int i = 0;
+  int iLvl;
+  int nLevel = 0;
+  int nSegment = 0;
+  int nSegRead = 0;               /* Number of segments decoded so far */
+  int nByte;                      /* Bytes of space to allocate */
+  Fts5Structure *pRet = 0;
+
+  *ppOut = 0;
+  if( pData==0 || nData<=0 ) return FTS5_CORRUPT;
+
+  /* Read the total number of levels and segments from the start of the
+  ** structure record. Use these values to allocate space for the deserialized
+  ** version of the record. */
+  i = getVarint32(&pData[i], nLevel);
+  if( i>=nData ) return FTS5_CORRUPT;
+  i += getVarint32(&pData[i], nSegment);
+
+  /* Each level occupies at least two bytes of the record and each segment
+  ** at least four, so neither count may exceed the size of the record. */
+  if( nLevel<0 || nSegment<0 || nLevel>nData || nSegment>nData ){
+    return FTS5_CORRUPT;
+  }
+
+  nByte = (
+      sizeof(Fts5Structure) + 
+      sizeof(Fts5StructureLevel) * (nLevel+1) +
+      sizeof(Fts5StructureSegment) * (nSegment+nLevel+1)
+  );
+  pRet = (Fts5Structure*)sqlite3_malloc(nByte);
+
+  if( pRet ){
+    /* The segment arrays are carved out of the space that follows the
+    ** (over-allocated) aLevel[] array. */
+    u8 *pSpace = (u8*)&pRet->aLevel[nLevel+1];
+    memset(pRet, 0, nByte);
+    pRet->nLevel = nLevel;
+    i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter);
+    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
+      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
+      int nTotal = 0;
+      int iSeg;
+
+      if( i>=nData ){
+        rc = FTS5_CORRUPT;
+        break;
+      }
+      i += getVarint32(&pData[i], pLvl->nMerge);
+      i += getVarint32(&pData[i], nTotal);
+
+      /* The segments on all levels must fit within the nSegment slots 
+      ** allocated above. And a level cannot be merging more segments 
+      ** than it contains. */
+      if( nTotal<0 || nTotal>(nSegment-nSegRead) || nTotal<pLvl->nMerge ){
+        rc = FTS5_CORRUPT;
+        break;
+      }
+      nSegRead += nTotal;
+
+      pLvl->nSeg = nTotal;
+      pLvl->aSeg = (Fts5StructureSegment*)pSpace;
+      pSpace += ((nTotal+1) * sizeof(Fts5StructureSegment));
+
+      for(iSeg=0; iSeg<nTotal; iSeg++){
+        if( i>=nData ){
+          rc = FTS5_CORRUPT;
+          break;
+        }
+        i += getVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
+        i += getVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight);
+        i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
+        i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
+      }
+    }
+
+    if( rc==SQLITE_OK ){
+      pRet->aLevel[nLevel].aSeg = (Fts5StructureSegment*)pSpace;
+    }else{
+      sqlite3_free(pRet);
+      pRet = 0;
+    }
+  }else{
+    rc = SQLITE_NOMEM;
+  }
+
+  *ppOut = pRet;
+  return rc;
+}
+
+/*
+** Read, deserialize and return the structure record for index iIdx.
+**
+** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
+** are over-allocated as described for function fts5StructureDecode() 
+** above.
+**
+** If an error occurs, NULL is returned and an error code left in the
+** Fts5Index handle. If an error has already occurred when this function
+** is called, it is a no-op.
+*/
+static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){
+  Fts5Config *pConfig = p->pConfig;
+  Fts5Structure *pRet = 0;        /* Object to return */
+  Fts5Data *pData;                /* %_data entry containing structure record */
+
+  assert( iIdx<=pConfig->nPrefix );
+  pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID(iIdx));
+  if( !pData ) return 0;
+  p->rc = fts5StructureDecode(pData->p, pData->n, &pRet);
+
+  /* The decoded structure is a separate allocation, so the raw record
+  ** is no longer required. */
+  fts5DataRelease(pData);
+  return pRet;
+}
+
+/*
+** Release a reference to an Fts5Structure object returned by an earlier
+** call to fts5StructureRead() or fts5StructureDecode().
+*/
+static void fts5StructureRelease(Fts5Structure *pStruct){
+  sqlite3_free(pStruct);
+}
+
+/*
+** Return the total number of segments in index structure pStruct.
+*/
+static int fts5StructureCountSegments(Fts5Structure *pStruct){
+  int nSegment = 0;               /* Total number of segments */
+  int iLvl;                       /* Used to iterate through levels */
+
+  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+    nSegment += pStruct->aLevel[iLvl].nSeg;
+  }
+
+  return nSegment;
+}
+
+/*
+** Serialize and store the "structure" record for index iIdx.
+**
+** If an error occurs, leave an error code in the Fts5Index object. If an
+** error has already occurred, this function is a no-op.
+*/
+static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){
+  int nSegment;                   /* Total number of segments */
+  Fts5Buffer buf;                 /* Buffer to serialize record into */
+  int iLvl;                       /* Used to iterate through levels */
+
+  /* The record begins with the number of levels, the total number of 
+  ** segments and the write-counter. This is the header that 
+  ** fts5StructureDecode() reads back. */
+  nSegment = fts5StructureCountSegments(pStruct);
+  memset(&buf, 0, sizeof(Fts5Buffer));
+  fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel);
+  fts5BufferAppendVarint(&p->rc, &buf, nSegment);
+  fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter);
+
+  /* Followed, for each level, by the merge count and segment count and
+  ** then four varints describing each segment on the level. */
+  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+    int iSeg;                     /* Used to iterate through segments */
+    Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+    fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
+    fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
+
+    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
+      fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
+      fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight);
+      fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
+      fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
+    }
+  }
+
+  /* If any of the appends above failed, p->rc is set and fts5DataWrite()
+  ** does nothing. The buffer is released either way. */
+  fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n);
+  fts5BufferFree(&buf);
+}
+
+
+/*
+** Load the next leaf page into the segment iterator.
+**
+** Any leaf currently held by the iterator is released. If the iterator
+** is already on the last leaf of its segment, or if the next leaf cannot
+** be read, pIter->pLeaf is left set to NULL.
+*/
+static void fts5SegIterNextPage(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegIter *pIter              /* Iterator to advance to next page */
+){
+  Fts5StructureSegment *pSeg = pIter->pSeg;
+  fts5DataRelease(pIter->pLeaf);
+  if( pIter->iLeafPgno<pSeg->pgnoLast ){
+    pIter->iLeafPgno++;
+    pIter->pLeaf = fts5DataRead(p, 
+        FTS5_SEGMENT_ROWID(pIter->iIdx, pSeg->iSegid, 0, pIter->iLeafPgno)
+    );
+  }else{
+    pIter->pLeaf = 0;
+  }
+}
+
+/*
+** Load the term stored at offset pIter->iLeafOffset of the current leaf
+** into pIter->term, retaining the first nKeep bytes of the previous term.
+** Then read the rowid that follows the term, moving on to the next leaf 
+** page first if the term is the last thing on the current one.
+**
+** If the segment ends where the rowid was expected, p->rc is set to
+** FTS5_CORRUPT (unless an error code has already been set).
+*/
+static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
+  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
+  int iOff = pIter->iLeafOffset;  /* Offset to read at */
+  int nNew;                       /* Bytes of new data */
+
+  iOff += getVarint32(&a[iOff], nNew);
+  pIter->term.n = nKeep;
+  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
+  iOff += nNew;
+  pIter->iTermLeafOffset = iOff;
+  pIter->iTermLeafPgno = pIter->iLeafPgno;
+  if( iOff>=pIter->pLeaf->n ){
+    fts5SegIterNextPage(p, pIter);
+    if( pIter->pLeaf==0 ){
+      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
+      return;
+    }
+    /* Skip the 4 byte header at the start of the new leaf page */
+    iOff = 4;
+    a = pIter->pLeaf->p;
+  }
+  iOff += sqlite3GetVarint(&a[iOff], (u64*)&pIter->iRowid);
+  pIter->iLeafOffset = iOff;
+}
+
+/*
+** Initialize the iterator object pIter to iterate through the entries in 
+** segment pSeg within index iIdx. The iterator is left pointing to the 
+** first entry when this function returns.
+**
+** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
+** an error has already occurred when this function is called, it is a no-op.
+*/ +static void fts5SegIterInit( + Fts5Index *p, + int iIdx, /* Config.aHash[] index of FTS index */ + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + + if( p->rc==SQLITE_OK ){ + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->iIdx = iIdx; + pIter->iLeafPgno = pSeg->pgnoFirst-1; + fts5SegIterNextPage(p, pIter); + } + + if( p->rc==SQLITE_OK ){ + u8 *a = pIter->pLeaf->p; + pIter->iLeafOffset = fts5GetU16(&a[2]); + fts5SegIterLoadTerm(p, pIter, 0); + } +} + +/* +** Advance iterator pIter to the next entry. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. It +** is not considered an error if the iterator reaches EOF. If an error has +** already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterNext( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter /* Iterator to advance */ +){ + if( p->rc==SQLITE_OK ){ + Fts5Data *pLeaf = pIter->pLeaf; + int iOff; + int bNewTerm = 0; + int nKeep = 0; + + /* Search for the end of the position list within the current page. */ + u8 *a = pLeaf->p; + int n = pLeaf->n; + for(iOff=pIter->iLeafOffset; iOffiLeafOffset = iOff; + if( iDelta==0 ){ + bNewTerm = 1; + if( iOff>=n ){ + fts5SegIterNextPage(p, pIter); + pIter->iLeafOffset = 4; + }else if( iOff!=fts5GetU16(&a[2]) ){ + pIter->iLeafOffset += getVarint32(&a[iOff], nKeep); + } + }else{ + pIter->iRowid -= iDelta; + } + }else{ + iOff = 0; + /* Next entry is not on the current page */ + while( iOff==0 ){ + fts5SegIterNextPage(p, pIter); + pLeaf = pIter->pLeaf; + if( pLeaf==0 ) break; + if( (iOff = fts5GetU16(&pLeaf->p[0])) ){ + iOff += sqlite3GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ + pIter->iLeafOffset = iOff; + bNewTerm = 1; + } + } + } + + /* Check if the iterator is now at EOF. If so, return early. 
*/ + if( pIter->pLeaf==0 ) return; + if( bNewTerm ){ + fts5SegIterLoadTerm(p, pIter, nKeep); + } + } +} + +/* +** Zero the iterator passed as the only argument. +*/ +static void fts5SegIterClear(Fts5SegIter *pIter){ + fts5BufferFree(&pIter->term); + fts5DataRelease(pIter->pLeaf); + memset(pIter, 0, sizeof(Fts5SegIter)); +} + +/* +** Do the comparison necessary to populate pIter->aFirst[iOut]. +** +** If the returned value is non-zero, then it is the index of an entry +** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing +** to a key that is a duplicate of another, higher priority, +** segment-iterator in the pSeg->aSeg[] array. +*/ +static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ + int i1; /* Index of left-hand Fts5SegIter */ + int i2; /* Index of right-hand Fts5SegIter */ + int iRes; + Fts5SegIter *p1; /* Left-hand Fts5SegIter */ + Fts5SegIter *p2; /* Right-hand Fts5SegIter */ + + assert( iOutnSeg && iOut>0 ); + + if( iOut>=(pIter->nSeg/2) ){ + i1 = (iOut - pIter->nSeg/2) * 2; + i2 = i1 + 1; + }else{ + i1 = pIter->aFirst[iOut*2]; + i2 = pIter->aFirst[iOut*2+1]; + } + p1 = &pIter->aSeg[i1]; + p2 = &pIter->aSeg[i2]; + + if( p1->pLeaf==0 ){ /* If p1 is at EOF */ + iRes = i2; + }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ + iRes = i1; + }else{ + int res = fts5BufferCompare(&p1->term, &p2->term); + if( res==0 ){ + assert( i2>i1 ); + assert( i2!=0 ); + if( p1->iRowid==p2->iRowid ) return i2; + res = (p1->iRowid > p2->iRowid) ? -1 : +1; + } + assert( res!=0 ); + if( res<0 ){ + iRes = i1; + }else{ + iRes = i2; + } + } + + pIter->aFirst[iOut] = iRes; + return 0; +} + +/* +** Free the iterator object passed as the second argument. 
+*/
+static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){
+  if( pIter ){
+    int i;
+    /* The segment iterators live in the same allocation as pIter itself,
+    ** but each owns a term buffer and a leaf reference to be released. */
+    for(i=0; i<pIter->nSeg; i++){
+      fts5SegIterClear(&pIter->aSeg[i]);
+    }
+    sqlite3_free(pIter);
+  }
+}
+
+/*
+** Sub-iterator iChanged has just been advanced. Recompute the entries
+** of the pIter->aFirst[] array that depend on it, working from the 
+** comparison immediately above the sub-iterator towards entry 1. Entries 
+** with an index smaller than iMinset are not updated.
+**
+** If a comparison finds a sub-iterator pointing at an entry that 
+** duplicates a higher priority one, that sub-iterator is advanced and
+** processing restarts from its position.
+*/
+static void fts5MultiIterAdvanced(
+  Fts5Index *p,                   /* FTS5 backend to iterate within */
+  Fts5MultiSegIter *pIter,        /* Iterator to update aFirst[] array for */
+  int iChanged,                   /* Index of sub-iterator just advanced */
+  int iMinset                     /* Minimum entry in aFirst[] to set */
+){
+  int i;
+  for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
+    int iEq;
+    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
+      fts5SegIterNext(p, &pIter->aSeg[iEq]);
+      /* The loop increment halves this value, so that the next comparison
+      ** made is the one directly above sub-iterator iEq. */
+      i = pIter->nSeg + iEq;
+    }
+  }
+}
+
+/*
+** Move the iterator to the next entry. 
+**
+** If an error occurs, an error code is left in Fts5Index.rc. It is not 
+** considered an error if the iterator reaches EOF, or if it is already at 
+** EOF when this function is called.
+*/
+static void fts5MultiIterNext(Fts5Index *p, Fts5MultiSegIter *pIter){
+  if( p->rc==SQLITE_OK ){
+    /* aFirst[1] identifies the sub-iterator holding the current entry */
+    int iFirst = pIter->aFirst[1];
+    fts5SegIterNext(p, &pIter->aSeg[iFirst]);
+    fts5MultiIterAdvanced(p, pIter, iFirst, 1);
+  }
+}
+
+/*
+** Allocate a new Fts5MultiSegIter object.
+**
+** The new object will be used to iterate through data in structure pStruct.
+** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
+** is zero or greater, data from the first nSegment segments on level iLevel
+** is merged.
+**
+** The iterator initially points to the first term/rowid entry in the 
+** iterated data.
+*/ +static void fts5MultiIterNew( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5Structure *pStruct, /* Structure of specific index */ + int iIdx, /* Config.aHash[] index of FTS index */ + int iLevel, /* Level to iterate (-1 for all) */ + int nSegment, /* Number of segments to merge (iLevel>=0) */ + Fts5MultiSegIter **ppOut /* New object */ +){ + int nSeg; /* Number of segments merged */ + int nSlot; /* Power of two >= nSeg */ + int iIter = 0; /* */ + int iSeg; /* Used to iterate through segments */ + Fts5StructureLevel *pLvl; + Fts5MultiSegIter *pNew; + + /* Allocate space for the new multi-seg-iterator. */ + if( iLevel<0 ){ + nSeg = fts5StructureCountSegments(pStruct); + }else{ + nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); + } + for(nSlot=2; nSlotaSeg[] */ + sizeof(u16) * nSlot /* pNew->aFirst[] */ + ); + if( pNew==0 ) return; + pNew->nSeg = nSlot; + pNew->aSeg = (Fts5SegIter*)&pNew[1]; + pNew->aFirst = (u16*)&pNew->aSeg[nSlot]; + + /* Initialize each of the component segment iterators. */ + if( iLevel<0 ){ + Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; + for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ + fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + } + } + }else{ + pLvl = &pStruct->aLevel[iLevel]; + for(iSeg=nSeg-1; iSeg>=0; iSeg--){ + fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + } + } + assert( iIter==nSeg ); + + /* If the above was successful, each component iterators now points + ** to the first entry in its segment. In this case initialize the + ** aFirst[] array. Or, if an error has occurred, free the iterator + ** object and set the output variable to NULL. 
*/ + if( p->rc==SQLITE_OK ){ + for(iIter=nSlot-1; iIter>0; iIter--){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ + fts5SegIterNext(p, &pNew->aSeg[iEq]); + fts5MultiIterAdvanced(p, pNew, iEq, iIter); + } + } + }else{ + fts5MultiIterFree(p, pNew); + *ppOut = 0; + } +} + +/* +** Return true if the iterator is at EOF or if an error has occurred. +** False otherwise. +*/ +static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ + return (p->rc || pIter->aSeg[ pIter->aFirst[1] ].pLeaf==0); +} + +/* +** Return the rowid of the entry that the iterator currently points +** to. If the iterator points to EOF when this function is called the +** results are undefined. +*/ +static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ + return pIter->aSeg[ pIter->aFirst[1] ].iRowid; +} + +/* +** Return a pointer to a buffer containing the term associated with the +** entry that the iterator currently points to. +*/ +static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ + Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1] ]; + *pn = p->term.n; + return p->term.p; +} + +/* +** Read and return the next 32-bit varint from the position-list iterator +** passed as the second argument. +** +** If an error occurs, zero is returned an an error code left in +** Fts5Index.rc. If an error has already occurred when this function is +** called, it is a no-op. +*/ +static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ + int iVal = 0; + if( p->rc==SQLITE_OK ){ + int iOff = pIter->iLeafOffset; + if( iOff < pIter->pLeaf->n ){ + pIter->iLeafOffset += getVarint32(&pIter->pLeaf->p[iOff], iVal); + }else{ + fts5DataRelease(pIter->pLeaf); + pIter->iLeafRowid++; + pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); + if( pIter->pLeaf ){ + pIter->iLeafOffset = 4 + getVarint32(&pIter->pLeaf->p[4], iVal); + } + } + } + return iVal; +} + +/* +** Advance the position list iterator to the next entry. 
+**
+** The value read from the position list is interpreted as follows:
+**
+**   0:  End of the position list. The leaf reference is dropped and the
+**       iterator is left at EOF.
+**   1:  The next two values are a new column number and the first 
+**       position within that column (stored with 2 added to it).
+**   *:  Any other value is the difference between this position and the
+**       previous one in the same column, with 2 added to it.
+*/
+static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){
+  int iVal;
+  iVal = fts5PosIterReadVarint(p, pIter);
+  if( iVal==0 ){
+    /* fts5PosIterReadVarint() also returns 0 if an error occurs, in which
+    ** case the iterator is treated as being at EOF. */
+    fts5DataRelease(pIter->pLeaf);
+    pIter->pLeaf = 0;
+  }
+  else if( iVal==1 ){
+    pIter->iCol = fts5PosIterReadVarint(p, pIter);
+    pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2;
+  }else{
+    pIter->iPos += (iVal - 2);
+  }
+}
+
+/*
+** Initialize the Fts5PosIter object passed as the final argument to iterate
+** through the position-list associated with the index entry that iterator 
+** pMulti currently points to.
+**
+** If an error has already occurred when this function is called, it is 
+** a no-op and pIter is not modified.
+*/
+static void fts5PosIterInit(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5MultiSegIter *pMulti,       /* Multi-seg iterator to read pos-list from */
+  Fts5PosIter *pIter              /* Initialize this object */
+){
+  if( p->rc==SQLITE_OK ){
+    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ];
+    int iId = pSeg->pSeg->iSegid;
+
+    memset(pIter, 0, sizeof(*pIter));
+    pIter->pLeaf = pSeg->pLeaf;
+    pIter->iLeafOffset = pSeg->iLeafOffset;
+    pIter->iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno);
+    /* The leaf is shared with the segment iterator, so the position 
+    ** iterator takes a reference of its own. */
+    fts5DataReference(pIter->pLeaf);
+    /* Load the first position */
+    fts5PosIterNext(p, pIter);
+  }
+}
+
+/*
+** Return true if the position iterator passed as the second argument is
+** at EOF. Or if an error has already occurred. Otherwise, return false.
+*/
+static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
+  return (p->rc || pIter->pLeaf==0);
+}
+
+
+/*
+** Allocate memory. The difference between this function and fts5IdxMalloc()
+** is that this increments the Fts5Index.nPendingData variable by the
+** number of bytes allocated. It should be used for all allocations used
+** to store pending-data within the in-memory hash tables.
+**
+** The counter is incremented before the allocation is attempted, and so
+** is updated even if the allocation fails.
+*/
+static void *fts5PendingMalloc(Fts5Index *p, int nByte){
+  p->nPendingData += nByte;
+  return fts5IdxMalloc(p, nByte);
+}
+
+/*
+** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken)
+** in hash table for index iIdx. If iIdx is zero, this is the main terms 
+** index.
Values of 1 and greater for iIdx are prefix indexes. +** +** If an OOM error is encountered, set the Fts5Index.rc error code +** accordingly. +*/ +static void fts5AddTermToHash( + Fts5Index *p, /* Index object to write to */ + int iIdx, /* Entry in p->aHash[] to update */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + Fts5Config *pConfig = p->pConfig; + Fts3Hash *pHash; + Fts5PendingDoclist *pDoclist; + Fts5PendingPoslist *pPoslist; + i64 iRowid = p->iWriteRowid; /* Rowid associated with these tokens */ + + /* If an error has already occured this call is a no-op. */ + if( p->rc!=SQLITE_OK ) return; + + /* Find the hash table to use. It has already been allocated. */ + assert( iIdx<=pConfig->nPrefix ); + assert( iIdx==0 || nToken==pConfig->aPrefix[iIdx-1] ); + pHash = &p->aHash[iIdx]; + + /* Find the doclist to append to. Allocate a new doclist object if + ** required. */ + pDoclist = (Fts5PendingDoclist*)fts3HashFind(pHash, pToken, nToken); + if( pDoclist==0 ){ + Fts5PendingDoclist *pDel; + pDoclist = fts5PendingMalloc(p, sizeof(Fts5PendingDoclist) + nToken); + if( pDoclist==0 ) return; + pDoclist->pTerm = (u8*)&pDoclist[1]; + pDoclist->nTerm = nToken; + memcpy(pDoclist->pTerm, pToken, nToken); + pDel = fts3HashInsert(pHash, pDoclist->pTerm, nToken, pDoclist); + if( pDel ){ + assert( pDoclist==pDel ); + sqlite3_free(pDel); + p->rc = SQLITE_NOMEM; + return; + } + } + + /* Find the poslist to append to. Allocate a new object if required. */ + pPoslist = pDoclist->pPoslist; + if( pPoslist==0 || pPoslist->iRowid!=iRowid ){ + pPoslist = fts5PendingMalloc(p, sizeof(Fts5PendingPoslist)); + if( pPoslist==0 ) return; + pPoslist->pNext = pDoclist->pPoslist; + pPoslist->iRowid = iRowid; + pDoclist->pPoslist = pPoslist; + pDoclist->iCol = 0; + pDoclist->iPos = 0; + } + + /* Append the values to the position list. 
*/ + if( iCol>=0 ){ + p->nPendingData -= pPoslist->buf.nSpace; + if( iCol!=pDoclist->iCol ){ + fts5BufferAppendVarint(&p->rc, &pPoslist->buf, 1); + fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iCol); + pDoclist->iCol = iCol; + pDoclist->iPos = 0; + } + fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iPos + 2 - pDoclist->iPos); + p->nPendingData += pPoslist->buf.nSpace; + pDoclist->iPos = iPos; + } +} + +/* +** Free the pending-doclist object passed as the only argument. +*/ +static void fts5FreePendingDoclist(Fts5PendingDoclist *p){ + Fts5PendingPoslist *pPoslist; + Fts5PendingPoslist *pNext; + for(pPoslist=p->pPoslist; pPoslist; pPoslist=pNext){ + pNext = pPoslist->pNext; + fts5BufferFree(&pPoslist->buf); + sqlite3_free(pPoslist); + } + sqlite3_free(p); +} + +/* +** Insert or remove data to or from the index. Each time a document is +** added to or removed from the index, this function is called one or more +** times. +** +** For an insert, it must be called once for each token in the new document. +** If the operation is a delete, it must be called (at least) once for each +** unique token in the document with an iCol value less than zero. The iPos +** argument is ignored for a delete. +*/ +void sqlite3Fts5IndexWrite( + Fts5Index *p, /* Index to write to */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + int i; /* Used to iterate through indexes */ + Fts5Config *pConfig = p->pConfig; + + /* If an error has already occured this call is a no-op. 
*/ + if( p->rc!=SQLITE_OK ) return; + + /* Allocate hash tables if they have not already been allocated */ + if( p->aHash==0 ){ + int nHash = pConfig->nPrefix + 1; + p->aHash = (Fts3Hash*)sqlite3_malloc(sizeof(Fts3Hash) * nHash); + if( p->aHash==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + for(i=0; iaHash[i], FTS3_HASH_STRING, 0); + } + } + } + + /* Add the new token to the main terms hash table. And to each of the + ** prefix hash tables that it is large enough for. */ + fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); + for(i=0; inPrefix; i++){ + if( nToken>=pConfig->aPrefix[i] ){ + fts5AddTermToHash(p, i+1, iCol, iPos, pToken, pConfig->aPrefix[i]); + } + } +} + +/* +** Allocate a new segment-id for the structure pStruct. +** +** If an error has already occurred, this function is a no-op. 0 is +** returned in this case. +*/ +static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ + int i; + if( p->rc!=SQLITE_OK ) return 0; + + for(i=0; i<100; i++){ + int iSegid; + sqlite3_randomness(sizeof(int), (void*)&iSegid); + iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1); + if( iSegid ){ + int iLvl, iSeg; + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ + iSegid = 0; + } + } + } + } + if( iSegid ) return iSegid; + } + + p->rc = SQLITE_ERROR; + return 0; +} + +static Fts5PendingDoclist *fts5PendingMerge( + Fts5Index *p, + Fts5PendingDoclist *pLeft, + Fts5PendingDoclist *pRight +){ + Fts5PendingDoclist *p1 = pLeft; + Fts5PendingDoclist *p2 = pRight; + Fts5PendingDoclist *pRet = 0; + Fts5PendingDoclist **ppOut = &pRet; + + while( p1 || p2 ){ + if( p1==0 ){ + *ppOut = p2; + p2 = 0; + }else if( p2==0 ){ + *ppOut = p1; + p1 = 0; + }else{ + int nCmp = MIN(p1->nTerm, p2->nTerm); + int res = memcmp(p1->pTerm, p2->pTerm, nCmp); + if( res==0 ) res = p1->nTerm - p2->nTerm; + + if( res>0 ){ + /* p2 is smaller */ + *ppOut = p2; + ppOut = &p2->pNext; + p2 = p2->pNext; + }else{ + /* p1 is 
smaller */ + *ppOut = p1; + ppOut = &p1->pNext; + p1 = p1->pNext; + } + *ppOut = 0; + } + } + + return pRet; +} + +/* +** Extract all tokens from hash table iHash and link them into a list +** in sorted order. The hash table is cleared before returning. It is +** the responsibility of the caller to free the elements of the returned +** list. +** +** If an error occurs, set the Fts5Index.rc error code. If an error has +** already occurred, this function is a no-op. +*/ +static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){ + const int nMergeSlot = 32; + Fts3Hash *pHash; + Fts3HashElem *pE; /* Iterator variable */ + Fts5PendingDoclist **ap; + Fts5PendingDoclist *pList; + int i; + + ap = fts5IdxMalloc(p, sizeof(Fts5PendingDoclist*) * nMergeSlot); + if( !ap ) return 0; + + pHash = &p->aHash[iHash]; + for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ + int i; + Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE); + assert( pDoclist->pNext==0 ); + for(i=0; ap[i]; i++){ + pDoclist = fts5PendingMerge(p, pDoclist, ap[i]); + ap[i] = 0; + } + ap[i] = pDoclist; + } + + pList = 0; + for(i=0; iiOff offset currently points to an entry indicating one +** or more term-less nodes, advance past it and set pIter->nEmpty to +** the number of empty child nodes. +*/ +static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ + if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ + pIter->iOff++; + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); + }else{ + pIter->nEmpty = 0; + } +} + +/* +** Advance to the next entry within the node. 
+**
+** If all data in the node has been consumed, pIter->aData is set to NULL
+** to indicate EOF. Otherwise the next prefix-compressed term is decoded
+** into pIter->term and pIter->iChild is advanced past the previous child
+** and any term-less children that followed it. If an error occurs while 
+** decoding, *pRc is set and the iterator is also left at EOF.
+*/
+static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){
+  if( pIter->iOff>=pIter->nData ){
+    pIter->aData = 0;
+    pIter->iChild += pIter->nEmpty;
+  }else{
+    int nPre, nNew;
+    pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nPre);
+    pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nNew);
+    /* The prefix length is stored with 2 added to it (see 
+    ** fts5WriteBtreeTerm()). Keep that many bytes of the previous term 
+    ** and append the nNew bytes of new data. */
+    pIter->term.n = nPre-2;
+    fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff);
+    pIter->iOff += nNew;
+    pIter->iChild += (1 + pIter->nEmpty);
+    fts5NodeIterGobbleNEmpty(pIter);
+    if( *pRc ) pIter->aData = 0;
+  }
+}
+
+
+/*
+** Initialize the iterator object pIter to iterate through the internal
+** segment node stored in buffer aData/nData. The node begins with a 
+** varint that is loaded into pIter->iChild.
+*/
+static void fts5NodeIterInit(int nData, const u8 *aData, Fts5NodeIter *pIter){
+  memset(pIter, 0, sizeof(*pIter));
+  pIter->aData = aData;
+  pIter->nData = nData;
+  pIter->iOff = getVarint32(aData, pIter->iChild);
+  fts5NodeIterGobbleNEmpty(pIter);
+}
+
+/*
+** Free any memory allocated by the iterator object.
+*/
+static void fts5NodeIterFree(Fts5NodeIter *pIter){
+  fts5BufferFree(&pIter->term);
+}
+
+
+/*
+** This is called once for each leaf page except the first that contains
+** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
+** is larger than all terms written to earlier leaves, and equal to or
+** smaller than the first term on the new leaf.
+**
+** If an error occurs, an error code is left in Fts5Index.rc. If an error
+** has already occurred when this function is called, it is a no-op.
+*/
+static void fts5WriteBtreeTerm(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer object */
+  int nTerm, const u8 *pTerm      /* First term on new page */
+){
+  int iHeight;
+
+  /* If an error has already occurred this call is a no-op. */
+  if( p->rc!=SQLITE_OK ) return;
+
+  /* Starting with the nodes directly above the leaves, try to add the 
+  ** term to the current node at each height. If that node is already
+  ** full it is flushed to disk and the term is added to its parent
+  ** instead, and so on up the tree. */
+  for(iHeight=1; 1; iHeight++){
+    Fts5PageWriter *pPage;
+
+    if( iHeight>=pWriter->nWriter ){
+      /* The tree is growing by one level. Extend the aWriter[] array. */
+      Fts5PageWriter *aNew;
+      Fts5PageWriter *pNew;
+      int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);
+      aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew);
+      if( aNew==0 ){
+        /* pWriter->aWriter is still valid and is freed by the caller as
+        ** usual. Record the error so that the missing term is not
+        ** silently dropped from the b-tree. */
+        p->rc = SQLITE_NOMEM;
+        return;
+      }
+
+      pNew = &aNew[pWriter->nWriter];
+      memset(pNew, 0, sizeof(Fts5PageWriter));
+      pNew->pgno = 1;
+      fts5BufferAppendVarint(&p->rc, &pNew->buf, 1);
+
+      pWriter->nWriter++;
+      pWriter->aWriter = aNew;
+    }
+    pPage = &pWriter->aWriter[iHeight];
+
+    if( pWriter->nEmpty ){
+      /* Record the number of term-less leaves that precede this term */
+      assert( iHeight==1 );
+      fts5BufferAppendVarint(&p->rc, &pPage->buf, 0);
+      fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->nEmpty);
+      pWriter->nEmpty = 0;
+    }
+
+    if( pPage->buf.n>=p->pgsz ){
+      /* pPage will be written to disk. The term will be written into the
+      ** parent of pPage.  */
+      i64 iRowid = FTS5_SEGMENT_ROWID(
+          pWriter->iIdx, pWriter->iSegid, iHeight, pPage->pgno
+      );
+      fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
+      fts5BufferZero(&pPage->buf);
+      fts5BufferZero(&pPage->term);
+      fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno);
+      pPage->pgno++;
+    }else{
+      /* Terms are prefix-compressed against the previous term on the same
+      ** node. The prefix length is stored with 2 added to it. */
+      int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
+      fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2);
+      fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre);
+      fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre);
+      fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
+      break;
+    }
+  }
+}
+
+/*
+** This is called when a leaf page that contains no terms is flushed. All
+** it does is count the page. The count is written into the b-tree the 
+** next time fts5WriteBtreeTerm() is called, or by fts5WriteFinish().
+*/
+static void fts5WriteBtreeNoTerm(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter          /* Writer object */
+){
+  pWriter->nEmpty++;
+}
+
+/*
+** Write the current leaf page to the %_data table and prepare the page 
+** writer to begin accumulating the next leaf.
+*/
+static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
+  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
+  Fts5PageWriter *pPage = &pWriter->aWriter[0];
+  i64 iRowid;
+
+  if( pPage->term.n==0 ){
+    /* No term was written to this page. */
+    fts5WriteBtreeNoTerm(p, pWriter);
+  }
+
+  /* Write the current page to the db. */
+  iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, 0, pPage->pgno);
+  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
+
+  /* Initialize the next page. It begins with a zeroed 4 byte header. */
+  fts5BufferZero(&pPage->buf);
+  fts5BufferZero(&pPage->term);
+  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
+  pPage->pgno++;
+
+  /* Increase the leaves written counter */
+  pWriter->nLeafWritten++;
+}
+
+/*
+** Append term pTerm/nTerm to the segment being written by the writer passed
+** as the second argument.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has 
+** already occurred, this function is a no-op.
+*/
+static void fts5WriteAppendTerm(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  int nTerm, const u8 *pTerm
+){
+  int nPrefix;                    /* Bytes of prefix compression for term */
+  Fts5PageWriter *pPage;
+
+  /* Check the error code before touching the page array: fts5WriteInit()
+  ** returns with aWriter==0 if its allocation fails. */
+  if( p->rc ) return;
+  pPage = &pWriter->aWriter[0];
+
+  assert( pPage->buf.n==0 || pPage->buf.n>4 );
+  if( pPage->buf.n==0 ){
+    /* Zero the first term and first docid fields */
+    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
+    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
+    assert( pPage->term.n==0 );
+  }
+  if( p->rc ) return;
+
+  if( pPage->term.n==0 ){
+    /* Update the "first term" field of the page header. */
+    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
+    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
+    nPrefix = 0;
+    if( pWriter->aWriter[0].pgno!=1 ){
+      /* First term on any leaf other than the first: also add it to the
+      ** b-tree. Re-read pPage afterwards, as the original code does. */
+      fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm);
+      pPage = &pWriter->aWriter[0];
+    }
+  }else{
+    nPrefix = fts5PrefixCompress(
+        pPage->term.n, pPage->term.p, nTerm, pTerm
+    );
+    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
+  }
+
+  /* Append the number of bytes of new data, then the term data itself
+  ** to the page. */
+  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
+  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
+
+  /* Update the Fts5PageWriter.term field. */
+  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
+
+  pWriter->bFirstRowidInPage = 0;
+  pWriter->bFirstRowidInDoclist = 1;
+
+  /* If the current leaf page is full, flush it to disk. */
+  if( pPage->buf.n>=p->pgsz ){
+    fts5WriteFlushLeaf(p, pWriter);
+    pWriter->bFirstRowidInPage = 1;
+  }
+}
+
+/*
+** Append a docid to the writer's output.
+**
+** The first rowid of a doclist, and the first rowid on each leaf page, is
+** stored in full. Every other rowid is stored as the difference from the
+** previous one (iPrevRowid - iRowid), so within a doclist rowids must be
+** passed in descending order.
+**
+** If an error has already occurred, this function is a no-op.
+*/
+static void fts5WriteAppendRowid(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  i64 iRowid
+){
+  Fts5PageWriter *pPage;
+
+  /* The page buffer is written through below, so do not go on after an
+  ** earlier failure (e.g. aWriter or the buffer could not be allocated). */
+  if( p->rc ) return;
+  pPage = &pWriter->aWriter[0];
+
+  /* If this is to be the first docid written to the page, set the
+  ** docid-pointer in the page-header. */
+  if( pWriter->bFirstRowidInPage ) fts5PutU16(pPage->buf.p, pPage->buf.n);
+
+  /* Write the docid. */
+  if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
+    fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
+  }else{
+    assert( iRowid<pWriter->iPrevRowid );
+    fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->iPrevRowid - iRowid);
+  }
+  pWriter->iPrevRowid = iRowid;
+  pWriter->bFirstRowidInDoclist = 0;
+  pWriter->bFirstRowidInPage = 0;
+
+  if( pPage->buf.n>=p->pgsz ){
+    fts5WriteFlushLeaf(p, pWriter);
+    pWriter->bFirstRowidInPage = 1;
+  }
+}
+
+/*
+** Append a single position-list varint (iVal) to the current leaf page,
+** flushing the page if it has reached the configured page size.
+**
+** If an error has already occurred, this function is a no-op.
+*/
+static void fts5WriteAppendPoslistInt(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  int iVal
+){
+  Fts5PageWriter *pPage;
+
+  if( p->rc ) return;
+  pPage = &pWriter->aWriter[0];
+  fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal);
+  if( pPage->buf.n>=p->pgsz ){
+    fts5WriteFlushLeaf(p, pWriter);
+    pWriter->bFirstRowidInPage = 1;
+  }
+}
+
+/*
+** Append a varint 0 to the current leaf page. The callers use this as the
+** position-list and doclist terminator. Unlike the other append functions
+** this one never flushes the page.
+*/
+static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
+  fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
+}
+
+/*
+** Write the contents of pending-doclist object pDoclist to writer pWriter.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has
+** already occurred, this function is a no-op.
+*/
+static void fts5WritePendingDoclist(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Write to this writer object */
+  Fts5PendingDoclist *pDoclist    /* Doclist to write to pWriter */
+){
+  Fts5PendingPoslist *pPoslist;   /* Used to iterate through the doclist */
+
+  /* Append the term */
+  fts5WriteAppendTerm(p, pWriter, pDoclist->nTerm, pDoclist->pTerm);
+
+  /* Append the position list for each rowid */
+  for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){
+    int i = 0;
+
+    /* Append the rowid itself */
+    fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid);
+
+    /* Copy the position list to the output segment */
+    while( i<pPoslist->buf.n){
+      int iVal;
+      i += getVarint32(&pPoslist->buf.p[i], iVal);
+      fts5WriteAppendPoslistInt(p, pWriter, iVal);
+    }
+
+    /* Write the position list terminator */
+    fts5WriteAppendZerobyte(p, pWriter);
+  }
+
+  /* Write the doclist terminator */
+  fts5WriteAppendZerobyte(p, pWriter);
+}
+
+/*
+** Finish writing a segment: flush the final leaf, write each b-tree page
+** above leaf level, and free all memory held by the writer.
+**
+** *pnLeaf is set to the page number of the last leaf (sampled before the
+** final flush increments it) and *pnHeight to the number of page-writer
+** levels. If the writer's page array was never allocated, both outputs are
+** set to zero and nothing is written.
+*/
+static void fts5WriteFinish(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  int *pnHeight,
+  int *pnLeaf
+){
+  int i;
+
+  if( pWriter->aWriter==0 ){
+    /* fts5WriteInit() or fts5WriteInitForAppend() failed to allocate the
+    ** page array. There is nothing to flush or free. */
+    *pnLeaf = 0;
+    *pnHeight = 0;
+    return;
+  }
+
+  *pnLeaf = pWriter->aWriter[0].pgno;
+  *pnHeight = pWriter->nWriter;
+  fts5WriteFlushLeaf(p, pWriter);
+  if( pWriter->nWriter>1 && pWriter->nEmpty ){
+    /* Append a 0 varint followed by the count of term-less leaves to the
+    ** lowest b-tree level. */
+    Fts5PageWriter *pPg = &pWriter->aWriter[1];
+    fts5BufferAppendVarint(&p->rc, &pPg->buf, 0);
+    fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
+  }
+  for(i=1; i<pWriter->nWriter; i++){
+    Fts5PageWriter *pPg = &pWriter->aWriter[i];
+    i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno);
+    fts5DataWrite(p, iRow, pPg->buf.p, pPg->buf.n);
+  }
+  for(i=0; i<pWriter->nWriter; i++){
+    Fts5PageWriter *pPg = &pWriter->aWriter[i];
+    fts5BufferFree(&pPg->term);
+    fts5BufferFree(&pPg->buf);
+  }
+  sqlite3_free(pWriter->aWriter);
+}
+
+/*
+** Initialize writer pWriter to write a new segment with id iSegid in index
+** iIdx. The writer starts out with a single (leaf) level whose first page
+** number is 1. If the allocation fails, pWriter->aWriter is left NULL and
+** pWriter->nWriter zero.
+*/
+static void fts5WriteInit(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  int iIdx, int iSegid
+){
+  memset(pWriter, 0, sizeof(Fts5SegWriter));
+  pWriter->iIdx = iIdx;
+  pWriter->iSegid = iSegid;
+
+  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter));
+  if( pWriter->aWriter==0 ) return;
+  pWriter->nWriter = 1;
+  pWriter->aWriter[0].pgno = 1;
+}
+
+/*
+** Initialize writer pWriter so that it appends to existing segment pSeg
+** instead of starting a new one. One page-writer is allocated per level of
+** the segment's b-tree. For each level above the leaves, the existing page
+** is loaded into the writer's buffer, its last term is recorded, and its
+** last child pointer is used to find the page to load one level down.
+** New leaves are numbered from pSeg->pgnoLast+1.
+*/
+static void fts5WriteInitForAppend(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter,         /* Writer to initialize */
+  int iIdx,                       /* Index segment is a part of */
+  Fts5StructureSegment *pSeg      /* Segment object to append to */
+){
+  int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
+  memset(pWriter, 0, sizeof(Fts5SegWriter));
+  pWriter->iIdx = iIdx;
+  pWriter->iSegid = pSeg->iSegid;
+  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);
+  pWriter->nWriter = pSeg->nHeight;
+
+  if( p->rc==SQLITE_OK ){
+    int pgno = 1;
+    int i;
+    pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
+    for(i=pSeg->nHeight-1; i>0; i--){
+      i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pgno);
+      Fts5PageWriter *pPg = &pWriter->aWriter[i];
+      pPg->pgno = pgno;
+      fts5DataBuffer(p, &pPg->buf, iRowid);
+      if( p->rc==SQLITE_OK ){
+        /* Advance to the last entry on the page. */
+        Fts5NodeIter ss;
+        fts5NodeIterInit(pPg->buf.n, pPg->buf.p, &ss);
+        while( ss.aData ) fts5NodeIterNext(&p->rc, &ss);
+        fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p);
+        pgno = ss.iChild;
+        fts5NodeIterFree(&ss);
+      }
+    }
+    if( pSeg->nHeight==1 ){
+      pWriter->nEmpty = pSeg->pgnoLast-1;
+    }
+    assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
+  }
+}
+
+/*
+** Iterator pIter was used to iterate through the input segments of an
+** incremental merge operation. This function is called if the incremental
+** merge step has finished but the input has not been completely exhausted.
+*/
+static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){
+  int i;
+  Fts5Buffer buf;
+  memset(&buf, 0, sizeof(Fts5Buffer));
+  for(i=0; i<pIter->nSeg; i++){
+    Fts5SegIter *pSeg = &pIter->aSeg[i];
+    if( pSeg->pSeg==0 ){
+      /* no-op */
+    }else if( pSeg->pLeaf==0 ){
+      /* No current leaf for this input: mark the segment as having no
+      ** pages at all. */
+      pSeg->pSeg->pgnoLast = 0;
+      pSeg->pSeg->pgnoFirst = 0;
+    }else{
+      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
+      i64 iLeafRowid;
+      Fts5Data *pData;
+      int iId = pSeg->pSeg->iSegid;
+      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04};
+
+      iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iTermLeafPgno);
+      pData = fts5DataRead(p, iLeafRowid);
+      if( pData ){
+        /* Rebuild the leaf holding the current term so that it begins with
+        ** that term, written in full, followed by the original page content
+        ** from offset iOff onwards. */
+        fts5BufferZero(&buf);
+        fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
+        fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
+        fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
+        fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);
+        fts5DataRelease(pData);
+        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
+
+        /* Delete the leaves from page 1 up to and including this one, then
+        ** write the rebuilt page back under the same rowid. */
+        fts5DataDelete(p, FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, 1),iLeafRowid);
+        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
+      }
+    }
+  }
+  fts5BufferFree(&buf);
+}
+
+/*
+** Merge segments from level iLvl of index iIdx into a single segment on
+** level iLvl+1.
+**
+** If the level already has a merge in progress (nMerge!=0), output is
+** appended to the last segment of the output level and the same nMerge
+** input segments are read. Otherwise a new output segment is allocated and
+** every segment on the input level is used as input.
+**
+** The merge stops at the first new term encountered after more than *pnRem
+** leaves have been written. If the input was exhausted, the input segments
+** are removed from the level; otherwise they are trimmed with
+** fts5TrimSegments() and the level is marked as having a merge in progress.
+** *pnRem is reduced by the number of leaves written.
+*/
+static void fts5IndexMergeLevel(
+  Fts5Index *p,                   /* FTS5 backend object */
+  int iIdx,                       /* Index to work on */
+  Fts5Structure *pStruct,         /* Structure of index iIdx */
+  int iLvl,                       /* Level to read input from */
+  int *pnRem                      /* Write up to this many output leaves */
+){
+  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+  Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1];
+  Fts5MultiSegIter *pIter = 0;    /* Iterator to read input data */
+  int nRem = *pnRem;              /* Output leaf pages left to write */
+  int nInput;                     /* Number of input segments */
+  Fts5SegWriter writer;           /* Writer object */
+  Fts5StructureSegment *pSeg;     /* Output segment */
+  Fts5Buffer term;
+  int bRequireDoclistTerm = 0;
+
+  assert( iLvl<pStruct->nLevel );
+  assert( pLvl->nMerge<=pLvl->nSeg );
+
+  memset(&writer, 0, sizeof(Fts5SegWriter));
+  memset(&term, 0, sizeof(Fts5Buffer));
+  writer.iIdx = iIdx;
+  if( pLvl->nMerge ){
+    assert( pLvlOut->nSeg>0 );
+    nInput = pLvl->nMerge;
+    fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]);
+    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
+  }else{
+    int iSegid = fts5AllocateSegid(p, pStruct);
+    fts5WriteInit(p, &writer, iIdx, iSegid);
+
+    /* Add the new segment to the output level */
+    if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++;
+    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
+    pLvlOut->nSeg++;
+    pSeg->pgnoFirst = 1;
+    pSeg->iSegid = iSegid;
+
+    /* Read input from all segments in the input level */
+    nInput = pLvl->nSeg;
+  }
+#if 0
+fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl);
+fflush(stdout);
+#endif
+
+  for(fts5MultiIterNew(p, pStruct, iIdx, iLvl, nInput, &pIter);
+      fts5MultiIterEof(p, pIter)==0;
+      fts5MultiIterNext(p, pIter)
+  ){
+    Fts5PosIter sPos;             /* Used to iterate through position list */
+    int iCol = 0;                 /* Current output column */
+    int iPos = 0;                 /* Current output position */
+    int nTerm;
+    const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);
+
+    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
+      if( writer.nLeafWritten>nRem ) break;
+
+      /* This is a new term. Append a term to the output segment. */
+      if( bRequireDoclistTerm ){
+        fts5WriteAppendZerobyte(p, &writer);
+      }
+      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
+      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
+      bRequireDoclistTerm = 1;
+    }
+
+    /* Append the rowid to the output */
+    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
+
+    /* Copy the position list from input to output. A change of column is
+    ** written as the value 1 followed by the column number; positions are
+    ** written as (delta from the previous position) + 2. */
+    for(fts5PosIterInit(p, pIter, &sPos);
+        fts5PosIterEof(p, &sPos)==0;
+        fts5PosIterNext(p, &sPos)
+    ){
+      if( sPos.iCol!=iCol ){
+        fts5WriteAppendPoslistInt(p, &writer, 1);
+        fts5WriteAppendPoslistInt(p, &writer, sPos.iCol);
+        iCol = sPos.iCol;
+        iPos = 0;
+      }
+      fts5WriteAppendPoslistInt(p, &writer, (sPos.iPos-iPos) + 2);
+      iPos = sPos.iPos;
+    }
+    fts5WriteAppendZerobyte(p, &writer);
+  }
+
+  /* Flush the last leaf page to disk. Set the output segment b-tree height
+  ** and last leaf page number at the same time. */
+  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);
+
+  if( fts5MultiIterEof(p, pIter) ){
+    int i;
+
+    /* Remove the redundant segments from the %_data table.
+    **
+    ** NOTE(review): this copy of the patch lost the text between "i" and
+    ** "aSeg[i].iSegid);" in the loop below. The loop bound and the callee
+    ** name/arguments are reconstructed and must be confirmed against the
+    ** upstream commit. */
+    for(i=0; i<nInput; i++){
+      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
+    }
+
+    /* Remove the redundant segments from the input level */
+    if( pLvl->nSeg!=nInput ){
+      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
+      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
+    }
+    pLvl->nSeg -= nInput;
+    pLvl->nMerge = 0;
+  }else{
+    fts5TrimSegments(p, pIter);
+    pLvl->nMerge = nInput;
+  }
+
+  fts5MultiIterFree(p, pIter);
+  fts5BufferFree(&term);
+  *pnRem -= writer.nLeafWritten;
+}
+
+/*
+** A total of nLeaf leaf pages of data has just been flushed to a level-0
+** segment in index iIdx with structure pStruct. This function updates the
+** write-counter accordingly and, if necessary, performs incremental merge
+** work.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has
+** already occurred, this function is a no-op.
+*/
+static void fts5IndexWork(
+  Fts5Index *p,                   /* FTS5 backend object */
+  int iIdx,                       /* Index to work on */
+  Fts5Structure *pStruct,         /* Current structure of index */
+  int nLeaf                       /* Number of output leaves just written */
+){
+  i64 nWrite;                     /* Initial value of write-counter */
+  int nWork;                      /* Number of work-quanta to perform */
+  int nRem;                       /* Number of leaf pages left to write */
+
+  /* Honour the documented contract: do nothing once an error has occurred.
+  ** Other code in this file checks the result of fts5StructureRead() for
+  ** NULL, so tolerate a missing structure as well. */
+  if( p->rc!=SQLITE_OK || pStruct==0 ) return;
+
+  /* Update the write-counter. While doing so, set nWork. */
+  nWrite = pStruct->nWriteCounter;
+  nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit);
+  pStruct->nWriteCounter += nLeaf;
+  nRem = p->nWorkUnit * nWork * pStruct->nLevel;
+
+  while( nRem>0 ){
+    int iLvl;                   /* To iterate through levels */
+    int iBestLvl = -1;          /* Level offering the most input segments */
+    int nBest = 0;              /* Number of input segments on best level */
+
+    /* Set iBestLvl to the level to read input segments from. The scan stops
+    ** at the first level that already has a merge in progress. */
+    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+      if( pLvl->nMerge ){
+        if( pLvl->nMerge>nBest ){
+          iBestLvl = iLvl;
+          nBest = pLvl->nMerge;
+        }
+        break;
+      }
+      if( pLvl->nSeg>nBest ){
+        nBest = pLvl->nSeg;
+        iBestLvl = iLvl;
+      }
+    }
+    assert( iBestLvl>=0 && nBest>0 );
+
+    /* Do not start a new merge on fewer than nMinMerge segments. */
+    if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break;
+    fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);
+    assert( nRem==0 || p->rc==SQLITE_OK );
+  }
+}
+
+/*
+** Flush the contents of in-memory hash table iHash to a new level-0
+** segment on disk. Also update the corresponding structure record.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has
+** already occurred, this function is a no-op.
+*/
+static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
+  Fts5Structure *pStruct;
+  int iSegid;
+  int pgnoLast = 0;               /* Last leaf page number in segment */
+
+  /* Obtain a reference to the index structure and allocate a new segment-id
+  ** for the new level-0 segment. */
+  pStruct = fts5StructureRead(p, iHash);
+  iSegid = fts5AllocateSegid(p, pStruct);
+
+  if( iSegid ){
+    Fts5SegWriter writer;
+    Fts5PendingDoclist *pList;
+    Fts5PendingDoclist *pIter;
+    Fts5PendingDoclist *pNext;
+
+    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
+    int nHeight;                  /* Height of new segment b-tree */
+
+    pList = fts5PendingList(p, iHash);
+    assert( pList!=0 || p->rc!=SQLITE_OK );
+    fts5WriteInit(p, &writer, iHash, iSegid);
+
+    /* Write out, then free, each pending doclist in list order. */
+    for(pIter=pList; pIter; pIter=pNext){
+      pNext = pIter->pNext;
+      fts5WritePendingDoclist(p, &writer, pIter);
+      fts5FreePendingDoclist(pIter);
+    }
+    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
+
+    /* Edit the Fts5Structure and write it back to the database. */
+    if( pStruct->nLevel==0 ) pStruct->nLevel = 1;
+    pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
+    pSeg->iSegid = iSegid;
+    pSeg->nHeight = nHeight;
+    pSeg->pgnoFirst = 1;
+    pSeg->pgnoLast = pgnoLast;
+  }
+
+  fts5IndexWork(p, iHash, pStruct, pgnoLast);
+  fts5StructureWrite(p, iHash, pStruct);
+  fts5StructureRelease(pStruct);
+}
+
+/*
+** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
+** to the document with rowid iRowid.
+**
+** If iRowid is not larger than the rowid of the previous document written,
+** the pending data is flushed to disk first.
+*/
+void sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
+  if( iRowid<=p->iWriteRowid ){
+    sqlite3Fts5IndexFlush(p);
+  }
+  p->iWriteRowid = iRowid;
+}
+
+/*
+** Flush any data stored in the in-memory hash tables to the database.
+*/
+void sqlite3Fts5IndexFlush(Fts5Index *p){
+  Fts5Config *pConfig = p->pConfig;
+  int i;                          /* Used to iterate through indexes */
+  int nLeaf = 0;                  /* Number of leaves written */
+
+  /* If an error has already occurred, or if there is no pending data,
+  ** this call is a no-op. */
+  if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return;
+  assert( p->aHash );
+
+  /* Flush the terms and each prefix index to disk */
+  for(i=0; i<=pConfig->nPrefix; i++){
+    fts5FlushOneHash(p, i, &nLeaf);
+  }
+  p->nPendingData = 0;
+}
+
+/*
+** Commit data to disk.
+*/
+int sqlite3Fts5IndexSync(Fts5Index *p){
+  sqlite3Fts5IndexFlush(p);
+  fts5CloseReader(p);
+  return p->rc;
+}
+
+/*
+** Discard any data stored in the in-memory hash tables. Do not write it
+** to the database. Additionally, assume that the contents of the %_data
+** table may have changed on disk. So any in-memory caches of %_data
+** records must be invalidated.
+**
+** NOTE(review): as written, this only closes the reader. Nothing visible
+** here discards the pending hash-table data.
+*/
+int sqlite3Fts5IndexRollback(Fts5Index *p){
+  fts5CloseReader(p);
+  return SQLITE_OK;
+}
+
+/*
+** Open a new Fts5Index handle. If the bCreate argument is true, create
+** and initialize the underlying %_data table.
+**
+** If successful, set *pp to point to the new object and return SQLITE_OK.
+** Otherwise, set *pp to NULL and return an SQLite error code.
+*/
+int sqlite3Fts5IndexOpen(
+  Fts5Config *pConfig,
+  int bCreate,
+  Fts5Index **pp,
+  char **pzErr
+){
+  int rc = SQLITE_OK;
+  Fts5Index *p;                   /* New object */
+
+  *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index));
+  if( !p ) return SQLITE_NOMEM;
+
+  memset(p, 0, sizeof(Fts5Index));
+  p->pConfig = pConfig;
+  p->pgsz = 1000;
+  p->nMinMerge = FTS5_MIN_MERGE;
+  p->nWorkUnit = FTS5_WORK_UNIT;
+  p->nMaxPendingData = 1024*1024;
+  p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName);
+  if( p->zDataTbl==0 ){
+    rc = SQLITE_NOMEM;
+  }else if( bCreate ){
+    int i;
+    Fts5Structure s;
+    rc = sqlite3Fts5CreateTable(
+        pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", pzErr
+    );
+    if( rc==SQLITE_OK ){
+      /* Write an empty structure record for the main index and for each
+      ** prefix index. */
+      memset(&s, 0, sizeof(Fts5Structure));
+      for(i=0; i<pConfig->nPrefix+1; i++){
+        fts5StructureWrite(p, i, &s);
+      }
+      rc = p->rc;
+    }
+  }
+
+  if( rc ){
+    sqlite3Fts5IndexClose(p, 0);
+    *pp = 0;
+  }
+  return rc;
+}
+
+/*
+** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
+** If bDestroy is true, also drop the %_data table. Passing a NULL handle
+** is a harmless no-op.
+*/
+int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){
+  int rc = SQLITE_OK;
+  if( p==0 ) return SQLITE_OK;
+  if( bDestroy ){
+    rc = sqlite3Fts5DropTable(p->pConfig, "data");
+  }
+  assert( p->pReader==0 );
+  sqlite3_finalize(p->pWriter);
+  sqlite3_finalize(p->pDeleter);
+  sqlite3_free(p->aHash);
+  sqlite3_free(p->zDataTbl);
+  sqlite3_free(p);
+  return rc;
+}
+
+/*
+** Return a simple checksum value based on the arguments.
+**
+** NOTE(review): this copy of the patch lost the text between the start of
+** the loop in this function and the loop header in sqlite3Fts5IndexCksum()
+** below. The end of this function, and the comment, signature and local
+** declarations of sqlite3Fts5IndexCksum(), are reconstructed from the
+** surviving fragments and from the call made in fts5_storage.c. Confirm
+** against the upstream commit.
+*/
+static u64 fts5IndexEntryCksum(
+  i64 iRowid,
+  int iCol,
+  int iPos,
+  const char *pTerm,
+  int nTerm
+){
+  int i;
+  u64 ret = iRowid;
+  ret += (ret<<3) + iCol;
+  ret += (ret<<3) + iPos;
+  for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
+  return ret;
+}
+
+/*
+** Return the XOR of the fts5IndexEntryCksum() values for the token
+** pTerm/nTerm at (iRowid, iCol, iPos): one for the full term, and one for
+** each configured prefix length that is not longer than the term.
+*/
+u64 sqlite3Fts5IndexCksum(
+  Fts5Config *pConfig,
+  i64 iRowid,
+  int iCol,
+  int iPos,
+  const char *pTerm, int nTerm
+){
+  u64 ret = 0;
+  int iIdx;
+
+  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
+    int n = ((iIdx==pConfig->nPrefix) ? nTerm : pConfig->aPrefix[iIdx]);
+    if( n<=nTerm ){
+      ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, n);
+    }
+  }
+
+  return ret;
+}
+
+/*
+** Initialize iterator pIter to walk the b-tree levels of segment pSeg in
+** index iIdx. Page 1 of every level above the leaves is loaded. If the
+** segment has no such levels, or if an error occurs, the iterator starts
+** out at EOF with iLeaf set to pSeg->pgnoLast.
+*/
+static void fts5BtreeIterInit(
+  Fts5Index *p,
+  int iIdx,
+  Fts5StructureSegment *pSeg,
+  Fts5BtreeIter *pIter
+){
+  int nByte;
+  int i;
+  nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1);
+  memset(pIter, 0, sizeof(*pIter));
+  pIter->nLvl = pSeg->nHeight-1;
+  pIter->iIdx = iIdx;
+  pIter->p = p;
+  pIter->pSeg = pSeg;
+  if( nByte && p->rc==SQLITE_OK ){
+    pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte);
+  }
+  for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){
+    i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1);
+    Fts5Data *pData;
+    pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid);
+    if( pData ){
+      fts5NodeIterInit(pData->n, pData->p, &pIter->aLvl[i].s);
+    }
+  }
+
+  if( pIter->nLvl==0 || p->rc ){
+    pIter->bEof = 1;
+    pIter->iLeaf = pSeg->pgnoLast;
+  }else{
+    pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
+    pIter->iLeaf = pIter->aLvl[0].s.iChild;
+  }
+}
+
+/*
+** Advance iterator pIter to the next b-tree entry. Levels are advanced
+** from the lowest upwards until one still has data; the exhausted levels
+** below it are then reloaded from the child page that level now points to.
+** pIter->bEof is set when every level is exhausted or an error occurs.
+*/
+static void fts5BtreeIterNext(Fts5BtreeIter *pIter){
+  Fts5Index *p = pIter->p;
+  int i;
+
+  assert( pIter->bEof==0 && pIter->aLvl[0].s.aData );
+  for(i=0; i<pIter->nLvl && p->rc==SQLITE_OK; i++){
+    Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
+    fts5NodeIterNext(&p->rc, &pLvl->s);
+    if( pLvl->s.aData ){
+      fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p);
+      break;
+    }else{
+      fts5NodeIterFree(&pLvl->s);
+      fts5DataRelease(pLvl->pData);
+      pLvl->pData = 0;
+    }
+  }
+  if( i==pIter->nLvl || p->rc ){
+    pIter->bEof = 1;
+  }else{
+    int iSegid = pIter->pSeg->iSegid;
+    for(i--; i>=0; i--){
+      Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
+      i64 iRowid = FTS5_SEGMENT_ROWID(pIter->iIdx,iSegid,i+1,pLvl[1].s.iChild);
+      pLvl->pData = fts5DataRead(p, iRowid);
+      if( pLvl->pData ){
+        fts5NodeIterInit(pLvl->pData->n, pLvl->pData->p, &pLvl->s);
+      }
+    }
+  }
+
+  pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
+  pIter->iLeaf = pIter->aLvl[0].s.iChild;
+  assert( p->rc==SQLITE_OK || pIter->bEof );
+}
+
+/*
+** Release all resources held by iterator pIter. The Fts5BtreeIter
+** structure itself is not freed.
+*/
+static void fts5BtreeIterFree(Fts5BtreeIter *pIter){
+  int i;
+  for(i=0; i<pIter->nLvl; i++){
+    Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
+    fts5NodeIterFree(&pLvl->s);
+    if( pLvl->pData ){
+      fts5DataRelease(pLvl->pData);
+      pLvl->pData = 0;
+    }
+  }
+  sqlite3_free(pIter->aLvl);
+  fts5BufferFree(&pIter->term);
+}
+
+/*
+** Check that the b-tree of segment pSeg is consistent with its leaves.
+** For every b-tree entry whose leaf has not been trimmed from the segment:
+** the leaf must contain a term, that term must not sort before the entry's
+** split-key, and the following iter.nEmpty leaves must contain no terms.
+** The last leaf the iterator reports must be pSeg->pgnoLast.
+**
+** Any failure is reported by setting p->rc to FTS5_CORRUPT.
+*/
+static void fts5IndexIntegrityCheckSegment(
+  Fts5Index *p,                   /* FTS5 backend object */
+  int iIdx,                       /* Index that pSeg is a part of */
+  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
+){
+  Fts5BtreeIter iter;             /* Used to iterate through b-tree hierarchy */
+
+  /* Iterate through the b-tree hierarchy. */
+  for(fts5BtreeIterInit(p, iIdx, pSeg, &iter);
+      iter.bEof==0;
+      fts5BtreeIterNext(&iter)
+  ){
+    i64 iRow;                     /* Rowid for this leaf */
+    Fts5Data *pLeaf;              /* Data for this leaf */
+    int iOff;                     /* Offset of first term on leaf */
+    int i;                        /* Used to iterate through empty leaves */
+
+    /* If the leaf in question has already been trimmed from the segment,
+    ** ignore this b-tree entry. Otherwise, load it into memory. */
+    if( iter.iLeaf<pSeg->pgnoFirst ) continue;
+    iRow = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, iter.iLeaf);
+    pLeaf = fts5DataRead(p, iRow);
+    if( pLeaf==0 ) break;
+
+    /* Check that the leaf contains at least one term, and that it is equal
+    ** to or larger than the split-key in iter.term. */
+    iOff = fts5GetU16(&pLeaf->p[2]);
+    if( iOff==0 ){
+      p->rc = FTS5_CORRUPT;
+    }else{
+      int nTerm;                  /* Size of term on leaf in bytes */
+      int res;                    /* Comparison of term and split-key */
+      iOff += getVarint32(&pLeaf->p[iOff], nTerm);
+      res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n));
+      if( res==0 ) res = nTerm - iter.term.n;
+      if( res<0 ){
+        p->rc = FTS5_CORRUPT;
+      }
+    }
+    fts5DataRelease(pLeaf);
+    if( p->rc ) break;
+
+    /* Now check that the iter.nEmpty leaves following the current leaf
+    ** (a) exist and (b) contain no terms. */
+    for(i=1; i<=iter.nEmpty; i++){
+      pLeaf = fts5DataRead(p, iRow+i);
+      if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
+        p->rc = FTS5_CORRUPT;
+      }
+      fts5DataRelease(pLeaf);
+    }
+  }
+
+  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
+    p->rc = FTS5_CORRUPT;
+  }
+
+  fts5BtreeIterFree(&iter);
+}
+
+/*
+** Run internal checks to ensure that the FTS index (a) is internally
+** consistent and (b) contains entries for which the XOR of the checksums
+** as calculated by fts5IndexEntryCksum() is cksum.
+**
+** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
+** checksum does not match. Return SQLITE_OK if all checks pass without
+** error, or some other SQLite error code if another error (e.g. OOM)
+** occurs.
+*/
+int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
+  Fts5Config *pConfig = p->pConfig;
+  int iIdx;                       /* Used to iterate through indexes */
+  int rc;                         /* Return code */
+  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
+
+  /* Check that the checksum of the index matches the argument checksum */
+  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
+    Fts5MultiSegIter *pIter;
+    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
+    for(fts5MultiIterNew(p, pStruct, iIdx, -1, 0, &pIter);
+        fts5MultiIterEof(p, pIter)==0;
+        fts5MultiIterNext(p, pIter)
+    ){
+      Fts5PosIter sPos;           /* Used to iterate through position list */
+      int n;                      /* Size of term in bytes */
+      i64 iRowid = fts5MultiIterRowid(pIter);
+      char *z = (char*)fts5MultiIterTerm(pIter, &n);
+
+      for(fts5PosIterInit(p, pIter, &sPos);
+          fts5PosIterEof(p, &sPos)==0;
+          fts5PosIterNext(p, &sPos)
+      ){
+        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
+#if 0
+        fprintf(stdout, "rowid=%d ", (int)iRowid);
+        fprintf(stdout, "term=%.*s ", n, z);
+        fprintf(stdout, "col=%d ", sPos.iCol);
+        fprintf(stdout, "off=%d\n", sPos.iPos);
+        fflush(stdout);
+#endif
+      }
+    }
+    fts5MultiIterFree(p, pIter);
+    fts5StructureRelease(pStruct);
+  }
+  rc = p->rc;
+  if( rc==SQLITE_OK && cksum!=cksum2 ) rc = FTS5_CORRUPT;
+
+  /* Check that the internal nodes of each segment match the leaves */
+  for(iIdx=0; rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
+    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
+    if( pStruct ){
+      int iLvl, iSeg;
+      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
+          Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
+          fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
+        }
+      }
+    }
+    fts5StructureRelease(pStruct);
+    rc = p->rc;
+  }
+
+  return rc;
+}
+
+/*
+** Decode the structure record in pBlob/nBlob and append a text rendering
+** of it to pBuf: one "{lvl=N nMerge=N ...}" group per level, with one
+** "{id=N h=N leaves=A..B}" entry per segment inside it.
+**
+** If the record cannot be decoded, the error code is stored in *pRc and
+** nothing is appended.
+*/
+static void fts5DecodeStructure(
+  int *pRc,                       /* IN/OUT: error code */
+  Fts5Buffer *pBuf,
+  const u8 *pBlob, int nBlob
+){
+  int rc;                         /* Return code */
+  int iLvl, iSeg;                 /* Iterate through levels, segments */
+  Fts5Structure *p = 0;           /* Decoded structure object */
+
+  rc = fts5StructureDecode(pBlob, nBlob, &p);
+  if( rc!=SQLITE_OK ){
+    *pRc = rc;
+    return;
+  }
+
+  for(iLvl=0; iLvl<p->nLevel; iLvl++){
+    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
+    fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge);
+    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
+      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
+      fts5BufferAppendPrintf(pRc, pBuf,
+          " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight,
+          pSeg->pgnoFirst, pSeg->pgnoLast
+      );
+    }
+    fts5BufferAppendPrintf(pRc, pBuf, "}");
+  }
+
+  fts5StructureRelease(p);
+}
+
+/*
+** Decode a segment-data rowid from the %_data table. This function is
+** the opposite of macro FTS5_SEGMENT_ROWID().
+**
+** The fields are extracted from the least significant bits upwards, in
+** the order: page number, height, segment id, index.
+*/
+static void fts5DecodeRowid(
+  i64 iRowid,                     /* Rowid from %_data table */
+  int *piIdx,                     /* OUT: Index */
+  int *piSegid,                   /* OUT: Segment id */
+  int *piHeight,                  /* OUT: Height */
+  int *piPgno                     /* OUT: Page number */
+){
+  *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
+  iRowid >>= FTS5_DATA_PAGE_B;
+
+  *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
+  iRowid >>= FTS5_DATA_HEIGHT_B;
+
+  *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
+  iRowid >>= FTS5_DATA_ID_B;
+
+  *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1));
+}
+
+/*
+** Buffer (a/n) is assumed to contain a list of serialized varints. Read
+** each varint and append its string representation to buffer pBuf. Return
+** after either the input buffer is exhausted or a 0 value is read.
+**
+** The return value is the number of bytes read from the input buffer.
+*/
+static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
+  int iOff = 0;
+  /* NOTE(review): this copy of the patch is damaged at this point. All
+  ** text between "iOff" and "pgsz = pgsz;" on the next line is missing,
+  ** so that line is not valid C. What remains appears to be the start of
+  ** this function joined to the end of a later one that assigns a pgsz
+  ** field. Restore the missing text from the upstream commit. */
+  while( iOffpgsz = pgsz;
+}
+
diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c
new file mode 100644
index 0000000000..76cd2e1da7
--- /dev/null
+++ b/ext/fts5/fts5_storage.c
@@ -0,0 +1,411 @@
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+#include "fts5Int.h"
+
+/*
+** Storage handle. aStmt[] caches one prepared statement per
+** FTS5_STMT_XXX value below. Entries are compiled on first use by
+** fts5StorageGetStmt().
+*/
+struct Fts5Storage {
+  Fts5Config *pConfig;
+  Fts5Index *pIndex;
+
+  sqlite3_stmt *aStmt[7];
+};
+
+/* Indexes into Fts5Storage.aStmt[] and into the SQL template array in
+** fts5StorageGetStmt(). The two arrays must be kept in the same order. */
+#define FTS5_STMT_INSERT_CONTENT 0
+#define FTS5_STMT_REPLACE_CONTENT 1
+
+#define FTS5_STMT_DELETE_CONTENT 2
+#define FTS5_STMT_INSERT_DOCSIZE 3
+#define FTS5_STMT_DELETE_DOCSIZE 4
+
+#define FTS5_STMT_SCAN_CONTENT 5
+#define FTS5_STMT_SEEK_CONTENT 6
+
+/*
+** Set *ppStmt to the prepared statement identified by eStmt (one of the
+** FTS5_STMT_XXX constants), compiling and caching it in Fts5Storage.aStmt[]
+** if it has not already been prepared.
+** Return SQLITE_OK if successful, or an SQLite error code if an error
+** occurs.
+*/
+static int fts5StorageGetStmt(
+  Fts5Storage *p,                 /* Storage handle */
+  int eStmt,                      /* FTS5_STMT_XXX constant */
+  sqlite3_stmt **ppStmt           /* OUT: Prepared statement handle */
+){
+  int rc = SQLITE_OK;
+
+  assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) );
+  if( p->aStmt[eStmt]==0 ){
+    /* SQL templates, indexed by FTS5_STMT_XXX value. The first two take
+    ** three arguments (database, table, bind list); the rest take two. */
+    const char *azStmt[] = {
+      "INSERT INTO %Q.'%q_content' VALUES(%s)",       /* INSERT_CONTENT */
+      "REPLACE INTO %Q.'%q_content' VALUES(%s)",      /* REPLACE_CONTENT */
+      "DELETE FROM %Q.'%q_content' WHERE id=?",       /* DELETE_CONTENT */
+      "INSERT INTO %Q.'%q_docsize' VALUES(?,?)",      /* INSERT_DOCSIZE */
+      "DELETE FROM %Q.'%q_docsize' WHERE id=?",       /* DELETE_DOCSIZE */
+      "SELECT * FROM %Q.'%q_content'",                /* SCAN_CONTENT */
+      "SELECT * FROM %Q.'%q_content' WHERE rowid=?",  /* SEEK_CONTENT */
+    };
+    Fts5Config *pConfig = p->pConfig;
+    char *zSql = 0;
+
+    if( eStmt==FTS5_STMT_INSERT_CONTENT || eStmt==FTS5_STMT_REPLACE_CONTENT ){
+      int nCol = pConfig->nCol + 1;
+      char *zBind;
+      int i;
+
+      /* Build the bind list "?,?,...,?" with one "?" for the rowid plus one
+      ** per user column.
+      **
+      ** NOTE(review): this copy of the patch lost the text between "i" and
+      ** "zDb,pConfig->zName,zBind);". The loop below is reconstructed to
+      ** fit the allocation size and the %s in the templates. Confirm
+      ** against the upstream commit. */
+      zBind = sqlite3_malloc(1 + nCol*2);
+      if( zBind ){
+        for(i=0; i<nCol; i++){
+          zBind[i*2] = '?';
+          zBind[i*2 + 1] = ',';
+        }
+        zBind[i*2-1] = '\0';
+        zSql = sqlite3_mprintf(azStmt[eStmt],pConfig->zDb,pConfig->zName,zBind);
+        sqlite3_free(zBind);
+      }
+    }else{
+      zSql = sqlite3_mprintf(azStmt[eStmt], pConfig->zDb, pConfig->zName);
+    }
+
+    if( zSql==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->aStmt[eStmt], 0);
+      sqlite3_free(zSql);
+    }
+  }
+
+  *ppStmt = p->aStmt[eStmt];
+  return rc;
+}
+
+/*
+** Drop the shadow table with the postfix zPost (e.g. "content"). Return
+** SQLITE_OK if successful or an SQLite error code otherwise.
+*/
+int sqlite3Fts5DropTable(Fts5Config *pConfig, const char *zPost){
+  int rc;
+  char *zSql = sqlite3_mprintf("DROP TABLE IF EXISTS %Q.'%q_%q'",
+      pConfig->zDb, pConfig->zName, zPost
+  );
+  if( zSql==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = sqlite3_exec(pConfig->db, zSql, 0, 0, 0);
+    sqlite3_free(zSql);
+  }
+  return rc;
+}
+
+/*
+** Create the shadow table named zPost, with definition zDefn. Return
+** SQLITE_OK if successful, or an SQLite error code otherwise.
+**
+** If sqlite3_exec() supplies an error message, *pzErr is set to a new
+** message that wraps it. The caller must pass *pzErr==0.
+*/
+int sqlite3Fts5CreateTable(
+  Fts5Config *pConfig,            /* FTS5 configuration */
+  const char *zPost,              /* Shadow table to create (e.g. "content") */
+  const char *zDefn,              /* Columns etc. for shadow table */
+  char **pzErr                    /* OUT: Error message */
+){
+  int rc;
+  char *zSql = sqlite3_mprintf("CREATE TABLE %Q.'%q_%q'(%s)",
+      pConfig->zDb, pConfig->zName, zPost, zDefn
+  );
+  if( zSql==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    char *zErr = 0;
+    assert( *pzErr==0 );
+    rc = sqlite3_exec(pConfig->db, zSql, 0, 0, &zErr);
+    if( zErr ){
+      *pzErr = sqlite3_mprintf(
+          "fts5: error creating shadow table %q_%s: %s",
+          pConfig->zName, zPost, zErr
+      );
+      sqlite3_free(zErr);
+    }
+    sqlite3_free(zSql);
+  }
+  return rc;
+}
+
+/*
+** Open a new Fts5Storage handle. If the bCreate argument is true, create
+** and initialize the underlying tables (%_content and %_docsize).
+**
+** If successful, set *pp to point to the new object and return SQLITE_OK.
+** Otherwise, set *pp to NULL and return an SQLite error code.
+*/
+int sqlite3Fts5StorageOpen(
+  Fts5Config *pConfig,
+  Fts5Index *pIndex,
+  int bCreate,
+  Fts5Storage **pp,
+  char **pzErr                    /* OUT: Error message */
+){
+  /* rc must start out as SQLITE_OK: when bCreate is zero nothing below
+  ** assigns it before it is tested and returned. */
+  int rc = SQLITE_OK;
+  Fts5Storage *p;                 /* New object */
+
+  *pp = p = (Fts5Storage*)sqlite3_malloc(sizeof(Fts5Storage));
+  if( !p ) return SQLITE_NOMEM;
+
+  memset(p, 0, sizeof(Fts5Storage));
+  p->pConfig = pConfig;
+  p->pIndex = pIndex;
+
+  if( bCreate ){
+    int i;
+    char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10);
+    if( zDefn==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      /* The content table has an integer primary key followed by one
+      ** column, named c0, c1, ..., for each user column. */
+      int iOff = sprintf(zDefn, "id INTEGER PRIMARY KEY");
+      for(i=0; i<pConfig->nCol; i++){
+        iOff += sprintf(&zDefn[iOff], ", c%d", i);
+      }
+      rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, pzErr);
+    }
+    sqlite3_free(zDefn);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts5CreateTable(
+          pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", pzErr
+      );
+    }
+  }
+
+  if( rc ){
+    sqlite3Fts5StorageClose(p, 0);
+    *pp = 0;
+  }
+  return rc;
+}
+
+/*
+** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
+** If bDestroy is true, also drop the %_content and %_docsize tables and
+** return the first error encountered while doing so. Passing a NULL handle
+** is a harmless no-op.
+*/
+int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy){
+  int rc = SQLITE_OK;
+  int i;
+
+  if( p==0 ) return SQLITE_OK;
+
+  /* Finalize all SQL statements */
+  for(i=0; i<ArraySize(p->aStmt); i++){
+    sqlite3_finalize(p->aStmt[i]);
+  }
+
+  /* If required, remove the shadow tables from the database */
+  if( bDestroy ){
+    rc = sqlite3Fts5DropTable(p->pConfig, "content");
+    if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "docsize");
+  }
+
+  sqlite3_free(p);
+  return rc;
+}
+
+/*
+** Remove a row from the FTS table.
+**
+** Not implemented yet: the assert() below always fails in debug builds,
+** and in other builds the function returns SQLITE_OK without doing
+** anything.
+*/
+int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){
+  assert( !"do this" );
+  return SQLITE_OK;
+}
+
+/*
+** Context passed to fts5StorageInsertCallback() via the tokenizer.
+*/
+typedef struct Fts5InsertCtx Fts5InsertCtx;
+struct Fts5InsertCtx {
+  Fts5Storage *pStorage;          /* Storage handle being written */
+  int iCol;                       /* Column passed to sqlite3Fts5IndexWrite() */
+};
+
+/*
+** Tokenization callback used when inserting tokens into the FTS index.
+** Each token is passed to sqlite3Fts5IndexWrite() along with the column
+** number from the context object. iStart and iEnd are not used.
+*/
+static int fts5StorageInsertCallback(
+  void *pContext,                 /* Pointer to Fts5InsertCtx object */
+  const char *pToken,             /* Buffer containing token */
+  int nToken,                     /* Size of token in bytes */
+  int iStart,                     /* Start offset of token */
+  int iEnd,                       /* End offset of token */
+  int iPos                        /* Position offset of token */
+){
+  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
+  Fts5Index *pIdx = pCtx->pStorage->pIndex;
+  sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
+  return SQLITE_OK;
+}
+
+/*
+** If a row with rowid iDel is present in the %_content table, add the
+** delete-markers to the FTS index necessary to delete it. Do not actually
+** remove the %_content row at this time though.
+*/
+static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
+  Fts5Config *pConfig = p->pConfig;
+  sqlite3_stmt *pSeek;            /* SELECT to read row iDel from %_content */
+  int rc;                         /* Return code */
+
+  rc = fts5StorageGetStmt(p, FTS5_STMT_SEEK_CONTENT, &pSeek);
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int64(pSeek, 1, iDel);
+    if( sqlite3_step(pSeek)==SQLITE_ROW ){
+      int iCol;
+      Fts5InsertCtx ctx;
+      ctx.pStorage = p;
+      ctx.iCol = -1;
+      sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
+
+      /* Result column 0 is the rowid. The user columns follow. Stop at the
+      ** first tokenizer error so that it is not overwritten by the result
+      ** for a later column. */
+      for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
+        rc = sqlite3Fts5Tokenize(pConfig,
+            (const char*)sqlite3_column_text(pSeek, iCol),
+            sqlite3_column_bytes(pSeek, iCol),
+            (void*)&ctx,
+            fts5StorageInsertCallback
+        );
+      }
+    }
+    rc2 = sqlite3_reset(pSeek);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  return rc;
+}
+
+/*
+** Insert a new row into the FTS table.
+**
+** apVal[1] is the rowid value and apVal[2..] are the column values. With
+** eConflict==SQLITE_REPLACE and an integer rowid, any existing row with
+** that rowid is first removed from the index. *piRowid is always set from
+** sqlite3_last_insert_rowid(), even if an error is returned.
+*/
+int sqlite3Fts5StorageInsert(
+  Fts5Storage *p,                 /* Storage module to write to */
+  sqlite3_value **apVal,          /* Array of values passed to xUpdate() */
+  int eConflict,                  /* on conflict clause */
+  i64 *piRowid                    /* OUT: rowid of new record */
+){
+  Fts5Config *pConfig = p->pConfig;
+  int rc = SQLITE_OK;             /* Return code */
+  sqlite3_stmt *pInsert;          /* Statement used to write %_content table */
+  int eStmt;                      /* Type of statement used on %_content */
+  int i;                          /* Counter variable */
+  Fts5InsertCtx ctx;              /* Tokenization callback context object */
+
+  /* Insert the new row into the %_content table. */
+  if( eConflict==SQLITE_REPLACE ){
+    eStmt = FTS5_STMT_REPLACE_CONTENT;
+    if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
+      rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1]));
+    }
+  }else{
+    eStmt = FTS5_STMT_INSERT_CONTENT;
+  }
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageGetStmt(p, eStmt, &pInsert);
+  }
+  for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
+    rc = sqlite3_bind_value(pInsert, i, apVal[i]);
+  }
+  if( rc==SQLITE_OK ){
+    sqlite3_step(pInsert);
+    rc = sqlite3_reset(pInsert);
+  }
+  *piRowid = sqlite3_last_insert_rowid(pConfig->db);
+
+  /* Add new entries to the FTS index. Only start the index write if the
+  ** content row was stored: sqlite3Fts5IndexBeginWrite() can flush pending
+  ** data, and after a failure the rowid does not identify the new row. */
+  if( rc==SQLITE_OK ){
+    sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
+  }
+  ctx.pStorage = p;
+  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
+    rc = sqlite3Fts5Tokenize(pConfig,
+        (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
+        sqlite3_value_bytes(apVal[ctx.iCol+2]),
+        (void*)&ctx,
+        fts5StorageInsertCallback
+    );
+  }
+
+  return rc;
+}
+
+/*
+** Context object used by sqlite3Fts5StorageIntegrity().
+*/
+typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
+struct Fts5IntegrityCtx {
+  i64 iRowid;                     /* Rowid of the row being tokenized */
+  int iCol;                       /* Column being tokenized */
+  u64 cksum;                      /* XOR of all token checksums so far */
+  Fts5Config *pConfig;            /* Passed to sqlite3Fts5IndexCksum() */
+};
+
+/*
+** Tokenization callback used by integrity check. XORs the checksum that
+** sqlite3Fts5IndexCksum() returns for the token into the context's running
+** checksum. iStart and iEnd are not used.
+*/
+static int fts5StorageIntegrityCallback(
+  void *pContext,                 /* Pointer to Fts5IntegrityCtx object */
+  const char *pToken,             /* Buffer containing token */
+  int nToken,                     /* Size of token in bytes */
+  int iStart,                     /* Start offset of token */
+  int iEnd,                       /* End offset of token */
+  int iPos                        /* Position offset of token */
+){
+  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
+  pCtx->cksum ^= sqlite3Fts5IndexCksum(
+      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
+  );
+  return SQLITE_OK;
+}
+
+/*
+** Check that the contents of the FTS index match that of the %_content
+** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
+** some other SQLite error code if an error occurs while attempting to
+** determine this.
+*/ +int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ + Fts5Config *pConfig = p->pConfig; + int rc; /* Return code */ + Fts5IntegrityCtx ctx; + sqlite3_stmt *pScan; + + memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); + ctx.pConfig = p->pConfig; + + /* Generate the expected index checksum based on the contents of the + ** %_content table. This block stores the checksum in ctx.cksum. */ + rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_CONTENT, &pScan); + if( rc==SQLITE_OK ){ + int rc2; + while( SQLITE_ROW==sqlite3_step(pScan) ){ + int i; + ctx.iRowid = sqlite3_column_int64(pScan, 0); + for(i=0; rc==SQLITE_OK && inCol; i++){ + ctx.iCol = i; + rc = sqlite3Fts5Tokenize( + pConfig, + (const char*)sqlite3_column_text(pScan, i+1), + sqlite3_column_bytes(pScan, i+1), + (void*)&ctx, + fts5StorageIntegrityCallback + ); + } + } + rc2 = sqlite3_reset(pScan); + if( rc==SQLITE_OK ) rc = rc2; + } + + /* Pass the expected checksum down to the FTS index module. It will + ** verify, amongst other things, that it matches the checksum generated by + ** inspecting the index itself. */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum); + } + + return rc; +} + + diff --git a/main.mk b/main.mk index 30149eccab..cde67dbb2e 100644 --- a/main.mk +++ b/main.mk @@ -47,6 +47,7 @@ TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3 TCCX += -I$(TOP)/ext/async +TCCX += -I$(TOP)/ext/fts5 # Object files for the SQLite library. # @@ -71,6 +72,13 @@ LIBOBJ+= vdbe.o parse.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ vdbetrace.o wal.o walker.o where.o utf.o vtab.o +LIBOBJ += fts5.o +LIBOBJ += fts5_config.o +LIBOBJ += fts5_expr.o +LIBOBJ += fts5_index.o +LIBOBJ += fts5_storage.o +LIBOBJ += fts5parse.o + # All of the source code files. @@ -375,6 +383,8 @@ EXTHDR += \ $(TOP)/ext/rtree/rtree.h EXTHDR += \ $(TOP)/ext/icu/sqliteicu.h +EXTHDR += \ + $(TOP)/ext/fts5/fts5Int.h # This is the default Makefile target. 
The objects listed here # are what get build when you type just "make" with no arguments. @@ -553,10 +563,33 @@ fts3_unicode2.o: $(TOP)/ext/fts3/fts3_unicode2.c $(HDR) $(EXTHDR) fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c +fts5.o: $(TOP)/ext/fts5/fts5.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5.c + rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c +# FTS5 things +# +fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c + +fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c + +fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c + +fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c + +fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon + cp $(TOP)/ext/fts5/fts5parse.y . + rm -f fts5parse.h + ./lemon $(OPTS) fts5parse.y + + # Rules for building test programs and for running tests # tclsqlite3: $(TOP)/src/tclsqlite.c libsqlite3.a diff --git a/manifest b/manifest index fe8a9edf8e..fe4d032557 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sSQLITE_OMIT_WSD\sbuilds. -D 2014-06-23T10:18:50.447 +C Add\ssome\scode\sfor\san\sexperimental\sfts5\smodule.\sDoes\snot\swork\syet. 
+D 2014-06-23T11:33:22.754 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -79,7 +79,7 @@ F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d F ext/fts3/fts3.c 20bc65862cfcea0a39bb64a819f8fe92a8e144c1 -F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe +F ext/fts3/fts3.h 62a77d880cf06a2865052726f8325c8fabcecad7 F ext/fts3/fts3Int.h 16cddf2d7b0e5f3681615ae1d8ca0e45fca44918 F ext/fts3/fts3_aux.c 5c211e17a64885faeb16b9ba7772f9d5445c2365 F ext/fts3/fts3_expr.c 351395fad6fcb16ecfc61db0861008a70101330c @@ -103,6 +103,12 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 +F ext/fts5/fts5.c 2cb2cc3c1acefa36d9e8ce8e68bceaac8515059a +F ext/fts5/fts5Int.h cc41cf776a3e612aa3a461e96463647fd3957bed +F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef +F ext/fts5/fts5_expr.c bdfb98dab7729cf967022d7a4a815828bbad8c23 +F ext/fts5/fts5_index.c 0548e8925a0664cfa00b2477ebe9afa18bc7848f +F ext/fts5/fts5_storage.c aa1ff4b6b283303ffd8c5dc57a45ebe55e62a7b2 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -146,7 +152,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 7b1d0be0840f213405c977c87917241158126a33 +F 
main.mk 2bb1ec703ac4f27743961764b59cfb5f91d72bfe F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -189,7 +195,7 @@ F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c 0df0b1550b9cc1f58229644735e317ac89131f12 F src/lempar.c cdf0a000315332fc9b50b62f3b5e22e080a0952b F src/loadext.c 867c7b330b740c6c917af9956b13b81d0a048303 -F src/main.c 7c2c3cafdd6313c8f9319ebec1565782e624372e +F src/main.c e777879ad7c431f5b3b5d49c8419727b61d7c1be F src/malloc.c 0203ebce9152c6a0e5de520140b8ba65187350be F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c c0c990fcaddff810ea277b4fb5d9138603dd5d4b @@ -585,6 +591,8 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 +F test/fts5aa.test bbea71fed733b1d433bf83dbc8d86077936d1efc +F test/fts5ea.test 814287a2cb25ac3e59abbe4ccbcabf6bda821868 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -1179,7 +1187,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 612b6d1b1f74eaf618520b90811eca10f978fc71 -R 283ec0802d51fd4e82222d529f6a8475 +P 07dda49c1bf8997a18c3368acb81b6d863ea38d6 +R 66e5d0ccaa728e4d98b92edeb331ffb3 +T *branch * fts5 +T *sym-fts5 * +T -sym-trunk * U dan -Z de359222916ca6f6bd684ca986937509 +Z e3b7f827041011d2f1d78b39cdee11d7 diff --git a/manifest.uuid b/manifest.uuid index 1a8777f6b6..6f7226ff29 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 
@@ -07dda49c1bf8997a18c3368acb81b6d863ea38d6 \ No newline at end of file +1e0648dcf283d4f1f6159db4d2433b6cc635992e \ No newline at end of file diff --git a/src/main.c b/src/main.c index 564c012472..5d894b2ea9 100644 --- a/src/main.c +++ b/src/main.c @@ -2609,6 +2609,7 @@ static int openDatabase( #ifdef SQLITE_ENABLE_FTS3 if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts3Init(db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5Init(db); } #endif diff --git a/test/fts5aa.test b/test/fts5aa.test new file mode 100644 index 0000000000..699d01d033 --- /dev/null +++ b/test/fts5aa.test @@ -0,0 +1,248 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5aa + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); + SELECT name, sql FROM sqlite_master; +} { + t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)} + t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)} + t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)} + t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)} +} + +do_execsql_test 1.1 { + DROP TABLE t1; + SELECT name, sql FROM sqlite_master; +} { +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); +} +do_execsql_test 2.1 { + INSERT INTO t1 VALUES('a b c', 'd e f'); +} +do_execsql_test 2.2 { + SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 +} { + {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} +} +do_execsql_test 2.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); +} +foreach {i x y} { + 1 {g f d b f} {h h e i a} + 2 {f i g j e} {i j c f f} + 3 {e e i f a} {e h f d f} + 4 {h j f j i} {h a c f j} + 5 {d b j c g} {f e i b e} + 6 {a j a e e} {j d f d e} + 7 {g i j c h} {j d h c a} + 8 {j j i d d} {e e d f b} + 9 {c j j d c} {h j i f g} + 10 {b f h i a} {c f b b j} +} { + do_execsql_test 3.$i.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 3.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + if {[set_test_counter errors]} break +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} +foreach {i x y} { + 1 {g f d b f} {h h e i a} + 2 {f i g j e} {i j c f f} + 3 {e e i f a} {e h f d f} + 4 {h j f j i} {h a c f j} + 5 {d b j c g} {f e i b e} + 6 {a j a e e} 
{j d f d e} + 7 {g i j c h} {j d h c a} + 8 {j j i d d} {e e d f b} + 9 {c j j d c} {h j i f g} + 10 {b f h i a} {c f b b j} +} { + do_execsql_test 4.$i.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 4.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + if {[set_test_counter errors]} break +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} +foreach {i x y} { + 1 {dd abc abc abc abcde} {aaa dd ddd ddd aab} + 2 {dd aab d aaa b} {abcde c aaa aaa aaa} + 3 {abcde dd b b dd} {abc abc d abc ddddd} + 4 {aaa abcde dddd dddd abcde} {abc b b abcde abc} + 5 {aab dddd d dddd c} {ddd abcde dddd abcde c} + 6 {ddd dd b aab abcde} {d ddddd dddd c abc} + 7 {d ddddd ddd c abcde} {c aab d abcde ddd} + 8 {abcde aaa aab c c} {ddd c dddd b aaa} + 9 {abcde aab ddddd c aab} {dddd dddd b c dd} + 10 {ddd abcde dddd dd c} {dddd c c d abcde} +} { + do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + if {[set_test_counter errors]} break +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +do_execsql_test 6.1 { + INSERT INTO t1(rowid, x, y) VALUES(22, 'a b c', 'c b a'); + REPLACE INTO t1(rowid, x, y) VALUES(22, 'd e f', 'f e d'); +} + +do_execsql_test 6.2 { + INSERT INTO t1(t1) VALUES('integrity-check') +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +proc doc {} { + set v [list aaa aab abc abcde b c d dd ddd dddd ddddd] + set ret [list] + for {set j 0} {$j < 20} {incr j} { + lappend ret [lindex $v [expr int(rand()*[llength $v])]] + } + return $ret 
+} + +proc dump_structure {} { + db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} { + foreach lvl [lrange $t 1 end] { + set seg [string repeat . [expr [llength $lvl]-2]] + puts "[lrange $lvl 0 1] $seg" + } + } +} + +for {set i 1} {$i <= 10} {incr i} { + do_test 7.$i { + for {set j 0} {$j < 100} {incr j} { + set x [doc] + set y [doc] + set z [doc] + set rowid [expr int(rand() * 100)] + execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } + } + execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } + } {} + if {[set_test_counter errors]} exit +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 8.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +do_execsql_test 8.1 { + INSERT INTO t1 VALUES('the quick brown fox'); + INSERT INTO t1(t1) VALUES('integrity-check'); +} + + +#finish_test + + +#------------------------------------------------------------------------- +# +reset_db + +expr srand(0) + +do_execsql_test 9.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3"); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +proc doc {} { + set v [list aaa aab abc abcde b c d dd ddd dddd ddddd] + set ret [list] + for {set j 0} {$j < 20} {incr j} { + lappend ret [lindex $v [expr int(rand()*[llength $v])]] + } + return $ret +} + +proc dump_structure {} { + db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} { + foreach lvl [lrange $t 1 end] { + set seg [string repeat . 
[expr [llength $lvl]-2]] + puts "[lrange $lvl 0 1] $seg" + } + } +} + +for {set i 1} {$i <= 10} {incr i} { + do_test 9.$i { + for {set j 0} {$j < 100} {incr j} { + set x [doc] + set y [doc] + set z [doc] + set rowid [expr int(rand() * 100)] + execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } + } + execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } + } {} + if {[set_test_counter errors]} break +} + +finish_test + diff --git a/test/fts5ea.test b/test/fts5ea.test new file mode 100644 index 0000000000..fdb28769cf --- /dev/null +++ b/test/fts5ea.test @@ -0,0 +1,84 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5ea + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +proc do_syntax_error_test {tn expr err} { + set ::se_expr $expr + do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err] +} + +proc do_syntax_test {tn expr res} { + set ::se_expr $expr + do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] +} + +foreach {tn expr res} { + 1 {abc} {"abc"} + 2 {abc def} {"abc" AND "def"} + 3 {abc*} {"abc" *} + 4 {"abc def ghi" *} {"abc" + "def" + "ghi" *} + 5 {one AND two} {"one" AND "two"} + 6 {one+two} {"one" + "two"} + 7 {one AND two OR three} {("one" AND "two") OR "three"} + 8 {one OR two AND three} {"one" OR ("two" AND "three")} + 9 {NEAR(one two)} {NEAR("one" "two", 10)} + 10 {NEAR("one three"* two, 5)} {NEAR("one" + "three" * "two", 5)} +} { + do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] +} + +foreach {tn expr res} { + 1 {c1:abc} + {c1 : "abc"} + 2 {c2 : NEAR(one two) c1:"hello world"} + {c2 : NEAR("one" "two", 10) AND c1 : "hello" + "world"} +} { + do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res] +} + +breakpoint +foreach {tn expr err} { + 1 {AND} {syntax error near "AND"} + 2 {abc def AND} {syntax error near ""} + 3 {abc OR AND} {syntax error near "AND"} + 4 {(a OR b) abc} {syntax error near "abc"} + 5 {NEaR (a b)} {syntax error near "NEaR"} + 6 {(a OR b) NOT c)} {syntax error near ")"} + 7 {nosuch: a nosuch2: b} {no such column: nosuch} + 8 {addr: a nosuch2: b} {no such column: nosuch2} +} { + do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] +} + + + +# do_syntax_error_test 1.0 {NOT} {syntax error near "NOT"} + + + +# do_catchsql_test 1.1 { + # SELECT fts5_expr('a OR b NOT c') +#} {0 {"a" OR "b" NOT "c"}} + + +#do_execsql_test 1.0 { SELECT fts5_expr('a') } {{"a"}} + +finish_test From d0bfb36a08ad41304cc65ddd4b4893b545f138c6 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 24 Jun 2014 16:59:06 +0000 Subject: [PATCH 002/206] Add simple full-table-scan and rowid lookup support to fts5. 
FossilOrigin-Name: 3515da85d09220c464979467b476c611da4a6a7a --- ext/fts5/fts5.c | 155 +++++++++++++++++++++++++++++++++++++--- ext/fts5/fts5Int.h | 9 +++ ext/fts5/fts5_storage.c | 84 +++++++++++++++++----- manifest | 22 +++--- manifest.uuid | 2 +- test/fts5aa.test | 3 - test/fts5ab.test | 57 +++++++++++++++ 7 files changed, 290 insertions(+), 42 deletions(-) create mode 100644 test/fts5ab.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 7a6c361068..56a74d6486 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -16,6 +16,7 @@ #include "fts5Int.h" typedef struct Fts5Table Fts5Table; +typedef struct Fts5Cursor Fts5Cursor; struct Fts5Table { sqlite3_vtab base; /* Base class used by SQLite core */ @@ -24,6 +25,13 @@ struct Fts5Table { Fts5Storage *pStorage; /* Document store */ }; +struct Fts5Cursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + int idxNum; /* idxNum passed to xFilter() */ + sqlite3_stmt *pStmt; /* Statement used to read %_content */ + int bEof; /* True at EOF */ +}; + /* ** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy ** argument is non-zero, attempt delete the shadow tables from teh database @@ -145,15 +153,69 @@ static int fts5CreateMethod( return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); } +/* +** The three query plans xBestIndex may choose between. +*/ +#define FTS5_PLAN_SCAN 1 /* No usable constraint */ +#define FTS5_PLAN_MATCH 2 /* ( MATCH ?) */ +#define FTS5_PLAN_ROWID 3 /* (rowid = ?) */ + +#define FTS5_PLAN(idxNum) ((idxNum) & 0x7) + +#define FTS5_ORDER_DESC 8 /* ORDER BY rowid DESC */ +#define FTS5_ORDER_ASC 16 /* ORDER BY rowid ASC */ + + +static int fts5FindConstraint(sqlite3_index_info *pInfo, int eOp, int iCol){ + int i; + + for(i=0; inConstraint; i++){ + struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; + if( p->usable && p->iColumn==iCol && p->op==eOp ) return i; + } + + return -1; +} + /* -** Implementation of the xBestIndex method for FTS3 tables. 
There +** Implementation of the xBestIndex method for FTS5 tables. There ** are three possible strategies, in order of preference: ** -** 1. Direct lookup by rowid or docid. -** 2. Full-text search using a MATCH operator on a non-docid column. -** 3. Linear scan of %_content table. +** 1. Full-text search using a MATCH operator. +** 2. A by-rowid lookup. +** 3. A full-table scan. */ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ + Fts5Table *pTab = (Fts5Table*)pVTab; + Fts5Config *pConfig = pTab->pConfig; + int iCons; + int ePlan = FTS5_PLAN_SCAN; + + iCons = fts5FindConstraint(pInfo,SQLITE_INDEX_CONSTRAINT_MATCH,pConfig->nCol); + if( iCons>=0 ){ + ePlan = FTS5_PLAN_MATCH; + pInfo->estimatedCost = 1.0; + }else{ + iCons = fts5FindConstraint(pInfo, SQLITE_INDEX_CONSTRAINT_EQ, -1); + if( iCons>=0 ){ + ePlan = FTS5_PLAN_ROWID; + pInfo->estimatedCost = 2.0; + } + } + + if( iCons>=0 ){ + pInfo->aConstraintUsage[iCons].argvIndex = 1; + pInfo->aConstraintUsage[iCons].omit = 1; + }else{ + pInfo->estimatedCost = 10000000.0; + } + + if( pInfo->nOrderBy==1 && pInfo->aOrderBy[0].iColumn<0 ){ + pInfo->orderByConsumed = 1; + ePlan |= pInfo->aOrderBy[0].desc ? FTS5_ORDER_DESC : FTS5_ORDER_ASC; + } + + pInfo->idxNum = ePlan; return SQLITE_OK; } @@ -161,7 +223,23 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ ** Implementation of xOpen method. */ static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ - return SQLITE_OK; + Fts5Cursor *pCsr; + int rc = SQLITE_OK; + pCsr = (Fts5Cursor*)sqlite3_malloc(sizeof(Fts5Cursor)); + if( pCsr ){ + memset(pCsr, 0, sizeof(Fts5Cursor)); + }else{ + rc = SQLITE_NOMEM; + } + *ppCsr = (sqlite3_vtab_cursor*)pCsr; + return rc; +} + +static int fts5StmtType(int idxNum){ + if( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ){ + return (idxNum&FTS5_ORDER_ASC) ? 
FTS5_STMT_SCAN_ASC : FTS5_STMT_SCAN_DESC; + } + return FTS5_STMT_LOOKUP; } /* @@ -169,6 +247,13 @@ static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ ** on the xClose method of the virtual table interface. */ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + if( pCsr->pStmt ){ + int eStmt = fts5StmtType(pCsr->idxNum); + sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); + } + sqlite3_free(pCsr); return SQLITE_OK; } @@ -182,7 +267,22 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ ** subsequently to determine whether or not an EOF was hit. */ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ - return SQLITE_OK; + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + int ePlan = FTS5_PLAN(pCsr->idxNum); + int rc = SQLITE_OK; + + assert( ePlan!=FTS5_PLAN_MATCH ); + if( ePlan!=FTS5_PLAN_MATCH ){ + rc = sqlite3_step(pCsr->pStmt); + if( rc!=SQLITE_ROW ){ + pCsr->bEof = 1; + rc = sqlite3_reset(pCsr->pStmt); + }else{ + rc = SQLITE_OK; + } + } + + return rc; } /* @@ -197,7 +297,25 @@ static int fts5FilterMethod( int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ - return SQLITE_OK; + Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + int rc = SQLITE_OK; + int ePlan = FTS5_PLAN(idxNum); + int eStmt = fts5StmtType(idxNum); + + assert( ePlan!=FTS5_PLAN_MATCH ); + memset(&pCursor[1], 0, sizeof(Fts5Cursor) - sizeof(sqlite3_vtab_cursor)); + pCsr->idxNum = idxNum; + + rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); + if( ePlan==FTS5_PLAN_ROWID ){ + sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + } + + if( rc==SQLITE_OK ){ + rc = fts5NextMethod(pCursor); + } + return rc; } /* @@ -205,7 +323,8 @@ static int fts5FilterMethod( ** routine to find out if it has reached the end of a result set. 
*/ static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ - return 1; + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + return pCsr->bEof; } /* @@ -215,6 +334,16 @@ static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ ** rowid should be written to *pRowid. */ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + int ePlan = FTS5_PLAN(pCsr->idxNum); + + assert( pCsr->bEof==0 ); + assert( ePlan!=FTS5_PLAN_MATCH ); + + if( ePlan!=FTS5_PLAN_MATCH ){ + *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); + } + return SQLITE_OK; } @@ -227,6 +356,14 @@ static int fts5ColumnMethod( sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + int ePlan = FTS5_PLAN(pCsr->idxNum); + + assert( pCsr->bEof==0 ); + assert( ePlan!=FTS5_PLAN_MATCH ); + if( ePlan!=FTS5_PLAN_MATCH ){ + sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); + } return SQLITE_OK; } @@ -241,7 +378,7 @@ static int fts5ColumnMethod( ** error code if an error occurs. */ static int fts5SpecialCommand(Fts5Table *pTab, sqlite3_value *pVal){ - const char *z = sqlite3_value_text(pVal); + const char *z = (const char*)sqlite3_value_text(pVal); int n = sqlite3_value_bytes(pVal); int rc = SQLITE_ERROR; diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 5329c207c8..eb6d447cac 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -226,6 +226,15 @@ int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*); int sqlite3Fts5StorageIntegrity(Fts5Storage *p); +#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ +#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ +#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? 
*/ + +int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); +void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); + + + /* ** End of interface to code in fts5_storage.c. **************************************************************************/ diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 76cd2e1da7..6b86218e42 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -18,18 +18,26 @@ struct Fts5Storage { Fts5Config *pConfig; Fts5Index *pIndex; - sqlite3_stmt *aStmt[7]; + sqlite3_stmt *aStmt[8]; }; -#define FTS5_STMT_INSERT_CONTENT 0 -#define FTS5_STMT_REPLACE_CONTENT 1 -#define FTS5_STMT_DELETE_CONTENT 2 -#define FTS5_STMT_INSERT_DOCSIZE 3 -#define FTS5_STMT_DELETE_DOCSIZE 4 +#if FTS5_STMT_SCAN_ASC!=0 +# error "FTS5_STMT_SCAN_ASC mismatch" +#endif +#if FTS5_STMT_SCAN_DESC!=1 +# error "FTS5_STMT_SCAN_DESC mismatch" +#endif +#if FTS5_STMT_LOOKUP!=2 +# error "FTS5_STMT_LOOKUP mismatch" +#endif -#define FTS5_STMT_SCAN_CONTENT 5 -#define FTS5_STMT_SEEK_CONTENT 6 +#define FTS5_STMT_INSERT_CONTENT 3 +#define FTS5_STMT_REPLACE_CONTENT 4 + +#define FTS5_STMT_DELETE_CONTENT 5 +#define FTS5_STMT_INSERT_DOCSIZE 6 +#define FTS5_STMT_DELETE_DOCSIZE 7 /* ** Prepare the two insert statements - Fts5Storage.pInsertContent and @@ -47,13 +55,15 @@ static int fts5StorageGetStmt( assert( eStmt>=0 && eStmtaStmt) ); if( p->aStmt[eStmt]==0 ){ const char *azStmt[] = { - "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ - "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ - "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ - "INSERT INTO %Q.'%q_docsize' VALUES(?,?)", /* INSERT_DOCSIZE */ - "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ - "SELECT * FROM %Q.'%q_content'", /* SCAN_CONTENT */ - "SELECT * FROM %Q.'%q_content' WHERE rowid=?", /* SEEK_CONTENT */ + "SELECT * FROM %Q.'%q_content' ORDER BY id ASC", /* SCAN_ASC */ + "SELECT * FROM %Q.'%q_content' 
ORDER BY id DESC", /* SCAN_DESC */ + "SELECT * FROM %Q.'%q_content' WHERE rowid=?", /* LOOKUP */ + + "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ + "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ + "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ + "INSERT INTO %Q.'%q_docsize' VALUES(?,?)", /* INSERT_DOCSIZE */ + "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ }; Fts5Config *pConfig = p->pConfig; char *zSql = 0; @@ -253,7 +263,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ sqlite3_stmt *pSeek; /* SELECT to read row iDel from %_data */ int rc; /* Return code */ - rc = fts5StorageGetStmt(p, FTS5_STMT_SEEK_CONTENT, &pSeek); + rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek); if( rc==SQLITE_OK ){ int rc2; sqlite3_bind_int64(pSeek, 1, iDel); @@ -377,7 +387,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ /* Generate the expected index checksum based on the contents of the ** %_content table. This block stores the checksum in ctx.cksum. */ - rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_CONTENT, &pScan); + rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_ASC, &pScan); if( rc==SQLITE_OK ){ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ @@ -408,4 +418,44 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ return rc; } +/* +** Obtain an SQLite statement handle that may be used to read data from the +** %_content table. +*/ +int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **pp){ + int rc; + assert( eStmt==FTS5_STMT_SCAN_ASC + || eStmt==FTS5_STMT_SCAN_DESC + || eStmt==FTS5_STMT_LOOKUP + ); + rc = fts5StorageGetStmt(p, eStmt, pp); + if( rc==SQLITE_OK ){ + assert( p->aStmt[eStmt]==*pp ); + p->aStmt[eStmt] = 0; + } + return rc; +} + +/* +** Release an SQLite statement handle obtained via an earlier call to +** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function +** must match that passed to the sqlite3Fts5StorageStmt() call. 
+*/ +void sqlite3Fts5StorageStmtRelease( + Fts5Storage *p, + int eStmt, + sqlite3_stmt *pStmt +){ + assert( eStmt==FTS5_STMT_SCAN_ASC + || eStmt==FTS5_STMT_SCAN_DESC + || eStmt==FTS5_STMT_LOOKUP + ); + if( p->aStmt[eStmt]==0 ){ + sqlite3_reset(pStmt); + p->aStmt[eStmt] = pStmt; + }else{ + sqlite3_finalize(pStmt); + } +} + diff --git a/manifest b/manifest index fe4d032557..099fc5ea9c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssome\scode\sfor\san\sexperimental\sfts5\smodule.\sDoes\snot\swork\syet. -D 2014-06-23T11:33:22.754 +C Add\ssimple\sfull-table-scan\sand\srowid\slookup\ssupport\sto\sfts5. +D 2014-06-24T16:59:06.519 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,12 +103,12 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 2cb2cc3c1acefa36d9e8ce8e68bceaac8515059a -F ext/fts5/fts5Int.h cc41cf776a3e612aa3a461e96463647fd3957bed +F ext/fts5/fts5.c 3efba544818662a02e8e5ebd73d57cff6182b2dd +F ext/fts5/fts5Int.h 6f11697324ebaafe92872ee5b19f3661b2b621f1 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c bdfb98dab7729cf967022d7a4a815828bbad8c23 F ext/fts5/fts5_index.c 0548e8925a0664cfa00b2477ebe9afa18bc7848f -F ext/fts5/fts5_storage.c aa1ff4b6b283303ffd8c5dc57a45ebe55e62a7b2 +F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -591,7 +591,8 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F 
test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test bbea71fed733b1d433bf83dbc8d86077936d1efc +F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 +F test/fts5ab.test 0c44271259bfba089e9e2ab3c18c2760d8a5392c F test/fts5ea.test 814287a2cb25ac3e59abbe4ccbcabf6bda821868 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1187,10 +1188,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 07dda49c1bf8997a18c3368acb81b6d863ea38d6 -R 66e5d0ccaa728e4d98b92edeb331ffb3 -T *branch * fts5 -T *sym-fts5 * -T -sym-trunk * +P 1e0648dcf283d4f1f6159db4d2433b6cc635992e +R 21f33a2cea70ea3a9d3ce73abf49bcfc U dan -Z e3b7f827041011d2f1d78b39cdee11d7 +Z 81c134ce13df1e24ae1f1936c8a52cf8 diff --git a/manifest.uuid b/manifest.uuid index 6f7226ff29..28b2a683d7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1e0648dcf283d4f1f6159db4d2433b6cc635992e \ No newline at end of file +3515da85d09220c464979467b476c611da4a6a7a \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 699d01d033..f8b8b54d08 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -198,9 +198,6 @@ do_execsql_test 8.1 { } -#finish_test - - #------------------------------------------------------------------------- # reset_db diff --git a/test/fts5ab.test b/test/fts5ab.test new file mode 100644 index 0000000000..d075eb01e7 --- /dev/null +++ b/test/fts5ab.test @@ -0,0 +1,57 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. 
+# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5ab + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b); + INSERT INTO t1 VALUES('hello', 'world'); + INSERT INTO t1 VALUES('one two', 'three four'); + INSERT INTO t1(rowid, a, b) VALUES(45, 'forty', 'five'); +} + +do_execsql_test 1.1 { + SELECT * FROM t1; +} { forty five {one two} {three four} hello world } + +do_execsql_test 1.2 { + SELECT rowid FROM t1; +} {45 2 1} + +do_execsql_test 1.3 { + SELECT rowid FROM t1 ORDER BY rowid ASC; +} {1 2 45} + +do_execsql_test 1.4 { + SELECT * FROM t1 WHERE rowid=2; +} {{one two} {three four}} + +do_execsql_test 1.5 { + SELECT * FROM t1 WHERE rowid=2.01; +} {} + +do_execsql_test 1.6 { + SELECT * FROM t1 WHERE rowid=1.99; +} {} + +finish_test From 22d43ec4e804ab0761e2d3ff7747cb4b0af0b8da Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 25 Jun 2014 20:28:38 +0000 Subject: [PATCH 003/206] Begin adding query support to fts5. 
FossilOrigin-Name: 47a9f3cc92deefe163108e3507bd4614bf1f5da7 --- ext/fts5/fts5.c | 71 ++++++++-- ext/fts5/fts5Int.h | 57 ++++---- ext/fts5/fts5_expr.c | 182 ++++++++++++++++++++---- ext/fts5/fts5_index.c | 315 ++++++++++++++++++++++++++++++++--------- manifest | 24 ++-- manifest.uuid | 2 +- test/fts5ab.test | 20 +++ test/fts5ea.test | 12 +- test/permutations.test | 6 + 9 files changed, 539 insertions(+), 150 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 56a74d6486..ba9117c527 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -18,6 +18,9 @@ typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; +/* +** Virtual-table object. +*/ struct Fts5Table { sqlite3_vtab base; /* Base class used by SQLite core */ Fts5Config *pConfig; /* Virtual table configuration */ @@ -25,11 +28,16 @@ struct Fts5Table { Fts5Storage *pStorage; /* Document store */ }; +/* +** Virtual-table cursor object. +*/ struct Fts5Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ int idxNum; /* idxNum passed to xFilter() */ sqlite3_stmt *pStmt; /* Statement used to read %_content */ int bEof; /* True at EOF */ + Fts5Expr *pExpr; /* Expression for MATCH queries */ + int bSeekRequired; }; /* @@ -165,15 +173,18 @@ static int fts5CreateMethod( #define FTS5_ORDER_DESC 8 /* ORDER BY rowid DESC */ #define FTS5_ORDER_ASC 16 /* ORDER BY rowid ASC */ - +/* +** Search the object passed as the first argument for a usable constraint +** on column iCol using operator eOp. If one is found, return its index in +** the pInfo->aConstraint[] array. If no such constraint is found, return +** a negative value. 
+*/ static int fts5FindConstraint(sqlite3_index_info *pInfo, int eOp, int iCol){ int i; - for(i=0; inConstraint; i++){ struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; if( p->usable && p->iColumn==iCol && p->op==eOp ) return i; } - return -1; } @@ -253,6 +264,7 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ int eStmt = fts5StmtType(pCsr->idxNum); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); } + sqlite3Fts5ExprFree(pCsr->pExpr); sqlite3_free(pCsr); return SQLITE_OK; } @@ -271,7 +283,6 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ int ePlan = FTS5_PLAN(pCsr->idxNum); int rc = SQLITE_OK; - assert( ePlan!=FTS5_PLAN_MATCH ); if( ePlan!=FTS5_PLAN_MATCH ){ rc = sqlite3_step(pCsr->pStmt); if( rc!=SQLITE_ROW ){ @@ -280,6 +291,10 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ }else{ rc = SQLITE_OK; } + }else{ + rc = sqlite3Fts5ExprNext(pCsr->pExpr); + pCsr->bEof = sqlite3Fts5ExprEof(pCsr->pExpr); + pCsr->bSeekRequired = 1; } return rc; @@ -302,19 +317,30 @@ static int fts5FilterMethod( int rc = SQLITE_OK; int ePlan = FTS5_PLAN(idxNum); int eStmt = fts5StmtType(idxNum); + int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 
1 : 0); - assert( ePlan!=FTS5_PLAN_MATCH ); memset(&pCursor[1], 0, sizeof(Fts5Cursor) - sizeof(sqlite3_vtab_cursor)); pCsr->idxNum = idxNum; rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); - if( ePlan==FTS5_PLAN_ROWID ){ - sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + if( rc==SQLITE_OK ){ + if( ePlan==FTS5_PLAN_MATCH ){ + char **pzErr = &pTab->base.zErrMsg; + const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); + rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); + pCsr->bEof = sqlite3Fts5ExprEof(pCsr->pExpr); + pCsr->bSeekRequired = 1; + } + }else{ + if( ePlan==FTS5_PLAN_ROWID ){ + sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + } + rc = fts5NextMethod(pCursor); + } } - if( rc==SQLITE_OK ){ - rc = fts5NextMethod(pCursor); - } return rc; } @@ -338,10 +364,10 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ int ePlan = FTS5_PLAN(pCsr->idxNum); assert( pCsr->bEof==0 ); - assert( ePlan!=FTS5_PLAN_MATCH ); - if( ePlan!=FTS5_PLAN_MATCH ){ *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); + }else{ + *pRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); } return SQLITE_OK; @@ -358,13 +384,28 @@ static int fts5ColumnMethod( ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = FTS5_PLAN(pCsr->idxNum); + int rc = SQLITE_OK; assert( pCsr->bEof==0 ); - assert( ePlan!=FTS5_PLAN_MATCH ); - if( ePlan!=FTS5_PLAN_MATCH ){ + if( pCsr->bSeekRequired ){ + assert( ePlan==FTS5_PLAN_MATCH && pCsr->pExpr ); + sqlite3_reset(pCsr->pStmt); + sqlite3_bind_int64(pCsr->pStmt, 1, sqlite3Fts5ExprRowid(pCsr->pExpr)); + rc = sqlite3_step(pCsr->pStmt); + if( rc==SQLITE_ROW ){ + rc = SQLITE_OK; + }else{ + rc = sqlite3_reset(pCsr->pStmt); + if( rc==SQLITE_OK ){ + rc = SQLITE_CORRUPT_VTAB; + } + } + } + + if( rc==SQLITE_OK ){ sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); } - return SQLITE_OK; + return rc; } /* diff --git 
a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index eb6d447cac..774b9f9687 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -75,13 +75,13 @@ void sqlite3Fts5Dequote(char *z); typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; + /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_ASC 0x0002 /* Docs in ascending rowid order */ #define FTS5INDEX_QUERY_MATCH 0x0004 /* Use the iMatch arg to Next() */ -#define FTS5INDEX_QUERY_DELETE 0x0008 /* Visit delete markers */ /* ** Create/destroy an Fts5Index object. @@ -114,8 +114,7 @@ Fts5IndexIter *sqlite3Fts5IndexQuery( */ int sqlite3Fts5IterEof(Fts5IndexIter*); void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch); -int sqlite3Fts5IterSeek(Fts5IndexIter*, i64 iDocid); -i64 sqlite3Fts5IterDocid(Fts5IndexIter*); +i64 sqlite3Fts5IterRowid(Fts5IndexIter*); /* ** Position list iteration. @@ -128,8 +127,8 @@ i64 sqlite3Fts5IterDocid(Fts5IndexIter*); ** // token appears at position iPos of column iCol of the current document ** } */ -int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol); -int sqlite3Fts5IterNextPos(Fts5IndexIter*); +// int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol); +// int sqlite3Fts5IterNextPos(Fts5IndexIter*); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). @@ -213,6 +212,11 @@ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); ** Interface to code in fts5_storage.c. fts5_storage.c contains contains ** code to access the data stored in the %_content and %_docsize tables. */ + +#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ +#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ +#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? 
*/ + typedef struct Fts5Storage Fts5Storage; int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); @@ -226,15 +230,10 @@ int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*); int sqlite3Fts5StorageIntegrity(Fts5Storage *p); -#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ -#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ -#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */ - int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); - /* ** End of interface to code in fts5_storage.c. **************************************************************************/ @@ -244,6 +243,7 @@ void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); ** Interface to code in fts5_expr.c. */ typedef struct Fts5Expr Fts5Expr; +typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5Parse Fts5Parse; typedef struct Fts5Token Fts5Token; typedef struct Fts5ExprPhrase Fts5ExprPhrase; @@ -254,23 +254,29 @@ struct Fts5Token { int n; /* Size of buffer p in bytes */ }; +/* Parse a MATCH expression. */ int sqlite3Fts5ExprNew( Fts5Config *pConfig, - Fts5Index *pIdx, const char *zExpr, Fts5Expr **ppNew, char **pzErr ); -int sqlite3Fts5ExprFirst(Fts5Expr *p); -int sqlite3Fts5ExprNext(Fts5Expr *p); -int sqlite3Fts5ExprEof(Fts5Expr *p); -i64 sqlite3Fts5ExprRowid(Fts5Expr *p); +/* +** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bAsc); +** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); +** rc = sqlite3Fts5ExprNext(pExpr) +** ){ +** // The document with rowid iRowid matches the expression! 
+** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); +** } +*/ +int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, int bAsc); +int sqlite3Fts5ExprNext(Fts5Expr*); +int sqlite3Fts5ExprEof(Fts5Expr*); +i64 sqlite3Fts5ExprRowid(Fts5Expr*); -void sqlite3Fts5ExprFree(Fts5Expr *p); - -// int sqlite3Fts5IterFirstPos(Fts5Expr*, int iCol, int *piPos); -// int sqlite3Fts5IterNextPos(Fts5Expr*, int *piPos); +void sqlite3Fts5ExprFree(Fts5Expr*); /* Called during startup to register a UDF with SQLite */ int sqlite3Fts5ExprInit(sqlite3*); @@ -282,11 +288,11 @@ int sqlite3Fts5ExprInit(sqlite3*); void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); -Fts5Expr *sqlite3Fts5ParseExpr( - Fts5Parse *pParse, - int eType, - Fts5Expr *pLeft, - Fts5Expr *pRight, +Fts5ExprNode *sqlite3Fts5ParseNode( + Fts5Parse *pParse, + int eType, + Fts5ExprNode *pLeft, + Fts5ExprNode *pRight, Fts5ExprNearset *pNear ); @@ -305,10 +311,11 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); +void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); void sqlite3Fts5ParseSetColumn(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); -void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p); +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 36dc60a069..e34818dd6c 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -29,6 +29,12 @@ void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(size_t)); void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); +struct Fts5Expr { + Fts5Index *pIndex; + Fts5ExprNode *pRoot; + int bAsc; +}; + /* ** eType: ** Expression node type. 
Always one of: @@ -38,11 +44,13 @@ void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); ** FTS5_NOT (pLeft, pRight valid) ** FTS5_STRING (pNear valid) */ -struct Fts5Expr { +struct Fts5ExprNode { int eType; /* Node type */ - Fts5Expr *pLeft; /* Left hand child node */ - Fts5Expr *pRight; /* Right hand child node */ + Fts5ExprNode *pLeft; /* Left hand child node */ + Fts5ExprNode *pRight; /* Right hand child node */ Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ + int bEof; /* True at EOF */ + i64 iRowid; }; /* @@ -52,6 +60,7 @@ struct Fts5Expr { struct Fts5ExprTerm { int bPrefix; /* True for a prefix term */ char *zTerm; /* nul-terminated term */ + Fts5IndexIter *pIter; /* Iterator for this term */ }; /* @@ -82,7 +91,7 @@ struct Fts5Parse { Fts5Config *pConfig; char *zErr; int rc; - Fts5Expr *pExpr; /* Result of a successful parse */ + Fts5ExprNode *pExpr; /* Result of a successful parse */ }; void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ @@ -168,8 +177,7 @@ static void *fts5ParseAlloc(size_t t){ return sqlite3_malloc((int)t); } static void fts5ParseFree(void *p){ sqlite3_free(p); } int sqlite3Fts5ExprNew( - Fts5Config *pConfig, - Fts5Index *pIdx, + Fts5Config *pConfig, /* FTS5 Configuration */ const char *zExpr, /* Expression text */ Fts5Expr **ppNew, char **pzErr @@ -179,12 +187,13 @@ int sqlite3Fts5ExprNew( const char *z = zExpr; int t; /* Next token type */ void *pEngine; + Fts5Expr *pNew; *ppNew = 0; *pzErr = 0; memset(&sParse, 0, sizeof(sParse)); pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); - if( pEngine==0 ) return SQLITE_NOMEM; + if( pEngine==0 ){ return SQLITE_NOMEM; } sParse.pConfig = pConfig; do { @@ -194,23 +203,140 @@ int sqlite3Fts5ExprNew( sqlite3Fts5ParserFree(pEngine, fts5ParseFree); assert( sParse.pExpr==0 || (sParse.rc==SQLITE_OK && sParse.zErr==0) ); - *ppNew = sParse.pExpr; + if( sParse.rc==SQLITE_OK ){ + *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); + if( pNew==0 ){ + sParse.rc = 
SQLITE_NOMEM; + }else{ + pNew->pRoot = sParse.pExpr; + pNew->pIndex = 0; + } + } + *pzErr = sParse.zErr; return sParse.rc; } /* -** Free the object passed as the only argument. +** Free the expression node object passed as the only argument. */ -void sqlite3Fts5ExprFree(Fts5Expr *p){ +void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ if( p ){ - sqlite3Fts5ExprFree(p->pLeft); - sqlite3Fts5ExprFree(p->pRight); + sqlite3Fts5ParseNodeFree(p->pLeft); + sqlite3Fts5ParseNodeFree(p->pRight); sqlite3Fts5ParseNearsetFree(p->pNear); sqlite3_free(p); } } +/* +** Free the expression object passed as the only argument. +*/ +void sqlite3Fts5ExprFree(Fts5Expr *p){ + if( p ){ + sqlite3Fts5ParseNodeFree(p->pRoot); + sqlite3_free(p); + } +} + +/* +** +*/ +static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + assert( 0 ); + return SQLITE_OK; +} + +static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + int rc = SQLITE_OK; + + pNode->bEof = 0; + if( pNode->eType==FTS5_STRING ){ + Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; + Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; + assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); + + pTerm->pIter = sqlite3Fts5IndexQuery( + pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bAsc ? 
FTS5INDEX_QUERY_ASC : 0) + ); + if( sqlite3Fts5IterEof(pTerm->pIter) ){ + pNode->bEof = 1; + }else{ + pNode->iRowid = sqlite3Fts5IterRowid(pTerm->pIter); + } + + }else{ + rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeFirst(pExpr, pNode->pRight); + } + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeTest(pExpr, pNode); + } + } + return rc; +} + +static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + int rc = SQLITE_OK; + + if( pNode->eType==FTS5_STRING ){ + Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; + Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; + assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); + sqlite3Fts5IterNext(pTerm->pIter, 0); + if( sqlite3Fts5IterEof(pTerm->pIter) ){ + pNode->bEof = 1; + }else{ + pNode->iRowid = sqlite3Fts5IterRowid(pTerm->pIter); + } + }else{ + assert( 0 ); + } + return rc; +} + + + +/* +** Begin iterating through the set of documents in index pIdx matched by +** the MATCH expression passed as the first argument. If the "bAsc" parameter +** is passed a non-zero value, iteration is in ascending rowid order. Or, +** if it is zero, in descending order. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. It +** is not considered an error if the query does not match any documents. +*/ +int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bAsc){ + int rc; + p->pIndex = pIdx; + p->bAsc = bAsc; + rc = fts5ExprNodeFirst(p, p->pRoot); + return rc; +} + +/* +** Move to the next document +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. It +** is not considered an error if the query does not match any documents. +*/ +int sqlite3Fts5ExprNext(Fts5Expr *p){ + int rc; + rc = fts5ExprNodeNext(p, p->pRoot); + return rc; +} + +int sqlite3Fts5ExprEof(Fts5Expr *p){ + return p->pRoot->bEof; +} + +i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ + return p->pRoot->iRowid; +} + /* ** Argument pIn points to a buffer of nIn bytes. 
This function allocates ** and returns a new buffer populated with a copy of (pIn/nIn) with a @@ -229,7 +355,7 @@ static char *fts5Strdup(const char *pIn, int nIn){ } static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ - *pz = sqlite3_mprintf("%.*s", pToken->n, pToken->p); + *pz = fts5Strdup(pToken->p, pToken->n); if( *pz==0 ) return SQLITE_NOMEM; return SQLITE_OK; } @@ -241,7 +367,11 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; inTerm; i++){ - sqlite3_free(pPhrase->aTerm[i].zTerm); + Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; + sqlite3_free(pTerm->zTerm); + if( pTerm->pIter ){ + sqlite3Fts5IterClose(pTerm->pIter); + } } sqlite3_free(pPhrase); } @@ -357,7 +487,7 @@ void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ } } -void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5Expr *p){ +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ assert( pParse->pExpr==0 ); pParse->pExpr = p; } @@ -401,7 +531,7 @@ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ if( pParse->rc==SQLITE_OK ){ if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ sqlite3Fts5ParseError( - pParse, "syntax error near \"%.*s\"", pTok->n, pTok->p + pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p ); } } @@ -460,20 +590,20 @@ void sqlite3Fts5ParseSetColumn( ** Allocate and return a new expression object. If anything goes wrong (i.e. ** OOM error), leave an error code in pParse and return NULL. 
*/ -Fts5Expr *sqlite3Fts5ParseExpr( +Fts5ExprNode *sqlite3Fts5ParseNode( Fts5Parse *pParse, /* Parse context */ int eType, /* FTS5_STRING, AND, OR or NOT */ - Fts5Expr *pLeft, /* Left hand child expression */ - Fts5Expr *pRight, /* Right hand child expression */ + Fts5ExprNode *pLeft, /* Left hand child expression */ + Fts5ExprNode *pRight, /* Right hand child expression */ Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ ){ - Fts5Expr *pRet = 0; + Fts5ExprNode *pRet = 0; if( pParse->rc==SQLITE_OK ){ assert( (eType!=FTS5_STRING && pLeft && pRight && !pNear) || (eType==FTS5_STRING && !pLeft && !pRight && pNear) ); - pRet = (Fts5Expr*)sqlite3_malloc(sizeof(Fts5Expr)); + pRet = (Fts5ExprNode*)sqlite3_malloc(sizeof(Fts5ExprNode)); if( pRet==0 ){ pParse->rc = SQLITE_NOMEM; }else{ @@ -487,8 +617,8 @@ Fts5Expr *sqlite3Fts5ParseExpr( if( pRet==0 ){ assert( pParse->rc!=SQLITE_OK ); - sqlite3Fts5ExprFree(pLeft); - sqlite3Fts5ExprFree(pRight); + sqlite3Fts5ParseNodeFree(pLeft); + sqlite3Fts5ParseNodeFree(pRight); sqlite3Fts5ParseNearsetFree(pNear); } return pRet; @@ -529,7 +659,7 @@ static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ return zNew; } -static char *fts5ExprPrint(Fts5Config *pConfig, Fts5Expr *pExpr){ +static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; if( pExpr->eType==FTS5_STRING ){ Fts5ExprNearset *pNear = pExpr->pNear; @@ -634,10 +764,10 @@ static void fts5ExprFunction( rc = sqlite3Fts5ConfigParse(db, nConfig, azConfig, &pConfig, &zErr); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5ExprNew(pConfig, 0, zExpr, &pExpr, &zErr); + rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); } if( rc==SQLITE_OK ){ - char *zText = fts5ExprPrint(pConfig, pExpr); + char *zText = fts5ExprPrint(pConfig, pExpr->pRoot); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); sqlite3_free(zText); diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 261383c296..375912d69d 100644 --- 
a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -297,6 +297,13 @@ struct Fts5Index { sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; +struct Fts5IndexIter { + Fts5Index *pIndex; + Fts5Structure *pStruct; + Fts5MultiSegIter *pMulti; +}; + + /* ** Buffer object for the incremental building of string data. */ @@ -425,10 +432,15 @@ struct Fts5MultiSegIter { ** iTermLeafPgno, iTermLeafOffset: ** Leaf page number containing the last term read from the segment. And ** the offset immediately following the term data. +** +** bOneTerm: +** If true, set the iterator to point to EOF after the current doclist has +** been exhausted. Do not proceed to the next term in the segment. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ int iIdx; /* Byte offset within current leaf */ + int bOneTerm; /* If true, iterate through single doclist */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ int iLeafOffset; /* Byte offset within current leaf */ @@ -657,6 +669,23 @@ static void fts5BufferSet( fts5BufferAppendBlob(pRc, pBuf, nData, pData); } +/* +** Compare the contents of the pLeft buffer with the pRight/nRight blob. +** +** Return -ve if pLeft is smaller than pRight, 0 if they are equal or +** +ve if pRight is smaller than pLeft. In other words: +** +** res = *pLeft - *pRight +*/ +static int fts5BufferCompareBlob( + Fts5Buffer *pLeft, /* Left hand side of comparison */ + const u8 *pRight, int nRight /* Right hand side of comparison */ +){ + int nCmp = MIN(pLeft->n, nRight); + int res = memcmp(pLeft->p, pRight, nCmp); + return (res==0 ? (pLeft->n - nRight) : res); +} + /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. 
@@ -739,7 +768,6 @@ static Fts5Data *fts5DataReadOrBuffer( static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); assert( (pRet==0)==(p->rc!=SQLITE_OK) ); -assert( pRet ); return pRet; } @@ -1006,6 +1034,60 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ } +/* +** If the pIter->iOff offset currently points to an entry indicating one +** or more term-less nodes, advance past it and set pIter->nEmpty to +** the number of empty child nodes. +*/ +static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ + if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ + pIter->iOff++; + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); + }else{ + pIter->nEmpty = 0; + } +} + +/* +** Advance to the next entry within the node. +*/ +static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ + if( pIter->iOff>=pIter->nData ){ + pIter->aData = 0; + pIter->iChild += pIter->nEmpty; + }else{ + int nPre, nNew; + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nPre); + pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nNew); + pIter->term.n = nPre-2; + fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); + pIter->iOff += nNew; + pIter->iChild += (1 + pIter->nEmpty); + fts5NodeIterGobbleNEmpty(pIter); + if( *pRc ) pIter->aData = 0; + } +} + + +/* +** Initialize the iterator object pIter to iterate through the internal +** segment node in pData. +*/ +static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){ + memset(pIter, 0, sizeof(*pIter)); + pIter->aData = aData; + pIter->nData = nData; + pIter->iOff = getVarint32(aData, pIter->iChild); + fts5NodeIterGobbleNEmpty(pIter); +} + +/* +** Free any memory allocated by the iterator object. +*/ +static void fts5NodeIterFree(Fts5NodeIter *pIter){ + fts5BufferFree(&pIter->term); +} + /* ** Load the next leaf page into the segment iterator. 
*/ @@ -1079,6 +1161,77 @@ static void fts5SegIterInit( } } +/* +** Initialize the object pIter to point to term pTerm/nTerm within segment +** pSeg, index iIdx. If there is no such term in the index, the iterator +** is set to EOF. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. If +** an error has already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterSeekInit( + Fts5Index *p, /* FTS5 backend */ + int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to */ + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + int iPg = 1; + int h; + + assert( pTerm && nTerm ); + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->iIdx = iIdx; + pIter->bOneTerm = 1; + + for(h=pSeg->nHeight-1; h>0; h--){ + Fts5NodeIter node; /* For iterating through internal nodes */ + i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg); + Fts5Data *pNode = fts5DataRead(p, iRowid); + if( pNode==0 ) break; + + fts5NodeIterInit(pNode->p, pNode->n, &node); + assert( node.term.n==0 ); + + iPg = node.iChild; + for(fts5NodeIterNext(&p->rc, &node); + node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)>=0; + fts5NodeIterNext(&p->rc, &node) + ){ + iPg = node.iChild; + } + } + + if( iPg>=pSeg->pgnoFirst ){ + int res; + pIter->iLeafPgno = iPg - 1; + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf ){ + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; + + pIter->iLeafOffset = fts5GetU16(&a[2]); + fts5SegIterLoadTerm(p, pIter, 0); + + while( (res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){ + if( res<0 ){ + /* Search for the end of the position list within the current page. */ + int iOff; + for(iOff=pIter->iLeafOffset; iOffiLeafOffset = iOff+1; + if( iOffpLeaf); + pIter->pLeaf = 0; + break; + } + } + } +} + /* ** Advance iterator pIter to the next entry. 
** @@ -1137,9 +1290,13 @@ static void fts5SegIterNext( } /* Check if the iterator is now at EOF. If so, return early. */ - if( pIter->pLeaf==0 ) return; - if( bNewTerm ){ - fts5SegIterLoadTerm(p, pIter, nKeep); + if( pIter->pLeaf && bNewTerm ){ + if( pIter->bOneTerm ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + fts5SegIterLoadTerm(p, pIter, nKeep); + } } } } @@ -1263,6 +1420,7 @@ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5MultiSegIter **ppOut /* New object */ @@ -1274,6 +1432,8 @@ static void fts5MultiIterNew( Fts5StructureLevel *pLvl; Fts5MultiSegIter *pNew; + assert( (pTerm==0 && nTerm==0) || iLevel<0 ); + /* Allocate space for the new multi-seg-iterator. */ if( iLevel<0 ){ nSeg = fts5StructureCountSegments(pStruct); @@ -1296,7 +1456,12 @@ static void fts5MultiIterNew( Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ - fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + Fts5SegIter *pIter = &pNew->aSeg[iIter++]; + if( pTerm==0 ){ + fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], pIter); + }else{ + fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, &pLvl->aSeg[iSeg], pIter); + } } } }else{ @@ -1701,60 +1866,6 @@ static int fts5PrefixCompress( return i; } -/* -** If the pIter->iOff offset currently points to an entry indicating one -** or more term-less nodes, advance past it and set pIter->nEmpty to -** the number of empty child nodes. 
-*/ -static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ - if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ - pIter->iOff++; - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); - }else{ - pIter->nEmpty = 0; - } -} - -/* -** Advance to the next entry within the node. -*/ -static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ - if( pIter->iOff>=pIter->nData ){ - pIter->aData = 0; - pIter->iChild += pIter->nEmpty; - }else{ - int nPre, nNew; - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nPre); - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nNew); - pIter->term.n = nPre-2; - fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); - pIter->iOff += nNew; - pIter->iChild += (1 + pIter->nEmpty); - fts5NodeIterGobbleNEmpty(pIter); - if( *pRc ) pIter->aData = 0; - } -} - - -/* -** Initialize the iterator object pIter to iterate through the internal -** segment node in pData. -*/ -static void fts5NodeIterInit(int nData, const u8 *aData, Fts5NodeIter *pIter){ - memset(pIter, 0, sizeof(*pIter)); - pIter->aData = aData; - pIter->nData = nData; - pIter->iOff = getVarint32(aData, pIter->iChild); - fts5NodeIterGobbleNEmpty(pIter); -} - -/* -** Free any memory allocated by the iterator object. 
-*/ -static void fts5NodeIterFree(Fts5NodeIter *pIter){ - fts5BufferFree(&pIter->term); -} - /* ** This is called once for each leaf page except the first that contains @@ -2062,7 +2173,7 @@ static void fts5WriteInitForAppend( fts5DataBuffer(p, &pPg->buf, iRowid); if( p->rc==SQLITE_OK ){ Fts5NodeIter ss; - fts5NodeIterInit(pPg->buf.n, pPg->buf.p, &ss); + fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss); while( ss.aData ) fts5NodeIterNext(&p->rc, &ss); fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p); pgno = ss.iChild; @@ -2167,7 +2278,7 @@ fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); #endif - for(fts5MultiIterNew(p, pStruct, iIdx, iLvl, nInput, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ @@ -2524,7 +2635,7 @@ static void fts5BtreeIterInit( Fts5Data *pData; pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); if( pData ){ - fts5NodeIterInit(pData->n, pData->p, &pIter->aLvl[i].s); + fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s); } } @@ -2563,7 +2674,7 @@ static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ i64 iRowid = FTS5_SEGMENT_ROWID(pIter->iIdx,iSegid,i+1,pLvl[1].s.iChild); pLvl->pData = fts5DataRead(p, iRowid); if( pLvl->pData ){ - fts5NodeIterInit(pLvl->pData->n, pLvl->pData->p, &pLvl->s); + fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); } } } @@ -2667,7 +2778,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ Fts5MultiSegIter *pIter; Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - for(fts5MultiIterNew(p, pStruct, iIdx, -1, 0, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ @@ -2893,7 +3004,7 @@ static void fts5DecodeFunction( fts5BufferFree(&term); }else{ Fts5NodeIter ss; - for(fts5NodeIterInit(n, a, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){ 
+ for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){ if( ss.term.n==0 ){ fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild); }else{ @@ -2937,3 +3048,77 @@ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ p->pgsz = pgsz; } +/* +** Open a new iterator to iterate though all docids that match the +** specified token or token prefix. +*/ +Fts5IndexIter *sqlite3Fts5IndexQuery( + Fts5Index *p, /* FTS index to query */ + const char *pToken, int nToken, /* Token (or prefix) to query for */ + int flags /* Mask of FTS5INDEX_QUERY_X flags */ +){ + Fts5IndexIter *pRet; + int iIdx = 0; + + if( flags & FTS5INDEX_QUERY_PREFIX ){ + Fts5Config *pConfig = p->pConfig; + for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ + if( pConfig->aPrefix[iIdx-1]==nToken ) break; + } + if( iIdx>pConfig->nPrefix ){ + /* No matching prefix index. todo: deal with this. */ + assert( 0 ); + } + } + + pRet = (Fts5IndexIter*)sqlite3_malloc(sizeof(Fts5IndexIter)); + if( pRet ){ + pRet->pStruct = fts5StructureRead(p, 0); + if( pRet->pStruct ){ + fts5MultiIterNew(p, + pRet->pStruct, iIdx, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + ); + } + pRet->pIndex = p; + } + + if( p->rc ){ + sqlite3Fts5IterClose(pRet); + pRet = 0; + } + return pRet; +} + +/* +** Return true if the iterator passed as the only argument is at EOF. +*/ +int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ + return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); +} + +/* +** Move to the next matching rowid. +*/ +void sqlite3Fts5IterNext(Fts5IndexIter *pIter, i64 iMatch){ + fts5MultiIterNext(pIter->pIndex, pIter->pMulti); +} + +/* +** Return the current rowid. +*/ +i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ + return fts5MultiIterRowid(pIter->pMulti); +} + +/* +** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). 
+*/ +void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ + if( pIter ){ + fts5MultiIterFree(pIter->pIndex, pIter->pMulti); + fts5StructureRelease(pIter->pStruct); + fts5CloseReader(pIter->pIndex); + sqlite3_free(pIter); + } +} + diff --git a/manifest b/manifest index 099fc5ea9c..21a248c4b9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssimple\sfull-table-scan\sand\srowid\slookup\ssupport\sto\sfts5. -D 2014-06-24T16:59:06.519 +C Begin\sadding\squery\ssupport\sto\sfts5. +D 2014-06-25T20:28:38.917 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,11 +103,11 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 3efba544818662a02e8e5ebd73d57cff6182b2dd -F ext/fts5/fts5Int.h 6f11697324ebaafe92872ee5b19f3661b2b621f1 +F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 +F ext/fts5/fts5Int.h 3fd1ebeb58963727cae0ccc8e4e80751bd870296 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c bdfb98dab7729cf967022d7a4a815828bbad8c23 -F ext/fts5/fts5_index.c 0548e8925a0664cfa00b2477ebe9afa18bc7848f +F ext/fts5/fts5_expr.c 9666362ff500ce21262f355194c1f4b164261b5d +F ext/fts5/fts5_index.c 3e6fbae93eb4dbaaa4bbba4bb11719aafefe363d F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -592,8 +592,8 @@ F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F 
test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 -F test/fts5ab.test 0c44271259bfba089e9e2ab3c18c2760d8a5392c -F test/fts5ea.test 814287a2cb25ac3e59abbe4ccbcabf6bda821868 +F test/fts5ab.test 79841ddc1645900b17dcf25d3767dcb05f82a4d4 +F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -760,7 +760,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test bc474bafb022cc5014ef3a9c3d5ab61d6d6f587c +F test/permutations.test 43a4c2397b5e8a45c41fac20c7a8a2d4094f470f F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1188,7 +1188,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1e0648dcf283d4f1f6159db4d2433b6cc635992e -R 21f33a2cea70ea3a9d3ce73abf49bcfc +P 3515da85d09220c464979467b476c611da4a6a7a +R 71f18cc20edef985f8bf9c192ad2143e U dan -Z 81c134ce13df1e24ae1f1936c8a52cf8 +Z a8b08260950e139985bb661309680a76 diff --git a/manifest.uuid b/manifest.uuid index 28b2a683d7..ad013c65e2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3515da85d09220c464979467b476c611da4a6a7a \ No newline at end of file +47a9f3cc92deefe163108e3507bd4614bf1f5da7 \ No newline at end of file diff --git a/test/fts5ab.test b/test/fts5ab.test index d075eb01e7..a731956aa8 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -54,4 +54,24 @@ 
do_execsql_test 1.6 { SELECT * FROM t1 WHERE rowid=1.99; } {} +#------------------------------------------------------------------------- + +reset_db +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1 VALUES('one'); + INSERT INTO t1 VALUES('two'); + INSERT INTO t1 VALUES('three'); +} + +do_catchsql_test 2.2 { + SELECT rowid, * FROM t1 WHERE t1 MATCH 'AND AND' +} {1 {fts5: syntax error near "AND"}} + +do_execsql_test 2.3 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'two' } {2 two} +do_execsql_test 2.4 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'three' } {3 three} +do_execsql_test 2.5 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'one' } {1 one} + + + finish_test diff --git a/test/fts5ea.test b/test/fts5ea.test index fdb28769cf..a76f901d06 100644 --- a/test/fts5ea.test +++ b/test/fts5ea.test @@ -56,12 +56,12 @@ foreach {tn expr res} { breakpoint foreach {tn expr err} { - 1 {AND} {syntax error near "AND"} - 2 {abc def AND} {syntax error near ""} - 3 {abc OR AND} {syntax error near "AND"} - 4 {(a OR b) abc} {syntax error near "abc"} - 5 {NEaR (a b)} {syntax error near "NEaR"} - 6 {(a OR b) NOT c)} {syntax error near ")"} + 1 {AND} {fts5: syntax error near "AND"} + 2 {abc def AND} {fts5: syntax error near ""} + 3 {abc OR AND} {fts5: syntax error near "AND"} + 4 {(a OR b) abc} {fts5: syntax error near "abc"} + 5 {NEaR (a b)} {fts5: syntax error near "NEaR"} + 6 {(a OR b) NOT c)} {fts5: syntax error near ")"} 7 {nosuch: a nosuch2: b} {no such column: nosuch} 8 {addr: a nosuch2: b} {no such column: nosuch2} } { diff --git a/test/permutations.test b/test/permutations.test index c3f4ddf9f5..d03895e8e6 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -222,6 +222,12 @@ test_suite "fts3" -prefix "" -description { fts4growth.test fts4growth2.test } +test_suite "fts5" -prefix "" -description { + All FTS5 tests. +} -files { + fts5aa.test fts5ab.test fts5ea.test +} + test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. 
Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files From e369fe4352a65f3fc4930cc9af2b71522136df84 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 26 Jun 2014 12:31:41 +0000 Subject: [PATCH 004/206] Fix minor problems in term matching. FossilOrigin-Name: 94eeb077d08a1d2607f3ff3a9fbf18229ba475bb --- ext/fts5/fts5_expr.c | 3 ++- ext/fts5/fts5_index.c | 31 +++++++++++++++++++++++-------- manifest | 16 ++++++++-------- manifest.uuid | 2 +- test/fts5ab.test | 27 +++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index e34818dd6c..364ac024ef 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -461,8 +461,9 @@ static int fts5ParseTokenize( pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; pTerm->bPrefix = 0; - + pTerm->pIter = 0; pTerm->zTerm = fts5Strdup(pToken, nToken); + return pTerm->zTerm ? SQLITE_OK : SQLITE_NOMEM; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 375912d69d..e620058b2e 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1097,8 +1097,8 @@ static void fts5SegIterNextPage( ){ Fts5StructureSegment *pSeg = pIter->pSeg; if( pIter->pLeaf ) fts5DataRelease(pIter->pLeaf); - if( pIter->iLeafPgnopgnoLast ){ - pIter->iLeafPgno++; + pIter->iLeafPgno++; + if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pIter->iIdx, pSeg->iSegid, 0, pIter->iLeafPgno) ); @@ -1196,11 +1196,13 @@ static void fts5SegIterSeekInit( iPg = node.iChild; for(fts5NodeIterNext(&p->rc, &node); - node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)>=0; + node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; fts5NodeIterNext(&p->rc, &node) ){ iPg = node.iChild; } + fts5NodeIterFree(&node); + fts5DataRelease(pNode); } if( iPg>=pSeg->pgnoFirst ){ @@ -1215,12 +1217,25 @@ static void fts5SegIterSeekInit( fts5SegIterLoadTerm(p, pIter, 0); while( (res = 
fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){ - if( res<0 ){ - /* Search for the end of the position list within the current page. */ + if( res<0 && pIter->iLeafPgno==iPg ){ + /* Search for the end of the current doclist within the current + ** page. The end of a doclist is marked by a pair of successive + ** 0x00 bytes. */ int iOff; - for(iOff=pIter->iLeafOffset; iOffiLeafOffset = iOff+1; - if( iOffiLeafOffset+1; iOffiLeafOffset = iOff + getVarint32(&a[iOff], nKeep); + fts5SegIterLoadTerm(p, pIter, nKeep); + continue; + } } /* No matching term on this page. Set the iterator to EOF. */ diff --git a/manifest b/manifest index 21a248c4b9..34ec6a1a05 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Begin\sadding\squery\ssupport\sto\sfts5. -D 2014-06-25T20:28:38.917 +C Fix\sminor\sproblems\sin\sterm\smatching. +D 2014-06-26T12:31:41.784 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,8 +106,8 @@ F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 F ext/fts5/fts5Int.h 3fd1ebeb58963727cae0ccc8e4e80751bd870296 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 9666362ff500ce21262f355194c1f4b164261b5d -F ext/fts5/fts5_index.c 3e6fbae93eb4dbaaa4bbba4bb11719aafefe363d +F ext/fts5/fts5_expr.c 1874b17f10a38d0b21e0c38a28637f74e4d2570a +F ext/fts5/fts5_index.c c8b21d12f15ca6fe028ede3f8e040dcb2aaef6f6 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -592,7 +592,7 @@ F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F 
test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 -F test/fts5ab.test 79841ddc1645900b17dcf25d3767dcb05f82a4d4 +F test/fts5ab.test 6436ad345d1e7eb5ab198c0174834380805f609c F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1188,7 +1188,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 3515da85d09220c464979467b476c611da4a6a7a -R 71f18cc20edef985f8bf9c192ad2143e +P 47a9f3cc92deefe163108e3507bd4614bf1f5da7 +R b3c853e0de2f65908d06e4caafc2bc71 U dan -Z a8b08260950e139985bb661309680a76 +Z 840b486e1cfb6171cd4015bc26a5f123 diff --git a/manifest.uuid b/manifest.uuid index ad013c65e2..3e7acc7005 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -47a9f3cc92deefe163108e3507bd4614bf1f5da7 \ No newline at end of file +94eeb077d08a1d2607f3ff3a9fbf18229ba475bb \ No newline at end of file diff --git a/test/fts5ab.test b/test/fts5ab.test index a731956aa8..1b976caff7 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -59,6 +59,7 @@ do_execsql_test 1.6 { reset_db do_execsql_test 2.1 { CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1) VALUES('pgsz=32'); INSERT INTO t1 VALUES('one'); INSERT INTO t1 VALUES('two'); INSERT INTO t1 VALUES('three'); @@ -72,6 +73,32 @@ do_execsql_test 2.3 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'two' } {2 two} do_execsql_test 2.4 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'three' } {3 three} do_execsql_test 2.5 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'one' } {1 one} +do_execsql_test 2.6 { + INSERT INTO t1 VALUES('a b c d e f g'); + INSERT INTO t1 VALUES('b d e a a a i'); + INSERT INTO t1 VALUES('x y z b c c c'); +} +foreach {tn expr res} { + 1 a {5 4} + 2 b {6 5 4} + 3 c {6 4} + 4 d {5 4} + 5 
e {5 4} + 6 f {4} + 7 g {4} + 8 x {6} + 9 y {6} + 10 z {6} +} { + do_execsql_test 2.7.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res +} + +#db eval { +# SELECT fts5_decode(rowid, block) AS t FROM t1_data; +#} { +# puts $t +#} finish_test + From acf6642819e65b48e58c26fd1eb4b7ff0dd8ccbc Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 1 Jul 2014 20:45:18 +0000 Subject: [PATCH 005/206] Change the position list format so that its size in bytes is stored at the start of the list itself. FossilOrigin-Name: 62f2ff20418702ed0fbf708369edf5638445b51b --- ext/fts5/fts5Int.h | 14 +-- ext/fts5/fts5_index.c | 245 +++++++++++++++++++++++++++++------------- manifest | 14 +-- manifest.uuid | 2 +- 4 files changed, 181 insertions(+), 94 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 774b9f9687..ff217c881b 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -75,7 +75,6 @@ void sqlite3Fts5Dequote(char *z); typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; - /* ** Values used as part of the flags argument passed to IndexQuery(). */ @@ -117,18 +116,9 @@ void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); /* -** Position list iteration. -** -** for( -** iPos=sqlite3Fts5IterFirstPos(pIter, iCol); -** iPos>=0; -** iPos=sqlite3Fts5IterNextPos(pIter) -** ){ -** // token appears at position iPos of column iCol of the current document -** } +** Obtain the position list that corresponds to the current position. */ -// int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol); -// int sqlite3Fts5IterNextPos(Fts5IndexIter*); +const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter*, int *pn); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e620058b2e..8f3ce6dca2 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -110,13 +110,13 @@ ** ** poslist format: ** +** varint: size of poslist in bytes. 
not including this field. ** collist: collist for column 0 ** zero-or-more { ** 0x01 byte ** varint: column number (I) ** collist: collist for column I ** } -** 0x00 byte ** ** collist format: ** @@ -255,6 +255,7 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5Buffer Fts5Buffer; +typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; @@ -297,13 +298,6 @@ struct Fts5Index { sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; -struct Fts5IndexIter { - Fts5Index *pIndex; - Fts5Structure *pStruct; - Fts5MultiSegIter *pMulti; -}; - - /* ** Buffer object for the incremental building of string data. */ @@ -313,6 +307,13 @@ struct Fts5Buffer { int nSpace; }; +struct Fts5IndexIter { + Fts5Index *pIndex; + Fts5Structure *pStruct; + Fts5MultiSegIter *pMulti; + Fts5Buffer poslist; /* Buffer containing current poslist */ +}; + /* ** A single record read from the %_data table. */ @@ -422,9 +423,9 @@ struct Fts5MultiSegIter { ** ** iLeafOffset: ** Byte offset within the current leaf that is one byte past the end of the -** rowid field of the current entry. Usually this is the first byte of -** the position list data. The exception is if the rowid for the current -** entry is the last thing on the leaf page. +** rowid field of the current entry. Usually this is the size field of the +** position list data. The exception is if the rowid for the current entry +** is the last thing on the leaf page. ** ** pLeaf: ** Buffer containing current leaf page data. Set to NULL at EOF. @@ -453,13 +454,25 @@ struct Fts5SegIter { i64 iRowid; /* Current rowid */ }; +/* +** Object for iterating through paginated data. +*/ +struct Fts5ChunkIter { + Fts5Data *pLeaf; /* Current leaf data. NULL -> EOF. 
*/ + i64 iLeafRowid; /* Absolute rowid of current leaf */ + int nRem; /* Remaining bytes of data to read */ + + /* Output parameters */ + u8 *p; /* Pointer to chunk of data */ + int n; /* Size of buffer p in bytes */ +}; + /* ** Object for iterating through a single position list. */ struct Fts5PosIter { - Fts5Data *pLeaf; /* Current leaf data. NULL -> EOF. */ - i64 iLeafRowid; /* Absolute rowid of current leaf */ - int iLeafOffset; /* Current offset within leaf */ + Fts5ChunkIter chunk; /* Current chunk of data */ + int iOff; /* Offset within chunk data */ int iCol; int iPos; @@ -1107,6 +1120,10 @@ static void fts5SegIterNextPage( } } +/* +** Leave pIter->iLeafOffset as the offset to the size field of the first +** position list. The position list belonging to document pIter->iRowid. +*/ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; /* Offset to read at */ @@ -1218,14 +1235,17 @@ static void fts5SegIterSeekInit( while( (res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){ if( res<0 && pIter->iLeafPgno==iPg ){ - /* Search for the end of the current doclist within the current - ** page. The end of a doclist is marked by a pair of successive - ** 0x00 bytes. */ - int iOff; - for(iOff=pIter->iLeafOffset+1; iOffiLeafOffset; + while( iOffp; int n = pLeaf->n; - for(iOff=pIter->iLeafOffset; iOffiLeafOffset; + if( iOff<=n ){ + int nPoslist; + iOff += getVarint32(&a[iOff], nPoslist); + iOff += nPoslist; + } if( iOffterm.p; } +/* +** Return true if the chunk iterator passed as the second argument is +** at EOF. Or if an error has already occurred. Otherwise, return false. +*/ +static int fts5ChunkIterEof(Fts5Index *p, Fts5ChunkIter *pIter){ + return (p->rc || pIter->pLeaf==0); +} + +/* +** Advance the chunk-iterator to the next chunk of data to read. 
+*/ +static void fts5ChunkIterNext(Fts5Index *p, Fts5ChunkIter *pIter){ + assert( pIter->nRem>=pIter->n ); + pIter->nRem -= pIter->n; + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + pIter->p = 0; + if( pIter->nRem>0 ){ + Fts5Data *pLeaf; + pIter->iLeafRowid++; + pLeaf = pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); + if( pLeaf ){ + pIter->n = MIN(pIter->nRem, pLeaf->n-4); + pIter->p = pLeaf->p+4; + } + } +} + +/* +** Intialize the chunk iterator to read the position list data for which +** the size field is at offset iOff of leaf pLeaf. +*/ +static void fts5ChunkIterInit( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pSeg, /* Segment iterator to read poslist from */ + Fts5ChunkIter *pIter /* Initialize this object */ +){ + int iId = pSeg->pSeg->iSegid; + i64 rowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); + Fts5Data *pLeaf = pSeg->pLeaf; + int iOff = pSeg->iLeafOffset; + + memset(pIter, 0, sizeof(*pIter)); + pIter->iLeafRowid = rowid; + if( iOffn ){ + fts5DataReference(pLeaf); + pIter->pLeaf = pLeaf; + }else{ + pIter->nRem = 1; + fts5ChunkIterNext(p, pIter); + if( p->rc ) return; + iOff = 4; + pLeaf = pIter->pLeaf; + } + + iOff += getVarint32(&pLeaf->p[iOff], pIter->nRem); + pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); + pIter->p = pLeaf->p + iOff; + + if( pIter->n==0 ){ + fts5ChunkIterNext(p, pIter); + } +} + /* ** Read and return the next 32-bit varint from the position-list iterator ** passed as the second argument. 
@@ -1543,17 +1632,12 @@ static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ int iVal = 0; if( p->rc==SQLITE_OK ){ - int iOff = pIter->iLeafOffset; - if( iOff < pIter->pLeaf->n ){ - pIter->iLeafOffset += getVarint32(&pIter->pLeaf->p[iOff], iVal); - }else{ - fts5DataRelease(pIter->pLeaf); - pIter->iLeafRowid++; - pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); - if( pIter->pLeaf ){ - pIter->iLeafOffset = 4 + getVarint32(&pIter->pLeaf->p[4], iVal); - } + if( pIter->iOff>=pIter->chunk.n ){ + fts5ChunkIterNext(p, &pIter->chunk); + if( fts5ChunkIterEof(p, &pIter->chunk) ) return 0; + pIter->iOff = 0; } + pIter->iOff += getVarint32(&pIter->chunk.p[pIter->iOff], iVal); } return iVal; } @@ -1563,16 +1647,15 @@ static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ */ static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){ int iVal; + assert( fts5ChunkIterEof(p, &pIter->chunk)==0 ); iVal = fts5PosIterReadVarint(p, pIter); - if( iVal==0 ){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - } - else if( iVal==1 ){ - pIter->iCol = fts5PosIterReadVarint(p, pIter); - pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2; - }else{ - pIter->iPos += (iVal - 2); + if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ + if( iVal==1 ){ + pIter->iCol = fts5PosIterReadVarint(p, pIter); + pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2; + }else{ + pIter->iPos += (iVal - 2); + } } } @@ -1588,14 +1671,11 @@ static void fts5PosIterInit( ){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; - int iId = pSeg->pSeg->iSegid; - memset(pIter, 0, sizeof(*pIter)); - pIter->pLeaf = pSeg->pLeaf; - pIter->iLeafOffset = pSeg->iLeafOffset; - pIter->iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); - fts5DataReference(pIter->pLeaf); - fts5PosIterNext(p, pIter); + fts5ChunkIterInit(p, pSeg, &pIter->chunk); + if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ + 
fts5PosIterNext(p, pIter); + } } } @@ -1604,7 +1684,7 @@ static void fts5PosIterInit( ** at EOF. Or if an error has already occurred. Otherwise, return false. */ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ - return (p->rc || pIter->pLeaf==0); + return (p->rc || pIter->chunk.pLeaf==0); } @@ -2106,15 +2186,15 @@ static void fts5WritePendingDoclist( /* Append the rowid itself */ fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid); + /* Append the size of the position list in bytes */ + fts5WriteAppendPoslistInt(p, pWriter, pPoslist->buf.n); + /* Copy the position list to the output segment */ while( ibuf.n){ int iVal; i += getVarint32(&pPoslist->buf.p[i], iVal); fts5WriteAppendPoslistInt(p, pWriter, iVal); } - - /* Write the position list terminator */ - fts5WriteAppendZerobyte(p, pWriter); } /* Write the doclist terminator */ @@ -2297,9 +2377,8 @@ fflush(stdout); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ - Fts5PosIter sPos; /* Used to iterate through position list */ - int iCol = 0; /* Current output column */ - int iPos = 0; /* Current output position */ + Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; + Fts5ChunkIter sPos; /* Used to iterate through position list */ int nTerm; const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); @@ -2319,20 +2398,16 @@ fflush(stdout); fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); /* Copy the position list from input to output */ - for(fts5PosIterInit(p, pIter, &sPos); - fts5PosIterEof(p, &sPos)==0; - fts5PosIterNext(p, &sPos) - ){ - if( sPos.iCol!=iCol ){ - fts5WriteAppendPoslistInt(p, &writer, 1); - fts5WriteAppendPoslistInt(p, &writer, sPos.iCol); - iCol = sPos.iCol; - iPos = 0; + fts5ChunkIterInit(p, pSeg, &sPos); + fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); + for(/* noop */; fts5ChunkIterEof(p, &sPos)==0; fts5ChunkIterNext(p, &sPos)){ + int iOff = 0; + while( iOffposlist); fts5MultiIterNext(pIter->pIndex, pIter->pMulti); } @@ -3125,6 +3207,21 @@ i64 
sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ return fts5MultiIterRowid(pIter->pMulti); } +/* +** Return a pointer to a buffer containing a copy of the position list for +** the current entry. Output variable *pn is set to the size of the buffer +** in bytes before returning. +** +** The returned buffer does not include the 0x00 terminator byte stored on +** disk. +*/ +const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ + assert( sqlite3Fts5IterEof(pIter)==0 ); + + *pn = pIter->poslist.n; + return pIter->poslist.p; +} + /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ diff --git a/manifest b/manifest index 34ec6a1a05..e5d38010a4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sminor\sproblems\sin\sterm\smatching. -D 2014-06-26T12:31:41.784 +C Change\sthe\sposition\slist\sformat\sso\sthat\sits\ssize\sin\sbytes\sis\sstored\sat\sthe\sstart\sof\sthe\slist\sitself. +D 2014-07-01T20:45:18.496 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,10 +104,10 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 -F ext/fts5/fts5Int.h 3fd1ebeb58963727cae0ccc8e4e80751bd870296 +F ext/fts5/fts5Int.h 80f3d38a69a0c58ccc94428c8fc8adbcf7561a2d F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 1874b17f10a38d0b21e0c38a28637f74e4d2570a -F ext/fts5/fts5_index.c c8b21d12f15ca6fe028ede3f8e040dcb2aaef6f6 +F ext/fts5/fts5_index.c ea3dfe56a16813fcf59e03f6156965894b4b5e6f F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c 
d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1188,7 +1188,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 47a9f3cc92deefe163108e3507bd4614bf1f5da7 -R b3c853e0de2f65908d06e4caafc2bc71 +P 94eeb077d08a1d2607f3ff3a9fbf18229ba475bb +R 5d9b8f6933c58725a24e426a963b0d97 U dan -Z 840b486e1cfb6171cd4015bc26a5f123 +Z bb8816e0d501865bff7c4c8da87350cb diff --git a/manifest.uuid b/manifest.uuid index 3e7acc7005..ac69385214 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -94eeb077d08a1d2607f3ff3a9fbf18229ba475bb \ No newline at end of file +62f2ff20418702ed0fbf708369edf5638445b51b \ No newline at end of file From 1dfacb467582ee758a8b5e8a9e071f5e45b243c0 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 2 Jul 2014 20:18:49 +0000 Subject: [PATCH 006/206] Add support for phrase queries to fts5. FossilOrigin-Name: 2e5652e6526b8fb3f5c163168d95bc0bb4c93686 --- ext/fts5/fts5Int.h | 33 +++++ ext/fts5/fts5_buffer.c | 139 ++++++++++++++++++ ext/fts5/fts5_expr.c | 326 ++++++++++++++++++++++++++++++++++++++--- ext/fts5/fts5_index.c | 189 ++++++------------------ main.mk | 4 + manifest | 21 +-- manifest.uuid | 2 +- test/fts5ab.test | 43 +++++- 8 files changed, 569 insertions(+), 188 deletions(-) create mode 100644 ext/fts5/fts5_buffer.c diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index ff217c881b..5f55bbadb5 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -67,6 +67,39 @@ void sqlite3Fts5Dequote(char *z); ** End of interface to code in fts5_config.c. **************************************************************************/ +/************************************************************************** +*/ + +/* +** Buffer object for the incremental building of string data. 
+*/ +typedef struct Fts5Buffer Fts5Buffer; +struct Fts5Buffer { + u8 *p; + int n; + int nSpace; +}; + +int sqlite3Fts5BufferGrow(int*, Fts5Buffer*, int); +void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); +void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, int, const u8*); +void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); +void sqlite3Fts5BufferFree(Fts5Buffer*); +void sqlite3Fts5BufferZero(Fts5Buffer*); +void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); +void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); + +#define fts5BufferZero(x) sqlite3Fts5BufferZero(x) +#define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c) +#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c) +#define fts5BufferFree(a) sqlite3Fts5BufferFree(a) +#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) +#define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) + +/* +** End of interface to code in fts5_buffer.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_index.c. fts5_index.c contains contains code ** to access the data stored in the %_data table. diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c new file mode 100644 index 0000000000..d8ad29f59a --- /dev/null +++ b/ext/fts5/fts5_buffer.c @@ -0,0 +1,139 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. 
+** +****************************************************************************** +*/ + + + +#include "fts5Int.h" + +int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ + /* A no-op if an error has already occurred */ + if( *pRc ) return 1; + + if( (pBuf->n + nByte) > pBuf->nSpace ){ + u8 *pNew; + int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; + while( nNew<(pBuf->n + nByte) ){ + nNew = nNew * 2; + } + pNew = sqlite3_realloc(pBuf->p, nNew); + if( pNew==0 ){ + *pRc = SQLITE_NOMEM; + return 1; + }else{ + pBuf->nSpace = nNew; + pBuf->p = pNew; + } + } + return 0; +} + +/* +** Encode value iVal as an SQLite varint and append it to the buffer object +** pBuf. If an OOM error occurs, set the error code in p. +*/ +void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ + if( sqlite3Fts5BufferGrow(pRc, pBuf, 9) ) return; + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iVal); +} + +/* +** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set +** the error code in p. If an error has already occurred when this function +** is called, it is a no-op. +*/ +void sqlite3Fts5BufferAppendBlob( + int *pRc, + Fts5Buffer *pBuf, + int nData, + const u8 *pData +){ + if( sqlite3Fts5BufferGrow(pRc, pBuf, nData) ) return; + memcpy(&pBuf->p[pBuf->n], pData, nData); + pBuf->n += nData; +} + +/* +** Append the nul-terminated string zStr to the buffer pBuf. This function +** ensures that the byte following the buffer data is set to 0x00, even +** though this byte is not included in the pBuf->n count. +*/ +void sqlite3Fts5BufferAppendString( + int *pRc, + Fts5Buffer *pBuf, + const char *zStr +){ + int nStr = strlen(zStr); + if( sqlite3Fts5BufferGrow(pRc, pBuf, nStr+1) ) return; + sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr, (const u8*)zStr); + if( *pRc==SQLITE_OK ) pBuf->p[pBuf->n] = 0x00; +} + +/* +** Argument zFmt is a printf() style format string. 
This function performs +** the printf() style processing, then appends the results to buffer pBuf. +** +** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte +** following the buffer data is set to 0x00, even though this byte is not +** included in the pBuf->n count. +*/ +void sqlite3Fts5BufferAppendPrintf( + int *pRc, + Fts5Buffer *pBuf, + char *zFmt, ... +){ + if( *pRc==SQLITE_OK ){ + char *zTmp; + va_list ap; + va_start(ap, zFmt); + zTmp = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + + if( zTmp==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); + sqlite3_free(zTmp); + } + } +} + +/* +** Free any buffer allocated by pBuf. Zero the structure before returning. +*/ +void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ + sqlite3_free(pBuf->p); + memset(pBuf, 0, sizeof(Fts5Buffer)); +} + +/* +** Zero the contents of the buffer object. But do not free the associated +** memory allocation. +*/ +void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ + pBuf->n = 0; +} + +/* +** Set the buffer to contain nData/pData. If an OOM error occurs, leave an +** the error code in p. If an error has already occurred when this function +** is called, it is a no-op. +*/ +void sqlite3Fts5BufferSet( + int *pRc, + Fts5Buffer *pBuf, + int nData, + const u8 *pData +){ + pBuf->n = 0; + sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); +} diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 364ac024ef..1c0e936cb0 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -68,6 +68,8 @@ struct Fts5ExprTerm { ** within a document for it to match. 
*/ struct Fts5ExprPhrase { + Fts5Buffer poslist; /* Current position list */ + i64 iRowid; /* Current rowid */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; @@ -94,6 +96,43 @@ struct Fts5Parse { Fts5ExprNode *pExpr; /* Result of a successful parse */ }; +/************************************************************************* +*/ +typedef struct Fts5PoslistIter Fts5PoslistIter; +struct Fts5PoslistIter { + const u8 *a; /* Position list to iterate through */ + int n; /* Size of buffer at a[] in bytes */ + int i; /* Current offset in a[] */ + + /* Output variables */ + int bEof; /* Set to true at EOF */ + i64 iPos; /* (iCol<<32) + iPos */ +}; + +static void fts5PoslistIterNext(Fts5PoslistIter *pIter){ + if( pIter->i>=pIter->n ){ + pIter->bEof = 1; + }else{ + int iVal; + pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + if( iVal==1 ){ + pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + pIter->iPos = ((u64)iVal << 32); + pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + } + pIter->iPos += (iVal-2); + } +} + +static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ + memset(pIter, 0, sizeof(*pIter)); + pIter->a = a; + pIter->n = n; + fts5PoslistIterNext(pIter); +} +/* +*************************************************************************/ + void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ if( pParse->rc==SQLITE_OK ){ va_list ap; @@ -246,6 +285,259 @@ static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ assert( 0 ); return SQLITE_OK; } + +/* +** All individual term iterators in pPhrase are guaranteed to be valid and +** pointing to the same rowid when this function is called. This function +** checks if the current rowid really is a match, and if so populates +** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch +** is set to true if this is really a match, or false otherwise. 
+** +** SQLITE_OK is returned if an error occurs, or an SQLite error code +** otherwise. It is not considered an error code if the current rowid is +** not a match. +*/ +static int fts5ExprPhraseIsMatch( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + int *pbMatch /* OUT: Set to true if really a match */ +){ + Fts5PoslistIter aStatic[4]; + Fts5PoslistIter *aIter = aStatic; + int i; + int rc = SQLITE_OK; + + if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ + int nByte = sizeof(Fts5PoslistIter) * pPhrase->nTerm; + aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); + if( !aIter ) return SQLITE_NOMEM; + } + + /* Initialize a term iterator for each term in the phrase */ + for(i=0; inTerm; i++){ + int n; + const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &n); + fts5PoslistIterInit(a, n, &aIter[i]); + } + + *pbMatch = 0; + while( 1 ){ + + int bMatch = 1; + i64 iPos = aIter[0].iPos; + for(i=1; inTerm; i++){ + Fts5PoslistIter *pPos = &aIter[i]; + i64 iAdj = pPos->iPos-i; + if( (pPos->iPos-i)!=iPos ){ + bMatch = 0; + if( iAdj>iPos ) iPos = iAdj; + } + } + if( bMatch ){ + *pbMatch = 1; + break; + } + + for(i=0; inTerm; i++){ + Fts5PoslistIter *pPos = &aIter[i]; + while( (pPos->iPos-i) < iPos ){ + fts5PoslistIterNext(pPos); + if( pPos->bEof ) goto ismatch_out; + } + } + } + + ismatch_out: + if( aIter!=aStatic ) sqlite3_free(aIter); + return rc; +} + +/* +** All individual term iterators in pPhrase are guaranteed to be valid when +** this function is called. This function checks if all term iterators +** point to the same rowid, and if not, advances them until they do. +** If an EOF is reached before this happens, *pbEof is set to true before +** returning. +** +** SQLITE_OK is returned if an error occurs, or an SQLite error code +** otherwise. It is not considered an error code if an iterator reaches +** EOF. 
+*/ +static int fts5ExprPhraseNextRowidMatch( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + int *pbEof /* OUT: Set to true if phrase at EOF */ +){ + assert( *pbEof==0 ); + while( 1 ){ + int i; + int bMatch = 1; + i64 iMin = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); + for(i=1; inTerm; i++){ + i64 iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[i].pIter); + if( iRowid!=iMin ){ + bMatch = 0; + if( iRowidnTerm; i++){ + Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; + while( sqlite3Fts5IterRowid(pIter)>iMin ){ + sqlite3Fts5IterNext(pIter, 0); + if( sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + return SQLITE_OK; + } + } + } + } + + return SQLITE_OK; +} + +static int fts5ExprPhraseAdvanceAll( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + int *pbEof /* OUT: Set to true if phrase at EOF */ +){ + int i; + int rc = SQLITE_OK; + for(i=0; inTerm; i++){ + Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; + sqlite3Fts5IterNext(pIter, 0); + if( sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + break; + } + } + return rc; +} + +/* +** Argument pPhrase points to a multi-term phrase object. All individual +** term iterators point to valid entries (not EOF). +* +** This function tests if the term iterators currently all point to the +** same rowid, and if so, if the rowid matches the phrase constraint. If +** so, the pPhrase->poslist buffer is populated and the pPhrase->iRowid +** variable set before returning. Or, if the current combination of +** iterators is not a match, they are advanced until they are. If one of +** the iterators reaches EOF before a match is found, *pbEof is set to +** true before returning. The final values of the pPhrase->poslist and +** iRowid fields are undefined in this case. +** +** SQLITE_OK is returned if an error occurs, or an SQLite error code +** otherwise. 
It is not considered an error code if an iterator reaches +** EOF. +*/ +static int fts5ExprPhraseNextMatch( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + int *pbEof /* OUT: Set to true if phrase at EOF */ +){ + int i; /* Used to iterate through terms */ + int rc = SQLITE_OK; /* Return code */ + int bMatch = 0; + + assert( *pbEof==0 ); + + while( 1 ){ + rc = fts5ExprPhraseNextRowidMatch(pExpr, pPhrase, pbEof); + if( rc!=SQLITE_OK || *pbEof ) break; + + /* At this point, all term iterators are valid and point to the same rowid. + ** The following assert() statements verify this. */ +#ifdef SQLITE_DEBUG + for(i=0; inTerm; i++){ + Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; + Fts5IndexIter *pOne = pPhrase->aTerm[0].pIter; + assert( 0==sqlite3Fts5IterEof(pIter) ); + assert( sqlite3Fts5IterRowid(pOne)==sqlite3Fts5IterRowid(pIter) ); + } +#endif + + rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); + if( rc!=SQLITE_OK || bMatch ) break; + rc = fts5ExprPhraseAdvanceAll(pExpr, pPhrase, pbEof); + if( rc!=SQLITE_OK || *pbEof ) break; + } + + pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); + return rc; +} + +/* +** Advance the phrase iterator pPhrase to the next match. +*/ +static int fts5ExprPhraseNext( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + int *pbEof /* OUT: Set to true if phrase at EOF */ +){ + int i; + for(i=0; inTerm; i++){ + Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; + sqlite3Fts5IterNext(pIter, 0); + if( sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + return SQLITE_OK; + } + } + + if( pPhrase->nTerm==1 ){ + pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); + }else{ + fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); + } + + return SQLITE_OK; +} + +/* +** Point phrase object pPhrase at the first matching document. 
Or, if there +** are no matching documents at all, move pPhrase to EOF and set *pbEof to +** true before returning. +** +** If no error occurs, SQLITE_OK is returned. Otherwise, an SQLite error +** code. +*/ +static int fts5ExprPhraseFirst( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + int *pbEof /* OUT: Set to true if phrase at EOF */ +){ + int i; /* Used to iterate through terms */ + int rc = SQLITE_OK; + + for(i=0; inTerm; i++){ + Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; + pTerm->pIter = sqlite3Fts5IndexQuery( + pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0) + ); + if( sqlite3Fts5IterEof(pTerm->pIter) ){ + *pbEof = 1; + return SQLITE_OK; + } + } + + if( pPhrase->nTerm==1 ){ + const u8 *a; int n; + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + pPhrase->iRowid = sqlite3Fts5IterRowid(pIter); + a = sqlite3Fts5IterPoslist(pIter, &n); + if( a ){ + sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, n, a); + } + }else{ + rc = fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); + } + + return rc; +} static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; @@ -253,20 +545,10 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ pNode->bEof = 0; if( pNode->eType==FTS5_STRING ){ Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; - Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; - assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); - - pTerm->pIter = sqlite3Fts5IndexQuery( - pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), - (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bAsc ? 
FTS5INDEX_QUERY_ASC : 0) - ); - if( sqlite3Fts5IterEof(pTerm->pIter) ){ - pNode->bEof = 1; - }else{ - pNode->iRowid = sqlite3Fts5IterRowid(pTerm->pIter); - } - + assert( pNode->pNear->nPhrase==1 ); + assert( pNode->bEof==0 ); + rc = fts5ExprPhraseFirst(pExpr, pPhrase, &pNode->bEof); + pNode->iRowid = pPhrase->iRowid; }else{ rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); if( rc==SQLITE_OK ){ @@ -284,14 +566,9 @@ static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ if( pNode->eType==FTS5_STRING ){ Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; - Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; - assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); - sqlite3Fts5IterNext(pTerm->pIter, 0); - if( sqlite3Fts5IterEof(pTerm->pIter) ){ - pNode->bEof = 1; - }else{ - pNode->iRowid = sqlite3Fts5IterRowid(pTerm->pIter); - } + assert( pNode->pNear->nPhrase==1 ); + rc = fts5ExprPhraseNext(pExpr, pPhrase, &pNode->bEof); + pNode->iRowid = pPhrase->iRowid; }else{ assert( 0 ); } @@ -373,6 +650,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ sqlite3Fts5IterClose(pTerm->pIter); } } + fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } @@ -455,13 +733,13 @@ static int fts5ParseTokenize( sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew ); if( pNew==0 ) return SQLITE_NOMEM; + if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); pCtx->pPhrase = pPhrase = pNew; pNew->nTerm = nNew - SZALLOC; } pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; - pTerm->bPrefix = 0; - pTerm->pIter = 0; + memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->zTerm = fts5Strdup(pToken, nToken); return pTerm->zTerm ? 
SQLITE_OK : SQLITE_NOMEM; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 8f3ce6dca2..e60ec5f503 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -254,7 +254,6 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; -typedef struct Fts5Buffer Fts5Buffer; typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5MultiSegIter Fts5MultiSegIter; @@ -298,15 +297,6 @@ struct Fts5Index { sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; -/* -** Buffer object for the incremental building of string data. -*/ -struct Fts5Buffer { - u8 *p; - int n; - int nSpace; -}; - struct Fts5IndexIter { Fts5Index *pIndex; Fts5Structure *pStruct; @@ -559,129 +549,6 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ } -static int fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ - /* A no-op if an error has already occurred */ - if( *pRc ) return 1; - - if( (pBuf->n + nByte) > pBuf->nSpace ){ - u8 *pNew; - int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; - while( nNew<(pBuf->n + nByte) ){ - nNew = nNew * 2; - } - pNew = sqlite3_realloc(pBuf->p, nNew); - if( pNew==0 ){ - *pRc = SQLITE_NOMEM; - return 1; - }else{ - pBuf->nSpace = nNew; - pBuf->p = pNew; - } - } - return 0; -} - -/* -** Encode value iVal as an SQLite varint and append it to the buffer object -** pBuf. If an OOM error occurs, set the error code in p. -*/ -static void fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ - if( fts5BufferGrow(pRc, pBuf, 9) ) return; - pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iVal); -} - -/* -** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set -** the error code in p. If an error has already occurred when this function -** is called, it is a no-op. 
-*/ -static void fts5BufferAppendBlob( - int *pRc, - Fts5Buffer *pBuf, - int nData, - const u8 *pData -){ - if( fts5BufferGrow(pRc, pBuf, nData) ) return; - memcpy(&pBuf->p[pBuf->n], pData, nData); - pBuf->n += nData; -} - -/* -** Append the nul-terminated string zStr to the buffer pBuf. This function -** ensures that the byte following the buffer data is set to 0x00, even -** though this byte is not included in the pBuf->n count. -*/ -static void fts5BufferAppendString( - int *pRc, - Fts5Buffer *pBuf, - const char *zStr -){ - int nStr = strlen(zStr); - if( fts5BufferGrow(pRc, pBuf, nStr+1) ) return; - fts5BufferAppendBlob(pRc, pBuf, nStr, (const u8*)zStr); - if( *pRc==SQLITE_OK ) pBuf->p[pBuf->n] = 0x00; -} - -/* -** Argument zFmt is a printf() style format string. This function performs -** the printf() style processing, then appends the results to buffer pBuf. -** -** Like fts5BufferAppendString(), this function ensures that the byte -** following the buffer data is set to 0x00, even though this byte is not -** included in the pBuf->n count. -*/ -static void fts5BufferAppendPrintf( - int *pRc, - Fts5Buffer *pBuf, - char *zFmt, ... -){ - if( *pRc==SQLITE_OK ){ - char *zTmp; - va_list ap; - va_start(ap, zFmt); - zTmp = sqlite3_vmprintf(zFmt, ap); - va_end(ap); - - if( zTmp==0 ){ - *pRc = SQLITE_NOMEM; - }else{ - fts5BufferAppendString(pRc, pBuf, zTmp); - sqlite3_free(zTmp); - } - } -} - -/* -** Free any buffer allocated by pBuf. Zero the structure before returning. -*/ -static void fts5BufferFree(Fts5Buffer *pBuf){ - sqlite3_free(pBuf->p); - memset(pBuf, 0, sizeof(Fts5Buffer)); -} - -/* -** Zero the contents of the buffer object. But do not free the associated -** memory allocation. -*/ -static void fts5BufferZero(Fts5Buffer *pBuf){ - pBuf->n = 0; -} - -/* -** Set the buffer to contain nData/pData. If an OOM error occurs, leave an -** the error code in p. If an error has already occurred when this function -** is called, it is a no-op. 
-*/ -static void fts5BufferSet( - int *pRc, - Fts5Buffer *pBuf, - int nData, - const u8 *pData -){ - pBuf->n = 0; - fts5BufferAppendBlob(pRc, pBuf, nData, pData); -} - /* ** Compare the contents of the pLeft buffer with the pRight/nRight blob. ** @@ -1621,6 +1488,11 @@ static void fts5ChunkIterInit( } } +static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; +} + /* ** Read and return the next 32-bit varint from the position-list iterator ** passed as the second argument. @@ -2935,15 +2807,17 @@ static void fts5DecodeStructure( for(iLvl=0; iLvlnLevel; iLvl++){ Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; - fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge + ); for(iSeg=0; iSegnSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; - fts5BufferAppendPrintf(pRc, pBuf, + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, pSeg->pgnoFirst, pSeg->pgnoLast ); } - fts5BufferAppendPrintf(pRc, pBuf, "}"); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } fts5StructureRelease(p); @@ -2984,7 +2858,7 @@ static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ while( iOffpStruct = fts5StructureRead(p, 0); if( pRet->pStruct ){ fts5MultiIterNew(p, @@ -3216,8 +3095,23 @@ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ ** disk. 
*/ const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ - assert( sqlite3Fts5IterEof(pIter)==0 ); + Fts5ChunkIter iter; + Fts5Index *p = pIter->pIndex; + Fts5SegIter *pSeg = &pIter->pMulti->aSeg[ pIter->pMulti->aFirst[1] ]; + assert( sqlite3Fts5IterEof(pIter)==0 ); + fts5ChunkIterInit(p, pSeg, &iter); + if( fts5ChunkIterEof(p, &iter)==0 ){ + fts5BufferZero(&pIter->poslist); + fts5BufferGrow(&p->rc, &pIter->poslist, iter.nRem); + while( fts5ChunkIterEof(p, &iter)==0 ){ + fts5BufferAppendBlob(&p->rc, &pIter->poslist, iter.n, iter.p); + fts5ChunkIterNext(p, &iter); + } + } + fts5ChunkIterRelease(&iter); + + if( p->rc ) return 0; *pn = pIter->poslist.n; return pIter->poslist.p; } @@ -3230,6 +3124,7 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ fts5MultiIterFree(pIter->pIndex, pIter->pMulti); fts5StructureRelease(pIter->pStruct); fts5CloseReader(pIter->pIndex); + fts5BufferFree(&pIter->poslist); sqlite3_free(pIter); } } diff --git a/main.mk b/main.mk index cde67dbb2e..953d63e39f 100644 --- a/main.mk +++ b/main.mk @@ -73,6 +73,7 @@ LIBOBJ+= vdbe.o parse.o \ vdbetrace.o wal.o walker.o where.o utf.o vtab.o LIBOBJ += fts5.o +LIBOBJ += fts5_buffer.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o LIBOBJ += fts5_index.o @@ -572,6 +573,9 @@ rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) # FTS5 things # +fts5_buffer.o: $(TOP)/ext/fts5/fts5_buffer.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_buffer.c + fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_config.c diff --git a/manifest b/manifest index e5d38010a4..2c8ddafbb9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\sposition\slist\sformat\sso\sthat\sits\ssize\sin\sbytes\sis\sstored\sat\sthe\sstart\sof\sthe\slist\sitself. -D 2014-07-01T20:45:18.496 +C Add\ssupport\sfor\sphrase\squeries\sto\sfts5. 
+D 2014-07-02T20:18:49.027 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,10 +104,11 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 -F ext/fts5/fts5Int.h 80f3d38a69a0c58ccc94428c8fc8adbcf7561a2d +F ext/fts5/fts5Int.h b7a684ff3508ab24437886f8bc873a16f494a7db +F ext/fts5/fts5_buffer.c f1a26a79e2943fe4388e531fa141941b5eb6d31a F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 1874b17f10a38d0b21e0c38a28637f74e4d2570a -F ext/fts5/fts5_index.c ea3dfe56a16813fcf59e03f6156965894b4b5e6f +F ext/fts5/fts5_expr.c aacfcf6b7c14ca5987ba1de0bd080eee31fca98c +F ext/fts5/fts5_index.c 6bb95f6a1ed0e50bc9f2dce7b7a92859f5821364 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -152,7 +153,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 2bb1ec703ac4f27743961764b59cfb5f91d72bfe +F main.mk c5524f888196af43a9b5dfae878205044f549dbf F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -592,7 +593,7 @@ F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test 
c8d3b9694f6b2864161c7437408464a535d19343 -F test/fts5ab.test 6436ad345d1e7eb5ab198c0174834380805f609c +F test/fts5ab.test bdc1dd9d58163c0c7b184be817f82e3bf8a81c37 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1188,7 +1189,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 94eeb077d08a1d2607f3ff3a9fbf18229ba475bb -R 5d9b8f6933c58725a24e426a963b0d97 +P 62f2ff20418702ed0fbf708369edf5638445b51b +R 773d748328905c117f50682aca9f537a U dan -Z bb8816e0d501865bff7c4c8da87350cb +Z 194302280b1431e713a39a0adc2c19fe diff --git a/manifest.uuid b/manifest.uuid index ac69385214..9cec4319b1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -62f2ff20418702ed0fbf708369edf5638445b51b \ No newline at end of file +2e5652e6526b8fb3f5c163168d95bc0bb4c93686 \ No newline at end of file diff --git a/test/fts5ab.test b/test/fts5ab.test index 1b976caff7..1cbab9d616 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -94,11 +94,42 @@ foreach {tn expr res} { do_execsql_test 2.7.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res } -#db eval { -# SELECT fts5_decode(rowid, block) AS t FROM t1_data; -#} { -# puts $t -#} +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a,b); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +foreach {tn a b} { + 1 {abashed abandons abase abash abaft} {abases abased} + 2 {abasing abases abaft abated abandons} {abases abandoned} + 3 {abatement abash abash abated abase} {abasements abashing} + 4 {abaft abasements abase abasement abasing} {abasement abases} + 5 {abaft abashing abatement abash abasements} {abandons 
abandoning} + 6 {aback abate abasements abashes abandoned} {abasement abased} + 7 {abandons abated abased aback abandoning} {abases abandoned} + 8 {abashing abases abasement abaft abashing} {abashed abate} + 9 {abash abase abate abashing abashed} {abandon abandoned} + 10 {abate abandoning abandons abasement aback} {abandon abandoning} +} { + do_execsql_test 2.1.$tn.1 { INSERT INTO t1 VALUES($a, $b) } + do_execsql_test 2.1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +} + +foreach {tn expr res} { + 1 {abash} {9 5 3 1} + 2 {abase} {9 4 3 1} + 3 {abase + abash} {1} + 4 {abash + abase} {9} + 5 {abaft + abashing} {8 5} + 6 {abandon + abandoning} {10} + 7 {"abashing abases abasement abaft abashing"} {8} +} { + do_execsql_test 2.2.$tn { + SELECT rowid FROM t1 WHERE t1 MATCH $expr + } $res +} finish_test - From c18a8fe99c3d6e8ad316ecac66b961a5a9dcb996 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 3 Jul 2014 20:39:39 +0000 Subject: [PATCH 007/206] Add support for NEAR expressions to fts5. 
FossilOrigin-Name: 250ae8d40115e2e47cc5a1e8a427fa8c0a89124d --- ext/fts5/fts5_expr.c | 423 ++++++++++++++++++++++++++---------------- ext/fts5/fts5_index.c | 2 +- manifest | 17 +- manifest.uuid | 2 +- test/fts5ab.test | 15 +- test/fts5ac.test | 168 +++++++++++++++++ 6 files changed, 449 insertions(+), 178 deletions(-) create mode 100644 test/fts5ac.test diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 1c0e936cb0..5a29f04709 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -69,7 +69,6 @@ struct Fts5ExprTerm { */ struct Fts5ExprPhrase { Fts5Buffer poslist; /* Current position list */ - i64 iRowid; /* Current rowid */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; @@ -109,7 +108,7 @@ struct Fts5PoslistIter { i64 iPos; /* (iCol<<32) + iPos */ }; -static void fts5PoslistIterNext(Fts5PoslistIter *pIter){ +static int fts5PoslistIterNext(Fts5PoslistIter *pIter){ if( pIter->i>=pIter->n ){ pIter->bEof = 1; }else{ @@ -122,6 +121,7 @@ static void fts5PoslistIterNext(Fts5PoslistIter *pIter){ } pIter->iPos += (iVal-2); } + return pIter->bEof; } static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ @@ -130,6 +130,32 @@ static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ pIter->n = n; fts5PoslistIterNext(pIter); } + +typedef struct Fts5PoslistWriter Fts5PoslistWriter; +struct Fts5PoslistWriter { + int iCol; + int iOff; +}; + +static int fts5PoslistWriterAppend( + Fts5Buffer *pBuf, + Fts5PoslistWriter *pWriter, + i64 iPos +){ + int rc = SQLITE_OK; + int iCol = (int)(iPos >> 32); + int iOff = (iPos & 0x7FFFFFFF); + + if( iCol!=pWriter->iCol ){ + fts5BufferAppendVarint(&rc, pBuf, 1); + fts5BufferAppendVarint(&rc, pBuf, iCol); + pWriter->iCol = iCol; + pWriter->iOff = 0; + } + fts5BufferAppendVarint(&rc, pBuf, (iOff - pWriter->iOff) + 2); + + return rc; +} /* *************************************************************************/ @@ -302,11 
+328,14 @@ static int fts5ExprPhraseIsMatch( Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ + Fts5PoslistWriter writer = {0, 0}; Fts5PoslistIter aStatic[4]; Fts5PoslistIter *aIter = aStatic; int i; int rc = SQLITE_OK; + /* If the aStatic[] array is not large enough, allocate a large array + ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5PoslistIter) * pPhrase->nTerm; aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); @@ -320,40 +349,145 @@ static int fts5ExprPhraseIsMatch( fts5PoslistIterInit(a, n, &aIter[i]); } - *pbMatch = 0; + fts5BufferZero(&pPhrase->poslist); while( 1 ){ - - int bMatch = 1; + int bMatch; i64 iPos = aIter[0].iPos; - for(i=1; inTerm; i++){ - Fts5PoslistIter *pPos = &aIter[i]; - i64 iAdj = pPos->iPos-i; - if( (pPos->iPos-i)!=iPos ){ - bMatch = 0; - if( iAdj>iPos ) iPos = iAdj; + do { + bMatch = 1; + for(i=0; inTerm; i++){ + Fts5PoslistIter *pPos = &aIter[i]; + i64 iAdj = iPos + i; + if( pPos->iPos!=iAdj ){ + bMatch = 0; + while( pPos->iPosiPos>iAdj ) iPos = pPos->iPos-i; + } } - } - if( bMatch ){ - *pbMatch = 1; - break; - } + }while( bMatch==0 ); + + /* Append position iPos to the output */ + rc = fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); + if( rc!=SQLITE_OK ) goto ismatch_out; for(i=0; inTerm; i++){ - Fts5PoslistIter *pPos = &aIter[i]; - while( (pPos->iPos-i) < iPos ){ - fts5PoslistIterNext(pPos); - if( pPos->bEof ) goto ismatch_out; - } + if( fts5PoslistIterNext(&aIter[i]) ) goto ismatch_out; } } ismatch_out: + *pbMatch = (pPhrase->poslist.n>0); + if( aIter!=aStatic ) sqlite3_free(aIter); + return rc; +} + + +static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ + Fts5PoslistIter aStatic[4]; + Fts5PoslistIter *aIter = aStatic; + int i; + int rc = SQLITE_OK; + int bMatch; + i64 iMax; + + /* If the aStatic[] array is not large enough, allocate 
a large array + ** using sqlite3_malloc(). This approach could be improved upon. */ + if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ + int nByte = sizeof(Fts5PoslistIter) * pNear->nPhrase; + aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); + if( !aIter ) return SQLITE_NOMEM; + } + + /* Initialize a term iterator for each phrase */ + for(i=0; inPhrase; i++){ + Fts5Buffer *pPoslist = &pNear->apPhrase[i]->poslist; + fts5PoslistIterInit(pPoslist->p, pPoslist->n, &aIter[i]); + } + + iMax = aIter[0].iPos; + do { + bMatch = 1; + for(i=0; inPhrase; i++){ + Fts5PoslistIter *pPos = &aIter[i]; + i64 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; + if( pPos->iPosiPos>iMax ){ + bMatch = 0; + while( pPos->iPosiPos>iMax ) iMax = pPos->iPos; + } + } + }while( bMatch==0 ); + + ismatch_out: + *pbMatch = bMatch; if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } /* -** All individual term iterators in pPhrase are guaranteed to be valid when +** Advance each phrase iterator in phrase pNear. If any reach EOF, set +** output variable *pbEof to true before returning. +*/ +static int fts5ExprNearAdvanceAll( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprNearset *pNear, /* Near object to advance iterators of */ + int *pbEof /* OUT: Set to true if phrase at EOF */ +){ + int rc = SQLITE_OK; /* Return code */ + int i, j; /* Phrase and token index, respectively */ + + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + sqlite3Fts5IterNext(pIter, 0); + if( sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + return rc; + } + } + } + + return rc; +} + +/* +** Advance iterator pIter until it points to a value equal to or smaller +** than the initial value of *piMin. If this means the iterator points +** to a value smaller than *piMin, update *piMin to the new smallest value. +** +** If the iterator reaches EOF, set *pbEof to true before returning. 
If +** an error occurs, set *pRc to an error code. If either *pbEof or *pRc +** are set, return a non-zero value. Otherwise, return zero. +*/ +static int fts5ExprAdvanceto( + Fts5IndexIter *pIter, /* Iterator to advance */ + i64 *piMin, /* IN/OUT: Minimum rowid seen so far */ + int *pRc, /* OUT: Error code */ + int *pbEof /* OUT: Set to true if EOF */ +){ + i64 iMin = *piMin; + i64 iRowid; + while( (iRowid = sqlite3Fts5IterRowid(pIter))>iMin ){ + sqlite3Fts5IterNext(pIter, 0); + if( sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + return 1; + } + } + if( iRowidaTerm[0].pIter); - for(i=1; inTerm; i++){ - i64 iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[i].pIter); - if( iRowid!=iMin ){ - bMatch = 0; - if( iRowidnTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; - while( sqlite3Fts5IterRowid(pIter)>iMin ){ - sqlite3Fts5IterNext(pIter, 0); - if( sqlite3Fts5IterEof(pIter) ){ - *pbEof = 1; - return SQLITE_OK; - } - } - } - } - - return SQLITE_OK; -} - -static int fts5ExprPhraseAdvanceAll( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ -){ - int i; + Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; - for(i=0; inTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; - sqlite3Fts5IterNext(pIter, 0); - if( sqlite3Fts5IterEof(pIter) ){ - *pbEof = 1; - break; + int i, j; /* Phrase and token index, respectively */ + i64 iMin; /* Smallest rowid any iterator points to */ + int bMatch; + + iMin = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); + do { + bMatch = 1; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid!=iMin ) bMatch = 0; + if( fts5ExprAdvanceto(pIter, &iMin, &rc, &pNode->bEof) ) return rc; + } } - } + }while( bMatch==0 ); + + pNode->iRowid = iMin; return rc; } 
/* -** Argument pPhrase points to a multi-term phrase object. All individual -** term iterators point to valid entries (not EOF). +** Argument pNode points to a NEAR node. All individual term iterators +** point to valid entries (not EOF). * ** This function tests if the term iterators currently all point to the -** same rowid, and if so, if the rowid matches the phrase constraint. If -** so, the pPhrase->poslist buffer is populated and the pPhrase->iRowid +** same rowid, and if so, if the row matches the phrase and NEAR constraints. +** If so, the pPhrase->poslist buffers are populated and the pNode->iRowid ** variable set before returning. Or, if the current combination of ** iterators is not a match, they are advanced until they are. If one of ** the iterators reaches EOF before a match is found, *pbEof is set to @@ -432,123 +542,106 @@ static int fts5ExprPhraseAdvanceAll( ** otherwise. It is not considered an error code if an iterator reaches ** EOF. */ -static int fts5ExprPhraseNextMatch( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ +static int fts5ExprNearNextMatch( + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode ){ - int i; /* Used to iterate through terms */ - int rc = SQLITE_OK; /* Return code */ - int bMatch = 0; - - assert( *pbEof==0 ); - + int rc = SQLITE_OK; + Fts5ExprNearset *pNear = pNode->pNear; while( 1 ){ - rc = fts5ExprPhraseNextRowidMatch(pExpr, pPhrase, pbEof); - if( rc!=SQLITE_OK || *pbEof ) break; + int i; - /* At this point, all term iterators are valid and point to the same rowid. - ** The following assert() statements verify this. 
*/ -#ifdef SQLITE_DEBUG - for(i=0; inTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; - Fts5IndexIter *pOne = pPhrase->aTerm[0].pIter; - assert( 0==sqlite3Fts5IterEof(pIter) ); - assert( sqlite3Fts5IterRowid(pOne)==sqlite3Fts5IterRowid(pIter) ); + /* Advance the iterators until they are a match */ + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); + if( pNode->bEof || rc!=SQLITE_OK ) break; + + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); + if( rc!=SQLITE_OK ) return rc; + if( bMatch==0 ) break; + }else{ + int n; + u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n); + fts5BufferSet(&rc, &pPhrase->poslist, n, a); + } } -#endif - rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); - if( rc!=SQLITE_OK || bMatch ) break; - rc = fts5ExprPhraseAdvanceAll(pExpr, pPhrase, pbEof); - if( rc!=SQLITE_OK || *pbEof ) break; + if( i==pNear->nPhrase ){ + int bMatch = 1; + if( pNear->nPhrase>1 ){ + rc = fts5ExprNearIsMatch(pNear, &bMatch); + } + if( rc!=SQLITE_OK || bMatch ) break; + } + + rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof); + if( pNode->bEof || rc!=SQLITE_OK ) break; } - pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); return rc; } /* -** Advance the phrase iterator pPhrase to the next match. +** Initialize all term iterators in the pNear object. If any term is found +** to match no documents at all, set *pbEof to true and return immediately, +** without initializing any further iterators. 
*/ -static int fts5ExprPhraseNext( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ +static int fts5ExprNearInitAll( + Fts5Expr *pExpr, + Fts5ExprNode *pNode ){ - int i; - for(i=0; inTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; - sqlite3Fts5IterNext(pIter, 0); - if( sqlite3Fts5IterEof(pIter) ){ - *pbEof = 1; - return SQLITE_OK; - } - } + Fts5ExprNearset *pNear = pNode->pNear; + Fts5ExprTerm *pTerm; + Fts5ExprPhrase *pPhrase; + int i, j; - if( pPhrase->nTerm==1 ){ - pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); - }else{ - fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); + for(i=0; inPhrase; i++){ + pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + pTerm = &pPhrase->aTerm[j]; + pTerm->pIter = sqlite3Fts5IndexQuery( + pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0) + ); + if( sqlite3Fts5IterEof(pTerm->pIter) ){ + pNode->bEof = 1; + return SQLITE_OK; + } + } } return SQLITE_OK; } -/* -** Point phrase object pPhrase at the first matching document. Or, if there -** are no matching documents at all, move pPhrase to EOF and set *pbEof to -** true before returning. -** -** If no error occurs, SQLITE_OK is returned. Otherwise, an SQLite error -** code. -*/ -static int fts5ExprPhraseFirst( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ +static int fts5ExprNearNext( + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode ){ - int i; /* Used to iterate through terms */ - int rc = SQLITE_OK; - - for(i=0; inTerm; i++){ - Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; - pTerm->pIter = sqlite3Fts5IndexQuery( - pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), - (pTerm->bPrefix ? 
FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0) - ); - if( sqlite3Fts5IterEof(pTerm->pIter) ){ - *pbEof = 1; - return SQLITE_OK; - } + int rc = fts5ExprNearAdvanceAll(pExpr, pNode->pNear, &pNode->bEof); + if( rc==SQLITE_OK && pNode->bEof==0 ){ + rc = fts5ExprNearNextMatch(pExpr, pNode); } - - if( pPhrase->nTerm==1 ){ - const u8 *a; int n; - Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - pPhrase->iRowid = sqlite3Fts5IterRowid(pIter); - a = sqlite3Fts5IterPoslist(pIter, &n); - if( a ){ - sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, n, a); - } - }else{ - rc = fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); - } - return rc; } static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; - pNode->bEof = 0; + if( pNode->eType==FTS5_STRING ){ - Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; - assert( pNode->pNear->nPhrase==1 ); - assert( pNode->bEof==0 ); - rc = fts5ExprPhraseFirst(pExpr, pPhrase, &pNode->bEof); - pNode->iRowid = pPhrase->iRowid; + + /* Initialize all term iterators in the NEAR object. */ + rc = fts5ExprNearInitAll(pExpr, pNode); + + /* Attempt to advance to the first match */ + if( rc==SQLITE_OK && pNode->bEof==0 ){ + rc = fts5ExprNearNextMatch(pExpr, pNode); + } + }else{ rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); if( rc==SQLITE_OK ){ @@ -565,10 +658,7 @@ static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; if( pNode->eType==FTS5_STRING ){ - Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; - assert( pNode->pNear->nPhrase==1 ); - rc = fts5ExprPhraseNext(pExpr, pPhrase, &pNode->bEof); - pNode->iRowid = pPhrase->iRowid; + rc = fts5ExprNearNext(pExpr, pNode); }else{ assert( 0 ); } @@ -806,6 +896,11 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( return sCtx.pPhrase; } +/* +** Token pTok has appeared in a MATCH expression where the NEAR operator +** is expected. If token pTok does not contain "NEAR", store an error +** in the pParse object. 
+*/ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ if( pParse->rc==SQLITE_OK ){ if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e60ec5f503..af69b4280d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1156,7 +1156,7 @@ static void fts5SegIterNext( int n = pLeaf->n; iOff = pIter->iLeafOffset; - if( iOff<=n ){ + if( iOff=0 || [string first $pat $y]>=0} { + set res [concat $id $res] + } + } + set n [llength $res] + do_execsql_test 1.2.$tn.$n { + SELECT rowid FROM xx WHERE xx match '"' || $phrase || '"' + } $res +} + + + +finish_test + From c2642d7c1e8ea94289344c4d92cd5b7013b94d4b Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 5 Jul 2014 07:54:01 +0000 Subject: [PATCH 008/206] Add support for the "colname : " syntax to fts5. FossilOrigin-Name: 004667106e552e832a564b77e242b86f183d4441 --- ext/fts5/fts5_expr.c | 167 ++++++++++++++++++++++++++++++++++++++----- manifest | 14 ++-- manifest.uuid | 2 +- test/fts5ac.test | 138 ++++++++++++++++++++++++++++++++--- 4 files changed, 286 insertions(+), 35 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 5a29f04709..8a84f958ba 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -99,6 +99,7 @@ struct Fts5Parse { */ typedef struct Fts5PoslistIter Fts5PoslistIter; struct Fts5PoslistIter { + int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ @@ -116,19 +117,31 @@ static int fts5PoslistIterNext(Fts5PoslistIter *pIter){ pIter->i += getVarint32(&pIter->a[pIter->i], iVal); if( iVal==1 ){ pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - pIter->iPos = ((u64)iVal << 32); - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + if( pIter->iCol>=0 && iVal>pIter->iCol ){ + pIter->bEof = 1; + }else{ + pIter->iPos = ((u64)iVal << 32); + pIter->i += getVarint32(&pIter->a[pIter->i], 
iVal); + } } pIter->iPos += (iVal-2); } return pIter->bEof; } -static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ +static int fts5PoslistIterInit( + int iCol, /* If (iCol>=0), this column only */ + const u8 *a, int n, /* Poslist buffer to iterate through */ + Fts5PoslistIter *pIter /* Iterator object to initialize */ +){ memset(pIter, 0, sizeof(*pIter)); pIter->a = a; pIter->n = n; - fts5PoslistIterNext(pIter); + pIter->iCol = iCol; + do { + fts5PoslistIterNext(pIter); + }while( pIter->bEof==0 && (pIter->iPos >> 32)bEof; } typedef struct Fts5PoslistWriter Fts5PoslistWriter; @@ -325,6 +338,7 @@ static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ */ static int fts5ExprPhraseIsMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + int iCol, /* If >=0, search for matches in iCol only */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ @@ -334,6 +348,8 @@ static int fts5ExprPhraseIsMatch( int i; int rc = SQLITE_OK; + fts5BufferZero(&pPhrase->poslist); + /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ @@ -346,10 +362,9 @@ static int fts5ExprPhraseIsMatch( for(i=0; inTerm; i++){ int n; const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &n); - fts5PoslistIterInit(a, n, &aIter[i]); + if( fts5PoslistIterInit(iCol, a, n, &aIter[i]) ) goto ismatch_out; } - fts5BufferZero(&pPhrase->poslist); while( 1 ){ int bMatch; i64 iPos = aIter[0].iPos; @@ -384,6 +399,22 @@ static int fts5ExprPhraseIsMatch( } +/* +** The near-set object passed as the first argument contains more than +** one phrase. All phrases currently point to the same row. The +** Fts5ExprPhrase.poslist buffers are populated accordingly. 
This function +** tests if the current row contains instances of each phrase sufficiently +** close together to meet the NEAR constraint. Output variable *pbMatch +** is set to true if it does, or false otherwise. +** +** If no error occurs, SQLITE_OK is returned. Or, if an error does occur, +** an SQLite error code. If a value other than SQLITE_OK is returned, the +** final value of *pbMatch is undefined. +** +** TODO: This function should also edit the position lists associated +** with each phrase to remove any phrase instances that are not part of +** a set of intances that collectively matches the NEAR constraint. +*/ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ Fts5PoslistIter aStatic[4]; Fts5PoslistIter *aIter = aStatic; @@ -392,6 +423,8 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ int bMatch; i64 iMax; + assert( pNear->nPhrase>1 ); + /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. 
*/ if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ @@ -403,7 +436,7 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ /* Initialize a term iterator for each phrase */ for(i=0; inPhrase; i++){ Fts5Buffer *pPoslist = &pNear->apPhrase[i]->poslist; - fts5PoslistIterInit(pPoslist->p, pPoslist->n, &aIter[i]); + fts5PoslistIterInit(-1, pPoslist->p, pPoslist->n, &aIter[i]); } iMax = aIter[0].iPos; @@ -557,14 +590,14 @@ static int fts5ExprNearNextMatch( for(i=0; inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 ){ + if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); + rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); if( rc!=SQLITE_OK ) return rc; if( bMatch==0 ) break; }else{ int n; - u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n); + const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n); fts5BufferSet(&rc, &pPhrase->poslist, n, a); } } @@ -1033,6 +1066,82 @@ static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ return zNew; } +/* +** Compose a tcl-readable representation of expression pExpr. Return a +** pointer to a buffer containing that representation. It is the +** responsibility of the caller to at some point free the buffer using +** sqlite3_free(). 
+*/ +static char *fts5ExprPrintTcl( + Fts5Config *pConfig, + const char *zNearsetCmd, + Fts5ExprNode *pExpr +){ + char *zRet = 0; + if( pExpr->eType==FTS5_STRING ){ + Fts5ExprNearset *pNear = pExpr->pNear; + int i; + int iTerm; + + zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); + if( pNear->iCol>=0 ){ + zRet = fts5PrintfAppend(zRet, "-col %d ", pNear->iCol); + if( zRet==0 ) return 0; + } + + if( pNear->nPhrase>1 ){ + zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); + if( zRet==0 ) return 0; + } + + zRet = fts5PrintfAppend(zRet, "--"); + if( zRet==0 ) return 0; + + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + + zRet = fts5PrintfAppend(zRet, " {"); + for(iTerm=0; zRet && iTermnTerm; iTerm++){ + char *zTerm = pPhrase->aTerm[iTerm].zTerm; + zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); + } + + if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); + if( zRet==0 ) return 0; + } + + if( zRet ) zRet = fts5PrintfAppend(zRet, "]"); + if( zRet==0 ) return 0; + + }else{ + char *zOp = 0; + char *z1 = 0; + char *z2 = 0; + switch( pExpr->eType ){ + case FTS5_AND: zOp = "&&"; break; + case FTS5_NOT: zOp = "&& !"; break; + case FTS5_OR: zOp = "||"; break; + default: assert( 0 ); + } + + z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft); + z2 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRight); + if( z1 && z2 ){ + int b1 = pExpr->pLeft->eType!=FTS5_STRING; + int b2 = pExpr->pRight->eType!=FTS5_STRING; + zRet = sqlite3_mprintf("%s%s%s %s %s%s%s", + b1 ? "(" : "", z1, b1 ? ")" : "", + zOp, + b2 ? "(" : "", z2, b2 ? 
")" : "" + ); + } + sqlite3_free(z1); + sqlite3_free(z2); + } + + return zRet; +} + static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; if( pExpr->eType==FTS5_STRING ){ @@ -1117,12 +1226,18 @@ static void fts5ExprFunction( Fts5Expr *pExpr = 0; int rc; int i; + int bTcl = sqlite3_user_data(pCtx)!=0; const char **azConfig; /* Array of arguments for Fts5Config */ + const char *zNearsetCmd = "nearset"; int nConfig; /* Size of azConfig[] */ Fts5Config *pConfig = 0; - nConfig = nArg + 2; + if( bTcl && nArg>1 ){ + zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); + } + + nConfig = nArg + 2 - bTcl; azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); if( azConfig==0 ){ sqlite3_result_error_nomem(pCtx); @@ -1131,8 +1246,8 @@ static void fts5ExprFunction( azConfig[0] = 0; azConfig[1] = "main"; azConfig[2] = "tbl"; - for(i=1; ipRoot); + char *zText; + if( bTcl ){ + zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); + }else{ + zText = fts5ExprPrint(pConfig, pExpr->pRoot); + } if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); sqlite3_free(zText); @@ -1166,9 +1286,22 @@ static void fts5ExprFunction( ** UDF with the SQLite handle passed as the only argument. 
*/ int sqlite3Fts5ExprInit(sqlite3 *db){ - int rc = sqlite3_create_function( - db, "fts5_expr", -1, SQLITE_UTF8, 0, fts5ExprFunction, 0, 0 - ); + struct Fts5ExprFunc { + const char *z; + void *p; + void (*x)(sqlite3_context*,int,sqlite3_value**); + } aFunc[] = { + { "fts5_expr", 0, fts5ExprFunction }, + { "fts5_expr_tcl", (void*)1, fts5ExprFunction }, + }; + int i; + int rc = SQLITE_OK; + + for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){ + struct Fts5ExprFunc *p = &aFunc[i]; + rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, p->p, p->x, 0, 0); + } + return rc; } diff --git a/manifest b/manifest index f8d149a28e..0526a1e9b9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\sNEAR\sexpressions\sto\sfts5. -D 2014-07-03T20:39:39.548 +C Add\ssupport\sfor\sthe\s"colname\s:\s"\ssyntax\sto\sfts5. +D 2014-07-05T07:54:01.680 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,7 +107,7 @@ F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 F ext/fts5/fts5Int.h b7a684ff3508ab24437886f8bc873a16f494a7db F ext/fts5/fts5_buffer.c f1a26a79e2943fe4388e531fa141941b5eb6d31a F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 84dd8c1f313f795b41f3fc5f73bee013e8301b68 +F ext/fts5/fts5_expr.c 618e6641c8dc428c146ec84bf30ff0b3da6b28c7 F ext/fts5/fts5_index.c d8ab9712e38dc1beb9a9145ec89e18dc083c0467 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -594,7 +594,7 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test 4db86a9473ee2a8c2cb30e0d81df21c6022f99b6 -F test/fts5ac.test cc4fc45a85fde7fbe8da135aed6b25d2795ba9f6 +F 
test/fts5ac.test c7ca34a477d638195a839c961e1b572890dc5d0d F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1190,7 +1190,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2e5652e6526b8fb3f5c163168d95bc0bb4c93686 -R f894350f59d9ccf43dee7a3b5d2aafff +P 250ae8d40115e2e47cc5a1e8a427fa8c0a89124d +R 213fb14ea45e358dcb308401853c4570 U dan -Z be26a54244aa4231a7a300eba9899e25 +Z 0b8632fefc20738326985d3c409c9be8 diff --git a/manifest.uuid b/manifest.uuid index a90859e837..506ee6aff0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -250ae8d40115e2e47cc5a1e8a427fa8c0a89124d \ No newline at end of file +004667106e552e832a564b77e242b86f183d4441 \ No newline at end of file diff --git a/test/fts5ac.test b/test/fts5ac.test index d5556ef706..c4586481bf 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -137,6 +137,89 @@ do_test 1.1 { } } {} +proc phrasematch {phrase value} { + if {[string first $phrase $value]>=0} { + return 1 + } + return 0 +} + +# Usage: +# +proc nearmatch {nNear phraselist value} { + set nPhrase [llength $phraselist] + + set phraselist [string tolower $phraselist] + set value [string tolower $value] + + if {$nPhrase==1} { + set bMatch [phrasematch [lindex $phraselist 0] $value] + } else { + set nValue [llength $value] + if {$nNear >= $nValue} {set nNear [expr $nValue-1]} + + for {set i $nNear} {$i < $nValue} {incr i} { + set bMatch 1 + foreach phrase $phraselist { + set iMin [expr $i - $nNear - [llength $phrase]] + set iMax [expr $i - 1 + [llength $phrase]] + set subdoc [lrange $value $iMin $iMax] + if {![phrasematch $phrase $subdoc]} { + set bMatch 0 + break + } + } + if {$bMatch} break + } + } + return $bMatch 
+} + +# Usage: +# +# nearset aCol ?-near N? ?-col C? -- phrase1 phrase2... +# +proc nearset {aCol args} { + set O(-near) 10 + set O(-col) -1 + + set nOpt [lsearch -exact $args --] + if {$nOpt<0} { error "no -- option" } + + foreach {k v} [lrange $args 0 [expr $nOpt-1]] { + if {[info exists O($k)]==0} { error "unrecognized option $k" } + set O($k) $v + } + + set phraselist [lrange $args [expr $nOpt+1] end] + + set bMatch 0 + set iCol -1 + foreach col $aCol { + incr iCol + if {$O(-col)>=0 && $O(-col)!=$iCol} continue + + if {[nearmatch $O(-near) $phraselist $col]} { + set bMatch 1 + break + } + } + + return $bMatch +} + +proc matchdata {expr} { + set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] + set res [list] + foreach {id x y} $::data { + set cols [list $x $y] + if $tclexpr { + set res [concat $id $res] + } + } + return $res +} + foreach {tn phrase} { 1 "o" 2 "b q" @@ -149,16 +232,51 @@ foreach {tn phrase} { 9 "no" 10 "L O O L V V K" } { - set res [list] - foreach {id x y} $data { - set pat [string tolower $phrase] - if {[string first $pat $x]>=0 || [string first $pat $y]>=0} { - set res [concat $id $res] - } - } - set n [llength $res] - do_execsql_test 1.2.$tn.$n { - SELECT rowid FROM xx WHERE xx match '"' || $phrase || '"' + + set expr "\"$phrase\"" + set res [matchdata $expr] + + do_execsql_test 1.2.$tn.[llength $res] { + SELECT rowid FROM xx WHERE xx match $expr + } $res +} + +# Test the "nearmatch" commnad. 
+# +do_test 2.0 { nearmatch 2 {a b} {a x x b} } 1 +do_test 2.1 { nearmatch 2 {b a} {a x x b} } 1 +do_test 2.2 { nearmatch 1 {b a} {a x x b} } 0 +do_test 2.3 { nearmatch 1 {"a b" "c d"} {x x a b x c d} } 1 +do_test 2.4 { nearmatch 1 {"a b" "c d"} {x a b x x c d} } 0 +do_test 2.5 { nearmatch 400 {a b} {a x x b} } 1 +do_test 2.6 { nearmatch 0 {a} {a x x b} } 1 +do_test 2.7 { nearmatch 0 {b} {a x x b} } 1 + +foreach {tn expr tclexpr} { + 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} +} { + do_execsql_test 3.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr] +} + +#------------------------------------------------------------------------- +# +foreach {tn expr} { + 1 { NEAR(r c) } + 2 { NEAR(r c, 5) } + 3 { NEAR(r c, 3) } + 4 { NEAR(r c, 2) } + 5 { NEAR(r c, 0) } + 6 { NEAR(a b c) } + 7 { NEAR(a b c, 8) } + 8 { x : NEAR(r c) } + 9 { y : NEAR(r c) } + 10 { x : "r c" } + 11 { y : "r c" } +} { + + set res [matchdata $expr] + do_execsql_test 2.$tn.[llength $res] { + SELECT rowid FROM xx WHERE xx match $expr } $res } From 103576dd75bf97634a65877b21b156412722c636 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 5 Jul 2014 15:15:41 +0000 Subject: [PATCH 009/206] Add support for AND, OR and NOT to fts5. FossilOrigin-Name: 8682b87e794767cefcaa080fd53c8973c24c556a --- ext/fts5/fts5_expr.c | 153 +++++++++++++++++++++++++++++++++++-------- manifest | 14 ++-- manifest.uuid | 2 +- test/fts5ac.test | 17 ++++- 4 files changed, 146 insertions(+), 40 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 8a84f958ba..e528757bb4 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -317,14 +317,6 @@ void sqlite3Fts5ExprFree(Fts5Expr *p){ } } -/* -** -*/ -static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){ - assert( 0 ); - return SQLITE_OK; -} - /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. 
This function @@ -462,8 +454,8 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ } /* -** Advance each phrase iterator in phrase pNear. If any reach EOF, set -** output variable *pbEof to true before returning. +** Advance each term iterator in each phrase in pNear. If any reach EOF, +** set output variable *pbEof to true before returning. */ static int fts5ExprNearAdvanceAll( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ @@ -650,17 +642,131 @@ static int fts5ExprNearInitAll( return SQLITE_OK; } -static int fts5ExprNearNext( - Fts5Expr *pExpr, /* Expression that pNear is a part of */ - Fts5ExprNode *pNode -){ - int rc = fts5ExprNearAdvanceAll(pExpr, pNode->pNear, &pNode->bEof); - if( rc==SQLITE_OK && pNode->bEof==0 ){ - rc = fts5ExprNearNextMatch(pExpr, pNode); +/* fts3ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. */ +static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); + +/* +** Nodes at EOF are considered larger than all other nodes. A node that +** points to a *smaller* rowid is considered larger. +** +** res = (*p1) - (*p2) +*/ +static int fts5NodeCompare(Fts5ExprNode *p1, Fts5ExprNode *p2){ + if( p2->bEof ) return -1; + if( p1->bEof ) return +1; + if( p1->iRowid>p2->iRowid ) return -1; + return (p1->iRowid < p2->iRowid); +} + +static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + int rc = SQLITE_OK; + + if( pNode->bEof==0 ){ + switch( pNode->eType ){ + case FTS5_STRING: { + rc = fts5ExprNearAdvanceAll(pExpr, pNode->pNear, &pNode->bEof); + break; + }; + + case FTS5_AND: { + rc = fts5ExprNodeNext(pExpr, pNode->pLeft); + if( rc==SQLITE_OK ) rc = fts5ExprNodeNext(pExpr, pNode->pRight); + break; + } + + case FTS5_OR: { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + int cmp = fts5NodeCompare(p1, p2); + + if( cmp==0 ){ + rc = fts5ExprNodeNext(pExpr, p1); + if( rc==SQLITE_OK ) rc = fts5ExprNodeNext(pExpr, p2); + }else{ + rc = fts5ExprNodeNext(pExpr, (cmp < 0) ? 
p1 : p2); + } + + break; + } + + default: assert( pNode->eType==FTS5_NOT ); { + rc = fts5ExprNodeNext(pExpr, pNode->pLeft); + break; + } + } + + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeNextMatch(pExpr, pNode); + } + } + + return rc; +} + +/* +** +*/ +static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + int rc = SQLITE_OK; + if( pNode->bEof==0 ){ + switch( pNode->eType ){ + + case FTS5_STRING: { + rc = fts5ExprNearNextMatch(pExpr, pNode); + break; + } + + case FTS5_AND: { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + + while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){ + Fts5ExprNode *pAdv = (p1->iRowid > p2->iRowid) ? p1 : p2; + rc = fts5ExprNodeNext(pExpr, pAdv); + if( rc!=SQLITE_OK ) break; + } + pNode->bEof = p1->bEof || p2->bEof; + pNode->iRowid = p1->iRowid; + break; + } + + case FTS5_OR: { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + Fts5ExprNode *pNext = (fts5NodeCompare(p1, p2) > 0 ? p2 : p1); + pNode->bEof = pNext->bEof; + pNode->iRowid = pNext->iRowid; + break; + } + + default: assert( pNode->eType==FTS5_NOT ); { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + while( rc==SQLITE_OK ){ + int cmp; + while( rc==SQLITE_OK && (cmp = fts5NodeCompare(p1, p2))>0 ){ + rc = fts5ExprNodeNext(pExpr, p2); + } + if( rc || cmp ) break; + rc = fts5ExprNodeNext(pExpr, p1); + } + pNode->bEof = p1->bEof; + pNode->iRowid = p1->iRowid; + break; + } + } } return rc; } + +/* +** Set node pNode, which is part of expression pExpr, to point to the first +** match. If there are no matches, set the Node.bEof flag to indicate EOF. +** +** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. +** It is not an error if there are no matches. 
+*/ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; pNode->bEof = 0; @@ -681,23 +787,12 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ rc = fts5ExprNodeFirst(pExpr, pNode->pRight); } if( rc==SQLITE_OK ){ - rc = fts5ExprNodeTest(pExpr, pNode); + rc = fts5ExprNodeNextMatch(pExpr, pNode); } } return rc; } -static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ - int rc = SQLITE_OK; - - if( pNode->eType==FTS5_STRING ){ - rc = fts5ExprNearNext(pExpr, pNode); - }else{ - assert( 0 ); - } - return rc; -} - /* diff --git a/manifest b/manifest index 0526a1e9b9..f794442bda 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\sthe\s"colname\s:\s"\ssyntax\sto\sfts5. -D 2014-07-05T07:54:01.680 +C Add\ssupport\sfor\sAND,\sOR\sand\sNOT\sto\sfts5. +D 2014-07-05T15:15:41.850 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,7 +107,7 @@ F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 F ext/fts5/fts5Int.h b7a684ff3508ab24437886f8bc873a16f494a7db F ext/fts5/fts5_buffer.c f1a26a79e2943fe4388e531fa141941b5eb6d31a F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 618e6641c8dc428c146ec84bf30ff0b3da6b28c7 +F ext/fts5/fts5_expr.c a341fe4f6d49875a7aeaa443036a3dc6aa2bff52 F ext/fts5/fts5_index.c d8ab9712e38dc1beb9a9145ec89e18dc083c0467 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -594,7 +594,7 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test 4db86a9473ee2a8c2cb30e0d81df21c6022f99b6 -F test/fts5ac.test c7ca34a477d638195a839c961e1b572890dc5d0d +F 
test/fts5ac.test d3aeb7a079d40093b34ac8053fc5e4c0ed7e88dc F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1190,7 +1190,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 250ae8d40115e2e47cc5a1e8a427fa8c0a89124d -R 213fb14ea45e358dcb308401853c4570 +P 004667106e552e832a564b77e242b86f183d4441 +R 4ab8db5e873de873b3febadf2e9942c6 U dan -Z 0b8632fefc20738326985d3c409c9be8 +Z fcdcd2fcf8de98f33f86410b0c7d6d38 diff --git a/manifest.uuid b/manifest.uuid index 506ee6aff0..3630392fc9 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -004667106e552e832a564b77e242b86f183d4441 \ No newline at end of file +8682b87e794767cefcaa080fd53c8973c24c556a \ No newline at end of file diff --git a/test/fts5ac.test b/test/fts5ac.test index c4586481bf..849ea52e5e 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -208,7 +208,7 @@ proc nearset {aCol args} { return $bMatch } -proc matchdata {expr} { +proc matchdata {expr {print 0}} { set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] set res [list] foreach {id x y} $::data { @@ -217,6 +217,9 @@ proc matchdata {expr} { set res [concat $id $res] } } + if {$print} { + puts $tclexpr + } return $res } @@ -272,10 +275,18 @@ foreach {tn expr} { 9 { y : NEAR(r c) } 10 { x : "r c" } 11 { y : "r c" } + 12 { a AND b } + 13 { a AND b AND c } + 14a { a } + 14b { a OR b } + 15 { a OR b AND c } + 16 { c AND b OR a } + 17 { c AND (b OR a) } + 18 { c NOT (b OR a) } + 19 { c NOT b OR a AND d } } { - set res [matchdata $expr] - do_execsql_test 2.$tn.[llength $res] { + do_execsql_test 4.$tn.[llength $res] { SELECT rowid FROM xx WHERE xx match $expr } $res } From 
a2b569f95571307fc9fae0496ea7ef3572994289 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 8 Jul 2014 16:27:37 +0000 Subject: [PATCH 010/206] Add support for prefix queries to fts5. FossilOrigin-Name: 75ebd3cd5904a4f89f7f3a9b25d32b2a42a31310 --- ext/fts5/fts5Int.h | 27 +++ ext/fts5/fts5_buffer.c | 60 +++++ ext/fts5/fts5_expr.c | 109 ++------- ext/fts5/fts5_index.c | 519 ++++++++++++++++++++++++++++++----------- manifest | 19 +- manifest.uuid | 2 +- test/fts5ad.test | 196 ++++++++++++++++ 7 files changed, 698 insertions(+), 234 deletions(-) create mode 100644 test/fts5ad.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 5f55bbadb5..94206a849f 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -96,6 +96,33 @@ void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) +typedef struct Fts5PoslistReader Fts5PoslistReader; +struct Fts5PoslistReader { + /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ + int iCol; /* If (iCol>=0), this column only */ + const u8 *a; /* Position list to iterate through */ + int n; /* Size of buffer at a[] in bytes */ + int i; /* Current offset in a[] */ + + /* Output variables */ + int bEof; /* Set to true at EOF */ + i64 iPos; /* (iCol<<32) + iPos */ +}; +int sqlite3Fts5PoslistReaderInit( + int iCol, /* If (iCol>=0), this column only */ + const u8 *a, int n, /* Poslist buffer to iterate through */ + Fts5PoslistReader *pIter /* Iterator object to initialize */ +); +int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); + +typedef struct Fts5PoslistWriter Fts5PoslistWriter; +struct Fts5PoslistWriter { + int iCol; + int iOff; +}; +int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); + + /* ** End of interface to code in fts5_buffer.c. 
**************************************************************************/ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index d8ad29f59a..0f09b59255 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -137,3 +137,63 @@ void sqlite3Fts5BufferSet( pBuf->n = 0; sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); } + + +/* +** Advance the iterator object passed as the only argument. Return true +** if the iterator reaches EOF, or false otherwise. +*/ +int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ + if( pIter->i>=pIter->n ){ + pIter->bEof = 1; + }else{ + int iVal; + pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + if( iVal==1 ){ + pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + if( pIter->iCol>=0 && iVal>pIter->iCol ){ + pIter->bEof = 1; + }else{ + pIter->iPos = ((u64)iVal << 32); + pIter->i += getVarint32(&pIter->a[pIter->i], iVal); + } + } + pIter->iPos += (iVal-2); + } + return pIter->bEof; +} + +int sqlite3Fts5PoslistReaderInit( + int iCol, /* If (iCol>=0), this column only */ + const u8 *a, int n, /* Poslist buffer to iterate through */ + Fts5PoslistReader *pIter /* Iterator object to initialize */ +){ + memset(pIter, 0, sizeof(*pIter)); + pIter->a = a; + pIter->n = n; + pIter->iCol = iCol; + do { + sqlite3Fts5PoslistReaderNext(pIter); + }while( pIter->bEof==0 && (pIter->iPos >> 32)bEof; +} + +int sqlite3Fts5PoslistWriterAppend( + Fts5Buffer *pBuf, + Fts5PoslistWriter *pWriter, + i64 iPos +){ + int rc = SQLITE_OK; + int iCol = (int)(iPos >> 32); + int iOff = (iPos & 0x7FFFFFFF); + + if( iCol!=pWriter->iCol ){ + fts5BufferAppendVarint(&rc, pBuf, 1); + fts5BufferAppendVarint(&rc, pBuf, iCol); + pWriter->iCol = iCol; + pWriter->iOff = 0; + } + fts5BufferAppendVarint(&rc, pBuf, (iOff - pWriter->iOff) + 2); + + return rc; +} diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index e528757bb4..37bf84e1c6 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -95,83 +95,6 @@ struct Fts5Parse { 
Fts5ExprNode *pExpr; /* Result of a successful parse */ }; -/************************************************************************* -*/ -typedef struct Fts5PoslistIter Fts5PoslistIter; -struct Fts5PoslistIter { - int iCol; /* If (iCol>=0), this column only */ - const u8 *a; /* Position list to iterate through */ - int n; /* Size of buffer at a[] in bytes */ - int i; /* Current offset in a[] */ - - /* Output variables */ - int bEof; /* Set to true at EOF */ - i64 iPos; /* (iCol<<32) + iPos */ -}; - -static int fts5PoslistIterNext(Fts5PoslistIter *pIter){ - if( pIter->i>=pIter->n ){ - pIter->bEof = 1; - }else{ - int iVal; - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - if( iVal==1 ){ - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - if( pIter->iCol>=0 && iVal>pIter->iCol ){ - pIter->bEof = 1; - }else{ - pIter->iPos = ((u64)iVal << 32); - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - } - } - pIter->iPos += (iVal-2); - } - return pIter->bEof; -} - -static int fts5PoslistIterInit( - int iCol, /* If (iCol>=0), this column only */ - const u8 *a, int n, /* Poslist buffer to iterate through */ - Fts5PoslistIter *pIter /* Iterator object to initialize */ -){ - memset(pIter, 0, sizeof(*pIter)); - pIter->a = a; - pIter->n = n; - pIter->iCol = iCol; - do { - fts5PoslistIterNext(pIter); - }while( pIter->bEof==0 && (pIter->iPos >> 32)bEof; -} - -typedef struct Fts5PoslistWriter Fts5PoslistWriter; -struct Fts5PoslistWriter { - int iCol; - int iOff; -}; - -static int fts5PoslistWriterAppend( - Fts5Buffer *pBuf, - Fts5PoslistWriter *pWriter, - i64 iPos -){ - int rc = SQLITE_OK; - int iCol = (int)(iPos >> 32); - int iOff = (iPos & 0x7FFFFFFF); - - if( iCol!=pWriter->iCol ){ - fts5BufferAppendVarint(&rc, pBuf, 1); - fts5BufferAppendVarint(&rc, pBuf, iCol); - pWriter->iCol = iCol; - pWriter->iOff = 0; - } - fts5BufferAppendVarint(&rc, pBuf, (iOff - pWriter->iOff) + 2); - - return rc; -} -/* 
-*************************************************************************/ - void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ if( pParse->rc==SQLITE_OK ){ va_list ap; @@ -335,8 +258,8 @@ static int fts5ExprPhraseIsMatch( int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistWriter writer = {0, 0}; - Fts5PoslistIter aStatic[4]; - Fts5PoslistIter *aIter = aStatic; + Fts5PoslistReader aStatic[4]; + Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; @@ -345,8 +268,8 @@ static int fts5ExprPhraseIsMatch( /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ - int nByte = sizeof(Fts5PoslistIter) * pPhrase->nTerm; - aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); + int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; + aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } @@ -354,7 +277,7 @@ static int fts5ExprPhraseIsMatch( for(i=0; inTerm; i++){ int n; const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &n); - if( fts5PoslistIterInit(iCol, a, n, &aIter[i]) ) goto ismatch_out; + if( sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ) goto ismatch_out; } while( 1 ){ @@ -363,12 +286,12 @@ static int fts5ExprPhraseIsMatch( do { bMatch = 1; for(i=0; inTerm; i++){ - Fts5PoslistIter *pPos = &aIter[i]; + Fts5PoslistReader *pPos = &aIter[i]; i64 iAdj = iPos + i; if( pPos->iPos!=iAdj ){ bMatch = 0; while( pPos->iPosiPos>iAdj ) iPos = pPos->iPos-i; } @@ -376,11 +299,11 @@ static int fts5ExprPhraseIsMatch( }while( bMatch==0 ); /* Append position iPos to the output */ - rc = fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); + rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); if( rc!=SQLITE_OK ) goto ismatch_out; for(i=0; inTerm; i++){ - if( fts5PoslistIterNext(&aIter[i]) ) goto ismatch_out; + if( 
sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; } } @@ -408,8 +331,8 @@ static int fts5ExprPhraseIsMatch( ** a set of intances that collectively matches the NEAR constraint. */ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ - Fts5PoslistIter aStatic[4]; - Fts5PoslistIter *aIter = aStatic; + Fts5PoslistReader aStatic[4]; + Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; int bMatch; @@ -420,27 +343,27 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ - int nByte = sizeof(Fts5PoslistIter) * pNear->nPhrase; - aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); + int nByte = sizeof(Fts5PoslistReader) * pNear->nPhrase; + aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( !aIter ) return SQLITE_NOMEM; } /* Initialize a term iterator for each phrase */ for(i=0; inPhrase; i++){ Fts5Buffer *pPoslist = &pNear->apPhrase[i]->poslist; - fts5PoslistIterInit(-1, pPoslist->p, pPoslist->n, &aIter[i]); + sqlite3Fts5PoslistReaderInit(-1, pPoslist->p, pPoslist->n, &aIter[i]); } iMax = aIter[0].iPos; do { bMatch = 1; for(i=0; inPhrase; i++){ - Fts5PoslistIter *pPos = &aIter[i]; + Fts5PoslistReader *pPos = &aIter[i]; i64 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; if( pPos->iPosiPos>iMax ){ bMatch = 0; while( pPos->iPosiPos>iMax ) iMax = pPos->iPos; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index af69b4280d..bfd6afa18e 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -263,6 +263,7 @@ typedef struct Fts5PendingDoclist Fts5PendingDoclist; typedef struct Fts5PendingPoslist Fts5PendingPoslist; typedef struct Fts5PosIter Fts5PosIter; typedef struct Fts5SegIter Fts5SegIter; +typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef 
struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; @@ -297,10 +298,22 @@ struct Fts5Index { sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ }; +struct Fts5DoclistIter { + u8 *a; + int n; + int i; + + /* Output variables. aPoslist==0 at EOF */ + i64 iRowid; + u8 *aPoslist; + int nPoslist; +}; + struct Fts5IndexIter { Fts5Index *pIndex; Fts5Structure *pStruct; Fts5MultiSegIter *pMulti; + Fts5DoclistIter *pDoclist; Fts5Buffer poslist; /* Buffer containing current poslist */ }; @@ -424,14 +437,17 @@ struct Fts5MultiSegIter { ** Leaf page number containing the last term read from the segment. And ** the offset immediately following the term data. ** -** bOneTerm: -** If true, set the iterator to point to EOF after the current doclist has -** been exhausted. Do not proceed to the next term in the segment. +** flags: +** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: +** +** FTS5_SEGITER_ONETERM: +** If set, set the iterator to point to EOF after the current doclist +** has been exhausted. Do not proceed to the next term in the segment. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ int iIdx; /* Byte offset within current leaf */ - int bOneTerm; /* If true, iterate through single doclist */ + int flags; /* Mask of configuration flags */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ int iLeafOffset; /* Byte offset within current leaf */ @@ -444,6 +460,9 @@ struct Fts5SegIter { i64 iRowid; /* Current rowid */ }; +#define FTS5_SEGITER_ONETERM 0x01 + + /* ** Object for iterating through paginated data. */ @@ -458,7 +477,7 @@ struct Fts5ChunkIter { }; /* -** Object for iterating through a single position list. +** Object for iterating through a single position list on disk. */ struct Fts5PosIter { Fts5ChunkIter chunk; /* Current chunk of data */ @@ -566,6 +585,17 @@ static int fts5BufferCompareBlob( return (res==0 ? 
(pLeft->n - nRight) : res); } +#if 0 +static int fts5CompareBlob( + const u8 *pLeft, int nLeft, + const u8 *pRight, int nRight +){ + int nCmp = MIN(nLeft, nRight); + int res = memcmp(pLeft, pRight, nCmp); + return (res==0 ? (nLeft - nRight) : res); +} +#endif + /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. @@ -669,6 +699,7 @@ static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ */ static void fts5DataRelease(Fts5Data *pData){ if( pData ){ + assert( pData->nRef>0 ); pData->nRef--; if( pData->nRef==0 ) sqlite3_free(pData); } @@ -1045,95 +1076,6 @@ static void fts5SegIterInit( } } -/* -** Initialize the object pIter to point to term pTerm/nTerm within segment -** pSeg, index iIdx. If there is no such term in the index, the iterator -** is set to EOF. -** -** If an error occurs, Fts5Index.rc is set to an appropriate error code. If -** an error has already occurred when this function is called, it is a no-op. 
-*/ -static void fts5SegIterSeekInit( - Fts5Index *p, /* FTS5 backend */ - int iIdx, /* Config.aHash[] index of FTS index */ - const u8 *pTerm, int nTerm, /* Term to seek to */ - Fts5StructureSegment *pSeg, /* Description of segment */ - Fts5SegIter *pIter /* Object to populate */ -){ - int iPg = 1; - int h; - - assert( pTerm && nTerm ); - memset(pIter, 0, sizeof(*pIter)); - pIter->pSeg = pSeg; - pIter->iIdx = iIdx; - pIter->bOneTerm = 1; - - for(h=pSeg->nHeight-1; h>0; h--){ - Fts5NodeIter node; /* For iterating through internal nodes */ - i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg); - Fts5Data *pNode = fts5DataRead(p, iRowid); - if( pNode==0 ) break; - - fts5NodeIterInit(pNode->p, pNode->n, &node); - assert( node.term.n==0 ); - - iPg = node.iChild; - for(fts5NodeIterNext(&p->rc, &node); - node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; - fts5NodeIterNext(&p->rc, &node) - ){ - iPg = node.iChild; - } - fts5NodeIterFree(&node); - fts5DataRelease(pNode); - } - - if( iPg>=pSeg->pgnoFirst ){ - int res; - pIter->iLeafPgno = iPg - 1; - fts5SegIterNextPage(p, pIter); - if( pIter->pLeaf ){ - u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->n; - - pIter->iLeafOffset = fts5GetU16(&a[2]); - fts5SegIterLoadTerm(p, pIter, 0); - - while( (res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){ - if( res<0 && pIter->iLeafPgno==iPg ){ - int iOff = pIter->iLeafOffset; - while( iOffiLeafOffset = iOff + getVarint32(&a[iOff], nKeep); - fts5SegIterLoadTerm(p, pIter, nKeep); - continue; - } - } - - /* No matching term on this page. Set the iterator to EOF. */ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - break; - } - } - } -} - /* ** Advance iterator pIter to the next entry. ** @@ -1198,7 +1140,7 @@ static void fts5SegIterNext( /* Check if the iterator is now at EOF. If so, return early. 
*/ if( pIter->pLeaf && bNewTerm ){ - if( pIter->bOneTerm ){ + if( pIter->flags & FTS5_SEGITER_ONETERM ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ @@ -1208,6 +1150,79 @@ static void fts5SegIterNext( } } +/* +** Initialize the object pIter to point to term pTerm/nTerm within segment +** pSeg, index iIdx. If there is no such term in the index, the iterator +** is set to EOF. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. If +** an error has already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterSeekInit( + Fts5Index *p, /* FTS5 backend */ + int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to */ + int bGe, /* If true seek for >=. If false, == */ + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + int iPg = 1; + int h; + + assert( pTerm && nTerm ); + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->iIdx = iIdx; + + /* This block sets stack variable iPg to the leaf page number that may + ** contain term (pTerm/nTerm), if it is present in the segment. 
*/ + for(h=pSeg->nHeight-1; h>0; h--){ + Fts5NodeIter node; /* For iterating through internal nodes */ + i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg); + Fts5Data *pNode = fts5DataRead(p, iRowid); + if( pNode==0 ) break; + + fts5NodeIterInit(pNode->p, pNode->n, &node); + assert( node.term.n==0 ); + + iPg = node.iChild; + for(fts5NodeIterNext(&p->rc, &node); + node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; + fts5NodeIterNext(&p->rc, &node) + ){ + iPg = node.iChild; + } + fts5NodeIterFree(&node); + fts5DataRelease(pNode); + } + + if( iPgpgnoFirst ){ + iPg = pSeg->pgnoFirst; + } + + pIter->iLeafPgno = iPg - 1; + fts5SegIterNextPage(p, pIter); + + if( pIter->pLeaf ){ + int res; + pIter->iLeafOffset = fts5GetU16(&pIter->pLeaf->p[2]); + fts5SegIterLoadTerm(p, pIter, 0); + do { + res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); + if( res>=0 ) break; + fts5SegIterNext(p, pIter); + }while( pIter->pLeaf ); + + if( bGe==0 && res ){ + /* Set iterator to point to EOF */ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + } + } + + if( bGe==0 ) pIter->flags |= FTS5_SEGITER_ONETERM; +} + /* ** Zero the iterator passed as the only argument. 
*/ @@ -1327,6 +1342,7 @@ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ + int bGe, /* True for >= */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ @@ -1363,11 +1379,12 @@ static void fts5MultiIterNew( Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ + Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; Fts5SegIter *pIter = &pNew->aSeg[iIter++]; if( pTerm==0 ){ - fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], pIter); + fts5SegIterInit(p, iIdx, pSeg, pIter); }else{ - fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, &pLvl->aSeg[iSeg], pIter); + fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, bGe, pSeg, pIter); } } } @@ -2245,7 +2262,7 @@ fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); #endif - for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, iLvl, nInput, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ @@ -2740,7 +2757,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ Fts5MultiSegIter *pIter; Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, -1, 0, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter) ){ @@ -3022,6 +3039,232 @@ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ p->pgsz = pgsz; } +/* +** Iterator pMulti currently points to a valid entry (not EOF). This +** function appends a copy of the position-list of the entry pMulti +** currently points to to buffer pBuf. +** +** If an error occurs, an error code is left in p->rc. 
It is assumed +** no error has already occurred when this function is called. +*/ +static void fts5MultiIterPoslist( + Fts5Index *p, + Fts5MultiSegIter *pMulti, + int bSz, + Fts5Buffer *pBuf +){ + Fts5ChunkIter iter; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; + assert( fts5MultiIterEof(p, pMulti)==0 ); + fts5ChunkIterInit(p, pSeg, &iter); + if( fts5ChunkIterEof(p, &iter)==0 ){ + if( bSz ){ + fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem); + } + while( fts5ChunkIterEof(p, &iter)==0 ){ + fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); + fts5ChunkIterNext(p, &iter); + } + } + fts5ChunkIterRelease(&iter); +} + +static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ + if( pIter->in ){ + if( pIter->i ){ + i64 iDelta; + pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); + pIter->iRowid -= iDelta; + }else{ + pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); + } + pIter->i += getVarint32(&pIter->a[pIter->i], pIter->nPoslist); + pIter->aPoslist = &pIter->a[pIter->i]; + pIter->i += pIter->nPoslist; + }else{ + pIter->aPoslist = 0; + } +} + +static void fts5DoclistIterInit(Fts5Buffer *pBuf, Fts5DoclistIter *pIter){ + memset(pIter, 0, sizeof(*pIter)); + pIter->a = pBuf->p; + pIter->n = pBuf->n; + fts5DoclistIterNext(pIter); +} + +/* +** Append a doclist to buffer pBuf. +*/ +static void fts5MergeAppendDocid( + int *pRc, /* IN/OUT: Error code */ + Fts5Buffer *pBuf, /* Buffer to write to */ + i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ + i64 iRowid /* Rowid to append */ +){ + if( pBuf->n==0 ){ + fts5BufferAppendVarint(pRc, pBuf, iRowid); + }else{ + fts5BufferAppendVarint(pRc, pBuf, *piLastRowid - iRowid); + } + *piLastRowid = iRowid; +} + +/* +** Buffers p1 and p2 contain doclists. This function merges the content +** of the two doclists together and sets buffer p1 to the result before +** returning. +** +** If an error occurs, an error code is left in p->rc. 
If an error has +** already occurred, this function is a no-op. +*/ +static void fts5MergePrefixLists( + Fts5Index *p, /* FTS5 backend object */ + Fts5Buffer *p1, /* First list to merge */ + Fts5Buffer *p2 /* Second list to merge */ +){ + if( p2->n ){ + i64 iLastRowid = 0; + Fts5DoclistIter i1; + Fts5DoclistIter i2; + Fts5Buffer out; + Fts5Buffer tmp; + memset(&out, 0, sizeof(out)); + memset(&tmp, 0, sizeof(tmp)); + + fts5DoclistIterInit(p1, &i1); + fts5DoclistIterInit(p2, &i2); + while( i1.aPoslist!=0 || i2.aPoslist!=0 ){ + if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid>i2.iRowid) ){ + /* Copy entry from i1 */ + fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i1.iRowid); + fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist); + fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); + fts5DoclistIterNext(&i1); + } + else if( i1.aPoslist==0 || i2.iRowid>i1.iRowid ){ + /* Copy entry from i2 */ + fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); + fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist); + fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); + fts5DoclistIterNext(&i2); + } + else{ + Fts5PoslistReader r1; + Fts5PoslistReader r2; + Fts5PoslistWriter writer; + + memset(&writer, 0, sizeof(writer)); + + /* Merge the two position lists. 
*/ + fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); + fts5BufferZero(&tmp); + sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); + sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); + while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){ + i64 iNew; + if( r2.bEof || (r1.bEof==0 && r1.iPosrc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); + } + + fts5BufferAppendVarint(&p->rc, &out, tmp.n); + fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); + fts5DoclistIterNext(&i1); + fts5DoclistIterNext(&i2); + } + } + + fts5BufferSet(&p->rc, p1, out.n, out.p); + fts5BufferFree(&tmp); + fts5BufferFree(&out); + } +} + +static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ + Fts5Buffer tmp = *p1; + *p1 = *p2; + *p2 = tmp; +} + +static void fts5SetupPrefixIter( + Fts5Index *p, /* Index to read from */ + const u8 *pToken, /* Buffer containing prefix to match */ + int nToken, /* Size of buffer pToken in bytes */ + Fts5IndexIter *pIter /* Populate this object */ +){ + Fts5Structure *pStruct; + Fts5Buffer *aBuf; + const int nBuf = 32; + + + aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); + pStruct = fts5StructureRead(p, 0); + + if( aBuf && pStruct ){ + Fts5DoclistIter *pDoclist; + int i; + i64 iLastRowid; + Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ + Fts5Buffer doclist; + + memset(&doclist, 0, sizeof(doclist)); + for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1); + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext(p, p1) + ){ + i64 iRowid = fts5MultiIterRowid(p1); + int nTerm; + const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); + assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); + if( nTerm0 && iRowid>=iLastRowid ){ + for(i=0; doclist.n && p->rc==SQLITE_OK; i++){ + assert( irc, &doclist, iRowid); + }else{ + fts5BufferAppendVarint(&p->rc, &doclist, iLastRowid - iRowid); + } + iLastRowid = iRowid; + fts5MultiIterPoslist(p, p1, 1, &doclist); + } + + for(i=0; ipDoclist 
= pDoclist; + fts5DoclistIterInit(&doclist, pIter->pDoclist); + } + } + + fts5StructureRelease(pStruct); + sqlite3_free(aBuf); +} + /* ** Open a new iterator to iterate though all docids that match the ** specified token or token prefix. @@ -3040,21 +3283,25 @@ Fts5IndexIter *sqlite3Fts5IndexQuery( if( pConfig->aPrefix[iIdx-1]==nToken ) break; } if( iIdx>pConfig->nPrefix ){ - /* No matching prefix index. todo: deal with this. */ - assert( 0 ); + iIdx = -1; } } pRet = (Fts5IndexIter*)sqlite3_malloc(sizeof(Fts5IndexIter)); if( pRet ){ memset(pRet, 0, sizeof(Fts5IndexIter)); - pRet->pStruct = fts5StructureRead(p, 0); - if( pRet->pStruct ){ - fts5MultiIterNew(p, - pRet->pStruct, iIdx, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti - ); - } + pRet->pIndex = p; + if( iIdx>=0 ){ + pRet->pStruct = fts5StructureRead(p, iIdx); + if( pRet->pStruct ){ + fts5MultiIterNew(p, pRet->pStruct, + iIdx, 0, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + ); + } + }else{ + fts5SetupPrefixIter(p, (const u8*)pToken, nToken, pRet); + } } if( p->rc ){ @@ -3068,24 +3315,37 @@ Fts5IndexIter *sqlite3Fts5IndexQuery( ** Return true if the iterator passed as the only argument is at EOF. */ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ - return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); + if( pIter->pDoclist ){ + return pIter->pDoclist->aPoslist==0; + }else{ + return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); + } } /* ** Move to the next matching rowid. */ void sqlite3Fts5IterNext(Fts5IndexIter *pIter, i64 iMatch){ - fts5BufferZero(&pIter->poslist); - fts5MultiIterNext(pIter->pIndex, pIter->pMulti); + if( pIter->pDoclist ){ + fts5DoclistIterNext(pIter->pDoclist); + }else{ + fts5BufferZero(&pIter->poslist); + fts5MultiIterNext(pIter->pIndex, pIter->pMulti); + } } /* ** Return the current rowid. 
*/ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ - return fts5MultiIterRowid(pIter->pMulti); + if( pIter->pDoclist ){ + return pIter->pDoclist->iRowid; + }else{ + return fts5MultiIterRowid(pIter->pMulti); + } } + /* ** Return a pointer to a buffer containing a copy of the position list for ** the current entry. Output variable *pn is set to the size of the buffer @@ -3095,25 +3355,17 @@ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ ** disk. */ const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ - Fts5ChunkIter iter; - Fts5Index *p = pIter->pIndex; - Fts5SegIter *pSeg = &pIter->pMulti->aSeg[ pIter->pMulti->aFirst[1] ]; - - assert( sqlite3Fts5IterEof(pIter)==0 ); - fts5ChunkIterInit(p, pSeg, &iter); - if( fts5ChunkIterEof(p, &iter)==0 ){ + if( pIter->pDoclist ){ + *pn = pIter->pDoclist->nPoslist; + return pIter->pDoclist->aPoslist; + }else{ + Fts5Index *p = pIter->pIndex; fts5BufferZero(&pIter->poslist); - fts5BufferGrow(&p->rc, &pIter->poslist, iter.nRem); - while( fts5ChunkIterEof(p, &iter)==0 ){ - fts5BufferAppendBlob(&p->rc, &pIter->poslist, iter.n, iter.p); - fts5ChunkIterNext(p, &iter); - } + fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist); + if( p->rc ) return 0; + *pn = pIter->poslist.n; + return pIter->poslist.p; } - fts5ChunkIterRelease(&iter); - - if( p->rc ) return 0; - *pn = pIter->poslist.n; - return pIter->poslist.p; } /* @@ -3121,10 +3373,15 @@ const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ */ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ if( pIter ){ - fts5MultiIterFree(pIter->pIndex, pIter->pMulti); - fts5StructureRelease(pIter->pStruct); + if( pIter->pDoclist ){ + sqlite3_free(pIter->pDoclist->a); + sqlite3_free(pIter->pDoclist); + }else{ + fts5MultiIterFree(pIter->pIndex, pIter->pMulti); + fts5StructureRelease(pIter->pStruct); + fts5BufferFree(&pIter->poslist); + } fts5CloseReader(pIter->pIndex); - fts5BufferFree(&pIter->poslist); sqlite3_free(pIter); } } diff --git a/manifest b/manifest index 
f794442bda..4480043dec 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\sAND,\sOR\sand\sNOT\sto\sfts5. -D 2014-07-05T15:15:41.850 +C Add\ssupport\sfor\sprefix\squeries\sto\sfts5. +D 2014-07-08T16:27:37.120 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,11 +104,11 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 -F ext/fts5/fts5Int.h b7a684ff3508ab24437886f8bc873a16f494a7db -F ext/fts5/fts5_buffer.c f1a26a79e2943fe4388e531fa141941b5eb6d31a +F ext/fts5/fts5Int.h bb716a6e6a376a7c8211e55e5577c6c020d176c2 +F ext/fts5/fts5_buffer.c 83b463a179ad4348fa87796fce78b0e4ef6b898a F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c a341fe4f6d49875a7aeaa443036a3dc6aa2bff52 -F ext/fts5/fts5_index.c d8ab9712e38dc1beb9a9145ec89e18dc083c0467 +F ext/fts5/fts5_expr.c 21351cdd256f8e561a57a38490d27f7922247696 +F ext/fts5/fts5_index.c a3084168a384a9d43f7fb045511b386ccb6e55e8 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -595,6 +595,7 @@ F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test 4db86a9473ee2a8c2cb30e0d81df21c6022f99b6 F test/fts5ac.test d3aeb7a079d40093b34ac8053fc5e4c0ed7e88dc +F test/fts5ad.test a4d2f344c86a45ee53b424512585b3900ccb8cf3 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test 
ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1190,7 +1191,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 004667106e552e832a564b77e242b86f183d4441 -R 4ab8db5e873de873b3febadf2e9942c6 +P 8682b87e794767cefcaa080fd53c8973c24c556a +R dba83c3d230dbf413439715289f715cf U dan -Z fcdcd2fcf8de98f33f86410b0c7d6d38 +Z 876bab147b2ac69a43a21b2ef49df211 diff --git a/manifest.uuid b/manifest.uuid index 3630392fc9..9182ec19d5 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8682b87e794767cefcaa080fd53c8973c24c556a \ No newline at end of file +75ebd3cd5904a4f89f7f3a9b25d32b2a42a31310 \ No newline at end of file diff --git a/test/fts5ad.test b/test/fts5ad.test new file mode 100644 index 0000000000..3898b4c89c --- /dev/null +++ b/test/fts5ad.test @@ -0,0 +1,196 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5ad + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE yy USING fts5(x, y); + INSERT INTO yy VALUES('Changes the result to be', 'the list of all matching'); + INSERT INTO yy VALUES('indices (or all matching', 'values if -inline is'); + INSERT INTO yy VALUES('specified as well.) 
If', 'indices are returned, the'); +} {} + +foreach {tn match res} { + 1 {c*} {1} + 2 {i*} {3 2} + 3 {t*} {3 1} + 4 {r*} {3 1} +} { + do_execsql_test 1.$tn { + SELECT rowid FROM yy WHERE yy MATCH $match + } $res +} + +foreach {T create} { + 2 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b); + INSERT INTO t1(t1) VALUES('pgsz=32'); + } + + 3 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); + INSERT INTO t1(t1) VALUES('pgsz=32'); + } + +} { + + do_test $T.1 { + execsql { DROP TABLE IF EXISTS t1 } + execsql $create + } {} + + do_test $T.1 { + foreach {rowid a b} { + 0 {fghij uvwxyz klmn pq uvwx} {klmn f fgh uv fghij klmno} + 1 {uv f abcd abcd fghi} {pq klm uv uv fgh uv a} + 2 {klmn klm pqrs fghij uv} {f k uvw ab abcd pqr uv} + 3 {ab pqrst a fghi ab pqr fg} {k klmno a fg abcd} + 4 {abcd pqrst uvwx a fgh} {f klmno fghij kl pqrst} + 5 {uvwxyz k abcde u a} {uv k k kl klmn} + 6 {uvwxyz k klmn pqrst uv} {fghi pqrs abcde u k} + 7 {uvwxy klmn u p pqrst fgh} {p f fghi abcd uvw kl uv} + 8 {f klmno pqrst uvwxy pqrst} {uv abcde klm pq pqr} + 9 {f abcde a uvwxyz pqrst} {fghij abc k uvwx pqr fghij uvwxy} + 10 {ab uv f fg pqrst uvwxy} {fgh p uv k abc klm uvw} + 11 {pq klmno a uvw abcde uvwxyz} {fghij pq uvwxyz pqr fghi} + 12 {fgh u pq fgh uvw} {uvw pqr f uvwxy uvwx} + 13 {uvwx klmn f fgh abcd pqr} {uvw k fg uv klm abcd} + 14 {ab uvwx pqrst pqr uvwxyz pqrs} {uvwxyz abcde ab ab uvw abcde} + 15 {abc abcde uvwxyz abc kl k pqr} {klm k k klmno u fgh} + 16 {fghi abcd fghij uv uvwxyz ab uv} {klmn pqr a uvw fghi} + 17 {abc pqrst fghi uvwx uvw klmn fghi} {ab fg pqr pqrs p} + 18 {pqr kl a fghij fgh fg kl} {pqr uvwxyz uvw abcd uvwxyz} + 19 {fghi fghi pqr kl fghi f} {klmn u u klmno klmno} + 20 {abc pqrst klmno kl pq uvwxy} {abc k fghi pqrs klm} + 21 {a pqr uvwxyz uv fghi a fgh} {abc pqrs pqrst pq klm} + 22 {klm abc uvwxyz klm pqrst} {fghij k pq pqr u klm fghij} + 23 {p klm uv p a a} {uvwxy klmn uvw abcde pq} + 24 {uv fgh fg pq uvwxy u uvwxy} {pqrs a uvw p uvwx uvwxyz fg} + 25 {fghij 
fghi klmn abcd pq kl} {fghi abcde pqrs abcd fgh uvwxy} + 26 {pq fgh a abc klmno klmn} {fgh p k p fg fghij} + 27 {fg pq kl uvwx fghij pqrst klmn} {abcd uvw abcd fghij f fghij} + 28 {uvw fghi p fghij pq fgh uvwx} {k fghij abcd uvwx pqr fghi} + 29 {klm pq abcd pq f uvwxy} {pqrst p fghij pqr p} + 30 {ab uvwx fg uvwx klmn klm} {klmn klmno fghij klmn klm} + 31 {pq k pqr abcd a pqrs} {abcd abcd uvw a abcd klmno ab} + 32 {pqrst u abc pq klm} {abc kl uvwxyz fghij u fghi p} + 33 {f uvwxy u k f uvw uvwx} {pqrs uvw fghi fg pqrst klm} + 34 {pqrs pq fghij uvwxyz pqr} {ab abc abc uvw f pq f} + 35 {uvwxy ab uvwxy klmno kl pqrs} {abcde uvw pqrs uvwx k k} + 36 {uvwxyz k ab abcde abc uvw} {uvw abcde uvw klmn uv klmn} + 37 {k kl uv abcde uvwx fg u} {u abc uvwxy k fg abcd} + 38 {fghi pqrst fghi pqr pqrst uvwx} {u uv uvwx fghi abcde} + 39 {k pqrst k uvw fg pqrst fghij} {uvwxy ab kl klmn uvwxyz abcde} + 40 {fg uvwxy pqrs klmn uvwxyz klm p} {k uv ab fghij fgh k pqrs} + 41 {uvwx abc f pq uvwxy k} {ab uvwxyz abc f fghij} + 42 {uvwxy klmno uvwxyz uvwxyz pqrst} {uv kl kl klmno k f abcde} + 43 {abcde ab pqrs fg f fgh} {abc fghij fghi k k} + 44 {uvw abcd a ab pqrst klmn fg} {pqrst u uvwx pqrst fghij f pqrst} + 45 {uvwxy p kl uvwxyz ab pqrst fghi} {abc f pqr fg a k} + 46 {u p f a fgh} {a kl pq uv f} + 47 {pqrs abc fghij fg abcde ab a} {p ab uv pqrs kl fghi abcd} + 48 {abcde uvwxy pqrst uv abc pqr uvwx} {uvwxy klm uvwxy uvwx k} + 49 {fgh klm abcde klmno u} {a f fghij f uvwxyz abc u} + 50 {uv uvw uvwxyz uvwxyz uv ab} {uvwx pq fg u k uvwxy} + 51 {uvwxy pq p kl fghi} {pqrs fghi pqrs abcde uvwxyz ab} + 52 {pqr p uvwxy kl pqrs klmno fghij} {ab abcde abc pqrst pqrs uv} + 53 {fgh pqrst p a klmno} {ab ab pqrst pqr kl pqrst} + 54 {abcd klm ab uvw a fg u} {f pqr f abcd uv} + 55 {u fg uvwxyz k uvw} {abc pqrs f fghij fg pqrs uvwxy} + 56 {klm fg p fghi fg a} {uv a fghi uvwxyz a fghi} + 57 {uvwxy k abcde fgh f fghi} {f kl klmn f fghi klm} + 58 {klm k fgh uvw fgh fghi} {klmno uvwx u pqrst u} + 59 {fghi pqr 
pqrst p uvw fghij} {uv pqrst pqrs pq fghij klm} + 60 {uvwx klm uvwxy uv klmn} {p a a abc klmn ab k} + 61 {uvwxy uvwx klm uvwx klm} {pqrs ab ab uvwxyz fg} + 62 {kl uv uv uvw fg kl k} {abcde uvw fgh uvwxy klm} + 63 {a abc fgh u klm abcd} {fgh pqr uv klmn fghij} + 64 {klmn k klmn klmno pqrs pqr} {fg kl abcde klmno uvwxy kl pq} + 65 {uvwxyz klm fghi abc abcde kl} {uvwxy uvw uvwxyz uvwxyz pq pqrst} + 66 {pq klm abc pqrst fgh f} {u abcde pqrst abcde fg} + 67 {u pqrst kl u uvw klmno} {u pqr pqrs fgh u p} + 68 {abc fghi uvwxy fgh k pq} {uv p uvwx uvwxyz ab} + 69 {klmno f uvwxyz uvwxy klmn fg ab} {fgh kl a pqr abcd pqr} + 70 {fghi pqrst pqrst uv a} {uvwxy k p uvw uvwx a} + 71 {a fghij f p uvw} {klm fg abcd abcde klmno pqrs} + 72 {uv uvwx uvwx uvw klm} {uv fghi klmno uvwxy uvw} + 73 {kl uvwxy ab f pq klm u} {uvwxy klmn klm abcd pq fg k} + 74 {uvw pqrst abcd uvwxyz ab} {fgh fgh klmn abc pq} + 75 {uvwxyz klm pq abcd klmno pqr uvwxyz} {kl f a fg pqr klmn} + 76 {uvw uvwxy pqr k pqrst kl} {uvwxy abc uvw uvw u} + 77 {fgh klm u uvwxyz f uvwxy abcde} {uv abcde klmno u u ab} + 78 {klmno abc pq pqr fgh} {p uv abcd fgh abc u k} + 79 {fg pqr uvw pq uvwx} {uv uvw fghij pqrs fg p} + 80 {abcd pqrs uvwx uvwxy uvwx} {u uvw pqrst pqr abcde pqrs kl} + 81 {uvwxyz klm pq uvwxy fghij} {p pq klm fghij u a a} + 82 {uvwx k uvwxyz klmno pqrst kl} {abcde p f pqrst abcd uvwxyz p} + 83 {abcd abcde klm pqrst uvwxyz} {uvw pqrst u p uvwxyz a pqrs} + 84 {k klm abc uv uvwxy klm klmn} {k abc pqr a abc p kl} + 85 {klmn abcd pqrs p pq klm a} {klmn kl ab uvw pq} + 86 {klmn a pqrs abc uvw pqrst} {a pqr kl klm a k f} + 87 {pqrs ab uvwx uvwxy a pqr f} {fg klm uvwx pqr pqr} + 88 {klmno ab k kl u uvwxyz} {uv kl uvw fghi uv uvw} + 89 {pq fghi pqrst klmn uvwxy abc pqrs} {fg f f fg abc abcde klm} + 90 {kl a k fghi uvwx fghi u} {ab uvw pqr fg a p abc} + 91 {uvwx pqrs klmno ab fgh uvwx} {pqr uvwx abc kl f klmno kl} + 92 {fghij pq pqrs fghij f pqrst} {u abcde fg pq pqr fgh k} + 93 {fgh u pqrs abcde klmno abc} {abc fg pqrst 
pqr abcde} + 94 {uvwx p abc f pqr p} {k pqrs kl klm abc fghi klm} + 95 {kl p klmno uvwxyz klmn} {fghi ab a fghi pqrs kl} + 96 {pqr fgh pq uvwx a} {uvw klm klmno fg uvwxy uvwx} + 97 {fg abc uvwxyz fghi pqrst pq} {abc k a ab abcde f} + 98 {uvwxy fghi uvwxy u abcde abcde uvw} {klmn uvwx pqrs uvw uvwxy abcde} + 99 {pq fg fghi uvwx uvwx fghij uvwxy} {klmn klmn f abc fg a} + } { + execsql { + INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b); + } + } + } {} + + proc prefix_query {prefix} { + set ret [list] + db eval {SELECT rowid, a, b FROM t1 ORDER BY rowid DESC} { + if {[lsearch -glob $a $prefix]>=0 || [lsearch -glob $b $prefix]>=0} { + lappend ret $rowid + } + } + return $ret + } + + foreach {tn prefix} { + 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} + 6 {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*} + 11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*} + 16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*} + 21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*} + 27 {x*} + } { + set res [prefix_query $prefix] + set n [llength $res] + do_execsql_test $T.$tn.$n {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} $res + } +} + +finish_test + From 48eecfb8b9b4b7c0cc6dd4281fda4d50e206c88d Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Jul 2014 20:21:12 +0000 Subject: [PATCH 011/206] Support "ORDER BY rowid ASC". 
FossilOrigin-Name: b96b5e166990e4ec363b24f66e04cfa5f00f6342 --- ext/fts5/fts5_expr.c | 75 +++++--- ext/fts5/fts5_index.c | 383 +++++++++++++++++++++++++++++++++-------- manifest | 22 +-- manifest.uuid | 2 +- test/fts5ab.test | 16 +- test/fts5ac.test | 69 ++++---- test/fts5ad.test | 39 +++-- test/permutations.test | 2 +- 8 files changed, 457 insertions(+), 151 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 37bf84e1c6..4618e7d491 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -414,21 +414,25 @@ static int fts5ExprNearAdvanceAll( */ static int fts5ExprAdvanceto( Fts5IndexIter *pIter, /* Iterator to advance */ - i64 *piMin, /* IN/OUT: Minimum rowid seen so far */ + int bAsc, /* True if iterator is "rowid ASC" */ + i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ int *pRc, /* OUT: Error code */ int *pbEof /* OUT: Set to true if EOF */ ){ - i64 iMin = *piMin; + i64 iLast = *piLast; i64 iRowid; - while( (iRowid = sqlite3Fts5IterRowid(pIter))>iMin ){ + while( 1 ){ + iRowid = sqlite3Fts5IterRowid(pIter); + if( (bAsc==0 && iRowid<=iLast) || (bAsc==1 && iRowid>=iLast) ) break; sqlite3Fts5IterNext(pIter, 0); if( sqlite3Fts5IterEof(pIter) ){ *pbEof = 1; return 1; } } - if( iRowidiLast) ); + *piLast = iRowid; } return 0; @@ -452,10 +456,15 @@ static int fts5ExprNearNextRowidMatch( Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; int i, j; /* Phrase and token index, respectively */ - i64 iMin; /* Smallest rowid any iterator points to */ - int bMatch; + i64 iLast; /* Lastest rowid any iterator points to */ + int bMatch; /* True if all terms are at the same rowid */ + + /* Set iLast, the lastest rowid any iterator points to. If the iterator + ** skips through rowids in the default descending order, this means the + ** minimum rowid. Or, if the iterator is "ORDER BY rowid ASC", then it + ** means the maximum rowid. 
*/ + iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); - iMin = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); do { bMatch = 1; for(i=0; inPhrase; i++){ @@ -463,13 +472,15 @@ static int fts5ExprNearNextRowidMatch( for(j=0; jnTerm; j++){ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; i64 iRowid = sqlite3Fts5IterRowid(pIter); - if( iRowid!=iMin ) bMatch = 0; - if( fts5ExprAdvanceto(pIter, &iMin, &rc, &pNode->bEof) ) return rc; + if( iRowid!=iLast ) bMatch = 0; + if( fts5ExprAdvanceto(pIter, pExpr->bAsc, &iLast, &rc, &pNode->bEof) ){ + return rc; + } } } }while( bMatch==0 ); - pNode->iRowid = iMin; + pNode->iRowid = iLast; return rc; } @@ -555,7 +566,7 @@ static int fts5ExprNearInitAll( (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0) ); - if( sqlite3Fts5IterEof(pTerm->pIter) ){ + if( pTerm->pIter && sqlite3Fts5IterEof(pTerm->pIter) ){ pNode->bEof = 1; return SQLITE_OK; } @@ -569,16 +580,31 @@ static int fts5ExprNearInitAll( static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); /* -** Nodes at EOF are considered larger than all other nodes. A node that -** points to a *smaller* rowid is considered larger. -** +** Compare the values currently indicated by the two nodes as follows: +** ** res = (*p1) - (*p2) +** +** Nodes that point to values that come later in the iteration order are +** considered to be larger. Nodes at EOF are the largest of all. +** +** This means that if the iteration order is ASC, then numerically larger +** rowids are considered larger. Or if it is the default DESC, numerically +** smaller rowids are larger. 
*/ -static int fts5NodeCompare(Fts5ExprNode *p1, Fts5ExprNode *p2){ +static int fts5NodeCompare( + Fts5Expr *pExpr, + Fts5ExprNode *p1, + Fts5ExprNode *p2 +){ if( p2->bEof ) return -1; if( p1->bEof ) return +1; - if( p1->iRowid>p2->iRowid ) return -1; - return (p1->iRowid < p2->iRowid); + if( pExpr->bAsc ){ + if( p1->iRowidiRowid ) return -1; + return (p1->iRowid > p2->iRowid); + }else{ + if( p1->iRowid>p2->iRowid ) return -1; + return (p1->iRowid < p2->iRowid); + } } static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ @@ -600,7 +626,7 @@ static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ case FTS5_OR: { Fts5ExprNode *p1 = pNode->pLeft; Fts5ExprNode *p2 = pNode->pRight; - int cmp = fts5NodeCompare(p1, p2); + int cmp = fts5NodeCompare(pExpr, p1, p2); if( cmp==0 ){ rc = fts5ExprNodeNext(pExpr, p1); @@ -644,7 +670,12 @@ static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ Fts5ExprNode *p2 = pNode->pRight; while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){ - Fts5ExprNode *pAdv = (p1->iRowid > p2->iRowid) ? p1 : p2; + Fts5ExprNode *pAdv; + if( pExpr->bAsc ){ + pAdv = (p1->iRowid < p2->iRowid) ? p1 : p2; + }else{ + pAdv = (p1->iRowid > p2->iRowid) ? p1 : p2; + } rc = fts5ExprNodeNext(pExpr, pAdv); if( rc!=SQLITE_OK ) break; } @@ -656,7 +687,7 @@ static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ case FTS5_OR: { Fts5ExprNode *p1 = pNode->pLeft; Fts5ExprNode *p2 = pNode->pRight; - Fts5ExprNode *pNext = (fts5NodeCompare(p1, p2) > 0 ? p2 : p1); + Fts5ExprNode *pNext = (fts5NodeCompare(pExpr, p1, p2) > 0 ? 
p2 : p1); pNode->bEof = pNext->bEof; pNode->iRowid = pNext->iRowid; break; @@ -667,7 +698,7 @@ static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ Fts5ExprNode *p2 = pNode->pRight; while( rc==SQLITE_OK ){ int cmp; - while( rc==SQLITE_OK && (cmp = fts5NodeCompare(p1, p2))>0 ){ + while( rc==SQLITE_OK && (cmp = fts5NodeCompare(pExpr, p1, p2))>0 ){ rc = fts5ExprNodeNext(pExpr, p2); } if( rc || cmp ) break; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index bfd6afa18e..e6ea440f5f 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -299,6 +299,7 @@ struct Fts5Index { }; struct Fts5DoclistIter { + int bAsc; u8 *a; int n; int i; @@ -410,6 +411,7 @@ struct Fts5SegWriter { */ struct Fts5MultiSegIter { int nSeg; /* Size of aSeg[] array */ + int bRev; /* True to iterate in reverse order */ Fts5SegIter *aSeg; /* Array of segment iterators */ u16 *aFirst; /* Current merge state (see above) */ }; @@ -443,6 +445,18 @@ struct Fts5MultiSegIter { ** FTS5_SEGITER_ONETERM: ** If set, set the iterator to point to EOF after the current doclist ** has been exhausted. Do not proceed to the next term in the segment. +** +** FTS5_SEGITER_REVERSE: +** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If +** it is set, iterate through docids in ascending order instead of the +** default descending order. +** +** iRowidOffset/nRowidOffset/aRowidOffset: +** These are used if the FTS5_SEGITER_REVERSE flag is set. +** +** Each time a new page is loaded, the iterator is set to point to the +** final rowid. Additionally, the aRowidOffset[] array is populated +** with the byte offsets of all relevant rowid fields on the page. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ @@ -452,15 +466,23 @@ struct Fts5SegIter { Fts5Data *pLeaf; /* Current leaf data */ int iLeafOffset; /* Byte offset within current leaf */ + /* The page and offset from which the current term was read. 
The offset + ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; int iTermLeafOffset; + /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ + int iRowidOffset; /* Current entry in aRowidOffset[] */ + int nRowidOffset; /* Allocated size of aRowidOffset[] array */ + int *aRowidOffset; /* Array of offset to rowid fields */ + /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ }; #define FTS5_SEGITER_ONETERM 0x01 +#define FTS5_SEGITER_REVERSE 0x02 /* @@ -1076,6 +1098,55 @@ static void fts5SegIterInit( } } +static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ + *piRowid = (int)fts5GetU16(&pLeaf->p[0]); + *piTerm = (int)fts5GetU16(&pLeaf->p[2]); +} + +/* +** This function is only ever called on iterators created by calls to +** Fts5IndexQuery() with the FTS5INDEX_QUERY_ASC flag set. +** +** When this function is called, iterator pIter points to the first rowid +** on the current leaf associated with the term being queried. This function +** advances it to point to the last such rowid and, if necessary, initializes +** the aRowidOffset[] and iRowidOffset variables. +*/ +static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ + int n = pIter->pLeaf->n; + int i = pIter->iLeafOffset; + u8 *a = pIter->pLeaf->p; + int iRowidOffset = 0; + + while( p->rc==SQLITE_OK && i=n ) break; + i += getVarint(&a[i], (u64*)&iDelta); + if( iDelta==0 ) break; + pIter->iRowid -= iDelta; + + if( iRowidOffset>=pIter->nRowidOffset ){ + int nNew = pIter->nRowidOffset + 8; + int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + break; + } + pIter->aRowidOffset = aNew; + pIter->nRowidOffset = nNew; + } + + pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; + pIter->iLeafOffset = i; + } + pIter->iRowidOffset = iRowidOffset; +} + + /* ** Advance iterator pIter to the next entry. 
** @@ -1088,68 +1159,198 @@ static void fts5SegIterNext( Fts5SegIter *pIter /* Iterator to advance */ ){ if( p->rc==SQLITE_OK ){ - Fts5Data *pLeaf = pIter->pLeaf; - int iOff; - int bNewTerm = 0; - int nKeep = 0; + if( pIter->flags & FTS5_SEGITER_REVERSE ){ + if( pIter->iRowidOffset>0 ){ + u8 *a = pIter->pLeaf->p; + int iOff; + int nPos; + i64 iDelta; + pIter->iRowidOffset--; - /* Search for the end of the position list within the current page. */ - u8 *a = pLeaf->p; - int n = pLeaf->n; - - iOff = pIter->iLeafOffset; - if( iOffiLeafOffset = iOff; - if( iDelta==0 ){ - bNewTerm = 1; - if( iOff>=n ){ - fts5SegIterNextPage(p, pIter); - pIter->iLeafOffset = 4; - }else if( iOff!=fts5GetU16(&a[2]) ){ - pIter->iLeafOffset += getVarint32(&a[iOff], nKeep); - } + pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; + iOff += getVarint32(&a[iOff], nPos); + iOff += nPos; + getVarint(&a[iOff], (u64*)&iDelta); + pIter->iRowid += iDelta; }else{ - pIter->iRowid -= iDelta; - } - }else{ - iOff = 0; - /* Next entry is not on the current page */ - while( iOff==0 ){ - fts5SegIterNextPage(p, pIter); - pLeaf = pIter->pLeaf; - if( pLeaf==0 ) break; - if( (iOff = fts5GetU16(&pLeaf->p[0])) ){ - iOff += sqlite3GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - } - else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ - pIter->iLeafOffset = iOff; - bNewTerm = 1; - } - } - } - - /* Check if the iterator is now at EOF. If so, return early. 
*/ - if( pIter->pLeaf && bNewTerm ){ - if( pIter->flags & FTS5_SEGITER_ONETERM ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; + while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ + Fts5Data *pNew; + pIter->iLeafPgno--; + pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( + pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno + )); + if( pNew ){ + if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ + if( pIter->iTermLeafOffsetn ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = pIter->iTermLeafOffset; + } + }else{ + int iRowidOff, dummy; + fts5LeafHeader(pNew, &iRowidOff, &dummy); + if( iRowidOff ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = iRowidOff; + } + } + + if( pIter->pLeaf ){ + u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; + pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid); + break; + }else{ + fts5DataRelease(pNew); + } + } + } + + if( pIter->pLeaf ){ + fts5SegIterReverseInitPage(p, pIter); + } + } + }else{ + Fts5Data *pLeaf = pIter->pLeaf; + int iOff; + int bNewTerm = 0; + int nKeep = 0; + + /* Search for the end of the position list within the current page. */ + u8 *a = pLeaf->p; + int n = pLeaf->n; + + iOff = pIter->iLeafOffset; + if( iOffiLeafOffset = iOff; + if( iDelta==0 ){ + bNewTerm = 1; + if( iOff>=n ){ + fts5SegIterNextPage(p, pIter); + pIter->iLeafOffset = 4; + }else if( iOff!=fts5GetU16(&a[2]) ){ + pIter->iLeafOffset += getVarint32(&a[iOff], nKeep); + } + }else{ + pIter->iRowid -= iDelta; + } }else{ - fts5SegIterLoadTerm(p, pIter, nKeep); + iOff = 0; + /* Next entry is not on the current page */ + while( iOff==0 ){ + fts5SegIterNextPage(p, pIter); + pLeaf = pIter->pLeaf; + if( pLeaf==0 ) break; + if( (iOff = fts5GetU16(&pLeaf->p[0])) ){ + iOff += sqlite3GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ + pIter->iLeafOffset = iOff; + bNewTerm = 1; + } + } + } + + /* Check if the iterator is now at EOF. If so, return early. 
*/ + if( pIter->pLeaf && bNewTerm ){ + if( pIter->flags & FTS5_SEGITER_ONETERM ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + fts5SegIterLoadTerm(p, pIter, nKeep); + } } } } } +/* +** Iterator pIter currently points to the first rowid in a doclist. This +** function sets the iterator up so that iterates in reverse order through +** the doclist. +*/ +static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ + Fts5Data *pLeaf; /* Current leaf data */ + int iOff = pIter->iLeafOffset; /* Byte offset within current leaf */ + Fts5Data *pLast = 0; + int pgnoLast = 0; + + /* Move to the page that contains the last rowid in this doclist. */ + pLeaf = pIter->pLeaf; + + while( iOffn ){ + int nPos; + i64 iDelta; + + /* Position list size in bytes */ + iOff += getVarint32(&pLeaf->p[iOff], nPos); + iOff += nPos; + if( iOff>=pLeaf->n ) break; + + /* Rowid delta. Or, if 0x00, the end of doclist marker. */ + nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) break; + iOff += nPos; + } + + if( iOff>=pLeaf->n ){ + Fts5StructureSegment *pSeg = pIter->pSeg; + i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pIter->iLeafPgno); + i64 iLast = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pSeg->pgnoLast); + + /* The last rowid in the doclist may not be on the current page. Search + ** forward to find the page containing the last rowid. */ + for(iAbs++; p->rc==SQLITE_OK && iAbs<=iLast; iAbs++){ + Fts5Data *pNew = fts5DataRead(p, iAbs); + if( pNew ){ + int iRowid, iTerm; + fts5LeafHeader(pNew, &iRowid, &iTerm); + if( iRowid ){ + Fts5Data *pTmp = pLast; + pLast = pNew; + pNew = pTmp; + pgnoLast = iAbs & (((i64)1 << FTS5_DATA_PAGE_B) - 1); + } + if( iTerm ){ + iAbs = iLast; + } + fts5DataRelease(pNew); + } + } + } + + /* If pLast is NULL at this point, then the last rowid for this doclist + ** lies on the page currently indicated by the iterator. 
In this case + ** iLastOff is set to the value that pIter->iLeafOffset will take when + ** the iterator points to that rowid. + ** + ** Or, if pLast is non-NULL, then it is the page that contains the last + ** rowid. + */ + if( pLast ){ + int dummy; + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = pLast; + pIter->iLeafPgno = pgnoLast; + fts5LeafHeader(pLast, &iOff, &dummy); + iOff += getVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + + fts5SegIterReverseInitPage(p, pIter); + pIter->flags |= FTS5_SEGITER_REVERSE; +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg, index iIdx. If there is no such term in the index, the iterator @@ -1162,13 +1363,15 @@ static void fts5SegIterSeekInit( Fts5Index *p, /* FTS5 backend */ int iIdx, /* Config.aHash[] index of FTS index */ const u8 *pTerm, int nTerm, /* Term to seek to */ - int bGe, /* If true seek for >=. If false, == */ + int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ int iPg = 1; int h; + int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0); + assert( bGe==0 || (flags & FTS5INDEX_QUERY_ASC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; @@ -1220,7 +1423,12 @@ static void fts5SegIterSeekInit( } } - if( bGe==0 ) pIter->flags |= FTS5_SEGITER_ONETERM; + if( bGe==0 ){ + pIter->flags |= FTS5_SEGITER_ONETERM; + if( pIter->pLeaf && (flags & FTS5INDEX_QUERY_ASC) ){ + fts5SegIterReverse(p, iIdx, pIter); + } + } } /* @@ -1229,6 +1437,7 @@ static void fts5SegIterSeekInit( static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); + sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); } @@ -1248,6 +1457,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ Fts5SegIter *p2; /* Right-hand Fts5SegIter */ assert( iOutnSeg && iOut>0 
); + assert( pIter->bRev==0 || pIter->bRev==1 ); if( iOut>=(pIter->nSeg/2) ){ i1 = (iOut - pIter->nSeg/2) * 2; @@ -1269,7 +1479,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ assert( i2>i1 ); assert( i2!=0 ); if( p1->iRowid==p2->iRowid ) return i2; - res = (p1->iRowid > p2->iRowid) ? -1 : +1; + res = ((p1->iRowid < p2->iRowid)==pIter->bRev) ? -1 : +1; } assert( res!=0 ); if( res<0 ){ @@ -1342,7 +1552,7 @@ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ - int bGe, /* True for >= */ + int flags, /* True for >= */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ @@ -1373,6 +1583,7 @@ static void fts5MultiIterNew( pNew->nSeg = nSlot; pNew->aSeg = (Fts5SegIter*)&pNew[1]; pNew->aFirst = (u16*)&pNew->aSeg[nSlot]; + pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_ASC)); /* Initialize each of the component segment iterators. 
*/ if( iLevel<0 ){ @@ -1384,7 +1595,7 @@ static void fts5MultiIterNew( if( pTerm==0 ){ fts5SegIterInit(p, iIdx, pSeg, pIter); }else{ - fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, bGe, pSeg, pIter); + fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, flags, pSeg, pIter); } } } @@ -3074,7 +3285,11 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ if( pIter->i ){ i64 iDelta; pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); - pIter->iRowid -= iDelta; + if( pIter->bAsc ){ + pIter->iRowid += iDelta; + }else{ + pIter->iRowid -= iDelta; + } }else{ pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } @@ -3086,10 +3301,15 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ } } -static void fts5DoclistIterInit(Fts5Buffer *pBuf, Fts5DoclistIter *pIter){ +static void fts5DoclistIterInit( + Fts5Buffer *pBuf, + int bAsc, + Fts5DoclistIter *pIter +){ memset(pIter, 0, sizeof(*pIter)); pIter->a = pBuf->p; pIter->n = pBuf->n; + pIter->bAsc = bAsc; fts5DoclistIterNext(pIter); } @@ -3098,14 +3318,17 @@ static void fts5DoclistIterInit(Fts5Buffer *pBuf, Fts5DoclistIter *pIter){ */ static void fts5MergeAppendDocid( int *pRc, /* IN/OUT: Error code */ + int bAsc, Fts5Buffer *pBuf, /* Buffer to write to */ i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ i64 iRowid /* Rowid to append */ ){ if( pBuf->n==0 ){ fts5BufferAppendVarint(pRc, pBuf, iRowid); - }else{ + }else if( bAsc==0 ){ fts5BufferAppendVarint(pRc, pBuf, *piLastRowid - iRowid); + }else{ + fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); } *piLastRowid = iRowid; } @@ -3120,6 +3343,7 @@ static void fts5MergeAppendDocid( */ static void fts5MergePrefixLists( Fts5Index *p, /* FTS5 backend object */ + int bAsc, Fts5Buffer *p1, /* First list to merge */ Fts5Buffer *p2 /* Second list to merge */ ){ @@ -3132,19 +3356,21 @@ static void fts5MergePrefixLists( memset(&out, 0, sizeof(out)); memset(&tmp, 0, sizeof(tmp)); - fts5DoclistIterInit(p1, &i1); - fts5DoclistIterInit(p2, &i2); + 
fts5DoclistIterInit(p1, bAsc, &i1); + fts5DoclistIterInit(p2, bAsc, &i2); while( i1.aPoslist!=0 || i2.aPoslist!=0 ){ - if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid>i2.iRowid) ){ + if( i2.aPoslist==0 || (i1.aPoslist && + ( (!bAsc && i1.iRowid>i2.iRowid) || (bAsc && i1.iRowidrc, &out, &iLastRowid, i1.iRowid); + fts5MergeAppendDocid(&p->rc, bAsc, &out, &iLastRowid, i1.iRowid); fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist); fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); fts5DoclistIterNext(&i1); } - else if( i1.aPoslist==0 || i2.iRowid>i1.iRowid ){ + else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ /* Copy entry from i2 */ - fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); + fts5MergeAppendDocid(&p->rc, bAsc, &out, &iLastRowid, i2.iRowid); fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist); fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); fts5DoclistIterNext(&i2); @@ -3157,7 +3383,7 @@ static void fts5MergePrefixLists( memset(&writer, 0, sizeof(writer)); /* Merge the two position lists. 
*/ - fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); + fts5MergeAppendDocid(&p->rc, bAsc, &out, &iLastRowid, i2.iRowid); fts5BufferZero(&tmp); sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); @@ -3195,6 +3421,7 @@ static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ + int bAsc, /* True for "ORDER BY rowid ASC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5IndexIter *pIter /* Populate this object */ @@ -3203,7 +3430,6 @@ static void fts5SetupPrefixIter( Fts5Buffer *aBuf; const int nBuf = 32; - aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p, 0); @@ -3225,29 +3451,34 @@ static void fts5SetupPrefixIter( assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( nTerm0 && iRowid>=iLastRowid ){ + if( doclist.n>0 + && ((!bAsc && iRowid>=iLastRowid) || (bAsc && iRowid<=iLastRowid)) + ){ + for(i=0; doclist.n && p->rc==SQLITE_OK; i++){ assert( irc, &doclist, iRowid); - }else{ + }else if( bAsc==0 ){ fts5BufferAppendVarint(&p->rc, &doclist, iLastRowid - iRowid); + }else{ + fts5BufferAppendVarint(&p->rc, &doclist, iRowid - iLastRowid); } iLastRowid = iRowid; fts5MultiIterPoslist(p, p1, 1, &doclist); } for(i=0; ipDoclist = pDoclist; - fts5DoclistIterInit(&doclist, pIter->pDoclist); + fts5DoclistIterInit(&doclist, bAsc, pIter->pDoclist); } } @@ -3296,11 +3527,12 @@ Fts5IndexIter *sqlite3Fts5IndexQuery( pRet->pStruct = fts5StructureRead(p, iIdx); if( pRet->pStruct ){ fts5MultiIterNew(p, pRet->pStruct, - iIdx, 0, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + iIdx, flags, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti ); } }else{ - fts5SetupPrefixIter(p, (const u8*)pToken, nToken, pRet); + int bAsc = (flags & FTS5INDEX_QUERY_ASC)!=0; + fts5SetupPrefixIter(p, bAsc, (const u8*)pToken, nToken, 
pRet); } } @@ -3362,6 +3594,7 @@ const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ Fts5Index *p = pIter->pIndex; fts5BufferZero(&pIter->poslist); fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist); + assert( p->rc==SQLITE_OK ); if( p->rc ) return 0; *pn = pIter->poslist.n; return pIter->poslist.p; diff --git a/manifest b/manifest index 4480043dec..f9998628d2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\sprefix\squeries\sto\sfts5. -D 2014-07-08T16:27:37.120 +C Support\s"ORDER\sBY\srowid\sASC". +D 2014-07-10T20:21:12.482 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,8 +107,8 @@ F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 F ext/fts5/fts5Int.h bb716a6e6a376a7c8211e55e5577c6c020d176c2 F ext/fts5/fts5_buffer.c 83b463a179ad4348fa87796fce78b0e4ef6b898a F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 21351cdd256f8e561a57a38490d27f7922247696 -F ext/fts5/fts5_index.c a3084168a384a9d43f7fb045511b386ccb6e55e8 +F ext/fts5/fts5_expr.c 0dc31b06d444cad097bec05699797590729d2638 +F ext/fts5/fts5_index.c 9ff3008e903aa9077b0a7a7aa76ab6080eb07a36 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -593,9 +593,9 @@ F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 -F test/fts5ab.test 4db86a9473ee2a8c2cb30e0d81df21c6022f99b6 -F test/fts5ac.test d3aeb7a079d40093b34ac8053fc5e4c0ed7e88dc -F test/fts5ad.test a4d2f344c86a45ee53b424512585b3900ccb8cf3 +F test/fts5ab.test 
dc04ed48cf93ca957d174406e6c192f2ff4f3397 +F test/fts5ac.test 28203ba2334030514d7a6271c5fb1ba3cbc219b1 +F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -763,7 +763,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test 43a4c2397b5e8a45c41fac20c7a8a2d4094f470f +F test/permutations.test 0b5333e5dcdeffba0ecbe5ee8dc7577029ffab6c F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1191,7 +1191,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8682b87e794767cefcaa080fd53c8973c24c556a -R dba83c3d230dbf413439715289f715cf +P 75ebd3cd5904a4f89f7f3a9b25d32b2a42a31310 +R 5a76d2f2fc0d7fcaa9a60fabc7fdb146 U dan -Z 876bab147b2ac69a43a21b2ef49df211 +Z 870004bd588f44c77d8063239acbea69 diff --git a/manifest.uuid b/manifest.uuid index 9182ec19d5..e8c5df2055 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -75ebd3cd5904a4f89f7f3a9b25d32b2a42a31310 \ No newline at end of file +b96b5e166990e4ec363b24f66e04cfa5f00f6342 \ No newline at end of file diff --git a/test/fts5ab.test b/test/fts5ab.test index 88a876d3b2..1f6b7171da 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -124,7 +124,6 @@ foreach {tn expr res} { 3 {abase + abash} {1} 4 {abash + abase} {9} 5 {abaft + abashing} {8 5} - 6 {abandon + abandoning} {10} 7 {"abashing 
abases abasement abaft abashing"} {8} } { @@ -133,10 +132,23 @@ foreach {tn expr res} { } $res } -breakpoint do_execsql_test 3.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'NEAR(aback abate, 2)' } {6} +foreach {tn expr res} { + 1 {abash} {1 3 5 9} + 2 {abase} {1 3 4 9} + 3 {abase + abash} {1} + 4 {abash + abase} {9} + 5 {abaft + abashing} {5 8} + 6 {abandon + abandoning} {10} + 7 {"abashing abases abasement abaft abashing"} {8} +} { + do_execsql_test 3.4.$tn { + SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid ASC + } $res +} + finish_test diff --git a/test/fts5ac.test b/test/fts5ac.test index 849ea52e5e..ddd27481a1 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -208,18 +208,24 @@ proc nearset {aCol args} { return $bMatch } -proc matchdata {expr {print 0}} { +proc matchdata {expr {bAsc 0}} { set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] set res [list] foreach {id x y} $::data { set cols [list $x $y] if $tclexpr { - set res [concat $id $res] + lappend res $id } } - if {$print} { - puts $tclexpr + + # puts $tclexpr + + if {$bAsc} { + set res [lsort -integer -increasing $res] + } else { + set res [lsort -integer -decreasing $res] } + return $res } @@ -263,32 +269,37 @@ foreach {tn expr tclexpr} { #------------------------------------------------------------------------- # -foreach {tn expr} { - 1 { NEAR(r c) } - 2 { NEAR(r c, 5) } - 3 { NEAR(r c, 3) } - 4 { NEAR(r c, 2) } - 5 { NEAR(r c, 0) } - 6 { NEAR(a b c) } - 7 { NEAR(a b c, 8) } - 8 { x : NEAR(r c) } - 9 { y : NEAR(r c) } - 10 { x : "r c" } - 11 { y : "r c" } - 12 { a AND b } - 13 { a AND b AND c } - 14a { a } - 14b { a OR b } - 15 { a OR b AND c } - 16 { c AND b OR a } - 17 { c AND (b OR a) } - 18 { c NOT (b OR a) } - 19 { c NOT b OR a AND d } +foreach {bAsc sql} { + 0 {SELECT rowid FROM xx WHERE xx MATCH $expr} + 1 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid ASC} } { - set res [matchdata $expr] - do_execsql_test 4.$tn.[llength $res] { - SELECT rowid 
FROM xx WHERE xx match $expr - } $res + foreach {tn expr} { + 0.1 x + + 1 { NEAR(r c) } + 2 { NEAR(r c, 5) } + 3 { NEAR(r c, 3) } + 4 { NEAR(r c, 2) } + 5 { NEAR(r c, 0) } + 6 { NEAR(a b c) } + 7 { NEAR(a b c, 8) } + 8 { x : NEAR(r c) } + 9 { y : NEAR(r c) } + 10 { x : "r c" } + 11 { y : "r c" } + 12 { a AND b } + 13 { a AND b AND c } + 14a { a } + 14b { a OR b } + 15 { a OR b AND c } + 16 { c AND b OR a } + 17 { c AND (b OR a) } + 18 { c NOT (b OR a) } + 19 { c NOT b OR a AND d } + } { + set res [matchdata $expr $bAsc] + do_execsql_test 4.$bAsc.$tn.[llength $res] $sql $res + } } diff --git a/test/fts5ad.test b/test/fts5ad.test index 3898b4c89c..70349388ee 100644 --- a/test/fts5ad.test +++ b/test/fts5ad.test @@ -41,6 +41,17 @@ foreach {tn match res} { } $res } +foreach {tn match res} { + 5 {c*} {1} + 6 {i*} {2 3} + 7 {t*} {1 3} + 8 {r*} {1 3} +} { + do_execsql_test 1.$tn { + SELECT rowid FROM yy WHERE yy MATCH $match ORDER BY rowid ASC + } $res +} + foreach {T create} { 2 { CREATE VIRTUAL TABLE t1 USING fts5(a, b); @@ -178,17 +189,25 @@ foreach {T create} { return $ret } - foreach {tn prefix} { - 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} - 6 {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*} - 11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*} - 16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*} - 21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*} - 27 {x*} + foreach {bAsc sql} { + 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} + 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid ASC} } { - set res [prefix_query $prefix] - set n [llength $res] - do_execsql_test $T.$tn.$n {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} $res + foreach {tn prefix} { + 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} + 6 {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*} + 11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*} + 16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*} + 21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*} + 27 {x*} + } { + set res [prefix_query 
$prefix] + if {$bAsc} { + set res [lsort -integer -increasing $res] + } + set n [llength $res] + do_execsql_test $T.$bAsc.$tn.$n $sql $res + } } } diff --git a/test/permutations.test b/test/permutations.test index d03895e8e6..c75cdbfd43 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -225,7 +225,7 @@ test_suite "fts3" -prefix "" -description { test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { - fts5aa.test fts5ab.test fts5ea.test + fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ea.test } test_suite "nofaultsim" -prefix "" -description { From 9cfd51f587708cb9bba15a892770b7551214a6d8 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 16 Jul 2014 19:15:57 +0000 Subject: [PATCH 012/206] Begin adding interface for auxiliary functions. FossilOrigin-Name: 1e2a7ba0889093416455f488fca893eaeb195d45 --- ext/fts5/fts5.c | 356 +++++++++++++++++++++++++++++++++++------ ext/fts5/fts5.h | 87 ++++++++++ ext/fts5/fts5Int.h | 38 +++++ ext/fts5/fts5_aux.c | 144 +++++++++++++++++ ext/fts5/fts5_buffer.c | 26 +++ ext/fts5/fts5_expr.c | 52 ++++++ ext/fts5/fts5parse.y | 155 ++++++++++++++++++ main.mk | 7 +- manifest | 25 +-- manifest.uuid | 2 +- test/fts5ac.test | 140 ++++++++++++---- 11 files changed, 938 insertions(+), 94 deletions(-) create mode 100644 ext/fts5/fts5.h create mode 100644 ext/fts5/fts5_aux.c create mode 100644 ext/fts5/fts5parse.y diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index ba9117c527..1278ab11f7 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -17,6 +17,34 @@ typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; +typedef struct Fts5Global Fts5Global; +typedef struct Fts5Auxiliary Fts5Auxiliary; + +/* +** A single object of this type is allocated when the FTS5 module is +** registered with a database handle. It is used to store pointers to +** all registered FTS5 extensions - tokenizers and auxiliary functions. 
+*/ +struct Fts5Global { + sqlite3 *db; /* Associated database connection */ + i64 iNextId; /* Used to allocate unique cursor ids */ + Fts5Auxiliary *pAux; /* First in list of all aux. functions */ + Fts5Cursor *pCsr; /* First in list of all open cursors */ +}; + +/* +** Each auxiliary function registered with the FTS5 module is represented +** by an object of the following type. All such objects are stored as part +** of the Fts5Global.pAux list. +*/ +struct Fts5Auxiliary { + Fts5Global *pGlobal; /* Global context for this function */ + char *zFunc; /* Function name (nul-terminated) */ + void *pUserData; /* User-data pointer */ + fts5_extension_function xFunc; /* Callback function */ + void (*xDestroy)(void*); /* Destructor function */ + Fts5Auxiliary *pNext; /* Next registered auxiliary function */ +}; /* ** Virtual-table object. @@ -26,6 +54,12 @@ struct Fts5Table { Fts5Config *pConfig; /* Virtual table configuration */ Fts5Index *pIndex; /* Full-text index */ Fts5Storage *pStorage; /* Document store */ + Fts5Global *pGlobal; /* Global (connection wide) data */ +}; + +struct Fts5MatchPhrase { + Fts5Buffer *pPoslist; /* Pointer to current poslist */ + int nTerm; /* Size of phrase in terms */ }; /* @@ -37,7 +71,12 @@ struct Fts5Cursor { sqlite3_stmt *pStmt; /* Statement used to read %_content */ int bEof; /* True at EOF */ Fts5Expr *pExpr; /* Expression for MATCH queries */ - int bSeekRequired; + int bSeekRequired; /* True if seek is required */ + Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ + + /* Variables used by auxiliary functions */ + i64 iCsrId; /* Cursor id */ + Fts5Auxiliary *pAux; /* Currently executing function */ }; /* @@ -108,6 +147,7 @@ static int fts5InitVtab( }else{ memset(pTab, 0, sizeof(Fts5Table)); pTab->pConfig = pConfig; + pTab->pGlobal = (Fts5Global*)pAux; } } @@ -234,11 +274,17 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ ** Implementation of xOpen method. 
*/ static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ - Fts5Cursor *pCsr; - int rc = SQLITE_OK; + Fts5Table *pTab = (Fts5Table*)pVTab; + Fts5Cursor *pCsr; /* New cursor object */ + int rc = SQLITE_OK; /* Return code */ + pCsr = (Fts5Cursor*)sqlite3_malloc(sizeof(Fts5Cursor)); if( pCsr ){ + Fts5Global *pGlobal = pTab->pGlobal; memset(pCsr, 0, sizeof(Fts5Cursor)); + pCsr->pNext = pGlobal->pCsr; + pGlobal->pCsr = pCsr; + pCsr->iCsrId = ++pGlobal->iNextId; }else{ rc = SQLITE_NOMEM; } @@ -260,11 +306,17 @@ static int fts5StmtType(int idxNum){ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + Fts5Cursor **pp; if( pCsr->pStmt ){ int eStmt = fts5StmtType(pCsr->idxNum); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); } sqlite3Fts5ExprFree(pCsr->pExpr); + + /* Remove the cursor from the Fts5Global.pCsr list */ + for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); + *pp = pCsr->pNext; + sqlite3_free(pCsr); return SQLITE_OK; } @@ -373,22 +425,14 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ return SQLITE_OK; } -/* -** This is the xColumn method, called by SQLite to request a value from -** the row that the supplied cursor currently points to. +/* +** If the cursor requires seeking (bSeekRequired flag is set), seek it. +** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. 
*/ -static int fts5ColumnMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ - int iCol /* Index of column to read value from */ -){ - Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - int ePlan = FTS5_PLAN(pCsr->idxNum); +static int fts5SeekCursor(Fts5Cursor *pCsr){ int rc = SQLITE_OK; - - assert( pCsr->bEof==0 ); if( pCsr->bSeekRequired ){ - assert( ePlan==FTS5_PLAN_MATCH && pCsr->pExpr ); + assert( pCsr->pExpr ); sqlite3_reset(pCsr->pStmt); sqlite3_bind_int64(pCsr->pStmt, 1, sqlite3Fts5ExprRowid(pCsr->pExpr)); rc = sqlite3_step(pCsr->pStmt); @@ -401,9 +445,35 @@ static int fts5ColumnMethod( } } } + return rc; +} - if( rc==SQLITE_OK ){ - sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); +/* +** This is the xColumn method, called by SQLite to request a value from +** the row that the supplied cursor currently points to. +*/ +static int fts5ColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts5Config *pConfig = ((Fts5Table*)(pCursor->pVtab))->pConfig; + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + int rc = SQLITE_OK; + + assert( pCsr->bEof==0 ); + + if( iCol==pConfig->nCol ){ + /* User is requesting the value of the special column with the same name + ** as the table. Return the cursor integer id number. This value is only + ** useful in that it may be passed as the first argument to an FTS5 + ** auxiliary function. 
*/ + sqlite3_result_int64(pCtx, pCsr->iCsrId); + }else{ + rc = fts5SeekCursor(pCsr); + if( rc==SQLITE_OK ){ + sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); + } } return rc; } @@ -513,6 +583,121 @@ static int fts5RollbackMethod(sqlite3_vtab *pVtab){ return rc; } +static void *fts5ApiUserData(Fts5Context *pCtx){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + return pCsr->pAux->pUserData; +} + +static int fts5ApiColumnCount(Fts5Context *pCtx){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; +} + +static int fts5ApiColumnAvgSize(Fts5Context *pCtx, int iCol, int *pnToken){ + assert( 0 ); + return 0; +} + +static int fts5ApiTokenize( + Fts5Context *pCtx, + const char *pText, int nText, + void *pUserData, + int (*xToken)(void*, const char*, int, int, int, int) +){ + assert( 0 ); + return SQLITE_OK; +} + +static int fts5ApiPhraseCount(Fts5Context *pCtx){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); +} + +static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); +} + +static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + return sqlite3Fts5ExprRowid(pCsr->pExpr); +} + +static int fts5ApiColumnText( + Fts5Context *pCtx, + int iCol, + const char **pz, + int *pn +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + int rc = fts5SeekCursor(pCsr); + if( rc==SQLITE_OK ){ + *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol); + *pn = sqlite3_column_bytes(pCsr->pStmt, iCol); + } + return rc; +} + +static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ + assert( 0 ); + return 0; +} + +static int fts5ApiPoslist( + Fts5Context *pCtx, + int iPhrase, + int *pi, + int *piCol, + int *piOff +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + const u8 *a; int n; /* Poslist for phrase iPhrase */ + n = 
sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a); + return sqlite3Fts5PoslistNext(a, n, pi, piCol, piOff); +} + +static void fts5ApiCallback( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + static const Fts5ExtensionApi sApi = { + 1, /* iVersion */ + fts5ApiUserData, + fts5ApiColumnCount, + fts5ApiColumnAvgSize, + fts5ApiTokenize, + fts5ApiPhraseCount, + fts5ApiPhraseSize, + fts5ApiRowid, + fts5ApiColumnText, + fts5ApiColumnSize, + fts5ApiPoslist, + }; + + Fts5Auxiliary *pAux; + Fts5Cursor *pCsr; + i64 iCsrId; + + assert( argc>=1 ); + pAux = (Fts5Auxiliary*)sqlite3_user_data(context); + iCsrId = sqlite3_value_int64(argv[0]); + + for(pCsr=pAux->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ + if( pCsr->iCsrId==iCsrId ) break; + } + if( pCsr==0 ){ + char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); + sqlite3_result_error(context, zErr, -1); + }else{ + assert( pCsr->pAux==0 ); + pCsr->pAux = pAux; + pAux->xFunc(&sApi, (Fts5Context*)pCsr, context, argc-1, &argv[1]); + pCsr->pAux = 0; + } +} + + /* ** This routine implements the xFindFunction method for the FTS3 ** virtual table. @@ -522,8 +707,19 @@ static int fts5FindFunctionMethod( int nArg, /* Number of SQL function arguments */ const char *zName, /* Name of SQL function */ void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ - void **ppArg /* Unused */ + void **ppArg /* OUT: User data for *pxFunc */ ){ + Fts5Table *pTab = (Fts5Table*)pVtab; + Fts5Auxiliary *pAux; + + for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ + if( sqlite3_stricmp(zName, pAux->zFunc)==0 ){ + *pxFunc = fts5ApiCallback; + *ppArg = (void*)pAux; + return 1; + } + } + /* No function of the specified name was found. Return 0. 
*/ return 0; } @@ -567,37 +763,99 @@ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ return SQLITE_OK; } -static const sqlite3_module fts5Module = { - /* iVersion */ 2, - /* xCreate */ fts5CreateMethod, - /* xConnect */ fts5ConnectMethod, - /* xBestIndex */ fts5BestIndexMethod, - /* xDisconnect */ fts5DisconnectMethod, - /* xDestroy */ fts5DestroyMethod, - /* xOpen */ fts5OpenMethod, - /* xClose */ fts5CloseMethod, - /* xFilter */ fts5FilterMethod, - /* xNext */ fts5NextMethod, - /* xEof */ fts5EofMethod, - /* xColumn */ fts5ColumnMethod, - /* xRowid */ fts5RowidMethod, - /* xUpdate */ fts5UpdateMethod, - /* xBegin */ fts5BeginMethod, - /* xSync */ fts5SyncMethod, - /* xCommit */ fts5CommitMethod, - /* xRollback */ fts5RollbackMethod, - /* xFindFunction */ fts5FindFunctionMethod, - /* xRename */ fts5RenameMethod, - /* xSavepoint */ fts5SavepointMethod, - /* xRelease */ fts5ReleaseMethod, - /* xRollbackTo */ fts5RollbackToMethod, -}; +/* +** Register a new auxiliary function with global context pGlobal. +*/ +int sqlite3Fts5CreateAux( + Fts5Global *pGlobal, /* Global context (one per db handle) */ + const char *zName, /* Name of new function */ + void *pUserData, /* User data for aux. function */ + fts5_extension_function xFunc, /* Aux. 
function implementation */ + void(*xDestroy)(void*) /* Destructor for pUserData */ +){ + int rc = sqlite3_overload_function(pGlobal->db, zName, -1); + if( rc==SQLITE_OK ){ + Fts5Auxiliary *pAux; + int nByte; /* Bytes of space to allocate */ + + nByte = sizeof(Fts5Auxiliary) + strlen(zName) + 1; + pAux = (Fts5Auxiliary*)sqlite3_malloc(nByte); + if( pAux ){ + memset(pAux, 0, nByte); + pAux->zFunc = (char*)&pAux[1]; + strcpy(pAux->zFunc, zName); + pAux->pGlobal = pGlobal; + pAux->pUserData = pUserData; + pAux->xFunc = xFunc; + pAux->xDestroy = xDestroy; + pAux->pNext = pGlobal->pAux; + pGlobal->pAux = pAux; + }else{ + rc = SQLITE_NOMEM; + } + } -int sqlite3Fts5Init(sqlite3 *db){ - int rc; - rc = sqlite3_create_module_v2(db, "fts5", &fts5Module, 0, 0); - if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); - if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(db); return rc; } +static void fts5ModuleDestroy(void *pCtx){ + Fts5Auxiliary *pAux; + Fts5Auxiliary *pNext; + Fts5Global *pGlobal = (Fts5Global*)pCtx; + for(pAux=pGlobal->pAux; pAux; pAux=pNext){ + pNext = pAux->pNext; + if( pAux->xDestroy ){ + pAux->xDestroy(pAux->pUserData); + } + sqlite3_free(pAux); + } + sqlite3_free(pGlobal); +} + + +int sqlite3Fts5Init(sqlite3 *db){ + static const sqlite3_module fts5Mod = { + /* iVersion */ 2, + /* xCreate */ fts5CreateMethod, + /* xConnect */ fts5ConnectMethod, + /* xBestIndex */ fts5BestIndexMethod, + /* xDisconnect */ fts5DisconnectMethod, + /* xDestroy */ fts5DestroyMethod, + /* xOpen */ fts5OpenMethod, + /* xClose */ fts5CloseMethod, + /* xFilter */ fts5FilterMethod, + /* xNext */ fts5NextMethod, + /* xEof */ fts5EofMethod, + /* xColumn */ fts5ColumnMethod, + /* xRowid */ fts5RowidMethod, + /* xUpdate */ fts5UpdateMethod, + /* xBegin */ fts5BeginMethod, + /* xSync */ fts5SyncMethod, + /* xCommit */ fts5CommitMethod, + /* xRollback */ fts5RollbackMethod, + /* xFindFunction */ fts5FindFunctionMethod, + /* xRename */ fts5RenameMethod, + /* xSavepoint */ fts5SavepointMethod, + /* 
xRelease */ fts5ReleaseMethod, + /* xRollbackTo */ fts5RollbackToMethod, + }; + + int rc; + Fts5Global *pGlobal = 0; + pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); + + if( pGlobal==0 ){ + rc = SQLITE_NOMEM; + }else{ + void *p = (void*)pGlobal; + memset(pGlobal, 0, sizeof(Fts5Global)); + pGlobal->db = db; + rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); + if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(pGlobal); + } + return rc; +} + + diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h new file mode 100644 index 0000000000..2e7363006d --- /dev/null +++ b/ext/fts5/fts5.h @@ -0,0 +1,87 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Interfaces to extend FTS5. Using the interfaces defined in this file, +** FTS5 may be extended with: +** +** * custom tokenizers, and +** * custom auxiliary functions. +*/ + + +#ifndef _FTS5_H +#define _FTS5_H + +#include "sqlite3.h" + +/************************************************************************* +** CUSTOM AUXILIARY FUNCTIONS +** +** Virtual table implemenations may overload SQL functions by implementing +** the sqlite3_module.xFindFunction() method. 
+*/ + +typedef struct Fts5ExtensionApi Fts5ExtensionApi; +typedef struct Fts5Context Fts5Context; + +typedef void (*fts5_extension_function)( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +); + +/* +** xColumnCount: +** Returns the number of columns in the FTS5 table. +** +** xPhraseCount: +** Returns the number of phrases in the current query expression. +** +** xPhraseSize: +** Returns the number of tokens in phrase iPhrase of the query. Phrases +** are numbered starting from zero. +** +** xRowid: +** Returns the rowid of the current row. +** +** xPoslist: +** Iterate through instances of phrase iPhrase in the current row. +*/ +struct Fts5ExtensionApi { + int iVersion; /* Currently always set to 1 */ + + void *(*xUserData)(Fts5Context*); + + int (*xColumnCount)(Fts5Context*); + int (*xColumnAvgSize)(Fts5Context*, int iCol, int *pnToken); + int (*xTokenize)(Fts5Context*, + const char *pText, int nText, /* Text to tokenize */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ + ); + + int (*xPhraseCount)(Fts5Context*); + int (*xPhraseSize)(Fts5Context*, int iPhrase); + + sqlite3_int64 (*xRowid)(Fts5Context*); + int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); + int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); + int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, int *piCol, int *piOff); +}; + +/* +** CUSTOM AUXILIARY FUNCTIONS +*************************************************************************/ +#endif /* _FTS5_H */ + diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 94206a849f..c7392b06e6 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -14,6 +14,7 @@ #ifndef _FTS5INT_H #define 
_FTS5INT_H +#include "fts5.h" #include "sqliteInt.h" #include "fts3_tokenizer.h" @@ -122,6 +123,12 @@ struct Fts5PoslistWriter { }; int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); +int sqlite3Fts5PoslistNext( + const u8 *a, int n, /* Buffer containing poslist */ + int *pi, /* IN/OUT: Offset within a[] */ + int *piCol, /* IN/OUT: Current column */ + int *piOff /* IN/OUT: Current token offset */ +); /* ** End of interface to code in fts5_buffer.c. @@ -331,6 +338,10 @@ void sqlite3Fts5ExprFree(Fts5Expr*); /* Called during startup to register a UDF with SQLite */ int sqlite3Fts5ExprInit(sqlite3*); +int sqlite3Fts5ExprPhraseCount(Fts5Expr*); +int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); +int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); + /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by @@ -373,4 +384,31 @@ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); ** End of interface to code in fts5_expr.c. **************************************************************************/ + +/************************************************************************** +** Interface to code in fts5.c. +*/ +typedef struct Fts5Global Fts5Global; + +int sqlite3Fts5CreateAux( + Fts5Global*, + const char*, + void*, + fts5_extension_function, + void(*)(void*) +); +/* +** End of interface to code in fts5.c. +**************************************************************************/ + + +/************************************************************************** +** Interface to code in fts5_aux.c. +*/ + +int sqlite3Fts5AuxInit(Fts5Global*); +/* +** End of interface to code in fts5_expr.c. 
+**************************************************************************/ + #endif diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c new file mode 100644 index 0000000000..3eea3c6ec2 --- /dev/null +++ b/ext/fts5/fts5_aux.c @@ -0,0 +1,144 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +#include "fts5Int.h" + +static void fts5SnippetFunction( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +){ + assert( 0 ); +} + +static void fts5TestFunction( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +){ + Fts5Buffer s; /* Build up text to return here */ + int nCol; /* Number of columns in table */ + int nPhrase; /* Number of phrases in query */ + i64 iRowid; /* Rowid of current row */ + const char *zReq = 0; + int rc = SQLITE_OK; + int i; + + if( nVal>=1 ){ + zReq = (const char*)sqlite3_value_text(apVal[0]); + } + + memset(&s, 0, sizeof(Fts5Buffer)); + + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "columncount "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "columncount") ){ + nCol = pApi->xColumnCount(pFts); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nCol); + } + + if( zReq==0 ){ + 
sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasecount "); + } + nPhrase = pApi->xPhraseCount(pFts); + if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasecount") ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nPhrase); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasesize "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasesize") ){ + if( nPhrase==1 ){ + int nSize = pApi->xPhraseSize(pFts, 0); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nSize); + }else{ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); + for(i=0; ixPhraseSize(pFts, i); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", (i==0?"":" "), nSize); + } + sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); + } + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, " poslist "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "poslist") ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); + for(i=0; ixPoslist(pFts, i, &j, &iCol, &iOff) ){ + sqlite3Fts5BufferAppendPrintf( + &rc, &s, "%s%d.%d", (bFirst?"":" "), iCol, iOff + ); + bFirst = 0; + } + sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); + } + sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, " rowid "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowid") ){ + iRowid = pApi->xRowid(pFts); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%lld", iRowid); + } + + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)s.p, -1, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_code(pCtx, rc); + } + sqlite3Fts5BufferFree(&s); +} + +int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ + struct Builtin { + const char *zFunc; /* Function name (nul-terminated) */ + void *pUserData; /* User-data pointer */ + fts5_extension_function xFunc;/* Callback function */ + void (*xDestroy)(void*); /* Destructor function */ + } aBuiltin [] = { + { "snippet", 0, fts5SnippetFunction, 0 }, + { "fts5_test", 0, fts5TestFunction, 0 }, + }; + + int rc = SQLITE_OK; /* Return code */ + int i; /* To iterate 
through builtin functions */ + + for(i=0; rc==SQLITE_OK && i=n ){ + /* EOF */ + return 1; + } + i += getVarint32(&a[i], iVal); + if( iVal==1 ){ + i += getVarint32(&a[i], iVal); + *piCol = iVal; + *piOff = 0; + i += getVarint32(&a[i], iVal); + } + *piOff += (iVal-2); + *pi = i; + return 0; +} + + diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 4618e7d491..bcbc3745e7 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -33,6 +33,8 @@ struct Fts5Expr { Fts5Index *pIndex; Fts5ExprNode *pRoot; int bAsc; + int nPhrase; /* Number of phrases in expression */ + Fts5ExprPhrase **apPhrase; /* Pointers to phrase objects */ }; /* @@ -92,6 +94,8 @@ struct Fts5Parse { Fts5Config *pConfig; char *zErr; int rc; + int nPhrase; /* Size of apPhrase array */ + Fts5ExprPhrase **apPhrase; /* Array of all phrases */ Fts5ExprNode *pExpr; /* Result of a successful parse */ }; @@ -211,9 +215,13 @@ int sqlite3Fts5ExprNew( }else{ pNew->pRoot = sParse.pExpr; pNew->pIndex = 0; + pNew->apPhrase = sParse.apPhrase; + pNew->nPhrase = sParse.nPhrase; + sParse.apPhrase = 0; } } + sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } @@ -236,6 +244,7 @@ void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ void sqlite3Fts5ExprFree(Fts5Expr *p){ if( p ){ sqlite3Fts5ParseNodeFree(p->pRoot); + sqlite3_free(p->apPhrase); sqlite3_free(p); } } @@ -959,6 +968,17 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( int rc; /* Tokenize return code */ char *z = 0; + if( pPhrase==0 ){ + if( (pParse->nPhrase % 8)==0 ){ + int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); + Fts5ExprPhrase **apNew; + apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte); + if( apNew==0 ) return 0; + pParse->apPhrase = apNew; + } + pParse->nPhrase++; + } + pParse->rc = fts5ParseStringFromToken(pToken, &z); if( z==0 ) return 0; sqlite3Fts5Dequote(z); @@ -974,6 +994,8 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; } + + 
pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; sqlite3_free(z); return sCtx.pPhrase; } @@ -1354,3 +1376,33 @@ int sqlite3Fts5ExprInit(sqlite3 *db){ return rc; } +/* +** Return the number of phrases in expression pExpr. +*/ +int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ + return pExpr->nPhrase; +} + +/* +** Return the number of terms in the iPhrase'th phrase in pExpr. +*/ +int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ + if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; + return pExpr->apPhrase[iPhrase]->nTerm; +} + +/* +** This function is used to access the current position list for phrase +** iPhrase. +*/ +int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ + if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ + *pa = 0; + return 0; + }else{ + Fts5ExprPhrase *pPhrase = pExpr->apPhrase[iPhrase]; + *pa = pPhrase->poslist.p; + return pPhrase->poslist.n; + } +} + diff --git a/ext/fts5/fts5parse.y b/ext/fts5/fts5parse.y new file mode 100644 index 0000000000..ec52bdbeeb --- /dev/null +++ b/ext/fts5/fts5parse.y @@ -0,0 +1,155 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ + + +// All token codes are small integers with #defines that begin with "TK_" +%token_prefix FTS5_ + +// The type of the data attached to each token is Token. This is also the +// default type for non-terminals. 
+// +%token_type {Fts5Token} +%default_type {Fts5Token} + +// The generated parser function takes a 4th argument as follows: +%extra_argument {Fts5Parse *pParse} + +// This code runs whenever there is a syntax error +// +%syntax_error { + sqlite3Fts5ParseError( + pParse, "fts5: syntax error near \"%.*s\"",TOKEN.n,TOKEN.p + ); +} +%stack_overflow { + assert( 0 ); +} + +// The name of the generated procedure that implements the parser +// is as follows: +%name sqlite3Fts5Parser + +// The following text is included near the beginning of the C source +// code file that implements the parser. +// +%include { +#include "fts5Int.h" +#include "fts5parse.h" + +/* +** Disable all error recovery processing in the parser push-down +** automaton. +*/ +#define YYNOERRORRECOVERY 1 + +/* +** Make yytestcase() the same as testcase() +*/ +#define yytestcase(X) testcase(X) + +} // end %include + +%left OR. +%left AND. +%left NOT. +%left COLON. + +input ::= expr(X). { sqlite3Fts5ParseFinished(pParse, X); } + +%type cnearset {Fts5ExprNode*} +%type expr {Fts5ExprNode*} +%type exprlist {Fts5ExprNode*} +%destructor cnearset { sqlite3Fts5ParseNodeFree($$); } +%destructor expr { sqlite3Fts5ParseNodeFree($$); } +%destructor exprlist { sqlite3Fts5ParseNodeFree($$); } + +expr(A) ::= expr(X) AND expr(Y). { + A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0); +} +expr(A) ::= expr(X) OR expr(Y). { + A = sqlite3Fts5ParseNode(pParse, FTS5_OR, X, Y, 0); +} +expr(A) ::= expr(X) NOT expr(Y). { + A = sqlite3Fts5ParseNode(pParse, FTS5_NOT, X, Y, 0); +} + +expr(A) ::= LP expr(X) RP. {A = X;} +expr(A) ::= exprlist(X). {A = X;} + +exprlist(A) ::= cnearset(X). {A = X;} +exprlist(A) ::= exprlist(X) cnearset(Y). { + A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0); +} + +cnearset(A) ::= nearset(X). { + A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); +} +cnearset(A) ::= STRING(X) COLON nearset(Y). 
{ + sqlite3Fts5ParseSetColumn(pParse, Y, &X); + A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y); +} + +%type nearset {Fts5ExprNearset*} +%type nearphrases {Fts5ExprNearset*} +%destructor nearset { sqlite3Fts5ParseNearsetFree($$); } +%destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); } + +nearset(A) ::= phrase(X). { A = sqlite3Fts5ParseNearset(pParse, 0, X); } +nearset(A) ::= STRING(X) LP nearphrases(Y) neardist_opt(Z) RP. { + sqlite3Fts5ParseNear(pParse, &X); + sqlite3Fts5ParseSetDistance(pParse, Y, &Z); + A = Y; +} + +nearphrases(A) ::= phrase(X). { + A = sqlite3Fts5ParseNearset(pParse, 0, X); +} +nearphrases(A) ::= nearphrases(X) phrase(Y). { + A = sqlite3Fts5ParseNearset(pParse, X, Y); +} + +/* +** The optional ", " at the end of the NEAR() arguments. +*/ +neardist_opt(A) ::= . { A.p = 0; A.n = 0; } +neardist_opt(A) ::= COMMA STRING(X). { A = X; } + +/* +** A phrase. A set of primitives connected by "+" operators. Examples: +** +** "the" + "quick brown" + fo * +** "the quick brown fo" * +** the+quick+brown+fo* +*/ +%type phrase {Fts5ExprPhrase*} +%destructor phrase { sqlite3Fts5ParsePhraseFree($$); } + +phrase(A) ::= phrase(X) PLUS STRING(Y) star_opt(Z). { + A = sqlite3Fts5ParseTerm(pParse, X, &Y, Z); +} +phrase(A) ::= STRING(Y) star_opt(Z). { + A = sqlite3Fts5ParseTerm(pParse, 0, &Y, Z); +} + +/* +** Optional "*" character. +*/ +%type star_opt {int} + +star_opt(A) ::= STAR. { A = 1; } +star_opt(A) ::= . { A = 0; } + + + + diff --git a/main.mk b/main.mk index 953d63e39f..a1582fb6dc 100644 --- a/main.mk +++ b/main.mk @@ -73,6 +73,7 @@ LIBOBJ+= vdbe.o parse.o \ vdbetrace.o wal.o walker.o where.o utf.o vtab.o LIBOBJ += fts5.o +LIBOBJ += fts5_aux.o LIBOBJ += fts5_buffer.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o @@ -385,7 +386,8 @@ EXTHDR += \ EXTHDR += \ $(TOP)/ext/icu/sqliteicu.h EXTHDR += \ - $(TOP)/ext/fts5/fts5Int.h + $(TOP)/ext/fts5/fts5Int.h \ + $(TOP)/ext/fts5/fts5.h # This is the default Makefile target. 
The objects listed here # are what get build when you type just "make" with no arguments. @@ -573,6 +575,9 @@ rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) # FTS5 things # +fts5_aux.o: $(TOP)/ext/fts5/fts5_aux.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_aux.c + fts5_buffer.o: $(TOP)/ext/fts5/fts5_buffer.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_buffer.c diff --git a/manifest b/manifest index f9998628d2..16bcb4aad6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Support\s"ORDER\sBY\srowid\sASC". -D 2014-07-10T20:21:12.482 +C Begin\sadding\sinterface\sfor\sauxiliary\sfunctions. +D 2014-07-16T19:15:57.212 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,13 +103,16 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05 -F ext/fts5/fts5Int.h bb716a6e6a376a7c8211e55e5577c6c020d176c2 -F ext/fts5/fts5_buffer.c 83b463a179ad4348fa87796fce78b0e4ef6b898a +F ext/fts5/fts5.c 20bcb1e10756c72b550947236960edf96929ca2f +F ext/fts5/fts5.h cda3b9d73e6ffa6d0cd35b7da6b808bf3a1ada32 +F ext/fts5/fts5Int.h 2d4c1e1ebdf18278776fcd8a64233ff3c04ea51f +F ext/fts5/fts5_aux.c 53ab338c6a469dc67e7a6bd8685ce727beee8403 +F ext/fts5/fts5_buffer.c b7aa6cdf4a63642fcc12359cedc4be748ca400cc F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 0dc31b06d444cad097bec05699797590729d2638 +F ext/fts5/fts5_expr.c e4e4e6d32beff1ab0d076f8fbf5cf3b2241d4dbc F ext/fts5/fts5_index.c 9ff3008e903aa9077b0a7a7aa76ab6080eb07a36 F ext/fts5/fts5_storage.c 
7848d8f8528d798bba159900ea310a6d4a279da8 +F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -153,7 +156,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk c5524f888196af43a9b5dfae878205044f549dbf +F main.mk cffc02a30f1af82d35410674f70a0286587add81 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -594,7 +597,7 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 -F test/fts5ac.test 28203ba2334030514d7a6271c5fb1ba3cbc219b1 +F test/fts5ac.test 398a2d8d9576e0579a0f0955fabd8410ace969e4 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d @@ -1191,7 +1194,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 75ebd3cd5904a4f89f7f3a9b25d32b2a42a31310 -R 5a76d2f2fc0d7fcaa9a60fabc7fdb146 +P b96b5e166990e4ec363b24f66e04cfa5f00f6342 +R ff6cbab233811678a295f9640beec5d4 U dan -Z 870004bd588f44c77d8063239acbea69 +Z 5e7398b52fb14b2e0bc342aa9223ff97 diff --git a/manifest.uuid b/manifest.uuid index e8c5df2055..7b1ee2441d 100644 --- a/manifest.uuid +++ 
b/manifest.uuid @@ -1 +1 @@ -b96b5e166990e4ec363b24f66e04cfa5f00f6342 \ No newline at end of file +1e2a7ba0889093416455f488fca893eaeb195d45 \ No newline at end of file diff --git a/test/fts5ac.test b/test/fts5ac.test index ddd27481a1..66cce1342b 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -138,9 +138,7 @@ do_test 1.1 { } {} proc phrasematch {phrase value} { - if {[string first $phrase $value]>=0} { - return 1 - } + if {[string first $phrase $value]>=0} { return 1 } return 0 } @@ -177,9 +175,9 @@ proc nearmatch {nNear phraselist value} { # Usage: # -# nearset aCol ?-near N? ?-col C? -- phrase1 phrase2... +# poslist aCol ?-near N? ?-col C? -- phrase1 phrase2... # -proc nearset {aCol args} { +proc poslist {aCol args} { set O(-near) 10 set O(-col) -1 @@ -191,44 +189,121 @@ proc nearset {aCol args} { set O($k) $v } + # Set phraselist to be a list of phrases. nPhrase its length. set phraselist [lrange $args [expr $nOpt+1] end] + set nPhrase [llength $phraselist] + + for {set j 0} {$j < [llength $aCol]} {incr j} { + for {set i 0} {$i < $nPhrase} {incr i} { + set A($j,$i) [list] + } + } - set bMatch 0 set iCol -1 foreach col $aCol { incr iCol if {$O(-col)>=0 && $O(-col)!=$iCol} continue - if {[nearmatch $O(-near) $phraselist $col]} { - set bMatch 1 - break + set nToken [llength $col] + + set iFL [expr $O(-near) >= $nToken ? 
$nToken - 1 : $O(-near)] + for { } {$iFL < $nToken} {incr iFL} { + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set B($iPhrase) [list] + } + + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set p [lindex $phraselist $iPhrase] + set nPm1 [expr {[llength $p] - 1}] + set iFirst [expr $iFL - $O(-near) - [llength $p]] + + for {set i $iFirst} {$i <= $iFL} {incr i} { + if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i } + } + if {[llength $B($iPhrase)] == 0} break + } + + if {$iPhrase==$nPhrase} { + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] + set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)] + } + } } } - return $bMatch -} - -proc matchdata {expr {bAsc 0}} { - set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] set res [list] - foreach {id x y} $::data { - set cols [list $x $y] - if $tclexpr { - lappend res $id + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set plist [list] + for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { + foreach a $A($iCol,$iPhrase) { + lappend plist "$iCol.$a" + } } - } - - # puts $tclexpr - - if {$bAsc} { - set res [lsort -integer -increasing $res] - } else { - set res [lsort -integer -decreasing $res] + lappend res $plist } return $res } +# Usage: +# +# nearset aCol ?-near N? ?-col C? -- phrase1 phrase2... +# +proc nearset {args} { + set plist [poslist {*}$args] + return [expr [llength [lindex $plist 0]]>0] +} + +# Argument $expr is an FTS5 match expression designed to be executed against +# an FTS5 table with the following schema: +# +# CREATE VIRTUAL TABLE xy USING fts5(x, y); +# +# Assuming the table contains the same records as stored in the global +# $::data array (see above), this function returns a list containing one +# element for each match in the dataset. 
The elements are themselves lists +# formatted as follows: +# +# <rowid> {<phrase 0 matches> <phrase 1 matches>...} +# +# where each <phrase X matches> element is a list of phrase matches in the +# same form as returned by auxiliary scalar function fts5_test(). +# +proc matchdata {bPos expr {bAsc 0}} { + + set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] + set res [list] + + #puts $tclexpr + foreach {id x y} $::data { + set cols [list $x $y] + if $tclexpr { + if {$bPos} { + set N [regexp -all -inline {\[nearset [^\]]*\]} $tclexpr] + set rowres [list] + foreach phrase $N { + set cmd "poslist [string range $phrase 9 end-1]" + lappend rowres [eval $cmd] + } + if {[string first "\{" $rowres]<0} { set rowres "{{$rowres}}" } + lappend res [list $id $rowres] + } else { + lappend res $id + } + } + } + + if {$bAsc} { + set res [lsort -integer -increasing -index 0 $res] + } else { + set res [lsort -integer -decreasing -index 0 $res] + } + + return [concat {*}$res] +} + + foreach {tn phrase} { 1 "o" 2 "b q" @@ -243,10 +318,10 @@ foreach {tn phrase} { } { set expr "\"$phrase\"" - set res [matchdata $expr] + set res [matchdata 1 $expr] do_execsql_test 1.2.$tn.[llength $res] { - SELECT rowid FROM xx WHERE xx match $expr + SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr } $res } @@ -261,6 +336,10 @@ do_test 2.5 { nearmatch 400 {a b} {a x x b} } 1 do_test 2.6 { nearmatch 0 {a} {a x x b} } 1 do_test 2.7 { nearmatch 0 {b} {a x x b} } 1 +do_test 2.8 { poslist {{a b c}} -- a } {0.0} +do_test 2.9 { poslist {{a b c}} -- c } {0.2} + + foreach {tn expr tclexpr} { 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} } { @@ -275,7 +354,6 @@ foreach {bAsc sql} { } { foreach {tn expr} { 0.1 x - 1 { NEAR(r c) } 2 { NEAR(r c, 5) } 3 { NEAR(r c, 3) } @@ -297,12 +375,10 @@ foreach {bAsc sql} { 18 { c NOT (b OR a) } 19 { c NOT b OR a AND d } } { - set res [matchdata $expr $bAsc] + set res [matchdata 0 $expr $bAsc] do_execsql_test 4.$bAsc.$tn.[llength $res] $sql $res } } - - finish_test From 
18689f1bd4e6f5c791146d2a1d6bb232c3769f2c Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 16 Jul 2014 20:07:59 +0000 Subject: [PATCH 013/206] Fixes for tcl list generation in fts5_test(). FossilOrigin-Name: c1f9a4b76c0bbc1ef9f6cdb5d62aa5d536fdf38e --- ext/fts5/fts5_aux.c | 39 ++++++++++++++++++------ manifest | 14 ++++----- manifest.uuid | 2 +- test/fts5ac.test | 74 +++++++++++++++------------------------------ 4 files changed, 62 insertions(+), 67 deletions(-) diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 3eea3c6ec2..c7e2deccac 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -81,22 +81,43 @@ static void fts5TestFunction( sqlite3Fts5BufferAppendPrintf(&rc, &s, " poslist "); } if( 0==zReq || 0==sqlite3_stricmp(zReq, "poslist") ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); + int bParen = 0; + Fts5Buffer s3; + memset(&s3, 0, sizeof(s3)); + + for(i=0; ixPoslist(pFts, i, &j, &iCol, &iOff) ){ - sqlite3Fts5BufferAppendPrintf( - &rc, &s, "%s%d.%d", (bFirst?"":" "), iCol, iOff - ); - bFirst = 0; + if( nElem!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s2, " "); + sqlite3Fts5BufferAppendPrintf(&rc, &s2, "%d.%d", iCol, iOff); + nElem++; } - sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); + + if( i!=0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s3, " "); + } + if( nElem==1 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s3, "%s", (const char*)s2.p); + }else{ + sqlite3Fts5BufferAppendPrintf(&rc, &s3, "{%s}", (const char*)s2.p); + bParen = 1; + } + sqlite3_free(s2.p); } - sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); + + if(zReq==0 && (nPhrase>1 || bParen) ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "{%s}", (const char*)s3.p); + }else{ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s", (const char*)s3.p); + } + sqlite3_free(s3.p); } if( zReq==0 ){ diff --git a/manifest b/manifest index 16bcb4aad6..607b5f1c62 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Begin\sadding\sinterface\sfor\sauxiliary\sfunctions. 
-D 2014-07-16T19:15:57.212 +C Fixes\sfor\stcl\slist\sgeneration\sin\sfts5_test(). +D 2014-07-16T20:07:59.378 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,7 +106,7 @@ F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 20bcb1e10756c72b550947236960edf96929ca2f F ext/fts5/fts5.h cda3b9d73e6ffa6d0cd35b7da6b808bf3a1ada32 F ext/fts5/fts5Int.h 2d4c1e1ebdf18278776fcd8a64233ff3c04ea51f -F ext/fts5/fts5_aux.c 53ab338c6a469dc67e7a6bd8685ce727beee8403 +F ext/fts5/fts5_aux.c 27b082732fd76277fd7e9277f52903723d97f99b F ext/fts5/fts5_buffer.c b7aa6cdf4a63642fcc12359cedc4be748ca400cc F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c e4e4e6d32beff1ab0d076f8fbf5cf3b2241d4dbc @@ -597,7 +597,7 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 -F test/fts5ac.test 398a2d8d9576e0579a0f0955fabd8410ace969e4 +F test/fts5ac.test 14d05f412b99ccac34316b76861b5bfe3a33d0a1 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d @@ -1194,7 +1194,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b96b5e166990e4ec363b24f66e04cfa5f00f6342 -R ff6cbab233811678a295f9640beec5d4 +P 1e2a7ba0889093416455f488fca893eaeb195d45 +R c6840136b0b681ac8e496da634d56e88 U dan -Z 5e7398b52fb14b2e0bc342aa9223ff97 +Z a290facfa88166ccaa147b2550745bf7 diff --git 
a/manifest.uuid b/manifest.uuid index 7b1ee2441d..00e65c7fa9 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1e2a7ba0889093416455f488fca893eaeb195d45 \ No newline at end of file +c1f9a4b76c0bbc1ef9f6cdb5d62aa5d536fdf38e \ No newline at end of file diff --git a/test/fts5ac.test b/test/fts5ac.test index 66cce1342b..db5d9f151b 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -137,42 +137,6 @@ do_test 1.1 { } } {} -proc phrasematch {phrase value} { - if {[string first $phrase $value]>=0} { return 1 } - return 0 -} - -# Usage: -# -proc nearmatch {nNear phraselist value} { - set nPhrase [llength $phraselist] - - set phraselist [string tolower $phraselist] - set value [string tolower $value] - - if {$nPhrase==1} { - set bMatch [phrasematch [lindex $phraselist 0] $value] - } else { - set nValue [llength $value] - if {$nNear >= $nValue} {set nNear [expr $nValue-1]} - - for {set i $nNear} {$i < $nValue} {incr i} { - set bMatch 1 - foreach phrase $phraselist { - set iMin [expr $i - $nNear - [llength $phrase]] - set iMax [expr $i - 1 + [llength $phrase]] - set subdoc [lrange $value $iMin $iMax] - if {![phrasematch $phrase $subdoc]} { - set bMatch 0 - break - } - } - if {$bMatch} break - } - } - return $bMatch -} - # Usage: # # poslist aCol ?-near N? ?-col C? -- phrase1 phrase2... 
@@ -243,6 +207,7 @@ proc poslist {aCol args} { lappend res $plist } + #puts $res return $res } @@ -284,9 +249,9 @@ proc matchdata {bPos expr {bAsc 0}} { set rowres [list] foreach phrase $N { set cmd "poslist [string range $phrase 9 end-1]" - lappend rowres [eval $cmd] + set pos [eval $cmd] + set rowres [concat $rowres $pos] } - if {[string first "\{" $rowres]<0} { set rowres "{{$rowres}}" } lappend res [list $id $rowres] } else { lappend res $id @@ -303,7 +268,13 @@ proc matchdata {bPos expr {bAsc 0}} { return [concat {*}$res] } +# +# End of test code +#------------------------------------------------------------------------- +#------------------------------------------------------------------------- +# Test phrase queries. +# foreach {tn phrase} { 1 "o" 2 "b q" @@ -316,7 +287,6 @@ foreach {tn phrase} { 9 "no" 10 "L O O L V V K" } { - set expr "\"$phrase\"" set res [matchdata 1 $expr] @@ -325,20 +295,24 @@ foreach {tn phrase} { } $res } -# Test the "nearmatch" commnad. +#------------------------------------------------------------------------- +# Test some AND and OR queries. 
# -do_test 2.0 { nearmatch 2 {a b} {a x x b} } 1 -do_test 2.1 { nearmatch 2 {b a} {a x x b} } 1 -do_test 2.2 { nearmatch 1 {b a} {a x x b} } 0 -do_test 2.3 { nearmatch 1 {"a b" "c d"} {x x a b x c d} } 1 -do_test 2.4 { nearmatch 1 {"a b" "c d"} {x a b x x c d} } 0 -do_test 2.5 { nearmatch 400 {a b} {a x x b} } 1 -do_test 2.6 { nearmatch 0 {a} {a x x b} } 1 -do_test 2.7 { nearmatch 0 {b} {a x x b} } 1 +foreach {tn expr} { + 1 "a AND b" + 2 "a+b AND c" + 3 "d+c AND u" + 4 "d+c AND u+d" +} { + set res [matchdata 1 $expr] + do_execsql_test 2.1.$tn.[llength $res] { + SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr + } $res +} -do_test 2.8 { poslist {{a b c}} -- a } {0.0} -do_test 2.9 { poslist {{a b c}} -- c } {0.2} +do_test 2.1 { poslist {{a b c}} -- a } {0.0} +do_test 2.2 { poslist {{a b c}} -- c } {0.2} foreach {tn expr tclexpr} { 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} From c5b44f3d906ea814e50a955e31dcf3b39c3b7487 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 17 Jul 2014 15:14:07 +0000 Subject: [PATCH 014/206] Fix a problem with position list processing for OR queries. FossilOrigin-Name: 5808f30fae0d844c52a785bf18872be371d4af68 --- ext/fts5/fts5_expr.c | 13 +++++++------ manifest | 14 +++++++------- manifest.uuid | 2 +- test/fts5ac.test | 16 +++++++++++----- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index bcbc3745e7..3ea885e75c 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1396,13 +1396,14 @@ int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ ** iPhrase. 
*/ int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ - if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ - *pa = 0; - return 0; - }else{ + if( iPhrase>=0 && iPhrasenPhrase ){ Fts5ExprPhrase *pPhrase = pExpr->apPhrase[iPhrase]; - *pa = pPhrase->poslist.p; - return pPhrase->poslist.n; + if( sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter)==pExpr->pRoot->iRowid ){ + *pa = pPhrase->poslist.p; + return pPhrase->poslist.n; + } } + *pa = 0; + return 0; } diff --git a/manifest b/manifest index 607b5f1c62..7d4537622b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sfor\stcl\slist\sgeneration\sin\sfts5_test(). -D 2014-07-16T20:07:59.378 +C Fix\sa\sproblem\swith\sposition\slist\sprocessing\sfor\sOR\squeries. +D 2014-07-17T15:14:07.541 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -109,7 +109,7 @@ F ext/fts5/fts5Int.h 2d4c1e1ebdf18278776fcd8a64233ff3c04ea51f F ext/fts5/fts5_aux.c 27b082732fd76277fd7e9277f52903723d97f99b F ext/fts5/fts5_buffer.c b7aa6cdf4a63642fcc12359cedc4be748ca400cc F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c e4e4e6d32beff1ab0d076f8fbf5cf3b2241d4dbc +F ext/fts5/fts5_expr.c 52a1b47cfd30feb09c522392b1ba246eda7023f4 F ext/fts5/fts5_index.c 9ff3008e903aa9077b0a7a7aa76ab6080eb07a36 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -597,7 +597,7 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 -F test/fts5ac.test 14d05f412b99ccac34316b76861b5bfe3a33d0a1 +F test/fts5ac.test 84599f8253abc7e10b929b8ee0b47c5edd4eafbd F test/fts5ad.test 
2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d @@ -1194,7 +1194,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1e2a7ba0889093416455f488fca893eaeb195d45 -R c6840136b0b681ac8e496da634d56e88 +P c1f9a4b76c0bbc1ef9f6cdb5d62aa5d536fdf38e +R c3e4d3fb829636894b73dbf001062a3f U dan -Z a290facfa88166ccaa147b2550745bf7 +Z 4c19ad3988f765a5de2b9174d276eba9 diff --git a/manifest.uuid b/manifest.uuid index 00e65c7fa9..0f87023763 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c1f9a4b76c0bbc1ef9f6cdb5d62aa5d536fdf38e \ No newline at end of file +5808f30fae0d844c52a785bf18872be371d4af68 \ No newline at end of file diff --git a/test/fts5ac.test b/test/fts5ac.test index db5d9f151b..3257480134 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -153,7 +153,7 @@ proc poslist {aCol args} { set O($k) $v } - # Set phraselist to be a list of phrases. nPhrase its length. + # Set $phraselist to be a list of phrases. $nPhrase its length. set phraselist [lrange $args [expr $nOpt+1] end] set nPhrase [llength $phraselist] @@ -299,10 +299,16 @@ foreach {tn phrase} { # Test some AND and OR queries. # foreach {tn expr} { - 1 "a AND b" - 2 "a+b AND c" - 3 "d+c AND u" - 4 "d+c AND u+d" + 1.1 "a AND b" + 1.2 "a+b AND c" + 1.3 "d+c AND u" + 1.4 "d+c AND u+d" + + 2.1 "a OR b" + 2.2 "a+b OR c" + 2.3 "d+c OR u" + 2.4 "d+c OR u+d" + } { set res [matchdata 1 $expr] do_execsql_test 2.1.$tn.[llength $res] { From 30b650e2f423bda7af3f2ca2ae8eb9442b5b4f7e Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 18 Jul 2014 19:59:00 +0000 Subject: [PATCH 015/206] Fix issues with position lists and NEAR constraints. 
FossilOrigin-Name: 16352d3654d5672cd0251db51dbe19f779373feb --- ext/fts5/fts5Int.h | 9 ++- ext/fts5/fts5_buffer.c | 56 +++++++++------ ext/fts5/fts5_expr.c | 156 ++++++++++++++++++++++++++++++++++------- manifest | 19 ++--- manifest.uuid | 2 +- test/fts5ac.test | 46 ++++++++++-- test/fts5ae.test | 136 +++++++++++++++++++++++++++++++++++ 7 files changed, 357 insertions(+), 67 deletions(-) create mode 100644 test/fts5ae.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index c7392b06e6..d9249b93b1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -118,8 +118,7 @@ int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); typedef struct Fts5PoslistWriter Fts5PoslistWriter; struct Fts5PoslistWriter { - int iCol; - int iOff; + i64 iPrev; }; int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); @@ -130,6 +129,12 @@ int sqlite3Fts5PoslistNext( int *piOff /* IN/OUT: Current token offset */ ); +int sqlite3Fts5PoslistNext64( + const u8 *a, int n, /* Buffer containing poslist */ + int *pi, /* IN/OUT: Offset within a[] */ + i64 *piOff /* IN/OUT: Current offset */ +); + /* ** End of interface to code in fts5_buffer.c. 
**************************************************************************/ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 2afce95020..b69af615b9 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -138,27 +138,40 @@ void sqlite3Fts5BufferSet( sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); } +int sqlite3Fts5PoslistNext64( + const u8 *a, int n, /* Buffer containing poslist */ + int *pi, /* IN/OUT: Offset within a[] */ + i64 *piOff /* IN/OUT: Current offset */ +){ + int i = *pi; + if( i>=n ){ + /* EOF */ + return 1; + }else{ + i64 iOff = *piOff; + int iVal; + i += getVarint32(&a[i], iVal); + if( iVal==1 ){ + i += getVarint32(&a[i], iVal); + iOff = ((i64)iVal) << 32; + i += getVarint32(&a[i], iVal); + } + *piOff = iOff + (iVal-2); + *pi = i; + return 0; + } +} + /* ** Advance the iterator object passed as the only argument. Return true ** if the iterator reaches EOF, or false otherwise. */ int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ - if( pIter->i>=pIter->n ){ + if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) + || (pIter->iCol>=0 && (pIter->iPos >> 32) > pIter->iCol) + ){ pIter->bEof = 1; - }else{ - int iVal; - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - if( iVal==1 ){ - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - if( pIter->iCol>=0 && iVal>pIter->iCol ){ - pIter->bEof = 1; - }else{ - pIter->iPos = ((u64)iVal << 32); - pIter->i += getVarint32(&pIter->a[pIter->i], iVal); - } - } - pIter->iPos += (iVal-2); } return pIter->bEof; } @@ -183,18 +196,15 @@ int sqlite3Fts5PoslistWriterAppend( Fts5PoslistWriter *pWriter, i64 iPos ){ + static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; int rc = SQLITE_OK; - int iCol = (int)(iPos >> 32); - int iOff = (iPos & 0x7FFFFFFF); - - if( iCol!=pWriter->iCol ){ + if( (iPos & colmask) != (pWriter->iPrev & colmask) ){ fts5BufferAppendVarint(&rc, pBuf, 1); - fts5BufferAppendVarint(&rc, pBuf, iCol); - pWriter->iCol = iCol; - 
pWriter->iOff = 0; + fts5BufferAppendVarint(&rc, pBuf, (iPos >> 32)); + pWriter->iPrev = (iPos & colmask); } - fts5BufferAppendVarint(&rc, pBuf, (iOff - pWriter->iOff) + 2); - + fts5BufferAppendVarint(&rc, pBuf, (iPos - pWriter->iPrev) + 2); + pWriter->iPrev = iPos; return rc; } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 3ea885e75c..06faf7ebff 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -52,7 +52,7 @@ struct Fts5ExprNode { Fts5ExprNode *pRight; /* Right hand child node */ Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ int bEof; /* True at EOF */ - i64 iRowid; + i64 iRowid; /* Current rowid */ }; /* @@ -70,6 +70,7 @@ struct Fts5ExprTerm { ** within a document for it to match. */ struct Fts5ExprPhrase { + Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ Fts5Buffer poslist; /* Current position list */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ @@ -266,7 +267,7 @@ static int fts5ExprPhraseIsMatch( Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ - Fts5PoslistWriter writer = {0, 0}; + Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int i; @@ -322,6 +323,46 @@ static int fts5ExprPhraseIsMatch( return rc; } +typedef struct Fts5LookaheadReader Fts5LookaheadReader; +struct Fts5LookaheadReader { + const u8 *a; /* Buffer containing position list */ + int n; /* Size of buffer a[] in bytes */ + int i; /* Current offset in position list */ + i64 iPos; /* Current position */ + i64 iLookahead; /* Next position */ +}; + +#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62) + +static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ + p->iPos = p->iLookahead; + if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ + p->iLookahead = FTS5_LOOKAHEAD_EOF; + } + return (p->iPos==FTS5_LOOKAHEAD_EOF); +} + +static int 
fts5LookaheadReaderInit( + const u8 *a, int n, /* Buffer to read position list from */ + Fts5LookaheadReader *p /* Iterator object to initialize */ +){ + memset(p, 0, sizeof(Fts5LookaheadReader)); + p->a = a; + p->n = n; + fts5LookaheadReaderNext(p); + return fts5LookaheadReaderNext(p); +} + +static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){ + return (p->iPos==FTS5_LOOKAHEAD_EOF); +} + +typedef struct Fts5NearTrimmer Fts5NearTrimmer; +struct Fts5NearTrimmer { + Fts5LookaheadReader reader; /* Input iterator */ + Fts5PoslistWriter writer; /* Writer context */ + Fts5Buffer *pOut; /* Output poslist */ +}; /* ** The near-set object passed as the first argument contains more than @@ -340,8 +381,11 @@ static int fts5ExprPhraseIsMatch( ** a set of intances that collectively matches the NEAR constraint. */ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ - Fts5PoslistReader aStatic[4]; - Fts5PoslistReader *aIter = aStatic; + Fts5NearTrimmer aStatic[4]; + Fts5NearTrimmer *a = aStatic; + + Fts5ExprPhrase **apPhrase = pNear->apPhrase; + int i; int rc = SQLITE_OK; int bMatch; @@ -352,36 +396,75 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ - int nByte = sizeof(Fts5PoslistReader) * pNear->nPhrase; - aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); - if( !aIter ) return SQLITE_NOMEM; + int nByte = sizeof(Fts5LookaheadReader) * pNear->nPhrase; + a = (Fts5NearTrimmer*)sqlite3_malloc(nByte); + if( !a ) return SQLITE_NOMEM; + memset(a, 0, nByte); + }else{ + memset(aStatic, 0, sizeof(aStatic)); } - /* Initialize a term iterator for each phrase */ + /* Initialize a lookahead iterator for each phrase. After passing the + ** buffer and buffer size to the lookahead-reader init function, zero + ** the phrase poslist buffer. 
The new poslist for the phrase (containing + ** the same entries as the original with some entries removed on account + ** of the NEAR constraint) is written over the original even as it is + ** being read. This is safe as the entries for the new poslist are a + ** subset of the old, so it is not possible for data yet to be read to + ** be overwritten. */ for(i=0; inPhrase; i++){ - Fts5Buffer *pPoslist = &pNear->apPhrase[i]->poslist; - sqlite3Fts5PoslistReaderInit(-1, pPoslist->p, pPoslist->n, &aIter[i]); + Fts5Buffer *pPoslist = &apPhrase[i]->poslist; + fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); + pPoslist->n = 0; + a[i].pOut = pPoslist; } - iMax = aIter[0].iPos; - do { - bMatch = 1; - for(i=0; inPhrase; i++){ - Fts5PoslistReader *pPos = &aIter[i]; - i64 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; - if( pPos->iPosiPos>iMax ){ - bMatch = 0; - while( pPos->iPosnPhrase; i++){ + Fts5LookaheadReader *pPos = &a[i].reader; + iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; + if( pPos->iPosiPos>iMax ){ + bMatch = 0; + while( pPos->iPosiPos>iMax ) iMax = pPos->iPos; } - if( pPos->iPos>iMax ) iMax = pPos->iPos; + } + }while( bMatch==0 ); + + /* Add an entry to each output position list */ + for(i=0; inPhrase; i++){ + i64 iPos = a[i].reader.iPos; + Fts5PoslistWriter *pWriter = &a[i].writer; + if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ + sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); } } - }while( bMatch==0 ); + + iAdv = 0; + iMin = a[0].reader.iLookahead; + for(i=0; inPhrase; i++){ + if( a[i].reader.iLookahead < iMin ){ + iMin = a[i].reader.iLookahead; + iAdv = i; + } + } + if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; + } ismatch_out: - *pbMatch = bMatch; - if( aIter!=aStatic ) sqlite3_free(aIter); + *pbMatch = (a[0].pOut->n>0); + if( a!=aStatic ) sqlite3_free(a); return rc; } @@ -519,7 +602,7 @@ static int fts5ExprNearNextMatch( while( 1 ){ int i; - /* Advance the iterators until they are a match */ 
+ /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode); if( pNode->bEof || rc!=SQLITE_OK ) break; @@ -545,6 +628,8 @@ static int fts5ExprNearNextMatch( if( rc!=SQLITE_OK || bMatch ) break; } + /* If control flows to here, then the current rowid is not a match. + ** Advance all term iterators in all phrases to the next rowid. */ rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof); if( pNode->bEof || rc!=SQLITE_OK ) break; } @@ -661,6 +746,14 @@ static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ return rc; } +static void fts5ExprSetEof(Fts5ExprNode *pNode){ + if( pNode ){ + pNode->bEof = 1; + fts5ExprSetEof(pNode->pLeft); + fts5ExprSetEof(pNode->pRight); + } +} + /* ** */ @@ -688,7 +781,9 @@ static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ rc = fts5ExprNodeNext(pExpr, pAdv); if( rc!=SQLITE_OK ) break; } - pNode->bEof = p1->bEof || p2->bEof; + if( p1->bEof || p2->bEof ){ + fts5ExprSetEof(pNode); + } pNode->iRowid = p1->iRowid; break; } @@ -1090,6 +1185,12 @@ Fts5ExprNode *sqlite3Fts5ParseNode( pRet->pLeft = pLeft; pRet->pRight = pRight; pRet->pNear = pNear; + if( eType==FTS5_STRING ){ + int iPhrase; + for(iPhrase=0; iPhrasenPhrase; iPhrase++){ + pNear->apPhrase[iPhrase]->pNode = pRet; + } + } } } @@ -1398,7 +1499,8 @@ int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ if( iPhrase>=0 && iPhrasenPhrase ){ Fts5ExprPhrase *pPhrase = pExpr->apPhrase[iPhrase]; - if( sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter)==pExpr->pRoot->iRowid ){ + Fts5ExprNode *pNode = pPhrase->pNode; + if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ *pa = pPhrase->poslist.p; return pPhrase->poslist.n; } diff --git a/manifest b/manifest index 7d4537622b..c84bbf7a32 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sposition\slist\sprocessing\sfor\sOR\squeries. 
-D 2014-07-17T15:14:07.541 +C Fix\sissues\swith\sposition\slists\sand\sNEAR\sconstraints. +D 2014-07-18T19:59:00.547 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,11 +105,11 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 20bcb1e10756c72b550947236960edf96929ca2f F ext/fts5/fts5.h cda3b9d73e6ffa6d0cd35b7da6b808bf3a1ada32 -F ext/fts5/fts5Int.h 2d4c1e1ebdf18278776fcd8a64233ff3c04ea51f +F ext/fts5/fts5Int.h 6cf315d3999c14572012d676fa1baf4f4323587b F ext/fts5/fts5_aux.c 27b082732fd76277fd7e9277f52903723d97f99b -F ext/fts5/fts5_buffer.c b7aa6cdf4a63642fcc12359cedc4be748ca400cc +F ext/fts5/fts5_buffer.c 71cf2016b2881e7aea39f952995eafa510d96cbd F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 52a1b47cfd30feb09c522392b1ba246eda7023f4 +F ext/fts5/fts5_expr.c 288b3e016253eab69ea8cefbff346a4697b44291 F ext/fts5/fts5_index.c 9ff3008e903aa9077b0a7a7aa76ab6080eb07a36 F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -597,8 +597,9 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 -F test/fts5ac.test 84599f8253abc7e10b929b8ee0b47c5edd4eafbd +F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 +F test/fts5ae.test 5d5ffba68e850d9ade99cdd3f5c6431c82dad81d F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test 
ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1194,7 +1195,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c1f9a4b76c0bbc1ef9f6cdb5d62aa5d536fdf38e -R c3e4d3fb829636894b73dbf001062a3f +P 5808f30fae0d844c52a785bf18872be371d4af68 +R c645036fa73431553c03d7990bbe09ec U dan -Z 4c19ad3988f765a5de2b9174d276eba9 +Z 973aba4d2c5c8ae6a3e94f9739fdfe1b diff --git a/manifest.uuid b/manifest.uuid index 0f87023763..e082d6160a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5808f30fae0d844c52a785bf18872be371d4af68 \ No newline at end of file +16352d3654d5672cd0251db51dbe19f779373feb \ No newline at end of file diff --git a/test/fts5ac.test b/test/fts5ac.test index 3257480134..ae6e56e7e7 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -309,21 +309,57 @@ foreach {tn expr} { 2.3 "d+c OR u" 2.4 "d+c OR u+d" + 3.1 { a AND b AND c } } { set res [matchdata 1 $expr] - do_execsql_test 2.1.$tn.[llength $res] { + do_execsql_test 2.$tn.[llength $res] { SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr } $res } +#------------------------------------------------------------------------- +# Queries on a specific column. +# +foreach {tn expr} { + 1 "x:a" + 2 "y:a" + 3 "x:b" + 4 "y:b" +} { + set res [matchdata 1 $expr] + do_execsql_test 3.$tn.[llength $res] { + SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr + } $res +} -do_test 2.1 { poslist {{a b c}} -- a } {0.0} -do_test 2.2 { poslist {{a b c}} -- c } {0.2} +#------------------------------------------------------------------------- +# Some NEAR queries. 
+# +foreach {tn expr} { + 1 "NEAR(a b)" + 2 "NEAR(r c)" + 2 { NEAR(r c, 5) } + 3 { NEAR(r c, 3) } + 4 { NEAR(r c, 2) } + 5 { NEAR(r c, 0) } + 6 { NEAR(a b c) } + 7 { NEAR(a b c, 8) } + 8 { x : NEAR(r c) } + 9 { y : NEAR(r c) } +} { + set res [matchdata 1 $expr] + do_execsql_test 4.1.$tn.[llength $res] { + SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr + } $res +} + +do_test 4.1 { poslist {{a b c}} -- a } {0.0} +do_test 4.2 { poslist {{a b c}} -- c } {0.2} foreach {tn expr tclexpr} { 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} } { - do_execsql_test 3.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr] + do_execsql_test 5.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr] } #------------------------------------------------------------------------- @@ -356,7 +392,7 @@ foreach {bAsc sql} { 19 { c NOT b OR a AND d } } { set res [matchdata 0 $expr $bAsc] - do_execsql_test 4.$bAsc.$tn.[llength $res] $sql $res + do_execsql_test 6.$bAsc.$tn.[llength $res] $sql $res } } diff --git a/test/fts5ae.test b/test/fts5ae.test new file mode 100644 index 0000000000..c433d43208 --- /dev/null +++ b/test/fts5ae.test @@ -0,0 +1,136 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5ae + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. 
+ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b); + INSERT INTO t1(t1) VALUES('pgsz=32'); +} + +do_execsql_test 1.1 { + INSERT INTO t1 VALUES('hello', 'world'); + SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC; +} {1} + +do_execsql_test 1.2 { + INSERT INTO t1 VALUES('world', 'hello'); + SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC; +} {1 2} + +do_execsql_test 1.3 { + INSERT INTO t1 VALUES('world', 'world'); + SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC; +} {1 2} + +do_execsql_test 1.4.1 { + INSERT INTO t1 VALUES('hello', 'hello'); +} + +do_execsql_test 1.4.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC; +} {1 2 4} + + +#------------------------------------------------------------------------- +# +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t2 USING fts5(x, y); + INSERT INTO t2 VALUES('u t l w w m s', 'm f m o l t k o p e'); + INSERT INTO t2 VALUES('f g q e l n d m z x q', 'z s i i i m f w w f n g p'); +} + +do_execsql_test 2.1 { + SELECT rowid, fts5_test(t2, 'poslist') FROM t2 + WHERE t2 MATCH 'm' ORDER BY rowid; +} { + 1 {{0.5 1.0 1.2}} + 2 {{0.7 1.5}} +} + +do_execsql_test 2.2 { + SELECT rowid, fts5_test(t2, 'poslist') FROM t2 + WHERE t2 MATCH 'u OR q' ORDER BY rowid; +} { + 1 {0.0 {}} + 2 {{} {0.2 0.10}} +} + +do_execsql_test 2.3 { + SELECT rowid, fts5_test(t2, 'poslist') FROM t2 + WHERE t2 MATCH 'y:o' ORDER BY rowid; +} { + 1 {{1.3 1.7}} +} + +#------------------------------------------------------------------------- +# +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t3 USING fts5(x, y); + INSERT INTO t3 VALUES( 'j f h o x x a z g b a f a m i b', 'j z c z y x w t'); + INSERT INTO t3 VALUES( 'r c', ''); +} + +do_execsql_test 3.1 { + SELECT rowid, fts5_test(t3, 'poslist') FROM t3 WHERE t3 MATCH 'NEAR(a b)'; +} { + 1 {{0.6 0.10 0.12} {0.9 0.15}} +} + +do_execsql_test 3.2 { + SELECT rowid, fts5_test(t3, 'poslist') FROM t3 WHERE 
t3 MATCH 'NEAR(r c)'; +} { + 2 {0.0 0.1} +} + +do_execsql_test 3.3 { + INSERT INTO t3 + VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o'); + SELECT rowid, fts5_test(t3, 'poslist') + FROM t3 WHERE t3 MATCH 'a OR b AND c'; +} { + 3 {0.5 {} {}} + 1 {{0.6 0.10 0.12} {0.9 0.15} 1.2} +} + +#------------------------------------------------------------------------- +# +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t4 USING fts5(x, y); + INSERT INTO t4 + VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o'); +} + +breakpoint +do_execsql_test 4.1 { + SELECT rowid, fts5_test(t4, 'poslist') FROM t4 WHERE t4 MATCH 'a OR b AND c'; +} { + 1 {0.5 {} {}} +} + +#93 {0.5 1.6 {}} + + +finish_test + + From 4a165c0af4f4f613b664ce75fa6a163ef15f83dc Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 19 Jul 2014 15:35:09 +0000 Subject: [PATCH 016/206] Fixes for the xColumnSize() fts5 extension API. FossilOrigin-Name: 43fcb844726cfeeb1c8a0dbfaa0d2ca22e6ac16c --- ext/fts5/fts5.c | 69 +++++++++++++++++++++++-------- ext/fts5/fts5.h | 8 ++++ ext/fts5/fts5Int.h | 3 ++ ext/fts5/fts5_aux.c | 15 ++++++- ext/fts5/fts5_storage.c | 91 +++++++++++++++++++++++++++++++++++++++-- manifest | 24 +++++------ manifest.uuid | 2 +- test/fts5ae.test | 22 ++++++++-- test/permutations.test | 2 +- 9 files changed, 198 insertions(+), 38 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 1278ab11f7..0db73196f2 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -69,16 +69,30 @@ struct Fts5Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ int idxNum; /* idxNum passed to xFilter() */ sqlite3_stmt *pStmt; /* Statement used to read %_content */ - int bEof; /* True at EOF */ Fts5Expr *pExpr; /* Expression for MATCH queries */ - int bSeekRequired; /* True if seek is required */ + int csrflags; /* Mask of cursor flags (see below) */ Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ /* Variables used by auxiliary functions */ i64 iCsrId; /* Cursor id */ 
Fts5Auxiliary *pAux; /* Currently executing function */ + int *aColumnSize; /* Values for xColumnSize() */ }; +/* +** Values for Fts5Cursor.csrflags +*/ +#define FTS5CSR_REQUIRE_CONTENT 0x01 +#define FTS5CSR_REQUIRE_DOCSIZE 0x02 +#define FTS5CSR_EOF 0x04 + +/* +** Macros to Set(), Clear() and Test() cursor flags. +*/ +#define CsrFlagSet(pCsr, flag) ((pCsr)->csrflags |= (flag)) +#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag)) +#define CsrFlagTest(pCsr, flag) ((pCsr)->csrflags & (flag)) + /* ** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy ** argument is non-zero, attempt delete the shadow tables from teh database @@ -275,13 +289,17 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ */ static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ Fts5Table *pTab = (Fts5Table*)pVTab; + Fts5Config *pConfig = pTab->pConfig; Fts5Cursor *pCsr; /* New cursor object */ + int nByte; /* Bytes of space to allocate */ int rc = SQLITE_OK; /* Return code */ - pCsr = (Fts5Cursor*)sqlite3_malloc(sizeof(Fts5Cursor)); + nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); + pCsr = (Fts5Cursor*)sqlite3_malloc(nByte); if( pCsr ){ Fts5Global *pGlobal = pTab->pGlobal; - memset(pCsr, 0, sizeof(Fts5Cursor)); + memset(pCsr, 0, nByte); + pCsr->aColumnSize = (int*)&pCsr[1]; pCsr->pNext = pGlobal->pCsr; pGlobal->pCsr = pCsr; pCsr->iCsrId = ++pGlobal->iNextId; @@ -338,15 +356,17 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ if( ePlan!=FTS5_PLAN_MATCH ){ rc = sqlite3_step(pCsr->pStmt); if( rc!=SQLITE_ROW ){ - pCsr->bEof = 1; + CsrFlagSet(pCsr, FTS5CSR_EOF); rc = sqlite3_reset(pCsr->pStmt); }else{ rc = SQLITE_OK; } }else{ rc = sqlite3Fts5ExprNext(pCsr->pExpr); - pCsr->bEof = sqlite3Fts5ExprEof(pCsr->pExpr); - pCsr->bSeekRequired = 1; + if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ + CsrFlagSet(pCsr, FTS5CSR_EOF); + } + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); } return 
rc; @@ -371,8 +391,10 @@ static int fts5FilterMethod( int eStmt = fts5StmtType(idxNum); int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0); - memset(&pCursor[1], 0, sizeof(Fts5Cursor) - sizeof(sqlite3_vtab_cursor)); pCsr->idxNum = idxNum; + assert( pCsr->pStmt==0 ); + assert( pCsr->pExpr==0 ); + assert( pCsr->csrflags==0 ); rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); if( rc==SQLITE_OK ){ @@ -382,8 +404,10 @@ static int fts5FilterMethod( rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); - pCsr->bEof = sqlite3Fts5ExprEof(pCsr->pExpr); - pCsr->bSeekRequired = 1; + if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ + CsrFlagSet(pCsr, FTS5CSR_EOF); + } + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); } }else{ if( ePlan==FTS5_PLAN_ROWID ){ @@ -402,7 +426,7 @@ static int fts5FilterMethod( */ static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - return pCsr->bEof; + return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 
1 : 0); } /* @@ -415,7 +439,7 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = FTS5_PLAN(pCsr->idxNum); - assert( pCsr->bEof==0 ); + assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); if( ePlan!=FTS5_PLAN_MATCH ){ *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); }else{ @@ -431,13 +455,14 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ */ static int fts5SeekCursor(Fts5Cursor *pCsr){ int rc = SQLITE_OK; - if( pCsr->bSeekRequired ){ + if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ assert( pCsr->pExpr ); sqlite3_reset(pCsr->pStmt); sqlite3_bind_int64(pCsr->pStmt, 1, sqlite3Fts5ExprRowid(pCsr->pExpr)); rc = sqlite3_step(pCsr->pStmt); if( rc==SQLITE_ROW ){ rc = SQLITE_OK; + CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT); }else{ rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK ){ @@ -461,7 +486,7 @@ static int fts5ColumnMethod( Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int rc = SQLITE_OK; - assert( pCsr->bEof==0 ); + assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); if( iCol==pConfig->nCol ){ /* User is requesting the value of the special column with the same name @@ -639,8 +664,20 @@ static int fts5ApiColumnText( } static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ - assert( 0 ); - return 0; + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + int rc = SQLITE_OK; + + if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ + i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); + rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); + } + if( iCol>=0 && iColpConfig->nCol ){ + *pnToken = pCsr->aColumnSize[iCol]; + }else{ + *pnToken = 0; + } + return rc; } static int fts5ApiPoslist( diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 2e7363006d..4c8f6c0a67 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -42,9 +42,17 @@ typedef void (*fts5_extension_function)( ); /* +** +** xUserData: +** 
Return a copy of the context pointer the extension function was +** registered with. +** ** xColumnCount: ** Returns the number of columns in the FTS5 table. ** +** xColumnSize: +** Reports the size in tokens of a column value from the current row. +** ** xPhraseCount: ** Returns the number of phrases in the current query expression. ** diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index d9249b93b1..c56214c8d1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -295,6 +295,9 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p); int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); +int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); +int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int *aCol); + /* ** End of interface to code in fts5_storage.c. diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index c7e2deccac..ed7beb932e 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -47,11 +47,24 @@ static void fts5TestFunction( if( zReq==0 ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, "columncount "); } + nCol = pApi->xColumnCount(pFts); if( 0==zReq || 0==sqlite3_stricmp(zReq, "columncount") ){ - nCol = pApi->xColumnCount(pFts); sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nCol); } + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "columnsize "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "columnsize") ){ + if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); + for(i=0; rc==SQLITE_OK && ixColumnSize(pFts, i, &colsz); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", i==0?"":" ", colsz); + } + if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); + } + if( zReq==0 ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasecount "); } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 6b86218e42..d3715dac07 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -17,8 +17,7 @@ 
struct Fts5Storage { Fts5Config *pConfig; Fts5Index *pIndex; - - sqlite3_stmt *aStmt[8]; + sqlite3_stmt *aStmt[9]; }; @@ -36,9 +35,11 @@ struct Fts5Storage { #define FTS5_STMT_REPLACE_CONTENT 4 #define FTS5_STMT_DELETE_CONTENT 5 -#define FTS5_STMT_INSERT_DOCSIZE 6 +#define FTS5_STMT_REPLACE_DOCSIZE 6 #define FTS5_STMT_DELETE_DOCSIZE 7 +#define FTS5_STMT_LOOKUP_DOCSIZE 8 + /* ** Prepare the two insert statements - Fts5Storage.pInsertContent and ** Fts5Storage.pInsertDocsize - if they have not already been prepared. @@ -62,8 +63,10 @@ static int fts5StorageGetStmt( "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ - "INSERT INTO %Q.'%q_docsize' VALUES(?,?)", /* INSERT_DOCSIZE */ + "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* REPLACE_DOCSIZE */ "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ + + "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ }; Fts5Config *pConfig = p->pConfig; char *zSql = 0; @@ -234,6 +237,7 @@ typedef struct Fts5InsertCtx Fts5InsertCtx; struct Fts5InsertCtx { Fts5Storage *pStorage; int iCol; + int szCol; /* Size of column value in tokens */ }; /* @@ -249,6 +253,7 @@ static int fts5StorageInsertCallback( ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; + pCtx->szCol = iPos+1; sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); return SQLITE_OK; } @@ -289,6 +294,27 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ return rc; } +/* +** Insert a record into the %_docsize table. 
Specifically, do: +** +** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); +*/ +static int fts5StorageInsertDocsize( + Fts5Storage *p, /* Storage module to write to */ + i64 iRowid, /* id value */ + Fts5Buffer *pBuf /* sz value */ +){ + sqlite3_stmt *pReplace = 0; + int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pReplace, 1, iRowid); + sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + } + return rc; +} + /* ** Insert a new row into the FTS table. */ @@ -304,6 +330,9 @@ int sqlite3Fts5StorageInsert( int eStmt; /* Type of statement used on %_content */ int i; /* Counter variable */ Fts5InsertCtx ctx; /* Tokenization callback context object */ + Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ + + memset(&buf, 0, sizeof(Fts5Buffer)); /* Insert the new row into the %_content table. */ if( eConflict==SQLITE_REPLACE ){ @@ -330,14 +359,22 @@ int sqlite3Fts5StorageInsert( sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid); ctx.pStorage = p; for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ + ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), sqlite3_value_bytes(apVal[ctx.iCol+2]), (void*)&ctx, fts5StorageInsertCallback ); + sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); } + /* Write the %_docsize record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageInsertDocsize(p, *piRowid, &buf); + } + sqlite3_free(buf.p); + return rc; } @@ -458,4 +495,50 @@ void sqlite3Fts5StorageStmtRelease( } } +static int fts5StorageDecodeSizeArray( + int *aCol, int nCol, /* Array to populate */ + const u8 *aBlob, int nBlob /* Record to read varints from */ +){ + int i; + int iOff = 0; + for(i=0; i=nBlob ) return 1; + iOff += getVarint32(&aBlob[iOff], aCol[i]); + } + return (iOff!=nBlob); +} + +/* +** Argument aCol points to an array of integers containing one entry for +** 
each table column. This function reads the %_docsize record for the +** specified rowid and populates aCol[] with the results. +** +** An SQLite error code is returned if an error occurs, or SQLITE_OK +** otherwise. +*/ +int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ + int nCol = p->pConfig->nCol; + sqlite3_stmt *pLookup = 0; + int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup); + if( rc==SQLITE_OK ){ + int bCorrupt = 1; + sqlite3_bind_int64(pLookup, 1, iRowid); + if( SQLITE_ROW==sqlite3_step(pLookup) ){ + const u8 *aBlob = sqlite3_column_blob(pLookup, 0); + int nBlob = sqlite3_column_bytes(pLookup, 0); + if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ + bCorrupt = 0; + } + } + rc = sqlite3_reset(pLookup); + if( bCorrupt && rc==SQLITE_OK ){ + rc = SQLITE_CORRUPT_VTAB; + } + } + return rc; +} + +int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int *aCol){ + return 0; +} diff --git a/manifest b/manifest index c84bbf7a32..edef13b28f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sissues\swith\sposition\slists\sand\sNEAR\sconstraints. -D 2014-07-18T19:59:00.547 +C Fixes\sfor\sthe\sxColumnSize()\sfts5\sextension\sAPI. 
+D 2014-07-19T15:35:09.453 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,15 +103,15 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 20bcb1e10756c72b550947236960edf96929ca2f -F ext/fts5/fts5.h cda3b9d73e6ffa6d0cd35b7da6b808bf3a1ada32 -F ext/fts5/fts5Int.h 6cf315d3999c14572012d676fa1baf4f4323587b -F ext/fts5/fts5_aux.c 27b082732fd76277fd7e9277f52903723d97f99b +F ext/fts5/fts5.c 86655d1e2ba35c719d3cc480cb9fdd7f3887b74e +F ext/fts5/fts5.h 844898034fa3e0458d93a6c34dd6ba6bd3c7e03a +F ext/fts5/fts5Int.h cca221a5cf7234f92faf3b4b5f2e4cf43bce83ee +F ext/fts5/fts5_aux.c 978a90fe90a6d34d9bd260948b5678caf5489894 F ext/fts5/fts5_buffer.c 71cf2016b2881e7aea39f952995eafa510d96cbd F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 288b3e016253eab69ea8cefbff346a4697b44291 F ext/fts5/fts5_index.c 9ff3008e903aa9077b0a7a7aa76ab6080eb07a36 -F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8 +F ext/fts5/fts5_storage.c fcf66173e55927cee0675ecfb1038d0000e4fa10 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -599,7 +599,7 @@ F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test 5d5ffba68e850d9ade99cdd3f5c6431c82dad81d +F test/fts5ae.test 7da37ac01debf2e238552d0ef2f61669fd232936 F 
test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -767,7 +767,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test 0b5333e5dcdeffba0ecbe5ee8dc7577029ffab6c +F test/permutations.test c3eb62a88337d9a5046c509dd90ba6d43debc76d F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1195,7 +1195,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 5808f30fae0d844c52a785bf18872be371d4af68 -R c645036fa73431553c03d7990bbe09ec +P 16352d3654d5672cd0251db51dbe19f779373feb +R a0466df485f1c616be1f64f0989b7925 U dan -Z 973aba4d2c5c8ae6a3e94f9739fdfe1b +Z affe87335b53c4c8634348527de72153 diff --git a/manifest.uuid b/manifest.uuid index e082d6160a..0a4fb0b2a6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -16352d3654d5672cd0251db51dbe19f779373feb \ No newline at end of file +43fcb844726cfeeb1c8a0dbfaa0d2ca22e6ac16c \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index c433d43208..3b6720c79b 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -121,16 +121,32 @@ do_execsql_test 4.0 { VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o'); } -breakpoint do_execsql_test 4.1 { SELECT rowid, fts5_test(t4, 'poslist') FROM t4 WHERE t4 MATCH 'a OR b AND c'; } { 1 {0.5 {} {}} } -#93 {0.5 1.6 {}} 
+#------------------------------------------------------------------------- +# Test that the xColumnSize() API works. +# + +reset_db +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE t5 USING fts5(x, y); + INSERT INTO t5 VALUES('a b c d', 'e f g h i j'); + INSERT INTO t5 VALUES('', 'a'); + INSERT INTO t5 VALUES('a', ''); +} +do_execsql_test 5.2 { + SELECT rowid, fts5_test(t5, 'columnsize') FROM t5 WHERE t5 MATCH 'a' + ORDER BY rowid DESC; +} { + 3 {1 0} + 2 {0 1} + 1 {4 6} +} finish_test - diff --git a/test/permutations.test b/test/permutations.test index c75cdbfd43..308d521f0e 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -225,7 +225,7 @@ test_suite "fts3" -prefix "" -description { test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { - fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ea.test + fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test } test_suite "nofaultsim" -prefix "" -description { From e1c77bcfcd21bb157d115d771d838af3ca358ad8 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 19 Jul 2014 20:27:54 +0000 Subject: [PATCH 017/206] Add simple tests for the xColumnText() extension api. 
FossilOrigin-Name: 1e9053abdaf5e128d44504ee00dfd909dc25f378 --- ext/fts5/fts5.c | 4 ++-- ext/fts5/fts5.h | 3 +++ ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_aux.c | 13 +++++++++++++ ext/fts5/fts5_buffer.c | 31 +++++++++++++++++++++++++++++++ manifest | 22 +++++++++++----------- manifest.uuid | 2 +- test/fts5ae.test | 9 +++++++++ 8 files changed, 71 insertions(+), 14 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 0db73196f2..eb688a7bc6 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -657,8 +657,8 @@ static int fts5ApiColumnText( Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; int rc = fts5SeekCursor(pCsr); if( rc==SQLITE_OK ){ - *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol); - *pn = sqlite3_column_bytes(pCsr->pStmt, iCol); + *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); + *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); } return rc; } diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 4c8f6c0a67..6e5176adbb 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -53,6 +53,9 @@ typedef void (*fts5_extension_function)( ** xColumnSize: ** Reports the size in tokens of a column value from the current row. ** +** xColumnText: +** Reports the size in tokens of a column value from the current row. +** ** xPhraseCount: ** Returns the number of phrases in the current query expression. 
** diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index c56214c8d1..f274baa3f9 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -89,6 +89,7 @@ void sqlite3Fts5BufferFree(Fts5Buffer*); void sqlite3Fts5BufferZero(Fts5Buffer*); void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); +void sqlite3Fts5BufferAppendListElem(int*, Fts5Buffer*, const char*, int); #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) #define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c) diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index ed7beb932e..7345a68f29 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -65,6 +65,19 @@ static void fts5TestFunction( if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); } + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntext "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntext") ){ + for(i=0; rc==SQLITE_OK && ixColumnText(pFts, i, &z, &n); + if( i!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " "); + sqlite3Fts5BufferAppendListElem(&rc, &s, z, n); + } + } + if( zReq==0 ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasecount "); } diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index b69af615b9..b798ad2119 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -232,4 +232,35 @@ int sqlite3Fts5PoslistNext( return 0; } +void sqlite3Fts5BufferAppendListElem( + int *pRc, /* IN/OUT: Error code */ + Fts5Buffer *pBuf, /* Buffer to append to */ + const char *z, int n /* Value to append to buffer */ +){ + int bParen = (n==0); + int nMax = n*2 + 2 + 1; + u8 *pOut; + int i; + + /* Ensure the buffer has space for the new list element */ + if( sqlite3Fts5BufferGrow(pRc, pBuf, nMax) ) return; + pOut = &pBuf->p[pBuf->n]; + + /* Figure out if we need the enclosing {} */ + for(i=0; in = pOut - pBuf->p; + *pOut = '\0'; +} + diff --git a/manifest b/manifest index edef13b28f..6019a0fb5d 100644 
--- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sfor\sthe\sxColumnSize()\sfts5\sextension\sAPI. -D 2014-07-19T15:35:09.453 +C Add\ssimple\stests\sfor\sthe\sxColumnText()\sextension\sapi. +D 2014-07-19T20:27:54.153 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,11 +103,11 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 86655d1e2ba35c719d3cc480cb9fdd7f3887b74e -F ext/fts5/fts5.h 844898034fa3e0458d93a6c34dd6ba6bd3c7e03a -F ext/fts5/fts5Int.h cca221a5cf7234f92faf3b4b5f2e4cf43bce83ee -F ext/fts5/fts5_aux.c 978a90fe90a6d34d9bd260948b5678caf5489894 -F ext/fts5/fts5_buffer.c 71cf2016b2881e7aea39f952995eafa510d96cbd +F ext/fts5/fts5.c fbd94670336eb95a26c705c1e4c188818720c888 +F ext/fts5/fts5.h 84060c2fe91aa03a783cc993f313d8a5b22cbe11 +F ext/fts5/fts5Int.h 5105506a180942811aa7104867518bc84d36c17a +F ext/fts5/fts5_aux.c 75f9abf7a7ccc7712357f04eeb6297c64095528d +F ext/fts5/fts5_buffer.c 00361d4a70040ebd2c32bc349ab708ff613a1749 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 288b3e016253eab69ea8cefbff346a4697b44291 F ext/fts5/fts5_index.c 9ff3008e903aa9077b0a7a7aa76ab6080eb07a36 @@ -599,7 +599,7 @@ F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test 7da37ac01debf2e238552d0ef2f61669fd232936 +F test/fts5ae.test c95b106236ea2f9f151715311ad0a2e45c49ccc1 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F 
test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1195,7 +1195,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 16352d3654d5672cd0251db51dbe19f779373feb -R a0466df485f1c616be1f64f0989b7925 +P 43fcb844726cfeeb1c8a0dbfaa0d2ca22e6ac16c +R fe613ccf4d230813074cd2e3b5b79d5f U dan -Z affe87335b53c4c8634348527de72153 +Z 057df6fbedf27e3f40e741066d4f0911 diff --git a/manifest.uuid b/manifest.uuid index 0a4fb0b2a6..7556250b6e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -43fcb844726cfeeb1c8a0dbfaa0d2ca22e6ac16c \ No newline at end of file +1e9053abdaf5e128d44504ee00dfd909dc25f378 \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index 3b6720c79b..f5a53625f7 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -147,6 +147,15 @@ do_execsql_test 5.2 { 1 {4 6} } +do_execsql_test 5.2 { + SELECT rowid, fts5_test(t5, 'columntext') FROM t5 WHERE t5 MATCH 'a' + ORDER BY rowid DESC; +} { + 3 {a {}} + 2 {{} a} + 1 {{a b c d} {e f g h i j}} +} + finish_test From 84d6fb394f71d0feea3cd5108daf6bc938c85585 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 21 Jul 2014 11:44:47 +0000 Subject: [PATCH 018/206] Fix the xColumnSize() extension API. 
FossilOrigin-Name: 19504c4108472d2ad1281221642b8bd06eb69f4e --- ext/fts5/fts5.c | 5 +- ext/fts5/fts5Int.h | 5 +- ext/fts5/fts5_aux.c | 15 +++++- ext/fts5/fts5_index.c | 22 ++++++++- ext/fts5/fts5_storage.c | 103 ++++++++++++++++++++++++++++++++++++---- manifest | 24 +++++----- manifest.uuid | 2 +- test/fts5aa.test | 1 + test/fts5ae.test | 22 ++++++++- 9 files changed, 170 insertions(+), 29 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index eb688a7bc6..745a557128 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -619,8 +619,9 @@ static int fts5ApiColumnCount(Fts5Context *pCtx){ } static int fts5ApiColumnAvgSize(Fts5Context *pCtx, int iCol, int *pnToken){ - assert( 0 ); - return 0; + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + return sqlite3Fts5StorageAvgsize(pTab->pStorage, iCol, pnToken); } static int fts5ApiTokenize( diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index f274baa3f9..fc78d64589 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -267,6 +267,9 @@ int sqlite3Fts5IndexInit(sqlite3*); void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); +int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf); +int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); + /* ** End of interface to code in fts5_index.c. 
**************************************************************************/ @@ -297,7 +300,7 @@ int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); -int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int *aCol); +int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg); /* diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 7345a68f29..deb2c8233e 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -43,11 +43,24 @@ static void fts5TestFunction( } memset(&s, 0, sizeof(Fts5Buffer)); + nCol = pApi->xColumnCount(pFts); + + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, "columnavgsize "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "columnavgsize") ){ + if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); + for(i=0; rc==SQLITE_OK && ixColumnAvgSize(pFts, i, &colsz); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", i==0?"":" ", colsz); + } + if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); + } if( zReq==0 ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, "columncount "); } - nCol = pApi->xColumnCount(pFts); if( 0==zReq || 0==sqlite3_stricmp(zReq, "columncount") ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nCol); } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e6ea440f5f..5050159206 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -734,7 +734,7 @@ static void fts5DataReference(Fts5Data *pData){ /* ** INSERT OR REPLACE a record into the %_data table. 
*/ -static void fts5DataWrite(Fts5Index *p, i64 iRowid, u8 *pData, int nData){ +static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ if( p->rc!=SQLITE_OK ) return; if( p->pWriter==0 ){ @@ -2734,6 +2734,7 @@ int sqlite3Fts5IndexOpen( } rc = p->rc; } + sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); } if( rc ){ @@ -3619,3 +3620,22 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ } } +/* +** Read the "averages" record into the buffer supplied as the second +** argument. Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ + fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); + return p->rc; +} + +/* +** Replace the current "averages" record with the contents of the buffer +** supplied as the second argument. +*/ +int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ + fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); + return p->rc; +} + diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index d3715dac07..fdab68d465 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -17,6 +17,8 @@ struct Fts5Storage { Fts5Config *pConfig; Fts5Index *pIndex; + i64 nTotalRow; /* Total number of rows in FTS table */ + i64 *aTotalSize; /* Total sizes of each column */ sqlite3_stmt *aStmt[9]; }; @@ -168,11 +170,15 @@ int sqlite3Fts5StorageOpen( ){ int rc; Fts5Storage *p; /* New object */ + int nByte; /* Bytes of space to allocate */ - *pp = p = (Fts5Storage*)sqlite3_malloc(sizeof(Fts5Storage)); + nByte = sizeof(Fts5Storage) /* Fts5Storage object */ + + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ + *pp = p = (Fts5Storage*)sqlite3_malloc(nByte); if( !p ) return SQLITE_NOMEM; - memset(p, 0, sizeof(Fts5Storage)); + memset(p, 0, nByte); + p->aTotalSize = (i64*)&p[1]; p->pConfig = pConfig; p->pIndex = pIndex; @@ -285,7 +291,9 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ 
(void*)&ctx, fts5StorageInsertCallback ); + p->aTotalSize[iCol-1] -= (i64)ctx.szCol; } + p->nTotalRow--; } rc2 = sqlite3_reset(pSeek); if( rc==SQLITE_OK ) rc = rc2; @@ -315,6 +323,62 @@ static int fts5StorageInsertDocsize( return rc; } +/* +** Load the contents of the "averages" record from disk into the +** p->nTotalRow and p->aTotalSize[] variables. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. +*/ +static int fts5StorageLoadTotals(Fts5Storage *p){ + int nCol = p->pConfig->nCol; + Fts5Buffer buf; + int rc; + memset(&buf, 0, sizeof(buf)); + + memset(p->aTotalSize, 0, sizeof(i64) * nCol); + p->nTotalRow = 0; + rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf); + if( rc==SQLITE_OK && buf.n ){ + int i = 0; + int iCol; + i += getVarint(&buf.p[i], (u64*)&p->nTotalRow); + for(iCol=0; iaTotalSize[iCol]); + } + } + sqlite3_free(buf.p); + + return rc; +} + +/* +** Store the current contents of the p->nTotalRow and p->aTotalSize[] +** variables in the "averages" record on disk. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. +*/ +static int fts5StorageSaveTotals(Fts5Storage *p){ + int nCol = p->pConfig->nCol; + int i; + Fts5Buffer buf; + int rc = SQLITE_OK; + memset(&buf, 0, sizeof(buf)); + + sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); + for(i=0; iaTotalSize[i]); + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); + } + sqlite3_free(buf.p); + + return rc; +} + + /* ** Insert a new row into the FTS table. */ @@ -333,15 +397,18 @@ int sqlite3Fts5StorageInsert( Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ memset(&buf, 0, sizeof(Fts5Buffer)); + rc = fts5StorageLoadTotals(p); /* Insert the new row into the %_content table. 
*/ - if( eConflict==SQLITE_REPLACE ){ - eStmt = FTS5_STMT_REPLACE_CONTENT; - if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ - rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1])); + if( rc==SQLITE_OK ){ + if( eConflict==SQLITE_REPLACE ){ + eStmt = FTS5_STMT_REPLACE_CONTENT; + if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ + rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1])); + } + }else{ + eStmt = FTS5_STMT_INSERT_CONTENT; } - }else{ - eStmt = FTS5_STMT_INSERT_CONTENT; } if( rc==SQLITE_OK ){ rc = fts5StorageGetStmt(p, eStmt, &pInsert); @@ -367,7 +434,9 @@ int sqlite3Fts5StorageInsert( fts5StorageInsertCallback ); sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); + p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; } + p->nTotalRow++; /* Write the %_docsize record */ if( rc==SQLITE_OK ){ @@ -375,6 +444,11 @@ int sqlite3Fts5StorageInsert( } sqlite3_free(buf.p); + /* Write the averages record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageSaveTotals(p); + } + return rc; } @@ -538,7 +612,16 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ return rc; } -int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int *aCol){ - return 0; +int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg){ + int rc = fts5StorageLoadTotals(p); + if( rc==SQLITE_OK ){ + int nAvg = 1; + if( p->nTotalRow ){ + nAvg = (int)((p->aTotalSize[iCol] + (p->nTotalRow/2)) / p->nTotalRow); + if( nAvg<1 ) nAvg = 1; + *pnAvg = nAvg; + } + } + return rc; } diff --git a/manifest b/manifest index 6019a0fb5d..d18e5f644b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssimple\stests\sfor\sthe\sxColumnText()\sextension\sapi. -D 2014-07-19T20:27:54.153 +C Fix\sthe\sxColumnSize()\sextension\sAPI. 
+D 2014-07-21T11:44:47.659 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,15 +103,15 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c fbd94670336eb95a26c705c1e4c188818720c888 +F ext/fts5/fts5.c 5bf93402f9bafa55181dfa70c3a785a41af31025 F ext/fts5/fts5.h 84060c2fe91aa03a783cc993f313d8a5b22cbe11 -F ext/fts5/fts5Int.h 5105506a180942811aa7104867518bc84d36c17a -F ext/fts5/fts5_aux.c 75f9abf7a7ccc7712357f04eeb6297c64095528d +F ext/fts5/fts5Int.h 12d03496152b716e63a5380e396b776fbefa2065 +F ext/fts5/fts5_aux.c 9f0487715cd9933f2268620b41fb47f78a389297 F ext/fts5/fts5_buffer.c 00361d4a70040ebd2c32bc349ab708ff613a1749 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 288b3e016253eab69ea8cefbff346a4697b44291 -F ext/fts5/fts5_index.c 9ff3008e903aa9077b0a7a7aa76ab6080eb07a36 -F ext/fts5/fts5_storage.c fcf66173e55927cee0675ecfb1038d0000e4fa10 +F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997 +F ext/fts5/fts5_storage.c f722b080b9794f9e49cc4d36f0d9fb516cb7f309 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -595,11 +595,11 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343 +F test/fts5aa.test 
0f5d29bf0a86b9dff0906c9e166d624c591d3437 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test c95b106236ea2f9f151715311ad0a2e45c49ccc1 +F test/fts5ae.test b856782549ae0c56628e0980444fc38cf20b41a0 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1195,7 +1195,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 43fcb844726cfeeb1c8a0dbfaa0d2ca22e6ac16c -R fe613ccf4d230813074cd2e3b5b79d5f +P 1e9053abdaf5e128d44504ee00dfd909dc25f378 +R 962590916dedf53a395813418f70a86f U dan -Z 057df6fbedf27e3f40e741066d4f0911 +Z 393e4d18f784466676fdeef71a737c83 diff --git a/manifest.uuid b/manifest.uuid index 7556250b6e..766db6dab7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1e9053abdaf5e128d44504ee00dfd909dc25f378 \ No newline at end of file +19504c4108472d2ad1281221642b8bd06eb69f4e \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index f8b8b54d08..9159cf3bc6 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -129,6 +129,7 @@ foreach {i x y} { #------------------------------------------------------------------------- # +breakpoint reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); diff --git a/test/fts5ae.test b/test/fts5ae.test index f5a53625f7..3fa99d3d51 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -128,7 +128,7 @@ do_execsql_test 4.1 { } #------------------------------------------------------------------------- -# Test that the xColumnSize() API works. +# Test that the xColumnSize() and xColumnAvgsize() APIs work. 
# reset_db @@ -156,6 +156,26 @@ do_execsql_test 5.2 { 1 {{a b c d} {e f g h i j}} } +do_execsql_test 5.3 { + SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a' + ORDER BY rowid DESC; +} { + 3 {2 2} + 2 {2 2} + 1 {2 2} +} + +do_execsql_test 5.4 { + INSERT INTO t5 VALUES('x y z', 'v w x y z'); + SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a' + ORDER BY rowid DESC; +} { + 3 {2 3} + 2 {2 3} + 1 {2 3} +} + + finish_test From 5c1b820460417d5ad2a137edf4e5d20fe5056078 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 21 Jul 2014 14:22:38 +0000 Subject: [PATCH 019/206] Add the xTokenize extension API. FossilOrigin-Name: 8c6b0aff3443fae4b7f0b9adcbf1514992b70653 --- ext/fts5/fts5.c | 5 +++-- ext/fts5/fts5.h | 3 +++ ext/fts5/fts5_aux.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ manifest | 18 +++++++++--------- manifest.uuid | 2 +- test/fts5ae.test | 15 +++++++++++++++ 6 files changed, 76 insertions(+), 12 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 745a557128..2a0b428fd2 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -630,8 +630,9 @@ static int fts5ApiTokenize( void *pUserData, int (*xToken)(void*, const char*, int, int, int, int) ){ - assert( 0 ); - return SQLITE_OK; + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 6e5176adbb..572aef03c8 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -68,6 +68,9 @@ typedef void (*fts5_extension_function)( ** ** xPoslist: ** Iterate through instances of phrase iPhrase in the current row. +** +** xTokenize: +** Tokenize text using the tokenizer belonging to the FTS5 table. 
*/ struct Fts5ExtensionApi { int iVersion; /* Currently always set to 1 */ diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index deb2c8233e..25a1747fdb 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -23,6 +23,24 @@ static void fts5SnippetFunction( assert( 0 ); } +static int fts5TestCallback( + void *pContext, /* Pointer to Fts5Buffer object */ + const char *pToken, /* Buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Start offset of token */ + int iEnd, /* End offset of token */ + int iPos /* Position offset of token */ +){ + int rc = SQLITE_OK; + Fts5Buffer *pBuf = (Fts5Buffer*)pContext; + if( pBuf->n!=0 ){ + sqlite3Fts5BufferAppendString(&rc, pBuf, " "); + } + sqlite3Fts5BufferAppendListElem(&rc, pBuf, pToken, nToken); + return rc; +} + + static void fts5TestFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ @@ -167,6 +185,33 @@ static void fts5TestFunction( sqlite3Fts5BufferAppendPrintf(&rc, &s, "%lld", iRowid); } + if( zReq==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, " tokenize "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "tokenize") ){ + Fts5Buffer buf; + memset(&buf, 0, sizeof(buf)); + for(i=0; rc==SQLITE_OK && ixColumnText(pFts, i, &z, &n); + if( rc==SQLITE_OK ){ + Fts5Buffer buf1; + memset(&buf1, 0, sizeof(Fts5Buffer)); + rc = pApi->xTokenize(pFts, z, n, (void*)&buf1, fts5TestCallback); + if( i!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &buf, " "); + sqlite3Fts5BufferAppendListElem(&rc, &buf, (const char*)buf1.p, buf1.n); + sqlite3_free(buf1.p); + } + } + if( zReq==0 ){ + sqlite3Fts5BufferAppendListElem(&rc, &s, (const char*)buf.p, buf.n); + }else{ + sqlite3Fts5BufferAppendString(&rc, &s, (const char*)buf.p); + } + sqlite3_free(buf.p); + } + if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)s.p, -1, SQLITE_TRANSIENT); }else{ diff --git a/manifest b/manifest index d18e5f644b..c08a4a6ccc 100644 
--- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\sxColumnSize()\sextension\sAPI. -D 2014-07-21T11:44:47.659 +C Add\sthe\sxTokenize\sextension\sAPI. +D 2014-07-21T14:22:38.753 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,10 +103,10 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 5bf93402f9bafa55181dfa70c3a785a41af31025 -F ext/fts5/fts5.h 84060c2fe91aa03a783cc993f313d8a5b22cbe11 +F ext/fts5/fts5.c fda4dd1d2c3e1c6bcb0bf673428a23e4752c21f5 +F ext/fts5/fts5.h c77b6a4a56d80f70fc4f0444030c88724397ed10 F ext/fts5/fts5Int.h 12d03496152b716e63a5380e396b776fbefa2065 -F ext/fts5/fts5_aux.c 9f0487715cd9933f2268620b41fb47f78a389297 +F ext/fts5/fts5_aux.c 14961135231dd50e6c17894e649c3bbc8c042829 F ext/fts5/fts5_buffer.c 00361d4a70040ebd2c32bc349ab708ff613a1749 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 288b3e016253eab69ea8cefbff346a4697b44291 @@ -599,7 +599,7 @@ F test/fts5aa.test 0f5d29bf0a86b9dff0906c9e166d624c591d3437 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test b856782549ae0c56628e0980444fc38cf20b41a0 +F test/fts5ae.test fe9db78201bbb87c6f82b72a14b946d0f7fc3026 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1195,7 +1195,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh 
f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1e9053abdaf5e128d44504ee00dfd909dc25f378 -R 962590916dedf53a395813418f70a86f +P 19504c4108472d2ad1281221642b8bd06eb69f4e +R 5c4f852f4d425543d226dd267d759872 U dan -Z 393e4d18f784466676fdeef71a737c83 +Z 17df0b0e1c67547ce560ffb2b8c93af3 diff --git a/manifest.uuid b/manifest.uuid index 766db6dab7..d01210d922 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -19504c4108472d2ad1281221642b8bd06eb69f4e \ No newline at end of file +8c6b0aff3443fae4b7f0b9adcbf1514992b70653 \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index 3fa99d3d51..c1eabef3f8 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -175,7 +175,22 @@ do_execsql_test 5.4 { 1 {2 3} } +#------------------------------------------------------------------------- +# Test the xTokenize() API +# +reset_db +do_execsql_test 6.1 { + CREATE VIRTUAL TABLE t6 USING fts5(x, y); + INSERT INTO t6 VALUES('There are more', 'things in heaven and earth'); + INSERT INTO t6 VALUES(', Horatio, Than are', 'dreamt of in your philosophy.'); +} +do_execsql_test 6.2 { + SELECT rowid, fts5_test(t6, 'tokenize') FROM t6 WHERE t6 MATCH 't*' +} { + 2 {{horatio than are} {dreamt of in your philosophy}} + 1 {{there are more} {things in heaven and earth}} +} finish_test From a5983da6e16f388c7f639b626be4172a48f1949f Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 21 Jul 2014 15:45:26 +0000 Subject: [PATCH 020/206] Fix DELETE and UPDATE operations on fts5 tables. 
FossilOrigin-Name: d44d3a8518ff7a1a3e2c0ab97493aa590676ee8c --- ext/fts5/fts5.c | 2 +- ext/fts5/fts5_storage.c | 50 ++++++++++++++++++++++++++++++++++------- manifest | 16 ++++++------- manifest.uuid | 2 +- test/fts5aa.test | 34 ++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 18 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 2a0b428fd2..a3876671a3 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -551,7 +551,7 @@ static int fts5UpdateMethod( assert( nArg==1 || nArg==(2 + pConfig->nCol + 1) ); - if( SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ + if( nArg>1 && SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ return fts5SpecialCommand(pTab, apVal[2 + pConfig->nCol]); } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index fdab68d465..13e1f08082 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -231,14 +231,6 @@ int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy){ return rc; } -/* -** Remove a row from the FTS table. -*/ -int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ - assert( !"do this" ); - return SQLITE_OK; -} - typedef struct Fts5InsertCtx Fts5InsertCtx; struct Fts5InsertCtx { Fts5Storage *pStorage; @@ -302,6 +294,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ return rc; } + /* ** Insert a record into the %_docsize table. Specifically, do: ** @@ -378,6 +371,47 @@ static int fts5StorageSaveTotals(Fts5Storage *p){ return rc; } +/* +** Remove a row from the FTS table. 
+*/ +int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ + int rc; + sqlite3_stmt *pDel; + + rc = fts5StorageLoadTotals(p); + + /* Delete the index records */ + if( rc==SQLITE_OK ){ + rc = fts5StorageDeleteFromIndex(p, iDel); + } + + /* Delete the %_docsize record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel); + } + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDel, 1, iDel); + sqlite3_step(pDel); + rc = sqlite3_reset(pDel); + } + + /* Delete the %_content record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel); + } + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDel, 1, iDel); + sqlite3_step(pDel); + rc = sqlite3_reset(pDel); + } + + /* Write the averages record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageSaveTotals(p); + } + + return rc; +} /* ** Insert a new row into the FTS table. diff --git a/manifest b/manifest index c08a4a6ccc..a9ec4b7137 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sxTokenize\sextension\sAPI. -D 2014-07-21T14:22:38.753 +C Fix\sDELETE\sand\sUPDATE\soperations\son\sfts5\stables. 
+D 2014-07-21T15:45:26.584 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,7 +103,7 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c fda4dd1d2c3e1c6bcb0bf673428a23e4752c21f5 +F ext/fts5/fts5.c 35124fe8a49868808604c6a5264bf4f23587ac99 F ext/fts5/fts5.h c77b6a4a56d80f70fc4f0444030c88724397ed10 F ext/fts5/fts5Int.h 12d03496152b716e63a5380e396b776fbefa2065 F ext/fts5/fts5_aux.c 14961135231dd50e6c17894e649c3bbc8c042829 @@ -111,7 +111,7 @@ F ext/fts5/fts5_buffer.c 00361d4a70040ebd2c32bc349ab708ff613a1749 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 288b3e016253eab69ea8cefbff346a4697b44291 F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997 -F ext/fts5/fts5_storage.c f722b080b9794f9e49cc4d36f0d9fb516cb7f309 +F ext/fts5/fts5_storage.c 7bb34138d134841cbe0a809467070d07013d8d7d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -595,7 +595,7 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test 0f5d29bf0a86b9dff0906c9e166d624c591d3437 +F test/fts5aa.test a2c7bbc18f25f0b57ea8fc483c8a8830273b9ed4 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 
2ed38bbc865678cb2905247120d02ebba7f20e07 @@ -1195,7 +1195,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 19504c4108472d2ad1281221642b8bd06eb69f4e -R 5c4f852f4d425543d226dd267d759872 +P 8c6b0aff3443fae4b7f0b9adcbf1514992b70653 +R ceadf07a7e21667d721f7d8d3cf93a59 U dan -Z 17df0b0e1c67547ce560ffb2b8c93af3 +Z 2d0ad058324af420450f942c4237ae4f diff --git a/manifest.uuid b/manifest.uuid index d01210d922..f831f113b7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8c6b0aff3443fae4b7f0b9adcbf1514992b70653 \ No newline at end of file +d44d3a8518ff7a1a3e2c0ab97493aa590676ee8c \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 9159cf3bc6..1206a0cae8 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -242,5 +242,39 @@ for {set i 1} {$i <= 10} {incr i} { if {[set_test_counter errors]} break } +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 10.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x,y); +} +set d10 { + 1 {g f d b f} {h h e i a} + 2 {f i g j e} {i j c f f} + 3 {e e i f a} {e h f d f} + 4 {h j f j i} {h a c f j} + 5 {d b j c g} {f e i b e} + 6 {a j a e e} {j d f d e} + 7 {g i j c h} {j d h c a} + 8 {j j i d d} {e e d f b} + 9 {c j j d c} {h j i f g} + 10 {b f h i a} {c f b b j} +} +foreach {rowid x y} $d10 { + do_execsql_test 10.1.$rowid.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 10.1.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +} +foreach rowid {5 9 8 1 2 4 10 7 3 5 6} { + do_execsql_test 10.2.$rowid.1 { DELETE FROM t1 WHERE rowid = $rowid } + do_execsql_test 10.2.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +} +foreach {rowid x y} $d10 { + do_execsql_test 10.3.$rowid.1 { INSERT INTO t1 VALUES($x, $y) } + do_execsql_test 
10.3.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +} + +do_execsql_test 10.4.1 { DELETE FROM t1 } +do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + finish_test From b30860abe2056bd1d47c23227a0b1d35cbc27b13 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 23 Jul 2014 19:31:56 +0000 Subject: [PATCH 021/206] Add a snippet() function to fts5. FossilOrigin-Name: bdc58fd28a63ac9632c3df6c7768a9a236566605 --- ext/fts5/fts5.c | 5 +- ext/fts5/fts5.h | 7 +- ext/fts5/fts5_aux.c | 408 +++++++++++++++++++++++++++++++++++++++- ext/fts5/fts5_buffer.c | 1 + ext/fts5/fts5_expr.c | 1 - ext/fts5/fts5_storage.c | 59 ++++++ manifest | 25 +-- manifest.uuid | 2 +- test/fts5af.test | 138 ++++++++++++++ test/permutations.test | 1 + 10 files changed, 626 insertions(+), 21 deletions(-) create mode 100644 test/fts5af.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index a3876671a3..14da56ec40 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -686,13 +686,12 @@ static int fts5ApiPoslist( Fts5Context *pCtx, int iPhrase, int *pi, - int *piCol, - int *piOff + i64 *piPos ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; const u8 *a; int n; /* Poslist for phrase iPhrase */ n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a); - return sqlite3Fts5PoslistNext(a, n, pi, piCol, piOff); + return sqlite3Fts5PoslistNext64(a, n, pi, piPos); } static void fts5ApiCallback( diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 572aef03c8..4d45ee60b0 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -69,6 +69,8 @@ typedef void (*fts5_extension_function)( ** xPoslist: ** Iterate through instances of phrase iPhrase in the current row. ** +** At EOF, a non-zero value is returned and output variable iPos set to -1. +** ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. 
*/ @@ -91,9 +93,12 @@ struct Fts5ExtensionApi { sqlite3_int64 (*xRowid)(Fts5Context*); int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); - int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, int *piCol, int *piOff); + int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos); }; +#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32) +#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF) + /* ** CUSTOM AUXILIARY FUNCTIONS *************************************************************************/ diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 25a1747fdb..faee34c82d 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -13,12 +13,413 @@ #include "fts5Int.h" +typedef struct SnippetPhrase SnippetPhrase; +typedef struct SnippetIter SnippetIter; +typedef struct SnippetCtx SnippetCtx; + +struct SnippetPhrase { + u64 mask; /* Current mask */ + int nToken; /* Tokens in this phrase */ + int i; /* Current offset in phrase poslist */ + i64 iPos; /* Next position in phrase (-ve -> EOF) */ +}; + +struct SnippetIter { + i64 iLast; /* Last token position of current snippet */ + int nScore; /* Score of current snippet */ + + const Fts5ExtensionApi *pApi; + Fts5Context *pFts; + u64 szmask; /* Mask used to on SnippetPhrase.mask */ + int nPhrase; /* Number of phrases */ + SnippetPhrase aPhrase[0]; /* Array of size nPhrase */ +}; + +struct SnippetCtx { + int iFirst; /* Offset of first token to record */ + int nToken; /* Size of aiStart[] and aiEnd[] arrays */ + int iSeen; /* Set to largest offset seen */ + int *aiStart; + int *aiEnd; +}; + +static int fts5SnippetCallback( + void *pContext, /* Pointer to Fts5Buffer object */ + const char *pToken, /* Buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Start offset of token */ + int iEnd, /* End offset of token */ + int iPos /* Position offset of token */ +){ + int rc = SQLITE_OK; + SnippetCtx 
*pCtx = (SnippetCtx*)pContext; + int iOff = iPos - pCtx->iFirst; + + if( iOff>=0 ){ + if( iOff < pCtx->nToken ){ + pCtx->aiStart[iOff] = iStart; + pCtx->aiEnd[iOff] = iEnd; + } + pCtx->iSeen = iPos; + if( iOff>=pCtx->nToken ) rc = SQLITE_DONE; + } + + return rc; +} + +/* +** Set pIter->nScore to the score for the current entry. +*/ +static void fts5SnippetCalculateScore(SnippetIter *pIter){ + int i; + int nScore = 0; + assert( pIter->iLast>=0 ); + + for(i=0; inPhrase; i++){ + SnippetPhrase *p = &pIter->aPhrase[i]; + u64 mask = p->mask; + if( mask ){ + u64 j; + nScore += 1000; + for(j=1; j & pIter->szmask; j<<=1){ + if( mask & j ) nScore++; + } + } + } + + pIter->nScore = nScore; +} + +/* +** Allocate a new snippet iter. +*/ +static int fts5SnippetIterNew( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + int nToken, /* Number of tokens in snippets */ + SnippetIter **ppIter /* OUT: New object */ +){ + int i; /* Counter variable */ + SnippetIter *pIter; /* New iterator object */ + int nByte; /* Bytes of space to allocate */ + int nPhrase; /* Number of phrases in query */ + + *ppIter = 0; + nPhrase = pApi->xPhraseCount(pFts); + nByte = sizeof(SnippetIter) + nPhrase * sizeof(SnippetPhrase); + pIter = (SnippetIter*)sqlite3_malloc(nByte); + if( pIter==0 ) return SQLITE_NOMEM; + memset(pIter, 0, nByte); + + pIter->nPhrase = nPhrase; + pIter->pApi = pApi; + pIter->pFts = pFts; + pIter->szmask = ((u64)1 << nToken) - 1; + assert( nToken<=63 ); + + for(i=0; iaPhrase[i].nToken = pApi->xPhraseSize(pFts, i); + } + + *ppIter = pIter; + return SQLITE_OK; +} + +/* +** Set the iterator to point to the first candidate snippet. 
+*/ +static void fts5SnippetIterFirst(SnippetIter *pIter){ + const Fts5ExtensionApi *pApi = pIter->pApi; + Fts5Context *pFts = pIter->pFts; + int i; /* Used to iterate through phrases */ + SnippetPhrase *pMin = 0; /* Phrase with first match */ + + memset(pIter->aPhrase, 0, sizeof(SnippetPhrase) * pIter->nPhrase); + + for(i=0; inPhrase; i++){ + SnippetPhrase *p = &pIter->aPhrase[i]; + p->nToken = pApi->xPhraseSize(pFts, i); + pApi->xPoslist(pFts, i, &p->i, &p->iPos); + if( p->iPos>=0 && (pMin==0 || p->iPosiPos) ){ + pMin = p; + } + } + assert( pMin ); + + pIter->iLast = pMin->iPos + pMin->nToken - 1; + pMin->mask = 0x01; + pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos); + fts5SnippetCalculateScore(pIter); +} + +/* +** Advance the snippet iterator to the next candidate snippet. +*/ +static void fts5SnippetIterNext(SnippetIter *pIter){ + const Fts5ExtensionApi *pApi = pIter->pApi; + Fts5Context *pFts = pIter->pFts; + int nPhrase = pIter->nPhrase; + int i; /* Used to iterate through phrases */ + SnippetPhrase *pMin = 0; + + for(i=0; iaPhrase[i]; + if( p->iPos>=0 && (pMin==0 || p->iPosiPos) ) pMin = p; + } + + if( pMin==0 ){ + /* pMin==0 indicates that the SnippetIter is at EOF. 
*/ + pIter->iLast = -1; + }else{ + i64 nShift = pMin->iPos - pIter->iLast; + assert( nShift>=0 ); + for(i=0; iaPhrase[i]; + if( nShift>=63 ){ + p->mask = 0; + }else{ + p->mask = p->mask << (int)nShift; + p->mask &= pIter->szmask; + } + } + + pIter->iLast = pMin->iPos; + pMin->mask |= 0x01; + fts5SnippetCalculateScore(pIter); + pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos); + } +} + +static void fts5SnippetIterFree(SnippetIter *pIter){ + if( pIter ){ + sqlite3_free(pIter); + } +} + +static int fts5SnippetText( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + SnippetIter *pIter, /* Snippet to write to buffer */ + int nToken, /* Size of desired snippet in tokens */ + const char *zStart, + const char *zFinal, + const char *zEllip, + Fts5Buffer *pBuf /* Write output to this buffer */ +){ + SnippetCtx ctx; + int i; + u64 all = 0; + const char *zCol; /* Column text to extract snippet from */ + int nCol; /* Size of column text in bytes */ + int rc; + int nShift; + + rc = pApi->xColumnText(pFts, FTS5_POS2COLUMN(pIter->iLast), &zCol, &nCol); + if( rc!=SQLITE_OK ) return rc; + + /* At this point pIter->iLast is the offset of the last token in the + ** proposed snippet. However, in all cases pIter->iLast contains the + ** final token of one of the phrases. This makes the snippet look + ** unbalanced. For example: + ** + ** "...x x x x x term..." + ** + ** It is better to increase iLast a little so that the snippet looks + ** more like: + ** + ** "...x x x term y y..." + ** + ** The problem is that there is no easy way to discover whether or not + ** how many tokens are present in the column following "term". + */ + + /* Set variable nShift to the number of tokens by which the snippet + ** should be shifted, assuming there are sufficient tokens to the right + ** of iLast in the column value. 
*/ + for(i=0; inPhrase; i++){ + int iToken; + for(iToken=0; iTokenaPhrase[i].nToken; iToken++){ + all |= (pIter->aPhrase[i].mask << iToken); + } + } + for(i=nToken-1; i>=0; i--){ + if( all & ((u64)1 << i) ) break; + } + assert( i>=0 ); + nShift = (nToken - i) / 2; + + memset(&ctx, 0, sizeof(SnippetCtx)); + ctx.nToken = nToken + nShift; + ctx.iFirst = FTS5_POS2OFFSET(pIter->iLast) - nToken + 1; + if( ctx.iFirst<0 ){ + nShift += ctx.iFirst; + if( nShift<0 ) nShift = 0; + ctx.iFirst = 0; + } + ctx.aiStart = (int*)sqlite3_malloc(sizeof(int) * ctx.nToken * 2); + if( ctx.aiStart==0 ) return SQLITE_NOMEM; + ctx.aiEnd = &ctx.aiStart[ctx.nToken]; + + rc = pApi->xTokenize(pFts, zCol, nCol, (void*)&ctx, fts5SnippetCallback); + if( rc==SQLITE_OK ){ + int i1; /* First token from input to include */ + int i2; /* Last token from input to include */ + + int iPrint; + int iMatchto; + int iBit0; + int iLast; + + int *aiStart = ctx.aiStart - ctx.iFirst; + int *aiEnd = ctx.aiEnd - ctx.iFirst; + + /* Ideally we want to start the snippet with token (ctx.iFirst + nShift). + ** However, this is only possible if there are sufficient tokens within + ** the column. This block sets variables i1 and i2 to the first and last + ** input tokens to include in the snippet. */ + if( (ctx.iFirst + nShift + nToken)<=ctx.iSeen ){ + i1 = ctx.iFirst + nShift; + i2 = i1 + nToken - 1; + }else{ + i2 = ctx.iSeen; + i1 = ctx.iSeen - nToken + 1; + assert( i1>=0 || ctx.iFirst==0 ); + if( i1<0 ) i1 = 0; + } + + /* If required, append the preceding ellipsis. */ + if( i1>0 ) sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%s", zEllip); + + iLast = FTS5_POS2OFFSET(pIter->iLast); + iPrint = i1; + iMatchto = -1; + + for(i=i1; i<=i2; i++){ + + /* Check if this is the first token of any phrase match. 
*/ + int ip; + for(ip=0; ipnPhrase; ip++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[ip]; + u64 m = (1 << (iLast - i - pPhrase->nToken + 1)); + + if( i<=iLast && (pPhrase->mask & m) ){ + if( iMatchto<0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s", + aiStart[i] - aiStart[iPrint], + &zCol[aiStart[iPrint]], + zStart + ); + iPrint = i; + } + if( i>iMatchto ) iMatchto = i + pPhrase->nToken - 1; + } + } + + if( i==iMatchto ){ + sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s", + aiEnd[i] - aiStart[iPrint], + &zCol[aiStart[iPrint]], + zFinal + ); + iMatchto = -1; + iPrint = i+1; + + if( i=0 ){ + sqlite3Fts5BufferAppendString(&rc, pBuf, zFinal); + } + } + + /* If required, append the trailing ellipsis. */ + if( i2=1 ) zStart = (const char*)sqlite3_value_text(apVal[0]); + if( nVal>=2 ) zFinal = (const char*)sqlite3_value_text(apVal[1]); + if( nVal>=3 ) zEllip = (const char*)sqlite3_value_text(apVal[2]); + if( nVal>=4 ){ + nToken = sqlite3_value_int(apVal[3]); + if( nToken==0 ) nToken = -15; + } + nAbs = nToken * (nToken<0 ? 
-1 : 1); + + rc = fts5SnippetIterNew(pApi, pFts, nAbs, &pIter); + if( rc==SQLITE_OK ){ + Fts5Buffer buf; /* Result buffer */ + int nBestScore = 0; /* Score of best snippet found */ + int n; /* Size of column snippet is from in bytes */ + int i; /* Used to iterate through phrases */ + + for(fts5SnippetIterFirst(pIter); + pIter->iLast>=0; + fts5SnippetIterNext(pIter) + ){ + if( pIter->nScore>nBestScore ) nBestScore = pIter->nScore; + } + for(fts5SnippetIterFirst(pIter); + pIter->iLast>=0; + fts5SnippetIterNext(pIter) + ){ + if( pIter->nScore==nBestScore ) break; + } + + memset(&buf, 0, sizeof(Fts5Buffer)); + rc = fts5SnippetText(pApi, pFts, pIter, nAbs, zStart, zFinal, zEllip, &buf); + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)buf.p, buf.n, SQLITE_TRANSIENT); + } + sqlite3_free(buf.p); + } + + fts5SnippetIterFree(pIter); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + } +} + +static void fts5Bm25Function( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ ){ assert( 0 ); } @@ -146,12 +547,13 @@ static void fts5TestFunction( for(i=0; ixPoslist(pFts, i, &j, &iCol, &iOff) ){ + while( 0==pApi->xPoslist(pFts, i, &j, &iPos) ){ + int iOff = FTS5_POS2OFFSET(iPos); + int iCol = FTS5_POS2COLUMN(iPos); if( nElem!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s2, " "); sqlite3Fts5BufferAppendPrintf(&rc, &s2, "%d.%d", iCol, iOff); nElem++; diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index b798ad2119..bea316eda4 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -146,6 +146,7 @@ int sqlite3Fts5PoslistNext64( int i = *pi; if( i>=n ){ /* EOF */ + *piOff = -1; return 1; }else{ i64 iOff = *piOff; diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 
06faf7ebff..9eea4552bf 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -389,7 +389,6 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ int i; int rc = SQLITE_OK; int bMatch; - i64 iMax; assert( pNear->nPhrase>1 ); diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 13e1f08082..c56c2d038a 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -486,6 +486,29 @@ int sqlite3Fts5StorageInsert( return rc; } +static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ + Fts5Config *pConfig = p->pConfig; + char *zSql; + int rc; + /* Store in *pnRow the row count of the shadow table named by zSuffix. */ + zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'", + pConfig->zDb, pConfig->zName, zSuffix + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3_stmt *pCnt = 0; + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); + if( rc==SQLITE_OK ){ /* else finalize(NULL) would mask the prepare error */ + if( SQLITE_ROW==sqlite3_step(pCnt) ) *pnRow = sqlite3_column_int64(pCnt, 0); + rc = sqlite3_finalize(pCnt); + } + } + + sqlite3_free(zSql); + return rc; +} + /* ** Context object used by sqlite3Fts5StorageIntegrity().
*/ @@ -493,6 +516,7 @@ typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; struct Fts5IntegrityCtx { i64 iRowid; int iCol; + int szCol; u64 cksum; Fts5Config *pConfig; }; @@ -512,6 +536,7 @@ static int fts5StorageIntegrityCallback( pCtx->cksum ^= sqlite3Fts5IndexCksum( pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken ); + pCtx->szCol = iPos+1; return SQLITE_OK; } @@ -524,11 +549,17 @@ static int fts5StorageIntegrityCallback( int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ Fts5Config *pConfig = p->pConfig; int rc; /* Return code */ + int *aColSize; /* Array of size pConfig->nCol */ + i64 *aTotalSize; /* Array of size pConfig->nCol */ Fts5IntegrityCtx ctx; sqlite3_stmt *pScan; memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); ctx.pConfig = p->pConfig; + aTotalSize = (i64*)sqlite3_malloc(pConfig->nCol * (sizeof(int)+sizeof(i64))); + if( !aTotalSize ) return SQLITE_NOMEM; + aColSize = (int*)&aTotalSize[pConfig->nCol]; + memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol); /* Generate the expected index checksum based on the contents of the ** %_content table. This block stores the checksum in ctx.cksum. 
*/ @@ -538,6 +569,8 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); + rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ ctx.iCol = i; + ctx.szCol = 0; /* reset per column: the callback only writes it when a token is seen */ rc = sqlite3Fts5Tokenize( @@ -547,12 +580,37 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ (void*)&ctx, fts5StorageIntegrityCallback ); + if( rc==SQLITE_OK && ctx.szCol!=aColSize[i] ) rc = SQLITE_CORRUPT_VTAB; + aTotalSize[i] += ctx.szCol; } + if( rc!=SQLITE_OK ) break; } rc2 = sqlite3_reset(pScan); if( rc==SQLITE_OK ) rc = rc2; } + /* Test that the "totals" (sometimes called "averages") record looks Ok */ + if( rc==SQLITE_OK ){ + int i; + rc = fts5StorageLoadTotals(p); + for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ + if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB; + } + } + + /* Check that the %_docsize and %_content tables contain the expected + ** number of rows. */ + if( rc==SQLITE_OK ){ + i64 nRow; + rc = fts5StorageCount(p, "content", &nRow); + if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB; + } + if( rc==SQLITE_OK ){ + i64 nRow; + rc = fts5StorageCount(p, "docsize", &nRow); + if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB; + } + /* Pass the expected checksum down to the FTS index module. It will ** verify, amongst other things, that it matches the checksum generated by ** inspecting the index itself. */ @@ -560,6 +618,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum); } + sqlite3_free(aTotalSize); return rc; } diff --git a/manifest b/manifest index a9ec4b7137..ec56664116 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sDELETE\sand\sUPDATE\soperations\son\sfts5\stables. -D 2014-07-21T15:45:26.584 +C Add\sa\ssnippet()\sfunction\sto\sfts5.
+D 2014-07-23T19:31:56.454 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,15 +103,15 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 35124fe8a49868808604c6a5264bf4f23587ac99 -F ext/fts5/fts5.h c77b6a4a56d80f70fc4f0444030c88724397ed10 +F ext/fts5/fts5.c 6f859d444eb8be46cb3f7aba3aaae369c5b26809 +F ext/fts5/fts5.h 57325b418b26dcd60be5bc8aab05b33767d81590 F ext/fts5/fts5Int.h 12d03496152b716e63a5380e396b776fbefa2065 -F ext/fts5/fts5_aux.c 14961135231dd50e6c17894e649c3bbc8c042829 -F ext/fts5/fts5_buffer.c 00361d4a70040ebd2c32bc349ab708ff613a1749 +F ext/fts5/fts5_aux.c cba929fb13931c9b8be7d572991e648b98f14cf2 +F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 288b3e016253eab69ea8cefbff346a4697b44291 +F ext/fts5/fts5_expr.c 2911813db174afa28b69ccc7031b6dd80293b241 F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997 -F ext/fts5/fts5_storage.c 7bb34138d134841cbe0a809467070d07013d8d7d +F ext/fts5/fts5_storage.c 9a2744f492413395a0e75f20c19b797c801a7308 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -600,6 +600,7 @@ F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test fe9db78201bbb87c6f82b72a14b946d0f7fc3026 +F test/fts5af.test 
a2980528a04b67ac4690e6c02ebe9455f45c9a35 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -767,7 +768,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test c3eb62a88337d9a5046c509dd90ba6d43debc76d +F test/permutations.test e6e951c816199693676d8c3d22d6bf54bcd719fc F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1195,7 +1196,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8c6b0aff3443fae4b7f0b9adcbf1514992b70653 -R ceadf07a7e21667d721f7d8d3cf93a59 +P d44d3a8518ff7a1a3e2c0ab97493aa590676ee8c +R 638d6826a594d773b5778bd6943c3d96 U dan -Z 2d0ad058324af420450f942c4237ae4f +Z 30db824dafb73f9c4c6895383aa25ed9 diff --git a/manifest.uuid b/manifest.uuid index f831f113b7..990c947a9e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d44d3a8518ff7a1a3e2c0ab97493aa590676ee8c \ No newline at end of file +bdc58fd28a63ac9632c3df6c7768a9a236566605 \ No newline at end of file diff --git a/test/fts5af.test b/test/fts5af.test new file mode 100644 index 0000000000..cd5f91f13e --- /dev/null +++ b/test/fts5af.test @@ -0,0 +1,138 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. 
+# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# More specifically, the tests in this file focus on the built-in +# snippet() function. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5af + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, y); +} + + +foreach {tn doc res} { + + 1.1 {X o o o o o o} {[X] o o o o o o} + 1.2 {o X o o o o o} {o [X] o o o o o} + 1.3 {o o X o o o o} {o o [X] o o o o} + 1.4 {o o o X o o o} {o o o [X] o o o} + 1.5 {o o o o X o o} {o o o o [X] o o} + 1.6 {o o o o o X o} {o o o o o [X] o} + 1.7 {o o o o o o X} {o o o o o o [X]} + + 2.1 {X o o o o o o o} {[X] o o o o o o...} + 2.2 {o X o o o o o o} {o [X] o o o o o...} + 2.3 {o o X o o o o o} {o o [X] o o o o...} + 2.4 {o o o X o o o o} {o o o [X] o o o...} + 2.5 {o o o o X o o o} {...o o o [X] o o o} + 2.6 {o o o o o X o o} {...o o o o [X] o o} + 2.7 {o o o o o o X o} {...o o o o o [X] o} + 2.8 {o o o o o o o X} {...o o o o o o [X]} + + 3.1 {X o o o o o o o o} {[X] o o o o o o...} + 3.2 {o X o o o o o o o} {o [X] o o o o o...} + 3.3 {o o X o o o o o o} {o o [X] o o o o...} + 3.4 {o o o X o o o o o} {o o o [X] o o o...} + 3.5 {o o o o X o o o o} {...o o o [X] o o o...} + 3.6 {o o o o o X o o o} {...o o o [X] o o o} + 3.7 {o o o o o o X o o} {...o o o o [X] o o} + 3.8 {o o o o o o o X o} {...o o o o o [X] o} + 3.9 {o o o o o o o o X} {...o o o o o o [X]} + + 4.1 {X o o o o o X o o} {[X] o o o o o [X]...} + 4.2 {o X o o o o o X o} {...[X] o o o o o [X]...} + 4.3 {o o X o o o o o X} {...[X] o o o o o [X]} + + 5.1 {X o o o o X o o o} {[X] o o o o [X] o...} + 5.2 {o X o o o o X o o} {...[X] o o o o [X] o...} + 5.3 {o o X o o o o X o} 
{...[X] o o o o [X] o} + 5.4 {o o o X o o o o X} {...o [X] o o o o [X]} + + 6.1 {X o o o X o o o} {[X] o o o [X] o o...} + 6.2 {o X o o o X o o o} {o [X] o o o [X] o...} + 6.3 {o o X o o o X o o} {...o [X] o o o [X] o...} + 6.4 {o o o X o o o X o} {...o [X] o o o [X] o} + 6.5 {o o o o X o o o X} {...o o [X] o o o [X]} + + 7.1 {X o o X o o o o o} {[X] o o [X] o o o...} + 7.2 {o X o o X o o o o} {o [X] o o [X] o o...} + 7.3 {o o X o o X o o o} {...o [X] o o [X] o o...} + 7.4 {o o o X o o X o o} {...o [X] o o [X] o o} + 7.5 {o o o o X o o X o} {...o o [X] o o [X] o} + 7.6 {o o o o o X o o X} {...o o o [X] o o [X]} +} { + do_execsql_test 1.$tn.1 { + DELETE FROM t1; + INSERT INTO t1 VALUES($doc, NULL); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X'; + } [list $res] + + do_execsql_test 1.$tn.2 { + DELETE FROM t1; + INSERT INTO t1 VALUES(NULL, $doc); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X'; + } [list $res] +} + +foreach {tn doc res} { + 1.1 {X Y o o o o o} {[X Y] o o o o o} + 1.2 {o X Y o o o o} {o [X Y] o o o o} + 1.3 {o o X Y o o o} {o o [X Y] o o o} + 1.4 {o o o X Y o o} {o o o [X Y] o o} + 1.5 {o o o o X Y o} {o o o o [X Y] o} + 1.6 {o o o o o X Y} {o o o o o [X Y]} + + 2.1 {X Y o o o o o o} {[X Y] o o o o o...} + 2.2 {o X Y o o o o o} {o [X Y] o o o o...} + 2.3 {o o X Y o o o o} {o o [X Y] o o o...} + 2.4 {o o o X Y o o o} {...o o [X Y] o o o} + 2.5 {o o o o X Y o o} {...o o o [X Y] o o} + 2.6 {o o o o o X Y o} {...o o o o [X Y] o} + 2.7 {o o o o o o X Y} {...o o o o o [X Y]} + + 3.1 {X Y o o o o o o o} {[X Y] o o o o o...} + 3.2 {o X Y o o o o o o} {o [X Y] o o o o...} + 3.3 {o o X Y o o o o o} {o o [X Y] o o o...} + 3.4 {o o o X Y o o o o} {...o o [X Y] o o o...} + 3.5 {o o o o X Y o o o} {...o o [X Y] o o o} + 3.6 {o o o o o X Y o o} {...o o o [X Y] o o} + 3.7 {o o o o o o X Y o} {...o o o o [X Y] o} + 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} + +} { + do_execsql_test 2.$tn.1 { + DELETE FROM t1; + INSERT INTO t1 
VALUES($doc, NULL); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y'; + } [list $res] + + do_execsql_test 2.$tn.2 { + DELETE FROM t1; + INSERT INTO t1 VALUES(NULL, $doc); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y'; + } [list $res] +} + +finish_test + diff --git a/test/permutations.test b/test/permutations.test index 308d521f0e..38c8321e3a 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -226,6 +226,7 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test + fts5af.test } test_suite "nofaultsim" -prefix "" -description { From 700b33d7a5665a24b3a806d195f0dfd248f8257b Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 25 Jul 2014 20:30:47 +0000 Subject: [PATCH 022/206] Add extension apis xRowCount, xQueryPhrase, xSetAuxdata and xGetAuxdata. And a ranking function that uses all of the above. FossilOrigin-Name: c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb --- ext/fts5/fts5.c | 177 ++++++++++++++++++++++++---- ext/fts5/fts5.h | 82 ++++++++++++- ext/fts5/fts5Int.h | 5 +- ext/fts5/fts5_aux.c | 249 +++++++++++++++++++++++++++++++++++++--- ext/fts5/fts5_expr.c | 100 ++++++++++++++++ ext/fts5/fts5_storage.c | 17 +-- manifest | 26 ++--- manifest.uuid | 2 +- test/fts5ae.test | 55 +++++++-- test/fts5af.test | 43 ++++--- 10 files changed, 667 insertions(+), 89 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 14da56ec40..e89817d04e 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -19,6 +19,7 @@ typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; typedef struct Fts5Global Fts5Global; typedef struct Fts5Auxiliary Fts5Auxiliary; +typedef struct Fts5Auxdata Fts5Auxdata; /* ** A single object of this type is allocated when the FTS5 module is @@ -75,7 +76,8 @@ struct Fts5Cursor { /* Variables used by auxiliary functions */ i64 iCsrId; /* Cursor id */ - Fts5Auxiliary *pAux; /* Currently executing 
function */ + Fts5Auxiliary *pAux; /* Currently executing extension function */ + Fts5Auxdata *pAuxdata; /* First in linked list of aux-data */ int *aColumnSize; /* Values for xColumnSize() */ }; @@ -93,6 +95,13 @@ struct Fts5Cursor { #define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag)) #define CsrFlagTest(pCsr, flag) ((pCsr)->csrflags & (flag)) +struct Fts5Auxdata { + Fts5Auxiliary *pAux; /* Extension to which this belongs */ + void *pPtr; /* Pointer value */ + void(*xDelete)(void*); /* Destructor */ + Fts5Auxdata *pNext; /* Next object in linked list */ +}; + /* ** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy ** argument is non-zero, attempt delete the shadow tables from teh database @@ -325,12 +334,21 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; Fts5Cursor **pp; + Fts5Auxdata *pData; + Fts5Auxdata *pNext; + if( pCsr->pStmt ){ int eStmt = fts5StmtType(pCsr->idxNum); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); } sqlite3Fts5ExprFree(pCsr->pExpr); + for(pData=pCsr->pAuxdata; pData; pData=pNext){ + pNext = pData->pNext; + if( pData->xDelete ) pData->xDelete(pData->pPtr); + sqlite3_free(pData); + } + /* Remove the cursor from the Fts5Global.pCsr list */ for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); *pp = pCsr->pNext; @@ -372,6 +390,16 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ return rc; } +static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ + int rc; + rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); + if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ + CsrFlagSet(pCsr, FTS5CSR_EOF); + } + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + return rc; +} + /* ** This is the xFilter interface for the virtual table. 
See ** the virtual table xFilter method documentation for additional @@ -403,11 +431,7 @@ static int fts5FilterMethod( const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); - if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ - CsrFlagSet(pCsr, FTS5CSR_EOF); - } - CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + rc = fts5CursorFirst(pTab, pCsr, bAsc); } }else{ if( ePlan==FTS5_PLAN_ROWID ){ @@ -618,10 +642,20 @@ static int fts5ApiColumnCount(Fts5Context *pCtx){ return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; } -static int fts5ApiColumnAvgSize(Fts5Context *pCtx, int iCol, int *pnToken){ +static int fts5ApiColumnTotalSize( + Fts5Context *pCtx, + int iCol, + sqlite3_int64 *pnToken +){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); - return sqlite3Fts5StorageAvgsize(pTab->pStorage, iCol, pnToken); + return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); +} + +static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( @@ -694,24 +728,123 @@ static int fts5ApiPoslist( return sqlite3Fts5PoslistNext64(a, n, pi, piPos); } +static int fts5ApiSetAuxdata( + Fts5Context *pCtx, /* Fts5 context */ + void *pPtr, /* Pointer to save as auxdata */ + void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Auxdata *pData; + + for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ + if( pData->pAux==pCsr->pAux ) break; + } + + if( pData ){ + if( pData->xDelete ){ + pData->xDelete(pData->pPtr); + } + }else{ + pData = (Fts5Auxdata*)sqlite3_malloc(sizeof(Fts5Auxdata)); + if( pData==0 ) return SQLITE_NOMEM; + memset(pData, 0, 
sizeof(Fts5Auxdata)); + pData->pAux = pCsr->pAux; + pData->pNext = pCsr->pAuxdata; + pCsr->pAuxdata = pData; + } + + pData->xDelete = xDelete; + pData->pPtr = pPtr; + return SQLITE_OK; +} + +static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Auxdata *pData; + void *pRet = 0; + + for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ + if( pData->pAux==pCsr->pAux ) break; + } + + if( pData ){ + pRet = pData->pPtr; + if( bClear ){ + pData->pPtr = 0; + pData->xDelete = 0; + } + } + + return pRet; +} + +static int fts5ApiQueryPhrase(Fts5Context*, int, void*, + int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) +); + +static const Fts5ExtensionApi sFts5Api = { + 1, /* iVersion */ + fts5ApiUserData, + fts5ApiColumnCount, + fts5ApiRowCount, + fts5ApiColumnTotalSize, + fts5ApiTokenize, + fts5ApiPhraseCount, + fts5ApiPhraseSize, + fts5ApiRowid, + fts5ApiColumnText, + fts5ApiColumnSize, + fts5ApiPoslist, + fts5ApiQueryPhrase, + fts5ApiSetAuxdata, + fts5ApiGetAuxdata, +}; + + +/* +** Implementation of API function xQueryPhrase(). 
+*/ +static int fts5ApiQueryPhrase( + Fts5Context *pCtx, + int iPhrase, + void *pUserData, + int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + int rc; + Fts5Cursor *pNew = 0; + + rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); + if( rc==SQLITE_OK ){ + Fts5Config *pConf = pTab->pConfig; + pNew->idxNum = FTS5_PLAN_MATCH; + pNew->base.pVtab = (sqlite3_vtab*)pTab; + rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); + } + + if( rc==SQLITE_OK ){ + for(rc = fts5CursorFirst(pTab, pNew, 0); + rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; + rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) + ){ + rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + break; + } + } + } + /* pNew may still be 0 if fts5OpenMethod() failed; closing needs a cursor. */ + if( pNew ) fts5CloseMethod((sqlite3_vtab_cursor*)pNew); + return rc; +} + static void fts5ApiCallback( sqlite3_context *context, int argc, sqlite3_value **argv ){ - static const Fts5ExtensionApi sApi = { - 1, /* iVersion */ - fts5ApiUserData, - fts5ApiColumnCount, - fts5ApiColumnAvgSize, - fts5ApiTokenize, - fts5ApiPhraseCount, - fts5ApiPhraseSize, - fts5ApiRowid, - fts5ApiColumnText, - fts5ApiColumnSize, - fts5ApiPoslist, - }; Fts5Auxiliary *pAux; Fts5Cursor *pCsr; @@ -730,7 +863,7 @@ static void fts5ApiCallback( }else{ assert( pCsr->pAux==0 ); pCsr->pAux = pAux; - pAux->xFunc(&sApi, (Fts5Context*)pCsr, context, argc-1, &argv[1]); + pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc-1, &argv[1]); pCsr->pAux = 0; } } diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 4d45ee60b0..82d4884dc7 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -43,10 +43,18 @@ typedef void (*fts5_extension_function)( /* ** -** xUserData: +** xUserData(pFts): +** ** Return a copy of the context pointer the extension function was ** registered with.
** +** +** xColumnTotalSize(pFts, iCol, pnToken): +** +** Returns the total number of tokens in column iCol, considering all +** rows in the FTS5 table. +** +** ** xColumnCount: ** Returns the number of columns in the FTS5 table. ** @@ -73,6 +81,68 @@ typedef void (*fts5_extension_function)( ** ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. +** +** +** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): +** +** This API function is used to query the FTS table for phrase iPhrase +** of the current query. Specifically, a query equivalent to: +** +** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid DESC +** +** with $p set to a phrase equivalent to the phrase iPhrase of the +** current query is executed. For each row visited, the callback function +** passed as the fourth argument is invoked. The context and API objects +** passed to the callback function may be used to access the properties of +** each matched row. Invoking Api.xUserData() returns a copy of the pointer +** passed as the third argument to pUserData. +** +** If the callback function returns any value other than SQLITE_OK, the +** query is abandoned and the xQueryPhrase function returns immediately. +** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. +** Otherwise, the error code is propagated upwards. +** +** If the query runs to completion without incident, SQLITE_OK is returned. +** Or, if some error occurs before the query completes or is aborted by +** the callback, an SQLite error code is returned. +** +** +** xSetAuxdata(pFts5, pAux, xDelete) +** +** Save the pointer passed as the second argument as the extension function's +** "auxiliary data". The pointer may then be retrieved by the current or any +** future invocation of the same fts5 extension function made as part +** of the same MATCH query using the xGetAuxdata() API. +** +** Each extension function is allocated a single auxiliary data slot per +**
If the extension function is invoked more than once by the SQL +** query, then all invocations share a single auxiliary data context. +** +** If there is already an auxiliary data pointer when this function is +** invoked, then it is replaced by the new pointer. If an xDelete callback +** was specified along with the original pointer, it is invoked at this +** point. +** +** The xDelete callback, if one is specified, is also invoked on the +** auxiliary data pointer after the FTS5 query has finished. +** +** +** xGetAuxdata(pFts5, bClear) +** +** Returns the current auxiliary data pointer for the fts5 extension +** function. See the xSetAuxdata() method for details. +** +** If the bClear argument is non-zero, then the auxiliary data is cleared +** (set to NULL) before this function returns. In this case the xDelete, +** if any, is not invoked. +** +** +** xRowCount(pFts5, pnRow) +** +** This function is used to retrieve the total number of rows in the table. +** In other words, the same value that would be returned by: +** +** SELECT count(*) FROM ftstable; */ struct Fts5ExtensionApi { int iVersion; /* Currently always set to 1 */ @@ -80,7 +150,9 @@ struct Fts5ExtensionApi { void *(*xUserData)(Fts5Context*); int (*xColumnCount)(Fts5Context*); - int (*xColumnAvgSize)(Fts5Context*, int iCol, int *pnToken); + int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); + int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); + int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ @@ -94,6 +166,12 @@ struct Fts5ExtensionApi { int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos); + + int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, + int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) + ); + int (*xSetAuxdata)(Fts5Context*, 
void *pAux, void(*xDelete)(void*)); + void *(*xGetAuxdata)(Fts5Context*, int bClear); }; #define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index fc78d64589..21c6d459d9 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -300,7 +300,8 @@ int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); -int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg); +int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); +int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); /* @@ -354,6 +355,8 @@ int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); +int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**); + /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. 
The interfaces below this point are called by diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index faee34c82d..6281cf60d0 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -12,6 +12,7 @@ */ #include "fts5Int.h" +#include typedef struct SnippetPhrase SnippetPhrase; typedef struct SnippetIter SnippetIter; @@ -267,7 +268,6 @@ static int fts5SnippetText( int iPrint; int iMatchto; - int iBit0; int iLast; int *aiStart = ctx.aiStart - ctx.iFirst; @@ -367,7 +367,6 @@ static void fts5SnippetFunction( const char *zEllip = "..."; int nToken = -15; int nAbs; - int nFrag; /* Number of fragments to return */ int rc; SnippetIter *pIter = 0; @@ -384,8 +383,6 @@ static void fts5SnippetFunction( if( rc==SQLITE_OK ){ Fts5Buffer buf; /* Result buffer */ int nBestScore = 0; /* Score of best snippet found */ - int n; /* Size of column snippet is from in bytes */ - int i; /* Used to iterate through phrases */ for(fts5SnippetIterFirst(pIter); pIter->iLast>=0; @@ -414,6 +411,93 @@ static void fts5SnippetFunction( } } +typedef struct Fts5GatherCtx Fts5GatherCtx; +struct Fts5GatherCtx { + int nCol; + int iPhrase; + int *anVal; +}; + +static int fts5GatherCallback( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + void *pUserData +){ + Fts5GatherCtx *p = (Fts5GatherCtx*)pUserData; + int i = 0; + int iPrev = -1; + i64 iPos = 0; + + while( 0==pApi->xPoslist(pFts, 0, &i, &iPos) ){ + int iCol = FTS5_POS2COLUMN(iPos); + if( iCol!=iPrev ){ + p->anVal[p->iPhrase * p->nCol + iCol]++; + iPrev = iCol; + } + } + + return SQLITE_OK; +} + +/* +** This function returns a pointer to an array of integers containing entries +** indicating the number of rows in the table for which each phrase features +** at least once in each column. +** +** If nCol is the number of matchable columns in the table, and nPhrase is +** the number of phrases in the query, the array contains a total of +** (nPhrase*nCol) entries. 
+** +** For phrase iPhrase and column iCol: +** +** anVal[iPhrase * nCol + iCol] +** +** is set to the number of rows in the table for which column iCol contains +** at least one instance of phrase iPhrase. +*/ +static int fts5GatherTotals( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + int **panVal +){ + int rc = SQLITE_OK; + int *anVal = 0; + int i; /* For iterating through expression phrases */ + int nPhrase = pApi->xPhraseCount(pFts); + int nCol = pApi->xColumnCount(pFts); + int nByte = nCol * nPhrase * sizeof(int); + Fts5GatherCtx sCtx; + + sCtx.nCol = nCol; + anVal = sCtx.anVal = (int*)sqlite3_malloc(nByte); + if( anVal==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(anVal, 0, nByte); + } + + for(i=0; ixQueryPhrase(pFts, i, (void*)&sCtx, fts5GatherCallback); + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(anVal); + anVal = 0; + } + + *panVal = anVal; + return rc; +} + +typedef struct Fts5Bm25Context Fts5Bm25Context; +struct Fts5Bm25Context { + int nPhrase; + int nCol; + double *aIDF; /* Array of IDF values */ + double *aAvg; /* Average size of each column in tokens */ +}; + static void fts5Bm25Function( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ @@ -421,7 +505,99 @@ static void fts5Bm25Function( int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ - assert( 0 ); + const double k1 = 1.2; + const double B = 0.75; + + int rc = SQLITE_OK; + Fts5Bm25Context *p; + + p = pApi->xGetAuxdata(pFts, 0); + if( p==0 ){ + int *anVal = 0; + int ic; /* For iterating through columns */ + int ip; /* For iterating through phrases */ + i64 nRow; /* Total number of rows in table */ + int nPhrase = pApi->xPhraseCount(pFts); + int nCol = pApi->xColumnCount(pFts); + int nByte = sizeof(Fts5Bm25Context) + + sizeof(double) * nPhrase * nCol /* aIDF[] */ + + 
sizeof(double) * nCol; /* aAvg[] */ + + p = (Fts5Bm25Context*)sqlite3_malloc(nByte); + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(p, 0, nByte); + p->aAvg = (double*)&p[1]; + p->aIDF = (double*)&p->aAvg[nCol]; + } + + if( rc==SQLITE_OK ){ + rc = pApi->xRowCount(pFts, &nRow); + assert( nRow>0 || rc!=SQLITE_OK ); + } + + for(ic=0; rc==SQLITE_OK && icxColumnTotalSize(pFts, ic, &nToken); + p->aAvg[ic] = (double)nToken / (double)nRow; + } + + if( rc==SQLITE_OK ){ + rc = fts5GatherTotals(pApi, pFts, &anVal); + } + for(ic=0; icaIDF[idx] = log( (0.5 + nRow - anVal[idx]) / (0.5 + anVal[idx]) ); + if( p->aIDF[idx]<0.0 ) p->aIDF[idx] = 0.0; + } + } + + sqlite3_free(anVal); + if( rc==SQLITE_OK ){ + rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); + } + if( rc!=SQLITE_OK ){ + sqlite3_free(p); + } + } + + if( rc==SQLITE_OK ){ + int ip; + double score = 0.0; + + for(ip=0; rc==SQLITE_OK && ipnPhrase; ip++){ + int iPrev = 0; + int nHit = 0; + int i = 0; + i64 iPos = 0; + + while( rc==SQLITE_OK && 0==pApi->xPoslist(pFts, ip, &i, &iPos) ){ + int iCol = FTS5_POS2COLUMN(iPos); + if( iCol!=iPrev && nHit>0 ){ + int sz = 0; + int idx = ip * p->nCol + iPrev; + rc = pApi->xColumnSize(pFts, iPrev, &sz); + + score += p->aIDF[idx] * nHit * (k1+1.0) / + (nHit + k1 * (1.0 - B + B * sz / p->aAvg[iCol])); + nHit = 0; + } + nHit++; + iPrev = iCol; + } + } + + if( rc==SQLITE_OK ){ + sqlite3_result_double(pCtx, score); + } + + } + + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + } } static int fts5TestCallback( @@ -465,13 +641,13 @@ static void fts5TestFunction( nCol = pApi->xColumnCount(pFts); if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "columnavgsize "); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntotalsize "); } - if( 0==zReq || 0==sqlite3_stricmp(zReq, "columnavgsize") ){ + if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntotalsize") ){ if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); for(i=0; rc==SQLITE_OK && ixColumnAvgSize(pFts, i, &colsz); 
+ i64 colsz = 0; + rc = pApi->xColumnTotalSize(pFts, i, &colsz); sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", i==0?"":" ", colsz); } if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); @@ -580,7 +756,44 @@ static void fts5TestFunction( } if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " rowid "); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " queryphrase "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "queryphrase") ){ + int ic, ip; + int *anVal = 0; + Fts5Buffer buf1; + memset(&buf1, 0, sizeof(Fts5Buffer)); + + if( rc==SQLITE_OK ){ + anVal = (int*)pApi->xGetAuxdata(pFts, 0); + if( anVal==0 ){ + rc = fts5GatherTotals(pApi, pFts, &anVal); + if( rc==SQLITE_OK ){ + rc = pApi->xSetAuxdata(pFts, (void*)anVal, sqlite3_free); + } + } + } + + for(ip=0; rc==SQLITE_OK && ip0 ) sqlite3Fts5BufferAppendString(&rc, &buf1, " "); + if( nCol>1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "{"); + for(ic=0; ic1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "}"); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendListElem(&rc, &s, (const char*)buf1.p, buf1.n); + }else{ + sqlite3Fts5BufferAppendString(&rc, &s, (const char*)buf1.p); + } + sqlite3_free(buf1.p); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendString(&rc, &s, " rowid "); } if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowid") ){ iRowid = pApi->xRowid(pFts); @@ -588,7 +801,16 @@ static void fts5TestFunction( } if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " tokenize "); + sqlite3Fts5BufferAppendString(&rc, &s, " rowcount "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowcount") ){ + i64 nRow; + rc = pApi->xRowCount(pFts, &nRow); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%lld", nRow); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendString(&rc, &s, " tokenize "); } if( 0==zReq || 0==sqlite3_stricmp(zReq, "tokenize") ){ Fts5Buffer buf; @@ -629,8 +851,9 @@ int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ fts5_extension_function xFunc;/* Callback function */ void (*xDestroy)(void*); /* Destructor function 
*/ } aBuiltin [] = { - { "snippet", 0, fts5SnippetFunction, 0 }, - { "fts5_test", 0, fts5TestFunction, 0 }, + { "bm25", 0, fts5Bm25Function, 0 }, + { "snippet", 0, fts5SnippetFunction, 0 }, + { "fts5_test", 0, fts5TestFunction, 0 }, }; int rc = SQLITE_OK; /* Return code */ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 9eea4552bf..ab9b307bc2 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -227,6 +227,106 @@ int sqlite3Fts5ExprNew( return sParse.rc; } +static char *fts5ExprStrdup(int *pRc, const char *zIn){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + int nByte = strlen(zIn) + 1; + zRet = sqlite3_malloc(nByte); + if( zRet ){ + memcpy(zRet, zIn, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + return zRet; +} + +static void *fts5ExprMalloc(int *pRc, int nByte){ + void *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3_malloc(nByte); + if( pRet ){ + memset(pRet, 0, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + return pRet; +} + +/* +** Create a new FTS5 expression by cloning phrase iPhrase of the +** expression passed as the second argument. 
+*/ +int sqlite3Fts5ExprPhraseExpr( + Fts5Config *pConfig, + Fts5Expr *pExpr, + int iPhrase, + Fts5Expr **ppNew +){ + int rc = SQLITE_OK; /* Return code */ + Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */ + int i; /* Used to iterate through phrase terms */ + + /* Components of the new expression object */ + Fts5Expr *pNew; + Fts5ExprPhrase **apPhrase; + Fts5ExprNode *pNode; + Fts5ExprNearset *pNear; + Fts5ExprPhrase *pCopy; + + pOrig = pExpr->apPhrase[iPhrase]; + pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); + apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); + pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); + pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) + ); + pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm + ); + + for(i=0; rc==SQLITE_OK && inTerm; i++){ + pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm); + pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; + } + + if( rc==SQLITE_OK ){ + /* All the allocations succeeded. Put the expression object together. */ + pNew->pIndex = pExpr->pIndex; + pNew->pRoot = pNode; + pNew->nPhrase = 1; + pNew->apPhrase = apPhrase; + pNew->apPhrase[0] = pCopy; + + pNode->eType = FTS5_STRING; + pNode->pNear = pNear; + + pNear->iCol = -1; + pNear->nPhrase = 1; + pNear->apPhrase[0] = pCopy; + + pCopy->nTerm = pOrig->nTerm; + pCopy->pNode = pNode; + }else{ + /* At least one allocation failed. Free them all. */ + if( pCopy ){ + for(i=0; inTerm; i++){ + sqlite3_free(pCopy->aTerm[i].zTerm); + } + sqlite3_free(pCopy); + sqlite3_free(pNear); + sqlite3_free(pNode); + sqlite3_free(apPhrase); + sqlite3_free(pNew); + pNew = 0; + } + } + + *ppNew = pNew; + return rc; +} + /* ** Free the expression node object passed as the only argument. 
*/ diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index c56c2d038a..22cb427e05 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -705,15 +705,18 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ return rc; } -int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg){ +int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ int rc = fts5StorageLoadTotals(p); if( rc==SQLITE_OK ){ - int nAvg = 1; - if( p->nTotalRow ){ - nAvg = (int)((p->aTotalSize[iCol] + (p->nTotalRow/2)) / p->nTotalRow); - if( nAvg<1 ) nAvg = 1; - *pnAvg = nAvg; - } + *pnToken = p->aTotalSize[iCol]; + } + return rc; +} + +int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ + int rc = fts5StorageLoadTotals(p); + if( rc==SQLITE_OK ){ + *pnRow = p->nTotalRow; } return rc; } diff --git a/manifest b/manifest index ec56664116..e50b5af932 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\ssnippet()\sfunction\sto\sfts5. -D 2014-07-23T19:31:56.454 +C Add\sextension\sapis\sxRowCount,\sxQueryPhrase,\sxSetAuxdata\sand\sxGetAuxdata.\sAnd\sa\sranking\sfunction\sthat\suses\sall\sof\sthe\sabove. 
+D 2014-07-25T20:30:47.445 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,15 +103,15 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 6f859d444eb8be46cb3f7aba3aaae369c5b26809 -F ext/fts5/fts5.h 57325b418b26dcd60be5bc8aab05b33767d81590 -F ext/fts5/fts5Int.h 12d03496152b716e63a5380e396b776fbefa2065 -F ext/fts5/fts5_aux.c cba929fb13931c9b8be7d572991e648b98f14cf2 +F ext/fts5/fts5.c 1496aff16dd9b0a013d14b6c8cf5b7df8c170abe +F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a +F ext/fts5/fts5Int.h 92fb9c4f759674ef569aebc338f363e167a8933c +F ext/fts5/fts5_aux.c f8bed7a86b65cb07cffdafbf4f0611f127b36274 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 2911813db174afa28b69ccc7031b6dd80293b241 +F ext/fts5/fts5_expr.c 65c1918002f2ec1755e4c0c28bf007659409fbd8 F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997 -F ext/fts5/fts5_storage.c 9a2744f492413395a0e75f20c19b797c801a7308 +F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -599,8 +599,8 @@ F test/fts5aa.test a2c7bbc18f25f0b57ea8fc483c8a8830273b9ed4 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test fe9db78201bbb87c6f82b72a14b946d0f7fc3026 -F 
test/fts5af.test a2980528a04b67ac4690e6c02ebe9455f45c9a35 +F test/fts5ae.test 1424ec557d543ace1f3cf6d231b247bc7b9f337c +F test/fts5af.test 5f53d0a52280b63caf5a519d6994c4d428835155 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1196,7 +1196,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d44d3a8518ff7a1a3e2c0ab97493aa590676ee8c -R 638d6826a594d773b5778bd6943c3d96 +P bdc58fd28a63ac9632c3df6c7768a9a236566605 +R 2e8cb20122478987f116ef8ff9f6144b U dan -Z 30db824dafb73f9c4c6895383aa25ed9 +Z 5dd5c36b8a0e52d63a87d23e7179571f diff --git a/manifest.uuid b/manifest.uuid index 990c947a9e..8319bdf8c4 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bdc58fd28a63ac9632c3df6c7768a9a236566605 \ No newline at end of file +c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index c1eabef3f8..4480c081df 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -157,22 +157,22 @@ do_execsql_test 5.2 { } do_execsql_test 5.3 { - SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { - 3 {2 2} - 2 {2 2} - 1 {2 2} + 3 {5 7} + 2 {5 7} + 1 {5 7} } do_execsql_test 5.4 { INSERT INTO t5 VALUES('x y z', 'v w x y z'); - SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { - 3 {2 3} - 2 {2 3} - 1 {2 3} + 3 {8 12} + 2 {8 12} + 1 {8 12} } #------------------------------------------------------------------------- @@ -192,5 +192,44 @@ do_execsql_test 
6.2 { 1 {{there are more} {things in heaven and earth}} } +#------------------------------------------------------------------------- +# Test the xQueryPhrase() API +# +reset_db +do_execsql_test 7.1 { + CREATE VIRTUAL TABLE t7 USING fts5(x, y); +} +do_test 7.2 { + foreach {x y} { + {q i b w s a a e l o} {i b z a l f p t e u} + {b a z t a l o x d i} {b p a d b f h d w y} + {z m h n p p u i e g} {v h d v b x j j c z} + {a g i m v a u c b i} {p k s o t l r t b m} + {v v c j o d a s c p} {f f v o k p o f o g} + } { + execsql {INSERT INTO t7 VALUES($x, $y)} + } + execsql { SELECT count(*) FROM t7 } +} {5} + +foreach {tn q res} { + 1 a {{4 2}} + 2 b {{3 4}} + 3 c {{2 1}} + 4 d {{2 2}} + 5 {a AND b} {{4 2} {3 4}} + 6 {a OR b OR c OR d} {{4 2} {3 4} {2 1} {2 2}} +} { + do_execsql_test 7.3.$tn { + SELECT fts5_test(t7, 'queryphrase') FROM t7 WHERE t7 MATCH $q LIMIT 1 + } [list $res] +} + +do_execsql_test 7.4 { + SELECT fts5_test(t7, 'rowcount') FROM t7 WHERE t7 MATCH 'a'; +} {5 5 5 5} + + + finish_test diff --git a/test/fts5af.test b/test/fts5af.test index cd5f91f13e..2412b4a7a4 100644 --- a/test/fts5af.test +++ b/test/fts5af.test @@ -30,6 +30,25 @@ do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, y); } +proc do_snippet_test {tn doc match res} { + + uplevel #0 [list set v1 $doc] + uplevel #0 [list set v2 $match] + + do_execsql_test $tn.1 { + DELETE FROM t1; + INSERT INTO t1 VALUES($v1, NULL); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + } [list $res] + + do_execsql_test $tn.2 { + DELETE FROM t1; + INSERT INTO t1 VALUES(NULL, $v1); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + } [list $res] + +} + foreach {tn doc res} { @@ -82,17 +101,7 @@ foreach {tn doc res} { 7.5 {o o o o X o o X o} {...o o [X] o o [X] o} 7.6 {o o o o o X o o X} {...o o o [X] o o [X]} } { - do_execsql_test 1.$tn.1 { - DELETE FROM t1; - INSERT INTO t1 VALUES($doc, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X'; - } 
[list $res] - - do_execsql_test 1.$tn.2 { - DELETE FROM t1; - INSERT INTO t1 VALUES(NULL, $doc); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X'; - } [list $res] + do_snippet_test 1.$tn $doc X $res } foreach {tn doc res} { @@ -121,17 +130,7 @@ foreach {tn doc res} { 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} } { - do_execsql_test 2.$tn.1 { - DELETE FROM t1; - INSERT INTO t1 VALUES($doc, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y'; - } [list $res] - - do_execsql_test 2.$tn.2 { - DELETE FROM t1; - INSERT INTO t1 VALUES(NULL, $doc); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y'; - } [list $res] + do_snippet_test 1.$tn $doc "X + Y" $res } finish_test From 454b5ce52418972a4ecc61f0c4c60bdb2e01f0ef Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 26 Jul 2014 18:38:51 +0000 Subject: [PATCH 023/206] Add tests and fixes for bm25() function. FossilOrigin-Name: 71d32f53e81921e43c933cc968cb1c18d83fe1e0 --- ext/fts5/fts5_aux.c | 162 ++++++++++++++++++++++++++++++++++++-------- manifest | 14 ++-- manifest.uuid | 2 +- test/fts5ae.test | 36 ++++++++++ 4 files changed, 178 insertions(+), 36 deletions(-) diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 6281cf60d0..2426c1dc51 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -411,19 +411,26 @@ static void fts5SnippetFunction( } } -typedef struct Fts5GatherCtx Fts5GatherCtx; + +/* +** Context object passed by fts5GatherTotals() to xQueryPhrase callback +** fts5GatherCallback(). +*/ struct Fts5GatherCtx { - int nCol; - int iPhrase; - int *anVal; + int nCol; /* Number of columns in FTS table */ + int iPhrase; /* Phrase currently under investigation */ + int *anVal; /* Array to populate */ }; +/* +** Callback used by fts5GatherTotals() with the xQueryPhrase() API. 
+*/ static int fts5GatherCallback( const Fts5ExtensionApi *pApi, Fts5Context *pFts, - void *pUserData + void *pUserData /* Pointer to Fts5GatherCtx object */ ){ - Fts5GatherCtx *p = (Fts5GatherCtx*)pUserData; + struct Fts5GatherCtx *p = (struct Fts5GatherCtx*)pUserData; int i = 0; int iPrev = -1; i64 iPos = 0; @@ -466,7 +473,7 @@ static int fts5GatherTotals( int nPhrase = pApi->xPhraseCount(pFts); int nCol = pApi->xColumnCount(pFts); int nByte = nCol * nPhrase * sizeof(int); - Fts5GatherCtx sCtx; + struct Fts5GatherCtx sCtx; sCtx.nCol = nCol; anVal = sCtx.anVal = (int*)sqlite3_malloc(nByte); @@ -492,24 +499,19 @@ static int fts5GatherTotals( typedef struct Fts5Bm25Context Fts5Bm25Context; struct Fts5Bm25Context { - int nPhrase; - int nCol; + int nPhrase; /* Number of phrases in query */ + int nCol; /* Number of columns in FTS table */ double *aIDF; /* Array of IDF values */ double *aAvg; /* Average size of each column in tokens */ }; -static void fts5Bm25Function( +static int fts5Bm25GetContext( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ - sqlite3_context *pCtx, /* Context for returning result/error */ - int nVal, /* Number of values in apVal[] array */ - sqlite3_value **apVal /* Array of trailing arguments */ + Fts5Bm25Context **pp /* OUT: Context object */ ){ - const double k1 = 1.2; - const double B = 0.75; - - int rc = SQLITE_OK; Fts5Bm25Context *p; + int rc = SQLITE_OK; p = pApi->xGetAuxdata(pFts, 0); if( p==0 ){ @@ -530,11 +532,14 @@ static void fts5Bm25Function( memset(p, 0, nByte); p->aAvg = (double*)&p[1]; p->aIDF = (double*)&p->aAvg[nCol]; + p->nCol = nCol; + p->nPhrase = nPhrase; } if( rc==SQLITE_OK ){ rc = pApi->xRowCount(pFts, &nRow); assert( nRow>0 || rc!=SQLITE_OK ); + if( nRow<2 ) nRow = 2; } for(ic=0; rc==SQLITE_OK && icaIDF[idx] = log( (0.5 + nRow - anVal[idx]) / (0.5 + anVal[idx]) ); - if( p->aIDF[idx]<0.0 ) p->aIDF[idx] = 0.0; + /* Calculate the IDF (Inverse 
Document Frequency) for phrase ip + ** in column ic. This is done using the standard BM25 formula as + ** found on wikipedia: + ** + ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) + ** + ** where "N" is the total number of documents in the set and nHit + ** is the number that contain at least one instance of the phrase + ** under consideration. + ** + ** The problem with this is that if (N < 2*nHit), the IDF is + ** negative. Which is undesirable. So the minimum allowable IDF is + ** (1e-6) - roughly the same as a term that appears in just over + ** half of a set of 5,000,000 documents. */ + int idx = ip * nCol + ic; /* Index in aIDF[] and anVal[] arrays */ + int nHit = anVal[idx]; /* Number of docs matching "ic: ip" */ + + p->aIDF[idx] = log( (0.5 + nRow - nHit) / (0.5 + nHit) ); + if( p->aIDF[idx]<=0.0 ) p->aIDF[idx] = 1e-6; + assert( p->aIDF[idx]>=0.0 ); } } @@ -560,39 +582,122 @@ static void fts5Bm25Function( } if( rc!=SQLITE_OK ){ sqlite3_free(p); + p = 0; } } + *pp = p; + return rc; +} + +static void fts5Bm25DebugContext( + int *pRc, /* IN/OUT: Return code */ + Fts5Buffer *pBuf, /* Buffer to populate */ + Fts5Bm25Context *p /* Context object to decode */ +){ + int ip; + int ic; + + sqlite3Fts5BufferAppendString(pRc, pBuf, "idf "); + if( p->nPhrase>1 || p->nCol>1 ){ + sqlite3Fts5BufferAppendString(pRc, pBuf, "{"); + } + for(ip=0; ipnPhrase; ip++){ + if( ip>0 ) sqlite3Fts5BufferAppendString(pRc, pBuf, " "); + if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "{"); + for(ic=0; icnCol; ic++){ + if( ic>0 ) sqlite3Fts5BufferAppendString(pRc, pBuf, " "); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%f", p->aIDF[ip*p->nCol+ic]); + } + if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "}"); + } + if( p->nPhrase>1 || p->nCol>1 ){ + sqlite3Fts5BufferAppendString(pRc, pBuf, "}"); + } + + sqlite3Fts5BufferAppendString(pRc, pBuf, " avgdl "); + if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "{"); + for(ic=0; icnCol; ic++){ + if( ic>0 ) 
sqlite3Fts5BufferAppendString(pRc, pBuf, " "); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%f", p->aAvg[ic]); + } + if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "}"); +} + +static void fts5Bm25DebugRow( + int *pRc, + Fts5Buffer *pBuf, + Fts5Bm25Context *p, + const Fts5ExtensionApi *pApi, + Fts5Context *pFts +){ +} + +static void fts5Bm25Function( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +){ + const double k1 = 1.2; + const double B = 0.75; + int rc = SQLITE_OK; + Fts5Bm25Context *p; + + rc = fts5Bm25GetContext(pApi, pFts, &p); + if( rc==SQLITE_OK ){ + /* If the bDebug flag is set, instead of returning a numeric rank, this + ** function returns a text value showing how the rank is calculated. */ + Fts5Buffer debug; + int bDebug = (pApi->xUserData(pFts)!=0); + memset(&debug, 0, sizeof(Fts5Buffer)); + int ip; double score = 0.0; + if( bDebug ){ + fts5Bm25DebugContext(&rc, &debug, p); + fts5Bm25DebugRow(&rc, &debug, p, pApi, pFts); + } + for(ip=0; rc==SQLITE_OK && ipnPhrase; ip++){ int iPrev = 0; int nHit = 0; int i = 0; i64 iPos = 0; - while( rc==SQLITE_OK && 0==pApi->xPoslist(pFts, ip, &i, &iPos) ){ + while( rc==SQLITE_OK ){ + int bDone = pApi->xPoslist(pFts, ip, &i, &iPos); int iCol = FTS5_POS2COLUMN(iPos); - if( iCol!=iPrev && nHit>0 ){ + if( (iCol!=iPrev || bDone) && nHit>0 ){ int sz = 0; int idx = ip * p->nCol + iPrev; + double bm25; rc = pApi->xColumnSize(pFts, iPrev, &sz); - score += p->aIDF[idx] * nHit * (k1+1.0) / - (nHit + k1 * (1.0 - B + B * sz / p->aAvg[iCol])); + bm25 = (p->aIDF[idx] * nHit * (k1+1.0)) / + (nHit + k1 * (1.0 - B + B * sz / p->aAvg[iPrev])); + + + score = score + bm25; nHit = 0; } + if( bDone ) break; nHit++; iPrev = iCol; } } - - if( rc==SQLITE_OK ){ - 
sqlite3_result_double(pCtx, score); - } + if( rc==SQLITE_OK ){ + if( bDebug ){ + sqlite3_result_text(pCtx, (const char*)debug.p, -1, SQLITE_TRANSIENT); + }else{ + sqlite3_result_double(pCtx, score); + } + } + sqlite3_free(debug.p); } if( rc!=SQLITE_OK ){ @@ -852,6 +957,7 @@ int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ void (*xDestroy)(void*); /* Destructor function */ } aBuiltin [] = { { "bm25", 0, fts5Bm25Function, 0 }, + { "bm25debug", (void*)1, fts5Bm25Function, 0 }, { "snippet", 0, fts5SnippetFunction, 0 }, { "fts5_test", 0, fts5TestFunction, 0 }, }; diff --git a/manifest b/manifest index e50b5af932..84cd1f32f8 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sextension\sapis\sxRowCount,\sxQueryPhrase,\sxSetAuxdata\sand\sxGetAuxdata.\sAnd\sa\sranking\sfunction\sthat\suses\sall\sof\sthe\sabove. -D 2014-07-25T20:30:47.445 +C Add\stests\sand\sfixes\sfor\sbm25()\sfunction. +D 2014-07-26T18:38:51.294 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,7 +106,7 @@ F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 1496aff16dd9b0a013d14b6c8cf5b7df8c170abe F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a F ext/fts5/fts5Int.h 92fb9c4f759674ef569aebc338f363e167a8933c -F ext/fts5/fts5_aux.c f8bed7a86b65cb07cffdafbf4f0611f127b36274 +F ext/fts5/fts5_aux.c 78adc5db0ff4d6834df220ba6b3caa351d98b971 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef F ext/fts5/fts5_expr.c 65c1918002f2ec1755e4c0c28bf007659409fbd8 @@ -599,7 +599,7 @@ F test/fts5aa.test a2c7bbc18f25f0b57ea8fc483c8a8830273b9ed4 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test 
1424ec557d543ace1f3cf6d231b247bc7b9f337c +F test/fts5ae.test 24b337571c51a10da1ae439b96b70317813a2fd4 F test/fts5af.test 5f53d0a52280b63caf5a519d6994c4d428835155 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d @@ -1196,7 +1196,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bdc58fd28a63ac9632c3df6c7768a9a236566605 -R 2e8cb20122478987f116ef8ff9f6144b +P c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb +R 3301ccb2b839356242606883792ca77e U dan -Z 5dd5c36b8a0e52d63a87d23e7179571f +Z 456b4a2f1abc554b124e25c35490489e diff --git a/manifest.uuid b/manifest.uuid index 8319bdf8c4..17caf8ac8b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb \ No newline at end of file +71d32f53e81921e43c933cc968cb1c18d83fe1e0 \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index 4480c081df..bb4904f210 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -229,6 +229,42 @@ do_execsql_test 7.4 { SELECT fts5_test(t7, 'rowcount') FROM t7 WHERE t7 MATCH 'a'; } {5 5 5 5} +#do_execsql_test 7.4 { +# SELECT rowid, bm25debug(t7) FROM t7 WHERE t7 MATCH 'a'; +#} {5 5 5 5} +# + +#------------------------------------------------------------------------- +# +do_test 8.1 { + execsql { CREATE VIRTUAL TABLE t8 USING fts5(x, y) } + foreach {rowid x y} { + 0 {A o} {o o o C o o o o o o o o} + 1 {o o B} {o o o C C o o o o o o o} + 2 {A o o} {o o o o D D o o o o o o} + 3 {o B} {o o o o o D o o o o o o} + 4 {E o G} {H o o o o o o o o o o o} + 5 {F o G} {I o J o o o o o o o o o} + 6 {E o o} {H o J o o o o o o o o o} + 7 {o o o} {o o o o o o o o o o o o} + 9 {o o o} {o o o o o o o o o o o o} + } { + execsql { INSERT INTO t8(rowid, x, y) VALUES($rowid, $x, $y) } + 
} +} {} + +foreach {tn q res} { + 1 {a} {0 2} + 2 {b} {3 1} + 3 {c} {1 0} + 4 {d} {2 3} + 5 {g AND (e OR f)} {5 4} + 6 {j AND (h OR i)} {5 6} +} { + do_execsql_test 8.2.$tn { + SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8) DESC; + } $res +} finish_test From 92e497e5171ab21e4127f083a15d6cb2670808d5 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 28 Jul 2014 20:14:02 +0000 Subject: [PATCH 024/206] Add the "loadfts" program, for performance testing the loading of data into fts3/fts4/fts5 tables. FossilOrigin-Name: 770b9540c19ad1e3d24adff382332bf032065efd --- ext/fts5/fts5_aux.c | 64 ++++++------- ext/fts5/fts5_config.c | 4 +- ext/fts5/fts5_expr.c | 6 +- main.mk | 15 +++ manifest | 21 +++-- manifest.uuid | 2 +- tool/loadfts.c | 204 +++++++++++++++++++++++++++++++++++++++++ tool/mksqlite3c.tcl | 11 +++ 8 files changed, 279 insertions(+), 48 deletions(-) create mode 100644 tool/loadfts.c diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 2426c1dc51..5214296e35 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -14,26 +14,26 @@ #include "fts5Int.h" #include -typedef struct SnippetPhrase SnippetPhrase; -typedef struct SnippetIter SnippetIter; +typedef struct SnipPhrase SnipPhrase; +typedef struct SnipIter SnipIter; typedef struct SnippetCtx SnippetCtx; -struct SnippetPhrase { +struct SnipPhrase { u64 mask; /* Current mask */ int nToken; /* Tokens in this phrase */ int i; /* Current offset in phrase poslist */ i64 iPos; /* Next position in phrase (-ve -> EOF) */ }; -struct SnippetIter { +struct SnipIter { i64 iLast; /* Last token position of current snippet */ int nScore; /* Score of current snippet */ const Fts5ExtensionApi *pApi; Fts5Context *pFts; - u64 szmask; /* Mask used to on SnippetPhrase.mask */ + u64 szmask; /* Mask used to on SnipPhrase.mask */ int nPhrase; /* Number of phrases */ - SnippetPhrase aPhrase[0]; /* Array of size nPhrase */ + SnipPhrase aPhrase[0]; /* Array of size nPhrase */ }; struct SnippetCtx { @@ -71,13 +71,13 @@ 
static int fts5SnippetCallback( /* ** Set pIter->nScore to the score for the current entry. */ -static void fts5SnippetCalculateScore(SnippetIter *pIter){ +static void fts5SnippetCalculateScore(SnipIter *pIter){ int i; int nScore = 0; assert( pIter->iLast>=0 ); for(i=0; inPhrase; i++){ - SnippetPhrase *p = &pIter->aPhrase[i]; + SnipPhrase *p = &pIter->aPhrase[i]; u64 mask = p->mask; if( mask ){ u64 j; @@ -94,21 +94,21 @@ static void fts5SnippetCalculateScore(SnippetIter *pIter){ /* ** Allocate a new snippet iter. */ -static int fts5SnippetIterNew( +static int fts5SnipIterNew( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ int nToken, /* Number of tokens in snippets */ - SnippetIter **ppIter /* OUT: New object */ + SnipIter **ppIter /* OUT: New object */ ){ int i; /* Counter variable */ - SnippetIter *pIter; /* New iterator object */ + SnipIter *pIter; /* New iterator object */ int nByte; /* Bytes of space to allocate */ int nPhrase; /* Number of phrases in query */ *ppIter = 0; nPhrase = pApi->xPhraseCount(pFts); - nByte = sizeof(SnippetIter) + nPhrase * sizeof(SnippetPhrase); - pIter = (SnippetIter*)sqlite3_malloc(nByte); + nByte = sizeof(SnipIter) + nPhrase * sizeof(SnipPhrase); + pIter = (SnipIter*)sqlite3_malloc(nByte); if( pIter==0 ) return SQLITE_NOMEM; memset(pIter, 0, nByte); @@ -129,16 +129,16 @@ static int fts5SnippetIterNew( /* ** Set the iterator to point to the first candidate snippet. 
*/ -static void fts5SnippetIterFirst(SnippetIter *pIter){ +static void fts5SnipIterFirst(SnipIter *pIter){ const Fts5ExtensionApi *pApi = pIter->pApi; Fts5Context *pFts = pIter->pFts; int i; /* Used to iterate through phrases */ - SnippetPhrase *pMin = 0; /* Phrase with first match */ + SnipPhrase *pMin = 0; /* Phrase with first match */ - memset(pIter->aPhrase, 0, sizeof(SnippetPhrase) * pIter->nPhrase); + memset(pIter->aPhrase, 0, sizeof(SnipPhrase) * pIter->nPhrase); for(i=0; inPhrase; i++){ - SnippetPhrase *p = &pIter->aPhrase[i]; + SnipPhrase *p = &pIter->aPhrase[i]; p->nToken = pApi->xPhraseSize(pFts, i); pApi->xPoslist(pFts, i, &p->i, &p->iPos); if( p->iPos>=0 && (pMin==0 || p->iPosiPos) ){ @@ -156,26 +156,26 @@ static void fts5SnippetIterFirst(SnippetIter *pIter){ /* ** Advance the snippet iterator to the next candidate snippet. */ -static void fts5SnippetIterNext(SnippetIter *pIter){ +static void fts5SnipIterNext(SnipIter *pIter){ const Fts5ExtensionApi *pApi = pIter->pApi; Fts5Context *pFts = pIter->pFts; int nPhrase = pIter->nPhrase; int i; /* Used to iterate through phrases */ - SnippetPhrase *pMin = 0; + SnipPhrase *pMin = 0; for(i=0; iaPhrase[i]; + SnipPhrase *p = &pIter->aPhrase[i]; if( p->iPos>=0 && (pMin==0 || p->iPosiPos) ) pMin = p; } if( pMin==0 ){ - /* pMin==0 indicates that the SnippetIter is at EOF. */ + /* pMin==0 indicates that the SnipIter is at EOF. 
*/ pIter->iLast = -1; }else{ i64 nShift = pMin->iPos - pIter->iLast; assert( nShift>=0 ); for(i=0; iaPhrase[i]; + SnipPhrase *p = &pIter->aPhrase[i]; if( nShift>=63 ){ p->mask = 0; }else{ @@ -191,7 +191,7 @@ static void fts5SnippetIterNext(SnippetIter *pIter){ } } -static void fts5SnippetIterFree(SnippetIter *pIter){ +static void fts5SnipIterFree(SnipIter *pIter){ if( pIter ){ sqlite3_free(pIter); } @@ -200,7 +200,7 @@ static void fts5SnippetIterFree(SnippetIter *pIter){ static int fts5SnippetText( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ - SnippetIter *pIter, /* Snippet to write to buffer */ + SnipIter *pIter, /* Snippet to write to buffer */ int nToken, /* Size of desired snippet in tokens */ const char *zStart, const char *zFinal, @@ -299,7 +299,7 @@ static int fts5SnippetText( /* Check if this is the first token of any phrase match. */ int ip; for(ip=0; ipnPhrase; ip++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[ip]; + SnipPhrase *pPhrase = &pIter->aPhrase[ip]; u64 m = (1 << (iLast - i - pPhrase->nToken + 1)); if( i<=iLast && (pPhrase->mask & m) ){ @@ -368,7 +368,7 @@ static void fts5SnippetFunction( int nToken = -15; int nAbs; int rc; - SnippetIter *pIter = 0; + SnipIter *pIter = 0; if( nVal>=1 ) zStart = (const char*)sqlite3_value_text(apVal[0]); if( nVal>=2 ) zFinal = (const char*)sqlite3_value_text(apVal[1]); @@ -379,20 +379,20 @@ static void fts5SnippetFunction( } nAbs = nToken * (nToken<0 ? 
-1 : 1); - rc = fts5SnippetIterNew(pApi, pFts, nAbs, &pIter); + rc = fts5SnipIterNew(pApi, pFts, nAbs, &pIter); if( rc==SQLITE_OK ){ Fts5Buffer buf; /* Result buffer */ int nBestScore = 0; /* Score of best snippet found */ - for(fts5SnippetIterFirst(pIter); + for(fts5SnipIterFirst(pIter); pIter->iLast>=0; - fts5SnippetIterNext(pIter) + fts5SnipIterNext(pIter) ){ if( pIter->nScore>nBestScore ) nBestScore = pIter->nScore; } - for(fts5SnippetIterFirst(pIter); + for(fts5SnipIterFirst(pIter); pIter->iLast>=0; - fts5SnippetIterNext(pIter) + fts5SnipIterNext(pIter) ){ if( pIter->nScore==nBestScore ) break; } @@ -405,7 +405,7 @@ static void fts5SnippetFunction( sqlite3_free(buf.p); } - fts5SnippetIterFree(pIter); + fts5SnipIterFree(pIter); if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); } diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index bbcbc5e0e5..5bed69def3 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -113,7 +113,7 @@ static char *fts5Strdup(const char *z){ return sqlite3_mprintf("%s", z); } -void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module**); +void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**); /* ** Allocate an instance of the default tokenizer ("simple") at @@ -121,7 +121,7 @@ void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module**); ** code if an error occurs. */ static int fts5ConfigDefaultTokenizer(Fts5Config *pConfig){ - sqlite3_tokenizer_module *pMod; /* Tokenizer module "simple" */ + const sqlite3_tokenizer_module *pMod; /* Tokenizer module "simple" */ sqlite3_tokenizer *pTokenizer; /* Tokenizer instance */ int rc; /* Return code */ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index ab9b307bc2..2fd7f63f76 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -997,7 +997,7 @@ i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ ** It is the responsibility of the caller to eventually free the returned ** buffer using sqlite3_free(). 
If an OOM error occurs, NULL is returned. */ -static char *fts5Strdup(const char *pIn, int nIn){ +static char *fts5Strndup(const char *pIn, int nIn){ char *zRet = (char*)sqlite3_malloc(nIn+1); if( zRet ){ memcpy(zRet, pIn, nIn); @@ -1007,7 +1007,7 @@ static char *fts5Strdup(const char *pIn, int nIn){ } static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ - *pz = fts5Strdup(pToken->p, pToken->n); + *pz = fts5Strndup(pToken->p, pToken->n); if( *pz==0 ) return SQLITE_NOMEM; return SQLITE_OK; } @@ -1115,7 +1115,7 @@ static int fts5ParseTokenize( pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = fts5Strdup(pToken, nToken); + pTerm->zTerm = fts5Strndup(pToken, nToken); return pTerm->zTerm ? SQLITE_OK : SQLITE_NOMEM; } diff --git a/main.mk b/main.mk index a1582fb6dc..1a75c39827 100644 --- a/main.mk +++ b/main.mk @@ -224,6 +224,18 @@ SRC += \ $(TOP)/ext/rtree/rtree.h \ $(TOP)/ext/rtree/rtree.c +SRC += \ + $(TOP)/ext/fts5/fts5.h \ + $(TOP)/ext/fts5/fts5Int.h \ + $(TOP)/ext/fts5/fts5_aux.c \ + $(TOP)/ext/fts5/fts5_buffer.c \ + $(TOP)/ext/fts5/fts5.c \ + $(TOP)/ext/fts5/fts5_config.c \ + $(TOP)/ext/fts5/fts5_expr.c \ + $(TOP)/ext/fts5/fts5_index.c \ + fts5parse.c \ + $(TOP)/ext/fts5/fts5_storage.c + # Generated source code files # @@ -684,6 +696,9 @@ wordcount$(EXE): $(TOP)/test/wordcount.c sqlite3.c speedtest1$(EXE): $(TOP)/test/speedtest1.c sqlite3.o $(TCC) -I. -o speedtest1$(EXE) $(TOP)/test/speedtest1.c sqlite3.o $(THREADLIB) +loadfts: $(TOP)/tool/loadfts.c libsqlite3.a + $(TCC) $(TOP)/tool/loadfts.c libsqlite3.a -o loadfts $(THREADLIB) + # This target will fail if the SQLite amalgamation contains any exported # symbols that do not begin with "sqlite3_". It is run as part of the # releasetest.tcl script. diff --git a/manifest b/manifest index 84cd1f32f8..36de4c8844 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sand\sfixes\sfor\sbm25()\sfunction. 
-D 2014-07-26T18:38:51.294 +C Add\sthe\s"loadfts"\sprogram,\sfor\sperformance\stesting\sthe\sloading\sof\sdata\sinto\sfts3/fts4/fts5\stables. +D 2014-07-28T20:14:02.001 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,10 +106,10 @@ F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 1496aff16dd9b0a013d14b6c8cf5b7df8c170abe F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a F ext/fts5/fts5Int.h 92fb9c4f759674ef569aebc338f363e167a8933c -F ext/fts5/fts5_aux.c 78adc5db0ff4d6834df220ba6b3caa351d98b971 +F ext/fts5/fts5_aux.c 243156c197384e17983d6a3ed149fa2270b5bb85 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 -F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 65c1918002f2ec1755e4c0c28bf007659409fbd8 +F ext/fts5/fts5_config.c 2138741013e189724b5d40ea7af0f48952a44916 +F ext/fts5/fts5_expr.c e426baa54b9473cb31b8d891d7d1b923bfb5d017 F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -156,7 +156,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk cffc02a30f1af82d35410674f70a0286587add81 +F main.mk 8118631727a27fa88eb38a07ac3b86ecb86e9eb0 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1158,6 +1158,7 @@ F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5 F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce F tool/lemon.c 
3ff0fec22f92dfb54e62eeb48772eddffdbeb0d6 F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc +F tool/loadfts.c 3bdd46090112c84df44a4fbae740af3836108b3f F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6 F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383 F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670 @@ -1165,7 +1166,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 78a77b2c554d534c6f2dc903130186ed15715460 F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 1712d3d71256ca1f297046619c89e77a4d7c8f6d -F tool/mksqlite3c.tcl ba274df71f5e6534b0a913c7c48eabfcbd0934b6 +F tool/mksqlite3c.tcl becaa9d5617dfe137e73dddda9dab8f58bc71e8c F tool/mksqlite3h.tcl ba24038056f51fde07c0079c41885ab85e2cff12 F tool/mksqlite3internalh.tcl b6514145a7d5321b47e64e19b8116cc44f973eb1 F tool/mkvsix.tcl 52a4c613707ac34ae9c226e5ccc69cb948556105 @@ -1196,7 +1197,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb -R 3301ccb2b839356242606883792ca77e +P 71d32f53e81921e43c933cc968cb1c18d83fe1e0 +R 378763b2640fc19d9f72a0522c9f77b1 U dan -Z 456b4a2f1abc554b124e25c35490489e +Z 3cf4ed481646bab9077300595c244e00 diff --git a/manifest.uuid b/manifest.uuid index 17caf8ac8b..2f2c34352f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -71d32f53e81921e43c933cc968cb1c18d83fe1e0 \ No newline at end of file +770b9540c19ad1e3d24adff382332bf032065efd \ No newline at end of file diff --git a/tool/loadfts.c b/tool/loadfts.c new file mode 100644 index 0000000000..18bd355a4d --- /dev/null +++ b/tool/loadfts.c @@ -0,0 +1,204 @@ +/* +** 2013-06-10 +** +** The author disclaims copyright to this source code. 
In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "sqlite3.h" + +/* +** Implementation of the "readtext(X)" SQL function. The entire content +** of the file named X is read and returned as a TEXT value. It is assumed +** the file contains UTF-8 text. NULL is returned if the file does not +** exist or is unreadable. +*/ +static void readfileFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *zName; + FILE *in; + long nIn; + void *pBuf; + + zName = (const char*)sqlite3_value_text(argv[0]); + if( zName==0 ) return; + in = fopen(zName, "rb"); + if( in==0 ) return; + fseek(in, 0, SEEK_END); + nIn = ftell(in); + rewind(in); + pBuf = sqlite3_malloc( nIn ); + if( pBuf && 1==fread(pBuf, nIn, 1, in) ){ + sqlite3_result_text(context, pBuf, nIn, sqlite3_free); + }else{ + sqlite3_free(pBuf); + } + fclose(in); +} + +/* +** Print usage text for this program and exit. +*/ +static void showHelp(const char *zArgv0){ + printf("\n" +"Usage: %s SWITCHES... DB\n" +"\n" +" This program opens the database named on the command line and attempts to\n" +" create an FTS table named \"fts\" with a single column. If successful, it\n" +" recursively traverses the directory named by the -dir option and inserts\n" +" the contents of each file into the fts table. All files are assumed to\n" +" contain UTF-8 text.\n" +"\n" +"Switches are:\n" +" -fts [345] FTS version to use (default=5)\n" +" -idx [01] Create a mapping from filename to rowid (default=0)\n" +" -dir Root of directory tree to load data from (default=.)\n" +, zArgv0 +); + exit(1); +} + +/* +** Exit with a message based on the argument and the current value of errno. 
+*/ +static void error_out(const char *zText){ + fprintf(stderr, "%s: %s\n", zText, strerror(errno)); + exit(-1); +} + +/* +** Exit with a message based on the first argument and the error message +** currently stored in database handle db. +*/ +static void sqlite_error_out(const char *zText, sqlite3 *db){ + fprintf(stderr, "%s: %s\n", zText, sqlite3_errmsg(db)); + exit(-1); +} + +/* +** Context object for visit_file(). +*/ +typedef struct VisitContext VisitContext; +struct VisitContext { + sqlite3 *db; /* Database handle */ + sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */ +}; + +/* +** Callback used with traverse(). The first argument points to an object +** of type VisitContext. This function inserts the contents of the text +** file zPath into the FTS table. +*/ +void visit_file(void *pCtx, const char *zPath){ + int rc; + VisitContext *p = (VisitContext*)pCtx; + /* printf("%s\n", zPath); */ + sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC); + sqlite3_step(p->pInsert); + rc = sqlite3_reset(p->pInsert); + if( rc!=SQLITE_OK ) sqlite_error_out("insert", p->db); +} + +/* +** Recursively traverse directory zDir. For each file that is not a +** directory, invoke the supplied callback with its path. 
+*/ +static void traverse( + const char *zDir, /* Directory to traverse */ + void *pCtx, /* First argument passed to callback */ + void (*xCallback)(void*, const char *zPath) +){ + DIR *d; + struct dirent *e; + + d = opendir(zDir); + if( d==0 ) error_out("opendir()"); + + for(e=readdir(d); e; e=readdir(d)){ + if( strcmp(e->d_name, ".")==0 || strcmp(e->d_name, "..")==0 ) continue; + char *zPath = sqlite3_mprintf("%s/%s", zDir, e->d_name); + if (e->d_type & DT_DIR) { + traverse(zPath, pCtx, xCallback); + }else{ + xCallback(pCtx, zPath); + } + sqlite3_free(zPath); + } + + closedir(d); +} + +int main(int argc, char **argv){ + int iFts = 5; /* Value of -fts option */ + int bMap = 0; /* True to create mapping table */ + const char *zDir = "."; /* Directory to scan */ + int i; + int rc; + sqlite3 *db; + char *zSql; + VisitContext sCtx; + + if( argc % 2 ) showHelp(argv[0]); + + for(i=1; i<(argc-1); i+=2){ + char *zOpt = argv[i]; + char *zArg = argv[i+1]; + if( strcmp(zOpt, "-fts")==0 ){ + iFts = atoi(zArg); + if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); + } + else if( strcmp(zOpt, "-idx")==0 ){ + bMap = atoi(zArg); + if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); + } + else if( strcmp(zOpt, "-dir")==0 ){ + zDir = zArg; + } + } + + /* Open the database file */ + rc = sqlite3_open(argv[argc-1], &db); + if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db); + + rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0, + readfileFunc, 0, 0); + if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db); + + /* Create the FTS table */ + zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts); + rc = sqlite3_exec(db, zSql, 0, 0, 0); + if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); + sqlite3_free(zSql); + + /* Compile the INSERT statement to write data to the FTS table. 
*/ + memset(&sCtx, 0, sizeof(VisitContext)); + sCtx.db = db; + rc = sqlite3_prepare_v2(db, + "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 + ); + if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db); + + /* Load all files in the directory hierarchy into the FTS table. */ + traverse(zDir, (void*)&sCtx, visit_file); + + /* Clean up and exit. */ + sqlite3_finalize(sCtx.pInsert); + sqlite3_close(db); + return 0; +} diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index 64207727be..0d3120ce1a 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -97,6 +97,8 @@ foreach hdr { fts3Int.h fts3_hash.h fts3_tokenizer.h + fts5.h + fts5Int.h hash.h hwtime.h keywordhash.h @@ -328,6 +330,15 @@ foreach file { fts3_unicode.c fts3_unicode2.c + fts5_aux.c + fts5_buffer.c + fts5.c + fts5_config.c + fts5_expr.c + fts5_index.c + fts5parse.c + fts5_storage.c + rtree.c icu.c fts3_icu.c From fdb4a30af29797184ddb38d2cf85b0fb06137be1 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 30 Jul 2014 19:41:58 +0000 Subject: [PATCH 025/206] Add hidden column "rank". Currently this always returns the same value as the bm25() function. 
FossilOrigin-Name: 4cc048c3651e830a6aeded924c7f3a60b634e133 --- ext/fts5/fts5.c | 308 ++++++++++++++++++++++++++++++----------- ext/fts5/fts5Int.h | 17 ++- ext/fts5/fts5_aux.c | 2 +- ext/fts5/fts5_config.c | 16 ++- ext/fts5/fts5_expr.c | 16 +-- manifest | 24 ++-- manifest.uuid | 2 +- test/fts5aa.test | 9 ++ test/fts5ae.test | 10 +- 9 files changed, 299 insertions(+), 105 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index e89817d04e..22df7a62f7 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -56,6 +56,7 @@ struct Fts5Table { Fts5Index *pIndex; /* Full-text index */ Fts5Storage *pStorage; /* Document store */ Fts5Global *pGlobal; /* Global (connection wide) data */ + Fts5Cursor *pSortCsr; /* Sort data from this cursor */ }; struct Fts5MatchPhrase { @@ -63,6 +64,20 @@ struct Fts5MatchPhrase { int nTerm; /* Size of phrase in terms */ }; +/* +** Variable pStmt is set to a compiled SQL statement of the form: +** +** SELECT rowid, FROM ORDER BY +rank; +** +*/ +struct Fts5Sorter { + sqlite3_stmt *pStmt; + i64 iRowid; /* Current rowid */ + u8 *aPoslist; /* Position lists for current row */ + int nIdx; /* Number of entries in aIdx[] */ + int aIdx[0]; /* Offsets into aPoslist for current row */ +}; + /* ** Virtual-table cursor object. 
*/ @@ -71,13 +86,15 @@ struct Fts5Cursor { int idxNum; /* idxNum passed to xFilter() */ sqlite3_stmt *pStmt; /* Statement used to read %_content */ Fts5Expr *pExpr; /* Expression for MATCH queries */ + Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ int csrflags; /* Mask of cursor flags (see below) */ Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ + Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ /* Variables used by auxiliary functions */ i64 iCsrId; /* Cursor id */ Fts5Auxiliary *pAux; /* Currently executing extension function */ - Fts5Auxdata *pAuxdata; /* First in linked list of aux-data */ + Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ int *aColumnSize; /* Values for xColumnSize() */ }; @@ -227,9 +244,11 @@ static int fts5CreateMethod( /* ** The three query plans xBestIndex may choose between. */ -#define FTS5_PLAN_SCAN 1 /* No usable constraint */ -#define FTS5_PLAN_MATCH 2 /* ( MATCH ?) */ -#define FTS5_PLAN_ROWID 3 /* (rowid = ?) */ +#define FTS5_PLAN_SCAN 1 /* No usable constraint */ +#define FTS5_PLAN_MATCH 2 /* ( MATCH ?) */ +#define FTS5_PLAN_SORTED_MATCH 3 /* ( MATCH ? ORDER BY rank) */ +#define FTS5_PLAN_ROWID 4 /* (rowid = ?) */ +#define FTS5_PLAN_SOURCE 5 /* A source cursor for SORTED_MATCH */ #define FTS5_PLAN(idxNum) ((idxNum) & 0x7) @@ -284,9 +303,20 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ pInfo->estimatedCost = 10000000.0; } - if( pInfo->nOrderBy==1 && pInfo->aOrderBy[0].iColumn<0 ){ - pInfo->orderByConsumed = 1; - ePlan |= pInfo->aOrderBy[0].desc ? 
FTS5_ORDER_DESC : FTS5_ORDER_ASC; + if( pInfo->nOrderBy==1 ){ + int iSort = pInfo->aOrderBy[0].iColumn; + if( iSort<0 ){ + /* ORDER BY rowid [ASC|DESC] */ + pInfo->orderByConsumed = 1; + }else if( iSort==(pConfig->nCol+1) && ePlan==FTS5_PLAN_MATCH ){ + /* ORDER BY rank [ASC|DESC] */ + pInfo->orderByConsumed = 1; + ePlan = FTS5_PLAN_SORTED_MATCH; + } + + if( pInfo->orderByConsumed ){ + ePlan |= pInfo->aOrderBy[0].desc ? FTS5_ORDER_DESC : FTS5_ORDER_ASC; + } } pInfo->idxNum = ePlan; @@ -341,7 +371,15 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ int eStmt = fts5StmtType(pCsr->idxNum); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); } - sqlite3Fts5ExprFree(pCsr->pExpr); + if( pCsr->pSorter ){ + Fts5Sorter *pSorter = pCsr->pSorter; + sqlite3_finalize(pSorter->pStmt); + sqlite3_free(pSorter); + } + + if( pCsr->idxNum!=FTS5_PLAN_SOURCE ){ + sqlite3Fts5ExprFree(pCsr->pExpr); + } for(pData=pCsr->pAuxdata; pData; pData=pNext){ pNext = pData->pNext; @@ -357,6 +395,21 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ return SQLITE_OK; } +static int fts5SorterNext(Fts5Cursor *pCsr){ + Fts5Sorter *pSorter = pCsr->pSorter; + int rc; + + rc = sqlite3_step(pSorter->pStmt); + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + CsrFlagSet(pCsr, FTS5CSR_EOF); + }else if( rc==SQLITE_ROW ){ + rc = SQLITE_OK; + pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0); + } + + return rc; +} /* ** Advance the cursor to the next row in the table that matches the @@ -371,25 +424,77 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ int ePlan = FTS5_PLAN(pCsr->idxNum); int rc = SQLITE_OK; - if( ePlan!=FTS5_PLAN_MATCH ){ - rc = sqlite3_step(pCsr->pStmt); - if( rc!=SQLITE_ROW ){ - CsrFlagSet(pCsr, FTS5CSR_EOF); - rc = sqlite3_reset(pCsr->pStmt); - }else{ - rc = SQLITE_OK; + switch( ePlan ){ + case FTS5_PLAN_MATCH: + case FTS5_PLAN_SOURCE: + rc = sqlite3Fts5ExprNext(pCsr->pExpr); + if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ + CsrFlagSet(pCsr, 
FTS5CSR_EOF); + } + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + break; + + case FTS5_PLAN_SORTED_MATCH: { + rc = fts5SorterNext(pCsr); + break; } - }else{ - rc = sqlite3Fts5ExprNext(pCsr->pExpr); - if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ - CsrFlagSet(pCsr, FTS5CSR_EOF); - } - CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + + default: + rc = sqlite3_step(pCsr->pStmt); + if( rc!=SQLITE_ROW ){ + CsrFlagSet(pCsr, FTS5CSR_EOF); + rc = sqlite3_reset(pCsr->pStmt); + }else{ + rc = SQLITE_OK; + } + break; } return rc; } +static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ + Fts5Config *pConfig = pTab->pConfig; + Fts5Sorter *pSorter; + int nPhrase; + int nByte; + int rc = SQLITE_OK; + char *zSql; + + nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); + nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase; + pSorter = (Fts5Sorter*)sqlite3_malloc(nByte); + if( pSorter==0 ) return SQLITE_NOMEM; + memset(pSorter, 0, nByte); + + zSql = sqlite3_mprintf("SELECT rowid, %Q FROM %Q.%Q ORDER BY +%s %s", + pConfig->zName, pConfig->zDb, pConfig->zName, FTS5_RANK_NAME, + bAsc ? 
"ASC" : "DESC" + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pSorter->pStmt, 0); + sqlite3_free(zSql); + } + + pCsr->pSorter = pSorter; + if( rc==SQLITE_OK ){ + assert( pTab->pSortCsr==0 ); + pTab->pSortCsr = pCsr; + rc = fts5SorterNext(pCsr); + pTab->pSortCsr = 0; + } + + if( rc!=SQLITE_OK ){ + sqlite3_finalize(pSorter->pStmt); + sqlite3_free(pSorter); + pCsr->pSorter = 0; + } + + return rc; +} + static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ int rc; rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); @@ -414,30 +519,43 @@ static int fts5FilterMethod( ){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - int rc = SQLITE_OK; - int ePlan = FTS5_PLAN(idxNum); - int eStmt = fts5StmtType(idxNum); int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0); + int rc = SQLITE_OK; - pCsr->idxNum = idxNum; assert( pCsr->pStmt==0 ); assert( pCsr->pExpr==0 ); assert( pCsr->csrflags==0 ); + assert( pCsr->pRank==0 ); - rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); - if( rc==SQLITE_OK ){ - if( ePlan==FTS5_PLAN_MATCH ){ - char **pzErr = &pTab->base.zErrMsg; - const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); - rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); - if( rc==SQLITE_OK ){ - rc = fts5CursorFirst(pTab, pCsr, bAsc); + if( pTab->pSortCsr ){ + pCsr->idxNum = FTS5_PLAN_SOURCE; + pCsr->pRank = pTab->pSortCsr->pRank; + pCsr->pExpr = pTab->pSortCsr->pExpr; + rc = fts5CursorFirst(pTab, pCsr, bAsc); + }else{ + int ePlan = FTS5_PLAN(idxNum); + int eStmt = fts5StmtType(idxNum); + pCsr->idxNum = idxNum; + rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); + if( rc==SQLITE_OK ){ + if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){ + char **pzErr = &pTab->base.zErrMsg; + const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); + pCsr->pRank = pTab->pGlobal->pAux; + rc = 
sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); + if( rc==SQLITE_OK ){ + if( ePlan==FTS5_PLAN_MATCH ){ + rc = fts5CursorFirst(pTab, pCsr, bAsc); + }else{ + rc = fts5CursorFirstSorted(pTab, pCsr, bAsc); + } + } + }else{ + if( ePlan==FTS5_PLAN_ROWID ){ + sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + } + rc = fts5NextMethod(pCursor); } - }else{ - if( ePlan==FTS5_PLAN_ROWID ){ - sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); - } - rc = fts5NextMethod(pCursor); } } @@ -464,10 +582,19 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ int ePlan = FTS5_PLAN(pCsr->idxNum); assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); - if( ePlan!=FTS5_PLAN_MATCH ){ - *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); - }else{ - *pRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); + switch( ePlan ){ + case FTS5_PLAN_SOURCE: + case FTS5_PLAN_MATCH: + *pRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); + break; + + case FTS5_PLAN_SORTED_MATCH: + *pRowid = pCsr->pSorter->iRowid; + break; + + default: + *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); + break; } return SQLITE_OK; @@ -497,36 +624,6 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ return rc; } -/* -** This is the xColumn method, called by SQLite to request a value from -** the row that the supplied cursor currently points to. -*/ -static int fts5ColumnMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ - int iCol /* Index of column to read value from */ -){ - Fts5Config *pConfig = ((Fts5Table*)(pCursor->pVtab))->pConfig; - Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - int rc = SQLITE_OK; - - assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); - - if( iCol==pConfig->nCol ){ - /* User is requesting the value of the special column with the same name - ** as the table. Return the cursor integer id number. This value is only - ** useful in that it may be passed as the first argument to an FTS5 - ** auxiliary function. 
*/ - sqlite3_result_int64(pCtx, pCsr->iCsrId); - }else{ - rc = fts5SeekCursor(pCsr); - if( rc==SQLITE_OK ){ - sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); - } - } - return rc; -} - /* ** This function is called to handle an FTS INSERT command. In other words, ** an INSERT statement of the form: @@ -573,7 +670,15 @@ static int fts5UpdateMethod( int eConflict; /* ON CONFLICT for this DML */ int rc = SQLITE_OK; /* Return code */ - assert( nArg==1 || nArg==(2 + pConfig->nCol + 1) ); + /* A delete specifies a single argument - the rowid of the row to remove. + ** Update and insert operations pass: + ** + ** 1. The "old" rowid, or NULL. + ** 2. The "new" rowid. + ** 3. Values for each of the nCol matchable columns. + ** 4. Values for the two hidden columns ( and "rank"). + */ + assert( nArg==1 || nArg==(2 + pConfig->nCol + 2) ); if( nArg>1 && SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ return fts5SpecialCommand(pTab, apVal[2 + pConfig->nCol]); @@ -840,6 +945,19 @@ static int fts5ApiQueryPhrase( return rc; } +static void fts5ApiInvoke( + Fts5Auxiliary *pAux, + Fts5Cursor *pCsr, + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + assert( pCsr->pAux==0 ); + pCsr->pAux = pAux; + pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); + pCsr->pAux = 0; +} + static void fts5ApiCallback( sqlite3_context *context, int argc, @@ -861,13 +979,49 @@ static void fts5ApiCallback( char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); sqlite3_result_error(context, zErr, -1); }else{ - assert( pCsr->pAux==0 ); - pCsr->pAux = pAux; - pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc-1, &argv[1]); - pCsr->pAux = 0; + fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); } } +/* +** This is the xColumn method, called by SQLite to request a value from +** the row that the supplied cursor currently points to. 
+*/ +static int fts5ColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts5Config *pConfig = ((Fts5Table*)(pCursor->pVtab))->pConfig; + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + int rc = SQLITE_OK; + + assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); + + if( iCol==pConfig->nCol ){ + if( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SOURCE ){ + /* todo */ + }else{ + /* User is requesting the value of the special column with the same name + ** as the table. Return the cursor integer id number. This value is only + ** useful in that it may be passed as the first argument to an FTS5 + ** auxiliary function. */ + sqlite3_result_int64(pCtx, pCsr->iCsrId); + } + }else if( iCol==pConfig->nCol+1 ){ + /* The value of the "rank" column. */ + if( pCsr->pRank ){ + fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, 0, 0); + } + }else{ + rc = fts5SeekCursor(pCsr); + if( rc==SQLITE_OK ){ + sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); + } + } + return rc; +} + /* ** This routine implements the xFindFunction method for the FTS3 diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 21c6d459d9..bfbc8d7aca 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -28,6 +28,9 @@ #define FTS5_DEFAULT_NEARDIST 10 +/* Name of rank column */ +#define FTS5_RANK_NAME "rank" + /************************************************************************** ** Interface to code in fts5_config.c. fts5_config.c contains contains code ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. @@ -394,7 +397,6 @@ void sqlite3Fts5ParseSetColumn(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); - /* ** End of interface to code in fts5_expr.c. 
**************************************************************************/ @@ -423,7 +425,18 @@ int sqlite3Fts5CreateAux( int sqlite3Fts5AuxInit(Fts5Global*); /* -** End of interface to code in fts5_expr.c. +** End of interface to code in fts5_aux.c. +**************************************************************************/ + +/************************************************************************** +** Interface to code in fts5_sorter.c. +*/ +typedef struct Fts5Sorter Fts5Sorter; + +int sqlite3Fts5SorterNew(Fts5Expr *pExpr, Fts5Sorter **pp); + +/* +** End of interface to code in fts5_sorter.c. **************************************************************************/ #endif diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 5214296e35..dbf63a1c43 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -956,10 +956,10 @@ int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ fts5_extension_function xFunc;/* Callback function */ void (*xDestroy)(void*); /* Destructor function */ } aBuiltin [] = { - { "bm25", 0, fts5Bm25Function, 0 }, { "bm25debug", (void*)1, fts5Bm25Function, 0 }, { "snippet", 0, fts5SnippetFunction, 0 }, { "fts5_test", 0, fts5TestFunction, 0 }, + { "bm25", 0, fts5Bm25Function, 0 }, }; int rc = SQLITE_OK; /* Return code */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 5bed69def3..d326f60cc8 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -165,7 +165,10 @@ int sqlite3Fts5ConfigParse( pRet->azCol = (char**)sqlite3_malloc(sizeof(char*) * nArg); pRet->zDb = fts5Strdup(azArg[1]); pRet->zName = fts5Strdup(azArg[2]); - if( pRet->azCol==0 || pRet->zDb==0 || pRet->zName==0 ){ + if( sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ + *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); + rc = SQLITE_ERROR; + }else if( pRet->azCol==0 || pRet->zDb==0 || pRet->zName==0 ){ rc = SQLITE_NOMEM; }else{ int i; @@ -189,10 +192,15 @@ int sqlite3Fts5ConfigParse( } } - /* If it is not a special 
directive, it must be a column name */ + /* If it is not a special directive, it must be a column name. In + ** this case, check that it is not the reserved column name "rank". */ if( zDup ){ sqlite3Fts5Dequote(zDup); pRet->azCol[pRet->nCol++] = zDup; + if( sqlite3_stricmp(zDup, FTS5_RANK_NAME)==0 ){ + *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zDup); + rc = SQLITE_ERROR; + } } } } @@ -249,7 +257,9 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ if( zSql ){ zOld = zSql; - zSql = sqlite3_mprintf("%s, %Q HIDDEN)", zOld, pConfig->zName); + zSql = sqlite3_mprintf("%s, %Q HIDDEN, %s HIDDEN)", + zOld, pConfig->zName, FTS5_RANK_NAME + ); sqlite3_free(zOld); } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 2fd7f63f76..68d8a75b9a 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -34,7 +34,7 @@ struct Fts5Expr { Fts5ExprNode *pRoot; int bAsc; int nPhrase; /* Number of phrases in expression */ - Fts5ExprPhrase **apPhrase; /* Pointers to phrase objects */ + Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ }; /* @@ -216,7 +216,7 @@ int sqlite3Fts5ExprNew( }else{ pNew->pRoot = sParse.pExpr; pNew->pIndex = 0; - pNew->apPhrase = sParse.apPhrase; + pNew->apExprPhrase = sParse.apPhrase; pNew->nPhrase = sParse.nPhrase; sParse.apPhrase = 0; } @@ -275,7 +275,7 @@ int sqlite3Fts5ExprPhraseExpr( Fts5ExprNearset *pNear; Fts5ExprPhrase *pCopy; - pOrig = pExpr->apPhrase[iPhrase]; + pOrig = pExpr->apExprPhrase[iPhrase]; pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); @@ -296,8 +296,8 @@ int sqlite3Fts5ExprPhraseExpr( pNew->pIndex = pExpr->pIndex; pNew->pRoot = pNode; pNew->nPhrase = 1; - pNew->apPhrase = apPhrase; - pNew->apPhrase[0] = pCopy; + pNew->apExprPhrase = apPhrase; + pNew->apExprPhrase[0] = pCopy; pNode->eType = FTS5_STRING; pNode->pNear = pNear; @@ -345,7 +345,7 @@ 
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ void sqlite3Fts5ExprFree(Fts5Expr *p){ if( p ){ sqlite3Fts5ParseNodeFree(p->pRoot); - sqlite3_free(p->apPhrase); + sqlite3_free(p->apExprPhrase); sqlite3_free(p); } } @@ -1588,7 +1588,7 @@ int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ */ int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; - return pExpr->apPhrase[iPhrase]->nTerm; + return pExpr->apExprPhrase[iPhrase]->nTerm; } /* @@ -1597,7 +1597,7 @@ int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ */ int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ if( iPhrase>=0 && iPhrasenPhrase ){ - Fts5ExprPhrase *pPhrase = pExpr->apPhrase[iPhrase]; + Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; Fts5ExprNode *pNode = pPhrase->pNode; if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ *pa = pPhrase->poslist.p; diff --git a/manifest b/manifest index 36de4c8844..7af098c004 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\s"loadfts"\sprogram,\sfor\sperformance\stesting\sthe\sloading\sof\sdata\sinto\sfts3/fts4/fts5\stables. -D 2014-07-28T20:14:02.001 +C Add\shidden\scolumn\s"rank".\sCurrently\sthis\salways\sreturns\sthe\ssame\svalue\sas\sthe\sbm25()\sfunction. 
+D 2014-07-30T19:41:58.841 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,13 +103,13 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 1496aff16dd9b0a013d14b6c8cf5b7df8c170abe +F ext/fts5/fts5.c f786dd1a3d091f7cb6572f6ee95f10f09de52224 F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a -F ext/fts5/fts5Int.h 92fb9c4f759674ef569aebc338f363e167a8933c -F ext/fts5/fts5_aux.c 243156c197384e17983d6a3ed149fa2270b5bb85 +F ext/fts5/fts5Int.h 9a195c1706876c538902f007149b39e982e9da53 +F ext/fts5/fts5_aux.c 3cae4225d458af41b64bb40ed9dcc052eb41b9a0 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 -F ext/fts5/fts5_config.c 2138741013e189724b5d40ea7af0f48952a44916 -F ext/fts5/fts5_expr.c e426baa54b9473cb31b8d891d7d1b923bfb5d017 +F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 +F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -595,11 +595,11 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test a2c7bbc18f25f0b57ea8fc483c8a8830273b9ed4 +F test/fts5aa.test f90836c002804a82386d66c79b380128c5f3005e F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 
9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test 24b337571c51a10da1ae439b96b70317813a2fd4 +F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 5f53d0a52280b63caf5a519d6994c4d428835155 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d @@ -1197,7 +1197,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 71d32f53e81921e43c933cc968cb1c18d83fe1e0 -R 378763b2640fc19d9f72a0522c9f77b1 +P 770b9540c19ad1e3d24adff382332bf032065efd +R b6911e3c357a5fea00c97182f6d0088b U dan -Z 3cf4ed481646bab9077300595c244e00 +Z 526c63f1e91d68c01d64fde27fedef0b diff --git a/manifest.uuid b/manifest.uuid index 2f2c34352f..62005c928a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -770b9540c19ad1e3d24adff382332bf032065efd \ No newline at end of file +4cc048c3651e830a6aeded924c7f3a60b634e133 \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 1206a0cae8..c59183512a 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -276,5 +276,14 @@ foreach {rowid x y} $d10 { do_execsql_test 10.4.1 { DELETE FROM t1 } do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +#------------------------------------------------------------------------- +# +do_catchsql_test 11.1 { + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank); +} {1 {reserved fts5 column name: rank}} +do_catchsql_test 11.2 { + CREATE VIRTUAL TABLE rank USING fts5(a, b, c); +} {1 {reserved fts5 table name: rank}} + finish_test diff --git a/test/fts5ae.test b/test/fts5ae.test index bb4904f210..4a5e4d041e 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -261,9 +261,17 @@ foreach {tn q res} { 5 {g AND (e OR f)} 
{5 4} 6 {j AND (h OR i)} {5 6} } { - do_execsql_test 8.2.$tn { + do_execsql_test 8.2.$tn.1 { SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8) DESC; } $res + + do_execsql_test 8.2.$tn.2 { + SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank DESC; + } $res + + do_execsql_test 8.3.$tn.3 { + SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank DESC; + } $res } From 3fc4a856a229de7c9f00e3e22de7292909cd26df Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 30 Jul 2014 20:26:24 +0000 Subject: [PATCH 026/206] Fix things so that the fts5 extension API works with "ORDER BY rank" queries. FossilOrigin-Name: f1b4e1a98d49ecaba962beba16f8224175e4ba59 --- ext/fts5/fts5.c | 80 +++++++++++++++++++++++++++++++++++++++++------- manifest | 14 ++++----- manifest.uuid | 2 +- test/fts5af.test | 8 +++++ 4 files changed, 85 insertions(+), 19 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 22df7a62f7..794dc791d4 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -73,7 +73,7 @@ struct Fts5MatchPhrase { struct Fts5Sorter { sqlite3_stmt *pStmt; i64 iRowid; /* Current rowid */ - u8 *aPoslist; /* Position lists for current row */ + const u8 *aPoslist; /* Position lists for current row */ int nIdx; /* Number of entries in aIdx[] */ int aIdx[0]; /* Offsets into aPoslist for current row */ }; @@ -404,8 +404,22 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ rc = SQLITE_OK; CsrFlagSet(pCsr, FTS5CSR_EOF); }else if( rc==SQLITE_ROW ){ + const u8 *a; + const u8 *aBlob; + int nBlob; + int i; rc = SQLITE_OK; + pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0); + nBlob = sqlite3_column_bytes(pSorter->pStmt, 1); + aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1); + + for(i=0; i<(pSorter->nIdx-1); i++){ + a += getVarint32(a, pSorter->aIdx[i]); + } + pSorter->aIdx[i] = &aBlob[nBlob] - a; + pSorter->aPoslist = a; + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); } return rc; @@ -467,7 +481,7 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, 
Fts5Cursor *pCsr, int bAsc){ if( pSorter==0 ) return SQLITE_NOMEM; memset(pSorter, 0, nByte); - zSql = sqlite3_mprintf("SELECT rowid, %Q FROM %Q.%Q ORDER BY +%s %s", + zSql = sqlite3_mprintf("SELECT rowid, %s FROM %Q.%Q ORDER BY +%s %s", pConfig->zName, pConfig->zDb, pConfig->zName, FTS5_RANK_NAME, bAsc ? "ASC" : "DESC" ); @@ -571,6 +585,21 @@ static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0); } +/* +** Return the rowid that the cursor currently points to. +*/ +static i64 fts5CursorRowid(Fts5Cursor *pCsr){ + assert( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_MATCH + || FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SORTED_MATCH + || FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SOURCE + ); + if( pCsr->pSorter ){ + return pCsr->pSorter->iRowid; + }else{ + return sqlite3Fts5ExprRowid(pCsr->pExpr); + } +} + /* ** This is the xRowid method. The SQLite core calls this routine to ** retrieve the rowid for the current row of the result set. fts5 @@ -585,11 +614,8 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ switch( ePlan ){ case FTS5_PLAN_SOURCE: case FTS5_PLAN_MATCH: - *pRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); - break; - case FTS5_PLAN_SORTED_MATCH: - *pRowid = pCsr->pSorter->iRowid; + *pRowid = fts5CursorRowid(pCsr); break; default: @@ -609,7 +635,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ assert( pCsr->pExpr ); sqlite3_reset(pCsr->pStmt); - sqlite3_bind_int64(pCsr->pStmt, 1, sqlite3Fts5ExprRowid(pCsr->pExpr)); + sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); rc = sqlite3_step(pCsr->pStmt); if( rc==SQLITE_ROW ){ rc = SQLITE_OK; @@ -786,7 +812,7 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; - return sqlite3Fts5ExprRowid(pCsr->pExpr); + return fts5CursorRowid((Fts5Cursor*)pCtx); } static int fts5ApiColumnText( @@ -810,7 +836,7 @@ 
static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ int rc = SQLITE_OK; if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ - i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); + i64 iRowid = fts5CursorRowid(pCsr); rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); } if( iCol>=0 && iColpConfig->nCol ){ @@ -829,7 +855,14 @@ static int fts5ApiPoslist( ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; const u8 *a; int n; /* Poslist for phrase iPhrase */ - n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a); + if( pCsr->pSorter ){ + Fts5Sorter *pSorter = pCsr->pSorter; + int i1 = (iPhrase ? 0 : pSorter->aIdx[iPhrase-1]); + n = pSorter->aIdx[iPhrase] - i1; + a = &pSorter->aPoslist[i1]; + }else{ + n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a); + } return sqlite3Fts5PoslistNext64(a, n, pi, piPos); } @@ -983,6 +1016,31 @@ static void fts5ApiCallback( } } +static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ + int i; + int rc = SQLITE_OK; + int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); + Fts5Buffer val; + int iOff = 0; + + memset(&val, 0, sizeof(Fts5Buffer)); + for(i=0; ipExpr, i, &dummy); + } + + for(i=0; ipExpr, i, &pPoslist); + sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); + } + + sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free); + return rc; +} + /* ** This is the xColumn method, called by SQLite to request a value from ** the row that the supplied cursor currently points to. @@ -1000,7 +1058,7 @@ static int fts5ColumnMethod( if( iCol==pConfig->nCol ){ if( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SOURCE ){ - /* todo */ + fts5PoslistBlob(pCtx, pCsr); }else{ /* User is requesting the value of the special column with the same name ** as the table. Return the cursor integer id number. 
This value is only diff --git a/manifest b/manifest index 7af098c004..405af5b550 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\shidden\scolumn\s"rank".\sCurrently\sthis\salways\sreturns\sthe\ssame\svalue\sas\sthe\sbm25()\sfunction. -D 2014-07-30T19:41:58.841 +C Fix\sthings\sso\sthat\sthe\sfts5\sextension\sAPI\sworks\swith\s"ORDER\sBY\srank"\squeries. +D 2014-07-30T20:26:24.443 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,7 +103,7 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c f786dd1a3d091f7cb6572f6ee95f10f09de52224 +F ext/fts5/fts5.c f322286c6a37dcea4b46b00677e9aad2e59abe9d F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a F ext/fts5/fts5Int.h 9a195c1706876c538902f007149b39e982e9da53 F ext/fts5/fts5_aux.c 3cae4225d458af41b64bb40ed9dcc052eb41b9a0 @@ -600,7 +600,7 @@ F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e -F test/fts5af.test 5f53d0a52280b63caf5a519d6994c4d428835155 +F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1197,7 +1197,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix 
deb315d026cc8400325c5863eef847784a219a2f -P 770b9540c19ad1e3d24adff382332bf032065efd -R b6911e3c357a5fea00c97182f6d0088b +P 4cc048c3651e830a6aeded924c7f3a60b634e133 +R e80b35a7b7fca0c8edda241edc415407 U dan -Z 526c63f1e91d68c01d64fde27fedef0b +Z fdc04c3e2e7e600dae907e07830ae6e8 diff --git a/manifest.uuid b/manifest.uuid index 62005c928a..25423418e3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4cc048c3651e830a6aeded924c7f3a60b634e133 \ No newline at end of file +f1b4e1a98d49ecaba962beba16f8224175e4ba59 \ No newline at end of file diff --git a/test/fts5af.test b/test/fts5af.test index 2412b4a7a4..d59df77291 100644 --- a/test/fts5af.test +++ b/test/fts5af.test @@ -47,6 +47,14 @@ proc do_snippet_test {tn doc match res} { SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; } [list $res] + do_execsql_test $tn.3 { + DELETE FROM t1; + INSERT INTO t1 VALUES($v1, NULL); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2 + ORDER BY rank DESC; + } [list $res] + + } From 937490d4b4a595f17eda775055a948fa9af831ee Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 31 Jul 2014 11:57:59 +0000 Subject: [PATCH 027/206] Add further tests for the extension APIs with "ORDER BY rank" queries. 
FossilOrigin-Name: 37a417d27e4ebafd4783f62728d7467316b75b17 --- ext/fts5/fts5.c | 44 ++++++++++--- ext/fts5/fts5_aux.c | 6 +- manifest | 17 ++--- manifest.uuid | 2 +- test/fts5ag.test | 137 +++++++++++++++++++++++++++++++++++++++++ test/permutations.test | 2 +- 6 files changed, 186 insertions(+), 22 deletions(-) create mode 100644 test/fts5ag.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 794dc791d4..9849b87b3e 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -65,10 +65,13 @@ struct Fts5MatchPhrase { }; /* -** Variable pStmt is set to a compiled SQL statement of the form: -** +** pStmt: ** SELECT rowid, FROM ORDER BY +rank; ** +** aIdx[]: +** There is one entry in the aIdx[] array for each phrase in the query, +** the value of which is the offset within aPoslist[] following the last +** byte of the position list for the corresponding phrase. */ struct Fts5Sorter { sqlite3_stmt *pStmt; @@ -78,6 +81,7 @@ struct Fts5Sorter { int aIdx[0]; /* Offsets into aPoslist for current row */ }; + /* ** Virtual-table cursor object. 
*/ @@ -408,6 +412,7 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ const u8 *aBlob; int nBlob; int i; + int iOff = 0; rc = SQLITE_OK; pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0); @@ -415,9 +420,13 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1); for(i=0; i<(pSorter->nIdx-1); i++){ - a += getVarint32(a, pSorter->aIdx[i]); + int iVal; + a += getVarint32(a, iVal); + iOff += iVal; + pSorter->aIdx[i] = iOff; } pSorter->aIdx[i] = &aBlob[nBlob] - a; + pSorter->aPoslist = a; CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); } @@ -480,6 +489,7 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ pSorter = (Fts5Sorter*)sqlite3_malloc(nByte); if( pSorter==0 ) return SQLITE_NOMEM; memset(pSorter, 0, nByte); + pSorter->nIdx = nPhrase; zSql = sqlite3_mprintf("SELECT rowid, %s FROM %Q.%Q ORDER BY +%s %s", pConfig->zName, pConfig->zDb, pConfig->zName, FTS5_RANK_NAME, @@ -811,7 +821,6 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ } static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ - Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return fts5CursorRowid((Fts5Cursor*)pCtx); } @@ -857,7 +866,7 @@ static int fts5ApiPoslist( const u8 *a; int n; /* Poslist for phrase iPhrase */ if( pCsr->pSorter ){ Fts5Sorter *pSorter = pCsr->pSorter; - int i1 = (iPhrase ? 0 : pSorter->aIdx[iPhrase-1]); + int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); n = pSorter->aIdx[iPhrase] - i1; a = &pSorter->aPoslist[i1]; }else{ @@ -1016,20 +1025,37 @@ static void fts5ApiCallback( } } +/* +** Return a "position-list blob" corresponding to the current position of +** cursor pCsr via sqlite3_result_blob(). A position-list blob contains +** the current position-list for each phrase in the query associated with +** cursor pCsr. +** +** A position-list blob begins with (nPhrase-1) varints, where nPhrase is +** the number of phrases in the query. 
Following the varints are the +** concatenated position lists for each phrase, in order. +** +** The first varint (if it exists) contains the size of the position list +** for phrase 0. The second (same disclaimer) contains the size of position +** list 1. And so on. There is no size field for the final position list, +** as it can be derived from the total size of the blob. +*/ static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ int i; int rc = SQLITE_OK; int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); Fts5Buffer val; - int iOff = 0; memset(&val, 0, sizeof(Fts5Buffer)); - for(i=0; ipExpr, i, &dummy); + int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); + sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); } + /* Append the position lists */ for(i=0; i1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); @@ -779,7 +779,7 @@ static void fts5TestFunction( } if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntext "); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " columntext "); } if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntext") ){ for(i=0; rc==SQLITE_OK && i Date: Thu, 31 Jul 2014 17:53:03 +0000 Subject: [PATCH 028/206] Add a comment explaining why fts5 cannot cache "sorter statements". FossilOrigin-Name: e6af3b7a3cf331210f4c87848e2af007dbd5ef30 --- ext/fts5/fts5.c | 7 +++++++ manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 9849b87b3e..ec9529c444 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -491,6 +491,13 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ memset(pSorter, 0, nByte); pSorter->nIdx = nPhrase; + /* TODO: It would be better to have some system for reusing statement + ** handles here, rather than preparing a new one for each query. But that + ** is not possible as SQLite reference counts the virtual table objects. 
+ ** And since the statement required here reads from this very virtual + ** table, saving it creates a circular reference. + ** + ** If SQLite a built-in statement cache, this wouldn't be a problem. */ zSql = sqlite3_mprintf("SELECT rowid, %s FROM %Q.%Q ORDER BY +%s %s", pConfig->zName, pConfig->zDb, pConfig->zName, FTS5_RANK_NAME, bAsc ? "ASC" : "DESC" diff --git a/manifest b/manifest index 7b3e89ea0b..46190fb107 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sfurther\stests\sfor\sthe\sextension\sAPIs\swith\s"ORDER\sBY\srank"\squeries. -D 2014-07-31T11:57:59.052 +C Add\sa\scomment\sexplaining\swhy\sfts5\scannot\scache\s"sorter\sstatements". +D 2014-07-31T17:53:03.405 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,7 +103,7 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c b54b776771d1e965bac02bffcf875a0bfc3660db +F ext/fts5/fts5.c aa269bbecf78cdb7aaa9c6dba26f6ee906ceedaf F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a F ext/fts5/fts5Int.h 9a195c1706876c538902f007149b39e982e9da53 F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc @@ -1198,7 +1198,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f1b4e1a98d49ecaba962beba16f8224175e4ba59 -R bf2807e3a776f0c27fe888a62861bcb4 +P 37a417d27e4ebafd4783f62728d7467316b75b17 +R e0b14b9e45e7f8113c4d7a699a937c5a U dan -Z 17018f27d9e25b34230168ffaec13df5 +Z 
cd0e862a57439796abd2a3aa1ce5c8f8 diff --git a/manifest.uuid b/manifest.uuid index c5f4f69e5e..7a46238c68 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -37a417d27e4ebafd4783f62728d7467316b75b17 \ No newline at end of file +e6af3b7a3cf331210f4c87848e2af007dbd5ef30 \ No newline at end of file From a29284d65fa0ad50db9457e8a824e84f95a742a7 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 Aug 2014 11:16:25 +0000 Subject: [PATCH 029/206] Add "doclist index" records to the database. These are to make navigating within very large doclists faster. They are not yet used by queries. FossilOrigin-Name: 89377421ff69f2450364987afe781b6d8bcbf087 --- ext/fts5/fts5_index.c | 131 +++++++++++++++++++++++++++++++++++------- manifest | 13 +++-- manifest.uuid | 2 +- test/fts5ah.test | 58 +++++++++++++++++++ 4 files changed, 177 insertions(+), 27 deletions(-) create mode 100644 test/fts5ah.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 5050159206..1443751046 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -47,6 +47,8 @@ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ +#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ + /* ** Details: ** @@ -184,8 +186,10 @@ ** 5. Segment doclist indexes: ** ** A list of varints - the first docid on each page (starting with the -** second) of the doclist. First element in the list is a literal docid. -** Each docid thereafter is a (negative) delta. +** first termless page) of the doclist. First element in the list is a +** literal docid. Each docid thereafter is a (negative) delta. If there +** are no docids at all on a page, a 0x00 byte takes the place of the +** delta value. */ /* @@ -235,7 +239,7 @@ ** (1<pgnoFirst==0 ){ + /* This happens if the segment is being used as an input to an incremental + ** merge and all data has already been "trimmed". 
See function + ** fts5TrimSegments() for details. In this case leave the iterator empty. + ** The caller will see the (pIter->pLeaf==0) and assume the iterator is + ** at EOF already. */ + assert( pIter->pLeaf==0 ); + return; + } if( p->rc==SQLITE_OK ){ memset(pIter, 0, sizeof(*pIter)); @@ -2061,6 +2076,33 @@ static int fts5PrefixCompress( return i; } +/* +** If an "nEmpty" record must be written to the b-tree before the next +** term, write it now. +*/ +static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ + if( pWriter->nEmpty ){ + Fts5PageWriter *pPg = &pWriter->aWriter[1]; + int bFlag = 0; + if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + i64 iKey = FTS5_DOCLIST_IDX_ROWID( + pWriter->iIdx, pWriter->iSegid, + pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty + ); + fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n); + bFlag = 1; + } + fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); + fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); + pWriter->nEmpty = 0; + } + + /* Whether or not it was written to disk, zero the doclist index at this + ** point */ + sqlite3Fts5BufferZero(&pWriter->dlidx); + pWriter->bDlidxPrevValid = 0; +} + /* ** This is called once for each leaf page except the first that contains @@ -2097,12 +2139,7 @@ static void fts5WriteBtreeTerm( } pPage = &pWriter->aWriter[iHeight]; - if( pWriter->nEmpty ){ - assert( iHeight==1 ); - fts5BufferAppendVarint(&p->rc, &pPage->buf, 0); - fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->nEmpty); - pWriter->nEmpty = 0; - } + fts5WriteBtreeNEmpty(p, pWriter); if( pPage->buf.n>=p->pgsz ){ /* pPage will be written to disk. The term will be written into the @@ -2130,9 +2167,34 @@ static void fts5WriteBtreeNoTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter /* Writer object */ ){ + if( pWriter->bFirstRowidInPage ){ + /* No rowids on this page. 
Append an 0x00 byte to the current + ** doclist-index */ + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, 0); + } pWriter->nEmpty++; } +/* +** Rowid iRowid has just been appended to the current leaf page. As it is +** the first on its page, append an entry to the current doclist-index. +*/ +static void fts5WriteDlidxAppend( + Fts5Index *p, + Fts5SegWriter *pWriter, + i64 iRowid +){ + i64 iVal; + if( pWriter->bDlidxPrevValid ){ + iVal = pWriter->iDlidxPrev - iRowid; + }else{ + iVal = iRowid; + } + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, iVal); + pWriter->bDlidxPrevValid = 1; + pWriter->iDlidxPrev = iRowid; +} + static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->aWriter[0]; @@ -2226,8 +2288,12 @@ static void fts5WriteAppendRowid( Fts5PageWriter *pPage = &pWriter->aWriter[0]; /* If this is to be the first docid written to the page, set the - ** docid-pointer in the page-header. */ - if( pWriter->bFirstRowidInPage ) fts5PutU16(pPage->buf.p, pPage->buf.n); + ** docid-pointer in the page-header. Also append a value to the dlidx + ** buffer, in case a doclist-index is required. */ + if( pWriter->bFirstRowidInPage ){ + fts5PutU16(pPage->buf.p, pPage->buf.n); + fts5WriteDlidxAppend(p, pWriter, iRowid); + } /* Write the docid. */ if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ @@ -2301,20 +2367,22 @@ static void fts5WritePendingDoclist( fts5WriteAppendZerobyte(p, pWriter); } +/* +** Flush any data cached by the writer object to the database. Free any +** allocations associated with the writer. 
+*/ static void fts5WriteFinish( Fts5Index *p, - Fts5SegWriter *pWriter, - int *pnHeight, - int *pnLeaf + Fts5SegWriter *pWriter, /* Writer object */ + int *pnHeight, /* OUT: Height of the b-tree */ + int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; *pnLeaf = pWriter->aWriter[0].pgno; *pnHeight = pWriter->nWriter; fts5WriteFlushLeaf(p, pWriter); - if( pWriter->nWriter>1 && pWriter->nEmpty ){ - Fts5PageWriter *pPg = &pWriter->aWriter[1]; - fts5BufferAppendVarint(&p->rc, &pPg->buf, 0); - fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); + if( pWriter->nWriter>1 ){ + fts5WriteBtreeNEmpty(p, pWriter); } for(i=1; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; @@ -2327,6 +2395,7 @@ static void fts5WriteFinish( fts5BufferFree(&pPg->buf); } sqlite3_free(pWriter->aWriter); + sqlite3Fts5BufferFree(&pWriter->dlidx); } static void fts5WriteInit( @@ -3145,6 +3214,28 @@ static void fts5DecodeFunction( a = sqlite3_value_blob(apVal[1]); fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); + if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + int i = 0; + i64 iPrev; + sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)", + iIdx, iSegid, iHeight, iPgno + ); + if( n>0 ){ + i = getVarint(&a[i], (u64*)&iPrev); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev); + } + while( i Date: Fri, 1 Aug 2014 19:27:07 +0000 Subject: [PATCH 030/206] Have the fts5 integrity-check verify that doclist indexes match the contents of the leaf pages that they index. 
FossilOrigin-Name: 37a7d3035eb4bbad7e32fe550321ac9fae611a57 --- ext/fts5/fts5_index.c | 115 +++++++++++++++++++++++++++++++++++------ manifest | 16 +++--- manifest.uuid | 2 +- test/fts5ah.test | 3 -- test/permutations.test | 2 +- 5 files changed, 108 insertions(+), 30 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 1443751046..685492d0b7 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -255,6 +255,12 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif +#ifdef SQLITE_DEBUG +static int fts5MissingData() { return 0; } +#else +# define fts5MissingData() +#endif + typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; @@ -530,6 +536,7 @@ struct Fts5NodeIter { Fts5Buffer term; int nEmpty; int iChild; + int bDlidx; }; /* @@ -566,6 +573,7 @@ struct Fts5BtreeIter { int iLeaf; /* Leaf containing terms >= current term */ int nEmpty; /* Number of "empty" leaves following iLeaf */ int bEof; /* Set to true at EOF */ + int bDlidx; /* True if there exists a dlidx */ }; static void fts5PutU16(u8 *aOut, u16 iVal){ @@ -670,6 +678,8 @@ static Fts5Data *fts5DataReadOrBuffer( rc = sqlite3_blob_reopen(p->pReader, iRowid); } + if( rc ) fts5MissingData(); + if( rc==SQLITE_OK ){ int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ @@ -980,10 +990,12 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ */ static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ + pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; pIter->iOff++; pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); }else{ pIter->nEmpty = 0; + pIter->bDlidx = 0; } } @@ -2082,13 +2094,15 @@ static int fts5PrefixCompress( */ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ if( pWriter->nEmpty ){ - Fts5PageWriter *pPg = &pWriter->aWriter[1]; int 
bFlag = 0; + Fts5PageWriter *pPg; + pPg = &pWriter->aWriter[1]; if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ i64 iKey = FTS5_DOCLIST_IDX_ROWID( pWriter->iIdx, pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty ); + assert( pWriter->dlidx.n>0 ); fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n); bFlag = 1; } @@ -2103,6 +2117,22 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ pWriter->bDlidxPrevValid = 0; } +static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ + Fts5PageWriter *aNew; + Fts5PageWriter *pNew; + int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); + + aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); + if( aNew==0 ) return; + + pNew = &aNew[pWriter->nWriter]; + memset(pNew, 0, sizeof(Fts5PageWriter)); + pNew->pgno = 1; + fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); + + pWriter->nWriter++; + pWriter->aWriter = aNew; +} /* ** This is called once for each leaf page except the first that contains @@ -2123,19 +2153,8 @@ static void fts5WriteBtreeTerm( Fts5PageWriter *pPage; if( iHeight>=pWriter->nWriter ){ - Fts5PageWriter *aNew; - Fts5PageWriter *pNew; - int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); - aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); - if( aNew==0 ) return; - - pNew = &aNew[pWriter->nWriter]; - memset(pNew, 0, sizeof(Fts5PageWriter)); - pNew->pgno = 1; - fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); - - pWriter->nWriter++; - pWriter->aWriter = aNew; + fts5WriteBtreeGrow(p, pWriter); + if( p->rc ) return; } pPage = &pWriter->aWriter[iHeight]; @@ -2202,6 +2221,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ if( pPage->term.n==0 ){ /* No term was written to this page. 
*/ + assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5WriteBtreeNoTerm(p, pWriter); } @@ -2379,11 +2399,15 @@ static void fts5WriteFinish( ){ int i; *pnLeaf = pWriter->aWriter[0].pgno; - *pnHeight = pWriter->nWriter; fts5WriteFlushLeaf(p, pWriter); + if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + fts5WriteBtreeGrow(p, pWriter); + } if( pWriter->nWriter>1 ){ fts5WriteBtreeNEmpty(p, pWriter); } + *pnHeight = pWriter->nWriter; + for(i=1; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno); @@ -2905,6 +2929,7 @@ static void fts5BtreeIterInit( }else{ pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->iLeaf = pIter->aLvl[0].s.iChild; + pIter->bDlidx = pIter->aLvl[0].s.bDlidx; } } @@ -2940,6 +2965,7 @@ static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ } pIter->nEmpty = pIter->aLvl[0].s.nEmpty; + pIter->bDlidx = pIter->aLvl[0].s.bDlidx; pIter->iLeaf = pIter->aLvl[0].s.iChild; assert( p->rc==SQLITE_OK || pIter->bEof ); } @@ -2958,6 +2984,37 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ fts5BufferFree(&pIter->term); } +typedef struct DoclistIdxIter DoclistIdxIter; +struct DoclistIdxIter { + Fts5Data *pDlidx; /* Data for doclist index, if any */ + int iOff; /* Current offset into pDlidx */ + int bRowidValid; /* iRowid is valid */ + + int bZero; /* True if current leaf has no rowid */ + i64 iRowid; /* If bZero==0, first rowid on leaf */ +}; + +/* +** Return non-zero if EOF is reached. 
+*/ +static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){ + i64 iVal; + if( pIter->iOff>=pIter->pDlidx->n ) return 1; + pIter->iOff += getVarint(&pIter->pDlidx->p[pIter->iOff], (u64*)&iVal); + if( iVal==0 ){ + pIter->bZero = 1; + }else{ + pIter->bZero = 0; + if( pIter->bRowidValid ){ + pIter->iRowid -= iVal; + }else{ + pIter->bRowidValid = 1; + pIter->iRowid = iVal; + } + } + return 0; +} + static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index that pSeg is a part of */ @@ -2974,6 +3031,7 @@ static void fts5IndexIntegrityCheckSegment( Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ + DoclistIdxIter dliter; /* For iterating through any doclist index */ /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ @@ -3000,6 +3058,12 @@ static void fts5IndexIntegrityCheckSegment( fts5DataRelease(pLeaf); if( p->rc ) break; + memset(&dliter, 0, sizeof(DoclistIdxIter)); + if( iter.bDlidx ){ + i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf); + dliter.pDlidx = fts5DataRead(p, iDlidxRowid); + } + /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. 
*/ for(i=1; i<=iter.nEmpty; i++){ @@ -3007,8 +3071,23 @@ static void fts5IndexIntegrityCheckSegment( if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ p->rc = FTS5_CORRUPT; } + if( pLeaf && dliter.pDlidx ){ + if( fts5IndexDoclistIterNext(&dliter) ){ + p->rc = FTS5_CORRUPT; + }else{ + int iRowidOff = fts5GetU16(&pLeaf->p[0]); + if( dliter.bZero ){ + if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT; + }else{ + i64 iRowid; + getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); + if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; + } + } + } fts5DataRelease(pLeaf); } + fts5DataRelease(dliter.pDlidx); } if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ @@ -3218,7 +3297,7 @@ static void fts5DecodeFunction( int i = 0; i64 iPrev; sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)", - iIdx, iSegid, iHeight, iPgno + iIdx, iSegid, iPgno ); if( n>0 ){ i = getVarint(&a[i], (u64*)&iPrev); @@ -3305,7 +3384,9 @@ static void fts5DecodeFunction( ); } if( ss.nEmpty ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d", ss.nEmpty); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty, + ss.bDlidx ? "*" : "" + ); } } fts5NodeIterFree(&ss); diff --git a/manifest b/manifest index f006bde39e..ec7f134bbc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\s"doclist\sindex"\srecords\sto\sthe\sdatabase.\sThese\sare\sto\smake\snavigating\swithin\svery\slarge\sdoclists\sfaster.\sThey\sare\snot\syet\sused\sby\squeries. -D 2014-08-01T11:16:25.207 +C Have\sthe\sfts5\sintegrity-check\sverify\sthat\sdoclist\sindexes\smatch\sthe\scontents\sof\sthe\sleaf\spages\sthat\sthey\sindex. 
+D 2014-08-01T19:27:07.492 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a -F ext/fts5/fts5_index.c 618d54ecf41887b6db59491b71e654ae3315f8c9 +F ext/fts5/fts5_index.c 3e33e3b86f026fc5b2cb3c573ba05375c8e4de0b F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -602,7 +602,7 @@ F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test bfa6ebd7ee87f73c4146b9e316a105fd0e43d01a +F test/fts5ah.test dfb54897c470e2dcf88912fc4f5b1ca4ac8307f7 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -770,7 +770,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test 5f1f942bae4139b33626b82627aa262c0f72d936 +F test/permutations.test 542edb965245565d06b9284e708f17bb93d70691 F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1199,7 +1199,7 @@ F 
tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e6af3b7a3cf331210f4c87848e2af007dbd5ef30 -R a017a4de54c141d4f4f840978af83e33 +P 89377421ff69f2450364987afe781b6d8bcbf087 +R 49a5d37abb265ab7fa662e06ee8ea874 U dan -Z 90f2786a7e9f28e43c6798f77c65d6dc +Z 90844fe42071f9a2a3f80f69e16c73d8 diff --git a/manifest.uuid b/manifest.uuid index 5a6a2d5b54..e25707dfd1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -89377421ff69f2450364987afe781b6d8bcbf087 \ No newline at end of file +37a7d3035eb4bbad7e32fe550321ac9fae611a57 \ No newline at end of file diff --git a/test/fts5ah.test b/test/fts5ah.test index 88fd524eb9..f5d1eee1b9 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -49,9 +49,6 @@ do_execsql_test 1.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } -do_execsql_test 1.4 { - SELECT count(*) FROM t1_data -} finish_test diff --git a/test/permutations.test b/test/permutations.test index 9587d3bef0..41659ef898 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -226,7 +226,7 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test - fts5af.test fts5ag.test + fts5af.test fts5ag.test fts5ah.test } test_suite "nofaultsim" -prefix "" -description { From ff31da03e57e92018d543a841a5aaabd3d8c3338 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 Aug 2014 20:13:49 +0000 Subject: [PATCH 031/206] Add a special case to the integrity-check code to check that the final integer in a doclist index is as expected. 
FossilOrigin-Name: c98934155cb48adfda57bd0fd1b950226d45f67a --- ext/fts5/fts5_index.c | 29 +++++++++++++++++++++++++++++ manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 685492d0b7..8c4734e265 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3087,6 +3087,35 @@ static void fts5IndexIntegrityCheckSegment( } fts5DataRelease(pLeaf); } + + /* There may (or may not be) a final entry in the doclist. The entry + ** is only present if the page following the nEmpty termless pages + ** (a) exists and (b) contains at least one rowid that is part of + ** the doclist. */ + if( dliter.pDlidx ){ + if( (iter.iLeaf + iter.nEmpty)==pSeg->pgnoLast ){ + /* The next page does not exist. So the iterator should be at EOF. */ + if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT; + }else{ + Fts5Data *pLeaf = fts5DataRead(p, iRow+i); + if( pLeaf ){ + int iRowidOff = fts5GetU16(&pLeaf->p[0]); + if( iRowidOff==0 ){ + if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT; + }else{ + if( fts5IndexDoclistIterNext(&dliter) ){ + p->rc = FTS5_CORRUPT; + }else{ + i64 iRowid; + getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); + if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; + } + } + fts5DataRelease(pLeaf); + } + } + } + fts5DataRelease(dliter.pDlidx); } diff --git a/manifest b/manifest index ec7f134bbc..089486568e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Have\sthe\sfts5\sintegrity-check\sverify\sthat\sdoclist\sindexes\smatch\sthe\scontents\sof\sthe\sleaf\spages\sthat\sthey\sindex. -D 2014-08-01T19:27:07.492 +C Add\sa\sspecial\scase\sto\sthe\sintegrity-check\scode\sto\scheck\sthat\sthe\sfinal\sinteger\sin\sa\sdoclist\sindex\sis\sas\sexpected. 
+D 2014-08-01T20:13:49.462 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a -F ext/fts5/fts5_index.c 3e33e3b86f026fc5b2cb3c573ba05375c8e4de0b +F ext/fts5/fts5_index.c 13f9dd9788f90c419ea33db0fcb2214c2f1290ef F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 89377421ff69f2450364987afe781b6d8bcbf087 -R 49a5d37abb265ab7fa662e06ee8ea874 +P 37a7d3035eb4bbad7e32fe550321ac9fae611a57 +R 21856f96ed42128f5622f6977d9547a6 U dan -Z 90844fe42071f9a2a3f80f69e16c73d8 +Z 83c769d88a67f25fcdc4af671199630b diff --git a/manifest.uuid b/manifest.uuid index e25707dfd1..312136f109 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -37a7d3035eb4bbad7e32fe550321ac9fae611a57 \ No newline at end of file +c98934155cb48adfda57bd0fd1b950226d45f67a \ No newline at end of file From 9af0705e844274e721abb82de42fb2574e613a77 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 2 Aug 2014 20:49:36 +0000 Subject: [PATCH 032/206] Start changing things to use doclist indexes as required. code is not activated yet. 
FossilOrigin-Name: b8864da95db2c0e611116304d607e35a86c9247d --- ext/fts5/fts5.c | 100 +++++++++++++++++++++++++++++++++++++++--- ext/fts5/fts5Int.h | 5 ++- ext/fts5/fts5_expr.c | 19 ++++---- ext/fts5/fts5_index.c | 45 ++++++++++++++++++- manifest | 22 +++++----- manifest.uuid | 2 +- test/fts5aa.test | 15 +++++++ test/fts5ah.test | 15 +++++++ 8 files changed, 192 insertions(+), 31 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index ec9529c444..d2a5f0ce6e 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -84,6 +84,12 @@ struct Fts5Sorter { /* ** Virtual-table cursor object. +** +** zSpecial: +** If this is a 'special' query (refer to function fts5SpecialMatch()), +** then this variable points to a nul-terminated buffer containing the +** result to return through the table-name column. It is nul-terminated +** and should eventually be freed using sqlite3_free(). */ struct Fts5Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ @@ -94,6 +100,7 @@ struct Fts5Cursor { int csrflags; /* Mask of cursor flags (see below) */ Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ + char *zSpecial; /* Result of special query */ /* Variables used by auxiliary functions */ i64 iCsrId; /* Cursor id */ @@ -253,6 +260,7 @@ static int fts5CreateMethod( #define FTS5_PLAN_SORTED_MATCH 3 /* ( MATCH ? ORDER BY rank) */ #define FTS5_PLAN_ROWID 4 /* (rowid = ?) 
*/ #define FTS5_PLAN_SOURCE 5 /* A source cursor for SORTED_MATCH */ +#define FTS5_PLAN_SPECIAL 6 /* An internal query */ #define FTS5_PLAN(idxNum) ((idxNum) & 0x7) @@ -395,6 +403,7 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); *pp = pCsr->pNext; + sqlite3_free(pCsr->zSpecial); sqlite3_free(pCsr); return SQLITE_OK; } @@ -457,6 +466,11 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); break; + case FTS5_PLAN_SPECIAL: { + CsrFlagSet(pCsr, FTS5CSR_EOF); + break; + } + case FTS5_PLAN_SORTED_MATCH: { rc = fts5SorterNext(pCsr); break; @@ -536,6 +550,42 @@ static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ return rc; } +/* +** Process a "special" query. A special query is identified as one with a +** MATCH expression that begins with a '*' character. The remainder of +** the text passed to the MATCH operator are used as the special query +** parameters. +*/ +static int fts5SpecialMatch( + Fts5Table *pTab, + Fts5Cursor *pCsr, + const char *zQuery +){ + int rc = SQLITE_OK; /* Return code */ + const char *z = zQuery; /* Special query text */ + int n; /* Number of bytes in text at z */ + + while( z[0]==' ' ) z++; + for(n=0; z[n] && z[n]!=' '; n++); + + assert( pTab->base.zErrMsg==0 ); + assert( pCsr->zSpecial==0 ); + + if( 0==sqlite3_strnicmp("reads", z, n) ){ + pCsr->zSpecial = sqlite3_mprintf("%d", sqlite3Fts5IndexReads(pTab->pIndex)); + pCsr->idxNum = FTS5_PLAN_SPECIAL; + if( pCsr->zSpecial==0 ) rc = SQLITE_NOMEM; + } + else{ + /* An unrecognized directive. Return an error message. */ + pTab->base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z); + rc = SQLITE_ERROR; + } + + return rc; +} + + /* ** This is the xFilter interface for the virtual table. 
See ** the virtual table xFilter method documentation for additional @@ -559,19 +609,30 @@ static int fts5FilterMethod( assert( pCsr->pRank==0 ); if( pTab->pSortCsr ){ + /* If pSortCsr is non-NULL, then this call is being made as part of + ** processing for a "... MATCH ORDER BY rank" query (ePlan is + ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will + ** return results to the user for this query. The current cursor + ** (pCursor) is used to execute the query issued by function + ** fts5CursorFirstSorted() above. */ + assert( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ); pCsr->idxNum = FTS5_PLAN_SOURCE; pCsr->pRank = pTab->pSortCsr->pRank; pCsr->pExpr = pTab->pSortCsr->pExpr; rc = fts5CursorFirst(pTab, pCsr, bAsc); }else{ int ePlan = FTS5_PLAN(idxNum); - int eStmt = fts5StmtType(idxNum); pCsr->idxNum = idxNum; - rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); - if( rc==SQLITE_OK ){ - if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){ + if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){ + const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); + + if( zExpr[0]=='*' ){ + /* The user has issued a query of the form "MATCH '*...'". This + ** indicates that the MATCH expression is not a full text query, + ** but a request for an internal parameter. */ + rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); + }else{ char **pzErr = &pTab->base.zErrMsg; - const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); pCsr->pRank = pTab->pGlobal->pAux; rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ @@ -581,7 +642,13 @@ static int fts5FilterMethod( rc = fts5CursorFirstSorted(pTab, pCsr, bAsc); } } - }else{ + } + }else{ + /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup + ** by rowid (ePlan==FTS5_PLAN_ROWID). 
*/ + int eStmt = fts5StmtType(idxNum); + rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); + if( rc==SQLITE_OK ){ if( ePlan==FTS5_PLAN_ROWID ){ sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); } @@ -629,6 +696,10 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); switch( ePlan ){ + case FTS5_PLAN_SPECIAL: + *pRowid = 0; + break; + case FTS5_PLAN_SOURCE: case FTS5_PLAN_MATCH: case FTS5_PLAN_SORTED_MATCH: @@ -649,7 +720,16 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ */ static int fts5SeekCursor(Fts5Cursor *pCsr){ int rc = SQLITE_OK; - if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ + + /* If the cursor does not yet have a statement handle, obtain one now. */ + if( pCsr->pStmt==0 ){ + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + int eStmt = fts5StmtType(pCsr->idxNum); + rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); + assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ); + } + + if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ assert( pCsr->pExpr ); sqlite3_reset(pCsr->pStmt); sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); @@ -1089,6 +1169,12 @@ static int fts5ColumnMethod( assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); + if( pCsr->idxNum==FTS5_PLAN_SPECIAL ){ + if( iCol==pConfig->nCol ){ + sqlite3_result_text(pCtx, pCsr->zSpecial, -1, SQLITE_TRANSIENT); + } + }else + if( iCol==pConfig->nCol ){ if( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SOURCE ){ fts5PoslistBlob(pCtx, pCsr); diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index bfbc8d7aca..07903abf83 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -188,7 +188,8 @@ Fts5IndexIter *sqlite3Fts5IndexQuery( ** Docid list iteration. 
*/ int sqlite3Fts5IterEof(Fts5IndexIter*); -void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch); +void sqlite3Fts5IterNext(Fts5IndexIter*); +void sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); /* @@ -273,6 +274,8 @@ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf); int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); +int sqlite3Fts5IndexReads(Fts5Index *p); + /* ** End of interface to code in fts5_index.c. **************************************************************************/ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 68d8a75b9a..82645a2619 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -583,7 +583,7 @@ static int fts5ExprNearAdvanceAll( Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; jnTerm; j++){ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - sqlite3Fts5IterNext(pIter, 0); + sqlite3Fts5IterNext(pIter); if( sqlite3Fts5IterEof(pIter) ){ *pbEof = 1; return rc; @@ -612,19 +612,18 @@ static int fts5ExprAdvanceto( ){ i64 iLast = *piLast; i64 iRowid; - while( 1 ){ - iRowid = sqlite3Fts5IterRowid(pIter); - if( (bAsc==0 && iRowid<=iLast) || (bAsc==1 && iRowid>=iLast) ) break; - sqlite3Fts5IterNext(pIter, 0); + + iRowid = sqlite3Fts5IterRowid(pIter); + if( (bAsc==0 && iRowid>iLast) || (bAsc && iRowid=iLast) ); } - if( iRowid!=iLast ){ - assert( (bAsc==0 && iRowidiLast) ); - *piLast = iRowid; - } + *piLast = iRowid; return 0; } @@ -769,7 +768,7 @@ static int fts5ExprNearInitAll( return SQLITE_OK; } -/* fts3ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. */ +/* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. 
*/ static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); /* diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 8c4734e265..271caa8a0d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -306,6 +306,7 @@ struct Fts5Index { sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ + int nRead; /* Total number of blocks read */ }; struct Fts5DoclistIter { @@ -702,6 +703,7 @@ static Fts5Data *fts5DataReadOrBuffer( } } p->rc = rc; + p->nRead++; } return pRet; @@ -1666,9 +1668,28 @@ static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ ** results are undefined. */ static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ + assert( pIter->aSeg[ pIter->aFirst[1] ].pLeaf ); return pIter->aSeg[ pIter->aFirst[1] ].iRowid; } +/* +** Move the iterator to the next entry at or following iMatch. +*/ +static void fts5MultiIterNextFrom( + Fts5Index *p, + Fts5MultiSegIter *pIter, + i64 iMatch +){ + while( 1 ){ + i64 iRowid; + fts5MultiIterNext(p, pIter); + if( fts5MultiIterEof(p, pIter) ) break; + iRowid = fts5MultiIterRowid(pIter); + if( pIter->bRev==0 && iRowid<=iMatch ) break; + if( pIter->bRev!=0 && iRowid>=iMatch ) break; + } +} + /* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to. @@ -3759,7 +3780,7 @@ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ /* ** Move to the next matching rowid. */ -void sqlite3Fts5IterNext(Fts5IndexIter *pIter, i64 iMatch){ +void sqlite3Fts5IterNext(Fts5IndexIter *pIter){ if( pIter->pDoclist ){ fts5DoclistIterNext(pIter->pDoclist); }else{ @@ -3768,6 +3789,20 @@ void sqlite3Fts5IterNext(Fts5IndexIter *pIter, i64 iMatch){ } } +/* +** Move to the next matching rowid that occurs at or after iMatch. 
The +** definition of "at or after" depends on whether this iterator iterates +** in ascending or descending rowid order. +*/ +void sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ + if( pIter->pDoclist ){ + assert( 0 ); + /* fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); */ + }else{ + fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); + } +} + /* ** Return the current rowid. */ @@ -3840,3 +3875,11 @@ int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ return p->rc; } +/* +** Return the total number of blocks this module has read from the %_data +** table since it was created. +*/ +int sqlite3Fts5IndexReads(Fts5Index *p){ + return p->nRead; +} + diff --git a/manifest b/manifest index 089486568e..87e8bca504 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\sspecial\scase\sto\sthe\sintegrity-check\scode\sto\scheck\sthat\sthe\sfinal\sinteger\sin\sa\sdoclist\sindex\sis\sas\sexpected. -D 2014-08-01T20:13:49.462 +C Start\schanging\sthings\sto\suse\sdoclist\sindexes\sas\srequired.\scode\sis\snot\sactivated\syet. 
+D 2014-08-02T20:49:36.405 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,14 +103,14 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c aa269bbecf78cdb7aaa9c6dba26f6ee906ceedaf +F ext/fts5/fts5.c 23f875e24ffa722107690d14b449141a25a2d697 F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a -F ext/fts5/fts5Int.h 9a195c1706876c538902f007149b39e982e9da53 +F ext/fts5/fts5Int.h aef50f3078e60707aeb2e4b2787d8c5eecdd02dc F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 -F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a -F ext/fts5/fts5_index.c 13f9dd9788f90c419ea33db0fcb2214c2f1290ef +F ext/fts5/fts5_expr.c 9402474456732ddb5019f83a77907852f108a96a +F ext/fts5/fts5_index.c 20c905c323d866251e15d7ed2486c309914ceeb9 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -595,14 +595,14 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test f90836c002804a82386d66c79b380128c5f3005e +F test/fts5aa.test ec150ac2778f871550bcdbea34598fba08717a4e F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 
9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test dfb54897c470e2dcf88912fc4f5b1ca4ac8307f7 +F test/fts5ah.test 2b01e7d2b3a31b668cba2afad5cb1c651895a255 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 37a7d3035eb4bbad7e32fe550321ac9fae611a57 -R 21856f96ed42128f5622f6977d9547a6 +P c98934155cb48adfda57bd0fd1b950226d45f67a +R 0addd1f8d0c92beb67e8a764402a7ad4 U dan -Z 83c769d88a67f25fcdc4af671199630b +Z f1895ff018d8274d19451cd024daaa99 diff --git a/manifest.uuid b/manifest.uuid index 312136f109..8a1f54c530 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c98934155cb48adfda57bd0fd1b950226d45f67a \ No newline at end of file +b8864da95db2c0e611116304d607e35a86c9247d \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index c59183512a..87e1494d55 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -285,5 +285,20 @@ do_catchsql_test 11.2 { CREATE VIRTUAL TABLE rank USING fts5(a, b, c); } {1 {reserved fts5 table name: rank}} +#------------------------------------------------------------------------- +# +do_execsql_test 12.1 { + CREATE VIRTUAL TABLE t2 USING fts5(x,y); +} {} + +do_catchsql_test 12.2 { + SELECT t2 FROM t2 WHERE t2 MATCH '*stuff' +} {1 {unknown special query: stuff}} + +do_test 12.3 { + set res [db one { SELECT t2 FROM t2 WHERE t2 MATCH '* reads ' }] + string is integer $res 
+} {1} + finish_test diff --git a/test/fts5ah.test b/test/fts5ah.test index f5d1eee1b9..e108ec2ddd 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -49,6 +49,21 @@ do_execsql_test 1.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } +proc reads {} { + db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'} +} + +do_test 1.4 { + set nRead [reads] + db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x' } + set a [expr [reads] - $nRead] +} {} + +do_test 1.5 { + set nRead [reads] + db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } + set a [expr [reads] - $nRead] +} {} finish_test From 9a67058a63da4e319c3cafb1ba94e228bdef2e88 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Aug 2014 20:07:40 +0000 Subject: [PATCH 033/206] Fix fts5_index.c to use doclist-indexes when possible. Only some cases work so far. FossilOrigin-Name: 90b82d3ef613b2915e0e280dc1d2e5a2b617d59c --- ext/fts5/fts5_index.c | 530 ++++++++++++++++++++++++++++++++---------- manifest | 16 +- manifest.uuid | 2 +- test/fts5aa.test | 3 +- test/fts5ah.test | 37 ++- 5 files changed, 452 insertions(+), 136 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 271caa8a0d..a1735a1104 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -266,6 +266,7 @@ typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; +typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; @@ -279,7 +280,6 @@ typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; - /* ** One object per %_data table. */ @@ -321,6 +321,9 @@ struct Fts5DoclistIter { int nPoslist; }; +/* +** Each iterator used by external modules is an instance of this type. 
+*/ struct Fts5IndexIter { Fts5Index *pIndex; Fts5Structure *pStruct; @@ -489,6 +492,8 @@ struct Fts5SegIter { int nRowidOffset; /* Allocated size of aRowidOffset[] array */ int *aRowidOffset; /* Array of offset to rowid fields */ + Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ + /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ @@ -540,6 +545,26 @@ struct Fts5NodeIter { int bDlidx; }; +/* +** An instance of the following type is used to iterate through the contents +** of a doclist-index record. +** +** pData: +** A reference to the dlidx record. +*/ +struct Fts5DlidxIter { + Fts5Data *pData; /* Data for doclist index, if any */ + int iOff; /* Current offset into pDlidx */ + int bRowidValid; /* iRowid is valid */ + int bEof; /* At EOF already */ + + /* Output variables */ + int iLeafPgno; /* Page number of current leaf page */ + int bZero; /* True if current leaf has no rowids */ + i64 iRowid; /* If bZero==0, first rowid on leaf */ +}; + + /* ** An Fts5BtreeIter object is used to iterate through all entries in the ** b-tree hierarchy belonging to a single fts5 segment. In this case the @@ -577,6 +602,55 @@ struct Fts5BtreeIter { int bDlidx; /* True if there exists a dlidx */ }; + +/* +** Decode a segment-data rowid from the %_data table. This function is +** the opposite of macro FTS5_SEGMENT_ROWID(). 
+*/ +static void fts5DecodeRowid( + i64 iRowid, /* Rowid from %_data table */ + int *piIdx, /* OUT: Index */ + int *piSegid, /* OUT: Segment id */ + int *piHeight, /* OUT: Height */ + int *piPgno /* OUT: Page number */ +){ + *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); + iRowid >>= FTS5_DATA_PAGE_B; + + *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); + iRowid >>= FTS5_DATA_HEIGHT_B; + + *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); + iRowid >>= FTS5_DATA_ID_B; + + *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); +} + +static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ + int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); + + if( iSegid==0 ){ + if( iKey==FTS5_AVERAGES_ROWID ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); + }else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + "(structure idx=%d)", (int)(iKey-10) + ); + } + } + else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)", + iIdx, iSegid, iPgno + ); + }else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)", + iIdx, iSegid, iHeight, iPgno + ); + } +} + + static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); @@ -666,7 +740,15 @@ static Fts5Data *fts5DataReadOrBuffer( ){ Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ - int rc; + int rc = SQLITE_OK; + +#if 0 +Fts5Buffer buf = {0,0,0}; +fts5DebugRowid(&rc, &buf, iRowid); +fprintf(stdout, "read: %s\n", buf.p); +fflush(stdout); +sqlite3_free(buf.p); +#endif /* If the blob handle is not yet open, open and seek it. Otherwise, use ** the blob_reopen() API to reseek the existing blob handle. */ @@ -1041,6 +1123,115 @@ static void fts5NodeIterFree(Fts5NodeIter *pIter){ fts5BufferFree(&pIter->term); } +/* +** Return non-zero if EOF is reached. 
+*/ +static int fts5DlidxIterNext(Fts5DlidxIter *pIter, int bRev){ + if( bRev ){ + i64 iVal; + int iOff = pIter->iOff; + int iLimit; + u8 *a = pIter->pData->p; + + /* Currently iOff points to the first byte of a varint. This block + ** decrements iOff until it points to the first byte of the previous + ** varint. Taking care not to read any memory locations that occur + ** before the buffer in memory. */ + iLimit = (iOff>9 ? iOff-9 : 0); + for(iOff--; iOff>iLimit; iOff--){ + if( (a[iOff-1] & 0x80)==0 ) break; + } + pIter->iOff = iOff; + + if( iOff<=0 ){ + pIter->bEof = 1; + return 1; + } + + getVarint(&a[iOff], (u64*)&iVal); + if( iVal==0 ){ + pIter->bZero = 1; + }else if( iOff==0 ){ + pIter->iRowid = iVal; + }else{ + pIter->iRowid += iVal; + } + pIter->iLeafPgno--; + }else{ + i64 iVal; + if( pIter->iOff>=pIter->pData->n ){ + pIter->bEof = 1; + return 1; + } + pIter->iOff += getVarint(&pIter->pData->p[pIter->iOff], (u64*)&iVal); + if( iVal==0 ){ + pIter->bZero = 1; + }else{ + pIter->bZero = 0; + if( pIter->bRowidValid ){ + pIter->iRowid -= iVal; + }else{ + pIter->bRowidValid = 1; + pIter->iRowid = iVal; + } + } + pIter->iLeafPgno++; + } + return 0; +} + +static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ + while( 0==fts5DlidxIterNext(pIter, 0) ); + assert( pIter->iOff==pIter->pData->n && pIter->bEof==1 ); + pIter->bEof = 0; +} + +static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ + return (p->rc!=SQLITE_OK || pIter->bEof); +} + +static void fts5DlidxIterInit( + Fts5Index *p, /* Fts5 Backend to iterate within */ + int bRev, /* True for ORDER BY ASC */ + int iIdx, int iSegid, /* Segment iSegid within index iIdx */ + int iLeafPgno, /* Leaf page number to load dlidx for */ + Fts5DlidxIter **ppIter /* OUT: Populated iterator */ +){ + Fts5DlidxIter *pIter = *ppIter; + Fts5Data *pDlidx; + + pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno)); + if( pDlidx==0 ) return; + if( pIter==0 ){ + *ppIter = pIter = 
(Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); + if( pIter==0 ){ + fts5DataRelease(pDlidx); + return; + } + }else{ + memset(pIter, 0, sizeof(Fts5DlidxIter)); + } + + pIter->pData = pDlidx; + + pIter->iLeafPgno = iLeafPgno; + if( bRev==0 ){ + fts5DlidxIterNext(pIter, 0); + }else{ + fts5DlidxIterLast(pIter); + } +} + +/* +** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). +*/ +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ + if( pIter ){ + fts5DataRelease(pIter->pData); + sqlite3_free(pIter); + } +} + /* ** Load the next leaf page into the segment iterator. */ @@ -1175,6 +1366,50 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ pIter->iRowidOffset = iRowidOffset; } +/* +** +*/ +static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ + assert( pIter->flags & FTS5_SEGITER_REVERSE ); + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ + Fts5Data *pNew; + pIter->iLeafPgno--; + pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( + pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno + )); + if( pNew ){ + if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ + if( pIter->iTermLeafOffsetn ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = pIter->iTermLeafOffset; + } + }else{ + int iRowidOff, dummy; + fts5LeafHeader(pNew, &iRowidOff, &dummy); + if( iRowidOff ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = iRowidOff; + } + } + + if( pIter->pLeaf ){ + u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; + pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid); + break; + }else{ + fts5DataRelease(pNew); + } + } + } + + if( pIter->pLeaf ){ + fts5SegIterReverseInitPage(p, pIter); + } +} /* ** Advance iterator pIter to the next entry. 
@@ -1202,6 +1437,8 @@ static void fts5SegIterNext( getVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid += iDelta; }else{ + fts5SegIterReverseNewPage(p, pIter); +#if 0 fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ @@ -1238,6 +1475,7 @@ static void fts5SegIterNext( if( pIter->pLeaf ){ fts5SegIterReverseInitPage(p, pIter); } +#endif } }else{ Fts5Data *pLeaf = pIter->pLeaf; @@ -1380,6 +1618,42 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ pIter->flags |= FTS5_SEGITER_REVERSE; } +/* +** Iterator pIter currently points to the first rowid of a doclist within +** index iIdx. There is a doclist-index associated with the final term on +** the current page. If the current term is the last term on the page, +** load the doclist-index from disk and initialize an iterator at +** (pIter->pDlidx). +*/ +static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ + int iSegid = pIter->pSeg->iSegid; + int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); + Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ + int iOff = pIter->iLeafOffset; /* Byte offset within current leaf */ + + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + assert( pIter->pDlidx==0 ); + + /* Check if the current doclist ends on this page. If it does, return + ** early without loading the doclist-index (as it belongs to a different + ** term. */ + while( iOffn ){ + i64 iDelta; + int nPoslist; + + /* iOff is currently the offset of the size field of a position list. */ + iOff += getVarint32(&pLeaf->p[iOff], nPoslist); + iOff += nPoslist; + + if( iOffn ){ + iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) return; + } + } + + fts5DlidxIterInit(p, bRev, iIdx, iSegid, pIter->iLeafPgno, &pIter->pDlidx); +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg, index iIdx. 
If there is no such term in the index, the iterator @@ -1399,6 +1673,7 @@ static void fts5SegIterSeekInit( int iPg = 1; int h; int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0); + int bDlidx = 0; /* True if there is a doclist-index */ assert( bGe==0 || (flags & FTS5INDEX_QUERY_ASC)==0 ); assert( pTerm && nTerm ); @@ -1418,11 +1693,13 @@ static void fts5SegIterSeekInit( assert( node.term.n==0 ); iPg = node.iChild; + bDlidx = node.bDlidx; for(fts5NodeIterNext(&p->rc, &node); node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; fts5NodeIterNext(&p->rc, &node) ){ iPg = node.iChild; + bDlidx = node.bDlidx; } fts5NodeIterFree(&node); fts5DataRelease(pNode); @@ -1430,6 +1707,7 @@ static void fts5SegIterSeekInit( if( iPgpgnoFirst ){ iPg = pSeg->pgnoFirst; + bDlidx = 0; } pIter->iLeafPgno = iPg - 1; @@ -1454,8 +1732,13 @@ static void fts5SegIterSeekInit( if( bGe==0 ){ pIter->flags |= FTS5_SEGITER_ONETERM; - if( pIter->pLeaf && (flags & FTS5INDEX_QUERY_ASC) ){ - fts5SegIterReverse(p, iIdx, pIter); + if( pIter->pLeaf ){ + if( bDlidx ){ + fts5SegIterLoadDlidx(p, iIdx, pIter); + } + if( flags & FTS5INDEX_QUERY_ASC ){ + fts5SegIterReverse(p, iIdx, pIter); + } } } } @@ -1466,6 +1749,7 @@ static void fts5SegIterSeekInit( static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); + fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); } @@ -1551,6 +1835,84 @@ static void fts5MultiIterAdvanced( } } +/* +** Move the seg-iter so that it points to the first rowid on page iLeafPgno. +** It is an error if leaf iLeafPgno contains no rowid. 
+*/ +static void fts5SegIterGotoPage( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int iLeafPgno +){ + assert( iLeafPgno>pIter->iLeafPgno ); + if( p->rc==SQLITE_OK ){ + pIter->iLeafPgno = iLeafPgno-1; + fts5SegIterNextPage(p, pIter); + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); + } + + if( p->rc==SQLITE_OK ){ + int iOff; + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; + + iOff = fts5GetU16(&a[0]); + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + } +} + +/* +** Advance the iterator passed as the second argument until it is at or +** past rowid iFrom. Regardless of the value of iFrom, the iterator is +** always advanced at least once. +*/ +static void fts5SegIterNextFrom( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + i64 iMatch /* Advance iterator at least this far */ +){ + int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); + Fts5DlidxIter *pDlidx = pIter->pDlidx; + int iLeafPgno = pIter->iLeafPgno; + + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + assert( pIter->pDlidx ); + assert( pIter->pLeaf ); + + + if( bRev==0 ){ + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatchiRowid ){ + if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno; + fts5DlidxIterNext(pDlidx, 0); + } + assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); + if( iLeafPgno>pIter->iLeafPgno ){ + fts5SegIterGotoPage(p, pIter, iLeafPgno); + } + }else if( 0 ){ + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ + fts5DlidxIterNext(pDlidx, 0); + if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno; + } + assert( iLeafPgno<=pIter->iLeafPgno || p->rc ); + if( iLeafPgnoiLeafPgno ){ + fts5SegIterGotoPage(p, pIter, iLeafPgno); + } + } + + while( 1 ){ + fts5SegIterNext(p, pIter); + if( pIter->pLeaf==0 ) break; + if( bRev==0 && pIter->iRowid<=iMatch ) break; + if( bRev!=0 && 
pIter->iRowid>=iMatch ) break; + } +} + /* ** Move the iterator to the next entry. ** @@ -1558,10 +1920,20 @@ static void fts5MultiIterAdvanced( ** considered an error if the iterator reaches EOF, or if it is already at ** EOF when this function is called. */ -static void fts5MultiIterNext(Fts5Index *p, Fts5MultiSegIter *pIter){ +static void fts5MultiIterNext( + Fts5Index *p, + Fts5MultiSegIter *pIter, + int bFrom, /* True if argument iFrom is valid */ + i64 iFrom /* Advance at least as far as this */ +){ if( p->rc==SQLITE_OK ){ int iFirst = pIter->aFirst[1]; - fts5SegIterNext(p, &pIter->aSeg[iFirst]); + Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + if( bFrom && pSeg->pDlidx ){ + fts5SegIterNextFrom(p, pSeg, iFrom); + }else{ + fts5SegIterNext(p, pSeg); + } fts5MultiIterAdvanced(p, pIter, iFirst, 1); } } @@ -1682,7 +2054,7 @@ static void fts5MultiIterNextFrom( ){ while( 1 ){ i64 iRowid; - fts5MultiIterNext(p, pIter); + fts5MultiIterNext(p, pIter, 1, iMatch); if( fts5MultiIterEof(p, pIter) ) break; iRowid = fts5MultiIterRowid(pIter); if( pIter->bRev==0 && iRowid<=iMatch ) break; @@ -2589,7 +2961,7 @@ fflush(stdout); for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; - fts5MultiIterNext(p, pIter) + fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ @@ -3005,36 +3377,7 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ fts5BufferFree(&pIter->term); } -typedef struct DoclistIdxIter DoclistIdxIter; -struct DoclistIdxIter { - Fts5Data *pDlidx; /* Data for doclist index, if any */ - int iOff; /* Current offset into pDlidx */ - int bRowidValid; /* iRowid is valid */ - int bZero; /* True if current leaf has no rowid */ - i64 iRowid; /* If bZero==0, first rowid on leaf */ -}; - -/* -** Return non-zero if EOF is reached. 
-*/ -static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){ - i64 iVal; - if( pIter->iOff>=pIter->pDlidx->n ) return 1; - pIter->iOff += getVarint(&pIter->pDlidx->p[pIter->iOff], (u64*)&iVal); - if( iVal==0 ){ - pIter->bZero = 1; - }else{ - pIter->bZero = 0; - if( pIter->bRowidValid ){ - pIter->iRowid -= iVal; - }else{ - pIter->bRowidValid = 1; - pIter->iRowid = iVal; - } - } - return 0; -} static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ @@ -3052,7 +3395,6 @@ static void fts5IndexIntegrityCheckSegment( Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ - DoclistIdxIter dliter; /* For iterating through any doclist index */ /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ @@ -3079,65 +3421,50 @@ static void fts5IndexIntegrityCheckSegment( fts5DataRelease(pLeaf); if( p->rc ) break; - memset(&dliter, 0, sizeof(DoclistIdxIter)); - if( iter.bDlidx ){ - i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf); - dliter.pDlidx = fts5DataRead(p, iDlidxRowid); - } /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ - for(i=1; i<=iter.nEmpty; i++){ + for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ pLeaf = fts5DataRead(p, iRow+i); if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ p->rc = FTS5_CORRUPT; } - if( pLeaf && dliter.pDlidx ){ - if( fts5IndexDoclistIterNext(&dliter) ){ - p->rc = FTS5_CORRUPT; - }else{ + fts5DataRelease(pLeaf); + } + + /* If there is a doclist-index, check that it looks right. 
*/ + if( iter.bDlidx ){ + Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ + int nEntry = 0; + int iSegid = pSeg->iSegid; + int bRev = 0; + + for(fts5DlidxIterInit(p, bRev, iIdx, iSegid, iter.iLeaf, &pDlidx); + fts5DlidxIterEof(p, pDlidx)==0; + fts5DlidxIterNext(pDlidx, bRev) + ){ + i64 iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno); + pLeaf = fts5DataRead(p, iKey); + if( pLeaf ){ int iRowidOff = fts5GetU16(&pLeaf->p[0]); - if( dliter.bZero ){ + if( pDlidx->bZero ){ if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT; }else{ i64 iRowid; getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; - } - } - } - fts5DataRelease(pLeaf); - } - - /* There may (or may not be) a final entry in the doclist. The entry - ** is only present if the page following the nEmpty termless pages - ** (a) exists and (b) contains at least one rowid that is part of - ** the doclist. */ - if( dliter.pDlidx ){ - if( (iter.iLeaf + iter.nEmpty)==pSeg->pgnoLast ){ - /* The next page does not exist. So the iterator should be at EOF. 
*/ - if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT; - }else{ - Fts5Data *pLeaf = fts5DataRead(p, iRow+i); - if( pLeaf ){ - int iRowidOff = fts5GetU16(&pLeaf->p[0]); - if( iRowidOff==0 ){ - if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT; - }else{ - if( fts5IndexDoclistIterNext(&dliter) ){ - p->rc = FTS5_CORRUPT; - }else{ - i64 iRowid; - getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; - } + if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); } + nEntry++; } - } - fts5DataRelease(dliter.pDlidx); + /* Check that the doclist-index was the right length */ + if( p->rc==SQLITE_OK && nEntry!=iter.nEmpty && nEntry!=iter.nEmpty+1 ){ + p->rc = FTS5_CORRUPT; + } + fts5DlidxIterFree(pDlidx); + } } if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ @@ -3169,7 +3496,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ Fts5Structure *pStruct = fts5StructureRead(p, iIdx); for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; - fts5MultiIterNext(p, pIter) + fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5PosIter sPos; /* Used to iterate through position list */ int n; /* Size of term in bytes */ @@ -3250,29 +3577,6 @@ static void fts5DecodeStructure( fts5StructureRelease(p); } -/* -** Decode a segment-data rowid from the %_data table. This function is -** the opposite of macro FTS5_SEGMENT_ROWID(). 
-*/ -static void fts5DecodeRowid( - i64 iRowid, /* Rowid from %_data table */ - int *piIdx, /* OUT: Index */ - int *piSegid, /* OUT: Segment id */ - int *piHeight, /* OUT: Height */ - int *piPgno /* OUT: Page number */ -){ - *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); - iRowid >>= FTS5_DATA_PAGE_B; - - *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); - iRowid >>= FTS5_DATA_HEIGHT_B; - - *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); - iRowid >>= FTS5_DATA_ID_B; - - *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); -} - /* ** Buffer (a/n) is assumed to contain a list of serialized varints. Read ** each varint and append its string representation to buffer pBuf. Return @@ -3331,7 +3635,7 @@ static void fts5DecodeFunction( sqlite3_value **apVal /* Function arguments */ ){ i64 iRowid; /* Rowid for record being decoded */ - int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ + int iIdx,iSegid,iHeight,iPgno; /* Rowid components */ const u8 *a; int n; /* Record to decode */ Fts5Buffer s; /* Build up text to return here */ int rc = SQLITE_OK; /* Return code */ @@ -3343,12 +3647,10 @@ static void fts5DecodeFunction( a = sqlite3_value_blob(apVal[1]); fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); + fts5DebugRowid(&rc, &s, iRowid); if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ int i = 0; i64 iPrev; - sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)", - iIdx, iSegid, iPgno - ); if( n>0 ){ i = getVarint(&a[i], (u64*)&iPrev); sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev); @@ -3367,20 +3669,14 @@ static void fts5DecodeFunction( }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "{averages} "); + /* todo */ }else{ - sqlite3Fts5BufferAppendPrintf(&rc, &s, - "{structure idx=%d}", (int)(iRowid-10) - ); fts5DecodeStructure(&rc, &s, a, n); } }else{ Fts5Buffer term; memset(&term, 0, sizeof(Fts5Buffer)); - sqlite3Fts5BufferAppendPrintf(&rc, 
&s, "(idx=%d segid=%d h=%d pgno=%d) ", - iIdx, iSegid, iHeight, iPgno - ); if( iHeight==0 ){ int iTermOff = 0; @@ -3666,7 +3962,7 @@ static void fts5SetupPrefixIter( memset(&doclist, 0, sizeof(doclist)); for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext(p, p1) + fts5MultiIterNext(p, p1, 0, 0) ){ i64 iRowid = fts5MultiIterRowid(p1); int nTerm; @@ -3785,7 +4081,7 @@ void sqlite3Fts5IterNext(Fts5IndexIter *pIter){ fts5DoclistIterNext(pIter->pDoclist); }else{ fts5BufferZero(&pIter->poslist); - fts5MultiIterNext(pIter->pIndex, pIter->pMulti); + fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); } } diff --git a/manifest b/manifest index 87e8bca504..bd1784eeb1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Start\schanging\sthings\sto\suse\sdoclist\sindexes\sas\srequired.\scode\sis\snot\sactivated\syet. -D 2014-08-02T20:49:36.405 +C Fix\sfts5_index.c\sto\suse\sdoclist-indexes\swhen\spossible.\sOnly\ssome\scases\swork\sso\sfar. 
+D 2014-08-04T20:07:40.532 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 9402474456732ddb5019f83a77907852f108a96a -F ext/fts5/fts5_index.c 20c905c323d866251e15d7ed2486c309914ceeb9 +F ext/fts5/fts5_index.c 3578823a9a43fcc77ce46c7f6efddfd155544053 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -595,14 +595,14 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test ec150ac2778f871550bcdbea34598fba08717a4e +F test/fts5aa.test f54245091fee924030722234070fcba95a493549 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test 2b01e7d2b3a31b668cba2afad5cb1c651895a255 +F test/fts5ah.test ca1f12b5738992c2edbdeb6c16133d41cfb9c031 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh 
f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c98934155cb48adfda57bd0fd1b950226d45f67a -R 0addd1f8d0c92beb67e8a764402a7ad4 +P b8864da95db2c0e611116304d607e35a86c9247d +R 2807aba63fb0a8821d708dc4cbc7f577 U dan -Z f1895ff018d8274d19451cd024daaa99 +Z 6114a7973c3151dffa74bd597b78489f diff --git a/manifest.uuid b/manifest.uuid index 8a1f54c530..a558eb6008 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b8864da95db2c0e611116304d607e35a86c9247d \ No newline at end of file +90b82d3ef613b2915e0e280dc1d2e5a2b617d59c \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 87e1494d55..3d1fc99289 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -50,7 +50,7 @@ do_execsql_test 2.1 { do_execsql_test 2.2 { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } { - {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} + {(structure idx=0) {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} } do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); @@ -182,7 +182,6 @@ for {set i 1} {$i <= 10} {incr i} { } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} - if {[set_test_counter errors]} exit } #------------------------------------------------------------------------- diff --git a/test/fts5ah.test b/test/fts5ah.test index e108ec2ddd..3f217d9f86 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -55,16 +55,37 @@ proc reads {} { do_test 1.4 { set nRead [reads] - db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x' } - set a [expr [reads] - $nRead] -} {} + execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' } + set nReadX [expr [reads] - $nRead] + expr $nReadX>1000 +} {1} -do_test 1.5 { - set nRead [reads] - db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } - set a [expr [reads] - $nRead] -} {} +foreach {tn q res} " + 1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x' } [list $W] + 2 { 
SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } [list $W] + 3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' } [list $W] + 4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y] +" { + do_test 1.5.$tn.1 { + set nRead [reads] + execsql $q + set n [expr [reads] - $nRead] + expr {$n < ($nReadX / 10)} + } {1} + + do_test 1.5.$tn.2 { + set nRead [reads] + execsql "$q ORDER BY rowid ASC" + set n [expr [reads] - $nRead] + expr {$n < ($nReadX / 10)} + } {1} + + do_execsql_test 1.5.$tn.3 $q [lsort -int -decr $res] + do_execsql_test 1.5.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res] +} + +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} finish_test From d5997d80cb89e792e1667bb024bd8514273986ea Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 5 Aug 2014 19:00:22 +0000 Subject: [PATCH 034/206] Use doclist-indexes with "ORDER BY rowid ASC" fts5 queries as well. FossilOrigin-Name: d028ba6589f3122b635474c2683c0f93d5bc6c7c --- ext/fts5/fts5_index.c | 373 +++++++++++++++++++++++++----------------- manifest | 18 +- manifest.uuid | 2 +- test/fts5aa.test | 3 +- test/fts5ac.test | 4 + test/fts5ah.test | 30 ++-- 6 files changed, 260 insertions(+), 170 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index a1735a1104..7d98ef72ff 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -550,18 +550,23 @@ struct Fts5NodeIter { ** of a doclist-index record. ** ** pData: -** A reference to the dlidx record. +** Record containing the doclist-index data. +** +** bEof: +** Set to true once iterator has reached EOF. +** +** iOff: +** Set to the current offset within record pData. 
*/ struct Fts5DlidxIter { Fts5Data *pData; /* Data for doclist index, if any */ int iOff; /* Current offset into pDlidx */ - int bRowidValid; /* iRowid is valid */ int bEof; /* At EOF already */ + int iFirstOff; /* Used by reverse iterators only */ /* Output variables */ int iLeafPgno; /* Page number of current leaf page */ - int bZero; /* True if current leaf has no rowids */ - i64 iRowid; /* If bZero==0, first rowid on leaf */ + i64 iRowid; /* First rowid on leaf iLeafPgno */ }; @@ -1124,14 +1129,83 @@ static void fts5NodeIterFree(Fts5NodeIter *pIter){ } /* -** Return non-zero if EOF is reached. +** The iterator passed as the first argument has the following fields set +** as follows. This function sets up the rest of the iterator so that it +** points to the first rowid in the doclist-index. +** +** pData: pointer to doclist-index record, +** iLeafPgno: page number that this doclist-index is associated with. */ -static int fts5DlidxIterNext(Fts5DlidxIter *pIter, int bRev){ - if( bRev ){ +static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ + Fts5Data *pData = pIter->pData; + int i; + + assert( pIter->pData ); + assert( pIter->iLeafPgno>0 ); + + /* Count the number of leading 0x00 bytes. Then set iLeafPgno. */ + for(i=0; in; i++){ + if( pData->p[i] ) break; + } + pIter->iLeafPgno += (i+1); + pIter->iOff = i; + + /* Unless we are already at the end of the doclist-index, load the first + ** rowid value. */ + if( pIter->iOffn ){ + pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&pIter->iRowid); + }else{ + pIter->bEof = 1; + } + pIter->iFirstOff = pIter->iOff; + return pIter->bEof; +} + +/* +** Advance the iterator passed as the only argument. 
+*/ +static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ + Fts5Data *pData = pIter->pData; + int iOff; + + for(iOff=pIter->iOff; iOffn; iOff++){ + if( pData->p[iOff] ) break; + } + + if( iOffn ){ i64 iVal; - int iOff = pIter->iOff; - int iLimit; + pIter->iLeafPgno += (iOff - pIter->iOff) + 1; + iOff += getVarint(&pData->p[iOff], (u64*)&iVal); + pIter->iRowid -= iVal; + pIter->iOff = iOff; + }else{ + pIter->bEof = 1; + } + + return pIter->bEof; +} + +static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ + return (p->rc!=SQLITE_OK || pIter->bEof); +} + +static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ + if( fts5DlidxIterFirst(pIter)==0 ){ + while( 0==fts5DlidxIterNext(pIter) ); + pIter->bEof = 0; + } +} + +static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ + int iOff = pIter->iOff; + + assert( pIter->bEof==0 ); + if( iOff<=pIter->iFirstOff ){ + pIter->bEof = 1; + }else{ u8 *a = pIter->pData->p; + i64 iVal; + int iLimit; /* Currently iOff points to the first byte of a varint. 
This block ** decrements iOff until it points to the first byte of the previous @@ -1141,53 +1215,19 @@ static int fts5DlidxIterNext(Fts5DlidxIter *pIter, int bRev){ for(iOff--; iOff>iLimit; iOff--){ if( (a[iOff-1] & 0x80)==0 ) break; } - pIter->iOff = iOff; - - if( iOff<=0 ){ - pIter->bEof = 1; - return 1; - } getVarint(&a[iOff], (u64*)&iVal); - if( iVal==0 ){ - pIter->bZero = 1; - }else if( iOff==0 ){ - pIter->iRowid = iVal; - }else{ - pIter->iRowid += iVal; - } + pIter->iRowid += iVal; pIter->iLeafPgno--; - }else{ - i64 iVal; - if( pIter->iOff>=pIter->pData->n ){ - pIter->bEof = 1; - return 1; + + while( a[iOff-1]==0x00 ){ + iOff--; + pIter->iLeafPgno--; } - pIter->iOff += getVarint(&pIter->pData->p[pIter->iOff], (u64*)&iVal); - if( iVal==0 ){ - pIter->bZero = 1; - }else{ - pIter->bZero = 0; - if( pIter->bRowidValid ){ - pIter->iRowid -= iVal; - }else{ - pIter->bRowidValid = 1; - pIter->iRowid = iVal; - } - } - pIter->iLeafPgno++; + pIter->iOff = iOff; } - return 0; -} -static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ - while( 0==fts5DlidxIterNext(pIter, 0) ); - assert( pIter->iOff==pIter->pData->n && pIter->bEof==1 ); - pIter->bEof = 0; -} - -static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ - return (p->rc!=SQLITE_OK || pIter->bEof); + return pIter->bEof; } static void fts5DlidxIterInit( @@ -1213,10 +1253,9 @@ static void fts5DlidxIterInit( } pIter->pData = pDlidx; - pIter->iLeafPgno = iLeafPgno; if( bRev==0 ){ - fts5DlidxIterNext(pIter, 0); + fts5DlidxIterFirst(pIter); }else{ fts5DlidxIterLast(pIter); } @@ -1438,44 +1477,6 @@ static void fts5SegIterNext( pIter->iRowid += iDelta; }else{ fts5SegIterReverseNewPage(p, pIter); -#if 0 - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ - Fts5Data *pNew; - pIter->iLeafPgno--; - pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( - pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno - )); - if( pNew ){ - if( 
pIter->iLeafPgno==pIter->iTermLeafPgno ){ - if( pIter->iTermLeafOffsetn ){ - pIter->pLeaf = pNew; - pIter->iLeafOffset = pIter->iTermLeafOffset; - } - }else{ - int iRowidOff, dummy; - fts5LeafHeader(pNew, &iRowidOff, &dummy); - if( iRowidOff ){ - pIter->pLeaf = pNew; - pIter->iLeafOffset = iRowidOff; - } - } - - if( pIter->pLeaf ){ - u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; - pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid); - break; - }else{ - fts5DataRelease(pNew); - } - } - } - - if( pIter->pLeaf ){ - fts5SegIterReverseInitPage(p, pIter); - } -#endif } }else{ Fts5Data *pLeaf = pIter->pLeaf; @@ -1555,43 +1556,49 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ /* Move to the page that contains the last rowid in this doclist. */ pLeaf = pIter->pLeaf; - while( iOffn ){ - int nPos; - i64 iDelta; + if( pIter->pDlidx ){ + int iSegid = pIter->pSeg->iSegid; + pgnoLast = pIter->pDlidx->iLeafPgno; + pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast)); + }else{ + while( iOffn ){ + int nPos; + i64 iDelta; - /* Position list size in bytes */ - iOff += getVarint32(&pLeaf->p[iOff], nPos); - iOff += nPos; - if( iOff>=pLeaf->n ) break; + /* Position list size in bytes */ + iOff += getVarint32(&pLeaf->p[iOff], nPos); + iOff += nPos; + if( iOff>=pLeaf->n ) break; - /* Rowid delta. Or, if 0x00, the end of doclist marker. */ - nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) break; - iOff += nPos; - } + /* Rowid delta. Or, if 0x00, the end of doclist marker. 
*/ + nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) break; + iOff += nPos; + } - if( iOff>=pLeaf->n ){ - Fts5StructureSegment *pSeg = pIter->pSeg; - i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pIter->iLeafPgno); - i64 iLast = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pSeg->pgnoLast); + if( iOff>=pLeaf->n ){ + Fts5StructureSegment *pSeg = pIter->pSeg; + i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pIter->iLeafPgno); + i64 iLast = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pSeg->pgnoLast); - /* The last rowid in the doclist may not be on the current page. Search - ** forward to find the page containing the last rowid. */ - for(iAbs++; p->rc==SQLITE_OK && iAbs<=iLast; iAbs++){ - Fts5Data *pNew = fts5DataRead(p, iAbs); - if( pNew ){ - int iRowid, iTerm; - fts5LeafHeader(pNew, &iRowid, &iTerm); - if( iRowid ){ - Fts5Data *pTmp = pLast; - pLast = pNew; - pNew = pTmp; - pgnoLast = iAbs & (((i64)1 << FTS5_DATA_PAGE_B) - 1); + /* The last rowid in the doclist may not be on the current page. Search + ** forward to find the page containing the last rowid. 
*/ + for(iAbs++; p->rc==SQLITE_OK && iAbs<=iLast; iAbs++){ + Fts5Data *pNew = fts5DataRead(p, iAbs); + if( pNew ){ + int iRowid, iTerm; + fts5LeafHeader(pNew, &iRowid, &iTerm); + if( iRowid ){ + Fts5Data *pTmp = pLast; + pLast = pNew; + pNew = pTmp; + pgnoLast = iAbs & (((i64)1 << FTS5_DATA_PAGE_B) - 1); + } + if( iTerm ){ + iAbs = iLast; + } + fts5DataRelease(pNew); } - if( iTerm ){ - iAbs = iLast; - } - fts5DataRelease(pNew); } } } @@ -1615,7 +1622,6 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ } fts5SegIterReverseInitPage(p, pIter); - pIter->flags |= FTS5_SEGITER_REVERSE; } /* @@ -1733,6 +1739,9 @@ static void fts5SegIterSeekInit( if( bGe==0 ){ pIter->flags |= FTS5_SEGITER_ONETERM; if( pIter->pLeaf ){ + if( flags & FTS5INDEX_QUERY_ASC ){ + pIter->flags |= FTS5_SEGITER_REVERSE; + } if( bDlidx ){ fts5SegIterLoadDlidx(p, iIdx, pIter); } @@ -1879,37 +1888,44 @@ static void fts5SegIterNextFrom( int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); Fts5DlidxIter *pDlidx = pIter->pDlidx; int iLeafPgno = pIter->iLeafPgno; + int bMove = 1; assert( pIter->flags & FTS5_SEGITER_ONETERM ); assert( pIter->pDlidx ); assert( pIter->pLeaf ); - if( bRev==0 ){ while( fts5DlidxIterEof(p, pDlidx)==0 && iMatchiRowid ){ - if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno; - fts5DlidxIterNext(pDlidx, 0); + iLeafPgno = pDlidx->iLeafPgno; + fts5DlidxIterNext(pDlidx); } assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); if( iLeafPgno>pIter->iLeafPgno ){ fts5SegIterGotoPage(p, pIter, iLeafPgno); + bMove = 0; } - }else if( 0 ){ + }else{ + assert( iMatch>pIter->iRowid ); while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ - fts5DlidxIterNext(pDlidx, 0); - if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno; + fts5DlidxIterPrev(pDlidx); } - assert( iLeafPgno<=pIter->iLeafPgno || p->rc ); + iLeafPgno = pDlidx->iLeafPgno; + + assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); + if( iLeafPgnoiLeafPgno ){ - fts5SegIterGotoPage(p, 
pIter, iLeafPgno); + pIter->iLeafPgno = iLeafPgno+1; + fts5SegIterReverseNewPage(p, pIter); + bMove = 0; } } while( 1 ){ - fts5SegIterNext(p, pIter); + if( bMove ) fts5SegIterNext(p, pIter); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid<=iMatch ) break; if( bRev!=0 && pIter->iRowid>=iMatch ) break; + bMove = 1; } } @@ -3377,7 +3393,50 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ fts5BufferFree(&pIter->term); } +/* +** This function is purely an internal test. It does not contribute to +** FTS functionality, or even the integrity-check, in any way. +** +** Instead, it tests that the same set of pgno/rowid combinations are +** visited regardless of whether the doclist-index identified by parameters +** iIdx/iSegid/iLeaf is iterated in forwards or reverse order. +*/ +#ifdef SQLITE_DEBUG +static void fts5DlidxIterTestReverse( + Fts5Index *p, + int iIdx, /* Index to load doclist-index from */ + int iSegid, /* Segment id to load from */ + int iLeaf /* Load doclist-index for this leaf */ +){ + Fts5DlidxIter *pDlidx = 0; + i64 cksum1 = 13; + i64 cksum2 = 13; + for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf, &pDlidx); + fts5DlidxIterEof(p, pDlidx)==0; + fts5DlidxIterNext(pDlidx) + ){ + cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); + cksum1 = (cksum1 ^ pDlidx->iRowid); + } + fts5DlidxIterFree(pDlidx); + pDlidx = 0; + + for(fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf, &pDlidx); + fts5DlidxIterEof(p, pDlidx)==0; + fts5DlidxIterPrev(pDlidx) + ){ + cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); + cksum2 = (cksum2 ^ pDlidx->iRowid); + } + fts5DlidxIterFree(pDlidx); + pDlidx = 0; + + if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; +} +#else +# define fts5DlidxIterTestReverse(w,x,y,z) +#endif static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ @@ -3421,7 +3480,6 @@ static void fts5IndexIntegrityCheckSegment( fts5DataRelease(pLeaf); if( p->rc ) break; - /* Now check that the iter.nEmpty leaves 
following the current leaf ** (a) exist and (b) contain no terms. */ for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ @@ -3435,35 +3493,52 @@ static void fts5IndexIntegrityCheckSegment( /* If there is a doclist-index, check that it looks right. */ if( iter.bDlidx ){ Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ - int nEntry = 0; + int iPrevLeaf = iter.iLeaf; int iSegid = pSeg->iSegid; - int bRev = 0; + int iPg; + i64 iKey; - for(fts5DlidxIterInit(p, bRev, iIdx, iSegid, iter.iLeaf, &pDlidx); + for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iter.iLeaf, &pDlidx); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterNext(pDlidx, bRev) + fts5DlidxIterNext(pDlidx) ){ - i64 iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno); + + /* Check any rowid-less pages that occur before the current leaf. */ + for(iPg=iPrevLeaf+1; iPgiLeafPgno; iPg++){ + iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg); + pLeaf = fts5DataRead(p, iKey); + if( pLeaf ){ + if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; + fts5DataRelease(pLeaf); + } + } + iPrevLeaf = pDlidx->iLeafPgno; + + /* Check that the leaf page indicated by the iterator really does + ** contain the rowid suggested by the same. 
*/ + iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ + i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); - if( pDlidx->bZero ){ - if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT; - }else{ - i64 iRowid; - getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; - } + getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); + if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } - nEntry++; + } - /* Check that the doclist-index was the right length */ - if( p->rc==SQLITE_OK && nEntry!=iter.nEmpty && nEntry!=iter.nEmpty+1 ){ - p->rc = FTS5_CORRUPT; + for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ + iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg); + pLeaf = fts5DataRead(p, iKey); + if( pLeaf ){ + if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; + fts5DataRelease(pLeaf); + } } + fts5DlidxIterFree(pDlidx); + fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); } } diff --git a/manifest b/manifest index bd1784eeb1..cca7f70664 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sfts5_index.c\sto\suse\sdoclist-indexes\swhen\spossible.\sOnly\ssome\scases\swork\sso\sfar. -D 2014-08-04T20:07:40.532 +C Use\sdoclist-indexes\swith\s"ORDER\sBY\srowid\sASC"\sfts5\squeries\sas\swell. 
+D 2014-08-05T19:00:22.438 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 9402474456732ddb5019f83a77907852f108a96a -F ext/fts5/fts5_index.c 3578823a9a43fcc77ce46c7f6efddfd155544053 +F ext/fts5/fts5_index.c 40d9086948d6f1420a078bd9fb0b5372e54ec791 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -595,14 +595,14 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test f54245091fee924030722234070fcba95a493549 +F test/fts5aa.test 2d136b61c4523ec018699e59b35c005313569b9e F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 -F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 +F test/fts5ac.test 399533fe52b7383053368ab8ba01ae182391e5d7 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test ca1f12b5738992c2edbdeb6c16133d41cfb9c031 +F test/fts5ah.test 009b993a9b7ebc43f84c10e53bd778b1dc8ffbe7 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 
67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b8864da95db2c0e611116304d607e35a86c9247d -R 2807aba63fb0a8821d708dc4cbc7f577 +P 90b82d3ef613b2915e0e280dc1d2e5a2b617d59c +R 0266e2e08ce753d75d582fc91e12512e U dan -Z 6114a7973c3151dffa74bd597b78489f +Z 1a32a19af984c36e1b27a239a8950f8a diff --git a/manifest.uuid b/manifest.uuid index a558eb6008..42fda03308 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -90b82d3ef613b2915e0e280dc1d2e5a2b617d59c \ No newline at end of file +d028ba6589f3122b635474c2683c0f93d5bc6c7c \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 3d1fc99289..5bd0912435 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -148,6 +148,7 @@ do_execsql_test 6.2 { #------------------------------------------------------------------------- # reset_db +expr srand(0) do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); INSERT INTO t1(t1) VALUES('pgsz=32'); @@ -173,7 +174,7 @@ proc dump_structure {} { for {set i 1} {$i <= 10} {incr i} { do_test 7.$i { - for {set j 0} {$j < 100} {incr j} { + for {set j 0} {$j < 10} {incr j} { set x [doc] set y [doc] set z [doc] diff --git a/test/fts5ac.test b/test/fts5ac.test index ae6e56e7e7..b137e3a938 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -364,6 +364,10 @@ foreach {tn expr tclexpr} { #------------------------------------------------------------------------- # +do_execsql_test 6.integrity { + INSERT INTO xx(xx) VALUES('integrity-check'); +} +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r} foreach {bAsc sql} { 0 {SELECT rowid FROM xx WHERE xx MATCH $expr} 1 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid ASC} diff --git a/test/fts5ah.test b/test/fts5ah.test index 3f217d9f86..f5e25848ab 100644 --- a/test/fts5ah.test +++ 
b/test/fts5ah.test @@ -53,6 +53,12 @@ proc reads {} { db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'} } +proc execsql_reads {sql} { + set nRead [reads] + execsql $sql + expr [reads] - $nRead +} + do_test 1.4 { set nRead [reads] execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' } @@ -60,6 +66,14 @@ do_test 1.4 { expr $nReadX>1000 } {1} +do_test 1.5 { + set fwd [execsql_reads {SELECT rowid FROM t1 WHERE t1 MATCH 'x' }] + set bwd [execsql_reads { + SELECT rowid FROM t1 WHERE t1 MATCH 'x' ORDER BY 1 ASC + }] + expr {$bwd < $fwd + 10} +} {1} + foreach {tn q res} " 1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x' } [list $W] 2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } [list $W] @@ -67,22 +81,18 @@ foreach {tn q res} " 4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y] " { - do_test 1.5.$tn.1 { - set nRead [reads] - execsql $q - set n [expr [reads] - $nRead] + do_test 1.6.$tn.1 { + set n [execsql_reads $q] expr {$n < ($nReadX / 10)} } {1} - do_test 1.5.$tn.2 { - set nRead [reads] - execsql "$q ORDER BY rowid ASC" - set n [expr [reads] - $nRead] + do_test 1.6.$tn.2 { + set n [execsql_reads "$q ORDER BY rowid ASC"] expr {$n < ($nReadX / 10)} } {1} - do_execsql_test 1.5.$tn.3 $q [lsort -int -decr $res] - do_execsql_test 1.5.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res] + do_execsql_test 1.6.$tn.3 $q [lsort -int -decr $res] + do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res] } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} From 192d413e95217a8716ef3edae3be713a44889a78 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 5 Aug 2014 19:35:20 +0000 Subject: [PATCH 035/206] Use doclist indexes for AND queries as well as phrases. 
FossilOrigin-Name: 5d38e6edc40ef188fbf96505073797036aa6783a --- ext/fts5/fts5_expr.c | 83 ++++++++++++++++++++++++++++++++------------ manifest | 12 +++---- manifest.uuid | 2 +- 3 files changed, 67 insertions(+), 30 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 82645a2619..b1fbe9ea97 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -641,7 +641,9 @@ static int fts5ExprAdvanceto( */ static int fts5ExprNearNextRowidMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprNode *pNode + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom ){ Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; @@ -654,6 +656,9 @@ static int fts5ExprNearNextRowidMatch( ** minimum rowid. Or, if the iterator is "ORDER BY rowid ASC", then it ** means the maximum rowid. */ iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); + if( bFromValid && (iFrom>iLast)==(pExpr->bAsc!=0) ){ + iLast = iFrom; + } do { bMatch = 1; @@ -693,7 +698,9 @@ static int fts5ExprNearNextRowidMatch( */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ - Fts5ExprNode *pNode + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom ){ int rc = SQLITE_OK; Fts5ExprNearset *pNear = pNode->pNear; @@ -701,7 +708,7 @@ static int fts5ExprNearNextMatch( int i; /* Advance the iterators until they all point to the same rowid */ - rc = fts5ExprNearNextRowidMatch(pExpr, pNode); + rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); if( pNode->bEof || rc!=SQLITE_OK ) break; for(i=0; inPhrase; i++){ @@ -769,7 +776,7 @@ static int fts5ExprNearInitAll( } /* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. 
*/ -static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); +static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*, int, i64); /* ** Compare the values currently indicated by the two nodes as follows: @@ -799,7 +806,19 @@ static int fts5NodeCompare( } } -static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ +/* +** Advance node iterator pNode, part of expression pExpr. If argument +** bFromValid is zero, then pNode is advanced exactly once. Or, if argument +** bFromValid is non-zero, then pNode is advanced until it is at or past +** rowid value iFrom. Whether "past" means "less than" or "greater than" +** depends on whether this is an ASC or DESC iterator. +*/ +static int fts5ExprNodeNext( + Fts5Expr *pExpr, + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom +){ int rc = SQLITE_OK; if( pNode->bEof==0 ){ @@ -810,8 +829,11 @@ static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ }; case FTS5_AND: { - rc = fts5ExprNodeNext(pExpr, pNode->pLeft); - if( rc==SQLITE_OK ) rc = fts5ExprNodeNext(pExpr, pNode->pRight); + rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom); + if( rc==SQLITE_OK ){ + /* todo: update (iFrom/bFromValid) here */ + rc = fts5ExprNodeNext(pExpr, pNode->pRight, bFromValid, iFrom); + } break; } @@ -821,23 +843,25 @@ static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int cmp = fts5NodeCompare(pExpr, p1, p2); if( cmp==0 ){ - rc = fts5ExprNodeNext(pExpr, p1); - if( rc==SQLITE_OK ) rc = fts5ExprNodeNext(pExpr, p2); + rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom); + } }else{ - rc = fts5ExprNodeNext(pExpr, (cmp < 0) ? p1 : p2); + rc = fts5ExprNodeNext(pExpr, (cmp < 0) ? 
p1 : p2, bFromValid, iFrom); } break; } default: assert( pNode->eType==FTS5_NOT ); { - rc = fts5ExprNodeNext(pExpr, pNode->pLeft); + rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom); break; } } if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNextMatch(pExpr, pNode); + rc = fts5ExprNodeNextMatch(pExpr, pNode, bFromValid, iFrom); } } @@ -855,13 +879,18 @@ static void fts5ExprSetEof(Fts5ExprNode *pNode){ /* ** */ -static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ +static int fts5ExprNodeNextMatch( + Fts5Expr *pExpr, + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom +){ int rc = SQLITE_OK; if( pNode->bEof==0 ){ switch( pNode->eType ){ case FTS5_STRING: { - rc = fts5ExprNearNextMatch(pExpr, pNode); + rc = fts5ExprNearNextMatch(pExpr, pNode, bFromValid, iFrom); break; } @@ -869,14 +898,22 @@ static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ Fts5ExprNode *p1 = pNode->pLeft; Fts5ExprNode *p2 = pNode->pRight; + while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){ Fts5ExprNode *pAdv; - if( pExpr->bAsc ){ - pAdv = (p1->iRowid < p2->iRowid) ? p1 : p2; + assert( pExpr->bAsc==0 || pExpr->bAsc==1 ); + if( pExpr->bAsc==(p1->iRowid < p2->iRowid) ){ + pAdv = p1; + if( bFromValid==0 || pExpr->bAsc==(p2->iRowid > iFrom) ){ + iFrom = p2->iRowid; + } }else{ - pAdv = (p1->iRowid > p2->iRowid) ? 
p1 : p2; + pAdv = p2; + if( bFromValid==0 || pExpr->bAsc==(p1->iRowid > iFrom) ){ + iFrom = p1->iRowid; + } } - rc = fts5ExprNodeNext(pExpr, pAdv); + rc = fts5ExprNodeNext(pExpr, pAdv, 1, iFrom); if( rc!=SQLITE_OK ) break; } if( p1->bEof || p2->bEof ){ @@ -901,10 +938,10 @@ static int fts5ExprNodeNextMatch(Fts5Expr *pExpr, Fts5ExprNode *pNode){ while( rc==SQLITE_OK ){ int cmp; while( rc==SQLITE_OK && (cmp = fts5NodeCompare(pExpr, p1, p2))>0 ){ - rc = fts5ExprNodeNext(pExpr, p2); + rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom); } if( rc || cmp ) break; - rc = fts5ExprNodeNext(pExpr, p1); + rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); } pNode->bEof = p1->bEof; pNode->iRowid = p1->iRowid; @@ -934,7 +971,7 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ /* Attempt to advance to the first match */ if( rc==SQLITE_OK && pNode->bEof==0 ){ - rc = fts5ExprNearNextMatch(pExpr, pNode); + rc = fts5ExprNearNextMatch(pExpr, pNode, 0, 0); } }else{ @@ -943,7 +980,7 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ rc = fts5ExprNodeFirst(pExpr, pNode->pRight); } if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNextMatch(pExpr, pNode); + rc = fts5ExprNodeNextMatch(pExpr, pNode, 0, 0); } } return rc; @@ -976,7 +1013,7 @@ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bAsc){ */ int sqlite3Fts5ExprNext(Fts5Expr *p){ int rc; - rc = fts5ExprNodeNext(p, p->pRoot); + rc = fts5ExprNodeNext(p, p->pRoot, 0, 0); return rc; } diff --git a/manifest b/manifest index cca7f70664..476c18f4f9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Use\sdoclist-indexes\swith\s"ORDER\sBY\srowid\sASC"\sfts5\squeries\sas\swell. -D 2014-08-05T19:00:22.438 +C Use\sdoclist\sindexes\sfor\sAND\squeries\sas\swell\sas\sphrases. 
+D 2014-08-05T19:35:20.490 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -109,7 +109,7 @@ F ext/fts5/fts5Int.h aef50f3078e60707aeb2e4b2787d8c5eecdd02dc F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 -F ext/fts5/fts5_expr.c 9402474456732ddb5019f83a77907852f108a96a +F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 F ext/fts5/fts5_index.c 40d9086948d6f1420a078bd9fb0b5372e54ec791 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 90b82d3ef613b2915e0e280dc1d2e5a2b617d59c -R 0266e2e08ce753d75d582fc91e12512e +P d028ba6589f3122b635474c2683c0f93d5bc6c7c +R df749c2987e3f7fa39a4c1c54f5a22e6 U dan -Z 1a32a19af984c36e1b27a239a8950f8a +Z 78f1e7641fc2b68987e4a3c99853c1c0 diff --git a/manifest.uuid b/manifest.uuid index 42fda03308..41e7ea9f15 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d028ba6589f3122b635474c2683c0f93d5bc6c7c \ No newline at end of file +5d38e6edc40ef188fbf96505073797036aa6783a \ No newline at end of file From 7b71fbaf93ca401372e7fced0296460a9d8b6197 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 6 Aug 2014 16:30:21 +0000 Subject: [PATCH 036/206] Add support for savepoints to fts5. 
FossilOrigin-Name: 3b19eba042bb2eeb1be60f8d58ebaa0a045d6a5c --- ext/fts5/fts5.c | 125 +++++++++++++++++++++++++++++++++++++++-- ext/fts5/fts5Int.h | 35 ++++++------ ext/fts5/fts5_aux.c | 51 +++++++++-------- ext/fts5/fts5_index.c | 62 ++++++++++++++------ manifest | 21 +++---- manifest.uuid | 2 +- test/fts5ai.test | 56 ++++++++++++++++++ test/permutations.test | 2 +- 8 files changed, 281 insertions(+), 73 deletions(-) create mode 100644 test/fts5ai.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index d2a5f0ce6e..06d5b8c70b 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -13,14 +13,52 @@ ** This is an SQLite module implementing full-text search. */ + #include "fts5Int.h" + typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; typedef struct Fts5Global Fts5Global; typedef struct Fts5Auxiliary Fts5Auxiliary; typedef struct Fts5Auxdata Fts5Auxdata; +/* +** NOTES ON TRANSACTIONS: +** +** SQLite invokes the following virtual table methods as transactions are +** opened and closed by the user: +** +** xBegin(): Start of a new transaction. +** xSync(): Initial part of two-phase commit. +** xCommit(): Final part of two-phase commit. +** xRollback(): Rollback the transaction. +** +** Anything that is required as part of a commit that may fail is performed +** in the xSync() callback. Current versions of SQLite ignore any errors +** returned by xCommit(). +** +** And as sub-transactions are opened/closed: +** +** xSavepoint(int S): Open savepoint S. +** xRelease(int S): Commit and close savepoint S. +** xRollbackTo(int S): Rollback to start of savepoint S. +** +** During a write-transaction the fts5_index.c module may cache some data +** in-memory. It is flushed to disk whenever xSync(), xRelease() or +** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() +** is called. +** +** Additionally, if SQLITE_DEBUG is defined, an instance of the following +** structure is used to record the current transaction state. 
This information +** is not required, but it is used in the assert() statements executed by +** function fts5CheckTransactionState() (see below). +*/ +struct Fts5TransactionState { + int eState; /* 0==closed, 1==open, 2==synced */ + int iSavepoint; /* Number of open savepoints (0 -> none) */ +}; + /* ** A single object of this type is allocated when the FTS5 module is ** registered with a database handle. It is used to store pointers to @@ -57,6 +95,9 @@ struct Fts5Table { Fts5Storage *pStorage; /* Document store */ Fts5Global *pGlobal; /* Global (connection wide) data */ Fts5Cursor *pSortCsr; /* Sort data from this cursor */ +#ifdef SQLITE_DEBUG + struct Fts5TransactionState ts; +#endif }; struct Fts5MatchPhrase { @@ -130,6 +171,64 @@ struct Fts5Auxdata { Fts5Auxdata *pNext; /* Next object in linked list */ }; +#ifdef SQLITE_DEBUG +#define FTS5_BEGIN 1 +#define FTS5_SYNC 2 +#define FTS5_COMMIT 3 +#define FTS5_ROLLBACK 4 +#define FTS5_SAVEPOINT 5 +#define FTS5_RELEASE 6 +#define FTS5_ROLLBACKTO 7 +static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){ + switch( op ){ + case FTS5_BEGIN: + assert( p->ts.eState==0 ); + p->ts.eState = 1; + p->ts.iSavepoint = -1; + break; + + case FTS5_SYNC: + assert( p->ts.eState==1 ); + p->ts.eState = 2; + break; + + case FTS5_COMMIT: + assert( p->ts.eState==2 ); + p->ts.eState = 0; + break; + + case FTS5_ROLLBACK: + assert( p->ts.eState==1 || p->ts.eState==2 ); + p->ts.eState = 0; + break; + + case FTS5_SAVEPOINT: + assert( p->ts.eState==1 ); + assert( iSavepoint>=0 ); + assert( iSavepoint>p->ts.iSavepoint ); + p->ts.iSavepoint = iSavepoint; + break; + + case FTS5_RELEASE: + assert( p->ts.eState==1 ); + assert( iSavepoint>=0 ); + assert( iSavepoint<=p->ts.iSavepoint ); + p->ts.iSavepoint = iSavepoint-1; + break; + + case FTS5_ROLLBACKTO: + assert( p->ts.eState==1 ); + assert( iSavepoint>=0 ); + assert( iSavepoint<=p->ts.iSavepoint ); + p->ts.iSavepoint = iSavepoint; + break; + } +} +#else +# define 
fts5CheckTransactionState(x,y,z) +#endif + + /* ** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy ** argument is non-zero, attempt delete the shadow tables from teh database @@ -222,6 +321,8 @@ static int fts5InitVtab( if( rc!=SQLITE_OK ){ fts5FreeVtab(pTab, 0); pTab = 0; + }else if( bCreate ){ + fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); } *ppVTab = (sqlite3_vtab*)pTab; return rc; @@ -793,6 +894,9 @@ static int fts5UpdateMethod( int eConflict; /* ON CONFLICT for this DML */ int rc = SQLITE_OK; /* Return code */ + /* A transaction must be open when this is called. */ + assert( pTab->ts.eState==1 ); + /* A delete specifies a single argument - the rowid of the row to remove. ** Update and insert operations pass: ** @@ -829,7 +933,8 @@ static int fts5UpdateMethod( static int fts5SyncMethod(sqlite3_vtab *pVtab){ int rc; Fts5Table *pTab = (Fts5Table*)pVtab; - rc = sqlite3Fts5IndexSync(pTab->pIndex); + fts5CheckTransactionState(pTab, FTS5_SYNC, 0); + rc = sqlite3Fts5IndexSync(pTab->pIndex, 1); return rc; } @@ -837,6 +942,7 @@ static int fts5SyncMethod(sqlite3_vtab *pVtab){ ** Implementation of xBegin() method. */ static int fts5BeginMethod(sqlite3_vtab *pVtab){ + fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_BEGIN, 0); return SQLITE_OK; } @@ -846,6 +952,7 @@ static int fts5BeginMethod(sqlite3_vtab *pVtab){ ** by fts5SyncMethod(). */ static int fts5CommitMethod(sqlite3_vtab *pVtab){ + fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_COMMIT, 0); return SQLITE_OK; } @@ -854,8 +961,9 @@ static int fts5CommitMethod(sqlite3_vtab *pVtab){ ** hash-table. Any changes made to the database are reverted by SQLite. 
*/ static int fts5RollbackMethod(sqlite3_vtab *pVtab){ - Fts5Table *pTab = (Fts5Table*)pVtab; int rc; + Fts5Table *pTab = (Fts5Table*)pVtab; + fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); rc = sqlite3Fts5IndexRollback(pTab->pIndex); return rc; } @@ -1243,8 +1351,9 @@ static int fts5RenameMethod( ** Flush the contents of the pending-terms table to disk. */ static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ - int rc = SQLITE_OK; - return rc; + Fts5Table *pTab = (Fts5Table*)pVtab; + fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); + return sqlite3Fts5IndexSync(pTab->pIndex, 0); } /* @@ -1253,7 +1362,9 @@ static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ ** This is a no-op. */ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ - return SQLITE_OK; + Fts5Table *pTab = (Fts5Table*)pVtab; + fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); + return sqlite3Fts5IndexSync(pTab->pIndex, 0); } /* @@ -1262,7 +1373,9 @@ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ ** Discard the contents of the pending terms table. */ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ - return SQLITE_OK; + Fts5Table *pTab = (Fts5Table*)pVtab; + fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); + return sqlite3Fts5IndexRollback(pTab->pIndex); } /* diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 07903abf83..4ae110fd19 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -72,6 +72,7 @@ void sqlite3Fts5Dequote(char *z); **************************************************************************/ /************************************************************************** +** Interface to code in fts5_buffer.c. 
*/ /* @@ -156,7 +157,6 @@ typedef struct Fts5IndexIter Fts5IndexIter; */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_ASC 0x0002 /* Docs in ascending rowid order */ -#define FTS5INDEX_QUERY_MATCH 0x0004 /* Use the iMatch arg to Next() */ /* ** Create/destroy an Fts5Index object. @@ -230,22 +230,15 @@ void sqlite3Fts5IndexBeginWrite( /* ** Flush any data stored in the in-memory hash tables to the database. -** -** This is called whenever (a) the main transaction is committed or (b) a -** new sub-transaction is opened. +** If the bCommit flag is true, also close any open blob handles. */ -void sqlite3Fts5IndexFlush(Fts5Index *p); - -int sqlite3Fts5IndexSync(Fts5Index *p); +int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit); /* ** Discard any data stored in the in-memory hash tables. Do not write it ** to the database. Additionally, assume that the contents of the %_data ** table may have changed on disk. So any in-memory caches of %_data ** records must be invalidated. -** -** This is called (a) whenever a main or sub-transaction is rolled back, -** and (b) whenever the read transaction is closed. */ int sqlite3Fts5IndexRollback(Fts5Index *p); @@ -256,9 +249,10 @@ int sqlite3Fts5IndexErrcode(Fts5Index*); void sqlite3Fts5IndexReset(Fts5Index*); /* -** Get (bSet==0) or set (bSet!=0) the "averages" record. +** Get or set the "averages" record. */ -void sqlite3Fts5IndexAverages(Fts5Index *p, int bSet, int nAvg, int *aAvg); +int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf); +int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); /* ** Functions called by the storage module as part of integrity-check. 
@@ -266,14 +260,23 @@ void sqlite3Fts5IndexAverages(Fts5Index *p, int bSet, int nAvg, int *aAvg); u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int); int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum); -/* Called during startup to register a UDF with SQLite */ +/* +** Called during virtual module initialization to register UDF +** fts5_decode() with SQLite +*/ int sqlite3Fts5IndexInit(sqlite3*); +/* +** Set the page size to use when writing. It doesn't matter if this +** changes mid-transaction, or if inconsistent values are used by +** multiple clients. +*/ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf); -int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); - +/* +** Return the total number of entries read from the %_data table by +** this connection since it was created. +*/ int sqlite3Fts5IndexReads(Fts5Index *p); /* diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index d378bb67f4..85bad5c496 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -745,9 +745,10 @@ static void fts5TestFunction( memset(&s, 0, sizeof(Fts5Buffer)); nCol = pApi->xColumnCount(pFts); - if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntotalsize "); - } + /* + ** xColumnTotalSize() + */ + if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntotalsize "); if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntotalsize") ){ if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); for(i=0; rc==SQLITE_OK && i1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); } - if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " columncount "); - } + /* + ** xColumnCount() + */ + if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " columncount "); if( 0==zReq || 0==sqlite3_stricmp(zReq, "columncount") ){ + nCol = pApi->xColumnCount(pFts); sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nCol); } - if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " columnsize 
"); - } + /* + ** xColumnSize() + */ + if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " columnsize "); if( 0==zReq || 0==sqlite3_stricmp(zReq, "columnsize") ){ if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); for(i=0; rc==SQLITE_OK && i1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); } - if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " columntext "); - } + /* + ** xColumnText() + */ + if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " columntext "); if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntext") ){ for(i=0; rc==SQLITE_OK && ixPhraseCount(pFts); if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasecount") ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nPhrase); } - if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasesize "); - } + /* + ** xPhraseSize() + */ + if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasesize "); if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasesize") ){ if( nPhrase==1 ){ int nSize = pApi->xPhraseSize(pFts, 0); @@ -816,15 +823,15 @@ static void fts5TestFunction( } } - if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " poslist "); - } + /* + ** xPoslist() + */ + if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " poslist "); if( 0==zReq || 0==sqlite3_stricmp(zReq, "poslist") ){ int bParen = 0; Fts5Buffer s3; memset(&s3, 0, sizeof(s3)); - for(i=0; ipReader ){ + /* This call may return SQLITE_ABORT if there has been a savepoint + ** rollback since it was last used. In this case a new blob handle + ** is required. */ + rc = sqlite3_blob_reopen(p->pReader, iRowid); + if( rc==SQLITE_ABORT ){ + fts5CloseReader(p); + rc = SQLITE_OK; + } + } /* If the blob handle is not yet open, open and seek it. Otherwise, use ** the blob_reopen() API to reseek the existing blob handle. 
*/ @@ -762,8 +772,6 @@ sqlite3_free(buf.p); rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader ); - }else{ - rc = sqlite3_blob_reopen(p->pReader, iRowid); } if( rc ) fts5MissingData(); @@ -2482,6 +2490,25 @@ static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){ return pList; } + +/* +** Discard all data currently cached in the hash-tables. +*/ +static void fts5IndexDiscardData(Fts5Index *p){ + Fts5Config *pConfig = p->pConfig; + int i; + for(i=0; i<=pConfig->nPrefix; i++){ + Fts3Hash *pHash = &p->aHash[i]; + Fts3HashElem *pE; /* Iterator variable */ + for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ + Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE); + fts5FreePendingDoclist(pDoclist); + } + fts3HashClear(pHash); + } + p->nPendingData = 0; +} + /* ** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares ** with buffer (nOld/pOld). @@ -3145,21 +3172,10 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ fts5StructureRelease(pStruct); } -/* -** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain -** to the document with rowid iRowid. -*/ -void sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ - if( iRowid<=p->iWriteRowid ){ - sqlite3Fts5IndexFlush(p); - } - p->iWriteRowid = iRowid; -} - /* ** Flush any data stored in the in-memory hash tables to the database. */ -void sqlite3Fts5IndexFlush(Fts5Index *p){ +static void fts5IndexFlush(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; int i; /* Used to iterate through indexes */ int nLeaf = 0; /* Number of leaves written */ @@ -3175,12 +3191,23 @@ void sqlite3Fts5IndexFlush(Fts5Index *p){ p->nPendingData = 0; } +/* +** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain +** to the document with rowid iRowid. 
+*/ +void sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ + if( iRowid<=p->iWriteRowid ){ + fts5IndexFlush(p); + } + p->iWriteRowid = iRowid; +} + /* ** Commit data to disk. */ -int sqlite3Fts5IndexSync(Fts5Index *p){ - sqlite3Fts5IndexFlush(p); - fts5CloseReader(p); +int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ + fts5IndexFlush(p); + if( bCommit ) fts5CloseReader(p); return p->rc; } @@ -3192,6 +3219,7 @@ int sqlite3Fts5IndexSync(Fts5Index *p){ */ int sqlite3Fts5IndexRollback(Fts5Index *p){ fts5CloseReader(p); + fts5IndexDiscardData(p); return SQLITE_OK; } diff --git a/manifest b/manifest index 476c18f4f9..f539440020 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Use\sdoclist\sindexes\sfor\sAND\squeries\sas\swell\sas\sphrases. -D 2014-08-05T19:35:20.490 +C Add\ssupport\sfor\ssavepoints\sto\sfts5. +D 2014-08-06T16:30:21.057 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,14 +103,14 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 23f875e24ffa722107690d14b449141a25a2d697 +F ext/fts5/fts5.c 15e585ed0194f94a1da360808f29184f9d44554c F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a -F ext/fts5/fts5Int.h aef50f3078e60707aeb2e4b2787d8c5eecdd02dc -F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc +F ext/fts5/fts5Int.h 410001da21bcc3d09b4290d4858352d0985ac7a6 +F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 -F 
ext/fts5/fts5_index.c 40d9086948d6f1420a078bd9fb0b5372e54ec791 +F ext/fts5/fts5_index.c 6a9f851490562d8843edc4d54b27eb9472c62d68 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -603,6 +603,7 @@ F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 F test/fts5ah.test 009b993a9b7ebc43f84c10e53bd778b1dc8ffbe7 +F test/fts5ai.test 4dee71c23ddbcf2b0fc5d5586f241002b883c10e F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -770,7 +771,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test 542edb965245565d06b9284e708f17bb93d70691 +F test/permutations.test 9875e7bacd0ab0cf78525e4b2d287840f284599b F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1199,7 +1200,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d028ba6589f3122b635474c2683c0f93d5bc6c7c -R df749c2987e3f7fa39a4c1c54f5a22e6 +P 5d38e6edc40ef188fbf96505073797036aa6783a +R e4475b88a5ed55985c58ff03d579a6b6 U dan -Z 78f1e7641fc2b68987e4a3c99853c1c0 +Z 1737719499cd3c712a0cb60804b1642b diff --git a/manifest.uuid b/manifest.uuid index 41e7ea9f15..e8347f8205 100644 --- 
a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5d38e6edc40ef188fbf96505073797036aa6783a \ No newline at end of file +3b19eba042bb2eeb1be60f8d58ebaa0a045d6a5c \ No newline at end of file diff --git a/test/fts5ai.test b/test/fts5ai.test new file mode 100644 index 0000000000..705ca15988 --- /dev/null +++ b/test/fts5ai.test @@ -0,0 +1,56 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# Specifically, it tests transactions and savepoints +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5ai + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a); +} {} + +do_execsql_test 1.1 { + BEGIN; + INSERT INTO t1 VALUES('a b c'); + INSERT INTO t1 VALUES('d e f'); + SAVEPOINT one; + INSERT INTO t1 VALUES('g h i'); + SAVEPOINT two; + INSERT INTO t1 VALUES('j k l'); + ROLLBACK TO one; + INSERT INTO t1 VALUES('m n o'); + SAVEPOINT two; + INSERT INTO t1 VALUES('p q r'); + RELEASE one; + SAVEPOINT one; + INSERT INTO t1 VALUES('s t u'); + ROLLBACK TO one; + COMMIT; +} + +do_execsql_test 1.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + + +finish_test + diff --git a/test/permutations.test b/test/permutations.test index 41659ef898..550028ea32 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -226,7 +226,7 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. 
} -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test - fts5af.test fts5ag.test fts5ah.test + fts5af.test fts5ag.test fts5ah.test fts5ai.test } test_suite "nofaultsim" -prefix "" -description { From d90aab8f949f0b183bc37a09e98a0f20e32cc662 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 6 Aug 2014 20:04:14 +0000 Subject: [PATCH 037/206] Avoid writing delete markers to the oldest segment in an FTS index. FossilOrigin-Name: 1baeb1cee61d9c56c718b50af034a24f1018a322 --- ext/fts5/fts5_index.c | 120 ++++++++++++++++++++++++++--------------- manifest | 17 +++--- manifest.uuid | 2 +- test/fts5aa.test | 2 +- test/fts5aj.test | 71 ++++++++++++++++++++++++ test/permutations.test | 2 +- 6 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 test/fts5aj.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 1c6d175ab2..9f8f25908f 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -640,7 +640,7 @@ static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - "(structure idx=%d)", (int)(iKey-10) + "{structure idx=%d}", (int)(iKey-10) ); } } @@ -1066,6 +1066,7 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); + assert( pLvl->nMerge<=pLvl->nSeg ); for(iSeg=0; iSegnSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); @@ -1228,7 +1229,9 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ pIter->iRowid += iVal; pIter->iLeafPgno--; - while( a[iOff-1]==0x00 ){ + while( iOff>pIter->iFirstOff + && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 + ){ iOff--; pIter->iLeafPgno--; } @@ -2835,19 +2838,26 @@ static void fts5WriteFinish( ){ int i; *pnLeaf = pWriter->aWriter[0].pgno; - fts5WriteFlushLeaf(p, 
pWriter); - if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ - fts5WriteBtreeGrow(p, pWriter); - } - if( pWriter->nWriter>1 ){ - fts5WriteBtreeNEmpty(p, pWriter); - } - *pnHeight = pWriter->nWriter; + if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){ + *pnLeaf = 0; + *pnHeight = 0; + }else{ + fts5WriteFlushLeaf(p, pWriter); + if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + fts5WriteBtreeGrow(p, pWriter); + } + if( pWriter->nWriter>1 ){ + fts5WriteBtreeNEmpty(p, pWriter); + } + *pnHeight = pWriter->nWriter; - for(i=1; inWriter; i++){ - Fts5PageWriter *pPg = &pWriter->aWriter[i]; - i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno); - fts5DataWrite(p, iRow, pPg->buf.p, pPg->buf.n); + for(i=1; inWriter; i++){ + Fts5PageWriter *pPg = &pWriter->aWriter[i]; + fts5DataWrite(p, + FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno), + pPg->buf.p, pPg->buf.n + ); + } } for(i=0; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; @@ -2970,7 +2980,8 @@ static void fts5IndexMergeLevel( Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; - int bRequireDoclistTerm = 0; + int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ + int bOldest; /* True if the output segment is the oldest */ assert( iLvlnLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); @@ -2997,6 +3008,8 @@ static void fts5IndexMergeLevel( /* Read input from all segments in the input level */ nInput = pLvl->nSeg; } + bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); + #if 0 fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); @@ -3008,35 +3021,45 @@ fflush(stdout); ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ - int nTerm; - const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); - if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ - if( 
writer.nLeafWritten>nRem ) break; - - /* This is a new term. Append a term to the output segment. */ - if( bRequireDoclistTerm ){ - fts5WriteAppendZerobyte(p, &writer); - } - fts5WriteAppendTerm(p, &writer, nTerm, pTerm); - fts5BufferSet(&p->rc, &term, nTerm, pTerm); - bRequireDoclistTerm = 1; - } - - /* Append the rowid to the output */ - fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); - - /* Copy the position list from input to output */ + /* If the segment being written is the oldest in the entire index and + ** the position list is empty (i.e. the entry is a delete marker), no + ** entry need be written to the output. */ fts5ChunkIterInit(p, pSeg, &sPos); - fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); - for(/* noop */; fts5ChunkIterEof(p, &sPos)==0; fts5ChunkIterNext(p, &sPos)){ - int iOff = 0; - while( iOff0 ){ + int nTerm; + const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); + if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ + if( writer.nLeafWritten>nRem ){ + fts5ChunkIterRelease(&sPos); + break; + } + + /* This is a new term. Append a term to the output segment. 
*/ + if( bRequireDoclistTerm ){ + fts5WriteAppendZerobyte(p, &writer); + } + fts5WriteAppendTerm(p, &writer, nTerm, pTerm); + fts5BufferSet(&p->rc, &term, nTerm, pTerm); + bRequireDoclistTerm = 1; + } + + /* Append the rowid to the output */ + fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); + + /* Copy the position list from input to output */ + fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); + for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ + int iOff = 0; + while( iOffnSeg -= nInput; pLvl->nMerge = 0; + if( pSeg->pgnoLast==0 ){ + pLvlOut->nSeg--; + } }else{ + assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); fts5TrimSegments(p, pIter); pLvl->nMerge = nInput; } @@ -3095,10 +3122,11 @@ static void fts5IndexWork( while( nRem>0 ){ int iLvl; /* To iterate through levels */ - int iBestLvl = -1; /* Level offering the most input segments */ + int iBestLvl = 0; /* Level offering the most input segments */ int nBest = 0; /* Number of input segments on best level */ /* Set iBestLvl to the level to read input segments from. */ + assert( pStruct->nLevel>0 ); for(iLvl=0; iLvlnLevel; iLvl++){ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; if( pLvl->nMerge ){ @@ -3113,7 +3141,13 @@ static void fts5IndexWork( iBestLvl = iLvl; } } - assert( iBestLvl>=0 && nBest>0 ); + + /* If nBest is still 0, then the index must be empty. 
*/ +#ifdef SQLITE_DEBUG + for(iLvl=0; nBest==0 && iLvlnLevel; iLvl++){ + assert( pStruct->aLevel[iLvl].nSeg==0 ); + } +#endif if( nBestnMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); @@ -3444,6 +3478,7 @@ static void fts5DlidxIterTestReverse( fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(pDlidx) ){ + assert( pDlidx->iLeafPgno>iLeaf ); cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); cksum1 = (cksum1 ^ pDlidx->iRowid); } @@ -3454,6 +3489,7 @@ static void fts5DlidxIterTestReverse( fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterPrev(pDlidx) ){ + assert( pDlidx->iLeafPgno>iLeaf ); cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); cksum2 = (cksum2 ^ pDlidx->iRowid); } diff --git a/manifest b/manifest index f539440020..4503e360f1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\ssavepoints\sto\sfts5. -D 2014-08-06T16:30:21.057 +C Avoid\swriting\sdelete\smarkers\sto\sthe\soldest\ssegment\sin\san\sFTS\sindex. 
+D 2014-08-06T20:04:14.831 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 -F ext/fts5/fts5_index.c 6a9f851490562d8843edc4d54b27eb9472c62d68 +F ext/fts5/fts5_index.c dab399c67cb6bdd23009d2f1280ea60a9585b47c F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -595,7 +595,7 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test 2d136b61c4523ec018699e59b35c005313569b9e +F test/fts5aa.test 4c7cbf1d38d30e7aaa8febf44958dd13bbb53bf8 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 399533fe52b7383053368ab8ba01ae182391e5d7 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 @@ -604,6 +604,7 @@ F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 F test/fts5ah.test 009b993a9b7ebc43f84c10e53bd778b1dc8ffbe7 F test/fts5ai.test 4dee71c23ddbcf2b0fc5d5586f241002b883c10e +F test/fts5aj.test d16f44bd1f7da9714ef99bd8b1996c5867aee8f5 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -771,7 +772,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 
b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test 9875e7bacd0ab0cf78525e4b2d287840f284599b +F test/permutations.test 4f71bc5c9ce9a249cc94ad415cda809ce7f2360b F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1200,7 +1201,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 5d38e6edc40ef188fbf96505073797036aa6783a -R e4475b88a5ed55985c58ff03d579a6b6 +P 3b19eba042bb2eeb1be60f8d58ebaa0a045d6a5c +R 26f7f3d5f6581939b65c2ce6e063db0c U dan -Z 1737719499cd3c712a0cb60804b1642b +Z 78bed80e24eecafbae841451888b9b1c diff --git a/manifest.uuid b/manifest.uuid index e8347f8205..3d1cd0f81b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3b19eba042bb2eeb1be60f8d58ebaa0a045d6a5c \ No newline at end of file +1baeb1cee61d9c56c718b50af034a24f1018a322 \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 5bd0912435..f5c1977f20 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -50,7 +50,7 @@ do_execsql_test 2.1 { do_execsql_test 2.2 { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } { - {(structure idx=0) {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} + {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} } do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); diff --git a/test/fts5aj.test b/test/fts5aj.test new file mode 100644 index 0000000000..31c8b71282 --- /dev/null +++ b/test/fts5aj.test @@ -0,0 +1,71 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# Specifically, this tests that, provided the amount of data remains +# constant, the FTS index does not grow indefinitely as rows are inserted +# and deleted, +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5aj + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +proc doc {} { + set dict [list a b c d e f g h i j k l m n o p q r s t u v w x y z] + set res [list] + for {set i 0} {$i < 20} {incr i} { + lappend res [lindex $dict [expr int(rand() * 26)]] + } + set res +} + +proc structure {} { + set val [db one {SELECT fts5_decode(rowid,block) FROM t1_data WHERE rowid=10}] + foreach lvl [lrange $val 1 end] { + lappend res [expr [llength $lvl]-2] + } + set res +} + +expr srand(0) +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1) VALUES('pgsz=64'); +} + +for {set iTest 0} {$iTest < 50000} {incr iTest} { + if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } } + set new [doc] + execsql { INSERT INTO t1 VALUES($new) } + if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] } + if {0==($iTest % 1000)} { + set sz [db one {SELECT count(*) FROM t1_data}] + set s [structure] + do_test 1.$iTest.$sz.{$s} {} {} + } +} + +#db eval { SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} + +do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') } + + + +finish_test + diff --git a/test/permutations.test b/test/permutations.test index 550028ea32..ca34266a27 100644 --- a/test/permutations.test +++ 
b/test/permutations.test @@ -226,7 +226,7 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test - fts5af.test fts5ag.test fts5ah.test fts5ai.test + fts5af.test fts5ag.test fts5ah.test fts5ai.test fts5aj.test } test_suite "nofaultsim" -prefix "" -description { From 1d3ed1f46a09d26376975382fcec68a1ceb31547 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 7 Aug 2014 18:47:33 +0000 Subject: [PATCH 038/206] Add "segment promotion" to fts5. This prevents the FTS index from growing indefinitely as data is added and deleted. FossilOrigin-Name: ba359d78e166d78e0dc89e3c63a9a41e9ffea989 --- ext/fts5/fts5_index.c | 217 ++++++++++++++++++++++++++++++++++++++---- manifest | 14 +-- manifest.uuid | 2 +- test/fts5aj.test | 7 +- 4 files changed, 212 insertions(+), 28 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 9f8f25908f..75fa0a83b4 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -655,6 +655,30 @@ static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ } } +static void fts5DebugStructure( + int *pRc, /* IN/OUT: error code */ + Fts5Buffer *pBuf, + Fts5Structure *p +){ + int iLvl, iSeg; /* Iterate through levels, segments */ + + for(iLvl=0; iLvlnLevel; iLvl++){ + Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge + ); + for(iSeg=0; iSegnSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, + pSeg->pgnoFirst, pSeg->pgnoLast + ); + } + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); + } +} + + static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); @@ -1080,6 +1104,181 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ fts5BufferFree(&buf); } +#if 0 +static void fts5PrintStructure(const char *zCaption, 
Fts5Structure *pStruct){ + int rc = SQLITE_OK; + Fts5Buffer buf; + memset(&buf, 0, sizeof(buf)); + fts5DebugStructure(&rc, &buf, pStruct); + fprintf(stdout, "%s: %s\n", zCaption, buf.p); + fflush(stdout); + fts5BufferFree(&buf); +} +#else +# define fts5PrintStructure(x,y) +#endif + +/* +** Return a copy of index structure pStruct. Except, promote as many segments +** as possible to level iPromote. If an OOM occurs, NULL is returned. +*/ +static void fts5StructurePromoteTo( + Fts5Index *p, + int iPromote, + int szPromote, + Fts5Structure *pStruct +){ + Fts5Structure *pNew; + u8 *pSpace; + int nSeg = fts5StructureCountSegments(pStruct); + int nLvl = pStruct->nLevel; + int nByte = ( + sizeof(Fts5Structure) + + sizeof(Fts5StructureLevel) * (nLvl+1) + + sizeof(Fts5StructureSegment) * (nSeg+nLvl+1) + ); + int iTst; + + pNew = fts5IdxMalloc(p, nByte); + if( !pNew ) return; + pNew->nWriteCounter = pStruct->nWriteCounter; + pNew->nLevel = pStruct->nLevel; + pSpace = (u8*)&pNew->aLevel[nLvl+1]; + + for(iTst=0; iTstaLevel[iTst]; + pLvlOut->aSeg = (Fts5StructureSegment*)pSpace; + + if( iTst==iPromote ){ + int il, is; + int nSegCopy = 0; + + /* Figure out the number of segments that will be promoted. 
*/ + for(il=iTst+1; ilnLevel; il++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; + if( pLvl->nMerge ) break; + for(is=pLvl->nSeg-1; is>=0; is--){ + Fts5StructureSegment *pSeg = &pLvl->aSeg[is]; + int sz = pSeg->pgnoLast - pSeg->pgnoFirst + 1; + if( sz>szPromote ){ + il = pStruct->nLevel; + break; + } + nSegCopy++; + } + } + assert( nSegCopy>0 ); + pSpace += (nSegCopy * sizeof(Fts5StructureSegment)); + pLvlOut->nSeg = nSegCopy; + + for(il=iTst+1; ilnLevel && nSegCopy>0; il++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; + for(is=pLvl->nSeg-1; is>=0 && nSegCopy>0; is--){ + Fts5StructureSegment *pSeg = &pLvl->aSeg[is]; + nSegCopy--; + memcpy(&pLvlOut->aSeg[nSegCopy], pSeg, sizeof(Fts5StructureSegment)); + pLvl->nSeg--; + } + } + assert( nSegCopy==0 ); + } + + nCopy = pStruct->aLevel[iTst].nSeg * sizeof(Fts5StructureSegment); + if( nCopy ) memcpy(pSpace, pStruct->aLevel[iTst].aSeg, nCopy); + pSpace += (nCopy + sizeof(Fts5StructureSegment)); + pLvlOut->nSeg += pStruct->aLevel[iTst].nSeg; + } + + fts5PrintStructure("NEW", pNew); + memcpy(pStruct, pNew, nByte); + for(iTst=0; iTstnLevel; iTst++){ + int iOff = pNew->aLevel[iTst].aSeg - (Fts5StructureSegment*)pNew; + pStruct->aLevel[iTst].aSeg = &((Fts5StructureSegment*)pStruct)[iOff]; + } + sqlite3_free(pNew); +} + +/* +** A new segment has just been written to level iLvl of index structure +** pStruct. This function determines if any segments should be promoted +** as a result. Segments are promoted in two scenarios: +** +** a) If the segment just written is smaller than one or more segments +** within the previous populated level, it is promoted to the previous +** populated level. +** +** b) If the segment just written is larger than the newest segment on +** the next populated level, then that segment, and any other adjacent +** segments that are also smaller than the one just written, are +** promoted. +** +** If one or more segments are promoted, the structure object is updated +** to reflect this. 
+*/ +static void fts5StructurePromote( + Fts5Index *p, /* FTS5 backend object */ + int iLvl, /* Index level just updated */ + Fts5Structure *pStruct /* Index structure */ +){ + if( p->rc==SQLITE_OK ){ + int iTst; + int iPromote = -1; + int szPromote; /* Promote anything this size or smaller */ + Fts5StructureSegment *pSeg; /* Segment just written */ + Fts5StructureLevel *pTst; + int szSeg; /* Size of segment just written */ + + + pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; + szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); + + /* Check for condition (a) */ + for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); + pTst = &pStruct->aLevel[iTst]; + if( iTst>=0 && pTst->nMerge==0 ){ + int i; + int szMax = 0; + for(i=0; inSeg; i++){ + int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; + if( sz>szMax ) szMax = sz; + } + if( szMax>=szSeg ){ + /* Condition (a) is true. Promote the newest segment on level + ** iLvl to level iTst. */ + iPromote = iTst; + szPromote = szMax; + } + } + + /* Check for condition (b) */ + if( iPromote<0 ){ + Fts5StructureLevel *pTst; + for(iTst=iLvl+1; iTstnLevel; iTst++){ + pTst = &pStruct->aLevel[iTst]; + if( pTst->nSeg ) break; + } + if( iTstnLevel && pTst->nMerge==0 ){ + Fts5StructureSegment *pSeg2 = &pTst->aSeg[pTst->nSeg-1]; + int sz = pSeg2->pgnoLast - pSeg2->pgnoFirst + 1; + if( sz<=szSeg ){ + iPromote = iLvl; + szPromote = szSeg; + } + } + } + + /* If iPromote is greater than or equal to zero at this point, then it + ** is the level number of a level to which segments that consist of + ** szPromote or fewer pages should be promoted. 
*/ + if( iPromote>=0 ){ + fts5PrintStructure("BEFORE", pStruct); + fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); + fts5PrintStructure("AFTER", pStruct); + } + } +} + /* ** If the pIter->iOff offset currently points to an entry indicating one @@ -3151,6 +3350,7 @@ static void fts5IndexWork( if( nBestnMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); + fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); } } @@ -3689,7 +3889,6 @@ static void fts5DecodeStructure( const u8 *pBlob, int nBlob ){ int rc; /* Return code */ - int iLvl, iSeg; /* Iterate through levels, segments */ Fts5Structure *p = 0; /* Decoded structure object */ rc = fts5StructureDecode(pBlob, nBlob, &p); @@ -3698,21 +3897,7 @@ static void fts5DecodeStructure( return; } - for(iLvl=0; iLvlnLevel; iLvl++){ - Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge - ); - for(iSeg=0; iSegnSeg; iSeg++){ - Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, - pSeg->pgnoFirst, pSeg->pgnoLast - ); - } - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); - } - + fts5DebugStructure(pRc, pBuf, p); fts5StructureRelease(p); } diff --git a/manifest b/manifest index 4503e360f1..ad4cd2c337 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\swriting\sdelete\smarkers\sto\sthe\soldest\ssegment\sin\san\sFTS\sindex. -D 2014-08-06T20:04:14.831 +C Add\s"segment\spromotion"\sto\sfts5.\sThis\sprevents\sthe\sFTS\sindex\sfrom\sgrowing\sindefinitely\sas\sdata\sis\sadded\sand\sdeleted. 
+D 2014-08-07T18:47:33.788 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 -F ext/fts5/fts5_index.c dab399c67cb6bdd23009d2f1280ea60a9585b47c +F ext/fts5/fts5_index.c 1e001ed7dd4650a0a853b986f34b71c8d3f71ec1 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -604,7 +604,7 @@ F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 F test/fts5ah.test 009b993a9b7ebc43f84c10e53bd778b1dc8ffbe7 F test/fts5ai.test 4dee71c23ddbcf2b0fc5d5586f241002b883c10e -F test/fts5aj.test d16f44bd1f7da9714ef99bd8b1996c5867aee8f5 +F test/fts5aj.test 67014e9fc7c069425d67d549b133742b67755047 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1201,7 +1201,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 3b19eba042bb2eeb1be60f8d58ebaa0a045d6a5c -R 26f7f3d5f6581939b65c2ce6e063db0c +P 1baeb1cee61d9c56c718b50af034a24f1018a322 +R c515f8340a51bac4920372c5a517f13d U dan -Z 78bed80e24eecafbae841451888b9b1c +Z e9b5b5b15db061fad6b53bb80e61f761 diff --git a/manifest.uuid b/manifest.uuid index 3d1cd0f81b..c812fc95e2 100644 --- a/manifest.uuid +++ 
b/manifest.uuid @@ -1 +1 @@ -1baeb1cee61d9c56c718b50af034a24f1018a322 \ No newline at end of file +ba359d78e166d78e0dc89e3c63a9a41e9ffea989 \ No newline at end of file diff --git a/test/fts5aj.test b/test/fts5aj.test index 31c8b71282..cb8e2d2a2f 100644 --- a/test/fts5aj.test +++ b/test/fts5aj.test @@ -57,15 +57,14 @@ for {set iTest 0} {$iTest < 50000} {incr iTest} { if {0==($iTest % 1000)} { set sz [db one {SELECT count(*) FROM t1_data}] set s [structure] - do_test 1.$iTest.$sz.{$s} {} {} + do_execsql_test 1.$iTest.$sz.{$s} { + INSERT INTO t1(t1) VALUES('integrity-check') + } } } -#db eval { SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} - do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') } - finish_test From b191db8390b491c1d6780e131a34ec14b5095e8f Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 9 Aug 2014 18:02:27 +0000 Subject: [PATCH 039/206] Use multiple memory allocations for a single Fts5Structure object. This is probably less efficient but much easier to get right. FossilOrigin-Name: 2821825f7a481755a333dcdcad780b3e24448f20 --- ext/fts5/fts5_index.c | 230 ++++++++++++++++++++++++------------------ manifest | 12 +-- manifest.uuid | 2 +- 3 files changed, 140 insertions(+), 104 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 75fa0a83b4..737c226051 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -66,7 +66,13 @@ ** recorded in a single record within the %_data table. The record is a list ** of SQLite varints. ** -** For each level from 0 to nMax: +** The record begins with three varints: +** +** + number of levels, +** + total number of segments on all levels, +** + value of write counter. +** +** Then, for each level from 0 to nMax: ** ** + number of input segments in ongoing merge. ** + total number of segments in level. 
@@ -707,6 +713,18 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ return pRet; } +static void *fts5MallocZero(int *pRc, int nByte){ + void *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3_malloc(nByte); + if( pRet==0 && nByte>0 ){ + *pRc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + } + } + return pRet; +} /* ** Compare the contents of the pLeft buffer with the pRight/nRight blob. @@ -973,27 +991,24 @@ static int fts5StructureDecode( int iLvl; int nLevel = 0; int nSegment = 0; - int nByte; /* Bytes of space to allocate */ - Fts5Structure *pRet = 0; + int nByte; /* Bytes of space to allocate at pRet */ + Fts5Structure *pRet = 0; /* Structure object to return */ /* Read the total number of levels and segments from the start of the - ** structure record. Use these values to allocate space for the deserialized - ** version of the record. */ + ** structure record. */ i = getVarint32(&pData[i], nLevel); i += getVarint32(&pData[i], nSegment); nByte = ( - sizeof(Fts5Structure) + - sizeof(Fts5StructureLevel) * (nLevel+1) + - sizeof(Fts5StructureSegment) * (nSegment+nLevel+1) + sizeof(Fts5Structure) + /* Main structure */ + sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */ ); - pRet = (Fts5Structure*)sqlite3_malloc(nByte); + pRet = (Fts5Structure*)fts5MallocZero(&rc, nByte); if( pRet ){ - u8 *pSpace = (u8*)&pRet->aLevel[nLevel+1]; - memset(pRet, 0, nByte); pRet->nLevel = nLevel; i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter); - for(iLvl=0; iLvlaLevel[iLvl]; int nTotal; int iSeg; @@ -1001,26 +1016,82 @@ static int fts5StructureDecode( i += getVarint32(&pData[i], pLvl->nMerge); i += getVarint32(&pData[i], nTotal); assert( nTotal>=pLvl->nMerge ); - pLvl->nSeg = nTotal; - pLvl->aSeg = (Fts5StructureSegment*)pSpace; - pSpace += ((nTotal+1) * sizeof(Fts5StructureSegment)); + pLvl->aSeg = (Fts5StructureSegment*)fts5MallocZero(&rc, + nTotal * sizeof(Fts5StructureSegment) + ); - for(iSeg=0; iSegaSeg[iSeg].iSegid); - i += getVarint32(&pData[i], 
pLvl->aSeg[iSeg].nHeight); - i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); - i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); + if( rc==SQLITE_OK ){ + pLvl->nSeg = nTotal; + for(iSeg=0; iSegaSeg[iSeg].iSegid); + i += getVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); + i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); + i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); + } } } - pRet->aLevel[nLevel].aSeg = (Fts5StructureSegment*)pSpace; - }else{ - rc = SQLITE_NOMEM; } *ppOut = pRet; return rc; } +/* +** +*/ +static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ + if( *pRc==SQLITE_OK ){ + Fts5Structure *pStruct = *ppStruct; + int nLevel = pStruct->nLevel; + int nByte = ( + sizeof(Fts5Structure) + /* Main structure */ + sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ + ); + + pStruct = sqlite3_realloc(pStruct, nByte); + if( pStruct ){ + memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); + pStruct->nLevel++; + *ppStruct = pStruct; + }else{ + *pRc = SQLITE_NOMEM; + } + } +} + +/* +** Extend level iLvl so that there is room for at least nExtra more +** segments. +*/ +static void fts5StructureExtendLevel( + int *pRc, + Fts5Structure *pStruct, + int iLvl, + int nExtra, + int bInsert +){ + if( *pRc==SQLITE_OK ){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + Fts5StructureSegment *aNew; + int nByte; + + nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); + aNew = sqlite3_realloc(pLvl->aSeg, nByte); + if( aNew ){ + if( bInsert==0 ){ + memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); + }else{ + int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); + memmove(&aNew[nExtra], aNew, nMove); + memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); + } + pLvl->aSeg = aNew; + }else{ + *pRc = SQLITE_NOMEM; + } + } +} + /* ** Read, deserialize and return the structure record for index iIdx. 
** @@ -1051,6 +1122,10 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){ ** call to fts5StructureRead() or fts5StructureDecode(). */ static void fts5StructureRelease(Fts5Structure *pStruct){ + int i; + for(i=0; inLevel; i++){ + sqlite3_free(pStruct->aLevel[i].aSeg); + } sqlite3_free(pStruct); } @@ -1118,6 +1193,10 @@ static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ # define fts5PrintStructure(x,y) #endif +static int fts5SegmentSize(Fts5StructureSegment *pSeg){ + return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; +} + /* ** Return a copy of index structure pStruct. Except, promote as many segments ** as possible to level iPromote. If an OOM occurs, NULL is returned. @@ -1128,75 +1207,21 @@ static void fts5StructurePromoteTo( int szPromote, Fts5Structure *pStruct ){ - Fts5Structure *pNew; - u8 *pSpace; - int nSeg = fts5StructureCountSegments(pStruct); - int nLvl = pStruct->nLevel; - int nByte = ( - sizeof(Fts5Structure) + - sizeof(Fts5StructureLevel) * (nLvl+1) + - sizeof(Fts5StructureSegment) * (nSeg+nLvl+1) - ); - int iTst; + int il, is; + Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; - pNew = fts5IdxMalloc(p, nByte); - if( !pNew ) return; - pNew->nWriteCounter = pStruct->nWriteCounter; - pNew->nLevel = pStruct->nLevel; - pSpace = (u8*)&pNew->aLevel[nLvl+1]; - - for(iTst=0; iTstaLevel[iTst]; - pLvlOut->aSeg = (Fts5StructureSegment*)pSpace; - - if( iTst==iPromote ){ - int il, is; - int nSegCopy = 0; - - /* Figure out the number of segments that will be promoted. 
*/ - for(il=iTst+1; ilnLevel; il++){ - Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; - if( pLvl->nMerge ) break; - for(is=pLvl->nSeg-1; is>=0; is--){ - Fts5StructureSegment *pSeg = &pLvl->aSeg[is]; - int sz = pSeg->pgnoLast - pSeg->pgnoFirst + 1; - if( sz>szPromote ){ - il = pStruct->nLevel; - break; - } - nSegCopy++; - } - } - assert( nSegCopy>0 ); - pSpace += (nSegCopy * sizeof(Fts5StructureSegment)); - pLvlOut->nSeg = nSegCopy; - - for(il=iTst+1; ilnLevel && nSegCopy>0; il++){ - Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; - for(is=pLvl->nSeg-1; is>=0 && nSegCopy>0; is--){ - Fts5StructureSegment *pSeg = &pLvl->aSeg[is]; - nSegCopy--; - memcpy(&pLvlOut->aSeg[nSegCopy], pSeg, sizeof(Fts5StructureSegment)); - pLvl->nSeg--; - } - } - assert( nSegCopy==0 ); + for(il=iPromote+1; ilnLevel; il++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; + for(is=pLvl->nSeg-1; is>=0; is--){ + int sz = fts5SegmentSize(&pLvl->aSeg[is]); + if( sz>szPromote ) return; + fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); + if( p->rc ) return; + memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); + pOut->nSeg++; + pLvl->nSeg--; } - - nCopy = pStruct->aLevel[iTst].nSeg * sizeof(Fts5StructureSegment); - if( nCopy ) memcpy(pSpace, pStruct->aLevel[iTst].aSeg, nCopy); - pSpace += (nCopy + sizeof(Fts5StructureSegment)); - pLvlOut->nSeg += pStruct->aLevel[iTst].nSeg; } - - fts5PrintStructure("NEW", pNew); - memcpy(pStruct, pNew, nByte); - for(iTst=0; iTstnLevel; iTst++){ - int iOff = pNew->aLevel[iTst].aSeg - (Fts5StructureSegment*)pNew; - pStruct->aLevel[iTst].aSeg = &((Fts5StructureSegment*)pStruct)[iOff]; - } - sqlite3_free(pNew); } /* @@ -3306,9 +3331,10 @@ fflush(stdout); static void fts5IndexWork( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ - Fts5Structure *pStruct, /* Current structure of index */ + Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nLeaf /* Number of output leaves just written */ ){ + 
Fts5Structure *pStruct = *ppStruct; i64 nWrite; /* Initial value of write-counter */ int nWork; /* Number of work-quanta to perform */ int nRem; /* Number of leaf pages left to write */ @@ -3349,9 +3375,14 @@ static void fts5IndexWork( #endif if( nBestnMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; + if( iBestLvl==pStruct->nLevel-1 ){ + fts5StructureAddLevel(&p->rc, &pStruct); + } + fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0); fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); + *ppStruct = pStruct; } } @@ -3393,15 +3424,20 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); /* Edit the Fts5Structure and write it back to the database. */ - if( pStruct->nLevel==0 ) pStruct->nLevel = 1; - pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; - pSeg->iSegid = iSegid; - pSeg->nHeight = nHeight; - pSeg->pgnoFirst = 1; - pSeg->pgnoLast = pgnoLast; + if( pStruct->nLevel==0 ){ + fts5StructureAddLevel(&p->rc, &pStruct); + } + fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); + if( p->rc==SQLITE_OK ){ + pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; + pSeg->iSegid = iSegid; + pSeg->nHeight = nHeight; + pSeg->pgnoFirst = 1; + pSeg->pgnoLast = pgnoLast; + } } - fts5IndexWork(p, iHash, pStruct, pgnoLast); + fts5IndexWork(p, iHash, &pStruct, pgnoLast); fts5StructureWrite(p, iHash, pStruct); fts5StructureRelease(pStruct); } diff --git a/manifest b/manifest index ad4cd2c337..1f1332f603 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\s"segment\spromotion"\sto\sfts5.\sThis\sprevents\sthe\sFTS\sindex\sfrom\sgrowing\sindefinitely\sas\sdata\sis\sadded\sand\sdeleted. -D 2014-08-07T18:47:33.788 +C Use\smultiple\smemory\sallocations\sfor\sa\ssingle\sFts5Structure\sobject.\sThis\sis\sprobably\sless\sefficient\sbut\smuch\seasier\sto\sget\sright. 
+D 2014-08-09T18:02:27.223 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 -F ext/fts5/fts5_index.c 1e001ed7dd4650a0a853b986f34b71c8d3f71ec1 +F ext/fts5/fts5_index.c 75b2ebfa97ad6054bba98cb923cd2d3c6cc5b112 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1201,7 +1201,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1baeb1cee61d9c56c718b50af034a24f1018a322 -R c515f8340a51bac4920372c5a517f13d +P ba359d78e166d78e0dc89e3c63a9a41e9ffea989 +R 6a35c04d12672be9d9a3f97537e4f2d1 U dan -Z e9b5b5b15db061fad6b53bb80e61f761 +Z fa46d6698c1e1fd549eedd5dc298f36b diff --git a/manifest.uuid b/manifest.uuid index c812fc95e2..ce1c2f3501 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ba359d78e166d78e0dc89e3c63a9a41e9ffea989 \ No newline at end of file +2821825f7a481755a333dcdcad780b3e24448f20 \ No newline at end of file From c45f24b2815ac892ec36bae7f0301cbbd9631926 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 9 Aug 2014 18:22:59 +0000 Subject: [PATCH 040/206] Fix an uninitialized variable causing a problem during fts5 table initialization. 
FossilOrigin-Name: a14fa876f0eb66028e302b908967cc4a05ede9fc --- ext/fts5/fts5_storage.c | 2 +- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 22cb427e05..a67421075f 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -168,7 +168,7 @@ int sqlite3Fts5StorageOpen( Fts5Storage **pp, char **pzErr /* OUT: Error message */ ){ - int rc; + int rc = SQLITE_OK; Fts5Storage *p; /* New object */ int nByte; /* Bytes of space to allocate */ diff --git a/manifest b/manifest index 1f1332f603..e3d5197922 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Use\smultiple\smemory\sallocations\sfor\sa\ssingle\sFts5Structure\sobject.\sThis\sis\sprobably\sless\sefficient\sbut\smuch\seasier\sto\sget\sright. -D 2014-08-09T18:02:27.223 +C Fix\san\suninitialized\svariable\scausing\sa\sproblem\sduring\sfts5\stable\sinitialization. +D 2014-08-09T18:22:59.679 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 F ext/fts5/fts5_index.c 75b2ebfa97ad6054bba98cb923cd2d3c6cc5b112 -F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 +F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1201,7 +1201,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F 
tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ba359d78e166d78e0dc89e3c63a9a41e9ffea989 -R 6a35c04d12672be9d9a3f97537e4f2d1 +P 2821825f7a481755a333dcdcad780b3e24448f20 +R c0a232bfa9626e6e9a9c306fc05ca763 U dan -Z fa46d6698c1e1fd549eedd5dc298f36b +Z 9113dc9c4d427c4fad9a129f5cfa7a9b diff --git a/manifest.uuid b/manifest.uuid index ce1c2f3501..332f5fc368 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2821825f7a481755a333dcdcad780b3e24448f20 \ No newline at end of file +a14fa876f0eb66028e302b908967cc4a05ede9fc \ No newline at end of file From c7fe7a969b383bf3bdb280bbc6b31e1a21a401d9 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 11 Aug 2014 19:44:52 +0000 Subject: [PATCH 041/206] Replace the hash table borrowed from fts3. FossilOrigin-Name: 617e2fac1c128212254f71b1a8fddaf0d1d90262 --- ext/fts5/fts5Int.h | 41 +++++ ext/fts5/fts5_hash.c | 371 ++++++++++++++++++++++++++++++++++++++++++ ext/fts5/fts5_index.c | 335 +++++++++----------------------------- main.mk | 5 + manifest | 19 ++- manifest.uuid | 2 +- tool/loadfts.c | 17 +- 7 files changed, 519 insertions(+), 271 deletions(-) create mode 100644 ext/fts5/fts5_hash.c diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 4ae110fd19..4ef8454e1f 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -283,6 +283,47 @@ int sqlite3Fts5IndexReads(Fts5Index *p); ** End of interface to code in fts5_index.c. **************************************************************************/ +/************************************************************************** +** Interface to code in fts5_hash.c. +*/ +typedef struct Fts5Hash Fts5Hash; + +/* +** Create a hash table, free a hash table. 
+*/ +int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize); +void sqlite3Fts5HashFree(Fts5Hash*); + +int sqlite3Fts5HashWrite( + Fts5Hash*, + i64 iRowid, /* Rowid for this entry */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +); + +/* +** Empty (but do not delete) a hash table. +*/ +void sqlite3Fts5HashClear(Fts5Hash*); + +/* +** Iterate through the contents of the hash table. +*/ +int sqlite3Fts5HashIterate( + Fts5Hash*, + void *pCtx, + int (*xTerm)(void*, const char*, int), + int (*xEntry)(void*, i64, const u8*, int), + int (*xTermDone)(void*) +); + + + +/* +** End of interface to code in fts5_hash.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_storage.c. fts5_storage.c contains contains ** code to access the data stored in the %_content and %_docsize tables. diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c new file mode 100644 index 0000000000..3b50f3ac78 --- /dev/null +++ b/ext/fts5/fts5_hash.c @@ -0,0 +1,371 @@ +/* +** 2014 August 11 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ + +#include "fts5Int.h" + +typedef struct Fts5HashEntry Fts5HashEntry; + +/* +** This file contains the implementation of an in-memory hash table used +** to accumuluate "term -> doclist" content before it is flused to a level-0 +** segment. 
+*/ + + +struct Fts5Hash { + int *pnByte; /* Pointer to bytes counter */ + int nEntry; /* Number of entries currently in hash */ + int nSlot; /* Size of aSlot[] array */ + Fts5HashEntry **aSlot; /* Array of hash slots */ +}; + +/* +** Each entry in the hash table is represented by an object of the +** following type. Each object, its key (zKey[]) and its current data +** are stored in a single memory allocation. The position list data +** immediately follows the key data in memory. +** +** The data that follows the key is in a similar, but not identical format +** to the doclist data stored in the database. It is: +** +** * Rowid, as a varint +** * Position list, without 0x00 terminator. +** * Size of previous position list and rowid, as a 4 byte +** big-endian integer. +** +** iRowidOff: +** Offset of last rowid written to data area. Relative to first byte of +** structure. +** +** nData: +** Bytes of data written since iRowidOff. +*/ +struct Fts5HashEntry { + Fts5HashEntry *pNext; /* Next hash entry with same hash-key */ + + int nAlloc; /* Total size of allocation */ + int iRowidOff; /* Offset of last rowid written */ + int nData; /* Total bytes of data (incl. structure) */ + + int iCol; /* Column of last value written */ + int iPos; /* Position of last value written */ + i64 iRowid; /* Rowid of last value written */ + char zKey[0]; /* Nul-terminated entry key */ +}; + + +/* +** Allocate a new hash table. 
+*/ +int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){ + int rc = SQLITE_OK; + Fts5Hash *pNew; + + *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + int nByte; + memset(pNew, 0, sizeof(Fts5Hash)); + pNew->pnByte = pnByte; + + pNew->nSlot = 1024; + nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; + pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); + if( pNew->aSlot==0 ){ + sqlite3_free(pNew); + *ppNew = 0; + rc = SQLITE_NOMEM; + }else{ + memset(pNew->aSlot, 0, nByte); + } + } + return rc; +} + +/* +** Free a hash table object. +*/ +void sqlite3Fts5HashFree(Fts5Hash *pHash){ + if( pHash ){ + sqlite3Fts5HashClear(pHash); + sqlite3_free(pHash->aSlot); + sqlite3_free(pHash); + } +} + +/* +** Empty (but do not delete) a hash table. +*/ +void sqlite3Fts5HashClear(Fts5Hash *pHash){ + int i; + for(i=0; inSlot; i++){ + if( pHash->aSlot[i] ){ + sqlite3_free(pHash->aSlot[i]); + pHash->aSlot[i] = 0; + } + } +} + +static unsigned int fts5HashKey(Fts5Hash *pHash, const char *p, int n){ + int i; + unsigned int h = 13; + for(i=n-1; i>=0; i--){ + h = (h << 3) ^ h ^ p[i]; + } + return (h % pHash->nSlot); +} + +/* +** Store the 32-bit integer passed as the second argument in buffer p. +*/ +static int fts5PutNativeInt(u8 *p, int i){ + assert( sizeof(i)==4 ); + memcpy(p, &i, sizeof(i)); + return sizeof(i); +} + +/* +** Read and return the 32-bit integer stored in buffer p. 
+*/ +static int fts5GetNativeU32(u8 *p){ + int i; + assert( sizeof(i)==4 ); + memcpy(&i, p, sizeof(i)); + return i; +} + +int sqlite3Fts5HashWrite( + Fts5Hash *pHash, + i64 iRowid, /* Rowid for this entry */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + unsigned int iHash = fts5HashKey(pHash, pToken, nToken); + Fts5HashEntry *p; + u8 *pPtr; + int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ + + /* Attempt to locate an existing hash object */ + for(p=pHash->aSlot[iHash]; p; p=p->pNext){ + if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break; + } + + /* If an existing hash entry cannot be found, create a new one. */ + if( p==0 ){ + int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64; + if( nByte<128 ) nByte = 128; + + p = (Fts5HashEntry*)sqlite3_malloc(nByte); + if( !p ) return SQLITE_NOMEM; + memset(p, 0, sizeof(Fts5HashEntry)); + p->nAlloc = nByte; + memcpy(p->zKey, pToken, nToken); + p->zKey[nToken] = '\0'; + p->iRowidOff = p->nData = nToken + 1 + sizeof(Fts5HashEntry); + p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); + p->iRowid = iRowid; + p->pNext = pHash->aSlot[iHash]; + pHash->aSlot[iHash] = p; + + nIncr += p->nData; + } + + /* Check there is enough space to append a new entry. Worst case scenario + ** is: + ** + ** + 4 bytes for the previous entry size field, + ** + 9 bytes for a new rowid, + ** + 1 byte for a "new column" byte, + ** + 3 bytes for a new column number (16-bit max) as a varint, + ** + 5 bytes for the new position offset (32-bit max). 
+ */ + if( (p->nAlloc - p->nData) < (4 + 9 + 1 + 3 + 5) ){ + int nNew = p->nAlloc * 2; + Fts5HashEntry *pNew; + Fts5HashEntry **pp; + pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); + if( pNew==0 ) return SQLITE_NOMEM; + pNew->nAlloc = nNew; + for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pNext); + *pp = pNew; + p = pNew; + } + pPtr = (u8*)p; + nIncr -= p->nData; + + /* If this is a new rowid, append the 4-byte size field for the previous + ** entry, and the new rowid for this entry. */ + if( iRowid!=p->iRowid ){ + p->nData += fts5PutNativeInt(&pPtr[p->nData], p->nData - p->iRowidOff); + p->iRowidOff = p->nData; + p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid); + p->iCol = 0; + p->iPos = 0; + p->iRowid = iRowid; + } + + if( iCol>=0 ){ + /* Append a new column value, if necessary */ + assert( iCol>=p->iCol ); + if( iCol!=p->iCol ){ + pPtr[p->nData++] = 0x01; + p->nData += sqlite3PutVarint(&pPtr[p->nData], iCol); + p->iCol = iCol; + p->iPos = 0; + } + + /* Append the new position offset */ + p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); + p->iPos = iPos; + } + nIncr += p->nData; + + *pHash->pnByte += nIncr; + return SQLITE_OK; +} + + +/* +** Arguments pLeft and pRight point to linked-lists of hash-entry objects, +** each sorted in key order. This function merges the two lists into a +** single list and returns a pointer to its first element. 
+*/ +static Fts5HashEntry *fts5HashEntryMerge( + Fts5HashEntry *pLeft, + Fts5HashEntry *pRight +){ + Fts5HashEntry *p1 = pLeft; + Fts5HashEntry *p2 = pRight; + Fts5HashEntry *pRet = 0; + Fts5HashEntry **ppOut = &pRet; + + while( p1 || p2 ){ + if( p1==0 ){ + *ppOut = p2; + p2 = 0; + }else if( p2==0 ){ + *ppOut = p1; + p1 = 0; + }else{ + int i = 0; + while( p1->zKey[i]==p2->zKey[i] ) i++; + + if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){ + /* p2 is smaller */ + *ppOut = p2; + ppOut = &p2->pNext; + p2 = p2->pNext; + }else{ + /* p1 is smaller */ + *ppOut = p1; + ppOut = &p1->pNext; + p1 = p1->pNext; + } + *ppOut = 0; + } + } + + return pRet; +} + +/* +** Extract all tokens from hash table iHash and link them into a list +** in sorted order. The hash table is cleared before returning. It is +** the responsibility of the caller to free the elements of the returned +** list. +*/ +static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){ + const int nMergeSlot = 32; + Fts5HashEntry **ap; + Fts5HashEntry *pList; + int iSlot; + int i; + + *ppSorted = 0; + ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot); + if( !ap ) return SQLITE_NOMEM; + memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); + + for(iSlot=0; iSlotnSlot; iSlot++){ + while( pHash->aSlot[iSlot] ){ + Fts5HashEntry *pEntry = pHash->aSlot[iSlot]; + pHash->aSlot[iSlot] = pEntry->pNext; + pEntry->pNext = 0; + for(i=0; ap[i]; i++){ + pEntry = fts5HashEntryMerge(pEntry, ap[i]); + ap[i] = 0; + } + ap[i] = pEntry; + } + } + + pList = 0; + for(i=0; ipNext; + if( rc==SQLITE_OK ){ + u8 *pPtr = (u8*)pList; + int nKey = strlen(pList->zKey); + int iOff = pList->iRowidOff; + int iEnd = sizeof(Fts5HashEntry) + nKey + 1; + int nByte = pList->nData - pList->iRowidOff; + + rc = xTerm(pCtx, pList->zKey, nKey); + while( rc==SQLITE_OK && iOff ){ + int nVarint; + i64 iRowid; + nVarint = getVarint(&pPtr[iOff], (u64*)&iRowid); + rc = xEntry(pCtx, iRowid, &pPtr[iOff+nVarint], nByte-nVarint); + if( iOff==iEnd ){ + iOff = 
0; + }else{ + nByte = fts5GetNativeU32(&pPtr[iOff-sizeof(int)]); + iOff = iOff - sizeof(int) - nByte; + } + } + if( rc==SQLITE_OK ){ + rc = xTermDone(pCtx); + } + } + sqlite3_free(pList); + pList = pNext; + } + } + return rc; +} + + + diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 737c226051..e1f6c70f60 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -17,7 +17,6 @@ */ #include "fts5Int.h" -#include "fts3_hash.h" /* ** Overview: @@ -276,8 +275,6 @@ typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; -typedef struct Fts5PendingDoclist Fts5PendingDoclist; -typedef struct Fts5PendingPoslist Fts5PendingPoslist; typedef struct Fts5PosIter Fts5PosIter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; @@ -300,7 +297,7 @@ struct Fts5Index { ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. */ - Fts3Hash *aHash; /* One hash for terms, one for each prefix */ + Fts5Hash **apHash; /* Array of hash tables */ int nMaxPendingData; /* Max pending data before flush to disk */ int nPendingData; /* Current bytes of pending data */ i64 iWriteRowid; /* Rowid for current doc being written */ @@ -347,26 +344,6 @@ struct Fts5Data { int nRef; /* Ref count */ }; -/* -** Before it is flushed to a level-0 segment, term data is collected in -** the hash tables in the Fts5Index.aHash[] array. Hash table keys are -** terms (or, for prefix indexes, term prefixes) and values are instances -** of type Fts5PendingDoclist. 
-*/ -struct Fts5PendingDoclist { - u8 *pTerm; /* Term for this entry */ - int nTerm; /* Bytes of data at pTerm */ - Fts5PendingPoslist *pPoslist; /* Linked list of position lists */ - int iCol; /* Column for last entry in pPending */ - int iPos; /* Pos value for last entry in pPending */ - Fts5PendingDoclist *pNext; /* Used during merge sort */ -}; -struct Fts5PendingPoslist { - i64 iRowid; /* Rowid for this doclist entry */ - Fts5Buffer buf; /* Current doclist contents */ - Fts5PendingPoslist *pNext; /* Previous poslist for same term */ -}; - /* ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the @@ -2458,18 +2435,6 @@ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ return (p->rc || pIter->chunk.pLeaf==0); } - -/* -** Allocate memory. The difference between this function and fts5IdxMalloc() -** is that this increments the Fts5Index.nPendingData variable by the -** number of bytes allocated. It should be used for all allocations used -** to store pending-data within the in-memory hash tables. -*/ -static void *fts5PendingMalloc(Fts5Index *p, int nByte){ - p->nPendingData += nByte; - return fts5IdxMalloc(p, nByte); -} - /* ** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken) ** in hash table for index iIdx. If iIdx is zero, this is the main terms @@ -2485,78 +2450,11 @@ static void fts5AddTermToHash( int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ - Fts5Config *pConfig = p->pConfig; - Fts3Hash *pHash; - Fts5PendingDoclist *pDoclist; - Fts5PendingPoslist *pPoslist; - i64 iRowid = p->iWriteRowid; /* Rowid associated with these tokens */ - - /* If an error has already occured this call is a no-op. */ - if( p->rc!=SQLITE_OK ) return; - - /* Find the hash table to use. It has already been allocated. 
*/ - assert( iIdx<=pConfig->nPrefix ); - assert( iIdx==0 || nToken==pConfig->aPrefix[iIdx-1] ); - pHash = &p->aHash[iIdx]; - - /* Find the doclist to append to. Allocate a new doclist object if - ** required. */ - pDoclist = (Fts5PendingDoclist*)fts3HashFind(pHash, pToken, nToken); - if( pDoclist==0 ){ - Fts5PendingDoclist *pDel; - pDoclist = fts5PendingMalloc(p, sizeof(Fts5PendingDoclist) + nToken); - if( pDoclist==0 ) return; - pDoclist->pTerm = (u8*)&pDoclist[1]; - pDoclist->nTerm = nToken; - memcpy(pDoclist->pTerm, pToken, nToken); - pDel = fts3HashInsert(pHash, pDoclist->pTerm, nToken, pDoclist); - if( pDel ){ - assert( pDoclist==pDel ); - sqlite3_free(pDel); - p->rc = SQLITE_NOMEM; - return; - } + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3Fts5HashWrite( + p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken + ); } - - /* Find the poslist to append to. Allocate a new object if required. */ - pPoslist = pDoclist->pPoslist; - if( pPoslist==0 || pPoslist->iRowid!=iRowid ){ - pPoslist = fts5PendingMalloc(p, sizeof(Fts5PendingPoslist)); - if( pPoslist==0 ) return; - pPoslist->pNext = pDoclist->pPoslist; - pPoslist->iRowid = iRowid; - pDoclist->pPoslist = pPoslist; - pDoclist->iCol = 0; - pDoclist->iPos = 0; - } - - /* Append the values to the position list. */ - if( iCol>=0 ){ - p->nPendingData -= pPoslist->buf.nSpace; - if( iCol!=pDoclist->iCol ){ - fts5BufferAppendVarint(&p->rc, &pPoslist->buf, 1); - fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iCol); - pDoclist->iCol = iCol; - pDoclist->iPos = 0; - } - fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iPos + 2 - pDoclist->iPos); - p->nPendingData += pPoslist->buf.nSpace; - pDoclist->iPos = iPos; - } -} - -/* -** Free the pending-doclist object passed as the only argument. 
-*/ -static void fts5FreePendingDoclist(Fts5PendingDoclist *p){ - Fts5PendingPoslist *pPoslist; - Fts5PendingPoslist *pNext; - for(pPoslist=p->pPoslist; pPoslist; pPoslist=pNext){ - pNext = pPoslist->pNext; - fts5BufferFree(&pPoslist->buf); - sqlite3_free(pPoslist); - } - sqlite3_free(p); } /* @@ -2582,15 +2480,11 @@ void sqlite3Fts5IndexWrite( if( p->rc!=SQLITE_OK ) return; /* Allocate hash tables if they have not already been allocated */ - if( p->aHash==0 ){ + if( p->apHash==0 ){ int nHash = pConfig->nPrefix + 1; - p->aHash = (Fts3Hash*)sqlite3_malloc(sizeof(Fts3Hash) * nHash); - if( p->aHash==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - for(i=0; iaHash[i], FTS3_HASH_STRING, 0); - } + p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash); + for(i=0; p->rc==SQLITE_OK && irc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData); } } @@ -2635,89 +2529,6 @@ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ return 0; } -static Fts5PendingDoclist *fts5PendingMerge( - Fts5Index *p, - Fts5PendingDoclist *pLeft, - Fts5PendingDoclist *pRight -){ - Fts5PendingDoclist *p1 = pLeft; - Fts5PendingDoclist *p2 = pRight; - Fts5PendingDoclist *pRet = 0; - Fts5PendingDoclist **ppOut = &pRet; - - while( p1 || p2 ){ - if( p1==0 ){ - *ppOut = p2; - p2 = 0; - }else if( p2==0 ){ - *ppOut = p1; - p1 = 0; - }else{ - int nCmp = MIN(p1->nTerm, p2->nTerm); - int res = memcmp(p1->pTerm, p2->pTerm, nCmp); - if( res==0 ) res = p1->nTerm - p2->nTerm; - - if( res>0 ){ - /* p2 is smaller */ - *ppOut = p2; - ppOut = &p2->pNext; - p2 = p2->pNext; - }else{ - /* p1 is smaller */ - *ppOut = p1; - ppOut = &p1->pNext; - p1 = p1->pNext; - } - *ppOut = 0; - } - } - - return pRet; -} - -/* -** Extract all tokens from hash table iHash and link them into a list -** in sorted order. The hash table is cleared before returning. It is -** the responsibility of the caller to free the elements of the returned -** list. -** -** If an error occurs, set the Fts5Index.rc error code. 
If an error has -** already occurred, this function is a no-op. -*/ -static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){ - const int nMergeSlot = 32; - Fts3Hash *pHash; - Fts3HashElem *pE; /* Iterator variable */ - Fts5PendingDoclist **ap; - Fts5PendingDoclist *pList; - int i; - - ap = fts5IdxMalloc(p, sizeof(Fts5PendingDoclist*) * nMergeSlot); - if( !ap ) return 0; - - pHash = &p->aHash[iHash]; - for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ - int i; - Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE); - assert( pDoclist->pNext==0 ); - for(i=0; ap[i]; i++){ - pDoclist = fts5PendingMerge(p, pDoclist, ap[i]); - ap[i] = 0; - } - ap[i] = pDoclist; - } - - pList = 0; - for(i=0; ipConfig; int i; for(i=0; i<=pConfig->nPrefix; i++){ - Fts3Hash *pHash = &p->aHash[i]; - Fts3HashElem *pE; /* Iterator variable */ - for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ - Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE); - fts5FreePendingDoclist(pDoclist); - } - fts3HashClear(pHash); + sqlite3Fts5HashClear(p->apHash[i]); } p->nPendingData = 0; } @@ -3012,44 +2817,6 @@ static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); } -/* -** Write the contents of pending-doclist object pDoclist to writer pWriter. -** -** If an error occurs, set the Fts5Index.rc error code. If an error has -** already occurred, this function is a no-op. 
-*/ -static void fts5WritePendingDoclist( - Fts5Index *p, /* FTS5 backend object */ - Fts5SegWriter *pWriter, /* Write to this writer object */ - Fts5PendingDoclist *pDoclist /* Doclist to write to pWriter */ -){ - Fts5PendingPoslist *pPoslist; /* Used to iterate through the doclist */ - - /* Append the term */ - fts5WriteAppendTerm(p, pWriter, pDoclist->nTerm, pDoclist->pTerm); - - /* Append the position list for each rowid */ - for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){ - int i = 0; - - /* Append the rowid itself */ - fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid); - - /* Append the size of the position list in bytes */ - fts5WriteAppendPoslistInt(p, pWriter, pPoslist->buf.n); - - /* Copy the position list to the output segment */ - while( ibuf.n){ - int iVal; - i += getVarint32(&pPoslist->buf.p[i], iVal); - fts5WriteAppendPoslistInt(p, pWriter, iVal); - } - } - - /* Write the doclist terminator */ - fts5WriteAppendZerobyte(p, pWriter); -} - /* ** Flush any data cached by the writer object to the database. Free any ** allocations associated with the writer. 
@@ -3386,6 +3153,53 @@ static void fts5IndexWork( } } +typedef struct Fts5FlushCtx Fts5FlushCtx; +struct Fts5FlushCtx { + Fts5Index *pIdx; + Fts5SegWriter writer; +}; + +static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){ + Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; + int rc = SQLITE_OK; + fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm); + return rc; +} + +static int fts5FlushTermDone(void *pCtx){ + Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; + int rc = SQLITE_OK; + /* Write the doclist terminator */ + fts5WriteAppendZerobyte(p->pIdx, &p->writer); + return rc; +} + +static int fts5FlushNewEntry( + void *pCtx, + i64 iRowid, + const u8 *aPoslist, + int nPoslist +){ + Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; + int rc = SQLITE_OK; + int i = 0; + + /* Append the rowid itself */ + fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid); + + /* Append the size of the position list in bytes */ + fts5WriteAppendPoslistInt(p->pIdx, &p->writer, nPoslist); + + /* Copy the position list to the output segment */ + while( ipIdx, &p->writer, iVal); + } + + return rc; +} + /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. 
@@ -3404,24 +3218,19 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ - Fts5SegWriter writer; - Fts5PendingDoclist *pList; - Fts5PendingDoclist *pIter; - Fts5PendingDoclist *pNext; - Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ + int rc; + Fts5FlushCtx ctx; - pList = fts5PendingList(p, iHash); - assert( pList!=0 || p->rc!=SQLITE_OK ); - fts5WriteInit(p, &writer, iHash, iSegid); + fts5WriteInit(p, &ctx.writer, iHash, iSegid); + ctx.pIdx = p; - for(pIter=pList; pIter; pIter=pNext){ - pNext = pIter->pNext; - fts5WritePendingDoclist(p, &writer, pIter); - fts5FreePendingDoclist(pIter); - } - fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); + rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, + fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone + ); + if( p->rc==SQLITE_OK ) p->rc = rc; + fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); /* Edit the Fts5Structure and write it back to the database. */ if( pStruct->nLevel==0 ){ @@ -3452,7 +3261,7 @@ static void fts5IndexFlush(Fts5Index *p){ /* If an error has already occured this call is a no-op. 
*/ if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return; - assert( p->aHash ); + assert( p->apHash ); /* Flush the terms and each prefix index to disk */ for(i=0; i<=pConfig->nPrefix; i++){ @@ -3555,7 +3364,13 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); - sqlite3_free(p->aHash); + if( p->apHash ){ + int i; + for(i=0; i<=p->pConfig->nPrefix; i++){ + sqlite3Fts5HashFree(p->apHash[i]); + } + sqlite3_free(p->apHash); + } sqlite3_free(p->zDataTbl); sqlite3_free(p); return rc; @@ -4315,7 +4130,7 @@ static void fts5SetupPrefixIter( if( aBuf && pStruct ){ Fts5DoclistIter *pDoclist; int i; - i64 iLastRowid; + i64 iLastRowid = 0; Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ Fts5Buffer doclist; diff --git a/main.mk b/main.mk index 1a75c39827..5ac76de2d8 100644 --- a/main.mk +++ b/main.mk @@ -77,6 +77,7 @@ LIBOBJ += fts5_aux.o LIBOBJ += fts5_buffer.o LIBOBJ += fts5_config.o LIBOBJ += fts5_expr.o +LIBOBJ += fts5_hash.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5parse.o @@ -232,6 +233,7 @@ SRC += \ $(TOP)/ext/fts5/fts5.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ + $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c @@ -599,6 +601,9 @@ fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR) fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c +fts5_hash.o: $(TOP)/ext/fts5/fts5_hash.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_hash.c + fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c diff --git a/manifest b/manifest index e3d5197922..c888cfba5a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\suninitialized\svariable\scausing\sa\sproblem\sduring\sfts5\stable\sinitialization. 
-D 2014-08-09T18:22:59.679 +C Replace\sthe\shash\stable\sborrowed\sfrom\sfts3. +D 2014-08-11T19:44:52.686 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,12 +105,13 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/fts5.c 15e585ed0194f94a1da360808f29184f9d44554c F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a -F ext/fts5/fts5Int.h 410001da21bcc3d09b4290d4858352d0985ac7a6 +F ext/fts5/fts5Int.h f17a25546d598fdc5cc47f576d38063fd9290963 F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 -F ext/fts5/fts5_index.c 75b2ebfa97ad6054bba98cb923cd2d3c6cc5b112 +F ext/fts5/fts5_hash.c 2af412d00f65ad427f18acbe421c113413cdef06 +F ext/fts5/fts5_index.c ccef8703b6228a39090b0a03b83f163e69627ff2 F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -156,7 +157,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 8118631727a27fa88eb38a07ac3b86ecb86e9eb0 +F main.mk c4fff232b880b91bf665cd2951465de61178e444 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1162,7 +1163,7 @@ F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5 F tool/getlock.c 
f4c39b651370156cae979501a7b156bdba50e7ce F tool/lemon.c 3ff0fec22f92dfb54e62eeb48772eddffdbeb0d6 F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc -F tool/loadfts.c 3bdd46090112c84df44a4fbae740af3836108b3f +F tool/loadfts.c b5b3206ddd58d89ec8d54038c784bcadd6195915 F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6 F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383 F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670 @@ -1201,7 +1202,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2821825f7a481755a333dcdcad780b3e24448f20 -R c0a232bfa9626e6e9a9c306fc05ca763 +P a14fa876f0eb66028e302b908967cc4a05ede9fc +R b81a5fabd4e838059b5d12635ffcd939 U dan -Z 9113dc9c4d427c4fad9a129f5cfa7a9b +Z 39c621bf94a400035f58731d1ee0f6cd diff --git a/manifest.uuid b/manifest.uuid index 332f5fc368..602ab0978e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a14fa876f0eb66028e302b908967cc4a05ede9fc \ No newline at end of file +617e2fac1c128212254f71b1a8fddaf0d1d90262 \ No newline at end of file diff --git a/tool/loadfts.c b/tool/loadfts.c index 18bd355a4d..7da07b15dd 100644 --- a/tool/loadfts.c +++ b/tool/loadfts.c @@ -69,6 +69,7 @@ static void showHelp(const char *zArgv0){ " -fts [345] FTS version to use (default=5)\n" " -idx [01] Create a mapping from filename to rowid (default=0)\n" " -dir Root of directory tree to load data from (default=.)\n" +" -trans Number of inserts per transaction (default=1)\n" , zArgv0 ); exit(1); @@ -96,6 +97,7 @@ static void sqlite_error_out(const char *zText, sqlite3 *db){ */ typedef struct VisitContext VisitContext; struct VisitContext { + int nRowPerTrans; sqlite3 *db; /* Database handle */ sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */ }; @@ -112,7 +114,13 @@ void visit_file(void 
*pCtx, const char *zPath){ sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC); sqlite3_step(p->pInsert); rc = sqlite3_reset(p->pInsert); - if( rc!=SQLITE_OK ) sqlite_error_out("insert", p->db); + if( rc!=SQLITE_OK ){ + sqlite_error_out("insert", p->db); + }else if( p->nRowPerTrans>0 + && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0 + ){ + sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0); + } } /* @@ -150,6 +158,7 @@ int main(int argc, char **argv){ const char *zDir = "."; /* Directory to scan */ int i; int rc; + int nRowPerTrans = 0; sqlite3 *db; char *zSql; VisitContext sCtx; @@ -163,6 +172,9 @@ int main(int argc, char **argv){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } + if( strcmp(zOpt, "-trans")==0 ){ + nRowPerTrans = atoi(zArg); + } else if( strcmp(zOpt, "-idx")==0 ){ bMap = atoi(zArg); if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); @@ -189,13 +201,16 @@ int main(int argc, char **argv){ /* Compile the INSERT statement to write data to the FTS table. */ memset(&sCtx, 0, sizeof(VisitContext)); sCtx.db = db; + sCtx.nRowPerTrans = nRowPerTrans; rc = sqlite3_prepare_v2(db, "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 ); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db); /* Load all files in the directory hierarchy into the FTS table. */ + if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0); traverse(zDir, (void*)&sCtx, visit_file); + if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0); /* Clean up and exit. */ sqlite3_finalize(sCtx.pInsert); From 1a669f84a12a576f3055277f43b6a6ea3e76da33 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 11 Aug 2014 20:26:34 +0000 Subject: [PATCH 042/206] Simplify the way position lists are copied when merging data. 
FossilOrigin-Name: 9f8d678a0ea75e169daf8b3f00bd05f52a050ea6 --- ext/fts5/fts5_index.c | 46 ++++++++++++++++++++++++++++++------------- manifest | 12 +++++------ manifest.uuid | 2 +- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e1f6c70f60..a21f881e61 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2813,6 +2813,34 @@ static void fts5WriteAppendPoslistInt( } } +static void fts5WriteAppendPoslistData( + Fts5Index *p, + Fts5SegWriter *pWriter, + const u8 *aData, + int nData +){ + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + const u8 *a = aData; + int n = nData; + + while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pgsz ){ + int nReq = p->pgsz - pPage->buf.n; + int nCopy = 0; + while( nCopyrc, &pPage->buf, nCopy, a); + a += nCopy; + n -= nCopy; + fts5WriteFlushLeaf(p, pWriter); + pWriter->bFirstRowidInPage = 1; + } + if( n>0 ){ + fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); + } +} + static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); } @@ -3041,12 +3069,7 @@ fflush(stdout); /* Copy the position list from input to output */ fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ - int iOff = 0; - while( iOffpIdx, &p->writer, iRowid); @@ -3190,13 +3213,8 @@ static int fts5FlushNewEntry( /* Append the size of the position list in bytes */ fts5WriteAppendPoslistInt(p->pIdx, &p->writer, nPoslist); - /* Copy the position list to the output segment */ - while( ipIdx, &p->writer, iVal); - } - + /* And the poslist data */ + fts5WriteAppendPoslistData(p->pIdx, &p->writer, aPoslist, nPoslist); return rc; } diff --git a/manifest b/manifest index c888cfba5a..fab8b27016 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Replace\sthe\shash\stable\sborrowed\sfrom\sfts3. 
-D 2014-08-11T19:44:52.686 +C Simplify\sthe\sway\sposition\slists\sare\scopied\swhen\smerging\sdata. +D 2014-08-11T20:26:34.077 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 F ext/fts5/fts5_hash.c 2af412d00f65ad427f18acbe421c113413cdef06 -F ext/fts5/fts5_index.c ccef8703b6228a39090b0a03b83f163e69627ff2 +F ext/fts5/fts5_index.c 687736cba90a735ecd3cf9bf0e4174e7e5cc3f60 F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1202,7 +1202,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a14fa876f0eb66028e302b908967cc4a05ede9fc -R b81a5fabd4e838059b5d12635ffcd939 +P 617e2fac1c128212254f71b1a8fddaf0d1d90262 +R b29d729688c110acdf84d0128dad3fdf U dan -Z 39c621bf94a400035f58731d1ee0f6cd +Z 772c48dd1029e003fed3d46062572012 diff --git a/manifest.uuid b/manifest.uuid index 602ab0978e..7afe20b569 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -617e2fac1c128212254f71b1a8fddaf0d1d90262 \ No newline at end of file +9f8d678a0ea75e169daf8b3f00bd05f52a050ea6 \ No newline at end of file From e2fb318e341e3dfe9c670f02f409b2723ba842fc Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 12 Aug 2014 08:36:00 +0000 Subject: [PATCH 043/206] Automatically resize the hash table used by fts5. 
FossilOrigin-Name: f1cb48f412a5f200f1fe04f91072864f379db08f --- ext/fts5/fts5_hash.c | 45 ++++++++++++++++++++++++++++++++++++++++--- ext/fts5/fts5_index.c | 2 +- manifest | 14 +++++++------- manifest.uuid | 2 +- 4 files changed, 51 insertions(+), 12 deletions(-) diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 3b50f3ac78..97dd0dc0ec 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -116,15 +116,46 @@ void sqlite3Fts5HashClear(Fts5Hash *pHash){ pHash->aSlot[i] = 0; } } + pHash->nEntry = 0; } -static unsigned int fts5HashKey(Fts5Hash *pHash, const char *p, int n){ +static unsigned int fts5HashKey(int nSlot, const char *p, int n){ int i; unsigned int h = 13; for(i=n-1; i>=0; i--){ h = (h << 3) ^ h ^ p[i]; } - return (h % pHash->nSlot); + return (h % nSlot); +} + +/* +** Resize the hash table by doubling the number of slots. +*/ +static int fts5HashResize(Fts5Hash *pHash){ + int nNew = pHash->nSlot*2; + int i; + Fts5HashEntry **apNew; + Fts5HashEntry **apOld = pHash->aSlot; + + apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*)); + if( !apNew ) return SQLITE_NOMEM; + memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); + + for(i=0; inSlot; i++){ + while( apOld[i] ){ + int iHash; + Fts5HashEntry *p = apOld[i]; + apOld[i] = p->pNext; + iHash = fts5HashKey(nNew, p->zKey, strlen(p->zKey)); + p->pNext = apNew[iHash]; + apNew[iHash] = p; + } + } + + sqlite3_free(apOld); + pHash->nSlot = nNew; + pHash->aSlot = apNew; + return SQLITE_OK; } /* @@ -153,7 +184,7 @@ int sqlite3Fts5HashWrite( int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ - unsigned int iHash = fts5HashKey(pHash, pToken, nToken); + unsigned int iHash = fts5HashKey(pHash->nSlot, pToken, nToken); Fts5HashEntry *p; u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ @@ -168,6 +199,12 @@ int sqlite3Fts5HashWrite( int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64; if( nByte<128 ) 
nByte = 128; + if( (pHash->nEntry*2)>=pHash->nSlot ){ + int rc = fts5HashResize(pHash); + if( rc!=SQLITE_OK ) return rc; + iHash = fts5HashKey(pHash->nSlot, pToken, nToken); + } + p = (Fts5HashEntry*)sqlite3_malloc(nByte); if( !p ) return SQLITE_NOMEM; memset(p, 0, sizeof(Fts5HashEntry)); @@ -179,6 +216,7 @@ int sqlite3Fts5HashWrite( p->iRowid = iRowid; p->pNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; + pHash->nEntry++; nIncr += p->nData; } @@ -317,6 +355,7 @@ static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){ pList = fts5HashEntryMerge(pList, ap[i]); } + pHash->nEntry = 0; sqlite3_free(ap); *ppSorted = pList; return SQLITE_OK; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index a21f881e61..9676ad4b45 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3293,7 +3293,7 @@ static void fts5IndexFlush(Fts5Index *p){ ** to the document with rowid iRowid. */ void sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ - if( iRowid<=p->iWriteRowid ){ + if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ fts5IndexFlush(p); } p->iWriteRowid = iRowid; diff --git a/manifest b/manifest index fab8b27016..0e007fa42a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Simplify\sthe\sway\sposition\slists\sare\scopied\swhen\smerging\sdata. -D 2014-08-11T20:26:34.077 +C Automatically\sresize\sthe\shash\stable\sused\sby\sfts5. 
+D 2014-08-12T08:36:00.189 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,8 +110,8 @@ F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 -F ext/fts5/fts5_hash.c 2af412d00f65ad427f18acbe421c113413cdef06 -F ext/fts5/fts5_index.c 687736cba90a735ecd3cf9bf0e4174e7e5cc3f60 +F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 +F ext/fts5/fts5_index.c 0453bb593fe0ef6245762b6823e88839757fdc75 F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1202,7 +1202,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 617e2fac1c128212254f71b1a8fddaf0d1d90262 -R b29d729688c110acdf84d0128dad3fdf +P 9f8d678a0ea75e169daf8b3f00bd05f52a050ea6 +R 0ec55f1e05fde288099bbb5e345c2533 U dan -Z 772c48dd1029e003fed3d46062572012 +Z 0fd4a4dea7b2432712fcac248306942e diff --git a/manifest.uuid b/manifest.uuid index 7afe20b569..aa7d7d2665 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9f8d678a0ea75e169daf8b3f00bd05f52a050ea6 \ No newline at end of file +f1cb48f412a5f200f1fe04f91072864f379db08f \ No newline at end of file From 0bbd9c27c258779e0531b694f68da0b593ed1d07 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 12 Aug 2014 16:07:35 +0000 Subject: [PATCH 044/206] Cache the value of the "totals" record in memory during transactions. 
FossilOrigin-Name: 05dfdad445b22f375b71abe0b1fa1bf7ca331be7 --- ext/fts5/fts5.c | 10 +++--- ext/fts5/fts5Int.h | 2 ++ ext/fts5/fts5_storage.c | 71 +++++++++++++++++++++++++++-------------- manifest | 16 +++++----- manifest.uuid | 2 +- 5 files changed, 63 insertions(+), 38 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 06d5b8c70b..8b07047aec 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -934,7 +934,7 @@ static int fts5SyncMethod(sqlite3_vtab *pVtab){ int rc; Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_SYNC, 0); - rc = sqlite3Fts5IndexSync(pTab->pIndex, 1); + rc = sqlite3Fts5StorageSync(pTab->pStorage, 1); return rc; } @@ -964,7 +964,7 @@ static int fts5RollbackMethod(sqlite3_vtab *pVtab){ int rc; Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); - rc = sqlite3Fts5IndexRollback(pTab->pIndex); + rc = sqlite3Fts5StorageRollback(pTab->pStorage); return rc; } @@ -1353,7 +1353,7 @@ static int fts5RenameMethod( static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); - return sqlite3Fts5IndexSync(pTab->pIndex, 0); + return sqlite3Fts5StorageSync(pTab->pStorage, 0); } /* @@ -1364,7 +1364,7 @@ static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); - return sqlite3Fts5IndexSync(pTab->pIndex, 0); + return sqlite3Fts5StorageSync(pTab->pStorage, 0); } /* @@ -1375,7 +1375,7 @@ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); - return sqlite3Fts5IndexRollback(pTab->pIndex); + return 
sqlite3Fts5StorageRollback(pTab->pStorage); } /* diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 4ef8454e1f..602f293097 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -353,6 +353,8 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); +int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit); +int sqlite3Fts5StorageRollback(Fts5Storage *p); /* ** End of interface to code in fts5_storage.c. diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index a67421075f..ff0add5bad 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -17,6 +17,7 @@ struct Fts5Storage { Fts5Config *pConfig; Fts5Index *pIndex; + int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ i64 nTotalRow; /* Total number of rows in FTS table */ i64 *aTotalSize; /* Total sizes of each column */ sqlite3_stmt *aStmt[9]; @@ -317,31 +318,36 @@ static int fts5StorageInsertDocsize( } /* -** Load the contents of the "averages" record from disk into the -** p->nTotalRow and p->aTotalSize[] variables. +** Load the contents of the "averages" record from disk into the +** p->nTotalRow and p->aTotalSize[] variables. If successful, and if +** argument bCache is true, set the p->bTotalsValid flag to indicate +** that the contents of aTotalSize[] and nTotalRow are valid until +** further notice. ** ** Return SQLITE_OK if successful, or an SQLite error code if an error ** occurs. 
*/ -static int fts5StorageLoadTotals(Fts5Storage *p){ - int nCol = p->pConfig->nCol; - Fts5Buffer buf; - int rc; - memset(&buf, 0, sizeof(buf)); +static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ + int rc = SQLITE_OK; + if( p->bTotalsValid==0 ){ + int nCol = p->pConfig->nCol; + Fts5Buffer buf; + memset(&buf, 0, sizeof(buf)); - memset(p->aTotalSize, 0, sizeof(i64) * nCol); - p->nTotalRow = 0; - rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf); - if( rc==SQLITE_OK && buf.n ){ - int i = 0; - int iCol; - i += getVarint(&buf.p[i], (u64*)&p->nTotalRow); - for(iCol=0; iaTotalSize[iCol]); + memset(p->aTotalSize, 0, sizeof(i64) * nCol); + p->nTotalRow = 0; + rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf); + if( rc==SQLITE_OK && buf.n ){ + int i = 0; + int iCol; + i += getVarint(&buf.p[i], (u64*)&p->nTotalRow); + for(iCol=0; iaTotalSize[iCol]); + } } + sqlite3_free(buf.p); + p->bTotalsValid = bCache; } - sqlite3_free(buf.p); - return rc; } @@ -378,7 +384,7 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ int rc; sqlite3_stmt *pDel; - rc = fts5StorageLoadTotals(p); + rc = fts5StorageLoadTotals(p, 1); /* Delete the index records */ if( rc==SQLITE_OK ){ @@ -425,13 +431,13 @@ int sqlite3Fts5StorageInsert( Fts5Config *pConfig = p->pConfig; int rc = SQLITE_OK; /* Return code */ sqlite3_stmt *pInsert; /* Statement used to write %_content table */ - int eStmt; /* Type of statement used on %_content */ + int eStmt = 0; /* Type of statement used on %_content */ int i; /* Counter variable */ Fts5InsertCtx ctx; /* Tokenization callback context object */ Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ memset(&buf, 0, sizeof(Fts5Buffer)); - rc = fts5StorageLoadTotals(p); + rc = fts5StorageLoadTotals(p, 1); /* Insert the new row into the %_content table. 
*/ if( rc==SQLITE_OK ){ @@ -592,7 +598,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ /* Test that the "totals" (sometimes called "averages") record looks Ok */ if( rc==SQLITE_OK ){ int i; - rc = fts5StorageLoadTotals(p); + rc = fts5StorageLoadTotals(p, 0); for(i=0; rc==SQLITE_OK && inCol; i++){ if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB; } @@ -706,7 +712,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ } int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ - int rc = fts5StorageLoadTotals(p); + int rc = fts5StorageLoadTotals(p, 0); if( rc==SQLITE_OK ){ *pnToken = p->aTotalSize[iCol]; } @@ -714,10 +720,27 @@ int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ } int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ - int rc = fts5StorageLoadTotals(p); + int rc = fts5StorageLoadTotals(p, 0); if( rc==SQLITE_OK ){ *pnRow = p->nTotalRow; } return rc; } +/* +** Flush any data currently held in-memory to disk. +*/ +int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit){ + if( bCommit && p->bTotalsValid ){ + int rc = fts5StorageSaveTotals(p); + p->bTotalsValid = 0; + if( rc!=SQLITE_OK ) return rc; + } + return sqlite3Fts5IndexSync(p->pIndex, bCommit); +} + +int sqlite3Fts5StorageRollback(Fts5Storage *p){ + p->bTotalsValid = 0; + return sqlite3Fts5IndexRollback(p->pIndex); +} + diff --git a/manifest b/manifest index 0e007fa42a..1f03c49271 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Automatically\sresize\sthe\shash\stable\sused\sby\sfts5. -D 2014-08-12T08:36:00.189 +C Cache\sthe\svalue\sof\sthe\s"totals"\srecord\sin\smemory\sduring\stransactions. 
+D 2014-08-12T16:07:35.119 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,16 +103,16 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 15e585ed0194f94a1da360808f29184f9d44554c +F ext/fts5/fts5.c 31db0b90774201820915db17916a9a4d9ac1c80b F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a -F ext/fts5/fts5Int.h f17a25546d598fdc5cc47f576d38063fd9290963 +F ext/fts5/fts5Int.h b0eb5cd422ba74148b30753f01031d546ffb98e4 F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 0453bb593fe0ef6245762b6823e88839757fdc75 -F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b +F ext/fts5/fts5_storage.c 5913aa01a1dada1c5e1a39e4cbb44e84c5f7f350 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1202,7 +1202,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 9f8d678a0ea75e169daf8b3f00bd05f52a050ea6 -R 0ec55f1e05fde288099bbb5e345c2533 +P f1cb48f412a5f200f1fe04f91072864f379db08f +R 6321f5990f75eb3b76570e3e341050f0 U dan -Z 
0fd4a4dea7b2432712fcac248306942e +Z f483f9b1471761b27d0fa8b15d969ed4 diff --git a/manifest.uuid b/manifest.uuid index aa7d7d2665..dc8ddf389d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f1cb48f412a5f200f1fe04f91072864f379db08f \ No newline at end of file +05dfdad445b22f375b71abe0b1fa1bf7ca331be7 \ No newline at end of file From 6885bbc71382ce75a126c51574330fdc1a6cf3f0 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 18 Aug 2014 19:30:01 +0000 Subject: [PATCH 045/206] Add an "automerge=0" mode that disables auto-merging and falls back to fts4-style crisis merges. FossilOrigin-Name: 2397404e152b908d838e6491294b263b05943b3f --- ext/fts5/fts5.c | 6 ++++ ext/fts5/fts5.h | 3 +- ext/fts5/fts5Int.h | 2 ++ ext/fts5/fts5_index.c | 70 +++++++++++++++++++++++++++++++++++-------- manifest | 20 ++++++------- manifest.uuid | 2 +- tool/loadfts.c | 21 ++++++++++++- 7 files changed, 97 insertions(+), 27 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 8b07047aec..ade9c87ceb 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -872,6 +872,12 @@ static int fts5SpecialCommand(Fts5Table *pTab, sqlite3_value *pVal){ if( pgsz<32 ) pgsz = 32; sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); rc = SQLITE_OK; + }else + + if( n>10 && 0==sqlite3_strnicmp("automerge=", z, 10) ){ + int nAutomerge = atoi(&z[10]); + sqlite3Fts5IndexAutomerge(pTab->pIndex, nAutomerge); + rc = SQLITE_OK; } return rc; diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 82d4884dc7..b2865d6609 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -26,7 +26,7 @@ /************************************************************************* ** CUSTOM AUXILIARY FUNCTIONS ** -** Virtual table implemenations may overload SQL functions by implementing +** Virtual table implementations may overload SQL functions by implementing ** the sqlite3_module.xFindFunction() method. 
*/ @@ -42,7 +42,6 @@ typedef void (*fts5_extension_function)( ); /* -** ** xUserData(pFts): ** ** Return a copy of the context pointer the extension function was diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 602f293097..9512d3505f 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -273,6 +273,8 @@ int sqlite3Fts5IndexInit(sqlite3*); */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge); + /* ** Return the total number of entries read from the %_data table by ** this connection since it was created. diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 9676ad4b45..214cc13931 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -45,6 +45,7 @@ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ +#define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ @@ -291,6 +292,7 @@ struct Fts5Index { char *zDataTbl; /* Name of %_data table */ int pgsz; /* Target page size for this index */ int nMinMerge; /* Minimum input segments in a merge */ + int nCrisisMerge; /* Maximum allowed segments per level */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* @@ -2987,14 +2989,15 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ static void fts5IndexMergeLevel( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ - Fts5Structure *pStruct, /* Stucture of index iIdx */ + Fts5Structure **ppStruct, /* IN/OUT: Stucture of index iIdx */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */ ){ + Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; - Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1]; + Fts5StructureLevel *pLvlOut; Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ - int 
nRem = *pnRem; /* Output leaf pages left to write */ + int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ @@ -3009,12 +3012,24 @@ static void fts5IndexMergeLevel( memset(&term, 0, sizeof(Fts5Buffer)); writer.iIdx = iIdx; if( pLvl->nMerge ){ + pLvlOut = &pStruct->aLevel[iLvl+1]; assert( pLvlOut->nSeg>0 ); nInput = pLvl->nMerge; fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]); pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; }else{ int iSegid = fts5AllocateSegid(p, pStruct); + + /* Extend the Fts5Structure object as required to ensure the output + ** segment exists. */ + if( iLvl==pStruct->nLevel-1 ){ + fts5StructureAddLevel(&p->rc, ppStruct); + pStruct = *ppStruct; + } + fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); + pLvl = &pStruct->aLevel[iLvl]; + pLvlOut = &pStruct->aLevel[iLvl+1]; + fts5WriteInit(p, &writer, iIdx, iSegid); /* Add the new segment to the output level */ @@ -3049,7 +3064,7 @@ fflush(stdout); int nTerm; const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ - if( writer.nLeafWritten>nRem ){ + if( pnRem && writer.nLeafWritten>nRem ){ fts5ChunkIterRelease(&sPos); break; } @@ -3106,7 +3121,7 @@ fflush(stdout); fts5MultiIterFree(p, pIter); fts5BufferFree(&term); - *pnRem -= writer.nLeafWritten; + if( pnRem ) *pnRem -= writer.nLeafWritten; } /* @@ -3165,17 +3180,31 @@ static void fts5IndexWork( #endif if( nBestnMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; - if( iBestLvl==pStruct->nLevel-1 ){ - fts5StructureAddLevel(&p->rc, &pStruct); - } - fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0); - fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); + fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); *ppStruct = 
pStruct; } } +static void fts5IndexCrisisMerge( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index to work on */ + Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ +){ + Fts5Structure *pStruct = *ppStruct; + int iLvl = 0; + while( p->rc==SQLITE_OK + && iLvlnLevel + && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge + ){ + fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); + fts5StructurePromote(p, iLvl+1, pStruct); + iLvl++; + } + *ppStruct = pStruct; +} + typedef struct Fts5FlushCtx Fts5FlushCtx; struct Fts5FlushCtx { Fts5Index *pIdx; @@ -3203,7 +3232,6 @@ static int fts5FlushNewEntry( const u8 *aPoslist, int nPoslist ){ - Fts5Buffer *pBuf; Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; int rc = SQLITE_OK; @@ -3250,7 +3278,8 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ if( p->rc==SQLITE_OK ) p->rc = rc; fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); - /* Edit the Fts5Structure and write it back to the database. */ + /* Update the Fts5Structure. It is written back to the database by the + ** fts5StructureRelease() call below. */ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct); } @@ -3264,7 +3293,8 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ } } - fts5IndexWork(p, iHash, &pStruct, pgnoLast); + if( p->nMinMerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); + fts5IndexCrisisMerge(p, iHash, &pStruct); fts5StructureWrite(p, iHash, pStruct); fts5StructureRelease(pStruct); } @@ -3343,6 +3373,7 @@ int sqlite3Fts5IndexOpen( p->pConfig = pConfig; p->pgsz = 1000; p->nMinMerge = FTS5_MIN_MERGE; + p->nCrisisMerge = FTS5_CRISIS_MERGE; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); @@ -3962,6 +3993,19 @@ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ p->pgsz = pgsz; } +/* +** Set the minimum number of segments that an auto-merge operation should +** attempt to merge together. 
A value of 1 sets the object to use the +** compile time default. Zero or less disables auto-merge altogether. +*/ +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMinMerge){ + if( nMinMerge==1 ){ + p->nMinMerge = FTS5_MIN_MERGE; + }else{ + p->nMinMerge = nMinMerge; + } +} + /* ** Iterator pMulti currently points to a valid entry (not EOF). This ** function appends a copy of the position-list of the entry pMulti diff --git a/manifest b/manifest index 1f03c49271..0edf60c0aa 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Cache\sthe\svalue\sof\sthe\s"totals"\srecord\sin\smemory\sduring\stransactions. -D 2014-08-12T16:07:35.119 +C Add\san\s"automerge=0"\smode\sthat\sdisables\sauto-merging\sand\sfalls\sback\sto\sfts4-style\scrisis\smerges. +D 2014-08-18T19:30:01.020 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,15 +103,15 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 31db0b90774201820915db17916a9a4d9ac1c80b -F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a -F ext/fts5/fts5Int.h b0eb5cd422ba74148b30753f01031d546ffb98e4 +F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f +F ext/fts5/fts5.h 1c501ea7c5c686b8aa7fba0382badc5df6026aa7 +F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9 F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F 
ext/fts5/fts5_index.c 0453bb593fe0ef6245762b6823e88839757fdc75 +F ext/fts5/fts5_index.c 3f4d84a1762e4284319739d4672b90b18b91060a F ext/fts5/fts5_storage.c 5913aa01a1dada1c5e1a39e4cbb44e84c5f7f350 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1163,7 +1163,7 @@ F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5 F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce F tool/lemon.c 3ff0fec22f92dfb54e62eeb48772eddffdbeb0d6 F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc -F tool/loadfts.c b5b3206ddd58d89ec8d54038c784bcadd6195915 +F tool/loadfts.c 76b6589ab5efcdc9cfe16d43ab5a6c2618e44bd4 F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6 F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383 F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670 @@ -1202,7 +1202,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f1cb48f412a5f200f1fe04f91072864f379db08f -R 6321f5990f75eb3b76570e3e341050f0 +P 05dfdad445b22f375b71abe0b1fa1bf7ca331be7 +R a4a2c1b7e4d79c30cafb117d4f31d356 U dan -Z f483f9b1471761b27d0fa8b15d969ed4 +Z a836ac39870a35d2f8436dd5e99c8845 diff --git a/manifest.uuid b/manifest.uuid index dc8ddf389d..7ae37fa218 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -05dfdad445b22f375b71abe0b1fa1bf7ca331be7 \ No newline at end of file +2397404e152b908d838e6491294b263b05943b3f \ No newline at end of file diff --git a/tool/loadfts.c b/tool/loadfts.c index 7da07b15dd..5b2ed5dc6a 100644 --- a/tool/loadfts.c +++ b/tool/loadfts.c @@ -163,6 +163,9 @@ int main(int argc, char **argv){ char *zSql; VisitContext sCtx; + int nCmd = 0; + char **aCmd = 0; + if( argc % 2 ) showHelp(argv[0]); for(i=1; i<(argc-1); i+=2){ @@ -172,7 +175,7 @@ int 
main(int argc, char **argv){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } - if( strcmp(zOpt, "-trans")==0 ){ + else if( strcmp(zOpt, "-trans")==0 ){ nRowPerTrans = atoi(zArg); } else if( strcmp(zOpt, "-idx")==0 ){ @@ -182,6 +185,14 @@ int main(int argc, char **argv){ else if( strcmp(zOpt, "-dir")==0 ){ zDir = zArg; } + else if( strcmp(zOpt, "-special")==0 ){ + nCmd++; + aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd); + aCmd[nCmd-1] = zArg; + } + else{ + showHelp(argv[0]); + } } /* Open the database file */ @@ -198,6 +209,13 @@ int main(int argc, char **argv){ if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); + for(i=0; i Date: Mon, 25 Aug 2014 19:58:54 +0000 Subject: [PATCH 046/206] Add documentation for tokenizer api to fts5.h. Also add a script to extract extension API docs and format them as html. FossilOrigin-Name: e240d467e60b7755486aae5e8b0824f7c741f852 --- ext/fts5/extract_api_docs.tcl | 131 ++++++++++++++++++++++++++++++++++ ext/fts5/fts5.h | 88 ++++++++++++++++++++++- manifest | 13 ++-- manifest.uuid | 2 +- 4 files changed, 224 insertions(+), 10 deletions(-) create mode 100644 ext/fts5/extract_api_docs.tcl diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl new file mode 100644 index 0000000000..e0f3191d61 --- /dev/null +++ b/ext/fts5/extract_api_docs.tcl @@ -0,0 +1,131 @@ +# +# 2014 August 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#-------------------------------------------------------------------------- +# +# This script extracts the documentation for the API used by fts5 auxiliary +# functions from header file fts5.h. It outputs html text on stdout that +# is included in the documentation on the web. 
+# + +set input_file [file join [file dir [info script]] fts5.h] +set fd [open $input_file] +set data [read $fd] +close $fd + + +# Argument $data is the entire text of the fts5.h file. This function +# extracts the definition of the Fts5ExtensionApi structure from it and +# returns a key/value list of structure member names and definitions. i.e. +# +# iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ... +# +proc get_struct_members {data} { + + # Extract the structure definition from the fts5.h file. + regexp "struct Fts5ExtensionApi {(.*)};" $data -> defn + + # Remove all comments from the structure definition + regsub -all {/[*].*?[*]/} $defn {} defn2 + + set res [list] + foreach member [split $defn2 {;}] { + + set member [string trim $member] + if {$member!=""} { + catch { set name [lindex $member end] } + regexp {.*?[(][*]([^)]*)[)]} $member -> name + lappend res $name $member + } + } + + set res +} + +proc get_struct_docs {data names} { + # Extract the structure definition from the fts5.h file. + regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs + + set current_doc "" + set current_header "" + + foreach line [split $docs "\n"] { + regsub {[*]*} $line {} line + if {[regexp {^ } $line]} { + append current_doc "$line\n" + } elseif {[string trim $line]==""} { + if {$current_header!=""} { append current_doc "\n" } + } else { + if {$current_doc != ""} { + lappend res $current_header $current_doc + set current_doc "" + } + set subject n/a + regexp {^ *([[:alpha:]]*)} $line -> subject + if {[lsearch $names $subject]>=0} { + set current_header $subject + } else { + set current_header [string trim $line] + } + } + } + + if {$current_doc != ""} { + lappend res $current_header $current_doc + } + + set res +} + +# Initialize global array M as a map from Fts5StructureApi member name +# to member definition. i.e. +# +# iVersion -> {int iVersion} +# xUserData -> {void *(*xUserData)(Fts5Context*)} +# ... 
+# +array set M [get_struct_members $data] + +# Initialize global list D as a map from section name to documentation +# text. Most (all?) section names are structure member names. +# +set D [get_struct_docs $data [array names M]] + +foreach {hdr docs} $D { + if {[info exists M($hdr)]} { + set hdr $M($hdr) + } + puts "

  $hdr

" + + set mode "" + set bEmpty 1 + foreach line [split [string trim $docs] "\n"] { + if {[string trim $line]==""} { + if {$mode != ""} {puts ""} + set mode "" + } elseif {$mode == ""} { + if {[regexp {^ } $line]} { + set mode code + } else { + set mode p + } + puts "<$mode>" + } + puts $line + } + if {$mode != ""} {puts ""} +} + + + + + + + diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index b2865d6609..d3db15cc36 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -42,14 +42,14 @@ typedef void (*fts5_extension_function)( ); /* -** xUserData(pFts): +** EXTENSION API FUNCTIONS ** +** xUserData(pFts): ** Return a copy of the context pointer the extension function was ** registered with. ** ** ** xColumnTotalSize(pFts, iCol, pnToken): -** ** Returns the total number of tokens in column iCol, considering all ** rows in the FTS5 table. ** @@ -83,7 +83,6 @@ typedef void (*fts5_extension_function)( ** ** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): -** ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** @@ -179,5 +178,88 @@ struct Fts5ExtensionApi { /* ** CUSTOM AUXILIARY FUNCTIONS *************************************************************************/ + +/************************************************************************* +** CUSTOM TOKENIZERS +** +** Applications may also register custom tokenizer types. A tokenizer +** is registered by providing fts5 with a populated instance of the +** following structure. The structure methods are expected to function +** as follows: +** +** xCreate: +** This function is used to allocate and inititalize a tokenizer instance. +** A tokenizer instance is required to actually tokenize text. +** +** The first argument passed to this function is a copy of the (void*) +** pointer provided by the application when the fts5_tokenizer object +** was registered with SQLite. 
The second and third arguments are an +** array of nul-terminated strings containing the tokenizer arguments, +** if any, specified as part of the CREATE VIRTUAL TABLE statement used +** to create the fts5 table. +** +** The final argument is an output variable. If successful, (*ppOut) +** should be set to point to the new tokenizer handle and SQLITE_OK +** returned. If an error occurs, some value other than SQLITE_OK should +** be returned. In this case, fts5 assumes that the final value of *ppOut +** is undefined. +** +** xDelete: +** This function is invoked to delete a tokenizer handle previously +** allocated using xCreate(). Fts5 guarantees that this function will +** be invoked exactly once for each successful call to xCreate(). +** +** xTokenize: +** This function is expected to tokenize the nText byte string indicated +** by argument pText. pText may not be nul-terminated. The first argument +** passed to this function is a pointer to an Fts5Tokenizer object returned +** by an earlier call to xCreate(). +** +** For each token in the input string, the supplied callback xToken() must +** be invoked. The first argument to it should be a copy of the pointer +** passed as the second argument to xTokenize(). The next two arguments +** are a pointer to a buffer containing the token text, and the size of +** the token in bytes. The 4th and 5th arguments are the byte offsets of +** the first byte of and first byte immediately following the text from +** which the token is derived within the input. The final argument is the +** token position - the total number of tokens that appear before this one +** in the input buffer. +** +** The xToken() callback must be invoked with non-decreasing values of +** the iPos parameter. +** +** If an xToken() callback returns any value other than SQLITE_OK, then +** the tokenization should be abandoned and the xTokenize() method should +** immediately return a copy of the xToken() return value. 
Or, if the +** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, +** if an error occurs with the xTokenize() implementation itself, it +** may abandon the tokenization and return any error code other than +** SQLITE_OK or SQLITE_DONE. +** +*/ +typedef struct fts5_tokenizer fts5_tokenizer; +typedef struct Fts5Tokenizer Fts5Tokenizer; + +struct fts5_tokenizer { + int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); + void (*xDelete)(Fts5Tokenizer*); + int (*xTokenize)(Fts5Tokenizer*, + void *pCtx, + const char *pText, int nText, + int (*xToken)( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd, /* Byte offset of end of token within input text */ + int iPos /* Position of token in input (first token is 0) */ + ) + ); +}; + +/* +** END OF CUSTOM TOKENIZERS +*************************************************************************/ + #endif /* _FTS5_H */ diff --git a/manifest b/manifest index 0edf60c0aa..10e421f7a2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\san\s"automerge=0"\smode\sthat\sdisables\sauto-merging\sand\sfalls\sback\sto\sfts4-style\scrisis\smerges. -D 2014-08-18T19:30:01.020 +C Add\sdocumentation\sfor\stokenizer\sapi\sto\sfts5.h.\sAlso\sadd\sa\sscript\sto\sextract\sextension\sAPI\sdocs\sand\sformat\sthem\sas\shtml. 
+D 2014-08-25T19:58:54.559 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,8 +103,9 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 +F ext/fts5/extract_api_docs.tcl c30b9aa60260b3a208b0b89df3d8dbf92c6d460c F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f -F ext/fts5/fts5.h 1c501ea7c5c686b8aa7fba0382badc5df6026aa7 +F ext/fts5/fts5.h 064f9bf705e59d23abaa2191b3950604dad98b9f F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9 F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 @@ -1202,7 +1203,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 05dfdad445b22f375b71abe0b1fa1bf7ca331be7 -R a4a2c1b7e4d79c30cafb117d4f31d356 +P 2397404e152b908d838e6491294b263b05943b3f +R f1a35566903c71a22822fa6dd6758208 U dan -Z a836ac39870a35d2f8436dd5e99c8845 +Z 8c301746cf7784949ad4603ff5681e4e diff --git a/manifest.uuid b/manifest.uuid index 7ae37fa218..3372bdccc8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2397404e152b908d838e6491294b263b05943b3f \ No newline at end of file +e240d467e60b7755486aae5e8b0824f7c741f852 \ No newline at end of file From 48d70140678b2fb74f7afddb8185e4becc195bb8 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 15 Nov 2014 20:07:31 +0000 Subject: [PATCH 047/206] Fix the customization interfaces so that they match the documentation. 
FossilOrigin-Name: fba0b5fc7eead07a4853e78e02d788e7c714f6cd --- ext/fts5/extract_api_docs.tcl | 25 +++-- ext/fts5/fts5.c | 173 +++++++++++++++++++++++++++++++--- ext/fts5/fts5.h | 37 ++++++++ ext/fts5/fts5Int.h | 53 +++++++---- ext/fts5/fts5_aux.c | 4 +- ext/fts5/fts5_config.c | 50 ++-------- ext/fts5/fts5_expr.c | 35 +++++-- ext/fts5/fts5_tokenize.c | 145 ++++++++++++++++++++++++++++ main.mk | 9 +- manifest | 55 +++++------ manifest.uuid | 2 +- src/main.c | 10 +- src/test_config.c | 6 ++ test/fts5aa.test | 2 +- test/fts5ab.test | 4 +- test/fts5ac.test | 4 +- test/fts5ad.test | 4 +- test/fts5ae.test | 4 +- test/fts5af.test | 6 +- test/fts5ag.test | 4 +- test/fts5ah.test | 4 +- test/fts5ai.test | 4 +- test/fts5aj.test | 4 +- test/fts5ea.test | 4 +- tool/mksqlite3c.tcl | 2 + 25 files changed, 506 insertions(+), 144 deletions(-) create mode 100644 ext/fts5/fts5_tokenize.c diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl index e0f3191d61..81fe4cde53 100644 --- a/ext/fts5/extract_api_docs.tcl +++ b/ext/fts5/extract_api_docs.tcl @@ -15,6 +15,17 @@ # is included in the documentation on the web. # +set ::fts5_docs_output "" +if {[info commands hd_putsnl]==""} { + proc output {text} { + puts $text + } +} else { + proc output {text} { + append ::fts5_docs_output $text + } +} + set input_file [file join [file dir [info script]] fts5.h] set fd [open $input_file] set data [read $fd] @@ -102,28 +113,28 @@ foreach {hdr docs} $D { if {[info exists M($hdr)]} { set hdr $M($hdr) } - puts "

  $hdr

" + output "
  $hdr
" set mode "" set bEmpty 1 foreach line [split [string trim $docs] "\n"] { if {[string trim $line]==""} { - if {$mode != ""} {puts ""} + if {$mode != ""} {output ""} set mode "" } elseif {$mode == ""} { if {[regexp {^ } $line]} { - set mode code + set mode codeblock } else { set mode p } - puts "<$mode>" + output "<$mode>" } - puts $line + output $line } - if {$mode != ""} {puts ""} + if {$mode != ""} {output ""} } - +set ::fts5_docs_output diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index ade9c87ceb..368a0f88c9 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -23,6 +23,8 @@ typedef struct Fts5Global Fts5Global; typedef struct Fts5Auxiliary Fts5Auxiliary; typedef struct Fts5Auxdata Fts5Auxdata; +typedef struct Fts5TokenizerModule Fts5TokenizerModule; + /* ** NOTES ON TRANSACTIONS: ** @@ -65,9 +67,11 @@ struct Fts5TransactionState { ** all registered FTS5 extensions - tokenizers and auxiliary functions. */ struct Fts5Global { + fts5_api api; /* User visible part of object (see fts5.h) */ sqlite3 *db; /* Associated database connection */ i64 iNextId; /* Used to allocate unique cursor ids */ Fts5Auxiliary *pAux; /* First in list of all aux. functions */ + Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ Fts5Cursor *pCsr; /* First in list of all open cursors */ }; @@ -85,6 +89,19 @@ struct Fts5Auxiliary { Fts5Auxiliary *pNext; /* Next registered auxiliary function */ }; +/* +** Each tokenizer module registered with the FTS5 module is represented +** by an object of the following type. All such objects are stored as part +** of the Fts5Global.pTok list. +*/ +struct Fts5TokenizerModule { + char *zName; /* Name of tokenizer */ + void *pUserData; /* User pointer passed to xCreate() */ + fts5_tokenizer x; /* Tokenizer functions */ + void (*xDestroy)(void*); /* Destructor function */ + Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ +}; + /* ** Virtual-table object. 
*/ @@ -281,12 +298,14 @@ static int fts5InitVtab( sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ char **pzErr /* Write any error message here */ ){ + Fts5Global *pGlobal = (Fts5Global*)pAux; + const char **azConfig = (const char**)argv; int rc; /* Return code */ Fts5Config *pConfig; /* Results of parsing argc/argv */ Fts5Table *pTab = 0; /* New virtual table object */ /* Parse the arguments */ - rc = sqlite3Fts5ConfigParse(db, argc, (const char**)argv, &pConfig, pzErr); + rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); /* Allocate the new vtab object */ @@ -297,7 +316,7 @@ static int fts5InitVtab( }else{ memset(pTab, 0, sizeof(Fts5Table)); pTab->pConfig = pConfig; - pTab->pGlobal = (Fts5Global*)pAux; + pTab->pGlobal = pGlobal; } } @@ -857,6 +876,10 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ ** Argument pVal is the value assigned to column "fts" by the INSERT ** statement. This function returns SQLITE_OK if successful, or an SQLite ** error code if an error occurs. +** +** The commands implemented by this function are documented in the "Special +** INSERT Directives" section of the documentation. It should be updated if +** more commands are added to this function. */ static int fts5SpecialCommand(Fts5Table *pTab, sqlite3_value *pVal){ const char *z = (const char*)sqlite3_value_text(pVal); @@ -1387,13 +1410,14 @@ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ /* ** Register a new auxiliary function with global context pGlobal. */ -int sqlite3Fts5CreateAux( - Fts5Global *pGlobal, /* Global context (one per db handle) */ +static int fts5CreateAux( + fts5_api *pApi, /* Global context (one per db handle) */ const char *zName, /* Name of new function */ void *pUserData, /* User data for aux. function */ fts5_extension_function xFunc, /* Aux. 
function implementation */ void(*xDestroy)(void*) /* Destructor for pUserData */ ){ + Fts5Global *pGlobal = (Fts5Global*)pApi; int rc = sqlite3_overload_function(pGlobal->db, zName, -1); if( rc==SQLITE_OK ){ Fts5Auxiliary *pAux; @@ -1419,20 +1443,131 @@ int sqlite3Fts5CreateAux( return rc; } -static void fts5ModuleDestroy(void *pCtx){ - Fts5Auxiliary *pAux; - Fts5Auxiliary *pNext; - Fts5Global *pGlobal = (Fts5Global*)pCtx; - for(pAux=pGlobal->pAux; pAux; pAux=pNext){ - pNext = pAux->pNext; - if( pAux->xDestroy ){ - pAux->xDestroy(pAux->pUserData); +/* +** Register a new tokenizer. This is the implementation of the +** fts5_api.xCreateTokenizer() method. +*/ +static int fts5CreateTokenizer( + fts5_api *pApi, /* Global context (one per db handle) */ + const char *zName, /* Name of new function */ + void *pUserData, /* User data for aux. function */ + fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ + void(*xDestroy)(void*) /* Destructor for pUserData */ +){ + Fts5Global *pGlobal = (Fts5Global*)pApi; + Fts5TokenizerModule *pNew; + int nByte; /* Bytes of space to allocate */ + int rc = SQLITE_OK; + + nByte = sizeof(Fts5TokenizerModule) + strlen(zName) + 1; + pNew = (Fts5TokenizerModule*)sqlite3_malloc(nByte); + if( pNew ){ + memset(pNew, 0, nByte); + pNew->zName = (char*)&pNew[1]; + strcpy(pNew->zName, zName); + pNew->pUserData = pUserData; + pNew->x = *pTokenizer; + pNew->xDestroy = xDestroy; + pNew->pNext = pGlobal->pTok; + pGlobal->pTok = pNew; + }else{ + rc = SQLITE_NOMEM; + } + + return rc; +} + +/* +** Find a tokenizer. This is the implementation of the +** fts5_api.xFindTokenizer() method. 
+*/ +static int fts5FindTokenizer( + fts5_api *pApi, /* Global context (one per db handle) */ + const char *zName, /* Name of new function */ + fts5_tokenizer *pTokenizer /* Populate this object */ +){ + Fts5Global *pGlobal = (Fts5Global*)pApi; + int rc = SQLITE_OK; + Fts5TokenizerModule *pTok; + + for(pTok=pGlobal->pTok; pTok; pTok=pTok->pNext){ + if( sqlite3_stricmp(zName, pTok->zName)==0 ) break; + } + + if( pTok ){ + *pTokenizer = pTok->x; + }else{ + memset(pTokenizer, 0, sizeof(fts5_tokenizer)); + rc = SQLITE_ERROR; + } + + return rc; +} + +int sqlite3Fts5GetTokenizer( + Fts5Global *pGlobal, + const char **azArg, + int nArg, + Fts5Tokenizer **ppTok, + fts5_tokenizer **ppTokApi +){ + Fts5TokenizerModule *pMod = 0; + int rc = SQLITE_OK; + if( nArg==0 ){ + pMod = pGlobal->pTok; + }else{ + for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ + if( sqlite3_stricmp(azArg[0], pMod->zName)==0 ) break; } + } + + if( pMod==0 ){ + rc = SQLITE_ERROR; + }else{ + rc = pMod->x.xCreate(pMod->pUserData, &azArg[1], (nArg?nArg-1:0), ppTok); + *ppTokApi = &pMod->x; + } + + if( rc!=SQLITE_OK ){ + *ppTokApi = 0; + *ppTok = 0; + } + + return rc; +} + +static void fts5ModuleDestroy(void *pCtx){ + Fts5TokenizerModule *pTok, *pNextTok; + Fts5Auxiliary *pAux, *pNextAux; + Fts5Global *pGlobal = (Fts5Global*)pCtx; + + for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ + pNextAux = pAux->pNext; + if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); sqlite3_free(pAux); } + + for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ + pNextTok = pTok->pNext; + if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); + sqlite3_free(pTok); + } + sqlite3_free(pGlobal); } +static void fts5Fts5Func( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); + char buf[8]; + assert( nArg==0 ); + assert( sizeof(buf)>=sizeof(pGlobal) ); + memcpy(buf, pGlobal, sizeof(pGlobal)); + 
sqlite3_result_blob(pCtx, buf, sizeof(pGlobal), SQLITE_TRANSIENT); +} int sqlite3Fts5Init(sqlite3 *db){ static const sqlite3_module fts5Mod = { @@ -1471,10 +1606,20 @@ int sqlite3Fts5Init(sqlite3 *db){ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; + pGlobal->api.iVersion = 1; + pGlobal->api.xCreateFunction = fts5CreateAux; + pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; + pGlobal->api.xFindTokenizer = fts5FindTokenizer; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); - if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(db); - if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(pGlobal); + if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); + if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + db, "fts5", 0, SQLITE_UTF8, p, fts5Fts5Func, 0, 0 + ); + } } return rc; } diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index d3db15cc36..248459ac8a 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -261,5 +261,42 @@ struct fts5_tokenizer { ** END OF CUSTOM TOKENIZERS *************************************************************************/ +/************************************************************************* +** FTS5 EXTENSION REGISTRATION API +*/ +typedef struct fts5_api fts5_api; +struct fts5_api { + int iVersion; /* Currently always set to 1 */ + + /* Create a new tokenizer */ + int (*xCreateTokenizer)( + fts5_api *pApi, + const char *zName, + void *pContext, + fts5_tokenizer *pTokenizer, + void (*xDestroy)(void*) + ); + + /* Find an existing tokenizer */ + int (*xFindTokenizer)( + fts5_api *pApi, + const char *zName, + fts5_tokenizer *pTokenizer + ); + + /* Create a new auxiliary function */ + int (*xCreateFunction)( + fts5_api *pApi, + const char *zName, + void *pContext, + fts5_extension_function xFunction, + void 
(*xDestroy)(void*) + ); +}; + +/* +** END OF REGISTRATION API +*************************************************************************/ + #endif /* _FTS5_H */ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 9512d3505f..3808d31617 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -31,6 +31,23 @@ /* Name of rank column */ #define FTS5_RANK_NAME "rank" +/************************************************************************** +** Interface to code in fts5.c. +*/ +typedef struct Fts5Global Fts5Global; + +int sqlite3Fts5GetTokenizer( + Fts5Global*, + const char **azArg, + int nArg, + Fts5Tokenizer**, + fts5_tokenizer** +); + +/* +** End of interface to code in fts5.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_config.c. fts5_config.c contains contains code ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. @@ -50,10 +67,13 @@ struct Fts5Config { char **azCol; /* Column names */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ - sqlite3_tokenizer *pTokenizer; /* Tokenizer instance for this table */ + Fts5Tokenizer *pTok; + fts5_tokenizer *pTokApi; }; -int sqlite3Fts5ConfigParse(sqlite3*, int, const char**, Fts5Config**, char**); +int sqlite3Fts5ConfigParse( + Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** +); void sqlite3Fts5ConfigFree(Fts5Config*); int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); @@ -403,7 +423,7 @@ i64 sqlite3Fts5ExprRowid(Fts5Expr*); void sqlite3Fts5ExprFree(Fts5Expr*); /* Called during startup to register a UDF with SQLite */ -int sqlite3Fts5ExprInit(sqlite3*); +int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); @@ -453,32 +473,25 @@ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); 
**************************************************************************/ -/************************************************************************** -** Interface to code in fts5.c. -*/ -typedef struct Fts5Global Fts5Global; - -int sqlite3Fts5CreateAux( - Fts5Global*, - const char*, - void*, - fts5_extension_function, - void(*)(void*) -); -/* -** End of interface to code in fts5.c. -**************************************************************************/ - /************************************************************************** ** Interface to code in fts5_aux.c. */ -int sqlite3Fts5AuxInit(Fts5Global*); +int sqlite3Fts5AuxInit(fts5_api*); /* ** End of interface to code in fts5_aux.c. **************************************************************************/ +/************************************************************************** +** Interface to code in fts5_tokenizer.c. +*/ + +int sqlite3Fts5TokenizerInit(fts5_api*); +/* +** End of interface to code in fts5_tokenizer.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_sorter.c. 
*/ diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 85bad5c496..186b43c166 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -956,7 +956,7 @@ static void fts5TestFunction( sqlite3Fts5BufferFree(&s); } -int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ +int sqlite3Fts5AuxInit(fts5_api *pApi){ struct Builtin { const char *zFunc; /* Function name (nul-terminated) */ void *pUserData; /* User-data pointer */ @@ -973,7 +973,7 @@ int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ int i; /* To iterate through builtin functions */ for(i=0; rc==SQLITE_OK && ixCreateFunction(pApi, aBuiltin[i].zFunc, aBuiltin[i].pUserData, aBuiltin[i].xFunc, diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index d326f60cc8..68c340a48f 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -113,26 +113,15 @@ static char *fts5Strdup(const char *z){ return sqlite3_mprintf("%s", z); } -void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**); - /* ** Allocate an instance of the default tokenizer ("simple") at ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error ** code if an error occurs. */ -static int fts5ConfigDefaultTokenizer(Fts5Config *pConfig){ - const sqlite3_tokenizer_module *pMod; /* Tokenizer module "simple" */ - sqlite3_tokenizer *pTokenizer; /* Tokenizer instance */ - int rc; /* Return code */ - - sqlite3Fts3SimpleTokenizerModule(&pMod); - rc = pMod->xCreate(0, 0, &pTokenizer); - if( rc==SQLITE_OK ){ - pTokenizer->pModule = pMod; - pConfig->pTokenizer = pTokenizer; - } - - return rc; +static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ + return sqlite3Fts5GetTokenizer( + pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi + ); } /* @@ -148,6 +137,7 @@ static int fts5ConfigDefaultTokenizer(Fts5Config *pConfig){ ** such error message using sqlite3_free(). 
*/ int sqlite3Fts5ConfigParse( + Fts5Global *pGlobal, sqlite3 *db, int nArg, /* Number of arguments */ const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ @@ -206,8 +196,8 @@ int sqlite3Fts5ConfigParse( } } - if( rc==SQLITE_OK && pRet->pTokenizer==0 ){ - rc = fts5ConfigDefaultTokenizer(pRet); + if( rc==SQLITE_OK && pRet->pTok==0 ){ + rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); } if( rc!=SQLITE_OK ){ @@ -223,8 +213,8 @@ int sqlite3Fts5ConfigParse( void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ if( pConfig ){ int i; - if( pConfig->pTokenizer ){ - pConfig->pTokenizer->pModule->xDestroy(pConfig->pTokenizer); + if( pConfig->pTok && pConfig->pTokApi->xDelete ){ + pConfig->pTokApi->xDelete(pConfig->pTok); } sqlite3_free(pConfig->zDb); sqlite3_free(pConfig->zName); @@ -302,27 +292,7 @@ int sqlite3Fts5Tokenize( void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ ){ - const sqlite3_tokenizer_module *pMod = pConfig->pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCsr = 0; - int rc; - - rc = pMod->xOpen(pConfig->pTokenizer, pText, nText, &pCsr); - assert( rc==SQLITE_OK || pCsr==0 ); - if( rc==SQLITE_OK ){ - const char *pToken; /* Pointer to token buffer */ - int nToken; /* Size of token in bytes */ - int iStart, iEnd, iPos; /* Start, end and position of token */ - pCsr->pTokenizer = pConfig->pTokenizer; - for(rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos); - rc==SQLITE_OK; - rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos) - ){ - if( (rc = xToken(pCtx, pToken, nToken, iStart, iEnd, iPos)) ) break; - } - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - pMod->xClose(pCsr); - } - return rc; + return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index b1fbe9ea97..efb91dadb6 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1520,20 +1520,22 @@ static char 
*fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ } /* -** The implementation of user-defined scalar function fts5_expr(). +** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) +** and fts5_expr_tcl() (bTcl!=0). */ static void fts5ExprFunction( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ - sqlite3_value **apVal /* Function arguments */ + sqlite3_value **apVal, /* Function arguments */ + int bTcl ){ + Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); sqlite3 *db = sqlite3_context_db_handle(pCtx); const char *zExpr = 0; char *zErr = 0; Fts5Expr *pExpr = 0; int rc; int i; - int bTcl = sqlite3_user_data(pCtx)!=0; const char **azConfig; /* Array of arguments for Fts5Config */ const char *zNearsetCmd = "nearset"; @@ -1558,7 +1560,7 @@ static void fts5ExprFunction( } zExpr = (const char*)sqlite3_value_text(apVal[0]); - rc = sqlite3Fts5ConfigParse(db, nConfig, azConfig, &pConfig, &zErr); + rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); } @@ -1588,25 +1590,40 @@ static void fts5ExprFunction( sqlite3Fts5ExprFree(pExpr); } +static void fts5ExprFunctionHr( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + fts5ExprFunction(pCtx, nArg, apVal, 0); +} +static void fts5ExprFunctionTcl( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + fts5ExprFunction(pCtx, nArg, apVal, 1); +} + /* ** This is called during initialization to register the fts5_expr() scalar ** UDF with the SQLite handle passed as the only argument. 
*/ -int sqlite3Fts5ExprInit(sqlite3 *db){ +int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ struct Fts5ExprFunc { const char *z; - void *p; void (*x)(sqlite3_context*,int,sqlite3_value**); } aFunc[] = { - { "fts5_expr", 0, fts5ExprFunction }, - { "fts5_expr_tcl", (void*)1, fts5ExprFunction }, + { "fts5_expr", fts5ExprFunctionHr }, + { "fts5_expr_tcl", fts5ExprFunctionTcl }, }; int i; int rc = SQLITE_OK; + void *pCtx = (void*)pGlobal; for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){ struct Fts5ExprFunc *p = &aFunc[i]; - rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, p->p, p->x, 0, 0); + rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); } return rc; diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c new file mode 100644 index 0000000000..ef7c767544 --- /dev/null +++ b/ext/fts5/fts5_tokenize.c @@ -0,0 +1,145 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +#include "fts5.h" + + +/* +** Create a "simple" tokenizer. +*/ +static int fts5SimpleCreate( + void *pCtx, + const char **azArg, int nArg, + Fts5Tokenizer **ppOut +){ + *ppOut = 0; + return SQLITE_OK; +} + +/* +** Delete a "simple" tokenizer. +*/ +static void fts5SimpleDelete(Fts5Tokenizer *p){ + return; +} + +/* +** For tokenizers with no "unicode" modifier, the set of token characters +** is the same as the set of ASCII range alphanumeric characters. 
+*/ +static unsigned char aSimpleTokenChar[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30..0x3F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50..0x5F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */ +}; + + +static void simpleFold(char *aOut, const char *aIn, int nByte){ + int i; + for(i=0; i='A' && c<='Z' ) c += 32; + aOut[i] = c; + } +} + +/* +** Tokenize some text using the simple tokenizer. +*/ +static int fts5SimpleTokenize( + Fts5Tokenizer *pTokenizer, + void *pCtx, + const char *pText, int nText, + int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) +){ + int rc; + int ie; + int is = 0; + int iPos = 0; + + char aFold[64]; + int nFold = sizeof(aFold); + char *pFold = aFold; + + do { + int nByte; + + /* Skip any leading divider characters. 
*/ + while( isnFold ){ + if( pFold!=aFold ) sqlite3_free(pFold); + pFold = sqlite3_malloc(nByte*2); + if( pFold==0 ){ + rc = SQLITE_NOMEM; + break; + } + nFold = nByte*2; + } + simpleFold(pFold, &pText[is], nByte); + + /* Invoke the token callback */ + rc = xToken(pCtx, pFold, nByte, is, ie, iPos); + iPos++; + is = ie+1; + }while( isxCreateTokenizer(pApi, + aBuiltin[i].zName, + &aBuiltin[i].pUserData, + &aBuiltin[i].x, + 0 + ); + } + + return SQLITE_OK; +} + + diff --git a/main.mk b/main.mk index 5ac76de2d8..e30bb92c82 100644 --- a/main.mk +++ b/main.mk @@ -80,6 +80,7 @@ LIBOBJ += fts5_expr.o LIBOBJ += fts5_hash.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o +LIBOBJ += fts5_tokenize.o LIBOBJ += fts5parse.o @@ -236,7 +237,8 @@ SRC += \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ - $(TOP)/ext/fts5/fts5_storage.c + $(TOP)/ext/fts5/fts5_storage.c \ + $(TOP)/ext/fts5/fts5_tokenize.c # Generated source code files @@ -610,10 +612,15 @@ fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c +fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_tokenize.c + fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y + mv fts5parse.c fts5parse.c.orig + cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' > fts5parse.c # Rules for building test programs and for running tests diff --git a/manifest b/manifest index 10e421f7a2..3c7f49ac42 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sdocumentation\sfor\stokenizer\sapi\sto\sfts5.h.\sAlso\sadd\sa\sscript\sto\sextract\sextension\sAPI\sdocs\sand\sformat\sthem\sas\shtml. -D 2014-08-25T19:58:54.559 +C Fix\sthe\scustomization\sinterfaces\sso\sthat\sthey\smatch\sthe\sdocumentation. 
+D 2014-11-15T20:07:31.166 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,17 +103,18 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/extract_api_docs.tcl c30b9aa60260b3a208b0b89df3d8dbf92c6d460c -F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f -F ext/fts5/fts5.h 064f9bf705e59d23abaa2191b3950604dad98b9f -F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9 -F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e +F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 +F ext/fts5/fts5.c cc3f0e4bac499c81d1311199f929dcad5e40ee8e +F ext/fts5/fts5.h a77cad780eec8f10850fdba0f44079a92561b790 +F ext/fts5/fts5Int.h a3c46f9dae13277de6fc3a6f8863d337ca660d6a +F ext/fts5/fts5_aux.c 6b0612e4312ca27264f7dacb0c97abc723a4b472 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 -F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 -F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 +F ext/fts5/fts5_config.c a292fe73864086e51e7974d842cc09f6379fbae0 +F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 3f4d84a1762e4284319739d4672b90b18b91060a F ext/fts5/fts5_storage.c 5913aa01a1dada1c5e1a39e4cbb44e84c5f7f350 +F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -158,7 +159,7 @@ F 
ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk c4fff232b880b91bf665cd2951465de61178e444 +F main.mk 8a02fddafc05159c4b7d65200e912cf549f978c1 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -201,7 +202,7 @@ F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c 0df0b1550b9cc1f58229644735e317ac89131f12 F src/lempar.c cdf0a000315332fc9b50b62f3b5e22e080a0952b F src/loadext.c 867c7b330b740c6c917af9956b13b81d0a048303 -F src/main.c e777879ad7c431f5b3b5d49c8419727b61d7c1be +F src/main.c afc0ae834a8abca9079908f6193b3886564164a0 F src/malloc.c 0203ebce9152c6a0e5de520140b8ba65187350be F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c c0c990fcaddff810ea277b4fb5d9138603dd5d4b @@ -257,7 +258,7 @@ F src/test_async.c 21e11293a2f72080eda70e1124e9102044531cd8 F src/test_autoext.c dea8a01a7153b9adc97bd26161e4226329546e12 F src/test_backup.c 3875e899222b651e18b662f86e0e50daa946344e F src/test_btree.c 2e9978eca99a9a4bfa8cae949efb00886860a64f -F src/test_config.c dabaa32868974e1ae39770cc17d7e066a9c38e6d +F src/test_config.c 9acba5c44c1562159104096e6e2ed5d293d4b86d F src/test_demovfs.c 69b2085076654ebc18014cbc6386f04409c959a9 F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc F src/test_fs.c ced436e3d4b8e4681328409b8081051ce614e28f @@ -597,17 +598,17 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test 4c7cbf1d38d30e7aaa8febf44958dd13bbb53bf8 -F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 -F 
test/fts5ac.test 399533fe52b7383053368ab8ba01ae182391e5d7 -F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e -F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 -F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test 009b993a9b7ebc43f84c10e53bd778b1dc8ffbe7 -F test/fts5ai.test 4dee71c23ddbcf2b0fc5d5586f241002b883c10e -F test/fts5aj.test 67014e9fc7c069425d67d549b133742b67755047 -F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 +F test/fts5aa.test 16bf1dbb92d4d63c7c357b480b1a47309f654ad1 +F test/fts5ab.test 657d6dc5ddc57bfea4af1bb85204d4f3539cd3e8 +F test/fts5ac.test f38ceca8a43fa0ff86122bec72428a4067b17bc4 +F test/fts5ad.test d29ff407c70df470c9a8fcbfe5bc80efd662f2c4 +F test/fts5ae.test d4141786d817e0198f89f8c66749af38359839a7 +F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc +F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb +F test/fts5ah.test af9274cdb58a69780c7e57e61581990665ac0fb6 +F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d +F test/fts5aj.test fe5c40216cac8072f29e454ee0540c7b89d17ccd +F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -1172,7 +1173,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 78a77b2c554d534c6f2dc903130186ed15715460 F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 1712d3d71256ca1f297046619c89e77a4d7c8f6d -F tool/mksqlite3c.tcl becaa9d5617dfe137e73dddda9dab8f58bc71e8c +F tool/mksqlite3c.tcl 5be4e6b3ecf563c1ec6d579dc03ea2839e7fbc8b F tool/mksqlite3h.tcl ba24038056f51fde07c0079c41885ab85e2cff12 F tool/mksqlite3internalh.tcl b6514145a7d5321b47e64e19b8116cc44f973eb1 F tool/mkvsix.tcl 52a4c613707ac34ae9c226e5ccc69cb948556105 @@ 
-1203,7 +1204,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2397404e152b908d838e6491294b263b05943b3f -R f1a35566903c71a22822fa6dd6758208 +P e240d467e60b7755486aae5e8b0824f7c741f852 +R 28baa98ae078d2f041a83a26b4550455 U dan -Z 8c301746cf7784949ad4603ff5681e4e +Z 1c1b566687b968f39cda6c2d32a692b6 diff --git a/manifest.uuid b/manifest.uuid index 3372bdccc8..ccae749918 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e240d467e60b7755486aae5e8b0824f7c741f852 \ No newline at end of file +fba0b5fc7eead07a4853e78e02d788e7c714f6cd \ No newline at end of file diff --git a/src/main.c b/src/main.c index 5d894b2ea9..d0ecbb56b8 100644 --- a/src/main.c +++ b/src/main.c @@ -19,6 +19,9 @@ #ifdef SQLITE_ENABLE_FTS3 # include "fts3.h" #endif +#ifdef SQLITE_ENABLE_FTS5 +int sqlite3Fts5Init(sqlite3*); +#endif #ifdef SQLITE_ENABLE_RTREE # include "rtree.h" #endif @@ -2609,7 +2612,12 @@ static int openDatabase( #ifdef SQLITE_ENABLE_FTS3 if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts3Init(db); - if( rc==SQLITE_OK ) rc = sqlite3Fts5Init(db); + } +#endif + +#ifdef SQLITE_ENABLE_FTS5 + if( !db->mallocFailed && rc==SQLITE_OK ){ + rc = sqlite3Fts5Init(db); } #endif diff --git a/src/test_config.c b/src/test_config.c index bf8afd8e67..2f8bed4477 100644 --- a/src/test_config.c +++ b/src/test_config.c @@ -330,6 +330,12 @@ static void set_options(Tcl_Interp *interp){ Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_ENABLE_FTS5 + Tcl_SetVar2(interp, "sqlite_options", "fts5", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "fts5", "0", TCL_GLOBAL_ONLY); +#endif + #if defined(SQLITE_ENABLE_FTS3) && defined(SQLITE_ENABLE_FTS4_UNICODE61) Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", 
TCL_GLOBAL_ONLY); #else diff --git a/test/fts5aa.test b/test/fts5aa.test index f5c1977f20..4126034687 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -17,7 +17,7 @@ source $testdir/tester.tcl set testprefix fts5aa # If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ab.test b/test/fts5ab.test index 1f6b7171da..4e2e9b13d0 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ab -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ac.test b/test/fts5ac.test index b137e3a938..be6177705e 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ac -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ad.test b/test/fts5ad.test index 70349388ee..824444a867 100644 --- a/test/fts5ad.test +++ b/test/fts5ad.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ad -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ae.test b/test/fts5ae.test index 4a5e4d041e..32d75616ae 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ae -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { finish_test return } diff --git a/test/fts5af.test b/test/fts5af.test index d59df77291..da70dc7ae6 100644 --- a/test/fts5af.test +++ b/test/fts5af.test @@ -19,8 +19,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5af -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } @@ -138,7 +138,7 @@ foreach {tn doc res} { 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} } { - do_snippet_test 1.$tn $doc "X + Y" $res + do_snippet_test 2.$tn $doc "X + Y" $res } finish_test diff --git a/test/fts5ag.test b/test/fts5ag.test index 14063669f4..647604ef64 100644 --- a/test/fts5ag.test +++ b/test/fts5ag.test @@ -16,8 +16,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ag -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ah.test b/test/fts5ah.test index f5e25848ab..fd78d23577 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -16,8 +16,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ah -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ai.test b/test/fts5ai.test index 705ca15988..eba9d09d71 100644 --- a/test/fts5ai.test +++ b/test/fts5ai.test @@ -18,8 +18,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ai -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { finish_test return } diff --git a/test/fts5aj.test b/test/fts5aj.test index cb8e2d2a2f..8b333ae562 100644 --- a/test/fts5aj.test +++ b/test/fts5aj.test @@ -20,8 +20,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5aj -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ea.test b/test/fts5ea.test index a76f901d06..1518b8892c 100644 --- a/test/fts5ea.test +++ b/test/fts5ea.test @@ -14,8 +14,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ea -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index 0d3120ce1a..073d67a5dd 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -335,9 +335,11 @@ foreach file { fts5.c fts5_config.c fts5_expr.c + fts5_hash.c fts5_index.c fts5parse.c fts5_storage.c + fts5_tokenize.c rtree.c icu.c From cb62aae034175e974965a7113c05491cded03048 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 24 Nov 2014 16:24:33 +0000 Subject: [PATCH 048/206] Add the auxiliary highlight() function to fts5. 
FossilOrigin-Name: 059092379f981eb919b500ce447006f9e645fc5a --- ext/fts5/fts5.c | 131 +++++++++++++++++++++++++++++++++---- ext/fts5/fts5.h | 36 +++++++++- ext/fts5/fts5Int.h | 3 + ext/fts5/fts5_aux.c | 149 ++++++++++++++++++++++++++++++++++++++++++ ext/fts5/fts5_index.c | 6 +- manifest | 23 +++---- manifest.uuid | 2 +- test/fts5ae.test | 2 +- test/fts5ak.test | 111 +++++++++++++++++++++++++++++++ 9 files changed, 434 insertions(+), 29 deletions(-) create mode 100644 test/fts5ak.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 368a0f88c9..fb3c0d7197 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -165,6 +165,9 @@ struct Fts5Cursor { Fts5Auxiliary *pAux; /* Currently executing extension function */ Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ int *aColumnSize; /* Values for xColumnSize() */ + + int nInstCount; /* Number of phrase instances */ + int *aInst; /* 3 integers per phrase instance */ }; /* @@ -488,6 +491,18 @@ static int fts5StmtType(int idxNum){ return FTS5_STMT_LOOKUP; } +/* +** This function is called after the cursor passed as the only argument +** is moved to point at a different row. It clears all cached data +** specific to the previous row stored by the cursor object. +*/ +static void fts5CsrNewrow(Fts5Cursor *pCsr){ + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + sqlite3_free(pCsr->aInst); + pCsr->aInst = 0; + pCsr->nInstCount = 0; +} + /* ** Close the cursor. For additional information see the documentation ** on the xClose method of the virtual table interface. 
@@ -499,6 +514,7 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ Fts5Auxdata *pData; Fts5Auxdata *pNext; + fts5CsrNewrow(pCsr); if( pCsr->pStmt ){ int eStmt = fts5StmtType(pCsr->idxNum); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); @@ -557,7 +573,7 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ pSorter->aIdx[i] = &aBlob[nBlob] - a; pSorter->aPoslist = a; - CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + fts5CsrNewrow(pCsr); } return rc; @@ -583,7 +599,7 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ CsrFlagSet(pCsr, FTS5CSR_EOF); } - CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + fts5CsrNewrow(pCsr); break; case FTS5_PLAN_SPECIAL: { @@ -666,7 +682,7 @@ static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ CsrFlagSet(pCsr, FTS5CSR_EOF); } - CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + fts5CsrNewrow(pCsr); return rc; } @@ -1044,6 +1060,104 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); } +static int fts5CsrPoslist(Fts5Cursor *pCsr, int iPhrase, const u8 **pa){ + int n; + if( pCsr->pSorter ){ + Fts5Sorter *pSorter = pCsr->pSorter; + int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); + n = pSorter->aIdx[iPhrase] - i1; + *pa = &pSorter->aPoslist[i1]; + }else{ + n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); + } + return n; +} + +/* +** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated +** correctly for the current view. Return SQLITE_OK if successful, or an +** SQLite error code otherwise. 
+*/ +static int fts5CacheInstArray(Fts5Cursor *pCsr){ + int rc = SQLITE_OK; + if( pCsr->aInst==0 ){ + Fts5PoslistReader *aIter; /* One iterator for each phrase */ + int nIter; /* Number of iterators/phrases */ + int nByte; + + nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); + nByte = sizeof(Fts5PoslistReader) * nIter; + aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); + if( aIter ){ + Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */ + int nInst; /* Number instances seen so far */ + int i; + + /* Initialize all iterators */ + for(i=0; iaInst = (int*)buf.p; + pCsr->nInstCount = nInst; + sqlite3_free(aIter); + } + } + return rc; +} + +static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + int rc; + if( SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ + *pnInst = pCsr->nInstCount; + } + return rc; +} + +static int fts5ApiInst( + Fts5Context *pCtx, + int iIdx, + int *piPhrase, + int *piCol, + int *piOff +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + int rc; + if( SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ + if( iIdx<0 || iIdx>=pCsr->nInstCount ){ + rc = SQLITE_RANGE; + }else{ + *piPhrase = pCsr->aInst[iIdx*3]; + *piCol = pCsr->aInst[iIdx*3 + 1]; + *piOff = pCsr->aInst[iIdx*3 + 2]; + } + } + return rc; +} + static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ return fts5CursorRowid((Fts5Cursor*)pCtx); } @@ -1088,14 +1202,7 @@ static int fts5ApiPoslist( ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; const u8 *a; int n; /* Poslist for phrase iPhrase */ - if( pCsr->pSorter ){ - Fts5Sorter *pSorter = pCsr->pSorter; - int i1 = (iPhrase==0 ? 
0 : pSorter->aIdx[iPhrase-1]); - n = pSorter->aIdx[iPhrase] - i1; - a = &pSorter->aPoslist[i1]; - }else{ - n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a); - } + n = fts5CsrPoslist(pCsr, iPhrase, &a); return sqlite3Fts5PoslistNext64(a, n, pi, piPos); } @@ -1162,6 +1269,8 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiTokenize, fts5ApiPhraseCount, fts5ApiPhraseSize, + fts5ApiInstCount, + fts5ApiInst, fts5ApiRowid, fts5ApiColumnText, fts5ApiColumnSize, diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 248459ac8a..71db9577c9 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -70,13 +70,42 @@ typedef void (*fts5_extension_function)( ** Returns the number of tokens in phrase iPhrase of the query. Phrases ** are numbered starting from zero. ** +** xInstCount: +** Set *pnInst to the total number of occurrences of all phrases within +** the query within the current row. Return SQLITE_OK if successful, or +** an error code (i.e. SQLITE_NOMEM) if an error occurs. +** +** xInst: +** Query for the details of phrase match iIdx within the current row. +** Phrase matches are numbered starting from zero, so the iIdx argument +** should be greater than or equal to zero and smaller than the value +** output by xInstCount(). +** +** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM) +** if an error occurs. +** ** xRowid: ** Returns the rowid of the current row. ** ** xPoslist: -** Iterate through instances of phrase iPhrase in the current row. +** Iterate through phrase instances in the current row. If the iPhrase +** argument is 0 or greater, then only instances of phrase iPhrase are +** visited. If it is less than 0, instances of all phrases are visited. +** +** At EOF, -1 is returned and output variable iPos set to -1. 
+** +** +** sqlite3_int64 iPos; +** int iPhrase; +** int ii = 0; +** +** while( (iPhrase = pFts->xPoslist(pFts, -1, &ii, &iPos) >= 0 ){ +** int iCol = FTS5_POS2COLUMN(iPos); +** int iOff = FTS5_POS2OFFSET(iPos); +** // An instance of phrase iPhrase at offset iOff of column iCol. +** } +** ** -** At EOF, a non-zero value is returned and output variable iPos set to -1. ** ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. @@ -160,6 +189,9 @@ struct Fts5ExtensionApi { int (*xPhraseCount)(Fts5Context*); int (*xPhraseSize)(Fts5Context*, int iPhrase); + int (*xInstCount)(Fts5Context*, int *pnInst); + int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); + sqlite3_int64 (*xRowid)(Fts5Context*); int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 3808d31617..999777fcdf 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -301,6 +301,9 @@ void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge); */ int sqlite3Fts5IndexReads(Fts5Index *p); +/* Malloc utility */ +void *sqlite3Fts5MallocZero(int *pRc, int nByte); + /* ** End of interface to code in fts5_index.c. **************************************************************************/ diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 186b43c166..a039b7b536 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -14,6 +14,138 @@ #include "fts5Int.h" #include +/************************************************************************* +** Start of highlight() implementation. 
+*/ +typedef struct HighlightContext HighlightContext; +struct HighlightContext { + const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ + Fts5Context *pFts; /* First arg to pass to pApi functions */ + int iInst; /* Current phrase instance index */ + int iStart; /* First token of current phrase */ + int iEnd; /* Last token of current phrase */ + + const char *zOpen; /* Opening highlight */ + const char *zClose; /* Closing highlight */ + int iCol; /* Column to read from */ + + const char *zIn; /* Input text */ + int nIn; /* Size of input text in bytes */ + int iOff; /* Current offset within zIn[] */ + char *zOut; /* Output value */ +}; + +static int fts5HighlightAppend(HighlightContext *p, const char *z, int n){ + if( n<0 ) n = strlen(z); + p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); + if( p->zOut==0 ) return SQLITE_NOMEM; + return SQLITE_OK; +} + +static int fts5HighlightCb( + void *pContext, /* Pointer to HighlightContext object */ + const char *pToken, /* Buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Start offset of token */ + int iEnd, /* End offset of token */ + int iPos /* Position offset of token */ +){ + HighlightContext *p = (HighlightContext*)pContext; + int rc = SQLITE_OK; + + if( iPos==p->iStart ){ + rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iStart - p->iOff); + p->iOff = iStart; + if( rc==SQLITE_OK ){ + rc = fts5HighlightAppend(p, p->zOpen, -1); + } + } + + if( rc==SQLITE_OK ){ + rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iEnd - p->iOff); + p->iOff = iEnd; + } + + if( rc==SQLITE_OK && iPos==p->iEnd ){ + int bClose = 1; + do{ + int iP, iPCol, iOff; + rc = p->pApi->xInst(p->pFts, ++p->iInst, &iP, &iPCol, &iOff); + if( rc==SQLITE_RANGE || iPCol!=p->iCol ){ + p->iStart = -1; + p->iEnd = -1; + rc = SQLITE_OK; + }else{ + iEnd = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP); + if( iEnd<=p->iEnd ) continue; + if( iOff<=p->iEnd ) bClose = 0; + p->iStart = iOff; + p->iEnd = iEnd; + } + 
}while( 0 ); + + if( rc==SQLITE_OK && bClose ){ + rc = fts5HighlightAppend(p, p->zClose, -1); + } + } + + return rc; +} + +static void fts5HighlightFunction( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +){ + HighlightContext ctx; + int rc; + + if( nVal!=3 ){ + const char *zErr = "wrong number of arguments to function highlight()"; + sqlite3_result_error(pCtx, zErr, -1); + return; + } + memset(&ctx, 0, sizeof(HighlightContext)); + ctx.iCol = sqlite3_value_int(apVal[0]); + ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); + ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); + rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn); + ctx.pApi = pApi; + ctx.pFts = pFts; + + /* Find the first phrase instance in the right column. 
*/ + ctx.iStart = -1; + ctx.iEnd = -1; + while( rc==SQLITE_OK ){ + int iP, iPCol, iOff; + rc = pApi->xInst(pFts, ctx.iInst, &iP, &iPCol, &iOff); + if( rc==SQLITE_OK && iPCol==ctx.iCol ){ + ctx.iStart = iOff; + ctx.iEnd = iOff - 1 + pApi->xPhraseSize(pFts, iP); + break; + } + ctx.iInst++; + } + + if( rc==SQLITE_OK || rc==SQLITE_RANGE ){ + rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb); + } + if( rc==SQLITE_OK ){ + rc = fts5HighlightAppend(&ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); + } + + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_code(pCtx, rc); + } + sqlite3_free(ctx.zOut); +} +/* +**************************************************************************/ + typedef struct SnipPhrase SnipPhrase; typedef struct SnipIter SnipIter; typedef struct SnippetCtx SnippetCtx; @@ -796,6 +928,22 @@ static void fts5TestFunction( } } + /* + ** xInst() + */ + if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " inst "); + if( 0==zReq || 0==sqlite3_stricmp(zReq, "inst") ){ + int nInst; + rc = pApi->xInstCount(pFts, &nInst); + for(i=0; rc==SQLITE_OK && ixInst(pFts, i, &iPhrase, &iCol, &iOff); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d.%d.%d", + (i==0 ? 
"" : " "), iPhrase, iCol, iOff + ); + } + } + /* ** xPhraseCount() */ @@ -966,6 +1114,7 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){ { "bm25debug", (void*)1, fts5Bm25Function, 0 }, { "snippet", 0, fts5SnippetFunction, 0 }, { "fts5_test", 0, fts5TestFunction, 0 }, + { "highlight", 0, fts5HighlightFunction, 0 }, { "bm25", 0, fts5Bm25Function, 0 }, }; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 214cc13931..b6923a3cf4 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -692,7 +692,7 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ return pRet; } -static void *fts5MallocZero(int *pRc, int nByte){ +void *sqlite3Fts5MallocZero(int *pRc, int nByte){ void *pRet = 0; if( *pRc==SQLITE_OK ){ pRet = sqlite3_malloc(nByte); @@ -981,7 +981,7 @@ static int fts5StructureDecode( sizeof(Fts5Structure) + /* Main structure */ sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */ ); - pRet = (Fts5Structure*)fts5MallocZero(&rc, nByte); + pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); if( pRet ){ pRet->nLevel = nLevel; @@ -995,7 +995,7 @@ static int fts5StructureDecode( i += getVarint32(&pData[i], pLvl->nMerge); i += getVarint32(&pData[i], nTotal); assert( nTotal>=pLvl->nMerge ); - pLvl->aSeg = (Fts5StructureSegment*)fts5MallocZero(&rc, + pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, nTotal * sizeof(Fts5StructureSegment) ); diff --git a/manifest b/manifest index 3c7f49ac42..7d776a97d4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\scustomization\sinterfaces\sso\sthat\sthey\smatch\sthe\sdocumentation. -D 2014-11-15T20:07:31.166 +C Add\sthe\sauxiliary\shighlight()\sfunction\sto\sfts5. 
+D 2014-11-24T16:24:33.456 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,15 +104,15 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c cc3f0e4bac499c81d1311199f929dcad5e40ee8e -F ext/fts5/fts5.h a77cad780eec8f10850fdba0f44079a92561b790 -F ext/fts5/fts5Int.h a3c46f9dae13277de6fc3a6f8863d337ca660d6a -F ext/fts5/fts5_aux.c 6b0612e4312ca27264f7dacb0c97abc723a4b472 +F ext/fts5/fts5.c d4b9895c5dc11c20493b3a9f09f4a0cdb0bc1438 +F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 +F ext/fts5/fts5Int.h fd811979294410b10c1737392a9114510fc2a1be +F ext/fts5/fts5_aux.c 2e467bdd93f23f049824411b326f77b9326cb61a F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c a292fe73864086e51e7974d842cc09f6379fbae0 F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 3f4d84a1762e4284319739d4672b90b18b91060a +F ext/fts5/fts5_index.c 998c4aa0f003666afe85b6ff821476419ed245e9 F ext/fts5/fts5_storage.c 5913aa01a1dada1c5e1a39e4cbb44e84c5f7f350 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -602,12 +602,13 @@ F test/fts5aa.test 16bf1dbb92d4d63c7c357b480b1a47309f654ad1 F test/fts5ab.test 657d6dc5ddc57bfea4af1bb85204d4f3539cd3e8 F test/fts5ac.test f38ceca8a43fa0ff86122bec72428a4067b17bc4 F test/fts5ad.test d29ff407c70df470c9a8fcbfe5bc80efd662f2c4 -F test/fts5ae.test d4141786d817e0198f89f8c66749af38359839a7 +F test/fts5ae.test 
a514ee09be90723ccc9736edaef900a5af1c121a F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb F test/fts5ah.test af9274cdb58a69780c7e57e61581990665ac0fb6 F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d F test/fts5aj.test fe5c40216cac8072f29e454ee0540c7b89d17ccd +F test/fts5ak.test 2c930afe32bd15b39a2c416fabe9fc7a36e3042e F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1204,7 +1205,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e240d467e60b7755486aae5e8b0824f7c741f852 -R 28baa98ae078d2f041a83a26b4550455 +P fba0b5fc7eead07a4853e78e02d788e7c714f6cd +R 0f9755027ecd37d1e64800649e8a07f5 U dan -Z 1c1b566687b968f39cda6c2d32a692b6 +Z 35a88e8d2ae30031588d0943e2aec6ce diff --git a/manifest.uuid b/manifest.uuid index ccae749918..374484385c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fba0b5fc7eead07a4853e78e02d788e7c714f6cd \ No newline at end of file +059092379f981eb919b500ce447006f9e645fc5a \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index 32d75616ae..b770d00c5f 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -269,7 +269,7 @@ foreach {tn q res} { SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank DESC; } $res - do_execsql_test 8.3.$tn.3 { + do_execsql_test 8.2.$tn.3 { SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank DESC; } $res } diff --git a/test/fts5ak.test b/test/fts5ak.test new file mode 100644 index 0000000000..4d5b22b030 --- /dev/null +++ b/test/fts5ak.test @@ -0,0 +1,111 @@ +# 2014 November 24 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# Specifically, the auxiliary function "highlight". +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5aj + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE ft1 USING fts5(x); + INSERT INTO ft1 VALUES('i d d a g i b g d d'); + INSERT INTO ft1 VALUES('h d b j c c g a c a'); + INSERT INTO ft1 VALUES('e j a e f h b f h h'); + INSERT INTO ft1 VALUES('j f h d g h i b d f'); + INSERT INTO ft1 VALUES('d c j d c j b c g e'); + INSERT INTO ft1 VALUES('i a d e g j g d a a'); + INSERT INTO ft1 VALUES('j f c e d a h j d b'); + INSERT INTO ft1 VALUES('i c c f a d g h j e'); + INSERT INTO ft1 VALUES('i d i g c d c h b f'); + INSERT INTO ft1 VALUES('g d a e h a b c f j'); +} + +do_execsql_test 1.2 { + SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e'; +} { + {g d a [e] h a b c f j} + {i c c f a d g h j [e]} + {j f c [e] d a h j d b} + {i a d [e] g j g d a a} + {d c j d c j b c g [e]} + {[e] j a [e] f h b f h h} +} + +do_execsql_test 1.3 { + SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d'; +} { + {j f [h d] g h i b d f} + {[h d] b j c c g a c a} +} + +do_execsql_test 1.4 { + SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d'; +} { + {i [d d] a g i b g [d d]} +} + +do_execsql_test 1.5 { + SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e' +} { + {g d a [e] h a b c f j} + {i c c f a d g h j [e]} + {j f c [e] d a h j d b} + {i a d [e] g j g d a a} + {d c j d c j b c g [e]} + {[e] j a [e] 
f h b f h h} +} + +do_execsql_test 1.6 { + SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d d + d'; +} { + {i [d d] a g i b g [d d]} +} + +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE ft2 USING fts5(x); + INSERT INTO ft2 VALUES('a b c d e f g h i j'); +} + +do_execsql_test 2.2 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c+d+e' +} {{a [b c d e] f g h i j}} + +do_execsql_test 2.3 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d e+f+g' +} { + {a [b c d] [e f g] h i j} +} + +do_execsql_test 2.4 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c' +} { + {a [b c d] e f g h i j} +} + +do_execsql_test 2.5 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c c+d+e' +} { + {a [b c d e] f g h i j} +} + + +finish_test + From cb11e73fc25128f5faed3376be9c51a9c4b12eb9 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 27 Nov 2014 20:03:45 +0000 Subject: [PATCH 049/206] Add a %_config table to fts5. FossilOrigin-Name: 83491c56661ca78f96020ba68184bb3fb19e674f --- ext/fts5/fts5.c | 49 ++++++++++++++------------ ext/fts5/fts5Int.h | 23 ++++++++----- ext/fts5/fts5_aux.c | 76 ++++++++++++++++++++++------------------- ext/fts5/fts5_config.c | 63 ++++++++++++++++++++++++++++++++++ ext/fts5/fts5_index.c | 38 ++++++++------------- ext/fts5/fts5_storage.c | 41 ++++++++++++++++++---- manifest | 39 ++++++++++----------- manifest.uuid | 2 +- test/fts5aa.test | 13 +++---- test/fts5ab.test | 4 +-- test/fts5ac.test | 2 +- test/fts5ad.test | 4 +-- test/fts5ae.test | 2 +- test/fts5ah.test | 2 +- test/fts5aj.test | 2 +- test/fts5ak.test | 12 +++++++ test/fts5al.test | 43 +++++++++++++++++++++++ 17 files changed, 287 insertions(+), 128 deletions(-) create mode 100644 test/fts5al.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index fb3c0d7197..b47b37aba1 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -340,6 +340,11 @@ static int fts5InitVtab( rc = sqlite3Fts5ConfigDeclareVtab(pConfig); } + 
/* Load the contents of %_config */ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ConfigLoad(pConfig); + } + if( rc!=SQLITE_OK ){ fts5FreeVtab(pTab, 0); pTab = 0; @@ -887,7 +892,8 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ ** This function is called to handle an FTS INSERT command. In other words, ** an INSERT statement of the form: ** -** INSERT INTO fts(fts) VALUES($pVal) +** INSERT INTO fts(fts) VALUES($pCmd) +** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) ** ** Argument pVal is the value assigned to column "fts" by the INSERT ** statement. This function returns SQLITE_OK if successful, or an SQLite @@ -897,28 +903,25 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ ** INSERT Directives" section of the documentation. It should be updated if ** more commands are added to this function. */ -static int fts5SpecialCommand(Fts5Table *pTab, sqlite3_value *pVal){ - const char *z = (const char*)sqlite3_value_text(pVal); - int n = sqlite3_value_bytes(pVal); - int rc = SQLITE_ERROR; +static int fts5SpecialCommand( + Fts5Table *pTab, /* Fts5 table object */ + sqlite3_value *pCmd, /* Value inserted into special column */ + sqlite3_value *pVal /* Value inserted into rowid column */ +){ + const char *z = (const char*)sqlite3_value_text(pCmd); + int rc = SQLITE_OK; + int bError = 0; if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); - }else - - if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){ - int pgsz = atoi(&z[5]); - if( pgsz<32 ) pgsz = 32; - sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); - rc = SQLITE_OK; - }else - - if( n>10 && 0==sqlite3_strnicmp("automerge=", z, 10) ){ - int nAutomerge = atoi(&z[10]); - sqlite3Fts5IndexAutomerge(pTab->pIndex, nAutomerge); - rc = SQLITE_OK; + }else{ + rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); + if( rc==SQLITE_OK && bError ){ + rc = SQLITE_ERROR; + }else{ + rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, z, pVal); + } } - return rc; } @@ -953,7 +956,9 @@ static int 
fts5UpdateMethod( assert( nArg==1 || nArg==(2 + pConfig->nCol + 2) ); if( nArg>1 && SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ - return fts5SpecialCommand(pTab, apVal[2 + pConfig->nCol]); + return fts5SpecialCommand(pTab, + apVal[2 + pConfig->nCol], apVal[2 + pConfig->nCol + 1] + ); } eType0 = sqlite3_value_type(apVal[0]); @@ -1104,7 +1109,9 @@ static int fts5CacheInstArray(Fts5Cursor *pCsr){ int *aInst; int iBest = -1; for(i=0; izOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); - if( p->zOut==0 ) return SQLITE_NOMEM; - return SQLITE_OK; +/* +** Append text to the HighlightContext output string - p->zOut. Argument +** z points to a buffer containing n bytes of text to append. If n is +** negative, everything up until the first '\0' is appended to the output. +*/ +static void fts5HighlightAppend( + int *pRc, + HighlightContext *p, + const char *z, int n +){ + if( *pRc==SQLITE_OK ){ + if( n<0 ) n = strlen(z); + p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); + if( p->zOut==0 ) *pRc = SQLITE_NOMEM; + } } static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ - int iStart, /* Start offset of token */ - int iEnd, /* End offset of token */ + int iStartOff, /* Start offset of token */ + int iEndOff, /* End offset of token */ int iPos /* Position offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; if( iPos==p->iStart ){ - rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iStart - p->iOff); - p->iOff = iStart; - if( rc==SQLITE_OK ){ - rc = fts5HighlightAppend(p, p->zOpen, -1); - } - } - - if( rc==SQLITE_OK ){ - rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iEnd - p->iOff); - p->iOff = iEnd; + fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); + fts5HighlightAppend(&rc, p, p->zOpen, -1); + p->iOff = iStartOff; } - if( rc==SQLITE_OK && iPos==p->iEnd ){ + if( iPos==p->iEnd ){ int 
bClose = 1; - do{ + for(p->iInst++; rc==SQLITE_OK && p->iInstnInst; p->iInst++){ int iP, iPCol, iOff; - rc = p->pApi->xInst(p->pFts, ++p->iInst, &iP, &iPCol, &iOff); - if( rc==SQLITE_RANGE || iPCol!=p->iCol ){ - p->iStart = -1; - p->iEnd = -1; - rc = SQLITE_OK; + rc = p->pApi->xInst(p->pFts, p->iInst, &iP, &iPCol, &iOff); + if( iPCol!=p->iCol ){ + p->iStart = p->iEnd = -1; }else{ - iEnd = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP); + int iEnd = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP); if( iEnd<=p->iEnd ) continue; if( iOff<=p->iEnd ) bClose = 0; p->iStart = iOff; p->iEnd = iEnd; } - }while( 0 ); + break; + } - if( rc==SQLITE_OK && bClose ){ - rc = fts5HighlightAppend(p, p->zClose, -1); + if( bClose ){ + fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); + fts5HighlightAppend(&rc, p, p->zClose, -1); + p->iOff = iEndOff; } } @@ -107,34 +112,33 @@ static void fts5HighlightFunction( sqlite3_result_error(pCtx, zErr, -1); return; } + memset(&ctx, 0, sizeof(HighlightContext)); ctx.iCol = sqlite3_value_int(apVal[0]); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); - rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn); ctx.pApi = pApi; ctx.pFts = pFts; + rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn); + if( rc==SQLITE_OK ) rc = pApi->xInstCount(pFts, &ctx.nInst); /* Find the first phrase instance in the right column. 
*/ ctx.iStart = -1; ctx.iEnd = -1; - while( rc==SQLITE_OK ){ + for( ; ctx.iInstxInst(pFts, ctx.iInst, &iP, &iPCol, &iOff); - if( rc==SQLITE_OK && iPCol==ctx.iCol ){ + if( iPCol==ctx.iCol ){ ctx.iStart = iOff; ctx.iEnd = iOff - 1 + pApi->xPhraseSize(pFts, iP); break; } - ctx.iInst++; } - if( rc==SQLITE_OK || rc==SQLITE_RANGE ){ + if( rc==SQLITE_OK ){ rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb); } - if( rc==SQLITE_OK ){ - rc = fts5HighlightAppend(&ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); - } + fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 68c340a48f..98a6fe1afe 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -15,6 +15,8 @@ #include "fts5Int.h" +#define FTS5_DEFAULT_PAGE_SIZE 1000 + /* ** Convert an SQL-style quoted string into a normal string by removing ** the quote characters. The conversion is done in-place. If the @@ -295,4 +297,65 @@ int sqlite3Fts5Tokenize( return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } +int sqlite3Fts5ConfigSetValue( + Fts5Config *pConfig, + const char *zKey, + sqlite3_value *pVal, + int *pbBadkey +){ + int rc = SQLITE_OK; + if( 0==sqlite3_stricmp(zKey, "cookie") ){ + pConfig->iCookie = sqlite3_value_int(pVal); + } + else if( 0==sqlite3_stricmp(zKey, "pgsz") ){ + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + pConfig->pgsz = sqlite3_value_int(pVal); + }else{ + if( pbBadkey ) *pbBadkey = 1; + } + } + else if( 0==sqlite3_stricmp(zKey, "automerge") ){ + // todo + } + else if( 0==sqlite3_stricmp(zKey, "rank") ){ + // todo + }else{ + if( pbBadkey ) *pbBadkey = 1; + } + return rc; +} + +/* +** Load the contents of the %_config table into memory. 
+*/ +int sqlite3Fts5ConfigLoad(Fts5Config *pConfig){ + const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; + char *zSql; + sqlite3_stmt *p = 0; + int rc; + + /* Set default values */ + pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; + pConfig->iCookie = 0; + + zSql = sqlite3_mprintf(zSelect, pConfig->zDb, pConfig->zName); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0); + sqlite3_free(zSql); + } + + assert( rc==SQLITE_OK || p==0 ); + if( rc==SQLITE_OK ){ + while( SQLITE_ROW==sqlite3_step(p) ){ + const char *zK = (const char*)sqlite3_column_text(p, 0); + sqlite3_value *pVal = sqlite3_column_value(p, 1); + sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, 0); + } + rc = sqlite3_finalize(p); + } + + return rc; +} diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index b6923a3cf4..262d5db97c 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -41,8 +41,6 @@ ** */ -#define FTS5_DEFAULT_PAGE_SIZE 1000 - #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ #define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ @@ -290,7 +288,6 @@ typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ char *zDataTbl; /* Name of %_data table */ - int pgsz; /* Target page size for this index */ int nMinMerge; /* Minimum input segments in a merge */ int nCrisisMerge; /* Maximum allowed segments per level */ int nWorkUnit; /* Leaf pages in a "unit" of work */ @@ -2535,12 +2532,15 @@ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ ** Discard all data currently cached in the hash-tables. 
*/ static void fts5IndexDiscardData(Fts5Index *p){ - Fts5Config *pConfig = p->pConfig; - int i; - for(i=0; i<=pConfig->nPrefix; i++){ - sqlite3Fts5HashClear(p->apHash[i]); + assert( p->apHash || p->nPendingData==0 ); + if( p->apHash ){ + Fts5Config *pConfig = p->pConfig; + int i; + for(i=0; i<=pConfig->nPrefix; i++){ + sqlite3Fts5HashClear(p->apHash[i]); + } + p->nPendingData = 0; } - p->nPendingData = 0; } /* @@ -2630,7 +2630,7 @@ static void fts5WriteBtreeTerm( fts5WriteBtreeNEmpty(p, pWriter); - if( pPage->buf.n>=p->pgsz ){ + if( pPage->buf.n>=p->pConfig->pgsz ){ /* pPage will be written to disk. The term will be written into the ** parent of pPage. */ i64 iRowid = FTS5_SEGMENT_ROWID( @@ -2761,7 +2761,7 @@ static void fts5WriteAppendTerm( pWriter->bFirstRowidInDoclist = 1; /* If the current leaf page is full, flush it to disk. */ - if( pPage->buf.n>=p->pgsz ){ + if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); pWriter->bFirstRowidInPage = 1; } @@ -2796,7 +2796,7 @@ static void fts5WriteAppendRowid( pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; - if( pPage->buf.n>=p->pgsz ){ + if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); pWriter->bFirstRowidInPage = 1; } @@ -2809,7 +2809,7 @@ static void fts5WriteAppendPoslistInt( ){ Fts5PageWriter *pPage = &pWriter->aWriter[0]; fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); - if( pPage->buf.n>=p->pgsz ){ + if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); pWriter->bFirstRowidInPage = 1; } @@ -2825,8 +2825,8 @@ static void fts5WriteAppendPoslistData( const u8 *a = aData; int n = nData; - while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pgsz ){ - int nReq = p->pgsz - pPage->buf.n; + while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){ + int nReq = p->pConfig->pgsz - pPage->buf.n; int nCopy = 0; while( nCopypConfig = pConfig; - p->pgsz = 1000; p->nMinMerge = FTS5_MIN_MERGE; p->nCrisisMerge = FTS5_CRISIS_MERGE; p->nWorkUnit = 
FTS5_WORK_UNIT; @@ -3383,7 +3382,7 @@ int sqlite3Fts5IndexOpen( int i; Fts5Structure s; rc = sqlite3Fts5CreateTable( - pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", pzErr + pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr ); if( rc==SQLITE_OK ){ memset(&s, 0, sizeof(Fts5Structure)); @@ -3986,13 +3985,6 @@ int sqlite3Fts5IndexInit(sqlite3 *db){ return rc; } -/* -** Set the target page size for the index object. -*/ -void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ - p->pgsz = pgsz; -} - /* ** Set the minimum number of segments that an auto-merge operation should ** attempt to merge together. A value of 1 sets the object to use the diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index ff0add5bad..bbe09874ce 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -20,7 +20,7 @@ struct Fts5Storage { int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ i64 nTotalRow; /* Total number of rows in FTS table */ i64 *aTotalSize; /* Total sizes of each column */ - sqlite3_stmt *aStmt[9]; + sqlite3_stmt *aStmt[10]; }; @@ -43,6 +43,8 @@ struct Fts5Storage { #define FTS5_STMT_LOOKUP_DOCSIZE 8 +#define FTS5_STMT_REPLACE_CONFIG 9 + /* ** Prepare the two insert statements - Fts5Storage.pInsertContent and ** Fts5Storage.pInsertDocsize - if they have not already been prepared. @@ -70,6 +72,8 @@ static int fts5StorageGetStmt( "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ + + "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ }; Fts5Config *pConfig = p->pConfig; char *zSql = 0; @@ -131,11 +135,13 @@ int sqlite3Fts5CreateTable( Fts5Config *pConfig, /* FTS5 configuration */ const char *zPost, /* Shadow table to create (e.g. "content") */ const char *zDefn, /* Columns etc. 
for shadow table */ + int bWithout, /* True for without rowid */ char **pzErr /* OUT: Error message */ ){ int rc; - char *zSql = sqlite3_mprintf("CREATE TABLE %Q.'%q_%q'(%s)", - pConfig->zDb, pConfig->zName, zPost, zDefn + char *zSql = sqlite3_mprintf("CREATE TABLE %Q.'%q_%q'(%s)%s", + pConfig->zDb, pConfig->zName, zPost, zDefn, + (bWithout ? " WITHOUT ROWID" :"") ); if( zSql==0 ){ rc = SQLITE_NOMEM; @@ -193,12 +199,17 @@ int sqlite3Fts5StorageOpen( for(i=0; inCol; i++){ iOff += sprintf(&zDefn[iOff], ", c%d", i); } - rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, pzErr); + rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); } sqlite3_free(zDefn); if( rc==SQLITE_OK ){ rc = sqlite3Fts5CreateTable( - pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", pzErr + pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr + ); + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5CreateTable( + pConfig, "config", "k PRIMARY KEY, v", 1, pzErr ); } } @@ -225,7 +236,8 @@ int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy){ /* If required, remove the shadow tables from the database */ if( bDestroy ){ rc = sqlite3Fts5DropTable(p->pConfig, "content"); - if( rc==SQLITE_OK ) sqlite3Fts5DropTable(p->pConfig, "docsize"); + if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "docsize"); + if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "config"); } sqlite3_free(p); @@ -744,3 +756,20 @@ int sqlite3Fts5StorageRollback(Fts5Storage *p){ return sqlite3Fts5IndexRollback(p->pIndex); } +int sqlite3Fts5StorageConfigValue( + Fts5Storage *p, + const char *z, + sqlite3_value *pVal +){ + sqlite3_stmt *pReplace = 0; + int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace); + if( rc==SQLITE_OK ){ + sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_TRANSIENT); + sqlite3_bind_value(pReplace, 2, pVal); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + } + return rc; +} + + diff --git a/manifest b/manifest index 7d776a97d4..2a109507b3 100644 
--- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sauxiliary\shighlight()\sfunction\sto\sfts5. -D 2014-11-24T16:24:33.456 +C Add\sa\s%_config\stable\sto\sfts5. +D 2014-11-27T20:03:45.010 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c d4b9895c5dc11c20493b3a9f09f4a0cdb0bc1438 +F ext/fts5/fts5.c 3c920d090b1cdbc69ba03acf7c9302a19be55cee F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 -F ext/fts5/fts5Int.h fd811979294410b10c1737392a9114510fc2a1be -F ext/fts5/fts5_aux.c 2e467bdd93f23f049824411b326f77b9326cb61a +F ext/fts5/fts5Int.h 63daceb6e421b9066e05c4e89651f27fa675be93 +F ext/fts5/fts5_aux.c 0e3e5fea6bf5772805afe14c95cb5f16e03e4b3f F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 -F ext/fts5/fts5_config.c a292fe73864086e51e7974d842cc09f6379fbae0 +F ext/fts5/fts5_config.c aae1470ca0e2125e758df5b612f26082a1dc254a F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 998c4aa0f003666afe85b6ff821476419ed245e9 -F ext/fts5/fts5_storage.c 5913aa01a1dada1c5e1a39e4cbb44e84c5f7f350 +F ext/fts5/fts5_index.c 5cb71b3922e50a23752fd6c11028acfe2f367850 +F ext/fts5/fts5_storage.c c28d1a88f45f83980eb32631c7421d7f5dd336fa F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -598,17 +598,18 @@ F test/fts4merge3.test 
aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test 16bf1dbb92d4d63c7c357b480b1a47309f654ad1 -F test/fts5ab.test 657d6dc5ddc57bfea4af1bb85204d4f3539cd3e8 -F test/fts5ac.test f38ceca8a43fa0ff86122bec72428a4067b17bc4 -F test/fts5ad.test d29ff407c70df470c9a8fcbfe5bc80efd662f2c4 -F test/fts5ae.test a514ee09be90723ccc9736edaef900a5af1c121a +F test/fts5aa.test fb49e2db450f9bec900b05b6a85141695d6c2255 +F test/fts5ab.test 32ad48ca5317548dc6934585f6071b5e7ff03e61 +F test/fts5ac.test 3982268756a543cbf3ae508b6336f623136b754a +F test/fts5ad.test 4e2e6a71fc7465eaaa32fd6ec318e657c6e7baa9 +F test/fts5ae.test 0c0712b1430158f976fce3564adf3e3713a4a93d F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb -F test/fts5ah.test af9274cdb58a69780c7e57e61581990665ac0fb6 +F test/fts5ah.test c79b5107c2a47096f0e3473a4806ebc17f006cf4 F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d -F test/fts5aj.test fe5c40216cac8072f29e454ee0540c7b89d17ccd -F test/fts5ak.test 2c930afe32bd15b39a2c416fabe9fc7a36e3042e +F test/fts5aj.test 947c957cdcfc8af7d428f8b82e82926b3b45a504 +F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f +F test/fts5al.test 455b2bdc9f6ffb965a38a970a60c5075ee1e23bb F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1205,7 +1206,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fba0b5fc7eead07a4853e78e02d788e7c714f6cd -R 0f9755027ecd37d1e64800649e8a07f5 +P 
059092379f981eb919b500ce447006f9e645fc5a +R 4ec91e9a7d6238c87465b8e2b7986d2c U dan -Z 35a88e8d2ae30031588d0943e2aec6ce +Z 1dea62df28b08fe11d603550a65329e7 diff --git a/manifest.uuid b/manifest.uuid index 374484385c..54ffb90922 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -059092379f981eb919b500ce447006f9e645fc5a \ No newline at end of file +83491c56661ca78f96020ba68184bb3fb19e674f \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 4126034687..9c715bfd40 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -30,6 +30,7 @@ do_execsql_test 1.0 { t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)} t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)} t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)} + t1_config {CREATE TABLE 't1_config'(k PRIMARY KEY, v) WITHOUT ROWID} } do_execsql_test 1.1 { @@ -84,7 +85,7 @@ foreach {i x y} { reset_db do_execsql_test 4.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } foreach {i x y} { 1 {g f d b f} {h h e i a} @@ -108,7 +109,7 @@ foreach {i x y} { reset_db do_execsql_test 5.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } foreach {i x y} { 1 {dd abc abc abc abcde} {aaa dd ddd ddd aab} @@ -133,7 +134,7 @@ breakpoint reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } do_execsql_test 6.1 { @@ -151,7 +152,7 @@ reset_db expr srand(0) do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } proc doc {} { @@ -190,7 +191,7 @@ for {set i 1} {$i <= 10} {incr i} { reset_db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); - INSERT INTO 
t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } do_execsql_test 8.1 { @@ -207,7 +208,7 @@ expr srand(0) do_execsql_test 9.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3"); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } proc doc {} { diff --git a/test/fts5ab.test b/test/fts5ab.test index 4e2e9b13d0..2fd3c047cc 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -59,7 +59,7 @@ do_execsql_test 1.6 { reset_db do_execsql_test 2.1 { CREATE VIRTUAL TABLE t1 USING fts5(x); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); INSERT INTO t1 VALUES('one'); INSERT INTO t1 VALUES('two'); INSERT INTO t1 VALUES('three'); @@ -99,7 +99,7 @@ foreach {tn expr res} { reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE t1 USING fts5(a,b); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } foreach {tn a b} { diff --git a/test/fts5ac.test b/test/fts5ac.test index be6177705e..f3efa14107 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -25,7 +25,7 @@ ifcapable !fts5 { do_execsql_test 1.0 { CREATE VIRTUAL TABLE xx USING fts5(x,y); - INSERT INTO xx(xx) VALUES('pgsz=32'); + INSERT INTO xx(xx, rowid) VALUES('pgsz', 32); } set data { diff --git a/test/fts5ad.test b/test/fts5ad.test index 824444a867..8eabb7b978 100644 --- a/test/fts5ad.test +++ b/test/fts5ad.test @@ -55,12 +55,12 @@ foreach {tn match res} { foreach {T create} { 2 { CREATE VIRTUAL TABLE t1 USING fts5(a, b); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } 3 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } } { diff --git a/test/fts5ae.test b/test/fts5ae.test index b770d00c5f..c29ec499ed 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -25,7 +25,7 @@ ifcapable !fts5 { do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 
USING fts5(a, b); - INSERT INTO t1(t1) VALUES('pgsz=32'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); } do_execsql_test 1.1 { diff --git a/test/fts5ah.test b/test/fts5ah.test index fd78d23577..7ee6de9731 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -28,7 +28,7 @@ ifcapable !fts5 { do_test 1.0 { execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) } - execsql { INSERT INTO t1(t1) VALUES('pgsz=128') } + execsql { INSERT INTO t1(t1, rowid) VALUES('pgsz', 128) } for {set i 1} {$i <= 10000} {incr i} { set v {x x x x x x x x x x x x x x x x x x x x} if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i} diff --git a/test/fts5aj.test b/test/fts5aj.test index 8b333ae562..6c8cd1827b 100644 --- a/test/fts5aj.test +++ b/test/fts5aj.test @@ -46,7 +46,7 @@ proc structure {} { expr srand(0) do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); - INSERT INTO t1(t1) VALUES('pgsz=64'); + INSERT INTO t1(t1, rowid) VALUES('pgsz', 64); } for {set iTest 0} {$iTest < 50000} {incr iTest} { diff --git a/test/fts5ak.test b/test/fts5ak.test index 4d5b22b030..29d19bc4b1 100644 --- a/test/fts5ak.test +++ b/test/fts5ak.test @@ -106,6 +106,18 @@ do_execsql_test 2.5 { {a [b c d e] f g h i j} } +do_execsql_test 2.6.1 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'f d' +} { + {a b c [d] e [f] g h i j} +} + +do_execsql_test 2.6.2 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'd f' +} { + {a b c [d] e [f] g h i j} +} + finish_test diff --git a/test/fts5al.test b/test/fts5al.test new file mode 100644 index 0000000000..63e14a13a0 --- /dev/null +++ b/test/fts5al.test @@ -0,0 +1,43 @@ +# 2014 November 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# +# Specifically, this function tests the %_config table. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts5al + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE ft1 USING fts5(x); + SELECT * FROM ft1_config; +} {} + +do_execsql_test 1.2 { + INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); + SELECT * FROM ft1_config; +} {pgsz 32} + +do_execsql_test 1.3 { + INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); + SELECT * FROM ft1_config; +} {pgsz 64} + +finish_test + From ca7fad3d2c38939a28e31462dfe2111fa29d0e64 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 28 Nov 2014 20:01:13 +0000 Subject: [PATCH 050/206] Add a cookie mechanism to ensure that the %_config table is re-read as required. 
FossilOrigin-Name: bb4a37b53de60da9ec8b9317eec14afa99690828 --- ext/fts5/fts5.c | 5 --- ext/fts5/fts5Int.h | 16 ++++++- ext/fts5/fts5_buffer.c | 18 ++++++++ ext/fts5/fts5_config.c | 34 ++++++++++++--- ext/fts5/fts5_index.c | 92 ++++++++++++++++++++++++++++++----------- ext/fts5/fts5_storage.c | 7 ++++ manifest | 36 ++++++++-------- manifest.uuid | 2 +- test/fts5aa.test | 12 +++--- test/fts5ab.test | 4 +- test/fts5ac.test | 2 +- test/fts5ad.test | 4 +- test/fts5ae.test | 2 +- test/fts5ah.test | 4 +- test/fts5aj.test | 2 +- 15 files changed, 170 insertions(+), 70 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index b47b37aba1..83428bc1c3 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -340,11 +340,6 @@ static int fts5InitVtab( rc = sqlite3Fts5ConfigDeclareVtab(pConfig); } - /* Load the contents of %_config */ - if( rc==SQLITE_OK ){ - rc = sqlite3Fts5ConfigLoad(pConfig); - } - if( rc!=SQLITE_OK ){ fts5FreeVtab(pTab, 0); pTab = 0; diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index a29eb3f5dc..d2abadc36d 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -60,6 +60,11 @@ typedef struct Fts5Config Fts5Config; ** be gleaned from the CREATE VIRTUAL TABLE statement. ** ** And all information loaded from the %_config table. +** +** nAutomerge: +** The minimum number of segments that an auto-merge operation should +** attempt to merge together. A value of 1 sets the object to use the +** compile time default. Zero disables auto-merge altogether. 
*/ struct Fts5Config { sqlite3 *db; /* Database handle */ @@ -75,6 +80,7 @@ struct Fts5Config { /* Values loaded from the %_config table */ int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ + int nAutomerge; /* 'automerge' setting */ }; int sqlite3Fts5ConfigParse( @@ -94,7 +100,7 @@ int sqlite3Fts5Tokenize( void sqlite3Fts5Dequote(char *z); /* Load the contents of the %_config table */ -int sqlite3Fts5ConfigLoad(Fts5Config*); +int sqlite3Fts5ConfigLoad(Fts5Config*, int); /* Set the value of a single config attribute */ int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); @@ -126,6 +132,7 @@ void sqlite3Fts5BufferZero(Fts5Buffer*); void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); void sqlite3Fts5BufferAppendListElem(int*, Fts5Buffer*, const char*, int); +void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int); #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) #define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c) @@ -133,6 +140,11 @@ void sqlite3Fts5BufferAppendListElem(int*, Fts5Buffer*, const char*, int); #define fts5BufferFree(a) sqlite3Fts5BufferFree(a) #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) +#define fts5BufferAppend32(a,b,c) sqlite3Fts5BufferAppend32(a,b,c) + +/* Write and decode big-endian 32-bit integer values */ +void sqlite3Fts5Put32(u8*, int); +int sqlite3Fts5Get32(const u8*); typedef struct Fts5PoslistReader Fts5PoslistReader; struct Fts5PoslistReader { @@ -298,7 +310,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum); */ int sqlite3Fts5IndexInit(sqlite3*); -void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge); +int sqlite3Fts5IndexSetCookie(Fts5Index*, int); /* ** Return the total number of entries read from the %_data table by diff --git a/ext/fts5/fts5_buffer.c 
b/ext/fts5/fts5_buffer.c index bea316eda4..478b903614 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -46,6 +46,24 @@ void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iVal); } +void sqlite3Fts5Put32(u8 *aBuf, int iVal){ + aBuf[0] = (iVal>>24) & 0x00FF; + aBuf[1] = (iVal>>16) & 0x00FF; + aBuf[2] = (iVal>> 8) & 0x00FF; + aBuf[3] = (iVal>> 0) & 0x00FF; +} + +int sqlite3Fts5Get32(const u8 *aBuf){ + return (aBuf[0] << 24) + (aBuf[1] << 16) + (aBuf[2] << 8) + aBuf[3]; +} + +void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){ + char *a; + if( sqlite3Fts5BufferGrow(pRc, pBuf, 4) ) return; + sqlite3Fts5Put32(&pBuf->p[pBuf->n], iVal); + pBuf->n += 4; +} + /* ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set ** the error code in p. If an error has already occurred when this function diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 98a6fe1afe..c7e729276b 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -16,6 +16,10 @@ #include "fts5Int.h" #define FTS5_DEFAULT_PAGE_SIZE 1000 +#define FTS5_DEFAULT_AUTOMERGE 4 + +/* Maximum allowed page size */ +#define FTS5_MAX_PAGE_SIZE (128*1024) /* ** Convert an SQL-style quoted string into a normal string by removing @@ -153,6 +157,7 @@ int sqlite3Fts5ConfigParse( if( pRet==0 ) return SQLITE_NOMEM; memset(pRet, 0, sizeof(Fts5Config)); pRet->db = db; + pRet->iCookie = -1; pRet->azCol = (char**)sqlite3_malloc(sizeof(char*) * nArg); pRet->zDb = fts5Strdup(azArg[1]); @@ -307,16 +312,32 @@ int sqlite3Fts5ConfigSetValue( if( 0==sqlite3_stricmp(zKey, "cookie") ){ pConfig->iCookie = sqlite3_value_int(pVal); } + else if( 0==sqlite3_stricmp(zKey, "pgsz") ){ + int pgsz = 0; if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ - pConfig->pgsz = sqlite3_value_int(pVal); - }else{ + pgsz = sqlite3_value_int(pVal); + } + if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){ if( pbBadkey ) *pbBadkey 
= 1; + }else{ + pConfig->pgsz = pgsz; } } + else if( 0==sqlite3_stricmp(zKey, "automerge") ){ - // todo + int nAutomerge = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + nAutomerge = sqlite3_value_int(pVal); + } + if( nAutomerge<0 || nAutomerge>64 ){ + if( pbBadkey ) *pbBadkey = 1; + }else{ + if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; + pConfig->nAutomerge = nAutomerge; + } } + else if( 0==sqlite3_stricmp(zKey, "rank") ){ // todo }else{ @@ -328,7 +349,7 @@ int sqlite3Fts5ConfigSetValue( /* ** Load the contents of the %_config table into memory. */ -int sqlite3Fts5ConfigLoad(Fts5Config *pConfig){ +int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; char *zSql; sqlite3_stmt *p = 0; @@ -336,7 +357,7 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig){ /* Set default values */ pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; - pConfig->iCookie = 0; + pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; zSql = sqlite3_mprintf(zSelect, pConfig->zDb, pConfig->zName); if( zSql==0 ){ @@ -356,6 +377,9 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig){ rc = sqlite3_finalize(p); } + if( rc==SQLITE_OK ){ + pConfig->iCookie = iCookie; + } return rc; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 262d5db97c..b51e7bad11 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -42,7 +42,6 @@ */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ -#define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ #define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ @@ -61,10 +60,14 @@ ** 1. Structure Records: ** ** The set of segments that make up an index - the index structure - are -** recorded in a single record within the %_data table. The record is a list -** of SQLite varints. +** recorded in a single record within the %_data table. 
The record consists +** of a single 32-bit configuration cookie value followed by a list of +** SQLite varints. If the FTS table features more than one index (because +** there are one or more prefix indexes), it is guaranteed that all share +** the same cookie value. ** -** The record begins with three varints: +** Immediately following the configuration cookie, the record begins with +** three varints: ** ** + number of levels, ** + total number of segments on all levels, @@ -288,7 +291,6 @@ typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ char *zDataTbl; /* Name of %_data table */ - int nMinMerge; /* Minimum input segments in a merge */ int nCrisisMerge; /* Maximum allowed segments per level */ int nWorkUnit; /* Leaf pages in a "unit" of work */ @@ -960,6 +962,7 @@ static void fts5DataRemoveSegment(Fts5Index *p, int iIdx, int iSegid){ static int fts5StructureDecode( const u8 *pData, /* Buffer containing serialized structure */ int nData, /* Size of buffer pData in bytes */ + int *piCookie, /* Configuration cookie value */ Fts5Structure **ppOut /* OUT: Deserialized object */ ){ int rc = SQLITE_OK; @@ -970,9 +973,13 @@ static int fts5StructureDecode( int nByte; /* Bytes of space to allocate at pRet */ Fts5Structure *pRet = 0; /* Structure object to return */ + /* Grab the cookie value */ + if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); + i = 4; + /* Read the total number of levels and segments from the start of the ** structure record. 
*/ - i = getVarint32(&pData[i], nLevel); + i += getVarint32(&pData[i], nLevel); i += getVarint32(&pData[i], nSegment); nByte = ( sizeof(Fts5Structure) + /* Main structure */ @@ -1083,11 +1090,16 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){ Fts5Config *pConfig = p->pConfig; Fts5Structure *pRet = 0; /* Object to return */ Fts5Data *pData; /* %_data entry containing structure record */ + int iCookie; /* Configuration cookie */ assert( iIdx<=pConfig->nPrefix ); pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID(iIdx)); if( !pData ) return 0; - p->rc = fts5StructureDecode(pData->p, pData->n, &pRet); + p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); + + if( p->rc==SQLITE_OK && p->pConfig->iCookie!=iCookie ){ + p->rc = sqlite3Fts5ConfigLoad(p->pConfig, iCookie); + } fts5DataRelease(pData); return pRet; @@ -1129,9 +1141,16 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ int nSegment; /* Total number of segments */ Fts5Buffer buf; /* Buffer to serialize record into */ int iLvl; /* Used to iterate through levels */ + int iCookie; /* Cookie value to store */ nSegment = fts5StructureCountSegments(pStruct); memset(&buf, 0, sizeof(Fts5Buffer)); + + /* Append the current configuration cookie */ + iCookie = p->pConfig->iCookie; + if( iCookie<0 ) iCookie = 0; + fts5BufferAppend32(&p->rc, &buf, iCookie); + fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel); fts5BufferAppendVarint(&p->rc, &buf, nSegment); fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); @@ -2825,6 +2844,7 @@ static void fts5WriteAppendPoslistData( const u8 *a = aData; int n = nData; + assert( p->pConfig->pgsz>0 ); while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){ int nReq = p->pConfig->pgsz - pPage->buf.n; int nCopy = 0; @@ -3179,7 +3199,11 @@ static void fts5IndexWork( } #endif - if( nBestnMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; + if( nBestpConfig->nAutomerge + && 
pStruct->aLevel[iBestLvl].nMerge==0 + ){ + break; + } fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); @@ -3293,7 +3317,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ } } - if( p->nMinMerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); + if( p->pConfig->nAutomerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); fts5IndexCrisisMerge(p, iHash, &pStruct); fts5StructureWrite(p, iHash, pStruct); fts5StructureRelease(pStruct); @@ -3371,7 +3395,6 @@ int sqlite3Fts5IndexOpen( memset(p, 0, sizeof(Fts5Index)); p->pConfig = pConfig; - p->nMinMerge = FTS5_MIN_MERGE; p->nCrisisMerge = FTS5_CRISIS_MERGE; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; @@ -3781,6 +3804,11 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ } /* +** This is part of the fts5_decode() debugging aid. +** +** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This +** function appends a human-readable representation of the same object +** to the buffer passed as the second argument. */ static void fts5DecodeStructure( int *pRc, /* IN/OUT: error code */ @@ -3790,7 +3818,7 @@ static void fts5DecodeStructure( int rc; /* Return code */ Fts5Structure *p = 0; /* Decoded structure object */ - rc = fts5StructureDecode(pBlob, nBlob, &p); + rc = fts5StructureDecode(pBlob, nBlob, 0, &p); if( rc!=SQLITE_OK ){ *pRc = rc; return; @@ -3985,19 +4013,6 @@ int sqlite3Fts5IndexInit(sqlite3 *db){ return rc; } -/* -** Set the minimum number of segments that an auto-merge operation should -** attempt to merge together. A value of 1 sets the object to use the -** compile time default. Zero or less disables auto-merge altogether. -*/ -void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMinMerge){ - if( nMinMerge==1 ){ - p->nMinMerge = FTS5_MIN_MERGE; - }else{ - p->nMinMerge = nMinMerge; - } -} - /* ** Iterator pMulti currently points to a valid entry (not EOF). 
This ** function appends a copy of the position-list of the entry pMulti @@ -4408,3 +4423,32 @@ int sqlite3Fts5IndexReads(Fts5Index *p){ return p->nRead; } +/* +** Set the 32-bit cookie value at the start of all structure records to +** the value passed as the second argument. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. +*/ +int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ + int rc = SQLITE_OK; + Fts5Config *pConfig = p->pConfig; + u8 aCookie[4]; + int i; + + sqlite3Fts5Put32(aCookie, iNew); + for(i=0; rc==SQLITE_OK && i<=pConfig->nPrefix; i++){ + sqlite3_blob *pBlob = 0; + i64 iRowid = FTS5_STRUCTURE_ROWID(i); + rc = sqlite3_blob_open( + pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 1, &pBlob + ); + if( rc==SQLITE_OK ){ + sqlite3_blob_write(pBlob, aCookie, 4, 0); + rc = sqlite3_blob_close(pBlob); + } + } + + return rc; +} + diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index bbe09874ce..0ce4e50e7a 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -769,6 +769,13 @@ int sqlite3Fts5StorageConfigValue( sqlite3_step(pReplace); rc = sqlite3_reset(pReplace); } + if( rc==SQLITE_OK ){ + int iNew = p->pConfig->iCookie + 1; + rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); + if( rc==SQLITE_OK ){ + p->pConfig->iCookie = iNew; + } + } return rc; } diff --git a/manifest b/manifest index 2a109507b3..91938fa8ea 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\s%_config\stable\sto\sfts5. -D 2014-11-27T20:03:45.010 +C Add\sa\scookie\smechanism\sto\sensure\sthat\sthe\s%_config\stable\sis\sre-read\sas\srequired. 
+D 2014-11-28T20:01:13.778 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 3c920d090b1cdbc69ba03acf7c9302a19be55cee +F ext/fts5/fts5.c b3a2574be6921512133d228a922bc0bfb221c569 F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 -F ext/fts5/fts5Int.h 63daceb6e421b9066e05c4e89651f27fa675be93 +F ext/fts5/fts5Int.h a466dd67c909ac05ce8330acf13c7c5bfd244e15 F ext/fts5/fts5_aux.c 0e3e5fea6bf5772805afe14c95cb5f16e03e4b3f -F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 -F ext/fts5/fts5_config.c aae1470ca0e2125e758df5b612f26082a1dc254a +F ext/fts5/fts5_buffer.c c79d67a5a611521f1f3b9d495981f22c02ef4bdb +F ext/fts5/fts5_config.c c95d89bd3ee119681f0aeff0fa34ee9cd18fc430 F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 5cb71b3922e50a23752fd6c11028acfe2f367850 -F ext/fts5/fts5_storage.c c28d1a88f45f83980eb32631c7421d7f5dd336fa +F ext/fts5/fts5_index.c 7e7023f3a29f104b44df2ca2474b296b8dfe447c +F ext/fts5/fts5_storage.c 0198c5976cefa5e8d3f1cfffa3587d0dd594fb2a F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -598,16 +598,16 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F 
test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test fb49e2db450f9bec900b05b6a85141695d6c2255 -F test/fts5ab.test 32ad48ca5317548dc6934585f6071b5e7ff03e61 -F test/fts5ac.test 3982268756a543cbf3ae508b6336f623136b754a -F test/fts5ad.test 4e2e6a71fc7465eaaa32fd6ec318e657c6e7baa9 -F test/fts5ae.test 0c0712b1430158f976fce3564adf3e3713a4a93d +F test/fts5aa.test 27c7d3c865e144a0501dcbfbd6d2ae87f77602ea +F test/fts5ab.test 52f6b9223372ff70b0edb5a3054fbd7bc7fcfefc +F test/fts5ac.test 60302196b7711176ce872fe2e4c73c75ac2c4038 +F test/fts5ad.test ed60fdafc73d879b42573abcfa6ede7e02e07c19 +F test/fts5ae.test 6decf7634acd161af9583ce32ab7197b0113c5cd F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb -F test/fts5ah.test c79b5107c2a47096f0e3473a4806ebc17f006cf4 +F test/fts5ah.test 788e923e60b5e7a559f672cfbf262b8b260ea176 F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d -F test/fts5aj.test 947c957cdcfc8af7d428f8b82e82926b3b45a504 +F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f F test/fts5al.test 455b2bdc9f6ffb965a38a970a60c5075ee1e23bb F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 @@ -1206,7 +1206,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 059092379f981eb919b500ce447006f9e645fc5a -R 4ec91e9a7d6238c87465b8e2b7986d2c +P 83491c56661ca78f96020ba68184bb3fb19e674f +R 14f6d2fef178e1939a8a8ad40901ad6e U dan -Z 1dea62df28b08fe11d603550a65329e7 +Z bcf001d05010ed5ade28bb9d53b64e80 diff --git a/manifest.uuid b/manifest.uuid index 54ffb90922..2e718d56bf 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -83491c56661ca78f96020ba68184bb3fb19e674f \ No newline at end of file 
+bb4a37b53de60da9ec8b9317eec14afa99690828 \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 9c715bfd40..b9440c5c90 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -85,7 +85,7 @@ foreach {i x y} { reset_db do_execsql_test 4.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { 1 {g f d b f} {h h e i a} @@ -109,7 +109,7 @@ foreach {i x y} { reset_db do_execsql_test 5.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { 1 {dd abc abc abc abcde} {aaa dd ddd ddd aab} @@ -134,7 +134,7 @@ breakpoint reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 6.1 { @@ -152,7 +152,7 @@ reset_db expr srand(0) do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } proc doc {} { @@ -191,7 +191,7 @@ for {set i 1} {$i <= 10} {incr i} { reset_db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 8.1 { @@ -208,7 +208,7 @@ expr srand(0) do_execsql_test 9.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3"); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } proc doc {} { diff --git a/test/fts5ab.test b/test/fts5ab.test index 2fd3c047cc..88b8692787 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -59,7 +59,7 @@ do_execsql_test 1.6 { reset_db do_execsql_test 2.1 { CREATE VIRTUAL TABLE t1 USING fts5(x); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); INSERT INTO t1 
VALUES('one'); INSERT INTO t1 VALUES('two'); INSERT INTO t1 VALUES('three'); @@ -99,7 +99,7 @@ foreach {tn expr res} { reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE t1 USING fts5(a,b); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {tn a b} { diff --git a/test/fts5ac.test b/test/fts5ac.test index f3efa14107..1b56c8b0e3 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -25,7 +25,7 @@ ifcapable !fts5 { do_execsql_test 1.0 { CREATE VIRTUAL TABLE xx USING fts5(x,y); - INSERT INTO xx(xx, rowid) VALUES('pgsz', 32); + INSERT INTO xx(xx, rank) VALUES('pgsz', 32); } set data { diff --git a/test/fts5ad.test b/test/fts5ad.test index 8eabb7b978..9514e996c5 100644 --- a/test/fts5ad.test +++ b/test/fts5ad.test @@ -55,12 +55,12 @@ foreach {tn match res} { foreach {T create} { 2 { CREATE VIRTUAL TABLE t1 USING fts5(a, b); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } 3 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } } { diff --git a/test/fts5ae.test b/test/fts5ae.test index c29ec499ed..57b91452ad 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -25,7 +25,7 @@ ifcapable !fts5 { do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 1.1 { diff --git a/test/fts5ah.test b/test/fts5ah.test index 7ee6de9731..b1dffc78fa 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -28,7 +28,7 @@ ifcapable !fts5 { do_test 1.0 { execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) } - execsql { INSERT INTO t1(t1, rowid) VALUES('pgsz', 128) } + execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 128) } for {set i 1} {$i <= 10000} {incr i} { set v {x x x x x x x x x x x x x x x x x x x x} if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i} @@ -71,7 +71,7 
@@ do_test 1.5 { set bwd [execsql_reads { SELECT rowid FROM t1 WHERE t1 MATCH 'x' ORDER BY 1 ASC }] - expr {$bwd < $fwd + 10} + expr {$bwd < $fwd + 12} } {1} foreach {tn q res} " diff --git a/test/fts5aj.test b/test/fts5aj.test index 6c8cd1827b..49386f57bc 100644 --- a/test/fts5aj.test +++ b/test/fts5aj.test @@ -46,7 +46,7 @@ proc structure {} { expr srand(0) do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); - INSERT INTO t1(t1, rowid) VALUES('pgsz', 64); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); } for {set iTest 0} {$iTest < 50000} {incr iTest} { From 89a89560d60edc2b5e48c0679ab5c64e08a2ee66 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 1 Dec 2014 20:05:00 +0000 Subject: [PATCH 051/206] Add code to parse a rank() function specification. And a tcl interface to add auxiliary functions to fts5. FossilOrigin-Name: 9c1697a2aa1f601e6eb11704abe63a73c8105447 --- ext/fts5/fts5.c | 16 +- ext/fts5/fts5Int.h | 2 + ext/fts5/fts5_config.c | 200 +++++++++++++++++++++++- ext/fts5/fts5_tcl.c | 341 +++++++++++++++++++++++++++++++++++++++++ main.mk | 3 +- manifest | 23 +-- manifest.uuid | 2 +- src/tclsqlite.c | 2 + test/fts5al.test | 101 ++++++++++++ 9 files changed, 669 insertions(+), 21 deletions(-) create mode 100644 ext/fts5/fts5_tcl.c diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 83428bc1c3..120c7e2738 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -898,7 +898,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ ** INSERT Directives" section of the documentation. It should be updated if ** more commands are added to this function. 
*/ -static int fts5SpecialCommand( +static int fts5SpecialInsert( Fts5Table *pTab, /* Fts5 table object */ sqlite3_value *pCmd, /* Value inserted into special column */ sqlite3_value *pVal /* Value inserted into rowid column */ @@ -911,10 +911,12 @@ static int fts5SpecialCommand( rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); }else{ rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); - if( rc==SQLITE_OK && bError ){ - rc = SQLITE_ERROR; - }else{ - rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, z, pVal); + if( rc==SQLITE_OK ){ + if( bError ){ + rc = SQLITE_ERROR; + }else{ + rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, z, pVal); + } } } return rc; @@ -951,7 +953,7 @@ static int fts5UpdateMethod( assert( nArg==1 || nArg==(2 + pConfig->nCol + 2) ); if( nArg>1 && SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ - return fts5SpecialCommand(pTab, + return fts5SpecialInsert(pTab, apVal[2 + pConfig->nCol], apVal[2 + pConfig->nCol + 1] ); } @@ -1676,7 +1678,7 @@ static void fts5Fts5Func( char buf[8]; assert( nArg==0 ); assert( sizeof(buf)>=sizeof(pGlobal) ); - memcpy(buf, pGlobal, sizeof(pGlobal)); + memcpy(buf, (void*)&pGlobal, sizeof(pGlobal)); sqlite3_result_blob(pCtx, buf, sizeof(pGlobal), SQLITE_TRANSIENT); } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index d2abadc36d..c2aea79451 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -81,6 +81,8 @@ struct Fts5Config { int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ int nAutomerge; /* 'automerge' setting */ + char *zRank; /* Name of rank function */ + char *zRankArgs; /* Arguments to rank function */ }; int sqlite3Fts5ConfigParse( diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index c7e729276b..88a030f5b5 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -230,6 +230,8 @@ void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ } sqlite3_free(pConfig->azCol); 
sqlite3_free(pConfig->aPrefix); + sqlite3_free(pConfig->zRank); + sqlite3_free(pConfig->zRankArgs); sqlite3_free(pConfig); } } @@ -302,6 +304,190 @@ int sqlite3Fts5Tokenize( return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } +/* +** Argument pIn points to a character that is part of a nul-terminated +** string. Return a pointer to the first character following *pIn in +** the string that is not a white-space character. +*/ +static const char *fts5ConfigSkipWhitespace(const char *pIn){ + const char *p = pIn; + if( p ){ + while( *p==' ' ){ p++; } + } + return p; +} + +/* +** Argument pIn points to a character that is part of a nul-terminated +** string. Return a pointer to the first character following *pIn in +** the string that is not a "bareword" character. +*/ +static const char *fts5ConfigSkipBareword(const char *pIn){ + const char *p = pIn; + while( *p && *p!=' ' && *p!=':' && *p!='!' && *p!='@' + && *p!='#' && *p!='$' && *p!='%' && *p!='^' && *p!='&' + && *p!='*' && *p!='(' && *p!=')' + ){ + p++; + } + if( p==pIn ) p = 0; + return p; +} + +static int fts5_isdigit(char a){ + return (a>='0' && a<='9'); +} + + + +static const char *fts5ConfigSkipLiteral(const char *pIn){ + const char *p = pIn; + if( p ){ + switch( *p ){ + case 'n': case 'N': + if( sqlite3_strnicmp("null", p, 4)==0 ){ + p = &p[4]; + }else{ + p = 0; + } + break; + + case 'x': case 'X': + p++; + if( *p=='\'' ){ + p++; + while( (*p>='a' && *p<='f') + || (*p>='A' && *p<='F') + || (*p>='0' && *p<='9') + ){ + p++; + } + if( *p=='\'' && 0==((p-pIn)%2) ){ + p++; + }else{ + p = 0; + } + }else{ + p = 0; + } + break; + + case '\'': + p++; + while( p ){ + if( *p=='\'' ){ + p++; + if( *p!='\'' ) break; + } + p++; + if( *p==0 ) p = 0; + } + break; + + default: + /* maybe a number */ + if( *p=='+' || *p=='-' ) p++; + while( fts5_isdigit(*p) ) p++; + + /* At this point, if the literal was an integer, the parse is + ** finished. 
Or, if it is a floating point value, it may continue + ** with either a decimal point or an 'E' character. */ + if( *p=='.' && fts5_isdigit(p[1]) ){ + p += 2; + while( fts5_isdigit(*p) ) p++; + } + + break; + } + } + + return p; +} + +/* +** Argument pIn points to the first character in what is expected to be +** a comma-separated list of SQL literals followed by a ')' character. +** If it actually is this, return a pointer to the ')'. Otherwise, return +** NULL to indicate a parse error. +*/ +static const char *fts5ConfigSkipArgs(const char *pIn){ + const char *p = pIn; + + while( 1 ){ + p = fts5ConfigSkipWhitespace(p); + p = fts5ConfigSkipLiteral(p); + p = fts5ConfigSkipWhitespace(p); + if( p==0 || *p==')' ) break; + if( *p!=',' ){ + p = 0; + break; + } + p++; + } + + return p; +} + +/* +** Parameter zIn contains a rank() function specification. The format of +** this is: +** +** + Bareword (function name) +** + Open parenthesis - "(" +** + Zero or more SQL literals in a comma separated list +** + Close parenthesis - ")" +*/ +static int fts5ConfigParseRank( + const char *zIn, /* Input string */ + char **pzRank, /* OUT: Rank function name */ + char **pzRankArgs /* OUT: Rank function arguments */ +){ + const char *p = zIn; + const char *pRank; + char *zRank = 0; + char *zRankArgs = 0; + int rc = SQLITE_OK; + + *pzRank = 0; + *pzRankArgs = 0; + + p = fts5ConfigSkipWhitespace(p); + pRank = p; + p = fts5ConfigSkipBareword(p); + + if( p ){ + zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); + if( zRank ) memcpy(zRank, pRank, p-pRank); + }else{ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK ){ + p = fts5ConfigSkipWhitespace(p); + if( *p!='(' ) rc = SQLITE_ERROR; + p++; + } + if( rc==SQLITE_OK ){ + const char *pArgs = p; + p = fts5ConfigSkipArgs(p); + if( p==0 ){ + rc = SQLITE_ERROR; + }else{ + zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); + if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(zRank); + assert( 
zRankArgs==0 ); + }else{ + *pzRank = zRank; + *pzRankArgs = zRankArgs; + } + return rc; +} + int sqlite3Fts5ConfigSetValue( Fts5Config *pConfig, const char *zKey, @@ -339,7 +525,19 @@ int sqlite3Fts5ConfigSetValue( } else if( 0==sqlite3_stricmp(zKey, "rank") ){ - // todo + const char *zIn = (const char*)sqlite3_value_text(pVal); + char *zRank; + char *zRankArgs; + rc = fts5ConfigParseRank(zIn, &zRank, &zRankArgs); + if( rc==SQLITE_OK ){ + sqlite3_free(pConfig->zRank); + sqlite3_free(pConfig->zRankArgs); + pConfig->zRank = zRank; + pConfig->zRankArgs = zRankArgs; + }else if( rc==SQLITE_ERROR ){ + rc = SQLITE_OK; + if( pbBadkey ) *pbBadkey = 1; + } }else{ if( pbBadkey ) *pbBadkey = 1; } diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c new file mode 100644 index 0000000000..28efe7109c --- /dev/null +++ b/ext/fts5/fts5_tcl.c @@ -0,0 +1,341 @@ +/* +** 2014 Dec 01 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ + + +#include "fts5.h" +#include +#include +#include + +/************************************************************************* +** This is a copy of the first part of the SqliteDb structure in +** tclsqlite.c. We need it here so that the get_sqlite_pointer routine +** can extract the sqlite3* pointer from an existing Tcl SQLite +** connection. +*/ +struct SqliteDb { + sqlite3 *db; +}; + +/* +** Decode a pointer to an sqlite3 object. 
+*/ +static int f5tDbPointer(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **ppDb){ + struct SqliteDb *p; + Tcl_CmdInfo cmdInfo; + char *z = Tcl_GetString(pObj); + if( Tcl_GetCommandInfo(interp, z, &cmdInfo) ){ + p = (struct SqliteDb*)cmdInfo.objClientData; + *ppDb = p->db; + return TCL_OK; + } + return TCL_ERROR; +} +/* End of code that accesses the SqliteDb struct. +**************************************************************************/ + +typedef struct F5tFunction F5tFunction; +struct F5tFunction { + Tcl_Interp *interp; + Tcl_Obj *pScript; +}; + +typedef struct F5tApi F5tApi; +struct F5tApi { + const Fts5ExtensionApi *pApi; + Fts5Context *pFts; +}; + +/* +** api sub-command... +** +** Description... +*/ +static int xF5tApi( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + struct Sub { + const char *zName; + int nArg; + const char *zMsg; + } aSub[] = { + { "xRowid", 0, "" }, + { "xInstCount", 0, "" }, + { "xInst", 1, "IDX" }, + { "xColumnText", 1, "COL" }, + { "xColumnSize", 1, "COL" }, + }; + int rc; + int iSub = 0; + F5tApi *p = (F5tApi*)clientData; + + if( objc<2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "SUB-COMMAND"); + return TCL_ERROR; + } + + rc = Tcl_GetIndexFromObjStruct( + interp, objv[1], aSub, sizeof(aSub[0]), "SUB-COMMAND", 0, &iSub + ); + if( rc!=TCL_OK ) return rc; + if( aSub[iSub].nArg!=objc-2 ){ + Tcl_WrongNumArgs(interp, 1, objv, aSub[iSub].zMsg); + return TCL_ERROR; + } + + switch( iSub ){ + case 0: { /* xRowid */ + sqlite3_int64 iRowid = p->pApi->xRowid(p->pFts); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(iRowid)); + break; + } + + case 1: { /* xInstCount */ + int nInst; + rc = p->pApi->xInstCount(p->pFts, &nInst); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewIntObj(nInst)); + } + break; + } + + case 2: { /* xInst */ + int iIdx, ip, ic, io; + if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ){ + return TCL_ERROR; + } + rc = p->pApi->xInst(p->pFts, iIdx, &ip, &ic, &io); + if( rc==SQLITE_OK ){ + 
Tcl_Obj *pList = Tcl_NewObj(); + Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ip)); + Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ic)); + Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(io)); + Tcl_SetObjResult(interp, pList); + } + break; + } + + case 3: { /* xColumnText */ + const char *z = 0; + int n = 0; + int iCol; + if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){ + return TCL_ERROR; + } + rc = p->pApi->xColumnText(p->pFts, iCol, &z, &n); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n)); + } + break; + } + + case 4: { /* xColumnSize */ + int n = 0; + int iCol; + if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){ + return TCL_ERROR; + } + rc = p->pApi->xColumnSize(p->pFts, iCol, &n); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewIntObj(n)); + } + break; + } + + default: + assert( 0 ); + break; + } + + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error in api call", 0); + return TCL_ERROR; + } + + return TCL_OK; +} + +static void xF5tFunction( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ +){ + F5tFunction *p = (F5tFunction*)pApi->xUserData(pFts); + Tcl_Obj *pEval; /* Script to evaluate */ + int i; + int rc; + + static sqlite3_int64 iCmd = 0; + char zCmd[64]; + F5tApi sApi; + sApi.pApi = pApi; + sApi.pFts = pFts; + + sprintf(zCmd, "f5t_%lld", iCmd++); + Tcl_CreateObjCommand(p->interp, zCmd, xF5tApi, &sApi, 0); + pEval = Tcl_DuplicateObj(p->pScript); + Tcl_IncrRefCount(pEval); + Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zCmd, -1)); + + for(i=0; iinterp, pEval, pObj); + } + + rc = Tcl_EvalObjEx(p->interp, pEval, TCL_GLOBAL_ONLY); + Tcl_DecrRefCount(pEval); + Tcl_DeleteCommand(p->interp, zCmd); + + if( rc!=TCL_OK ){ + 
sqlite3_result_error(pCtx, Tcl_GetStringResult(p->interp), -1); + }else{ + Tcl_Obj *pVar = Tcl_GetObjResult(p->interp); + int n; + const char *zType = (pVar->typePtr ? pVar->typePtr->name : ""); + char c = zType[0]; + if( c=='b' && strcmp(zType,"bytearray")==0 && pVar->bytes==0 ){ + /* Only return a BLOB type if the Tcl variable is a bytearray and + ** has no string representation. */ + unsigned char *data = Tcl_GetByteArrayFromObj(pVar, &n); + sqlite3_result_blob(pCtx, data, n, SQLITE_TRANSIENT); + }else if( c=='b' && strcmp(zType,"boolean")==0 ){ + Tcl_GetIntFromObj(0, pVar, &n); + sqlite3_result_int(pCtx, n); + }else if( c=='d' && strcmp(zType,"double")==0 ){ + double r; + Tcl_GetDoubleFromObj(0, pVar, &r); + sqlite3_result_double(pCtx, r); + }else if( (c=='w' && strcmp(zType,"wideInt")==0) || + (c=='i' && strcmp(zType,"int")==0) ){ + Tcl_WideInt v; + Tcl_GetWideIntFromObj(0, pVar, &v); + sqlite3_result_int64(pCtx, v); + }else{ + unsigned char *data = (unsigned char *)Tcl_GetStringFromObj(pVar, &n); + sqlite3_result_text(pCtx, (char *)data, n, SQLITE_TRANSIENT); + } + } +} + +static void xF5tDestroy(void *pCtx){ + F5tFunction *p = (F5tFunction*)pCtx; + Tcl_DecrRefCount(p->pScript); + ckfree(p); +} + +/* +** sqlite3_fts5_create_function DB NAME SCRIPT +** +** Description... 
+*/ +static int f5tCreateFunction( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + char *zName; + Tcl_Obj *pScript; + sqlite3 *db = 0; + sqlite3_stmt *pStmt = 0; + fts5_api *pApi = 0; + F5tFunction *pCtx = 0; + int rc; + + if( objc!=4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT"); + return TCL_ERROR; + } + if( f5tDbPointer(interp, objv[1], &db) ){ + return TCL_ERROR; + } + zName = Tcl_GetString(objv[2]); + pScript = objv[3]; + + rc = sqlite3_prepare_v2(db, "SELECT fts5()", -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); + return TCL_ERROR; + } + + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + const void *pPtr = sqlite3_column_blob(pStmt, 0); + memcpy((void*)&pApi, pPtr, sizeof(pApi)); + } + if( sqlite3_finalize(pStmt)!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); + return TCL_ERROR; + } + + pCtx = (F5tFunction*)ckalloc(sizeof(F5tFunction)); + pCtx->interp = interp; + pCtx->pScript = pScript; + Tcl_IncrRefCount(pScript); + + rc = pApi->xCreateFunction( + pApi, zName, (void*)pCtx, xF5tFunction, xF5tDestroy + ); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); + return TCL_ERROR; + } + + return TCL_OK; +} + +/* +** Entry point. 
+*/ +int Fts5tcl_Init(Tcl_Interp *interp){ + static struct Cmd { + char *zName; + Tcl_ObjCmdProc *xProc; + void *clientData; + } aCmd[] = { + { "sqlite3_fts5_create_function", f5tCreateFunction, 0 } + }; + int i; + + for(i=0; izName, p->xProc, p->clientData, 0); + } + + return TCL_OK; +} + diff --git a/main.mk b/main.mk index e30bb92c82..58044218a7 100644 --- a/main.mk +++ b/main.mk @@ -312,7 +312,8 @@ TESTSRC += \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/totype.c \ $(TOP)/ext/misc/wholenumber.c \ - $(TOP)/ext/misc/vfslog.c + $(TOP)/ext/misc/vfslog.c \ + $(TOP)/ext/fts5/fts5_tcl.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c diff --git a/manifest b/manifest index 91938fa8ea..27423bc7d7 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\scookie\smechanism\sto\sensure\sthat\sthe\s%_config\stable\sis\sre-read\sas\srequired. -D 2014-11-28T20:01:13.778 +C Add\scode\sto\sparse\sa\srank()\sfunction\sspecification.\sAnd\sa\stcl\sinterface\sto\sadd\sauxiliary\sfunctions\sto\sfts5. 
+D 2014-12-01T20:05:00.761 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,17 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c b3a2574be6921512133d228a922bc0bfb221c569 +F ext/fts5/fts5.c 07f81ce7ebbffdd0acdad9eb090ff506fa503a10 F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 -F ext/fts5/fts5Int.h a466dd67c909ac05ce8330acf13c7c5bfd244e15 +F ext/fts5/fts5Int.h e16cf2213ae748ccc2c890f404fc341eb941d10b F ext/fts5/fts5_aux.c 0e3e5fea6bf5772805afe14c95cb5f16e03e4b3f F ext/fts5/fts5_buffer.c c79d67a5a611521f1f3b9d495981f22c02ef4bdb -F ext/fts5/fts5_config.c c95d89bd3ee119681f0aeff0fa34ee9cd18fc430 +F ext/fts5/fts5_config.c bb87c2b915ae94002d94d02a6b1f81a0dac9c6db F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 7e7023f3a29f104b44df2ca2474b296b8dfe447c F ext/fts5/fts5_storage.c 0198c5976cefa5e8d3f1cfffa3587d0dd594fb2a +F ext/fts5/fts5_tcl.c 5272224faf9be129679da5e19d788f0307afc375 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -159,7 +160,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 8a02fddafc05159c4b7d65200e912cf549f978c1 +F main.mk 863a6f5cdcc3a47a9dcbedc9af37d3c0d4172935 F mkopcodec.awk 
c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -244,7 +245,7 @@ F src/sqliteInt.h fccdc735c27b3dc12322fec7cdad8bc76be8d00b F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 7ac05a5c7017d0b9f0b4bcd701228b784f987158 F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e -F src/tclsqlite.c e87c99e28a145943666b51b212dacae35fcea0bd +F src/tclsqlite.c 3a274c56cfc66b1f957afef201547213fc2ccecc F src/test1.c 3c8bc491d2f8de5adbbf306533cefc343c733927 F src/test2.c 98049e51a17dc62606a99a9eb95ee477f9996712 F src/test3.c 1c0e5d6f080b8e33c1ce8b3078e7013fdbcd560c @@ -609,7 +610,7 @@ F test/fts5ah.test 788e923e60b5e7a559f672cfbf262b8b260ea176 F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f -F test/fts5al.test 455b2bdc9f6ffb965a38a970a60c5075ee1e23bb +F test/fts5al.test d716a933bb88eb6986b02b985924fa42960b6eec F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1206,7 +1207,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 83491c56661ca78f96020ba68184bb3fb19e674f -R 14f6d2fef178e1939a8a8ad40901ad6e +P bb4a37b53de60da9ec8b9317eec14afa99690828 +R efa8336057fcd1502b8cbf6d797345c7 U dan -Z bcf001d05010ed5ade28bb9d53b64e80 +Z dc9192af5fedea55ad78c651e89e8c7b diff --git a/manifest.uuid b/manifest.uuid index 2e718d56bf..a405c3cc14 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bb4a37b53de60da9ec8b9317eec14afa99690828 \ No newline at end of file 
+9c1697a2aa1f601e6eb11704abe63a73c8105447 \ No newline at end of file diff --git a/src/tclsqlite.c b/src/tclsqlite.c index 9b977e54ae..92a107cd77 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -3698,6 +3698,7 @@ static void init_all(Tcl_Interp *interp){ extern int Sqlitemultiplex_Init(Tcl_Interp*); extern int SqliteSuperlock_Init(Tcl_Interp*); extern int SqlitetestSyscall_Init(Tcl_Interp*); + extern int Fts5tcl_Init(Tcl_Interp *); #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) extern int Sqlitetestfts3_Init(Tcl_Interp *interp); @@ -3740,6 +3741,7 @@ static void init_all(Tcl_Interp *interp){ Sqlitemultiplex_Init(interp); SqliteSuperlock_Init(interp); SqlitetestSyscall_Init(interp); + Fts5tcl_Init(interp); #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) Sqlitetestfts3_Init(interp); diff --git a/test/fts5al.test b/test/fts5al.test index 63e14a13a0..7739093c56 100644 --- a/test/fts5al.test +++ b/test/fts5al.test @@ -39,5 +39,106 @@ do_execsql_test 1.3 { SELECT * FROM ft1_config; } {pgsz 64} +#-------------------------------------------------------------------------- +# Test the logic for parsing the rank() function definition. 
+# +foreach {tn defn} { + 1 "fname()" + 2 "fname(1)" + 3 "fname(1,2)" + 4 "fname(null,NULL,nUlL)" + 5 " fname ( null , NULL , nUlL ) " + 6 "fname('abc')" + 7 "fname('a''bc')" + 8 "fname('''abc')" + 9 "fname('abc''')" + + 7 "fname( 'a''bc' )" + 8 "fname('''abc' )" + 9 "fname( 'abc''' )" + + 10 "fname(X'1234ab')" + + 11 "myfunc(1.2)" + 12 "myfunc(-1.0)" + 13 "myfunc(.01,'abc')" +} { + do_execsql_test 2.1.$tn { + INSERT INTO ft1(ft1, rank) VALUES('rank', $defn); + } +} + +foreach {tn defn} { + 1 "" + 2 "fname" + 3 "fname(X'234ab')" + 4 "myfunc(-1.,'abc')" +} { + do_test 2.2.$tn { + catchsql { INSERT INTO ft1(ft1, rank) VALUES('rank', $defn) } + } {1 {SQL logic error or missing database}} +} + +#------------------------------------------------------------------------- +# + +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1 VALUES('q w e r t y'); + INSERT INTO t1 VALUES('y t r e w q'); +} + +proc argtest {cmd args} { return $args } +sqlite3_fts5_create_function db argtest argtest + +do_execsql_test 3.2.1 { + SELECT argtest(t1, 123) FROM t1 WHERE t1 MATCH 'q' +} {123 123} + +do_execsql_test 3.2.2 { + SELECT argtest(t1, 123, 456) FROM t1 WHERE t1 MATCH 'q' +} {{123 456} {123 456}} + +proc rowidtest {cmd} { $cmd xRowid } +sqlite3_fts5_create_function db rowidtest rowidtest + +do_execsql_test 3.3.1 { + SELECT rowidtest(t1) FROM t1 WHERE t1 MATCH 'q' +} {2 1} + +proc insttest {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xInstCount]} {incr i} { + lappend res [$cmd xInst $i] + } + set res +} +sqlite3_fts5_create_function db insttest insttest + +do_execsql_test 3.4.1 { + SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'q' +} { + {{0 0 5}} + {{0 0 0}} +} + +do_execsql_test 3.4.2 { + SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w' +} { + {{0 0 2} {1 0 4}} + {{1 0 1}} +} + +proc coltest {cmd} { + list [$cmd xColumnSize 0] [$cmd xColumnText 0] +} +sqlite3_fts5_create_function db coltest coltest + +do_execsql_test 3.4.1 { + SELECT coltest(t1) 
FROM t1 WHERE t1 MATCH 'q' +} { + {6 {y t r e w q}} {6 {q w e r t y}} +} + finish_test From 615a9ae5db986bc7f7227922f324889d52cb4ed4 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 2 Dec 2014 20:18:11 +0000 Subject: [PATCH 052/206] Add a configuration option to remap the "rank" column to an auxiliary fts5 function. FossilOrigin-Name: b5f5971283b9b2f60c16f9675099855af95012cd --- ext/fts5/fts5.c | 126 +++++++++++++++++++++++++++++++++-------- ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_config.c | 6 +- manifest | 20 +++---- manifest.uuid | 2 +- test/fts5ae.test | 1 - test/fts5al.test | 78 ++++++++++++++++++++++++- 7 files changed, 196 insertions(+), 38 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 120c7e2738..54d3c4bbcd 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -157,9 +157,14 @@ struct Fts5Cursor { Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ int csrflags; /* Mask of cursor flags (see below) */ Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ - Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ char *zSpecial; /* Result of special query */ + /* "rank" function. Populated on demand from vtab.xColumn(). 
*/ + Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ + int nRankArg; /* Number of trailing arguments for rank() */ + sqlite3_value **apRankArg; /* Array of trailing arguments */ + sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */ + /* Variables used by auxiliary functions */ i64 iCsrId; /* Cursor id */ Fts5Auxiliary *pAux; /* Currently executing extension function */ @@ -539,6 +544,9 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); *pp = pCsr->pNext; + sqlite3_finalize(pCsr->pRankArgStmt); + sqlite3_free(pCsr->apRankArg); + sqlite3_free(pCsr->zSpecial); sqlite3_free(pCsr); return SQLITE_OK; @@ -633,6 +641,7 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ int nByte; int rc = SQLITE_OK; char *zSql; + const char *zRank = pConfig->zRank ? pConfig->zRank : FTS5_DEFAULT_RANK; nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase; @@ -648,8 +657,10 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ ** table, saving it creates a circular reference. ** ** If SQLite a built-in statement cache, this wouldn't be a problem. */ - zSql = sqlite3_mprintf("SELECT rowid, %s FROM %Q.%Q ORDER BY +%s %s", - pConfig->zName, pConfig->zDb, pConfig->zName, FTS5_RANK_NAME, + zSql = sqlite3_mprintf("SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s", + pConfig->zDb, pConfig->zName, zRank, pConfig->zName, + (pConfig->zRankArgs ? ", " : ""), + (pConfig->zRankArgs ? pConfig->zRankArgs : ""), bAsc ? "ASC" : "DESC" ); if( zSql==0 ){ @@ -721,6 +732,74 @@ static int fts5SpecialMatch( return rc; } +/* +** Search for an auxiliary function named zName that can be used with table +** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary +** structure. Otherwise, if no such function exists, return NULL. 
+*/ +static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){ + Fts5Auxiliary *pAux; + + for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ + if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux; + } + + /* No function of the specified name was found. Return 0. */ + return 0; +} + + +static int fts5FindRankFunction(Fts5Cursor *pCsr){ + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + Fts5Config *pConfig = pTab->pConfig; + const char *zRank = pConfig->zRank; + int rc = SQLITE_OK; + Fts5Auxiliary *pAux; + + if( zRank==0 ) zRank = FTS5_DEFAULT_RANK; + + if( pTab->pConfig->zRankArgs ){ + char *zSql = sqlite3_mprintf("SELECT %s", pTab->pConfig->zRankArgs); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 ); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + int nByte; + pCsr->nRankArg = sqlite3_column_count(pStmt); + nByte = sizeof(sqlite3_value*)*pCsr->nRankArg; + pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte); + if( rc==SQLITE_OK ){ + int i; + for(i=0; inRankArg; i++){ + pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i); + } + } + pCsr->pRankArgStmt = pStmt; + }else{ + rc = sqlite3_finalize(pStmt); + assert( rc!=SQLITE_OK ); + } + } + } + } + + if( rc==SQLITE_OK ){ + pAux = fts5FindAuxiliary(pTab, zRank); + if( pAux==0 ){ + assert( pTab->base.zErrMsg==0 ); + pTab->base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank); + rc = SQLITE_ERROR; + } + } + + pCsr->pRank = pAux; + return rc; +} /* ** This is the xFilter interface for the virtual table. See @@ -753,7 +832,6 @@ static int fts5FilterMethod( ** fts5CursorFirstSorted() above. 
*/ assert( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ); pCsr->idxNum = FTS5_PLAN_SOURCE; - pCsr->pRank = pTab->pSortCsr->pRank; pCsr->pExpr = pTab->pSortCsr->pExpr; rc = fts5CursorFirst(pTab, pCsr, bAsc); }else{ @@ -769,7 +847,6 @@ static int fts5FilterMethod( rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); }else{ char **pzErr = &pTab->base.zErrMsg; - pCsr->pRank = pTab->pGlobal->pAux; rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ if( ePlan==FTS5_PLAN_MATCH ){ @@ -1092,7 +1169,7 @@ static int fts5CacheInstArray(Fts5Cursor *pCsr){ aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); if( aIter ){ Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */ - int nInst; /* Number instances seen so far */ + int nInst = 0; /* Number instances seen so far */ int i; /* Initialize all iterators */ @@ -1426,19 +1503,23 @@ static int fts5ColumnMethod( }else if( iCol==pConfig->nCol ){ + /* User is requesting the value of the special column with the same name + ** as the table. Return the cursor integer id number. This value is only + ** useful in that it may be passed as the first argument to an FTS5 + ** auxiliary function. */ + sqlite3_result_int64(pCtx, pCsr->iCsrId); + }else if( iCol==pConfig->nCol+1 ){ + + /* The value of the "rank" column. */ if( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SOURCE ){ fts5PoslistBlob(pCtx, pCsr); - }else{ - /* User is requesting the value of the special column with the same name - ** as the table. Return the cursor integer id number. This value is only - ** useful in that it may be passed as the first argument to an FTS5 - ** auxiliary function. */ - sqlite3_result_int64(pCtx, pCsr->iCsrId); - } - }else if( iCol==pConfig->nCol+1 ){ - /* The value of the "rank" column. 
*/ - if( pCsr->pRank ){ - fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, 0, 0); + }else if( + FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_MATCH + || FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SORTED_MATCH + ){ + if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){ + fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); + } } }else{ rc = fts5SeekCursor(pCsr); @@ -1464,12 +1545,11 @@ static int fts5FindFunctionMethod( Fts5Table *pTab = (Fts5Table*)pVtab; Fts5Auxiliary *pAux; - for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ - if( sqlite3_stricmp(zName, pAux->zFunc)==0 ){ - *pxFunc = fts5ApiCallback; - *ppArg = (void*)pAux; - return 1; - } + pAux = fts5FindAuxiliary(pTab, zName); + if( pAux ){ + *pxFunc = fts5ApiCallback; + *ppArg = (void*)pAux; + return 1; } /* No function of the specified name was found. Return 0. */ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index c2aea79451..b3d5eed811 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -27,6 +27,7 @@ #define FTS5_MAX_PREFIX_INDEXES 31 #define FTS5_DEFAULT_NEARDIST 10 +#define FTS5_DEFAULT_RANK "bm25" /* Name of rank column */ #define FTS5_RANK_NAME "rank" diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 88a030f5b5..3cc1ffda46 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -468,11 +468,13 @@ static int fts5ConfigParseRank( p++; } if( rc==SQLITE_OK ){ - const char *pArgs = p; + const char *pArgs; + p = fts5ConfigSkipWhitespace(p); + pArgs = p; p = fts5ConfigSkipArgs(p); if( p==0 ){ rc = SQLITE_ERROR; - }else{ + }else if( p!=pArgs ){ zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); } diff --git a/manifest b/manifest index 27423bc7d7..60491b268b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\scode\sto\sparse\sa\srank()\sfunction\sspecification.\sAnd\sa\stcl\sinterface\sto\sadd\sauxiliary\sfunctions\sto\sfts5. 
-D 2014-12-01T20:05:00.761 +C Add\sa\sconfiguration\soption\sto\sremap\sthe\s"rank"\scolumn\sto\san\sauxiliary\sfts5\sfunction. +D 2014-12-02T20:18:11.604 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,12 +104,12 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 07f81ce7ebbffdd0acdad9eb090ff506fa503a10 +F ext/fts5/fts5.c 572bd5d4d272ca562240dc1905538f060783ab78 F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 -F ext/fts5/fts5Int.h e16cf2213ae748ccc2c890f404fc341eb941d10b +F ext/fts5/fts5Int.h 9dbf415de032b1cc770dcedaa5a8e434d88ca90c F ext/fts5/fts5_aux.c 0e3e5fea6bf5772805afe14c95cb5f16e03e4b3f F ext/fts5/fts5_buffer.c c79d67a5a611521f1f3b9d495981f22c02ef4bdb -F ext/fts5/fts5_config.c bb87c2b915ae94002d94d02a6b1f81a0dac9c6db +F ext/fts5/fts5_config.c 664fdc8519b55753f5c24d7b45176f05586b7965 F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 7e7023f3a29f104b44df2ca2474b296b8dfe447c @@ -603,14 +603,14 @@ F test/fts5aa.test 27c7d3c865e144a0501dcbfbd6d2ae87f77602ea F test/fts5ab.test 52f6b9223372ff70b0edb5a3054fbd7bc7fcfefc F test/fts5ac.test 60302196b7711176ce872fe2e4c73c75ac2c4038 F test/fts5ad.test ed60fdafc73d879b42573abcfa6ede7e02e07c19 -F test/fts5ae.test 6decf7634acd161af9583ce32ab7197b0113c5cd +F test/fts5ae.test 5de775469d45a2f8218fc89b8d6d5176c226d05e F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb F test/fts5ah.test 788e923e60b5e7a559f672cfbf262b8b260ea176 F 
test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f -F test/fts5al.test d716a933bb88eb6986b02b985924fa42960b6eec +F test/fts5al.test 61b067f3b0b61679ab164a8a855882dfd313988d F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1207,7 +1207,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bb4a37b53de60da9ec8b9317eec14afa99690828 -R efa8336057fcd1502b8cbf6d797345c7 +P 9c1697a2aa1f601e6eb11704abe63a73c8105447 +R 30808e5592c3e61509564bec30e4914f U dan -Z dc9192af5fedea55ad78c651e89e8c7b +Z 9589e0356694de369bd9f49ee042fc35 diff --git a/manifest.uuid b/manifest.uuid index a405c3cc14..124c8a0053 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9c1697a2aa1f601e6eb11704abe63a73c8105447 \ No newline at end of file +b5f5971283b9b2f60c16f9675099855af95012cd \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index 57b91452ad..07b1891618 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -274,6 +274,5 @@ foreach {tn q res} { } $res } - finish_test diff --git a/test/fts5al.test b/test/fts5al.test index 7739093c56..236fdf8521 100644 --- a/test/fts5al.test +++ b/test/fts5al.test @@ -80,6 +80,7 @@ foreach {tn defn} { } #------------------------------------------------------------------------- +# Assorted tests of the tcl interface for creating extension functions. 
# do_execsql_test 3.1 { @@ -134,11 +135,86 @@ proc coltest {cmd} { } sqlite3_fts5_create_function db coltest coltest -do_execsql_test 3.4.1 { +do_execsql_test 3.5.1 { SELECT coltest(t1) FROM t1 WHERE t1 MATCH 'q' } { {6 {y t r e w q}} {6 {q w e r t y}} } +#------------------------------------------------------------------------- +# Tests for remapping the "rank" column. +# +# 4.1.*: Mapped to a function with no arguments. +# 4.2.*: Mapped to a function with one or more arguments. +# + +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t2 USING fts5(a, b); + INSERT INTO t2 VALUES('a s h g s b j m r h', 's b p a d b b a o e'); + INSERT INTO t2 VALUES('r h n t a g r d d i', 'l d n j r c f t o q'); + INSERT INTO t2 VALUES('q k n i k c a a e m', 'c h n j p g s c i t'); + INSERT INTO t2 VALUES('h j g t r e l s g s', 'k q k c i i c k n s'); + INSERT INTO t2 VALUES('b l k h d n n n m i', 'p t i a r b t q o l'); + INSERT INTO t2 VALUES('k r i l j b g i p a', 't q c h a i m g n l'); + INSERT INTO t2 VALUES('a e c q n m o m d g', 'l c t g i s q g q e'); + INSERT INTO t2 VALUES('b o j h f o g b p e', 'r t l h s b g i c p'); + INSERT INTO t2 VALUES('s q k f q b j g h f', 'n m a o p e i e k t'); + INSERT INTO t2 VALUES('o q g g q c o k a b', 'r t k p t f t h p c'); +} + +proc firstinst {cmd} { + foreach {p c o} [$cmd xInst 0] {} + expr $c*100 + $o +} +sqlite3_fts5_create_function db firstinst firstinst + +do_execsql_test 4.1.1 { + SELECT rowid, firstinst(t2) FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC +} { + 1 0 2 4 3 6 5 103 + 6 9 7 0 9 102 10 8 +} + +do_execsql_test 4.1.2 { + INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()'); + SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC +} { + 1 0 2 4 3 6 5 103 + 6 9 7 0 9 102 10 8 +} + +do_execsql_test 4.1.3 { + SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC +} { + 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 +} + +do_execsql_test 4.1.4 { + INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) '); + SELECT 
rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC +} { + 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 +} + +proc rowidplus {cmd ival} { + expr [$cmd xRowid] + $ival +} +sqlite3_fts5_create_function db rowidplus rowidplus + +do_execsql_test 4.2.1 { + INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(100) '); + SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g' +} { + 10 110 +} +do_execsql_test 4.2.2 { + INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) '); + SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g' +} { + 10 121 +} + + + finish_test From 845d0ab323b05b920198af4532289fc6621f40f7 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 3 Dec 2014 17:27:35 +0000 Subject: [PATCH 053/206] Begin testing fts5 OOM and IO error handling. FossilOrigin-Name: 2037dba62fdd995ad15b642abe499a790f5ffe5c --- ext/fts5/fts5.c | 23 +- ext/fts5/fts5Int.h | 25 +- ext/fts5/fts5_buffer.c | 14 + ext/fts5/fts5_config.c | 80 +-- ext/fts5/fts5_expr.c | 38 +- ext/fts5/fts5_index.c | 1123 ++++++++++++++++++++------------------- ext/fts5/fts5_storage.c | 41 +- manifest | 27 +- manifest.uuid | 2 +- test/fts5fault1.test | 37 ++ test/permutations.test | 6 +- 11 files changed, 744 insertions(+), 672 deletions(-) create mode 100644 test/fts5fault1.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 54d3c4bbcd..e80715a4c7 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -308,24 +308,19 @@ static int fts5InitVtab( ){ Fts5Global *pGlobal = (Fts5Global*)pAux; const char **azConfig = (const char**)argv; - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ Fts5Config *pConfig; /* Results of parsing argc/argv */ Fts5Table *pTab = 0; /* New virtual table object */ - /* Parse the arguments */ - rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); - assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); - - /* Allocate the new vtab object */ + /* Allocate the new vtab object and parse the configuration */ + pTab = (Fts5Table*)sqlite3Fts5MallocZero(&rc, 
sizeof(Fts5Table)); if( rc==SQLITE_OK ){ - pTab = (Fts5Table*)sqlite3_malloc(sizeof(Fts5Table)); - if( pTab==0 ){ - rc = SQLITE_NOMEM; - }else{ - memset(pTab, 0, sizeof(Fts5Table)); - pTab->pConfig = pConfig; - pTab->pGlobal = pGlobal; - } + rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); + assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); + } + if( rc==SQLITE_OK ){ + pTab->pConfig = pConfig; + pTab->pGlobal = pGlobal; } /* Open the index sub-system */ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index b3d5eed811..684b34f009 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -187,6 +187,9 @@ int sqlite3Fts5PoslistNext64( i64 *piOff /* IN/OUT: Current offset */ ); +/* Malloc utility */ +void *sqlite3Fts5MallocZero(int *pRc, int nByte); + /* ** End of interface to code in fts5_buffer.c. **************************************************************************/ @@ -225,24 +228,25 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy); ** Open a new iterator to iterate though all docids that match the ** specified token or token prefix. */ -Fts5IndexIter *sqlite3Fts5IndexQuery( +int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ - int flags /* Mask of FTS5INDEX_QUERY_X flags */ + int flags, /* Mask of FTS5INDEX_QUERY_X flags */ + Fts5IndexIter **ppIter ); /* ** Docid list iteration. */ -int sqlite3Fts5IterEof(Fts5IndexIter*); -void sqlite3Fts5IterNext(Fts5IndexIter*); -void sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); -i64 sqlite3Fts5IterRowid(Fts5IndexIter*); +int sqlite3Fts5IterEof(Fts5IndexIter*); +int sqlite3Fts5IterNext(Fts5IndexIter*); +int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); +i64 sqlite3Fts5IterRowid(Fts5IndexIter*); /* ** Obtain the position list that corresponds to the current position. 
*/ -const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter*, int *pn); +int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). @@ -259,7 +263,7 @@ void sqlite3Fts5IterClose(Fts5IndexIter*); ** unique token in the document with an iCol value less than zero. The iPos ** argument is ignored for a delete. */ -void sqlite3Fts5IndexWrite( +int sqlite3Fts5IndexWrite( Fts5Index *p, /* Index to write to */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ @@ -270,7 +274,7 @@ void sqlite3Fts5IndexWrite( ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to ** document iDocid. */ -void sqlite3Fts5IndexBeginWrite( +int sqlite3Fts5IndexBeginWrite( Fts5Index *p, /* Index to write to */ i64 iDocid /* Docid to add or remove data from */ ); @@ -321,9 +325,6 @@ int sqlite3Fts5IndexSetCookie(Fts5Index*, int); */ int sqlite3Fts5IndexReads(Fts5Index *p); -/* Malloc utility */ -void *sqlite3Fts5MallocZero(int *pRc, int nByte); - /* ** End of interface to code in fts5_index.c. **************************************************************************/ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 478b903614..59eb1b8752 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -282,4 +282,18 @@ void sqlite3Fts5BufferAppendListElem( *pOut = '\0'; } +void *sqlite3Fts5MallocZero(int *pRc, int nByte){ + void *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3_malloc(nByte); + if( pRet==0 && nByte>0 ){ + *pRc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + } + } + return pRet; +} + + diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 3cc1ffda46..9ea78143c5 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -115,8 +115,13 @@ static int fts5ConfigParseSpecial( ** ** Return 0 if an OOM error is encountered. 
*/ -static char *fts5Strdup(const char *z){ - return sqlite3_mprintf("%s", z); +static char *fts5Strdup(int *pRc, const char *z){ + char *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3_mprintf("%s", z); + if( pRet==0 ) *pRc = SQLITE_NOMEM; + } + return pRet; } /* @@ -159,44 +164,41 @@ int sqlite3Fts5ConfigParse( pRet->db = db; pRet->iCookie = -1; - pRet->azCol = (char**)sqlite3_malloc(sizeof(char*) * nArg); - pRet->zDb = fts5Strdup(azArg[1]); - pRet->zName = fts5Strdup(azArg[2]); - if( sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ - *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); - rc = SQLITE_ERROR; - }else if( pRet->azCol==0 || pRet->zDb==0 || pRet->zName==0 ){ - rc = SQLITE_NOMEM; - }else{ - int i; - for(i=3; rc==SQLITE_OK && iazCol = (char**)sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); + pRet->zDb = fts5Strdup(&rc, azArg[1]); + pRet->zName = fts5Strdup(&rc, azArg[2]); + if( rc==SQLITE_OK ){ + if( sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ + *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); + rc = SQLITE_ERROR; + }else{ + int i; + for(i=3; rc==SQLITE_OK && iazCol[pRet->nCol++] = zDup; - if( sqlite3_stricmp(zDup, FTS5_RANK_NAME)==0 ){ - *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zDup); - rc = SQLITE_ERROR; + /* Check if this is a special directive - "cmd=arg" */ + if( zDup[0]!='"' && zDup[0]!='\'' && zDup[0]!='[' && zDup[0]!='`' ){ + char *p = zDup; + while( *p && *p!='=' ) p++; + if( *p ){ + char *zArg = &p[1]; + *p = '\0'; + sqlite3Fts5Dequote(zArg); + rc = fts5ConfigParseSpecial(pRet, zDup, zArg, pzErr); + sqlite3_free(zDup); + zDup = 0; + } + } + + /* If it is not a special directive, it must be a column name. In + ** this case, check that it is not the reserved column name "rank". 
*/ + if( zDup ){ + sqlite3Fts5Dequote(zDup); + pRet->azCol[pRet->nCol++] = zDup; + if( sqlite3_stricmp(zDup, FTS5_RANK_NAME)==0 ){ + *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zDup); + rc = SQLITE_ERROR; + } } } } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index efb91dadb6..5c95eda7e4 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -386,8 +386,11 @@ static int fts5ExprPhraseIsMatch( /* Initialize a term iterator for each term in the phrase */ for(i=0; inTerm; i++){ int n; - const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &n); - if( sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ) goto ismatch_out; + const u8 *a; + rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n); + if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){ + goto ismatch_out; + } } while( 1 ){ @@ -576,22 +579,21 @@ static int fts5ExprNearAdvanceAll( Fts5ExprNearset *pNear, /* Near object to advance iterators of */ int *pbEof /* OUT: Set to true if phrase at EOF */ ){ - int rc = SQLITE_OK; /* Return code */ int i, j; /* Phrase and token index, respectively */ for(i=0; inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; for(j=0; jnTerm; j++){ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - sqlite3Fts5IterNext(pIter); - if( sqlite3Fts5IterEof(pIter) ){ + int rc = sqlite3Fts5IterNext(pIter); + if( rc || sqlite3Fts5IterEof(pIter) ){ *pbEof = 1; return rc; } } } - return rc; + return SQLITE_OK; } /* @@ -711,21 +713,21 @@ static int fts5ExprNearNextMatch( rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); if( pNode->bEof || rc!=SQLITE_OK ) break; - for(i=0; inPhrase; i++){ + for(i=0; rc==SQLITE_OK && inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); - if( rc!=SQLITE_OK ) return rc; if( bMatch==0 ) break; }else{ int n; - const u8 *a = 
sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n); + const u8 *a; + rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &a, &n); fts5BufferSet(&rc, &pPhrase->poslist, n, a); } } - if( i==pNear->nPhrase ){ + if( rc==SQLITE_OK && i==pNear->nPhrase ){ int bMatch = 1; if( pNear->nPhrase>1 ){ rc = fts5ExprNearIsMatch(pNear, &bMatch); @@ -735,7 +737,9 @@ static int fts5ExprNearNextMatch( /* If control flows to here, then the current rowid is not a match. ** Advance all term iterators in all phrases to the next rowid. */ - rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof); + if( rc==SQLITE_OK ){ + rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof); + } if( pNode->bEof || rc!=SQLITE_OK ) break; } @@ -755,24 +759,26 @@ static int fts5ExprNearInitAll( Fts5ExprTerm *pTerm; Fts5ExprPhrase *pPhrase; int i, j; + int rc = SQLITE_OK; - for(i=0; inPhrase; i++){ + for(i=0; rc==SQLITE_OK && inPhrase; i++){ pPhrase = pNear->apPhrase[i]; for(j=0; jnTerm; j++){ pTerm = &pPhrase->aTerm[j]; - pTerm->pIter = sqlite3Fts5IndexQuery( + rc = sqlite3Fts5IndexQuery( pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0) + (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0), + &pTerm->pIter ); if( pTerm->pIter && sqlite3Fts5IterEof(pTerm->pIter) ){ pNode->bEof = 1; - return SQLITE_OK; + break; } } } - return SQLITE_OK; + return rc; } /* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. 
*/ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index b51e7bad11..7d0d01afdf 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -262,12 +262,6 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif -#ifdef SQLITE_DEBUG -static int fts5MissingData() { return 0; } -#else -# define fts5MissingData() -#endif - typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; @@ -592,78 +586,6 @@ struct Fts5BtreeIter { }; -/* -** Decode a segment-data rowid from the %_data table. This function is -** the opposite of macro FTS5_SEGMENT_ROWID(). -*/ -static void fts5DecodeRowid( - i64 iRowid, /* Rowid from %_data table */ - int *piIdx, /* OUT: Index */ - int *piSegid, /* OUT: Segment id */ - int *piHeight, /* OUT: Height */ - int *piPgno /* OUT: Page number */ -){ - *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); - iRowid >>= FTS5_DATA_PAGE_B; - - *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); - iRowid >>= FTS5_DATA_HEIGHT_B; - - *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); - iRowid >>= FTS5_DATA_ID_B; - - *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); -} - -static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ - int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ - fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); - - if( iSegid==0 ){ - if( iKey==FTS5_AVERAGES_ROWID ){ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); - }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - "{structure idx=%d}", (int)(iKey-10) - ); - } - } - else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)", - iIdx, iSegid, iPgno - ); - }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)", - iIdx, iSegid, iHeight, iPgno - ); - } -} - -static void fts5DebugStructure( - int *pRc, /* IN/OUT: error code */ - 
Fts5Buffer *pBuf, - Fts5Structure *p -){ - int iLvl, iSeg; /* Iterate through levels, segments */ - - for(iLvl=0; iLvlnLevel; iLvl++){ - Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge - ); - for(iSeg=0; iSegnSeg; iSeg++){ - Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, - pSeg->pgnoFirst, pSeg->pgnoLast - ); - } - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); - } -} - - - static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); @@ -691,19 +613,6 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ return pRet; } -void *sqlite3Fts5MallocZero(int *pRc, int nByte){ - void *pRet = 0; - if( *pRc==SQLITE_OK ){ - pRet = sqlite3_malloc(nByte); - if( pRet==0 && nByte>0 ){ - *pRc = SQLITE_NOMEM; - }else{ - memset(pRet, 0, nByte); - } - } - return pRet; -} - /* ** Compare the contents of the pLeft buffer with the pRight/nRight blob. ** @@ -794,8 +703,6 @@ sqlite3_free(buf.p); ); } - if( rc ) fts5MissingData(); - if( rc==SQLITE_OK ){ int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ @@ -2475,47 +2382,6 @@ static void fts5AddTermToHash( } } -/* -** Insert or remove data to or from the index. Each time a document is -** added to or removed from the index, this function is called one or more -** times. -** -** For an insert, it must be called once for each token in the new document. -** If the operation is a delete, it must be called (at least) once for each -** unique token in the document with an iCol value less than zero. The iPos -** argument is ignored for a delete. 
-*/ -void sqlite3Fts5IndexWrite( - Fts5Index *p, /* Index to write to */ - int iCol, /* Column token appears in (-ve -> delete) */ - int iPos, /* Position of token within column */ - const char *pToken, int nToken /* Token to add or remove to or from index */ -){ - int i; /* Used to iterate through indexes */ - Fts5Config *pConfig = p->pConfig; - - /* If an error has already occured this call is a no-op. */ - if( p->rc!=SQLITE_OK ) return; - - /* Allocate hash tables if they have not already been allocated */ - if( p->apHash==0 ){ - int nHash = pConfig->nPrefix + 1; - p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash); - for(i=0; p->rc==SQLITE_OK && irc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData); - } - } - - /* Add the new token to the main terms hash table. And to each of the - ** prefix hash tables that it is large enough for. */ - fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); - for(i=0; inPrefix; i++){ - if( nToken>=pConfig->aPrefix[i] ){ - fts5AddTermToHash(p, i+1, iCol, iPos, pToken, pConfig->aPrefix[i]); - } - } -} - /* ** Allocate a new segment-id for the structure pStruct. ** @@ -3342,111 +3208,6 @@ static void fts5IndexFlush(Fts5Index *p){ p->nPendingData = 0; } -/* -** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain -** to the document with rowid iRowid. -*/ -void sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ - if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ - fts5IndexFlush(p); - } - p->iWriteRowid = iRowid; -} - -/* -** Commit data to disk. -*/ -int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ - fts5IndexFlush(p); - if( bCommit ) fts5CloseReader(p); - return p->rc; -} - -/* -** Discard any data stored in the in-memory hash tables. Do not write it -** to the database. Additionally, assume that the contents of the %_data -** table may have changed on disk. So any in-memory caches of %_data -** records must be invalidated. 
-*/ -int sqlite3Fts5IndexRollback(Fts5Index *p){ - fts5CloseReader(p); - fts5IndexDiscardData(p); - return SQLITE_OK; -} - -/* -** Open a new Fts5Index handle. If the bCreate argument is true, create -** and initialize the underlying %_data table. -** -** If successful, set *pp to point to the new object and return SQLITE_OK. -** Otherwise, set *pp to NULL and return an SQLite error code. -*/ -int sqlite3Fts5IndexOpen( - Fts5Config *pConfig, - int bCreate, - Fts5Index **pp, - char **pzErr -){ - int rc = SQLITE_OK; - Fts5Index *p; /* New object */ - - *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); - if( !p ) return SQLITE_NOMEM; - - memset(p, 0, sizeof(Fts5Index)); - p->pConfig = pConfig; - p->nCrisisMerge = FTS5_CRISIS_MERGE; - p->nWorkUnit = FTS5_WORK_UNIT; - p->nMaxPendingData = 1024*1024; - p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); - if( p->zDataTbl==0 ){ - rc = SQLITE_NOMEM; - }else if( bCreate ){ - int i; - Fts5Structure s; - rc = sqlite3Fts5CreateTable( - pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr - ); - if( rc==SQLITE_OK ){ - memset(&s, 0, sizeof(Fts5Structure)); - for(i=0; inPrefix+1; i++){ - fts5StructureWrite(p, i, &s); - } - rc = p->rc; - } - sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); - } - - if( rc ){ - sqlite3Fts5IndexClose(p, 0); - *pp = 0; - } - return rc; -} - -/* -** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). -*/ -int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ - int rc = SQLITE_OK; - if( bDestroy ){ - rc = sqlite3Fts5DropTable(p->pConfig, "data"); - } - assert( p->pReader==0 ); - sqlite3_finalize(p->pWriter); - sqlite3_finalize(p->pDeleter); - if( p->apHash ){ - int i; - for(i=0; i<=p->pConfig->nPrefix; i++){ - sqlite3Fts5HashFree(p->apHash[i]); - } - sqlite3_free(p->apHash); - } - sqlite3_free(p->zDataTbl); - sqlite3_free(p); - return rc; -} - /* ** Return a simple checksum value based on the arguments. 
*/ @@ -3465,31 +3226,6 @@ static u64 fts5IndexEntryCksum( return ret; } -/* -** Calculate and return a checksum that is the XOR of the index entry -** checksum of all entries that would be generated by the token specified -** by the final 5 arguments. -*/ -u64 sqlite3Fts5IndexCksum( - Fts5Config *pConfig, /* Configuration object */ - i64 iRowid, /* Document term appears in */ - int iCol, /* Column term appears in */ - int iPos, /* Position term appears in */ - const char *pTerm, int nTerm /* Term at iPos */ -){ - u64 ret = 0; /* Return value */ - int iIdx; /* For iterating through indexes */ - - for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ - int n = ((iIdx==pConfig->nPrefix) ? nTerm : pConfig->aPrefix[iIdx]); - if( n<=nTerm ){ - ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, n); - } - } - - return ret; -} - static void fts5BtreeIterInit( Fts5Index *p, int iIdx, @@ -3735,284 +3471,6 @@ static void fts5IndexIntegrityCheckSegment( fts5BtreeIterFree(&iter); } -/* -** Run internal checks to ensure that the FTS index (a) is internally -** consistent and (b) contains entries for which the XOR of the checksums -** as calculated by fts5IndexEntryCksum() is cksum. -** -** Return SQLITE_CORRUPT if any of the internal checks fail, or if the -** checksum does not match. Return SQLITE_OK if all checks pass without -** error, or some other SQLite error code if another error (e.g. OOM) -** occurs. 
-*/ -int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ - Fts5Config *pConfig = p->pConfig; - int iIdx; /* Used to iterate through indexes */ - int rc; /* Return code */ - u64 cksum2 = 0; /* Checksum based on contents of indexes */ - - /* Check that the checksum of the index matches the argument checksum */ - for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ - Fts5MultiSegIter *pIter; - Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter); - fts5MultiIterEof(p, pIter)==0; - fts5MultiIterNext(p, pIter, 0, 0) - ){ - Fts5PosIter sPos; /* Used to iterate through position list */ - int n; /* Size of term in bytes */ - i64 iRowid = fts5MultiIterRowid(pIter); - char *z = (char*)fts5MultiIterTerm(pIter, &n); - - for(fts5PosIterInit(p, pIter, &sPos); - fts5PosIterEof(p, &sPos)==0; - fts5PosIterNext(p, &sPos) - ){ - cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n); -#if 0 - fprintf(stdout, "rowid=%d ", (int)iRowid); - fprintf(stdout, "term=%.*s ", n, z); - fprintf(stdout, "col=%d ", sPos.iCol); - fprintf(stdout, "off=%d\n", sPos.iPos); - fflush(stdout); -#endif - } - } - fts5MultiIterFree(p, pIter); - fts5StructureRelease(pStruct); - } - rc = p->rc; - if( rc==SQLITE_OK && cksum!=cksum2 ) rc = FTS5_CORRUPT; - - /* Check that the internal nodes of each segment match the leaves */ - for(iIdx=0; rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ - Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - if( pStruct ){ - int iLvl, iSeg; - for(iLvl=0; iLvlnLevel; iLvl++){ - for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ - Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; - fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); - } - } - } - fts5StructureRelease(pStruct); - rc = p->rc; - } - - return rc; -} - -/* -** This is part of the fts5_decode() debugging aid. -** -** Arguments pBlob/nBlob contain a serialized Fts5Structure object. 
This -** function appends a human-readable representation of the same object -** to the buffer passed as the second argument. -*/ -static void fts5DecodeStructure( - int *pRc, /* IN/OUT: error code */ - Fts5Buffer *pBuf, - const u8 *pBlob, int nBlob -){ - int rc; /* Return code */ - Fts5Structure *p = 0; /* Decoded structure object */ - - rc = fts5StructureDecode(pBlob, nBlob, 0, &p); - if( rc!=SQLITE_OK ){ - *pRc = rc; - return; - } - - fts5DebugStructure(pRc, pBuf, p); - fts5StructureRelease(p); -} - -/* -** Buffer (a/n) is assumed to contain a list of serialized varints. Read -** each varint and append its string representation to buffer pBuf. Return -** after either the input buffer is exhausted or a 0 value is read. -** -** The return value is the number of bytes read from the input buffer. -*/ -static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ - int iOff = 0; - while( iOff0 ){ - i = getVarint(&a[i], (u64*)&iPrev); - sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev); - } - while( irc; + p->rc = SQLITE_OK; + return rc; +} + +/* +** Run internal checks to ensure that the FTS index (a) is internally +** consistent and (b) contains entries for which the XOR of the checksums +** as calculated by fts5IndexEntryCksum() is cksum. +** +** Return SQLITE_CORRUPT if any of the internal checks fail, or if the +** checksum does not match. Return SQLITE_OK if all checks pass without +** error, or some other SQLite error code if another error (e.g. OOM) +** occurs. 
+*/ +int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ + Fts5Config *pConfig = p->pConfig; + int iIdx; /* Used to iterate through indexes */ + int rc; /* Return code */ + u64 cksum2 = 0; /* Checksum based on contents of indexes */ + + /* Check that the checksum of the index matches the argument checksum */ + for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ + Fts5MultiSegIter *pIter; + Fts5Structure *pStruct = fts5StructureRead(p, iIdx); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter); + fts5MultiIterEof(p, pIter)==0; + fts5MultiIterNext(p, pIter, 0, 0) + ){ + Fts5PosIter sPos; /* Used to iterate through position list */ + int n; /* Size of term in bytes */ + i64 iRowid = fts5MultiIterRowid(pIter); + char *z = (char*)fts5MultiIterTerm(pIter, &n); + + for(fts5PosIterInit(p, pIter, &sPos); + fts5PosIterEof(p, &sPos)==0; + fts5PosIterNext(p, &sPos) + ){ + cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n); +#if 0 + fprintf(stdout, "rowid=%d ", (int)iRowid); + fprintf(stdout, "term=%.*s ", n, z); + fprintf(stdout, "col=%d ", sPos.iCol); + fprintf(stdout, "off=%d\n", sPos.iPos); + fflush(stdout); +#endif + } + } + fts5MultiIterFree(p, pIter); + fts5StructureRelease(pStruct); + } + rc = p->rc; + if( rc==SQLITE_OK && cksum!=cksum2 ) rc = FTS5_CORRUPT; + + /* Check that the internal nodes of each segment match the leaves */ + for(iIdx=0; rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ + Fts5Structure *pStruct = fts5StructureRead(p, iIdx); + if( pStruct ){ + int iLvl, iSeg; + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); + } + } + } + fts5StructureRelease(pStruct); + rc = p->rc; + } + + return rc; +} + + +/* +** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain +** to the document with rowid iRowid. 
+*/ +int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ + assert( p->rc==SQLITE_OK ); + if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ + fts5IndexFlush(p); + } + p->iWriteRowid = iRowid; + return fts5IndexReturn(p); +} + +/* +** Commit data to disk. +*/ +int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ + assert( p->rc==SQLITE_OK ); + fts5IndexFlush(p); + if( bCommit ) fts5CloseReader(p); + return fts5IndexReturn(p); +} + +/* +** Discard any data stored in the in-memory hash tables. Do not write it +** to the database. Additionally, assume that the contents of the %_data +** table may have changed on disk. So any in-memory caches of %_data +** records must be invalidated. +*/ +int sqlite3Fts5IndexRollback(Fts5Index *p){ + fts5CloseReader(p); + fts5IndexDiscardData(p); + assert( p->rc==SQLITE_OK ); + return SQLITE_OK; +} + +/* +** Open a new Fts5Index handle. If the bCreate argument is true, create +** and initialize the underlying %_data table. +** +** If successful, set *pp to point to the new object and return SQLITE_OK. +** Otherwise, set *pp to NULL and return an SQLite error code. 
+*/ +int sqlite3Fts5IndexOpen( + Fts5Config *pConfig, + int bCreate, + Fts5Index **pp, + char **pzErr +){ + int rc = SQLITE_OK; + Fts5Index *p; /* New object */ + + *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); + if( !p ) return SQLITE_NOMEM; + + memset(p, 0, sizeof(Fts5Index)); + p->pConfig = pConfig; + p->nCrisisMerge = FTS5_CRISIS_MERGE; + p->nWorkUnit = FTS5_WORK_UNIT; + p->nMaxPendingData = 1024*1024; + p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); + if( p->zDataTbl==0 ){ + rc = SQLITE_NOMEM; + }else if( bCreate ){ + int i; + Fts5Structure s; + rc = sqlite3Fts5CreateTable( + pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr + ); + if( rc==SQLITE_OK ){ + memset(&s, 0, sizeof(Fts5Structure)); + for(i=0; inPrefix+1; i++){ + fts5StructureWrite(p, i, &s); + } + rc = p->rc; + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); + } + } + + assert( p->rc==SQLITE_OK || rc!=SQLITE_OK ); + if( rc ){ + sqlite3Fts5IndexClose(p, 0); + *pp = 0; + } + return rc; +} + +/* +** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). +*/ +int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ + int rc = SQLITE_OK; + if( p ){ + if( bDestroy ){ + rc = sqlite3Fts5DropTable(p->pConfig, "data"); + } + assert( p->pReader==0 ); + sqlite3_finalize(p->pWriter); + sqlite3_finalize(p->pDeleter); + if( p->apHash ){ + int i; + for(i=0; i<=p->pConfig->nPrefix; i++){ + sqlite3Fts5HashFree(p->apHash[i]); + } + sqlite3_free(p->apHash); + } + sqlite3_free(p->zDataTbl); + sqlite3_free(p); + } + return rc; +} + +/* +** Calculate and return a checksum that is the XOR of the index entry +** checksum of all entries that would be generated by the token specified +** by the final 5 arguments. 
+*/ +u64 sqlite3Fts5IndexCksum( + Fts5Config *pConfig, /* Configuration object */ + i64 iRowid, /* Document term appears in */ + int iCol, /* Column term appears in */ + int iPos, /* Position term appears in */ + const char *pTerm, int nTerm /* Term at iPos */ +){ + u64 ret = 0; /* Return value */ + int iIdx; /* For iterating through indexes */ + + for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ + int n = ((iIdx==pConfig->nPrefix) ? nTerm : pConfig->aPrefix[iIdx]); + if( n<=nTerm ){ + ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, n); + } + } + + return ret; +} + +/* +** Insert or remove data to or from the index. Each time a document is +** added to or removed from the index, this function is called one or more +** times. +** +** For an insert, it must be called once for each token in the new document. +** If the operation is a delete, it must be called (at least) once for each +** unique token in the document with an iCol value less than zero. The iPos +** argument is ignored for a delete. +*/ +int sqlite3Fts5IndexWrite( + Fts5Index *p, /* Index to write to */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + int i; /* Used to iterate through indexes */ + Fts5Config *pConfig = p->pConfig; + assert( p->rc==SQLITE_OK ); + + /* Allocate hash tables if they have not already been allocated */ + if( p->apHash==0 ){ + int nHash = pConfig->nPrefix + 1; + p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash); + for(i=0; p->rc==SQLITE_OK && irc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData); + } + } + + /* Add the new token to the main terms hash table. And to each of the + ** prefix hash tables that it is large enough for. 
*/ + fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); + for(i=0; inPrefix; i++){ + if( nToken>=pConfig->aPrefix[i] ){ + fts5AddTermToHash(p, i+1, iCol, iPos, pToken, pConfig->aPrefix[i]); + } + } + + return fts5IndexReturn(p); +} + /* ** Open a new iterator to iterate though all docids that match the ** specified token or token prefix. */ -Fts5IndexIter *sqlite3Fts5IndexQuery( +int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ - int flags /* Mask of FTS5INDEX_QUERY_X flags */ + int flags, /* Mask of FTS5INDEX_QUERY_X flags */ + Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5IndexIter *pRet; int iIdx = 0; @@ -4281,7 +3995,7 @@ Fts5IndexIter *sqlite3Fts5IndexQuery( } } - pRet = (Fts5IndexIter*)sqlite3_malloc(sizeof(Fts5IndexIter)); + pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); if( pRet ){ memset(pRet, 0, sizeof(Fts5IndexIter)); @@ -4303,13 +4017,15 @@ Fts5IndexIter *sqlite3Fts5IndexQuery( sqlite3Fts5IterClose(pRet); pRet = 0; } - return pRet; + *ppIter = pRet; + return fts5IndexReturn(p); } /* ** Return true if the iterator passed as the only argument is at EOF. */ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ + assert( pIter->pIndex->rc==SQLITE_OK ); if( pIter->pDoclist ){ return pIter->pDoclist->aPoslist==0; }else{ @@ -4320,13 +4036,15 @@ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ /* ** Move to the next matching rowid. 
*/ -void sqlite3Fts5IterNext(Fts5IndexIter *pIter){ +int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ + assert( pIter->pIndex->rc==SQLITE_OK ); if( pIter->pDoclist ){ fts5DoclistIterNext(pIter->pDoclist); }else{ fts5BufferZero(&pIter->poslist); fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); } + return fts5IndexReturn(pIter->pIndex); } /* @@ -4334,13 +4052,14 @@ void sqlite3Fts5IterNext(Fts5IndexIter *pIter){ ** definition of "at or after" depends on whether this iterator iterates ** in ascending or descending rowid order. */ -void sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ +int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ if( pIter->pDoclist ){ assert( 0 ); /* fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); */ }else{ fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); } + return fts5IndexReturn(pIter->pIndex); } /* @@ -4363,19 +4082,19 @@ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ ** The returned buffer does not include the 0x00 terminator byte stored on ** disk. */ -const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){ +int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ + assert( pIter->pIndex->rc==SQLITE_OK ); if( pIter->pDoclist ){ *pn = pIter->pDoclist->nPoslist; - return pIter->pDoclist->aPoslist; + *pp = pIter->pDoclist->aPoslist; }else{ Fts5Index *p = pIter->pIndex; fts5BufferZero(&pIter->poslist); fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist); - assert( p->rc==SQLITE_OK ); - if( p->rc ) return 0; *pn = pIter->poslist.n; - return pIter->poslist.p; + *pp = pIter->poslist.p; } + return fts5IndexReturn(pIter->pIndex); } /* @@ -4402,8 +4121,9 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ ** if an error occurs. 
*/ int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ + assert( p->rc==SQLITE_OK ); fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); - return p->rc; + return fts5IndexReturn(p); } /* @@ -4411,8 +4131,9 @@ int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ ** supplied as the second argument. */ int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ + assert( p->rc==SQLITE_OK ); fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); - return p->rc; + return fts5IndexReturn(p); } /* @@ -4424,8 +4145,8 @@ int sqlite3Fts5IndexReads(Fts5Index *p){ } /* -** Set the 32-bit cookie value at the start of all structure records to -** the value passed as the second argument. +** Set the 32-bit cookie value stored at the start of all structure +** records to the value passed as the second argument. ** ** Return SQLITE_OK if successful, or an SQLite error code if an error ** occurs. @@ -4436,6 +4157,7 @@ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ u8 aCookie[4]; int i; + assert( p->rc==SQLITE_OK ); sqlite3Fts5Put32(aCookie, iNew); for(i=0; rc==SQLITE_OK && i<=pConfig->nPrefix; i++){ sqlite3_blob *pBlob = 0; @@ -4452,3 +4174,290 @@ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ return rc; } +/************************************************************************* +************************************************************************** +** Below this point is the implementation of the fts5_decode() scalar +** function only. +*/ + +/* +** Decode a segment-data rowid from the %_data table. This function is +** the opposite of macro FTS5_SEGMENT_ROWID(). 
+*/ +static void fts5DecodeRowid( + i64 iRowid, /* Rowid from %_data table */ + int *piIdx, /* OUT: Index */ + int *piSegid, /* OUT: Segment id */ + int *piHeight, /* OUT: Height */ + int *piPgno /* OUT: Page number */ +){ + *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); + iRowid >>= FTS5_DATA_PAGE_B; + + *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); + iRowid >>= FTS5_DATA_HEIGHT_B; + + *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); + iRowid >>= FTS5_DATA_ID_B; + + *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); +} + +static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ + int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); + + if( iSegid==0 ){ + if( iKey==FTS5_AVERAGES_ROWID ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); + }else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + "{structure idx=%d}", (int)(iKey-10) + ); + } + } + else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)", + iIdx, iSegid, iPgno + ); + }else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)", + iIdx, iSegid, iHeight, iPgno + ); + } +} + +static void fts5DebugStructure( + int *pRc, /* IN/OUT: error code */ + Fts5Buffer *pBuf, + Fts5Structure *p +){ + int iLvl, iSeg; /* Iterate through levels, segments */ + + for(iLvl=0; iLvlnLevel; iLvl++){ + Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge + ); + for(iSeg=0; iSegnSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, + pSeg->pgnoFirst, pSeg->pgnoLast + ); + } + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); + } +} + +/* +** This is part of the fts5_decode() debugging aid. 
+** +** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This +** function appends a human-readable representation of the same object +** to the buffer passed as the second argument. +*/ +static void fts5DecodeStructure( + int *pRc, /* IN/OUT: error code */ + Fts5Buffer *pBuf, + const u8 *pBlob, int nBlob +){ + int rc; /* Return code */ + Fts5Structure *p = 0; /* Decoded structure object */ + + rc = fts5StructureDecode(pBlob, nBlob, 0, &p); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return; + } + + fts5DebugStructure(pRc, pBuf, p); + fts5StructureRelease(p); +} + +/* +** Buffer (a/n) is assumed to contain a list of serialized varints. Read +** each varint and append its string representation to buffer pBuf. Return +** after either the input buffer is exhausted or a 0 value is read. +** +** The return value is the number of bytes read from the input buffer. +*/ +static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ + int iOff = 0; + while( iOff0 ){ + i = getVarint(&a[i], (u64*)&iPrev); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev); + } + while( iaStmt); i++){ - sqlite3_finalize(p->aStmt[i]); + /* Finalize all SQL statements */ + for(i=0; iaStmt); i++){ + sqlite3_finalize(p->aStmt[i]); + } + + /* If required, remove the shadow tables from the database */ + if( bDestroy ){ + rc = sqlite3Fts5DropTable(p->pConfig, "content"); + if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "docsize"); + if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "config"); + } + + sqlite3_free(p); } - - /* If required, remove the shadow tables from the database */ - if( bDestroy ){ - rc = sqlite3Fts5DropTable(p->pConfig, "content"); - if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "docsize"); - if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "config"); - } - - sqlite3_free(p); return rc; } @@ -265,8 +267,7 @@ static int fts5StorageInsertCallback( Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = 
pCtx->pStorage->pIndex; pCtx->szCol = iPos+1; - sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); - return SQLITE_OK; + return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); } /* @@ -288,8 +289,8 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ Fts5InsertCtx ctx; ctx.pStorage = p; ctx.iCol = -1; - sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); - for(iCol=1; iCol<=pConfig->nCol; iCol++){ + rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); + for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), @@ -475,8 +476,10 @@ int sqlite3Fts5StorageInsert( *piRowid = sqlite3_last_insert_rowid(pConfig->db); /* Add new entries to the FTS index */ - sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid); - ctx.pStorage = p; + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid); + ctx.pStorage = p; + } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, diff --git a/manifest b/manifest index 60491b268b..a2dd5734ff 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\sconfiguration\soption\sto\sremap\sthe\s"rank"\scolumn\sto\san\sauxiliary\sfts5\sfunction. -D 2014-12-02T20:18:11.604 +C Begin\stesting\sfts5\sOOM\sand\sIO\serror\shandling. 
+D 2014-12-03T17:27:35.105 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 572bd5d4d272ca562240dc1905538f060783ab78 +F ext/fts5/fts5.c 1dae34f4a788b5760c52b914d6384d83ee027b35 F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 -F ext/fts5/fts5Int.h 9dbf415de032b1cc770dcedaa5a8e434d88ca90c +F ext/fts5/fts5Int.h 36054b1dfc4881a9b94f945b348ab6cc01c0c7a5 F ext/fts5/fts5_aux.c 0e3e5fea6bf5772805afe14c95cb5f16e03e4b3f -F ext/fts5/fts5_buffer.c c79d67a5a611521f1f3b9d495981f22c02ef4bdb -F ext/fts5/fts5_config.c 664fdc8519b55753f5c24d7b45176f05586b7965 -F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 +F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 +F ext/fts5/fts5_config.c 17986112dc76e7e39170e08df68f84180f66a9fe +F ext/fts5/fts5_expr.c 5db50cd4ae9c3764d7daa8388bf406c0bad15039 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 7e7023f3a29f104b44df2ca2474b296b8dfe447c -F ext/fts5/fts5_storage.c 0198c5976cefa5e8d3f1cfffa3587d0dd594fb2a +F ext/fts5/fts5_index.c 9233b8b1f519e50d9ec139031032d9211dfcb541 +F ext/fts5/fts5_storage.c bfeedb83b095a1018f4f531c3cc3f9099e9f9081 F ext/fts5/fts5_tcl.c 5272224faf9be129679da5e19d788f0307afc375 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -612,6 +612,7 @@ F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f F test/fts5al.test 
61b067f3b0b61679ab164a8a855882dfd313988d F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 +F test/fts5fault1.test 27cb71251f8f2cd710ce4bdc1f0c29fa5db83be7 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -778,7 +779,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test 4f71bc5c9ce9a249cc94ad415cda809ce7f2360b +F test/permutations.test a762abd3f97809c877c93e6b526ec07bb2a75b96 F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1207,7 +1208,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 9c1697a2aa1f601e6eb11704abe63a73c8105447 -R 30808e5592c3e61509564bec30e4914f +P b5f5971283b9b2f60c16f9675099855af95012cd +R cb30a6b5c5f7ea511cc1ede93a5d038a U dan -Z 9589e0356694de369bd9f49ee042fc35 +Z f7fa77a51653f0fa8e3497900e76f571 diff --git a/manifest.uuid b/manifest.uuid index 124c8a0053..f14250e59b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b5f5971283b9b2f60c16f9675099855af95012cd \ No newline at end of file +2037dba62fdd995ad15b642abe499a790f5ffe5c \ No newline at end of file diff --git a/test/fts5fault1.test b/test/fts5fault1.test new file mode 100644 index 0000000000..723ae3d22f --- /dev/null +++ b/test/fts5fault1.test @@ -0,0 +1,37 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/malloc_common.tcl +set testprefix fts5fault1 + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +faultsim_save_and_close +do_faultsim_test 1 -prep { + faultsim_restore_and_reopen +} -body { + execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) } +} -test { + faultsim_test_result {0 {}} +} + + + +finish_test diff --git a/test/permutations.test b/test/permutations.test index ca34266a27..4e366ca36f 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -225,8 +225,12 @@ test_suite "fts3" -prefix "" -description { test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files { - fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test + fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5af.test fts5ag.test fts5ah.test fts5ai.test fts5aj.test + fts5ak.test fts5al.test + fts5ea.test + + fts5fault1.test } test_suite "nofaultsim" -prefix "" -description { From e4bec37900581a9d6048bfd0f4339362569390f8 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 18 Dec 2014 18:25:48 +0000 Subject: [PATCH 054/206] Fix various problems in fts5 revealed by fault-injection tests. 
FossilOrigin-Name: e358c3de5c916f2c851ab9324ceaae4e4e7a0fbd --- ext/fts5/fts5.c | 2 +- ext/fts5/fts5_config.c | 19 ++- ext/fts5/fts5_expr.c | 25 +-- ext/fts5/fts5_index.c | 343 ++++++++++++++++++++++------------------- manifest | 24 +-- manifest.uuid | 2 +- src/vtab.c | 7 +- test/fts5fault1.test | 76 ++++++++- test/malloc_common.tcl | 14 +- 9 files changed, 317 insertions(+), 195 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index e80715a4c7..4c6e98b86e 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -223,7 +223,7 @@ static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){ break; case FTS5_ROLLBACK: - assert( p->ts.eState==1 || p->ts.eState==2 ); + assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 ); p->ts.eState = 0; break; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 9ea78143c5..54c7a57f28 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -74,14 +74,17 @@ static int fts5ConfigParseSpecial( char **pzErr /* OUT: Error message */ ){ if( sqlite3_stricmp(zCmd, "prefix")==0 ){ + const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; + int rc = SQLITE_OK; char *p; if( pConfig->aPrefix ){ *pzErr = sqlite3_mprintf("multiple prefix=... directives"); - return SQLITE_ERROR; + rc = SQLITE_ERROR; + }else{ + pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); } - pConfig->aPrefix = sqlite3_malloc(sizeof(int) * FTS5_MAX_PREFIX_INDEXES); p = zArg; - while( p[0] ){ + while( rc==SQLITE_OK && p[0] ){ int nPre = 0; while( p[0]==' ' ) p++; while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ @@ -93,16 +96,16 @@ static int fts5ConfigParseSpecial( p++; }else if( p[0] ){ *pzErr = sqlite3_mprintf("malformed prefix=... 
directive"); - return SQLITE_ERROR; + rc = SQLITE_ERROR; } - if( nPre==0 || nPre>=1000 ){ + if( rc==SQLITE_OK && (nPre==0 || nPre>=1000) ){ *pzErr = sqlite3_mprintf("prefix length out of range: %d", nPre); - return SQLITE_ERROR; + rc = SQLITE_ERROR; } pConfig->aPrefix[pConfig->nPrefix] = nPre; pConfig->nPrefix++; } - return SQLITE_OK; + return rc; } *pzErr = sqlite3_mprintf("unrecognized directive: \"%s\"", zCmd); @@ -191,7 +194,7 @@ int sqlite3Fts5ConfigParse( } /* If it is not a special directive, it must be a column name. In - ** this case, check that it is not the reserved column name "rank". */ + ** this case, check that it is not the reserved column name "rank". */ if( zDup ){ sqlite3Fts5Dequote(zDup); pRet->azCol[pRet->nCol++] = zDup; diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 5c95eda7e4..830af586b3 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -213,6 +213,7 @@ int sqlite3Fts5ExprNew( *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); if( pNew==0 ){ sParse.rc = SQLITE_NOMEM; + sqlite3Fts5ParseNodeFree(sParse.pExpr); }else{ pNew->pRoot = sParse.pExpr; pNew->pIndex = 0; @@ -771,7 +772,8 @@ static int fts5ExprNearInitAll( (pExpr->bAsc ? 
FTS5INDEX_QUERY_ASC : 0), &pTerm->pIter ); - if( pTerm->pIter && sqlite3Fts5IterEof(pTerm->pIter) ){ + assert( rc==SQLITE_OK || pTerm->pIter==0 ); + if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){ pNode->bEof = 1; break; } @@ -1204,24 +1206,29 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( int rc; /* Tokenize return code */ char *z = 0; + memset(&sCtx, 0, sizeof(TokenCtx)); + sCtx.pPhrase = pPhrase; + if( pPhrase==0 ){ if( (pParse->nPhrase % 8)==0 ){ int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); Fts5ExprPhrase **apNew; apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte); - if( apNew==0 ) return 0; + if( apNew==0 ){ + pParse->rc = SQLITE_NOMEM; + fts5ExprPhraseFree(pPhrase); + return 0; + } pParse->apPhrase = apNew; } pParse->nPhrase++; } - pParse->rc = fts5ParseStringFromToken(pToken, &z); - if( z==0 ) return 0; - sqlite3Fts5Dequote(z); - - memset(&sCtx, 0, sizeof(TokenCtx)); - sCtx.pPhrase = pPhrase; - rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize); + rc = fts5ParseStringFromToken(pToken, &z); + if( rc==SQLITE_OK ){ + sqlite3Fts5Dequote(z); + rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize); + } if( rc ){ pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 7d0d01afdf..95b2cdb2f7 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -602,13 +602,14 @@ static u16 fts5GetU16(const u8 *aIn){ ** the Fts5Index handle passed as the first argument. 
*/ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ - void *pRet; - assert( p->rc==SQLITE_OK ); - pRet = sqlite3_malloc(nByte); - if( pRet==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - memset(pRet, 0, nByte); + void *pRet = 0; + if( p->rc==SQLITE_OK ){ + pRet = sqlite3_malloc(nByte); + if( pRet==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + } } return pRet; } @@ -662,8 +663,9 @@ static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ */ static void fts5CloseReader(Fts5Index *p){ if( p->pReader ){ - sqlite3_blob_close(p->pReader); + sqlite3_blob *pReader = p->pReader; p->pReader = 0; + sqlite3_blob_close(pReader); } } @@ -707,9 +709,10 @@ sqlite3_free(buf.p); int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ fts5BufferZero(pBuf); - fts5BufferGrow(&rc, pBuf, nByte); - rc = sqlite3_blob_read(p->pReader, pBuf->p, nByte, 0); - if( rc==SQLITE_OK ) pBuf->n = nByte; + if( SQLITE_OK==fts5BufferGrow(&rc, pBuf, nByte) ){ + rc = sqlite3_blob_read(p->pReader, pBuf->p, nByte, 0); + if( rc==SQLITE_OK ) pBuf->n = nByte; + } }else{ pRet = (Fts5Data*)fts5IdxMalloc(p, sizeof(Fts5Data) + nByte); if( !pRet ) return 0; @@ -854,6 +857,20 @@ static void fts5DataRemoveSegment(Fts5Index *p, int iIdx, int iSegid){ fts5DataDelete(p, iFirst, iLast); } +/* +** Release a reference to an Fts5Structure object returned by an earlier +** call to fts5StructureRead() or fts5StructureDecode(). +*/ +static void fts5StructureRelease(Fts5Structure *pStruct){ + if( pStruct ){ + int i; + for(i=0; inLevel; i++){ + sqlite3_free(pStruct->aLevel[i].aSeg); + } + sqlite3_free(pStruct); + } +} + /* ** Deserialize and return the structure record currently stored in serialized ** form within buffer pData/nData. 
@@ -918,6 +935,9 @@ static int fts5StructureDecode( i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); } + }else{ + fts5StructureRelease(pRet); + pRet = 0; } } } @@ -1009,19 +1029,11 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){ } fts5DataRelease(pData); - return pRet; -} - -/* -** Release a reference to an Fts5Structure object returned by an earlier -** call to fts5StructureRead() or fts5StructureDecode(). -*/ -static void fts5StructureRelease(Fts5Structure *pStruct){ - int i; - for(i=0; inLevel; i++){ - sqlite3_free(pStruct->aLevel[i].aSeg); + if( p->rc!=SQLITE_OK ){ + fts5StructureRelease(pRet); + pRet = 0; } - sqlite3_free(pStruct); + return pRet; } /* @@ -1045,40 +1057,42 @@ static int fts5StructureCountSegments(Fts5Structure *pStruct){ ** error has already occurred, this function is a no-op. */ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ - int nSegment; /* Total number of segments */ - Fts5Buffer buf; /* Buffer to serialize record into */ - int iLvl; /* Used to iterate through levels */ - int iCookie; /* Cookie value to store */ + if( p->rc==SQLITE_OK ){ + int nSegment; /* Total number of segments */ + Fts5Buffer buf; /* Buffer to serialize record into */ + int iLvl; /* Used to iterate through levels */ + int iCookie; /* Cookie value to store */ - nSegment = fts5StructureCountSegments(pStruct); - memset(&buf, 0, sizeof(Fts5Buffer)); + nSegment = fts5StructureCountSegments(pStruct); + memset(&buf, 0, sizeof(Fts5Buffer)); - /* Append the current configuration cookie */ - iCookie = p->pConfig->iCookie; - if( iCookie<0 ) iCookie = 0; - fts5BufferAppend32(&p->rc, &buf, iCookie); + /* Append the current configuration cookie */ + iCookie = p->pConfig->iCookie; + if( iCookie<0 ) iCookie = 0; + fts5BufferAppend32(&p->rc, &buf, iCookie); - fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel); - fts5BufferAppendVarint(&p->rc, &buf, nSegment); - 
fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); + fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel); + fts5BufferAppendVarint(&p->rc, &buf, nSegment); + fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); - for(iLvl=0; iLvlnLevel; iLvl++){ - int iSeg; /* Used to iterate through segments */ - Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; - fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); - assert( pLvl->nMerge<=pLvl->nSeg ); + for(iLvl=0; iLvlnLevel; iLvl++){ + int iSeg; /* Used to iterate through segments */ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); + assert( pLvl->nMerge<=pLvl->nSeg ); - for(iSeg=0; iSegnSeg; iSeg++){ - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); + for(iSeg=0; iSegnSeg; iSeg++){ + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); + } } - } - fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n); - fts5BufferFree(&buf); + fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n); + fts5BufferFree(&buf); + } } #if 0 @@ -1864,7 +1878,7 @@ static void fts5SegIterSeekInit( res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); if( res>=0 ) break; fts5SegIterNext(p, pIter); - }while( pIter->pLeaf ); + }while( pIter->pLeaf && p->rc==SQLITE_OK ); if( bGe==0 && res ){ /* Set iterator to point to EOF */ @@ -1873,7 +1887,7 @@ static void fts5SegIterSeekInit( } } - if( bGe==0 ){ + if( p->rc==SQLITE_OK && bGe==0 ){ 
pIter->flags |= FTS5_SEGITER_ONETERM; if( pIter->pLeaf ){ if( flags & FTS5INDEX_QUERY_ASC ){ @@ -2422,7 +2436,7 @@ static void fts5IndexDiscardData(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; int i; for(i=0; i<=pConfig->nPrefix; i++){ - sqlite3Fts5HashClear(p->apHash[i]); + if( p->apHash[i] ) sqlite3Fts5HashClear(p->apHash[i]); } p->nPendingData = 0; } @@ -2609,8 +2623,8 @@ static void fts5WriteAppendTerm( int nPrefix; /* Bytes of prefix compression for term */ Fts5PageWriter *pPage = &pWriter->aWriter[0]; - assert( pPage->buf.n==0 || pPage->buf.n>4 ); - if( pPage->buf.n==0 ){ + assert( pPage==0 || pPage->buf.n==0 || pPage->buf.n>4 ); + if( pPage && pPage->buf.n==0 ){ /* Zero the first term and first docid fields */ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); @@ -2660,30 +2674,32 @@ static void fts5WriteAppendRowid( Fts5SegWriter *pWriter, i64 iRowid ){ - Fts5PageWriter *pPage = &pWriter->aWriter[0]; + if( p->rc==SQLITE_OK ){ + Fts5PageWriter *pPage = &pWriter->aWriter[0]; - /* If this is to be the first docid written to the page, set the - ** docid-pointer in the page-header. Also append a value to the dlidx - ** buffer, in case a doclist-index is required. */ - if( pWriter->bFirstRowidInPage ){ - fts5PutU16(pPage->buf.p, pPage->buf.n); - fts5WriteDlidxAppend(p, pWriter, iRowid); - } + /* If this is to be the first docid written to the page, set the + ** docid-pointer in the page-header. Also append a value to the dlidx + ** buffer, in case a doclist-index is required. */ + if( pWriter->bFirstRowidInPage ){ + fts5PutU16(pPage->buf.p, pPage->buf.n); + fts5WriteDlidxAppend(p, pWriter, iRowid); + } - /* Write the docid. 
*/ - if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ - fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); - }else{ - assert( iRowidiPrevRowid ); - fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->iPrevRowid - iRowid); - } - pWriter->iPrevRowid = iRowid; - pWriter->bFirstRowidInDoclist = 0; - pWriter->bFirstRowidInPage = 0; + /* Write the docid. */ + if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ + fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); + }else{ + assert( p->rc || iRowidiPrevRowid ); + fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->iPrevRowid - iRowid); + } + pWriter->iPrevRowid = iRowid; + pWriter->bFirstRowidInDoclist = 0; + pWriter->bFirstRowidInPage = 0; - if( pPage->buf.n>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; + if( pPage->buf.n>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + pWriter->bFirstRowidInPage = 1; + } } } @@ -2692,11 +2708,13 @@ static void fts5WriteAppendPoslistInt( Fts5SegWriter *pWriter, int iVal ){ - Fts5PageWriter *pPage = &pWriter->aWriter[0]; - fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); - if( pPage->buf.n>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; + if( p->rc==SQLITE_OK ){ + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); + if( pPage->buf.n>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + pWriter->bFirstRowidInPage = 1; + } } } @@ -2744,32 +2762,37 @@ static void fts5WriteFinish( int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; - *pnLeaf = pWriter->aWriter[0].pgno; - if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){ - *pnLeaf = 0; - *pnHeight = 0; - }else{ - fts5WriteFlushLeaf(p, pWriter); - if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ - fts5WriteBtreeGrow(p, pWriter); - } - if( pWriter->nWriter>1 ){ - fts5WriteBtreeNEmpty(p, pWriter); - } - *pnHeight = pWriter->nWriter; + 
if( p->rc==SQLITE_OK ){ + *pnLeaf = pWriter->aWriter[0].pgno; + if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){ + *pnLeaf = 0; + *pnHeight = 0; + }else{ + fts5WriteFlushLeaf(p, pWriter); + if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + fts5WriteBtreeGrow(p, pWriter); + } + if( pWriter->nWriter>1 ){ + fts5WriteBtreeNEmpty(p, pWriter); + } + *pnHeight = pWriter->nWriter; - for(i=1; inWriter; i++){ - Fts5PageWriter *pPg = &pWriter->aWriter[i]; - fts5DataWrite(p, - FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno), - pPg->buf.p, pPg->buf.n - ); + for(i=1; inWriter; i++){ + Fts5PageWriter *pPg = &pWriter->aWriter[i]; + fts5DataWrite(p, + FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno), + pPg->buf.p, pPg->buf.n + ); + } } } for(i=0; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; - fts5BufferFree(&pPg->term); - fts5BufferFree(&pPg->buf); + assert( pPg || p->rc!=SQLITE_OK ); + if( pPg ){ + fts5BufferFree(&pPg->term); + fts5BufferFree(&pPg->buf); + } } sqlite3_free(pWriter->aWriter); sqlite3Fts5BufferFree(&pWriter->dlidx); @@ -3025,55 +3048,57 @@ static void fts5IndexWork( Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nLeaf /* Number of output leaves just written */ ){ - Fts5Structure *pStruct = *ppStruct; - i64 nWrite; /* Initial value of write-counter */ - int nWork; /* Number of work-quanta to perform */ - int nRem; /* Number of leaf pages left to write */ + if( p->rc==SQLITE_OK ){ + Fts5Structure *pStruct = *ppStruct; + i64 nWrite; /* Initial value of write-counter */ + int nWork; /* Number of work-quanta to perform */ + int nRem; /* Number of leaf pages left to write */ - /* Update the write-counter. While doing so, set nWork. */ - nWrite = pStruct->nWriteCounter; - nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit); - pStruct->nWriteCounter += nLeaf; - nRem = p->nWorkUnit * nWork * pStruct->nLevel; + /* Update the write-counter. While doing so, set nWork. 
*/ + nWrite = pStruct->nWriteCounter; + nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit); + pStruct->nWriteCounter += nLeaf; + nRem = p->nWorkUnit * nWork * pStruct->nLevel; - while( nRem>0 ){ - int iLvl; /* To iterate through levels */ - int iBestLvl = 0; /* Level offering the most input segments */ - int nBest = 0; /* Number of input segments on best level */ + while( nRem>0 ){ + int iLvl; /* To iterate through levels */ + int iBestLvl = 0; /* Level offering the most input segments */ + int nBest = 0; /* Number of input segments on best level */ - /* Set iBestLvl to the level to read input segments from. */ - assert( pStruct->nLevel>0 ); - for(iLvl=0; iLvlnLevel; iLvl++){ - Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; - if( pLvl->nMerge ){ - if( pLvl->nMerge>nBest ){ - iBestLvl = iLvl; - nBest = pLvl->nMerge; + /* Set iBestLvl to the level to read input segments from. */ + assert( pStruct->nLevel>0 ); + for(iLvl=0; iLvlnLevel; iLvl++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + if( pLvl->nMerge ){ + if( pLvl->nMerge>nBest ){ + iBestLvl = iLvl; + nBest = pLvl->nMerge; + } + break; + } + if( pLvl->nSeg>nBest ){ + nBest = pLvl->nSeg; + iBestLvl = iLvl; } - break; } - if( pLvl->nSeg>nBest ){ - nBest = pLvl->nSeg; - iBestLvl = iLvl; - } - } - /* If nBest is still 0, then the index must be empty. */ + /* If nBest is still 0, then the index must be empty. 
*/ #ifdef SQLITE_DEBUG - for(iLvl=0; nBest==0 && iLvlnLevel; iLvl++){ - assert( pStruct->aLevel[iLvl].nSeg==0 ); - } + for(iLvl=0; nBest==0 && iLvlnLevel; iLvl++){ + assert( pStruct->aLevel[iLvl].nSeg==0 ); + } #endif - if( nBestpConfig->nAutomerge - && pStruct->aLevel[iBestLvl].nMerge==0 - ){ - break; + if( nBestpConfig->nAutomerge + && pStruct->aLevel[iBestLvl].nMerge==0 + ){ + break; + } + fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); + fts5StructurePromote(p, iBestLvl+1, pStruct); + assert( nRem==0 || p->rc==SQLITE_OK ); + *ppStruct = pStruct; } - fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); - fts5StructurePromote(p, iBestLvl+1, pStruct); - assert( nRem==0 || p->rc==SQLITE_OK ); - *ppStruct = pStruct; } } @@ -3123,17 +3148,17 @@ static int fts5FlushNewEntry( int nPoslist ){ Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - int rc = SQLITE_OK; + Fts5Index *pIdx = p->pIdx; /* Append the rowid itself */ - fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid); + fts5WriteAppendRowid(pIdx, &p->writer, iRowid); /* Append the size of the position list in bytes */ - fts5WriteAppendPoslistInt(p->pIdx, &p->writer, nPoslist); + fts5WriteAppendPoslistInt(pIdx, &p->writer, nPoslist); /* And the poslist data */ - fts5WriteAppendPoslistData(p->pIdx, &p->writer, aPoslist, nPoslist); - return rc; + fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist); + return pIdx->rc; } /* @@ -3485,20 +3510,22 @@ static void fts5MultiIterPoslist( int bSz, Fts5Buffer *pBuf ){ - Fts5ChunkIter iter; - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; - assert( fts5MultiIterEof(p, pMulti)==0 ); - fts5ChunkIterInit(p, pSeg, &iter); - if( fts5ChunkIterEof(p, &iter)==0 ){ - if( bSz ){ - fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem); - } - while( fts5ChunkIterEof(p, &iter)==0 ){ - fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); - fts5ChunkIterNext(p, &iter); + if( p->rc==SQLITE_OK ){ + Fts5ChunkIter iter; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; + 
assert( fts5MultiIterEof(p, pMulti)==0 ); + fts5ChunkIterInit(p, pSeg, &iter); + if( fts5ChunkIterEof(p, &iter)==0 ){ + if( bSz ){ + fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem); + } + while( fts5ChunkIterEof(p, &iter)==0 ){ + fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); + fts5ChunkIterNext(p, &iter); + } } + fts5ChunkIterRelease(&iter); } - fts5ChunkIterRelease(&iter); } static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ diff --git a/manifest b/manifest index a2dd5734ff..a7170b5cfd 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Begin\stesting\sfts5\sOOM\sand\sIO\serror\shandling. -D 2014-12-03T17:27:35.105 +C Fix\svarious\sproblems\sin\sfts5\srevealed\sby\sfault-injection\stests. +D 2014-12-18T18:25:48.377 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,15 +104,15 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 1dae34f4a788b5760c52b914d6384d83ee027b35 +F ext/fts5/fts5.c d1c1722eb661da3e8e3a19909958b97beff7d243 F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 F ext/fts5/fts5Int.h 36054b1dfc4881a9b94f945b348ab6cc01c0c7a5 F ext/fts5/fts5_aux.c 0e3e5fea6bf5772805afe14c95cb5f16e03e4b3f F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c 17986112dc76e7e39170e08df68f84180f66a9fe -F ext/fts5/fts5_expr.c 5db50cd4ae9c3764d7daa8388bf406c0bad15039 +F ext/fts5/fts5_config.c 5caeb4e77680d635be25b899f97a29cf26fb45ce +F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 
9233b8b1f519e50d9ec139031032d9211dfcb541 +F ext/fts5/fts5_index.c 13b6d002e10840d8ec525ccd4a2bfc8831ea7a47 F ext/fts5/fts5_storage.c bfeedb83b095a1018f4f531c3cc3f9099e9f9081 F ext/fts5/fts5_tcl.c 5272224faf9be129679da5e19d788f0307afc375 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b @@ -306,7 +306,7 @@ F src/vdbeblob.c 9205ce9d3b064d9600f8418a897fc88b5687d9ac F src/vdbemem.c 6fc77594c60f6155404f3f8d71bf36d1fdeb4447 F src/vdbesort.c 44441d73b08b3a638dcdb725afffb87c6574ad27 F src/vdbetrace.c 6f52bc0c51e144b7efdcfb2a8f771167a8816767 -F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd +F src/vtab.c b05a86bed6ee2c7f79edb2cb57b951b220867caf F src/wal.c 264df50a1b33124130b23180ded2e2c5663c652a F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4 F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45 @@ -612,7 +612,7 @@ F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f F test/fts5al.test 61b067f3b0b61679ab164a8a855882dfd313988d F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 -F test/fts5fault1.test 27cb71251f8f2cd710ce4bdc1f0c29fa5db83be7 +F test/fts5fault1.test 6fef96cf6eccd9b9fc9f4518cc15c4fa9740ef66 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -722,7 +722,7 @@ F test/mallocH.test 79b65aed612c9b3ed2dcdaa727c85895fd1bfbdb F test/mallocI.test a88c2b9627c8506bf4703d8397420043a786cdb6 F test/mallocJ.test b5d1839da331d96223e5f458856f8ffe1366f62e F test/mallocK.test d79968641d1b70d88f6c01bdb9a7eb4a55582cc9 -F test/malloc_common.tcl 58e54229c4132ef882a11fab6419ec4cd3073589 +F test/malloc_common.tcl dc07e2bbbfc3f8416f4cbddcc139d77eb360d9af F test/manydb.test 28385ae2087967aa05c38624cec7d96ec74feb3e F test/mem5.test c6460fba403c5703141348cd90de1c294188c68f F test/memdb.test fcb5297b321b562084fc79d64d5a12a1cd2b639b @@ -1208,7 +1208,7 @@ F 
tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b5f5971283b9b2f60c16f9675099855af95012cd -R cb30a6b5c5f7ea511cc1ede93a5d038a +P 2037dba62fdd995ad15b642abe499a790f5ffe5c +R 701ca9549e5d1bd70b92dde1949b5886 U dan -Z f7fa77a51653f0fa8e3497900e76f571 +Z d83a5deacda027d17d377616a8122fa0 diff --git a/manifest.uuid b/manifest.uuid index f14250e59b..47e70fa938 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2037dba62fdd995ad15b642abe499a790f5ffe5c \ No newline at end of file +e358c3de5c916f2c851ab9324ceaae4e4e7a0fbd \ No newline at end of file diff --git a/src/vtab.c b/src/vtab.c index ca0db214cc..7b4867f9c0 100644 --- a/src/vtab.c +++ b/src/vtab.c @@ -789,8 +789,10 @@ int sqlite3VtabCallDestroy(sqlite3 *db, int iDb, const char *zTab){ static void callFinaliser(sqlite3 *db, int offset){ int i; if( db->aVTrans ){ + VTable **aVTrans = db->aVTrans; + db->aVTrans = 0; for(i=0; inVTrans; i++){ - VTable *pVTab = db->aVTrans[i]; + VTable *pVTab = aVTrans[i]; sqlite3_vtab *p = pVTab->pVtab; if( p ){ int (*x)(sqlite3_vtab *); @@ -800,9 +802,8 @@ static void callFinaliser(sqlite3 *db, int offset){ pVTab->iSavepoint = 0; sqlite3VtabUnlock(pVTab); } - sqlite3DbFree(db, db->aVTrans); + sqlite3DbFree(db, aVTrans); db->nVTrans = 0; - db->aVTrans = 0; } } diff --git a/test/fts5fault1.test b/test/fts5fault1.test index 723ae3d22f..76434b3a80 100644 --- a/test/fts5fault1.test +++ b/test/fts5fault1.test @@ -23,15 +23,89 @@ ifcapable !fts5 { return } +# Simple tests: +# +# 1: CREATE VIRTUAL TABLE +# 2: INSERT statement +# 3: DELETE statement +# 4: MATCH expressions +# + +if 1 { + faultsim_save_and_close do_faultsim_test 1 -prep { faultsim_restore_and_reopen } -body { - execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) } + execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, b, 
prefix='1, 2, 3') } } -test { faultsim_test_result {0 {}} } +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3'); +} +faultsim_save_and_close +do_faultsim_test 2 -prep { + faultsim_restore_and_reopen +} -body { + execsql { + INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno'); + } +} -test { + faultsim_test_result {0 {}} +} +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3'); + INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno'); +} +faultsim_save_and_close +do_faultsim_test 3 -prep { + faultsim_restore_and_reopen +} -body { + execsql { DELETE FROM t1 } +} -test { + faultsim_test_result {0 {}} +} + +} + +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t2 USING fts5(a, b); + INSERT INTO t2 VALUES('m f a jj th q jr ar', 'hj n h h sg j i m'); + INSERT INTO t2 VALUES('nr s t g od j kf h', 'sb h aq rg op rb n nl'); + INSERT INTO t2 VALUES('do h h pb p p q fr', 'c rj qs or cr a l i'); + INSERT INTO t2 VALUES('lk gp t i lq mq qm p', 'h mr g f op ld aj h'); + INSERT INTO t2 VALUES('ct d sq kc qi k f j', 'sn gh c of g s qt q'); + INSERT INTO t2 VALUES('d ea d d om mp s ab', 'dm hg l df cm ft pa c'); + INSERT INTO t2 VALUES('tc dk c jn n t sr ge', 'a a kn bc n i af h'); + INSERT INTO t2 VALUES('ie ii d i b sa qo rf', 'a h m aq i b m fn'); + INSERT INTO t2 VALUES('gs r fo a er m h li', 'tm c p gl eb ml q r'); + INSERT INTO t2 VALUES('k fe fd rd a gi ho kk', 'ng m c r d ml rm r'); +} +faultsim_save_and_close + +foreach {tn expr res} { + 1 { dk } 7 + 2 { m f } 1 + 3 { f* } {10 9 8 6 5 4 3 1} + 4 { m OR f } {10 9 8 5 4 1} + 5 { sn + gh } {5} + 6 { "sn gh" } {5} + 7 { NEAR(r a, 5) } {9} +} { + do_faultsim_test 4.$tn -prep { + faultsim_restore_and_reopen + } -body " + execsql { SELECT rowid FROM t2 WHERE t2 MATCH '$expr' } + " -test " + faultsim_test_result {[list 0 $res]} + " +} finish_test + diff --git a/test/malloc_common.tcl b/test/malloc_common.tcl index 160897057a..1393a84f35 100644 
--- a/test/malloc_common.tcl +++ b/test/malloc_common.tcl @@ -129,6 +129,8 @@ proc do_faultsim_test {name args} { set DEFAULT(-test) "" set DEFAULT(-install) "" set DEFAULT(-uninstall) "" + set DEFAULT(-start) 1 + set DEFAULT(-end) 0 fix_testname name @@ -146,7 +148,8 @@ proc do_faultsim_test {name args} { } set testspec [list -prep $O(-prep) -body $O(-body) \ - -test $O(-test) -install $O(-install) -uninstall $O(-uninstall) + -test $O(-test) -install $O(-install) -uninstall $O(-uninstall) \ + -start $O(-start) -end $O(-end) ] foreach f [lsort -unique $faultlist] { eval do_one_faultsim_test "$name-$f" $FAULTSIM($f) $testspec @@ -318,6 +321,8 @@ proc faultsim_test_result_int {args} { # # -test Script to execute after -body. # +# -start Index of first fault to inject (default 1) +# proc do_one_faultsim_test {testname args} { set DEFAULT(-injectstart) "expr" @@ -330,6 +335,8 @@ proc do_one_faultsim_test {testname args} { set DEFAULT(-test) "" set DEFAULT(-install) "" set DEFAULT(-uninstall) "" + set DEFAULT(-start) 1 + set DEFAULT(-end) 0 array set O [array get DEFAULT] array set O $args @@ -346,7 +353,10 @@ proc do_one_faultsim_test {testname args} { eval $O(-install) set stop 0 - for {set iFail 1} {!$stop} {incr iFail} { + for {set iFail $O(-start)} \ + {!$stop && ($O(-end)==0 || $iFail<=$O(-end))} \ + {incr iFail} \ + { # Evaluate the -prep script. # From 2ae0f4c9316187739496fcdb726c215ad92f24a7 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 18 Dec 2014 20:01:15 +0000 Subject: [PATCH 055/206] Fix a problem with prefix queries and the AND operator. 
FossilOrigin-Name: 38b3c65e3ee95eb7afadb76e0110570fbbc41e1b --- ext/fts5/fts5_index.c | 18 ++++++++++++++++-- manifest | 16 ++++++++-------- manifest.uuid | 2 +- test/fts5ad.test | 14 ++++++++++---- test/fts5fault1.test | 2 ++ 5 files changed, 37 insertions(+), 15 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 95b2cdb2f7..8db2f95039 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4074,6 +4074,21 @@ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ return fts5IndexReturn(pIter->pIndex); } +/* +** Move the doclist-iter passed as the first argument to the next +** matching rowid that occurs at or after iMatch. The definition of "at +** or after" depends on whether this iterator iterates in ascending or +** descending rowid order. +*/ +static void fts5DoclistIterNextFrom(Fts5DoclistIter *p, i64 iMatch){ + do{ + i64 iRowid = p->iRowid; + if( p->bAsc!=0 && iRowid>=iMatch ) break; + if( p->bAsc==0 && iRowid<=iMatch ) break; + fts5DoclistIterNext(p); + }while( p->aPoslist ); +} + /* ** Move to the next matching rowid that occurs at or after iMatch. The ** definition of "at or after" depends on whether this iterator iterates @@ -4081,8 +4096,7 @@ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ if( pIter->pDoclist ){ - assert( 0 ); - /* fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); */ + fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); }else{ fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); } diff --git a/manifest b/manifest index a7170b5cfd..9c8098afeb 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\svarious\sproblems\sin\sfts5\srevealed\sby\sfault-injection\stests. -D 2014-12-18T18:25:48.377 +C Fix\sa\sproblem\swith\sprefix\squeries\sand\sthe\sAND\soperator. 
+D 2014-12-18T20:01:15.691 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 F ext/fts5/fts5_config.c 5caeb4e77680d635be25b899f97a29cf26fb45ce F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 13b6d002e10840d8ec525ccd4a2bfc8831ea7a47 +F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c bfeedb83b095a1018f4f531c3cc3f9099e9f9081 F ext/fts5/fts5_tcl.c 5272224faf9be129679da5e19d788f0307afc375 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b @@ -602,7 +602,7 @@ F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5aa.test 27c7d3c865e144a0501dcbfbd6d2ae87f77602ea F test/fts5ab.test 52f6b9223372ff70b0edb5a3054fbd7bc7fcfefc F test/fts5ac.test 60302196b7711176ce872fe2e4c73c75ac2c4038 -F test/fts5ad.test ed60fdafc73d879b42573abcfa6ede7e02e07c19 +F test/fts5ad.test ff518db6b0d7750b51ee6531ffebf82e57094bfd F test/fts5ae.test 5de775469d45a2f8218fc89b8d6d5176c226d05e F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb @@ -612,7 +612,7 @@ F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f F test/fts5al.test 61b067f3b0b61679ab164a8a855882dfd313988d F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 -F test/fts5fault1.test 6fef96cf6eccd9b9fc9f4518cc15c4fa9740ef66 +F test/fts5fault1.test ba59b6f0897a4fe510c446b98968ec1e8800a56b F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -1208,7 +1208,7 @@ F 
tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2037dba62fdd995ad15b642abe499a790f5ffe5c -R 701ca9549e5d1bd70b92dde1949b5886 +P e358c3de5c916f2c851ab9324ceaae4e4e7a0fbd +R d93acda5ed1642f2b9a85bcc5fb79da9 U dan -Z d83a5deacda027d17d377616a8122fa0 +Z ee6f25fbcb6953dd4dd423bed7e3ab13 diff --git a/manifest.uuid b/manifest.uuid index 47e70fa938..82d7a8e415 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e358c3de5c916f2c851ab9324ceaae4e4e7a0fbd \ No newline at end of file +38b3c65e3ee95eb7afadb76e0110570fbbc41e1b \ No newline at end of file diff --git a/test/fts5ad.test b/test/fts5ad.test index 9514e996c5..bdf71265cf 100644 --- a/test/fts5ad.test +++ b/test/fts5ad.test @@ -179,12 +179,17 @@ foreach {T create} { } } {} - proc prefix_query {prefix} { + proc prefix_query {prefixlist} { set ret [list] db eval {SELECT rowid, a, b FROM t1 ORDER BY rowid DESC} { - if {[lsearch -glob $a $prefix]>=0 || [lsearch -glob $b $prefix]>=0} { - lappend ret $rowid + set bMatch 1 + foreach pref $prefixlist { + if { [lsearch -glob $a $pref]<0 && [lsearch -glob $b $pref]<0 } { + set bMatch 0 + break + } } + if {$bMatch} { lappend ret $rowid } } return $ret } @@ -199,7 +204,8 @@ foreach {T create} { 11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*} 16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*} 21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*} - 27 {x*} + 27 {x*} + 28 {a f*} 29 {a* f*} 30 {a* fghij*} } { set res [prefix_query $prefix] if {$bAsc} { diff --git a/test/fts5fault1.test b/test/fts5fault1.test index 76434b3a80..8a493e323f 100644 --- a/test/fts5fault1.test +++ b/test/fts5fault1.test @@ -97,6 +97,8 @@ foreach {tn expr res} { 5 { sn + gh } {5} 6 { "sn gh" } {5} 7 { NEAR(r a, 5) } {9} + 8 { m* f* } {10 9 8 6 4 1} + 9 { m* + f* } {8 1} } { 
do_faultsim_test 4.$tn -prep { faultsim_restore_and_reopen From 1616d55153a7654bd7955d1ba0c5c08b786277bb Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 19 Dec 2014 20:53:51 +0000 Subject: [PATCH 056/206] Remove the fts5_test() aux function. Test aux functions using the tcl interface instead. FossilOrigin-Name: 67e3ffd950c5347d219a06b33ad51949cffa7d90 --- ext/fts5/fts5_aux.c | 268 ------------------------------------------- ext/fts5/fts5_tcl.c | 162 +++++++++++++++++++++++--- manifest | 23 ++-- manifest.uuid | 2 +- test/fts5_common.tcl | 114 ++++++++++++++++++ test/fts5ac.test | 54 +++++++-- test/fts5ae.test | 54 +++++---- test/fts5ag.test | 6 +- test/tester.tcl | 1 + 9 files changed, 350 insertions(+), 334 deletions(-) create mode 100644 test/fts5_common.tcl diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index aff871e9dd..a09487a369 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -841,273 +841,6 @@ static void fts5Bm25Function( } } -static int fts5TestCallback( - void *pContext, /* Pointer to Fts5Buffer object */ - const char *pToken, /* Buffer containing token */ - int nToken, /* Size of token in bytes */ - int iStart, /* Start offset of token */ - int iEnd, /* End offset of token */ - int iPos /* Position offset of token */ -){ - int rc = SQLITE_OK; - Fts5Buffer *pBuf = (Fts5Buffer*)pContext; - if( pBuf->n!=0 ){ - sqlite3Fts5BufferAppendString(&rc, pBuf, " "); - } - sqlite3Fts5BufferAppendListElem(&rc, pBuf, pToken, nToken); - return rc; -} - - -static void fts5TestFunction( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - sqlite3_context *pCtx, /* Context for returning result/error */ - int nVal, /* Number of values in apVal[] array */ - sqlite3_value **apVal /* Array of trailing arguments */ -){ - Fts5Buffer s; /* Build up text to return here */ - int nCol; /* Number of columns in table */ - int nPhrase; /* Number of phrases in query */ - i64 iRowid; /* 
Rowid of current row */ - const char *zReq = 0; - int rc = SQLITE_OK; - int i; - - if( nVal>=1 ){ - zReq = (const char*)sqlite3_value_text(apVal[0]); - } - - memset(&s, 0, sizeof(Fts5Buffer)); - nCol = pApi->xColumnCount(pFts); - - /* - ** xColumnTotalSize() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntotalsize "); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntotalsize") ){ - if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); - for(i=0; rc==SQLITE_OK && ixColumnTotalSize(pFts, i, &colsz); - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", i==0?"":" ", colsz); - } - if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); - } - - /* - ** xColumnCount() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " columncount "); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "columncount") ){ - nCol = pApi->xColumnCount(pFts); - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nCol); - } - - /* - ** xColumnSize() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " columnsize "); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "columnsize") ){ - if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); - for(i=0; rc==SQLITE_OK && ixColumnSize(pFts, i, &colsz); - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", i==0?"":" ", colsz); - } - if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); - } - - /* - ** xColumnText() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " columntext "); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntext") ){ - for(i=0; rc==SQLITE_OK && ixColumnText(pFts, i, &z, &n); - if( i!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " "); - sqlite3Fts5BufferAppendListElem(&rc, &s, z, n); - } - } - - /* - ** xInst() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " inst "); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "inst") ){ - int nInst; - rc = pApi->xInstCount(pFts, &nInst); - for(i=0; rc==SQLITE_OK && ixInst(pFts, i, &iPhrase, &iCol, &iOff); - 
sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d.%d.%d", - (i==0 ? "" : " "), iPhrase, iCol, iOff - ); - } - } - - /* - ** xPhraseCount() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasecount "); - nPhrase = pApi->xPhraseCount(pFts); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasecount") ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nPhrase); - } - - /* - ** xPhraseSize() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " phrasesize "); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "phrasesize") ){ - if( nPhrase==1 ){ - int nSize = pApi->xPhraseSize(pFts, 0); - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%d", nSize); - }else{ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); - for(i=0; ixPhraseSize(pFts, i); - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", (i==0?"":" "), nSize); - } - sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); - } - } - - /* - ** xPoslist() - */ - if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " poslist "); - if( 0==zReq || 0==sqlite3_stricmp(zReq, "poslist") ){ - int bParen = 0; - Fts5Buffer s3; - memset(&s3, 0, sizeof(s3)); - - for(i=0; ixPoslist(pFts, i, &j, &iPos) ){ - int iOff = FTS5_POS2OFFSET(iPos); - int iCol = FTS5_POS2COLUMN(iPos); - if( nElem!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s2, " "); - sqlite3Fts5BufferAppendPrintf(&rc, &s2, "%d.%d", iCol, iOff); - nElem++; - } - - if( i!=0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s3, " "); - } - if( nElem==1 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s3, "%s", (const char*)s2.p); - }else{ - sqlite3Fts5BufferAppendPrintf(&rc, &s3, "{%s}", (const char*)s2.p); - bParen = 1; - } - sqlite3_free(s2.p); - } - - if(zReq==0 && (nPhrase>1 || bParen) ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "{%s}", (const char*)s3.p); - }else{ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s", (const char*)s3.p); - } - sqlite3_free(s3.p); - } - - if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " queryphrase "); - } - if( 0==zReq || 0==sqlite3_stricmp(zReq, "queryphrase") ){ - int ic, ip; 
- int *anVal = 0; - Fts5Buffer buf1; - memset(&buf1, 0, sizeof(Fts5Buffer)); - - if( rc==SQLITE_OK ){ - anVal = (int*)pApi->xGetAuxdata(pFts, 0); - if( anVal==0 ){ - rc = fts5GatherTotals(pApi, pFts, &anVal); - if( rc==SQLITE_OK ){ - rc = pApi->xSetAuxdata(pFts, (void*)anVal, sqlite3_free); - } - } - } - - for(ip=0; rc==SQLITE_OK && ip0 ) sqlite3Fts5BufferAppendString(&rc, &buf1, " "); - if( nCol>1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "{"); - for(ic=0; ic1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "}"); - } - - if( zReq==0 ){ - sqlite3Fts5BufferAppendListElem(&rc, &s, (const char*)buf1.p, buf1.n); - }else{ - sqlite3Fts5BufferAppendString(&rc, &s, (const char*)buf1.p); - } - sqlite3_free(buf1.p); - } - - if( zReq==0 ){ - sqlite3Fts5BufferAppendString(&rc, &s, " rowid "); - } - if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowid") ){ - iRowid = pApi->xRowid(pFts); - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%lld", iRowid); - } - - if( zReq==0 ){ - sqlite3Fts5BufferAppendString(&rc, &s, " rowcount "); - } - if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowcount") ){ - i64 nRow; - rc = pApi->xRowCount(pFts, &nRow); - sqlite3Fts5BufferAppendPrintf(&rc, &s, "%lld", nRow); - } - - if( zReq==0 ){ - sqlite3Fts5BufferAppendString(&rc, &s, " tokenize "); - } - if( 0==zReq || 0==sqlite3_stricmp(zReq, "tokenize") ){ - Fts5Buffer buf; - memset(&buf, 0, sizeof(buf)); - for(i=0; rc==SQLITE_OK && ixColumnText(pFts, i, &z, &n); - if( rc==SQLITE_OK ){ - Fts5Buffer buf1; - memset(&buf1, 0, sizeof(Fts5Buffer)); - rc = pApi->xTokenize(pFts, z, n, (void*)&buf1, fts5TestCallback); - if( i!=0 ) sqlite3Fts5BufferAppendPrintf(&rc, &buf, " "); - sqlite3Fts5BufferAppendListElem(&rc, &buf, (const char*)buf1.p, buf1.n); - sqlite3_free(buf1.p); - } - } - if( zReq==0 ){ - sqlite3Fts5BufferAppendListElem(&rc, &s, (const char*)buf.p, buf.n); - }else{ - sqlite3Fts5BufferAppendString(&rc, &s, (const char*)buf.p); - } - sqlite3_free(buf.p); - } - - if( rc==SQLITE_OK ){ - sqlite3_result_text(pCtx, (const 
char*)s.p, -1, SQLITE_TRANSIENT); - }else{ - sqlite3_result_error_code(pCtx, rc); - } - sqlite3Fts5BufferFree(&s); -} - int sqlite3Fts5AuxInit(fts5_api *pApi){ struct Builtin { const char *zFunc; /* Function name (nul-terminated) */ @@ -1117,7 +850,6 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){ } aBuiltin [] = { { "bm25debug", (void*)1, fts5Bm25Function, 0 }, { "snippet", 0, fts5SnippetFunction, 0 }, - { "fts5_test", 0, fts5TestFunction, 0 }, { "highlight", 0, fts5HighlightFunction, 0 }, { "bm25", 0, fts5Bm25Function, 0 }, }; diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 28efe7109c..dd5ef60431 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -57,6 +57,57 @@ struct F5tApi { Fts5Context *pFts; }; +static int xTokenizeCb( + void *pCtx, + const char *zToken, int nToken, + int iStart, int iEnd, int iPos +){ + F5tFunction *p = (F5tFunction*)pCtx; + Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); + int rc; + + Tcl_IncrRefCount(pEval); + Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken)); + Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iStart)); + Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iEnd)); + Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iPos)); + + rc = Tcl_EvalObjEx(p->interp, pEval, 0); + Tcl_DecrRefCount(pEval); + + return rc; +} + +static int xF5tApi(void*, Tcl_Interp*, int, Tcl_Obj *CONST []); + +static int xQueryPhraseCb( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + void *pCtx +){ + F5tFunction *p = (F5tFunction*)pCtx; + static sqlite3_int64 iCmd = 0; + Tcl_Obj *pEval; + int rc; + + char zCmd[64]; + F5tApi sApi; + + sApi.pApi = pApi; + sApi.pFts = pFts; + sprintf(zCmd, "f5t_2_%lld", iCmd++); + Tcl_CreateObjCommand(p->interp, zCmd, xF5tApi, &sApi, 0); + + pEval = Tcl_DuplicateObj(p->pScript); + Tcl_IncrRefCount(pEval); + Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zCmd, -1)); + rc = Tcl_EvalObjEx(p->interp, pEval, 0); + Tcl_DecrRefCount(pEval); + 
Tcl_DeleteCommand(p->interp, zCmd); + + return rc; +} + /* ** api sub-command... ** @@ -73,12 +124,21 @@ static int xF5tApi( int nArg; const char *zMsg; } aSub[] = { - { "xRowid", 0, "" }, - { "xInstCount", 0, "" }, - { "xInst", 1, "IDX" }, - { "xColumnText", 1, "COL" }, - { "xColumnSize", 1, "COL" }, + { "xColumnCount", 0, "" }, + { "xRowCount", 0, "" }, + { "xColumnTotalSize", 1, "COL" }, + { "xTokenize", 2, "TEXT SCRIPT" }, + { "xPhraseCount", 0, "" }, + { "xPhraseSize", 1, "PHRASE" }, + { "xInstCount", 0, "" }, + { "xInst", 1, "IDX" }, + { "xRowid", 0, "" }, + { "xColumnText", 1, "COL" }, + { "xColumnSize", 1, "COL" }, + { "xQueryPhrase", 2, "PHRASE SCRIPT" }, + { 0, 0, 0} }; + int rc; int iSub = 0; F5tApi *p = (F5tApi*)clientData; @@ -97,14 +157,67 @@ static int xF5tApi( return TCL_ERROR; } +#define CASE(i,str) case i: assert( strcmp(aSub[i].zName, str)==0 ); switch( iSub ){ - case 0: { /* xRowid */ - sqlite3_int64 iRowid = p->pApi->xRowid(p->pFts); - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(iRowid)); + CASE(0, "xColumnCount") { + int nCol; + nCol = p->pApi->xColumnCount(p->pFts); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewIntObj(nCol)); + } break; } - - case 1: { /* xInstCount */ + CASE(1, "xRowCount") { + sqlite3_int64 nRow; + rc = p->pApi->xRowCount(p->pFts, &nRow); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nRow)); + } + break; + } + CASE(2, "xColumnTotalSize") { + int iCol; + sqlite3_int64 nSize; + if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ) return TCL_ERROR; + rc = p->pApi->xColumnTotalSize(p->pFts, iCol, &nSize); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nSize)); + } + break; + } + CASE(3, "xTokenize") { + int nText; + char *zText = Tcl_GetStringFromObj(objv[2], &nText); + F5tFunction ctx; + ctx.interp = interp; + ctx.pScript = objv[3]; + rc = p->pApi->xTokenize(p->pFts, zText, nText, &ctx, xTokenizeCb); + if( rc==SQLITE_OK ){ + Tcl_ResetResult(interp); + } + return rc; + } + CASE(4, 
"xPhraseCount") { + int nPhrase; + nPhrase = p->pApi->xPhraseCount(p->pFts); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewIntObj(nPhrase)); + } + break; + } + CASE(5, "xPhraseSize") { + int iPhrase; + int sz; + if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){ + return TCL_ERROR; + } + sz = p->pApi->xPhraseSize(p->pFts, iPhrase); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewIntObj(sz)); + } + break; + } + CASE(6, "xInstCount") { int nInst; rc = p->pApi->xInstCount(p->pFts, &nInst); if( rc==SQLITE_OK ){ @@ -112,8 +225,7 @@ static int xF5tApi( } break; } - - case 2: { /* xInst */ + CASE(7, "xInst") { int iIdx, ip, ic, io; if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ){ return TCL_ERROR; @@ -128,8 +240,12 @@ static int xF5tApi( } break; } - - case 3: { /* xColumnText */ + CASE(8, "xRowid") { + sqlite3_int64 iRowid = p->pApi->xRowid(p->pFts); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(iRowid)); + break; + } + CASE(9, "xColumnText") { const char *z = 0; int n = 0; int iCol; @@ -142,8 +258,7 @@ static int xF5tApi( } break; } - - case 4: { /* xColumnSize */ + CASE(10, "xColumnSize") { int n = 0; int iCol; if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){ @@ -155,11 +270,26 @@ static int xF5tApi( } break; } + CASE(11, "xQueryPhrase") { + int iPhrase; + F5tFunction ctx; + if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){ + return TCL_ERROR; + } + ctx.interp = interp; + ctx.pScript = objv[3]; + rc = p->pApi->xQueryPhrase(p->pFts, iPhrase, &ctx, xQueryPhraseCb); + if( rc==SQLITE_OK ){ + Tcl_ResetResult(interp); + } + break; + } default: assert( 0 ); break; } +#undef CASE if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in api call", 0); diff --git a/manifest b/manifest index 9c8098afeb..6b558dbd3f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sprefix\squeries\sand\sthe\sAND\soperator. 
-D 2014-12-18T20:01:15.691 +C Remove\sthe\sfts5_test()\saux\sfunction.\sTest\saux\sfunctions\susing\sthe\stcl\sinterface\sinstead. +D 2014-12-19T20:53:51.092 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,14 +107,14 @@ F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 F ext/fts5/fts5.c d1c1722eb661da3e8e3a19909958b97beff7d243 F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 F ext/fts5/fts5Int.h 36054b1dfc4881a9b94f945b348ab6cc01c0c7a5 -F ext/fts5/fts5_aux.c 0e3e5fea6bf5772805afe14c95cb5f16e03e4b3f +F ext/fts5/fts5_aux.c b8e5660a05b86dab059c9989835b5df0ac5e3c55 F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 F ext/fts5/fts5_config.c 5caeb4e77680d635be25b899f97a29cf26fb45ce F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c bfeedb83b095a1018f4f531c3cc3f9099e9f9081 -F ext/fts5/fts5_tcl.c 5272224faf9be129679da5e19d788f0307afc375 +F ext/fts5/fts5_tcl.c d56484fd5cc3b02d268ee11fa4918e98ce3b1d03 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -599,13 +599,14 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 +F test/fts5_common.tcl 2488117cd80b7a4de7c20054b89f082b77b4189c F test/fts5aa.test 27c7d3c865e144a0501dcbfbd6d2ae87f77602ea F test/fts5ab.test 52f6b9223372ff70b0edb5a3054fbd7bc7fcfefc -F test/fts5ac.test 60302196b7711176ce872fe2e4c73c75ac2c4038 +F 
test/fts5ac.test 021e175b809d2baa23792807caae5dfc6bc706f4 F test/fts5ad.test ff518db6b0d7750b51ee6531ffebf82e57094bfd -F test/fts5ae.test 5de775469d45a2f8218fc89b8d6d5176c226d05e +F test/fts5ae.test 0877873a2b9df6b3a2d832ed5ea928f838d19faf F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc -F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb +F test/fts5ag.test 8b2bb67cf2a3245eaad5e49ab8daa6be6e64332b F test/fts5ah.test 788e923e60b5e7a559f672cfbf262b8b260ea176 F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba @@ -888,7 +889,7 @@ F test/tclsqlite.test 37a61c2da7e3bfe3b8c1a2867199f6b860df5d43 F test/tempdb.test 19d0f66e2e3eeffd68661a11c83ba5e6ace9128c F test/temptable.test d2c9b87a54147161bcd1822e30c1d1cd891e5b30 F test/temptrigger.test 8ec228b0db5d7ebc4ee9b458fc28cb9e7873f5e1 -F test/tester.tcl f31bea1483ea1d39620f982130026e76f872d744 +F test/tester.tcl 7d2c97b43e51abde7a35f9d3bc57e886c8503e55 F test/thread001.test 9f22fd3525a307ff42a326b6bc7b0465be1745a5 F test/thread002.test e630504f8a06c00bf8bbe68528774dd96aeb2e58 F test/thread003.test ee4c9efc3b86a6a2767516a37bd64251272560a7 @@ -1208,7 +1209,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e358c3de5c916f2c851ab9324ceaae4e4e7a0fbd -R d93acda5ed1642f2b9a85bcc5fb79da9 +P 38b3c65e3ee95eb7afadb76e0110570fbbc41e1b +R a4b205e397ac161e65b87e55d29e5aba U dan -Z ee6f25fbcb6953dd4dd423bed7e3ab13 +Z b9d8ce93a014115b155c701f123b4810 diff --git a/manifest.uuid b/manifest.uuid index 82d7a8e415..3656228956 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -38b3c65e3ee95eb7afadb76e0110570fbbc41e1b \ No newline at end of file +67e3ffd950c5347d219a06b33ad51949cffa7d90 \ No newline at end of file diff --git 
a/test/fts5_common.tcl b/test/fts5_common.tcl new file mode 100644 index 0000000000..78f561ac95 --- /dev/null +++ b/test/fts5_common.tcl @@ -0,0 +1,114 @@ +# 2014 Dec 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + + + + +proc fts5_test_poslist {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xInstCount]} {incr i} { + lappend res [string map {{ } .} [$cmd xInst $i]] + } + set res +} + +proc fts5_test_columnsize {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { + lappend res [$cmd xColumnSize $i] + } + set res +} + +proc fts5_test_columntext {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { + lappend res [$cmd xColumnText $i] + } + set res +} + +proc fts5_test_columntotalsize {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { + lappend res [$cmd xColumnTotalSize $i] + } + set res +} + +proc test_append_token {varname token iStart iEnd iPos} { + upvar $varname var + lappend var $token +} +proc fts5_test_tokenize {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { + set tokens [list] + $cmd xTokenize [$cmd xColumnText $i] [list test_append_token tokens] + lappend res $tokens + } + set res +} + +proc fts5_test_rowcount {cmd} { + $cmd xRowCount +} + +proc test_queryphrase_cb {cnt cmd} { + upvar $cnt L + for {set i 0} {$i < [$cmd xInstCount]} {incr i} { + foreach {ip ic io} [$cmd xInst $i] break + set A($ic) 1 + } + foreach ic [array names A] { + lset L $ic [expr {[lindex $L $ic] + 1}] + } +} +proc fts5_test_queryphrase {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { + set cnt [list] + for {set j 0} {$j < [$cmd xColumnCount]} 
{incr j} { lappend cnt 0 } + $cmd xQueryPhrase $i [list test_queryphrase_cb cnt] + lappend res $cnt + } + set res +} + +proc fts5_test_all {cmd} { + set res [list] + lappend res columnsize [fts5_test_columnsize $cmd] + lappend res columntext [fts5_test_columntext $cmd] + lappend res columntotalsize [fts5_test_columntotalsize $cmd] + lappend res poslist [fts5_test_poslist $cmd] + lappend res tokenize [fts5_test_tokenize $cmd] + lappend res rowcount [fts5_test_rowcount $cmd] + set res +} + +proc fts5_aux_test_functions {db} { + foreach f { + fts5_test_columnsize + fts5_test_columntext + fts5_test_columntotalsize + fts5_test_poslist + fts5_test_tokenize + fts5_test_rowcount + fts5_test_all + + fts5_test_queryphrase + } { + sqlite3_fts5_create_function $db $f $f + } +} + + diff --git a/test/fts5ac.test b/test/fts5ac.test index 1b56c8b0e3..1044a81932 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -139,11 +139,12 @@ do_test 1.1 { # Usage: # -# poslist aCol ?-near N? ?-col C? -- phrase1 phrase2... +# poslist aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... # proc poslist {aCol args} { set O(-near) 10 set O(-col) -1 + set O(-pc) "" set nOpt [lsearch -exact $args --] if {$nOpt<0} { error "no -- option" } @@ -153,6 +154,12 @@ proc poslist {aCol args} { set O($k) $v } + if {$O(-pc) == ""} { + set counter 0 + } else { + upvar $O(-pc) counter + } + # Set $phraselist to be a list of phrases. $nPhrase its length. 
set phraselist [lrange $args [expr $nOpt+1] end] set nPhrase [llength $phraselist] @@ -197,14 +204,15 @@ proc poslist {aCol args} { } set res [list] +#puts [array names A] + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set plist [list] for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { foreach a $A($iCol,$iPhrase) { - lappend plist "$iCol.$a" + lappend res "$counter.$iCol.$a" } } - lappend res $plist + incr counter } #puts $res @@ -220,6 +228,17 @@ proc nearset {args} { return [expr [llength [lindex $plist 0]]>0] } +proc instcompare {lhs rhs} { + foreach {p1 c1 o1} [split $lhs .] {} + foreach {p2 c2 o2} [split $rhs .] {} + + set res [expr $c1 - $c2] + if {$res==0} { set res [expr $o1 - $o2] } + if {$res==0} { set res [expr $p1 - $p2] } + + return $res +} + # Argument $expr is an FTS5 match expression designed to be executed against # an FTS5 table with the following schema: # @@ -247,11 +266,14 @@ proc matchdata {bPos expr {bAsc 0}} { if {$bPos} { set N [regexp -all -inline {\[nearset [^\]]*\]} $tclexpr] set rowres [list] + set cnt 0 foreach phrase $N { - set cmd "poslist [string range $phrase 9 end-1]" + set arglist [string range $phrase 9 end-1] + set cmd "poslist [lindex $arglist 0] -pc cnt [lrange $arglist 1 end]" set pos [eval $cmd] set rowres [concat $rowres $pos] } + set rowres [lsort -command instcompare $rowres] lappend res [list $id $rowres] } else { lappend res $id @@ -272,6 +294,16 @@ proc matchdata {bPos expr {bAsc 0}} { # End of test code #------------------------------------------------------------------------- +proc fts5_test_poslist {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xInstCount]} {incr i} { + lappend res [string map {{ } .} [$cmd xInst $i]] + } + set res +} + +sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist + #------------------------------------------------------------------------- # Test phrase queries. 
# @@ -291,7 +323,7 @@ foreach {tn phrase} { set res [matchdata 1 $expr] do_execsql_test 1.2.$tn.[llength $res] { - SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } @@ -313,7 +345,7 @@ foreach {tn expr} { } { set res [matchdata 1 $expr] do_execsql_test 2.$tn.[llength $res] { - SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } @@ -328,7 +360,7 @@ foreach {tn expr} { } { set res [matchdata 1 $expr] do_execsql_test 3.$tn.[llength $res] { - SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } @@ -349,12 +381,12 @@ foreach {tn expr} { } { set res [matchdata 1 $expr] do_execsql_test 4.1.$tn.[llength $res] { - SELECT rowid, fts5_test(xx, 'poslist') FROM xx WHERE xx match $expr + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } -do_test 4.1 { poslist {{a b c}} -- a } {0.0} -do_test 4.2 { poslist {{a b c}} -- c } {0.2} +do_test 4.1 { poslist {{a b c}} -- a } {0.0.0} +do_test 4.2 { poslist {{a b c}} -- c } {0.0.2} foreach {tn expr tclexpr} { 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} diff --git a/test/fts5ae.test b/test/fts5ae.test index 07b1891618..b6475d8bbf 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -51,6 +51,7 @@ do_execsql_test 1.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC; } {1 2 4} +fts5_aux_test_functions db #------------------------------------------------------------------------- # @@ -61,26 +62,26 @@ do_execsql_test 2.0 { } do_execsql_test 2.1 { - SELECT rowid, fts5_test(t2, 'poslist') FROM t2 + SELECT rowid, fts5_test_poslist(t2) FROM t2 WHERE t2 MATCH 'm' ORDER BY rowid; } { - 1 {{0.5 1.0 1.2}} - 2 {{0.7 1.5}} + 1 {0.0.5 0.1.0 0.1.2} + 2 {0.0.7 0.1.5} } do_execsql_test 2.2 { - SELECT rowid, fts5_test(t2, 'poslist') FROM t2 + 
SELECT rowid, fts5_test_poslist(t2) FROM t2 WHERE t2 MATCH 'u OR q' ORDER BY rowid; } { - 1 {0.0 {}} - 2 {{} {0.2 0.10}} + 1 {0.0.0} + 2 {1.0.2 1.0.10} } do_execsql_test 2.3 { - SELECT rowid, fts5_test(t2, 'poslist') FROM t2 + SELECT rowid, fts5_test_poslist(t2) FROM t2 WHERE t2 MATCH 'y:o' ORDER BY rowid; } { - 1 {{1.3 1.7}} + 1 {0.1.3 0.1.7} } #------------------------------------------------------------------------- @@ -92,25 +93,25 @@ do_execsql_test 3.0 { } do_execsql_test 3.1 { - SELECT rowid, fts5_test(t3, 'poslist') FROM t3 WHERE t3 MATCH 'NEAR(a b)'; + SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(a b)'; } { - 1 {{0.6 0.10 0.12} {0.9 0.15}} + 1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15} } do_execsql_test 3.2 { - SELECT rowid, fts5_test(t3, 'poslist') FROM t3 WHERE t3 MATCH 'NEAR(r c)'; + SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(r c)'; } { - 2 {0.0 0.1} + 2 {0.0.0 1.0.1} } do_execsql_test 3.3 { INSERT INTO t3 VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o'); - SELECT rowid, fts5_test(t3, 'poslist') + SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'a OR b AND c'; } { - 3 {0.5 {} {}} - 1 {{0.6 0.10 0.12} {0.9 0.15} 1.2} + 3 0.0.5 + 1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15 2.1.2} } #------------------------------------------------------------------------- @@ -122,16 +123,17 @@ do_execsql_test 4.0 { } do_execsql_test 4.1 { - SELECT rowid, fts5_test(t4, 'poslist') FROM t4 WHERE t4 MATCH 'a OR b AND c'; + SELECT rowid, fts5_test_poslist(t4) FROM t4 WHERE t4 MATCH 'a OR b AND c'; } { - 1 {0.5 {} {}} + 1 0.0.5 } #------------------------------------------------------------------------- # Test that the xColumnSize() and xColumnAvgsize() APIs work. 
# - reset_db +fts5_aux_test_functions db + do_execsql_test 5.1 { CREATE VIRTUAL TABLE t5 USING fts5(x, y); INSERT INTO t5 VALUES('a b c d', 'e f g h i j'); @@ -139,7 +141,7 @@ do_execsql_test 5.1 { INSERT INTO t5 VALUES('a', ''); } do_execsql_test 5.2 { - SELECT rowid, fts5_test(t5, 'columnsize') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test_columnsize(t5) FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { 3 {1 0} @@ -148,7 +150,7 @@ do_execsql_test 5.2 { } do_execsql_test 5.2 { - SELECT rowid, fts5_test(t5, 'columntext') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test_columntext(t5) FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { 3 {a {}} @@ -157,7 +159,7 @@ do_execsql_test 5.2 { } do_execsql_test 5.3 { - SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { 3 {5 7} @@ -167,7 +169,7 @@ do_execsql_test 5.3 { do_execsql_test 5.4 { INSERT INTO t5 VALUES('x y z', 'v w x y z'); - SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { 3 {8 12} @@ -179,6 +181,7 @@ do_execsql_test 5.4 { # Test the xTokenize() API # reset_db +fts5_aux_test_functions db do_execsql_test 6.1 { CREATE VIRTUAL TABLE t6 USING fts5(x, y); INSERT INTO t6 VALUES('There are more', 'things in heaven and earth'); @@ -186,7 +189,7 @@ do_execsql_test 6.1 { } do_execsql_test 6.2 { - SELECT rowid, fts5_test(t6, 'tokenize') FROM t6 WHERE t6 MATCH 't*' + SELECT rowid, fts5_test_tokenize(t6) FROM t6 WHERE t6 MATCH 't*' } { 2 {{horatio than are} {dreamt of in your philosophy}} 1 {{there are more} {things in heaven and earth}} @@ -196,6 +199,7 @@ do_execsql_test 6.2 { # Test the xQueryPhrase() API # reset_db +fts5_aux_test_functions db do_execsql_test 7.1 { CREATE VIRTUAL TABLE t7 USING fts5(x, y); } @@ -221,12 +225,12 @@ foreach {tn q res} { 6 {a OR b OR c OR d} 
{{4 2} {3 4} {2 1} {2 2}} } { do_execsql_test 7.3.$tn { - SELECT fts5_test(t7, 'queryphrase') FROM t7 WHERE t7 MATCH $q LIMIT 1 + SELECT fts5_test_queryphrase(t7) FROM t7 WHERE t7 MATCH $q LIMIT 1 } [list $res] } do_execsql_test 7.4 { - SELECT fts5_test(t7, 'rowcount') FROM t7 WHERE t7 MATCH 'a'; + SELECT fts5_test_rowcount(t7) FROM t7 WHERE t7 MATCH 'a'; } {5 5 5 5} #do_execsql_test 7.4 { diff --git a/test/fts5ag.test b/test/fts5ag.test index 647604ef64..52b4774d55 100644 --- a/test/fts5ag.test +++ b/test/fts5ag.test @@ -96,9 +96,11 @@ do_test 1.1 { set {} {} } {} +fts5_aux_test_functions db + proc do_fts5ag_test {tn E} { - set q1 {SELECT fts5_test(t1) FROM t1 WHERE t1 MATCH $E ORDER BY rank} - set q2 {SELECT fts5_test(t1) FROM t1 WHERE t1 MATCH $E ORDER BY bm25(t1)} + set q1 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY rank} + set q2 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY bm25(t1)} set res [execsql $q1] set expected [execsql $q2] diff --git a/test/tester.tcl b/test/tester.tcl index 1c4e93937c..2b5d871d5f 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -1918,3 +1918,4 @@ database_never_corrupt source $testdir/thread_common.tcl source $testdir/malloc_common.tcl +source $testdir/fts5_common.tcl From 005e10e39a2d7e56cd113a1af8e7d37e07deca3d Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 22 Dec 2014 21:01:52 +0000 Subject: [PATCH 057/206] Fixes and simplifications for the snippet() and highlight() functions. 
FossilOrigin-Name: ca5d44042aa7461dcc8b700b0763df4df9d4a891 --- ext/fts5/fts5.c | 5 +- ext/fts5/fts5.h | 12 +- ext/fts5/fts5_aux.c | 656 ++++++++++++++------------------------ ext/fts5/fts5_tcl.c | 51 ++- ext/fts5/fts5auxdata.test | 112 +++++++ manifest | 23 +- manifest.uuid | 2 +- test/fts5af.test | 6 +- test/fts5ak.test | 23 +- 9 files changed, 449 insertions(+), 441 deletions(-) create mode 100644 ext/fts5/fts5auxdata.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 4c6e98b86e..67b2b82373 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1300,7 +1300,10 @@ static int fts5ApiSetAuxdata( } }else{ pData = (Fts5Auxdata*)sqlite3_malloc(sizeof(Fts5Auxdata)); - if( pData==0 ) return SQLITE_NOMEM; + if( pData==0 ){ + if( xDelete ) xDelete(pPtr); + return SQLITE_NOMEM; + } memset(pData, 0, sizeof(Fts5Auxdata)); pData->pAux = pCsr->pAux; pData->pNext = pCsr->pAuxdata; diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 71db9577c9..8bee42dbc0 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -141,9 +141,10 @@ typedef void (*fts5_extension_function)( ** future invocation of the same fts5 extension function made as part of ** of the same MATCH query using the xGetAuxdata() API. ** -** Each extension function is allocated a single auxiliary data slot per -** query. If the extension function is invoked more than once by the SQL -** query, then all invocations share a single auxiliary data context. +** Each extension function is allocated a single auxiliary data slot for +** each FTS query (MATCH expression). If the extension function is invoked +** more than once for a single FTS query, then all invocations share a +** single auxiliary data context. ** ** If there is already an auxiliary data pointer when this function is ** invoked, then it is replaced by the new pointer. 
If an xDelete callback @@ -153,6 +154,11 @@ typedef void (*fts5_extension_function)( ** The xDelete callback, if one is specified, is also invoked on the ** auxiliary data pointer after the FTS5 query has finished. ** +** If an error (e.g. an OOM condition) occurs within this function, +** the auxiliary data is set to NULL and an error code returned. If the +** xDelete parameter was not NULL, it is invoked on the auxiliary data +** pointer before returning. +** ** ** xGetAuxdata(pFts5, bClear) ** diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index a09487a369..c0224a0e02 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -14,22 +14,113 @@ #include "fts5Int.h" #include +/* +** Object used to iterate through all "coalesced phrase instances" in +** a single column of the current row. If the phrase instances in the +** column being considered do not overlap, this object simply iterates +** through them. Or, if they do overlap (share one or more tokens in +** common), each set of overlapping instances is treated as a single +** match. See documentation for the highlight() auxiliary function for +** details. +** +** Usage is: +** +** for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter); +** rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); +** rc = fts5CInstIterNext(&iter) +** ){ +** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); +** } +** +*/ +typedef struct CInstIter CInstIter; +struct CInstIter { + const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ + Fts5Context *pFts; /* First arg to pass to pApi functions */ + int iCol; /* Column to search */ + int iInst; /* Next phrase instance index */ + int nInst; /* Total number of phrase instances */ + + /* Output variables */ + int iStart; /* First token in coalesced phrase instance */ + int iEnd; /* Last token in coalesced phrase instance */ +}; + +/* +** Return non-zero if the iterator is at EOF, or zero otherwise.
+*/ +static int fts5CInstIterEof(CInstIter *pIter){ + return (pIter->iStart < 0); +} + +/* +** Advance the iterator to the next coalesced phrase instance. Return +** an SQLite error code if an error occurs, or SQLITE_OK otherwise. +*/ +static int fts5CInstIterNext(CInstIter *pIter){ + int rc = SQLITE_OK; + pIter->iStart = -1; + pIter->iEnd = -1; + + while( rc==SQLITE_OK && pIter->iInstnInst ){ + int ip; int ic; int io; + rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); + if( rc==SQLITE_OK ){ + if( ic==pIter->iCol ){ + int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); + if( pIter->iStart<0 ){ + pIter->iStart = io; + pIter->iEnd = iEnd; + }else if( io<=pIter->iEnd ){ + if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; + }else{ + break; + } + } + pIter->iInst++; + } + } + + return rc; +} + +/* +** Initialize the iterator object indicated by the final parameter to +** iterate through coalesced phrase instances in column iCol. +*/ +static int fts5CInstIterInit( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + int iCol, + CInstIter *pIter +){ + int rc; + + memset(pIter, 0, sizeof(CInstIter)); + pIter->pApi = pApi; + pIter->pFts = pFts; + pIter->iCol = iCol; + rc = pApi->xInstCount(pFts, &pIter->nInst); + + if( rc==SQLITE_OK ){ + rc = fts5CInstIterNext(pIter); + } + + return rc; +} + + + /************************************************************************* ** Start of highlight() implementation. 
*/ typedef struct HighlightContext HighlightContext; struct HighlightContext { - const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ - Fts5Context *pFts; /* First arg to pass to pApi functions */ - int nInst; /* Total number of phrase instances */ - int iInst; /* Current phrase instance index */ - int iStart; /* First token of current phrase */ - int iEnd; /* Last token of current phrase */ - + CInstIter iter; /* Coalesced Instance Iterator */ + int iRangeStart; + int iRangeEnd; const char *zOpen; /* Opening highlight */ const char *zClose; /* Closing highlight */ - int iCol; /* Column to read from */ - const char *zIn; /* Input text */ int nIn; /* Size of input text in bytes */ int iOff; /* Current offset within zIn[] */ @@ -40,6 +131,10 @@ struct HighlightContext { ** Append text to the HighlightContext output string - p->zOut. Argument ** z points to a buffer containing n bytes of text to append. If n is ** negative, everything up until the first '\0' is appended to the output. +** +** If *pRc is set to any value other than SQLITE_OK when this function is +** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, +** *pRc is set to an error code before returning. */ static void fts5HighlightAppend( int *pRc, @@ -53,6 +148,9 @@ static void fts5HighlightAppend( } } +/* +** Tokenizer callback used by implementation of highlight() function. 
+*/ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ @@ -64,39 +162,43 @@ static int fts5HighlightCb( HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; - if( iPos==p->iStart ){ + if( p->iRangeEnd>0 ){ + if( iPosiRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; + if( iPos==p->iRangeStart ) p->iOff = iStartOff; + } + + if( iPos==p->iter.iStart ){ fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); fts5HighlightAppend(&rc, p, p->zOpen, -1); p->iOff = iStartOff; } - if( iPos==p->iEnd ){ - int bClose = 1; - for(p->iInst++; rc==SQLITE_OK && p->iInstnInst; p->iInst++){ - int iP, iPCol, iOff; - rc = p->pApi->xInst(p->pFts, p->iInst, &iP, &iPCol, &iOff); - if( iPCol!=p->iCol ){ - p->iStart = p->iEnd = -1; - }else{ - int iEnd = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP); - if( iEnd<=p->iEnd ) continue; - if( iOff<=p->iEnd ) bClose = 0; - p->iStart = iOff; - p->iEnd = iEnd; - } - break; + if( iPos==p->iter.iEnd ){ + if( p->iRangeEnd && p->iter.iStartiRangeStart ){ + fts5HighlightAppend(&rc, p, p->zOpen, -1); } + fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); + fts5HighlightAppend(&rc, p, p->zClose, -1); + p->iOff = iEndOff; + if( rc==SQLITE_OK ){ + rc = fts5CInstIterNext(&p->iter); + } + } - if( bClose ){ - fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); + if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ + fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); + p->iOff = iEndOff; + if( iPositer.iEnd ){ fts5HighlightAppend(&rc, p, p->zClose, -1); - p->iOff = iEndOff; } } return rc; } +/* +** Implementation of highlight() function. 
+*/ static void fts5HighlightFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ @@ -106,6 +208,7 @@ static void fts5HighlightFunction( ){ HighlightContext ctx; int rc; + int iCol; if( nVal!=3 ){ const char *zErr = "wrong number of arguments to function highlight()"; @@ -113,26 +216,14 @@ static void fts5HighlightFunction( return; } + iCol = sqlite3_value_int(apVal[0]); memset(&ctx, 0, sizeof(HighlightContext)); - ctx.iCol = sqlite3_value_int(apVal[0]); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); - ctx.pApi = pApi; - ctx.pFts = pFts; - rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn); - if( rc==SQLITE_OK ) rc = pApi->xInstCount(pFts, &ctx.nInst); + rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); - /* Find the first phrase instance in the right column. */ - ctx.iStart = -1; - ctx.iEnd = -1; - for( ; ctx.iInstxInst(pFts, ctx.iInst, &iP, &iPCol, &iOff); - if( iPCol==ctx.iCol ){ - ctx.iStart = iOff; - ctx.iEnd = iOff - 1 + pApi->xPhraseSize(pFts, iP); - break; - } + if( rc==SQLITE_OK ){ + rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); } if( rc==SQLITE_OK ){ @@ -150,347 +241,7 @@ static void fts5HighlightFunction( /* **************************************************************************/ -typedef struct SnipPhrase SnipPhrase; -typedef struct SnipIter SnipIter; -typedef struct SnippetCtx SnippetCtx; -struct SnipPhrase { - u64 mask; /* Current mask */ - int nToken; /* Tokens in this phrase */ - int i; /* Current offset in phrase poslist */ - i64 iPos; /* Next position in phrase (-ve -> EOF) */ -}; - -struct SnipIter { - i64 iLast; /* Last token position of current snippet */ - int nScore; /* Score of current snippet */ - - const Fts5ExtensionApi *pApi; - Fts5Context *pFts; - u64 szmask; /* Mask used to on SnipPhrase.mask */ - int nPhrase; /* Number of phrases */ - SnipPhrase aPhrase[0]; /* 
Array of size nPhrase */ -}; - -struct SnippetCtx { - int iFirst; /* Offset of first token to record */ - int nToken; /* Size of aiStart[] and aiEnd[] arrays */ - int iSeen; /* Set to largest offset seen */ - int *aiStart; - int *aiEnd; -}; - -static int fts5SnippetCallback( - void *pContext, /* Pointer to Fts5Buffer object */ - const char *pToken, /* Buffer containing token */ - int nToken, /* Size of token in bytes */ - int iStart, /* Start offset of token */ - int iEnd, /* End offset of token */ - int iPos /* Position offset of token */ -){ - int rc = SQLITE_OK; - SnippetCtx *pCtx = (SnippetCtx*)pContext; - int iOff = iPos - pCtx->iFirst; - - if( iOff>=0 ){ - if( iOff < pCtx->nToken ){ - pCtx->aiStart[iOff] = iStart; - pCtx->aiEnd[iOff] = iEnd; - } - pCtx->iSeen = iPos; - if( iOff>=pCtx->nToken ) rc = SQLITE_DONE; - } - - return rc; -} - -/* -** Set pIter->nScore to the score for the current entry. -*/ -static void fts5SnippetCalculateScore(SnipIter *pIter){ - int i; - int nScore = 0; - assert( pIter->iLast>=0 ); - - for(i=0; inPhrase; i++){ - SnipPhrase *p = &pIter->aPhrase[i]; - u64 mask = p->mask; - if( mask ){ - u64 j; - nScore += 1000; - for(j=1; j & pIter->szmask; j<<=1){ - if( mask & j ) nScore++; - } - } - } - - pIter->nScore = nScore; -} - -/* -** Allocate a new snippet iter. 
-*/ -static int fts5SnipIterNew( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - int nToken, /* Number of tokens in snippets */ - SnipIter **ppIter /* OUT: New object */ -){ - int i; /* Counter variable */ - SnipIter *pIter; /* New iterator object */ - int nByte; /* Bytes of space to allocate */ - int nPhrase; /* Number of phrases in query */ - - *ppIter = 0; - nPhrase = pApi->xPhraseCount(pFts); - nByte = sizeof(SnipIter) + nPhrase * sizeof(SnipPhrase); - pIter = (SnipIter*)sqlite3_malloc(nByte); - if( pIter==0 ) return SQLITE_NOMEM; - memset(pIter, 0, nByte); - - pIter->nPhrase = nPhrase; - pIter->pApi = pApi; - pIter->pFts = pFts; - pIter->szmask = ((u64)1 << nToken) - 1; - assert( nToken<=63 ); - - for(i=0; iaPhrase[i].nToken = pApi->xPhraseSize(pFts, i); - } - - *ppIter = pIter; - return SQLITE_OK; -} - -/* -** Set the iterator to point to the first candidate snippet. -*/ -static void fts5SnipIterFirst(SnipIter *pIter){ - const Fts5ExtensionApi *pApi = pIter->pApi; - Fts5Context *pFts = pIter->pFts; - int i; /* Used to iterate through phrases */ - SnipPhrase *pMin = 0; /* Phrase with first match */ - - memset(pIter->aPhrase, 0, sizeof(SnipPhrase) * pIter->nPhrase); - - for(i=0; inPhrase; i++){ - SnipPhrase *p = &pIter->aPhrase[i]; - p->nToken = pApi->xPhraseSize(pFts, i); - pApi->xPoslist(pFts, i, &p->i, &p->iPos); - if( p->iPos>=0 && (pMin==0 || p->iPosiPos) ){ - pMin = p; - } - } - assert( pMin ); - - pIter->iLast = pMin->iPos + pMin->nToken - 1; - pMin->mask = 0x01; - pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos); - fts5SnippetCalculateScore(pIter); -} - -/* -** Advance the snippet iterator to the next candidate snippet. 
-*/ -static void fts5SnipIterNext(SnipIter *pIter){ - const Fts5ExtensionApi *pApi = pIter->pApi; - Fts5Context *pFts = pIter->pFts; - int nPhrase = pIter->nPhrase; - int i; /* Used to iterate through phrases */ - SnipPhrase *pMin = 0; - - for(i=0; iaPhrase[i]; - if( p->iPos>=0 && (pMin==0 || p->iPosiPos) ) pMin = p; - } - - if( pMin==0 ){ - /* pMin==0 indicates that the SnipIter is at EOF. */ - pIter->iLast = -1; - }else{ - i64 nShift = pMin->iPos - pIter->iLast; - assert( nShift>=0 ); - for(i=0; iaPhrase[i]; - if( nShift>=63 ){ - p->mask = 0; - }else{ - p->mask = p->mask << (int)nShift; - p->mask &= pIter->szmask; - } - } - - pIter->iLast = pMin->iPos; - pMin->mask |= 0x01; - fts5SnippetCalculateScore(pIter); - pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos); - } -} - -static void fts5SnipIterFree(SnipIter *pIter){ - if( pIter ){ - sqlite3_free(pIter); - } -} - -static int fts5SnippetText( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - SnipIter *pIter, /* Snippet to write to buffer */ - int nToken, /* Size of desired snippet in tokens */ - const char *zStart, - const char *zFinal, - const char *zEllip, - Fts5Buffer *pBuf /* Write output to this buffer */ -){ - SnippetCtx ctx; - int i; - u64 all = 0; - const char *zCol; /* Column text to extract snippet from */ - int nCol; /* Size of column text in bytes */ - int rc; - int nShift; - - rc = pApi->xColumnText(pFts, FTS5_POS2COLUMN(pIter->iLast), &zCol, &nCol); - if( rc!=SQLITE_OK ) return rc; - - /* At this point pIter->iLast is the offset of the last token in the - ** proposed snippet. However, in all cases pIter->iLast contains the - ** final token of one of the phrases. This makes the snippet look - ** unbalanced. For example: - ** - ** "...x x x x x term..." - ** - ** It is better to increase iLast a little so that the snippet looks - ** more like: - ** - ** "...x x x term y y..." 
- ** - ** The problem is that there is no easy way to discover whether or not - ** how many tokens are present in the column following "term". - */ - - /* Set variable nShift to the number of tokens by which the snippet - ** should be shifted, assuming there are sufficient tokens to the right - ** of iLast in the column value. */ - for(i=0; inPhrase; i++){ - int iToken; - for(iToken=0; iTokenaPhrase[i].nToken; iToken++){ - all |= (pIter->aPhrase[i].mask << iToken); - } - } - for(i=nToken-1; i>=0; i--){ - if( all & ((u64)1 << i) ) break; - } - assert( i>=0 ); - nShift = (nToken - i) / 2; - - memset(&ctx, 0, sizeof(SnippetCtx)); - ctx.nToken = nToken + nShift; - ctx.iFirst = FTS5_POS2OFFSET(pIter->iLast) - nToken + 1; - if( ctx.iFirst<0 ){ - nShift += ctx.iFirst; - if( nShift<0 ) nShift = 0; - ctx.iFirst = 0; - } - ctx.aiStart = (int*)sqlite3_malloc(sizeof(int) * ctx.nToken * 2); - if( ctx.aiStart==0 ) return SQLITE_NOMEM; - ctx.aiEnd = &ctx.aiStart[ctx.nToken]; - - rc = pApi->xTokenize(pFts, zCol, nCol, (void*)&ctx, fts5SnippetCallback); - if( rc==SQLITE_OK ){ - int i1; /* First token from input to include */ - int i2; /* Last token from input to include */ - - int iPrint; - int iMatchto; - int iLast; - - int *aiStart = ctx.aiStart - ctx.iFirst; - int *aiEnd = ctx.aiEnd - ctx.iFirst; - - /* Ideally we want to start the snippet with token (ctx.iFirst + nShift). - ** However, this is only possible if there are sufficient tokens within - ** the column. This block sets variables i1 and i2 to the first and last - ** input tokens to include in the snippet. */ - if( (ctx.iFirst + nShift + nToken)<=ctx.iSeen ){ - i1 = ctx.iFirst + nShift; - i2 = i1 + nToken - 1; - }else{ - i2 = ctx.iSeen; - i1 = ctx.iSeen - nToken + 1; - assert( i1>=0 || ctx.iFirst==0 ); - if( i1<0 ) i1 = 0; - } - - /* If required, append the preceding ellipsis. 
*/ - if( i1>0 ) sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%s", zEllip); - - iLast = FTS5_POS2OFFSET(pIter->iLast); - iPrint = i1; - iMatchto = -1; - - for(i=i1; i<=i2; i++){ - - /* Check if this is the first token of any phrase match. */ - int ip; - for(ip=0; ipnPhrase; ip++){ - SnipPhrase *pPhrase = &pIter->aPhrase[ip]; - u64 m = (1 << (iLast - i - pPhrase->nToken + 1)); - - if( i<=iLast && (pPhrase->mask & m) ){ - if( iMatchto<0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s", - aiStart[i] - aiStart[iPrint], - &zCol[aiStart[iPrint]], - zStart - ); - iPrint = i; - } - if( i>iMatchto ) iMatchto = i + pPhrase->nToken - 1; - } - } - - if( i==iMatchto ){ - sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s", - aiEnd[i] - aiStart[iPrint], - &zCol[aiStart[iPrint]], - zFinal - ); - iMatchto = -1; - iPrint = i+1; - - if( i=0 ){ - sqlite3Fts5BufferAppendString(&rc, pBuf, zFinal); - } - } - - /* If required, append the trailing ellipsis. */ - if( i2=1 ) zStart = (const char*)sqlite3_value_text(apVal[0]); - if( nVal>=2 ) zFinal = (const char*)sqlite3_value_text(apVal[1]); - if( nVal>=3 ) zEllip = (const char*)sqlite3_value_text(apVal[2]); - if( nVal>=4 ){ - nToken = sqlite3_value_int(apVal[3]); - if( nToken==0 ) nToken = -15; + if( nVal!=5 ){ + const char *zErr = "wrong number of arguments to function snippet()"; + sqlite3_result_error(pCtx, zErr, -1); + return; + } + + memset(&ctx, 0, sizeof(HighlightContext)); + rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); + + iCol = sqlite3_value_int(apVal[0]); + ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); + ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); + zEllips = (const char*)sqlite3_value_text(apVal[3]); + nToken = sqlite3_value_int(apVal[4]); + + iBestCol = (iCol>=0 ? iCol : 0); + nPhrase = pApi->xPhraseCount(pFts); + aSeen = sqlite3_malloc(nPhrase); + if( aSeen==0 ){ + rc = SQLITE_NOMEM; } - nAbs = nToken * (nToken<0 ? 
-1 : 1); - rc = fts5SnipIterNew(pApi, pFts, nAbs, &pIter); if( rc==SQLITE_OK ){ - Fts5Buffer buf; /* Result buffer */ - int nBestScore = 0; /* Score of best snippet found */ + rc = pApi->xInstCount(pFts, &nInst); + } + for(i=0; rc==SQLITE_OK && ixInst(pFts, i, &ip, &iSnippetCol, &iStart); + if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){ + int nScore = 1000; + int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip); + int j; + aSeen[ip] = 1; - for(fts5SnipIterFirst(pIter); - pIter->iLast>=0; - fts5SnipIterNext(pIter) - ){ - if( pIter->nScore>nBestScore ) nBestScore = pIter->nScore; - } - for(fts5SnipIterFirst(pIter); - pIter->iLast>=0; - fts5SnipIterNext(pIter) - ){ - if( pIter->nScore==nBestScore ) break; - } + for(j=i+1; rc==SQLITE_OK && jxInst(pFts, j, &ip, &ic, &io); + iFinal = io + pApi->xPhraseSize(pFts, ip) - 1; + if( rc==SQLITE_OK && ic==iSnippetCol && iLastiLast ) iLast = iFinal; + } + } - memset(&buf, 0, sizeof(Fts5Buffer)); - rc = fts5SnippetText(pApi, pFts, pIter, nAbs, zStart, zFinal, zEllip, &buf); - if( rc==SQLITE_OK ){ - sqlite3_result_text(pCtx, (const char*)buf.p, buf.n, SQLITE_TRANSIENT); + if( rc==SQLITE_OK && nScore>nBestScore ){ + iBestCol = iSnippetCol; + iBestStart = iStart; + iBestLast = iLast; + nBestScore = nScore; + } } - sqlite3_free(buf.p); } - fts5SnipIterFree(pIter); - if( rc!=SQLITE_OK ){ + if( rc==SQLITE_OK ){ + rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); + } + if( rc==SQLITE_OK ){ + rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); + } + if( rc==SQLITE_OK ){ + rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); + } + + if( (iBestStart+nToken-1)>iBestLast ){ + iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; + } + if( iBestStart+nToken>nColSize ){ + iBestStart = nColSize - nToken; + } + if( iBestStart<0 ) iBestStart = 0; + + ctx.iRangeStart = iBestStart; + ctx.iRangeEnd = iBestStart + nToken - 1; + + if( iBestStart>0 ){ + fts5HighlightAppend(&rc, &ctx, zEllips, -1); + } + if( rc==SQLITE_OK ){ + rc = 
pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb); + } + if( ctx.iRangeEnd>=(nColSize-1) ){ + fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); + }else{ + fts5HighlightAppend(&rc, &ctx, zEllips, -1); + } + + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); + }else{ sqlite3_result_error_code(pCtx, rc); } + sqlite3_free(ctx.zOut); + sqlite3_free(aSeen); } +/************************************************************************/ + /* ** Context object passed by fts5GatherTotals() to xQueryPhrase callback diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index dd5ef60431..d9b3dd4883 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -57,6 +57,19 @@ struct F5tApi { Fts5Context *pFts; }; +/* +** An object of this type is used with the xSetAuxdata() and xGetAuxdata() +** API test wrappers. The tcl interface allows a single tcl value to be +** saved using xSetAuxdata(). Instead of simply storing a pointer to the +** tcl object, the code in this file wraps it in an sqlite3_malloc'd +** instance of the following struct so that if the destructor is not +** correctly invoked it will be reported as an SQLite memory leak. +*/ +typedef struct F5tAuxData F5tAuxData; +struct F5tAuxData { + Tcl_Obj *pObj; +}; + static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, @@ -108,8 +121,14 @@ static int xQueryPhraseCb( return rc; } +static void xSetAuxdataDestructor(void *p){ + F5tAuxData *pData = (F5tAuxData*)p; + Tcl_DecrRefCount(pData->pObj); + sqlite3_free(pData); +} + /* -** api sub-command... +** api sub-command... ** ** Description... 
*/ @@ -136,6 +155,8 @@ static int xF5tApi( { "xColumnText", 1, "COL" }, { "xColumnSize", 1, "COL" }, { "xQueryPhrase", 2, "PHRASE SCRIPT" }, + { "xSetAuxdata", 1, "VALUE" }, + { "xGetAuxdata", 1, "CLEAR" }, { 0, 0, 0} }; @@ -284,6 +305,34 @@ static int xF5tApi( } break; } + CASE(12, "xSetAuxdata") { + F5tAuxData *pData = (F5tAuxData*)sqlite3_malloc(sizeof(F5tAuxData)); + if( pData==0 ){ + Tcl_AppendResult(interp, "out of memory", 0); + return TCL_ERROR; + } + pData->pObj = objv[2]; + Tcl_IncrRefCount(pData->pObj); + rc = p->pApi->xSetAuxdata(p->pFts, pData, xSetAuxdataDestructor); + break; + } + CASE(13, "xGetAuxdata") { + F5tAuxData *pData; + int bClear; + if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ){ + return TCL_ERROR; + } + pData = (F5tAuxData*)p->pApi->xGetAuxdata(p->pFts, bClear); + if( pData==0 ){ + Tcl_ResetResult(interp); + }else{ + Tcl_SetObjResult(interp, pData->pObj); + if( bClear ){ + xSetAuxdataDestructor((void*)pData); + } + } + break; + } default: assert( 0 ); diff --git a/ext/fts5/fts5auxdata.test b/ext/fts5/fts5auxdata.test new file mode 100644 index 0000000000..158e393c32 --- /dev/null +++ b/ext/fts5/fts5auxdata.test @@ -0,0 +1,112 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs. +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. 
test] +} +source $testdir/tester.tcl +set testprefix fts5auxdata + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE f1 USING fts5(a, b); + INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1'); + INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2'); + INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3'); + INSERT INTO f1(rowid, a, b) VALUES(4, 'a', 'b4'); + INSERT INTO f1(rowid, a, b) VALUES(5, 'a', 'b5'); +} + +proc aux_function_1 {cmd tn} { + switch [$cmd xRowid] { + 1 { + do_test $tn.1 [list $cmd xGetAuxdata 0 ] {} + $cmd xSetAuxdata "one" + } + + 2 { + do_test $tn.2 [list $cmd xGetAuxdata 0 ] {one} + $cmd xSetAuxdata "two" + } + + 3 { + do_test $tn.3 [list $cmd xGetAuxdata 0 ] {two} + } + + 4 { + do_test $tn.4 [list $cmd xGetAuxdata 1 ] {two} + } + + 5 { + do_test $tn.5 [list $cmd xGetAuxdata 0 ] {} + } + } +} + +sqlite3_fts5_create_function db aux_function_1 aux_function_1 +db eval { + SELECT aux_function_1(f1, 1) FROM f1 WHERE f1 MATCH 'a' + ORDER BY rowid ASC +} + +proc aux_function_2 {cmd tn inst} { + if {$inst == "A"} { + switch [$cmd xRowid] { + 1 { + do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] {} + $cmd xSetAuxdata "one $inst" + } + 2 { + do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "one $inst" + $cmd xSetAuxdata "two $inst" + } + 3 { + do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two $inst" + } + 4 { + do_test $tn.4.$inst [list $cmd xGetAuxdata 1 ] "two $inst" + } + 5 { + do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {} + } + } + } else { + switch [$cmd xRowid] { + 1 { + do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] "one A" + } + 2 { + do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "two A" + } + 3 { + do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two A" + } + 4 { + do_test $tn.4.$inst [list $cmd xGetAuxdata 0 ] {} + } + 5 { + do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {} + } + } + } +} + +sqlite3_fts5_create_function db aux_function_2 aux_function_2 +db eval { + SELECT aux_function_2(f1, 2, 'A'), aux_function_2(f1, 2, 'B') + FROM f1 WHERE f1 MATCH 'a' + 
ORDER BY rowid ASC +} + +finish_test + diff --git a/manifest b/manifest index 6b558dbd3f..de303c2758 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sthe\sfts5_test()\saux\sfunction.\sTest\saux\sfunctions\susing\sthe\stcl\sinterface\sinstead. -D 2014-12-19T20:53:51.092 +C Fixes\sand\ssimplifications\sfor\sthe\ssnippet()\sand\shighlight()\sfunctions. +D 2014-12-22T21:01:52.167 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,18 +104,19 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c d1c1722eb661da3e8e3a19909958b97beff7d243 -F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 +F ext/fts5/fts5.c 8e5af98a1e370a39c8a91ed77f21ad171e5b214c +F ext/fts5/fts5.h 0a0e97c65ba3b3e82638d7f7742c5d96f2b61535 F ext/fts5/fts5Int.h 36054b1dfc4881a9b94f945b348ab6cc01c0c7a5 -F ext/fts5/fts5_aux.c b8e5660a05b86dab059c9989835b5df0ac5e3c55 +F ext/fts5/fts5_aux.c 6200a3f6d17c491e6c87189eaef7649ee7fe564d F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 F ext/fts5/fts5_config.c 5caeb4e77680d635be25b899f97a29cf26fb45ce F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c bfeedb83b095a1018f4f531c3cc3f9099e9f9081 -F ext/fts5/fts5_tcl.c d56484fd5cc3b02d268ee11fa4918e98ce3b1d03 +F ext/fts5/fts5_tcl.c 4392e74421d24cc37c370732e8b48217cd2c1777 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b +F ext/fts5/fts5auxdata.test 3844d0f098441cedf75b9cc96d5e6e94d1a3bef4 F 
ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -605,12 +606,12 @@ F test/fts5ab.test 52f6b9223372ff70b0edb5a3054fbd7bc7fcfefc F test/fts5ac.test 021e175b809d2baa23792807caae5dfc6bc706f4 F test/fts5ad.test ff518db6b0d7750b51ee6531ffebf82e57094bfd F test/fts5ae.test 0877873a2b9df6b3a2d832ed5ea928f838d19faf -F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc +F test/fts5af.test 355d2048bd9ddc2f8f4e80a4cb1e70c6204422a0 F test/fts5ag.test 8b2bb67cf2a3245eaad5e49ab8daa6be6e64332b F test/fts5ah.test 788e923e60b5e7a559f672cfbf262b8b260ea176 F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba -F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f +F test/fts5ak.test 26187e57ba56a9e10e6da894a038b07588e7249d F test/fts5al.test 61b067f3b0b61679ab164a8a855882dfd313988d F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/fts5fault1.test ba59b6f0897a4fe510c446b98968ec1e8800a56b @@ -1209,7 +1210,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 38b3c65e3ee95eb7afadb76e0110570fbbc41e1b -R a4b205e397ac161e65b87e55d29e5aba +P 67e3ffd950c5347d219a06b33ad51949cffa7d90 +R eaa7ec352adc789c928b49341506e13d U dan -Z b9d8ce93a014115b155c701f123b4810 +Z 1c7bcf3d91cb30ef107cecfef87d0af9 diff --git a/manifest.uuid b/manifest.uuid index 3656228956..4e7afab002 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -67e3ffd950c5347d219a06b33ad51949cffa7d90 \ No newline at end of file +ca5d44042aa7461dcc8b700b0763df4df9d4a891 \ No newline at end of file diff --git a/test/fts5af.test b/test/fts5af.test index da70dc7ae6..ca56c0ec1c 100644 --- 
a/test/fts5af.test +++ b/test/fts5af.test @@ -38,19 +38,19 @@ proc do_snippet_test {tn doc match res} { do_execsql_test $tn.1 { DELETE FROM t1; INSERT INTO t1 VALUES($v1, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; } [list $res] do_execsql_test $tn.2 { DELETE FROM t1; INSERT INTO t1 VALUES(NULL, $v1); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; } [list $res] do_execsql_test $tn.3 { DELETE FROM t1; INSERT INTO t1 VALUES($v1, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2 + SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2 ORDER BY rank DESC; } [list $res] diff --git a/test/fts5ak.test b/test/fts5ak.test index 29d19bc4b1..53977ab70c 100644 --- a/test/fts5ak.test +++ b/test/fts5ak.test @@ -16,7 +16,7 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl -set testprefix fts5aj +set testprefix fts5ak # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { @@ -118,6 +118,27 @@ do_execsql_test 2.6.2 { {a b c [d] e [f] g h i j} } +#------------------------------------------------------------------------- +# The example from the docs. 
+# +do_execsql_test 3.1 { + -- Assuming this: + CREATE VIRTUAL TABLE ft USING fts5(a); + INSERT INTO ft VALUES('a b c x c d e'); + INSERT INTO ft VALUES('a b c c d e'); + INSERT INTO ft VALUES('a b c d e'); + + -- The following SELECT statement returns these three rows: + -- '[a b c] x [c d e]' + -- '[a b c] [c d e]' + -- '[a b c d e]' + SELECT highlight(ft, 0, '[', ']') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e'; +} { + {[a b c d e]} + {[a b c] [c d e]} + {[a b c] x [c d e]} +} + finish_test From 2a615fa627d51f87ab272ce7eaa4e7b845bff066 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 23 Dec 2014 19:18:34 +0000 Subject: [PATCH 058/206] Fix the fts5 bm25() function so that it matches the documentation. FossilOrigin-Name: 1ac7a8d0af9a71ddf6a1421033dcb9fa67c6120c --- ext/fts5/fts5.c | 9 +- ext/fts5/fts5.h | 12 +- ext/fts5/fts5_aux.c | 344 +++++++++++++--------------------------- ext/fts5/fts5_storage.c | 12 +- manifest | 18 +-- manifest.uuid | 2 +- 6 files changed, 144 insertions(+), 253 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 67b2b82373..3995d5d4b9 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1262,10 +1262,17 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ i64 iRowid = fts5CursorRowid(pCsr); rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); } - if( iCol>=0 && iColpConfig->nCol ){ + if( iCol<0 ){ + int i; + *pnToken = 0; + for(i=0; ipConfig->nCol; i++){ + *pnToken += pCsr->aColumnSize[i]; + } + }else if( iColpConfig->nCol ){ *pnToken = pCsr->aColumnSize[iCol]; }else{ *pnToken = 0; + rc = SQLITE_RANGE; } return rc; } diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 8bee42dbc0..0e448659ab 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -48,11 +48,17 @@ typedef void (*fts5_extension_function)( ** Return a copy of the context pointer the extension function was ** registered with. 
** -** ** xColumnTotalSize(pFts, iCol, pnToken): -** Returns the total number of tokens in column iCol, considering all -** rows in the FTS5 table. +** If parameter iCol is less than zero, set output variable *pnToken +** to the total number of tokens in the FTS5 table. Or, if iCol is +** non-negative but less than the number of columns in the table, return +** the total number of tokens in column iCol, considering all rows in +** the FTS5 table. ** +** If parameter iCol is greater than or equal to the number of columns +** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. +** an OOM condition or IO error), an appropriate SQLite error code is +** returned. ** ** xColumnCount: ** Returns the number of columns in the FTS5 table. diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index c0224a0e02..64904210f6 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -117,8 +117,8 @@ static int fts5CInstIterInit( typedef struct HighlightContext HighlightContext; struct HighlightContext { CInstIter iter; /* Coalesced Instance Iterator */ - int iRangeStart; - int iRangeEnd; + int iRangeStart; /* First token to include */ + int iRangeEnd; /* If non-zero, last token to include */ const char *zOpen; /* Opening highlight */ const char *zClose; /* Closing highlight */ const char *zIn; /* Input text */ @@ -164,7 +164,7 @@ static int fts5HighlightCb( if( p->iRangeEnd>0 ){ if( iPosiRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; - if( iPos==p->iRangeStart ) p->iOff = iStartOff; + if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; } if( iPos==p->iter.iStart ){ @@ -239,9 +239,12 @@ static void fts5HighlightFunction( sqlite3_free(ctx.zOut); } /* +** End of highlight() implementation. **************************************************************************/ - +/* +** Implementation of snippet() function. 
+*/ static void fts5SnippetFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ @@ -260,7 +263,7 @@ static void fts5SnippetFunction( unsigned char *aSeen; /* Array of "seen instance" flags */ int iBestCol; /* Column containing best snippet */ int iBestStart = 0; /* First token of best snippet */ - int iBestLast = nToken; /* Last token of best snippet */ + int iBestLast; /* Last token of best snippet */ int nBestScore = 0; /* Score of best snippet */ int nColSize; /* Total size of iBestCol in tokens */ @@ -271,13 +274,13 @@ static void fts5SnippetFunction( } memset(&ctx, 0, sizeof(HighlightContext)); - rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); - iCol = sqlite3_value_int(apVal[0]); + rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); zEllips = (const char*)sqlite3_value_text(apVal[3]); nToken = sqlite3_value_int(apVal[4]); + iBestLast = nToken-1; iBestCol = (iCol>=0 ? iCol : 0); nPhrase = pApi->xPhraseCount(pFts); @@ -363,151 +366,78 @@ static void fts5SnippetFunction( /************************************************************************/ - /* -** Context object passed by fts5GatherTotals() to xQueryPhrase callback -** fts5GatherCallback(). +** The first time the bm25() function is called for a query, an instance +** of the following structure is allocated and populated. */ -struct Fts5GatherCtx { - int nCol; /* Number of columns in FTS table */ - int iPhrase; /* Phrase currently under investigation */ - int *anVal; /* Array to populate */ +typedef struct Fts5Bm25Data Fts5Bm25Data; +struct Fts5Bm25Data { + int nPhrase; /* Number of phrases in query */ + double avgdl; /* Average number of tokens in each row */ + double *aIDF; /* IDF for each phrase */ + double *aFreq; /* Array used to calculate phrase freq. 
*/ }; /* -** Callback used by fts5GatherTotals() with the xQueryPhrase() API. +** Callback used by fts5Bm25GetData() to count the number of rows in the +** table matched by each individual phrase within the query. */ -static int fts5GatherCallback( +static int fts5CountCb( const Fts5ExtensionApi *pApi, Fts5Context *pFts, - void *pUserData /* Pointer to Fts5GatherCtx object */ + void *pUserData /* Pointer to sqlite3_int64 variable */ ){ - struct Fts5GatherCtx *p = (struct Fts5GatherCtx*)pUserData; - int i = 0; - int iPrev = -1; - i64 iPos = 0; - - while( 0==pApi->xPoslist(pFts, 0, &i, &iPos) ){ - int iCol = FTS5_POS2COLUMN(iPos); - if( iCol!=iPrev ){ - p->anVal[p->iPhrase * p->nCol + iCol]++; - iPrev = iCol; - } - } - + sqlite3_int64 *pn = (sqlite3_int64*)pUserData; + (*pn)++; return SQLITE_OK; } /* -** This function returns a pointer to an array of integers containing entries -** indicating the number of rows in the table for which each phrase features -** at least once in each column. -** -** If nCol is the number of matchable columns in the table, and nPhrase is -** the number of phrases in the query, the array contains a total of -** (nPhrase*nCol) entries. -** -** For phrase iPhrase and column iCol: -** -** anVal[iPhrase * nCol + iCol] -** -** is set to the number of rows in the table for which column iCol contains -** at least one instance of phrase iPhrase. +** Set *ppData to point to the Fts5Bm25Data object for the current query. +** If the object has not already been allocated, allocate and populate it +** now. 
*/ -static int fts5GatherTotals( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - int **panVal +static int fts5Bm25GetData( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ ){ - int rc = SQLITE_OK; - int *anVal = 0; - int i; /* For iterating through expression phrases */ - int nPhrase = pApi->xPhraseCount(pFts); - int nCol = pApi->xColumnCount(pFts); - int nByte = nCol * nPhrase * sizeof(int); - struct Fts5GatherCtx sCtx; - - sCtx.nCol = nCol; - anVal = sCtx.anVal = (int*)sqlite3_malloc(nByte); - if( anVal==0 ){ - rc = SQLITE_NOMEM; - }else{ - memset(anVal, 0, nByte); - } - - for(i=0; ixQueryPhrase(pFts, i, (void*)&sCtx, fts5GatherCallback); - } - - if( rc!=SQLITE_OK ){ - sqlite3_free(anVal); - anVal = 0; - } - - *panVal = anVal; - return rc; -} - -typedef struct Fts5Bm25Context Fts5Bm25Context; -struct Fts5Bm25Context { - int nPhrase; /* Number of phrases in query */ - int nCol; /* Number of columns in FTS table */ - double *aIDF; /* Array of IDF values */ - double *aAvg; /* Average size of each column in tokens */ -}; - -static int fts5Bm25GetContext( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - Fts5Bm25Context **pp /* OUT: Context object */ -){ - Fts5Bm25Context *p; - int rc = SQLITE_OK; + int rc = SQLITE_OK; /* Return code */ + Fts5Bm25Data *p; /* Object to return */ p = pApi->xGetAuxdata(pFts, 0); if( p==0 ){ - int *anVal = 0; - int ic; /* For iterating through columns */ - int ip; /* For iterating through phrases */ - i64 nRow; /* Total number of rows in table */ - int nPhrase = pApi->xPhraseCount(pFts); - int nCol = pApi->xColumnCount(pFts); - int nByte = sizeof(Fts5Bm25Context) - + sizeof(double) * nPhrase * nCol /* aIDF[] */ - + sizeof(double) * nCol; /* aAvg[] */ + int nPhrase; /* Number of phrases in query 
*/ + sqlite3_int64 nRow; /* Number of rows in table */ + sqlite3_int64 nToken; /* Number of tokens in table */ + int nByte; /* Bytes of space to allocate */ + int i; - p = (Fts5Bm25Context*)sqlite3_malloc(nByte); + /* Allocate the Fts5Bm25Data object */ + nPhrase = pApi->xPhraseCount(pFts); + nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); + p = (Fts5Bm25Data*)sqlite3_malloc(nByte); if( p==0 ){ rc = SQLITE_NOMEM; }else{ memset(p, 0, nByte); - p->aAvg = (double*)&p[1]; - p->aIDF = (double*)&p->aAvg[nCol]; - p->nCol = nCol; p->nPhrase = nPhrase; + p->aIDF = (double*)&p[1]; + p->aFreq = &p->aIDF[nPhrase]; } - if( rc==SQLITE_OK ){ - rc = pApi->xRowCount(pFts, &nRow); - assert( nRow>0 || rc!=SQLITE_OK ); - if( nRow<2 ) nRow = 2; - } + /* Calculate the average document length for this FTS5 table */ + if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow); + if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); + if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow; - for(ic=0; rc==SQLITE_OK && icxColumnTotalSize(pFts, ic, &nToken); - p->aAvg[ic] = (double)nToken / (double)nRow; - } - - if( rc==SQLITE_OK ){ - rc = fts5GatherTotals(pApi, pFts, &anVal); - } - for(ic=0; icxQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); + if( rc==SQLITE_OK ){ + /* Calculate the IDF (Inverse Document Frequency) for phrase i. + ** This is done using the standard BM25 formula as found on wikipedia: ** ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) ** @@ -519,72 +449,26 @@ static int fts5Bm25GetContext( ** negative. Which is undesirable. So the mimimum allowable IDF is ** (1e-6) - roughly the same as a term that appears in just over ** half of set of 5,000,000 documents. 
*/ - int idx = ip * nCol + ic; /* Index in aIDF[] and anVal[] arrays */ - int nHit = anVal[idx]; /* Number of docs matching "ic: ip" */ - - p->aIDF[idx] = log( (0.5 + nRow - nHit) / (0.5 + nHit) ); - if( p->aIDF[idx]<=0.0 ) p->aIDF[idx] = 1e-6; - assert( p->aIDF[idx]>=0.0 ); + double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); + if( idf<=0.0 ) idf = 1e-6; + p->aIDF[i] = idf; } } - sqlite3_free(anVal); - if( rc==SQLITE_OK ){ - rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); - } if( rc!=SQLITE_OK ){ sqlite3_free(p); - p = 0; + }else{ + rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); } + if( rc!=SQLITE_OK ) p = 0; } - - *pp = p; + *ppData = p; return rc; } -static void fts5Bm25DebugContext( - int *pRc, /* IN/OUT: Return code */ - Fts5Buffer *pBuf, /* Buffer to populate */ - Fts5Bm25Context *p /* Context object to decode */ -){ - int ip; - int ic; - - sqlite3Fts5BufferAppendString(pRc, pBuf, "idf "); - if( p->nPhrase>1 || p->nCol>1 ){ - sqlite3Fts5BufferAppendString(pRc, pBuf, "{"); - } - for(ip=0; ipnPhrase; ip++){ - if( ip>0 ) sqlite3Fts5BufferAppendString(pRc, pBuf, " "); - if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "{"); - for(ic=0; icnCol; ic++){ - if( ic>0 ) sqlite3Fts5BufferAppendString(pRc, pBuf, " "); - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%f", p->aIDF[ip*p->nCol+ic]); - } - if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "}"); - } - if( p->nPhrase>1 || p->nCol>1 ){ - sqlite3Fts5BufferAppendString(pRc, pBuf, "}"); - } - - sqlite3Fts5BufferAppendString(pRc, pBuf, " avgdl "); - if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "{"); - for(ic=0; icnCol; ic++){ - if( ic>0 ) sqlite3Fts5BufferAppendString(pRc, pBuf, " "); - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%f", p->aAvg[ic]); - } - if( p->nCol>1 ) sqlite3Fts5BufferAppendString(pRc, pBuf, "}"); -} - -static void fts5Bm25DebugRow( - int *pRc, - Fts5Buffer *pBuf, - Fts5Bm25Context *p, - const Fts5ExtensionApi *pApi, - Fts5Context *pFts -){ -} - +/* +** Implementation of 
bm25() function. +*/ static void fts5Bm25Function( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ @@ -592,67 +476,53 @@ static void fts5Bm25Function( int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ - const double k1 = 1.2; - const double B = 0.75; - int rc = SQLITE_OK; - Fts5Bm25Context *p; - - rc = fts5Bm25GetContext(pApi, pFts, &p); + const double k1 = 1.2; /* Constant "k1" from BM25 formula */ + const double b = 0.75; /* Constant "b" from BM25 formula */ + int rc = SQLITE_OK; /* Error code */ + double score = 0.0; /* SQL function return value */ + Fts5Bm25Data *pData; /* Values allocated/calculated once only */ + int i; /* Iterator variable */ + int nInst; /* Value returned by xInstCount() */ + double D; /* Total number of tokens in row */ + double *aFreq; /* Array of phrase freq. for current row */ + /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) + ** for each phrase in the query for the current row. */ + rc = fts5Bm25GetData(pApi, pFts, &pData); if( rc==SQLITE_OK ){ - /* If the bDebug flag is set, instead of returning a numeric rank, this - ** function returns a text value showing how the rank is calculated. 
*/ - Fts5Buffer debug; - int bDebug = (pApi->xUserData(pFts)!=0); - memset(&debug, 0, sizeof(Fts5Buffer)); - - int ip; - double score = 0.0; - - if( bDebug ){ - fts5Bm25DebugContext(&rc, &debug, p); - fts5Bm25DebugRow(&rc, &debug, p, pApi, pFts); - } - - for(ip=0; rc==SQLITE_OK && ipnPhrase; ip++){ - int iPrev = 0; - int nHit = 0; - int i = 0; - i64 iPos = 0; - - while( rc==SQLITE_OK ){ - int bDone = pApi->xPoslist(pFts, ip, &i, &iPos); - int iCol = FTS5_POS2COLUMN(iPos); - if( (iCol!=iPrev || bDone) && nHit>0 ){ - int sz = 0; - int idx = ip * p->nCol + iPrev; - double bm25; - rc = pApi->xColumnSize(pFts, iPrev, &sz); - - bm25 = (p->aIDF[idx] * nHit * (k1+1.0)) / - (nHit + k1 * (1.0 - B + B * sz / p->aAvg[iPrev])); - - - score = score + bm25; - nHit = 0; - } - if( bDone ) break; - nHit++; - iPrev = iCol; - } - } - + aFreq = pData->aFreq; + memset(aFreq, 0, sizeof(double) * pData->nPhrase); + rc = pApi->xInstCount(pFts, &nInst); + } + for(i=0; rc==SQLITE_OK && ixInst(pFts, i, &ip, &ic, &io); if( rc==SQLITE_OK ){ - if( bDebug ){ - sqlite3_result_text(pCtx, (const char*)debug.p, -1, SQLITE_TRANSIENT); - }else{ - sqlite3_result_double(pCtx, score); - } + double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0; + aFreq[ip] += w; } - sqlite3_free(debug.p); } - if( rc!=SQLITE_OK ){ + /* Figure out the total size of the current row in tokens. */ + if( rc==SQLITE_OK ){ + int nTok; + rc = pApi->xColumnSize(pFts, -1, &nTok); + D = (double)nTok; + } + + /* Determine the BM25 score for the current row. */ + for(i=0; rc==SQLITE_OK && inPhrase; i++){ + score += pData->aIDF[i] * ( + ( aFreq[i] * (k1 + 1.0) ) / + ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) + ); + } + + /* If no error has occurred, return the calculated score. Otherwise, + ** throw an SQL exception. 
*/ + if( rc==SQLITE_OK ){ + sqlite3_result_double(pCtx, score); + }else{ sqlite3_result_error_code(pCtx, rc); } } @@ -664,12 +534,10 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){ fts5_extension_function xFunc;/* Callback function */ void (*xDestroy)(void*); /* Destructor function */ } aBuiltin [] = { - { "bm25debug", (void*)1, fts5Bm25Function, 0 }, { "snippet", 0, fts5SnippetFunction, 0 }, { "highlight", 0, fts5HighlightFunction, 0 }, { "bm25", 0, fts5Bm25Function, 0 }, }; - int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 67bcbe8f1a..0a9ba0c8ad 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -729,7 +729,17 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ int rc = fts5StorageLoadTotals(p, 0); if( rc==SQLITE_OK ){ - *pnToken = p->aTotalSize[iCol]; + *pnToken = 0; + if( iCol<0 ){ + int i; + for(i=0; ipConfig->nCol; i++){ + *pnToken += p->aTotalSize[i]; + } + }else if( iColpConfig->nCol ){ + *pnToken = p->aTotalSize[iCol]; + }else{ + rc = SQLITE_RANGE; + } } return rc; } diff --git a/manifest b/manifest index de303c2758..4fbaaa8a3d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sand\ssimplifications\sfor\sthe\ssnippet()\sand\shighlight()\sfunctions. -D 2014-12-22T21:01:52.167 +C Fix\sthe\sfts5\sbm25()\sfunction\sso\sthat\sit\smatches\sthe\sdocumentation. 
+D 2014-12-23T19:18:34.426 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 8e5af98a1e370a39c8a91ed77f21ad171e5b214c -F ext/fts5/fts5.h 0a0e97c65ba3b3e82638d7f7742c5d96f2b61535 +F ext/fts5/fts5.c 6dc8a8504d84aef13d922db06faa8fbcf8c11424 +F ext/fts5/fts5.h 7598f4b55b888890650829124717874973c52649 F ext/fts5/fts5Int.h 36054b1dfc4881a9b94f945b348ab6cc01c0c7a5 -F ext/fts5/fts5_aux.c 6200a3f6d17c491e6c87189eaef7649ee7fe564d +F ext/fts5/fts5_aux.c 445e54031ff94174673f4f5aac6c064df20a2a6b F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 F ext/fts5/fts5_config.c 5caeb4e77680d635be25b899f97a29cf26fb45ce F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f -F ext/fts5/fts5_storage.c bfeedb83b095a1018f4f531c3cc3f9099e9f9081 +F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853 F ext/fts5/fts5_tcl.c 4392e74421d24cc37c370732e8b48217cd2c1777 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5auxdata.test 3844d0f098441cedf75b9cc96d5e6e94d1a3bef4 @@ -1210,7 +1210,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 67e3ffd950c5347d219a06b33ad51949cffa7d90 -R eaa7ec352adc789c928b49341506e13d +P 
ca5d44042aa7461dcc8b700b0763df4df9d4a891 +R 62a7bacc31f6964d59aeb316de8e27d6 U dan -Z 1c7bcf3d91cb30ef107cecfef87d0af9 +Z 38f208724902306f1118acd017f9d3d1 diff --git a/manifest.uuid b/manifest.uuid index 4e7afab002..c18189239b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ca5d44042aa7461dcc8b700b0763df4df9d4a891 \ No newline at end of file +1ac7a8d0af9a71ddf6a1421033dcb9fa67c6120c \ No newline at end of file From 5fa3acabf4158fa903bdddad4a958be50f5e156f Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 29 Dec 2014 11:24:46 +0000 Subject: [PATCH 059/206] Fixes to built-in tokenizers. FossilOrigin-Name: b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e --- ext/fts5/fts5.c | 15 +- ext/fts5/fts5.h | 5 +- ext/fts5/fts5Int.h | 6 +- ext/fts5/fts5_config.c | 424 +- ext/fts5/fts5_tcl.c | 355 +- ext/fts5/fts5_tokenize.c | 360 +- ext/fts5/fts5porter.test | 11803 ++++++++++++++++++++++++++++++++++ ext/fts5/fts5tokenizer.test | 82 + manifest | 24 +- manifest.uuid | 2 +- 10 files changed, 12864 insertions(+), 212 deletions(-) create mode 100644 ext/fts5/fts5porter.test create mode 100644 ext/fts5/fts5tokenizer.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 3995d5d4b9..604d5c7cb6 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1277,18 +1277,6 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ return rc; } -static int fts5ApiPoslist( - Fts5Context *pCtx, - int iPhrase, - int *pi, - i64 *piPos -){ - Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; - const u8 *a; int n; /* Poslist for phrase iPhrase */ - n = fts5CsrPoslist(pCsr, iPhrase, &a); - return sqlite3Fts5PoslistNext64(a, n, pi, piPos); -} - static int fts5ApiSetAuxdata( Fts5Context *pCtx, /* Fts5 context */ void *pPtr, /* Pointer to save as auxdata */ @@ -1360,7 +1348,6 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiRowid, fts5ApiColumnText, fts5ApiColumnSize, - fts5ApiPoslist, fts5ApiQueryPhrase, fts5ApiSetAuxdata, fts5ApiGetAuxdata, @@ -1682,6 +1669,7 @@ static int 
fts5CreateTokenizer( static int fts5FindTokenizer( fts5_api *pApi, /* Global context (one per db handle) */ const char *zName, /* Name of new function */ + void **ppUserData, fts5_tokenizer *pTokenizer /* Populate this object */ ){ Fts5Global *pGlobal = (Fts5Global*)pApi; @@ -1694,6 +1682,7 @@ static int fts5FindTokenizer( if( pTok ){ *pTokenizer = pTok->x; + *ppUserData = pTok->pUserData; }else{ memset(pTokenizer, 0, sizeof(fts5_tokenizer)); rc = SQLITE_ERROR; diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 0e448659ab..6ccbebc283 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -207,7 +207,6 @@ struct Fts5ExtensionApi { sqlite3_int64 (*xRowid)(Fts5Context*); int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); - int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos); int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) @@ -216,9 +215,6 @@ struct Fts5ExtensionApi { void *(*xGetAuxdata)(Fts5Context*, int bClear); }; -#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32) -#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF) - /* ** CUSTOM AUXILIARY FUNCTIONS *************************************************************************/ @@ -325,6 +321,7 @@ struct fts5_api { int (*xFindTokenizer)( fts5_api *pApi, const char *zName, + void **ppContext, fts5_tokenizer *pTokenizer ); diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 684b34f009..1fffcbfe51 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -29,8 +29,9 @@ #define FTS5_DEFAULT_NEARDIST 10 #define FTS5_DEFAULT_RANK "bm25" -/* Name of rank column */ +/* Name of rank and rowid columns */ #define FTS5_RANK_NAME "rank" +#define FTS5_ROWID_NAME "rowid" /************************************************************************** ** Interface to code in fts5.c. 
@@ -149,6 +150,9 @@ void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int); void sqlite3Fts5Put32(u8*, int); int sqlite3Fts5Get32(const u8*); +#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32) +#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF) + typedef struct Fts5PoslistReader Fts5PoslistReader; struct Fts5PoslistReader { /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 54c7a57f28..fc3fe73bba 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -21,6 +21,142 @@ /* Maximum allowed page size */ #define FTS5_MAX_PAGE_SIZE (128*1024) +static int fts5_iswhitespace(char x){ + return (x==' '); +} + +static int fts5_isopenquote(char x){ + return (x=='"' || x=='\'' || x=='[' || x=='`'); +} + +/* +** Argument pIn points to a character that is part of a nul-terminated +** string. Return a pointer to the first character following *pIn in +** the string that is not a white-space character. +*/ +static const char *fts5ConfigSkipWhitespace(const char *pIn){ + const char *p = pIn; + if( p ){ + while( fts5_iswhitespace(*p) ){ p++; } + } + return p; +} + +/* +** Argument pIn points to a character that is part of a nul-terminated +** string. Return a pointer to the first character following *pIn in +** the string that is not a "bareword" character. +*/ +static const char *fts5ConfigSkipBareword(const char *pIn){ + const char *p = pIn; + while( *p && *p!=' ' && *p!=':' && *p!='!' 
&& *p!='@' + && *p!='#' && *p!='$' && *p!='%' && *p!='^' && *p!='&' + && *p!='*' && *p!='(' && *p!=')' && *p!='=' + ){ + p++; + } + if( p==pIn ) p = 0; + return p; +} + +static int fts5_isdigit(char a){ + return (a>='0' && a<='9'); +} + + + +static const char *fts5ConfigSkipLiteral(const char *pIn){ + const char *p = pIn; + if( p ){ + switch( *p ){ + case 'n': case 'N': + if( sqlite3_strnicmp("null", p, 4)==0 ){ + p = &p[4]; + }else{ + p = 0; + } + break; + + case 'x': case 'X': + p++; + if( *p=='\'' ){ + p++; + while( (*p>='a' && *p<='f') + || (*p>='A' && *p<='F') + || (*p>='0' && *p<='9') + ){ + p++; + } + if( *p=='\'' && 0==((p-pIn)%2) ){ + p++; + }else{ + p = 0; + } + }else{ + p = 0; + } + break; + + case '\'': + p++; + while( p ){ + if( *p=='\'' ){ + p++; + if( *p!='\'' ) break; + } + p++; + if( *p==0 ) p = 0; + } + break; + + default: + /* maybe a number */ + if( *p=='+' || *p=='-' ) p++; + while( fts5_isdigit(*p) ) p++; + + /* At this point, if the literal was an integer, the parse is + ** finished. Or, if it is a floating point value, it may continue + ** with either a decimal point or an 'E' character. */ + if( *p=='.' && fts5_isdigit(p[1]) ){ + p += 2; + while( fts5_isdigit(*p) ) p++; + } + if( p==pIn ) p = 0; + + break; + } + } + + return p; +} + +static int fts5Dequote(char *z){ + char q; + int iIn = 1; + int iOut = 0; + int bRet = 1; + q = z[0]; + + assert( q=='[' || q=='\'' || q=='"' || q=='`' ); + if( q=='[' ) q = ']'; + + while( z[iIn] ){ + if( z[iIn]==q ){ + if( z[iIn+1]!=q ){ + if( z[iIn+1]=='\0' ) bRet = 0; + break; + } + z[iOut++] = q; + iIn += 2; + }else{ + z[iOut++] = z[iIn++]; + } + } + z[iOut] = '\0'; + + return bRet; +} + /* ** Convert an SQL-style quoted string into a normal string by removing ** the quote characters. The conversion is done in-place. 
If the @@ -37,27 +173,25 @@ void sqlite3Fts5Dequote(char *z){ char quote; /* Quote character (if any ) */ + assert( 0==fts5_iswhitespace(z[0]) ); quote = z[0]; if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ - int iIn = 1; /* Index of next byte to read from input */ - int iOut = 0; /* Index of next byte to write to output */ - - /* If the first byte was a '[', then the close-quote character is a ']' */ - if( quote=='[' ) quote = ']'; - - while( ALWAYS(z[iIn]) ){ - if( z[iIn]==quote ){ - if( z[iIn+1]!=quote ) break; - z[iOut++] = quote; - iIn += 2; - }else{ - z[iOut++] = z[iIn++]; - } - } - z[iOut] = '\0'; + fts5Dequote(z); } } +/* +** Trim any white-space from the right of nul-terminated string z. +*/ +static char *fts5TrimString(char *z){ + int n = strlen(z); + while( n>0 && fts5_iswhitespace(z[n-1]) ){ + z[--n] = '\0'; + } + while( fts5_iswhitespace(*z) ) z++; + return z; +} + /* ** Parse the "special" CREATE VIRTUAL TABLE directive and update ** configuration object pConfig as appropriate. @@ -68,15 +202,17 @@ void sqlite3Fts5Dequote(char *z){ ** eventually free any such error message using sqlite3_free(). */ static int fts5ConfigParseSpecial( + Fts5Global *pGlobal, Fts5Config *pConfig, /* Configuration object to update */ - char *zCmd, /* Special command to parse */ - char *zArg, /* Argument to parse */ + const char *zCmd, /* Special command to parse */ + int nCmd, /* Size of zCmd in bytes */ + const char *zArg, /* Argument to parse */ char **pzErr /* OUT: Error message */ ){ - if( sqlite3_stricmp(zCmd, "prefix")==0 ){ + if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; int rc = SQLITE_OK; - char *p; + const char *p; if( pConfig->aPrefix ){ *pzErr = sqlite3_mprintf("multiple prefix=... 
directives"); rc = SQLITE_ERROR; @@ -108,6 +244,53 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){ + int rc = SQLITE_OK; + const char *p = (const char*)zArg; + int nArg = strlen(zArg) + 1; + char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); + char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2); + char *pSpace = pDel; + + if( azArg && pSpace ){ + if( pConfig->pTok ){ + *pzErr = sqlite3_mprintf("multiple tokenize=... directives"); + rc = SQLITE_ERROR; + }else{ + for(nArg=0; p && *p; nArg++){ + const char *p2 = fts5ConfigSkipWhitespace(p); + if( p2 && *p2=='\'' ){ + p = fts5ConfigSkipLiteral(p2); + }else{ + p = fts5ConfigSkipBareword(p2); + } + if( p ){ + memcpy(pSpace, p2, p-p2); + azArg[nArg] = pSpace; + sqlite3Fts5Dequote(pSpace); + pSpace += (p - p2) + 1; + p = fts5ConfigSkipWhitespace(p); + } + } + if( p==0 ){ + *pzErr = sqlite3_mprintf("parse error in tokenize directive"); + rc = SQLITE_ERROR; + }else{ + rc = sqlite3Fts5GetTokenizer(pGlobal, + (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi + ); + if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("error in tokenizer constructor"); + } + } + } + } + + sqlite3_free(azArg); + sqlite3_free(pDel); + return rc; + } + *pzErr = sqlite3_mprintf("unrecognized directive: \"%s\"", zCmd); return SQLITE_ERROR; } @@ -133,6 +316,7 @@ static char *fts5Strdup(int *pRc, const char *z){ ** code if an error occurs. 
*/ static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ + assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); return sqlite3Fts5GetTokenizer( pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi ); @@ -160,6 +344,7 @@ int sqlite3Fts5ConfigParse( ){ int rc = SQLITE_OK; /* Return code */ Fts5Config *pRet; /* New object to return */ + int i; *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); if( pRet==0 ) return SQLITE_NOMEM; @@ -170,44 +355,69 @@ int sqlite3Fts5ConfigParse( pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); pRet->zDb = fts5Strdup(&rc, azArg[1]); pRet->zName = fts5Strdup(&rc, azArg[2]); - if( rc==SQLITE_OK ){ - if( sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ - *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); - rc = SQLITE_ERROR; - }else{ - int i; - for(i=3; rc==SQLITE_OK && izName, FTS5_RANK_NAME)==0 ){ + *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); + rc = SQLITE_ERROR; + } - /* If it is not a special directive, it must be a column name. In - ** this case, check that it is not the reserved column name "rank". */ - if( zDup ){ - sqlite3Fts5Dequote(zDup); - pRet->azCol[pRet->nCol++] = zDup; - if( sqlite3_stricmp(zDup, FTS5_RANK_NAME)==0 ){ - *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zDup); - rc = SQLITE_ERROR; + for(i=3; rc==SQLITE_OK && iazCol[pRet->nCol++] = zCol; + zDup = 0; + } + } + + sqlite3_free(zDup); } } + /* If a tokenizer= option was successfully parsed, the tokenizer has + ** already been allocated. Otherwise, allocate an instance of the default + ** tokenizer (simple) now. */ if( rc==SQLITE_OK && pRet->pTok==0 ){ rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); } @@ -309,106 +519,6 @@ int sqlite3Fts5Tokenize( return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } -/* -** Argument pIn points to a character that is part of a nul-terminated -** string. 
Return a pointer to the first character following *pIn in -** the string that is not a white-space character. -*/ -static const char *fts5ConfigSkipWhitespace(const char *pIn){ - const char *p = pIn; - if( p ){ - while( *p==' ' ){ p++; } - } - return p; -} - -/* -** Argument pIn points to a character that is part of a nul-terminated -** string. Return a pointer to the first character following *pIn in -** the string that is not a "bareword" character. -*/ -static const char *fts5ConfigSkipBareword(const char *pIn){ - const char *p = pIn; - while( *p && *p!=' ' && *p!=':' && *p!='!' && *p!='@' - && *p!='#' && *p!='$' && *p!='%' && *p!='^' && *p!='&' - && *p!='*' && *p!='(' && *p!=')' - ){ - p++; - } - if( p==pIn ) p = 0; - return p; -} - -static int fts5_isdigit(char a){ - return (a>='0' && a<='9'); -} - - - -static const char *fts5ConfigSkipLiteral(const char *pIn){ - const char *p = pIn; - if( p ){ - switch( *p ){ - case 'n': case 'N': - if( sqlite3_strnicmp("null", p, 4)==0 ){ - p = &p[4]; - }else{ - p = 0; - } - break; - - case 'x': case 'X': - p++; - if( *p=='\'' ){ - p++; - while( (*p>='a' && *p<='f') - || (*p>='A' && *p<='F') - || (*p>='0' && *p<='9') - ){ - p++; - } - if( *p=='\'' && 0==((p-pIn)%2) ){ - p++; - }else{ - p = 0; - } - }else{ - p = 0; - } - break; - - case '\'': - p++; - while( p ){ - if( *p=='\'' ){ - p++; - if( *p!='\'' ) break; - } - p++; - if( *p==0 ) p = 0; - } - break; - - default: - /* maybe a number */ - if( *p=='+' || *p=='-' ) p++; - while( fts5_isdigit(*p) ) p++; - - /* At this point, if the literal was an integer, the parse is - ** finished. Or, if it is a floating point value, it may continue - ** with either a decimal point or an 'E' character. */ - if( *p=='.' && fts5_isdigit(p[1]) ){ - p += 2; - while( fts5_isdigit(*p) ) p++; - } - - break; - } - } - - return p; -} - /* ** Argument pIn points to the first character in what is expected to be ** a comma-separated list of SQL literals followed by a ')' character. 
@@ -476,12 +586,14 @@ static int fts5ConfigParseRank( const char *pArgs; p = fts5ConfigSkipWhitespace(p); pArgs = p; - p = fts5ConfigSkipArgs(p); - if( p==0 ){ - rc = SQLITE_ERROR; - }else if( p!=pArgs ){ - zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); - if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); + if( *p!=')' ){ + p = fts5ConfigSkipArgs(p); + if( p==0 ){ + rc = SQLITE_ERROR; + }else if( p!=pArgs ){ + zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); + if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); + } } } diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index d9b3dd4883..575f4f871a 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -12,6 +12,7 @@ ** */ +#ifdef SQLITE_TEST #include "fts5.h" #include @@ -42,9 +43,47 @@ static int f5tDbPointer(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **ppDb){ } return TCL_ERROR; } + /* End of code that accesses the SqliteDb struct. **************************************************************************/ +static int f5tDbAndApi( + Tcl_Interp *interp, + Tcl_Obj *pObj, + sqlite3 **ppDb, + fts5_api **ppApi +){ + sqlite3 *db = 0; + int rc = f5tDbPointer(interp, pObj, &db); + if( rc!=TCL_OK ){ + return TCL_ERROR; + }else{ + sqlite3_stmt *pStmt = 0; + fts5_api *pApi = 0; + + rc = sqlite3_prepare_v2(db, "SELECT fts5()", -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); + return TCL_ERROR; + } + + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + const void *pPtr = sqlite3_column_blob(pStmt, 0); + memcpy((void*)&pApi, pPtr, sizeof(pApi)); + } + + if( sqlite3_finalize(pStmt)!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); + return TCL_ERROR; + } + + *ppDb = db; + *ppApi = pApi; + } + + return TCL_OK; +} + typedef struct F5tFunction F5tFunction; struct F5tFunction { Tcl_Interp *interp; @@ -451,7 +490,6 @@ static int f5tCreateFunction( char *zName; Tcl_Obj *pScript; sqlite3 *db = 0; - sqlite3_stmt *pStmt = 0; fts5_api *pApi = 0; 
F5tFunction *pCtx = 0; int rc; @@ -460,27 +498,10 @@ static int f5tCreateFunction( Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT"); return TCL_ERROR; } - if( f5tDbPointer(interp, objv[1], &db) ){ - return TCL_ERROR; - } + if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; + zName = Tcl_GetString(objv[2]); pScript = objv[3]; - - rc = sqlite3_prepare_v2(db, "SELECT fts5()", -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); - return TCL_ERROR; - } - - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - const void *pPtr = sqlite3_column_blob(pStmt, 0); - memcpy((void*)&pApi, pPtr, sizeof(pApi)); - } - if( sqlite3_finalize(pStmt)!=SQLITE_OK ){ - Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); - return TCL_ERROR; - } - pCtx = (F5tFunction*)ckalloc(sizeof(F5tFunction)); pCtx->interp = interp; pCtx->pScript = pScript; @@ -497,6 +518,284 @@ static int f5tCreateFunction( return TCL_OK; } +static int xTokenizeCb2( + void *pCtx, + const char *zToken, int nToken, + int iStart, int iEnd, int iPos +){ + Tcl_Obj *pRet = (Tcl_Obj*)pCtx; + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iStart)); + Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iEnd)); + Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); + return SQLITE_OK; +} + + +/* +** sqlite3_fts5_tokenize DB TOKENIZER TEXT +** +** Description... 
+*/ +static int f5tTokenize( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + char *zName; + char *zText; + int nText; + sqlite3 *db = 0; + fts5_api *pApi = 0; + Fts5Tokenizer *pTok = 0; + fts5_tokenizer tokenizer; + Tcl_Obj *pRet = 0; + void *pUserdata; + int rc; + + if( objc!=4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB NAME TEXT"); + return TCL_ERROR; + } + if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; + zName = Tcl_GetString(objv[2]); + zText = Tcl_GetStringFromObj(objv[3], &nText); + + rc = pApi->xFindTokenizer(pApi, zName, &pUserdata, &tokenizer); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "no such tokenizer: ", zName, 0); + return TCL_ERROR; + } + + rc = tokenizer.xCreate(pUserdata, 0, 0, &pTok); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0); + return TCL_ERROR; + } + + pRet = Tcl_NewObj(); + Tcl_IncrRefCount(pRet); + rc = tokenizer.xTokenize(pTok, pRet, zText, nText, xTokenizeCb2); + tokenizer.xDelete(pTok); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); + Tcl_DecrRefCount(pRet); + return TCL_ERROR; + } + + + Tcl_SetObjResult(interp, pRet); + Tcl_DecrRefCount(pRet); + return TCL_OK; +} + +/************************************************************************* +** Start of tokenizer wrapper. 
+*/ + +typedef struct F5tTokenizerContext F5tTokenizerContext; +typedef struct F5tTokenizerCb F5tTokenizerCb; +typedef struct F5tTokenizerModule F5tTokenizerModule; +typedef struct F5tTokenizerModule F5tTokenizerInstance; + +struct F5tTokenizerContext { + void *pCtx; + int (*xToken)(void*, const char*, int, int, int, int); +}; + +struct F5tTokenizerModule { + Tcl_Interp *interp; + Tcl_Obj *pScript; + F5tTokenizerContext *pContext; +}; + +static int f5tTokenizerCreate( + void *pCtx, + const char **azArg, + int nArg, + Fts5Tokenizer **ppOut +){ + F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; + Tcl_Obj *pEval; + int rc = TCL_OK; + int i; + + pEval = Tcl_DuplicateObj(pMod->pScript); + Tcl_IncrRefCount(pEval); + for(i=0; rc==TCL_OK && iinterp, pEval, pObj); + } + + if( rc==TCL_OK ){ + rc = Tcl_EvalObjEx(pMod->interp, pEval, TCL_GLOBAL_ONLY); + } + Tcl_DecrRefCount(pEval); + + if( rc==TCL_OK ){ + F5tTokenizerInstance *pInst = ckalloc(sizeof(F5tTokenizerInstance)); + memset(pInst, 0, sizeof(F5tTokenizerInstance)); + pInst->interp = pMod->interp; + pInst->pScript = Tcl_GetObjResult(pMod->interp); + pInst->pContext = pMod->pContext; + Tcl_IncrRefCount(pInst->pScript); + *ppOut = (Fts5Tokenizer*)pInst; + } + + return rc; +} + + +static void f5tTokenizerDelete(Fts5Tokenizer *p){ + F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + Tcl_DecrRefCount(pInst->pScript); + ckfree(pInst); +} + +static int f5tTokenizerTokenize( + Fts5Tokenizer *p, + void *pCtx, + const char *pText, int nText, + int (*xToken)(void*, const char*, int, int, int, int) +){ + F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + void *pOldCtx; + int (*xOldToken)(void*, const char*, int, int, int, int); + Tcl_Obj *pEval; + int rc; + + pOldCtx = pInst->pContext->pCtx; + xOldToken = pInst->pContext->xToken; + + pEval = Tcl_DuplicateObj(pInst->pScript); + Tcl_IncrRefCount(pEval); + rc = Tcl_ListObjAppendElement( + pInst->interp, pEval, Tcl_NewStringObj(pText, nText) + ); + if( rc==TCL_OK ){ + 
rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY); + } + Tcl_DecrRefCount(pEval); + + pInst->pContext->pCtx = pOldCtx; + pInst->pContext->xToken = xOldToken; + return rc; +} + +extern const char *sqlite3ErrName(int); + +/* +** sqlite3_fts5_token TEXT START END POS +*/ +static int f5tTokenizerReturn( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; + int iStart; + int iEnd; + int iPos; + int nToken; + char *zToken; + int rc; + + assert( p ); + if( objc!=5 ){ + Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END POS"); + return TCL_ERROR; + } + if( p->xToken==0 ){ + Tcl_AppendResult(interp, + "sqlite3_fts5_token may only be used by tokenizer callback", 0 + ); + return TCL_ERROR; + } + + zToken = Tcl_GetStringFromObj(objv[1], &nToken); + if( Tcl_GetIntFromObj(interp, objv[2], &iStart) + || Tcl_GetIntFromObj(interp, objv[3], &iEnd) + || Tcl_GetIntFromObj(interp, objv[4], &iPos) + ){ + return TCL_ERROR; + } + + rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd, iPos); + Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); + return TCL_OK; +} + +static void f5tDelTokenizer(void *pCtx){ + F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; + Tcl_DecrRefCount(pMod->pScript); + ckfree(pMod); +} + +/* +** sqlite3_fts5_create_tokenizer DB NAME SCRIPT +** +** Register a tokenizer named NAME implemented by script SCRIPT. When +** a tokenizer instance is created (fts5_tokenizer.xCreate), any tokenizer +** arguments are appended to SCRIPT and the result executed. +** +** The value returned by (SCRIPT + args) is itself a tcl script. This +** script - call it SCRIPT2 - is executed to tokenize text using the +** tokenizer instance "returned" by SCRIPT. Specifically, to tokenize +** text SCRIPT2 is invoked with a single argument appended to it - the +** text to tokenize. 
+** +** SCRIPT2 should invoke the [sqlite3_fts5_token] command once for each +** token within the tokenized text. +*/ +static int f5tCreateTokenizer( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + F5tTokenizerContext *pContext = (F5tTokenizerContext*)clientData; + sqlite3 *db; + fts5_api *pApi; + char *zName; + Tcl_Obj *pScript; + fts5_tokenizer t; + F5tTokenizerModule *pMod; + int rc; + + if( objc!=4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT"); + return TCL_ERROR; + } + if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){ + return TCL_ERROR; + } + zName = Tcl_GetString(objv[2]); + pScript = objv[3]; + + t.xCreate = f5tTokenizerCreate; + t.xTokenize = f5tTokenizerTokenize; + t.xDelete = f5tTokenizerDelete; + + pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule)); + pMod->interp = interp; + pMod->pScript = pScript; + pMod->pContext = pContext; + Tcl_IncrRefCount(pScript); + rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0); + return TCL_ERROR; + } + + return TCL_OK; +} + +static void xF5tFree(ClientData clientData){ + ckfree(clientData); +} + /* ** Entry point. 
*/ @@ -504,17 +803,27 @@ int Fts5tcl_Init(Tcl_Interp *interp){ static struct Cmd { char *zName; Tcl_ObjCmdProc *xProc; - void *clientData; + int bTokenizeCtx; } aCmd[] = { - { "sqlite3_fts5_create_function", f5tCreateFunction, 0 } + { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 }, + { "sqlite3_fts5_token", f5tTokenizerReturn, 1 }, + { "sqlite3_fts5_tokenize", f5tTokenize, 0 }, + { "sqlite3_fts5_create_function", f5tCreateFunction, 0 } }; int i; + F5tTokenizerContext *pContext; + + pContext = ckalloc(sizeof(F5tTokenizerContext)); + memset(pContext, 0, sizeof(*pContext)); for(i=0; izName, p->xProc, p->clientData, 0); + void *pCtx = 0; + if( p->bTokenizeCtx ) pCtx = (void*)pContext; + Tcl_CreateObjCommand(interp, p->zName, p->xProc, pCtx, (i ? 0 : xF5tFree)); } return TCL_OK; } +#endif diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index ef7c767544..5352faa2c6 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -12,6 +12,8 @@ */ #include "fts5.h" +#include +#include /* @@ -115,16 +117,368 @@ static int fts5SimpleTokenize( return rc; } +/************************************************************************** +** Start of porter2 stemmer implementation. +*/ + +/* Any tokens larger than this (in bytes) are passed through without +** stemming. */ +#define FTS5_PORTER_MAX_TOKEN 64 + +typedef struct PorterTokenizer PorterTokenizer; +struct PorterTokenizer { + fts5_tokenizer tokenizer; /* Parent tokenizer module */ + Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */ + char aBuf[FTS5_PORTER_MAX_TOKEN + 64]; +}; + +/* +** Delete a "porter" tokenizer. +*/ +static void fts5PorterDelete(Fts5Tokenizer *pTok){ + if( pTok ){ + PorterTokenizer *p = (PorterTokenizer*)pTok; + if( p->pTokenizer ){ + p->tokenizer.xDelete(p->pTokenizer); + } + sqlite3_free(p); + } +} + +/* +** Create a "porter" tokenizer. 
+*/ +static int fts5PorterCreate( + void *pCtx, + const char **azArg, int nArg, + Fts5Tokenizer **ppOut +){ + fts5_api *pApi = (fts5_api*)pCtx; + int rc = SQLITE_OK; + PorterTokenizer *pRet; + void *pUserdata = 0; + + pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); + if( pRet ){ + memset(pRet, 0, sizeof(PorterTokenizer)); + rc = pApi->xFindTokenizer(pApi, "simple", &pUserdata, &pRet->tokenizer); + }else{ + rc = SQLITE_NOMEM; + } + if( rc==SQLITE_OK ){ + rc = pRet->tokenizer.xCreate(pUserdata, 0, 0, &pRet->pTokenizer); + } + + if( rc!=SQLITE_OK ){ + fts5PorterDelete((Fts5Tokenizer*)pRet); + pRet = 0; + } + *ppOut = (Fts5Tokenizer*)pRet; + return rc; +} + +typedef struct PorterContext PorterContext; +struct PorterContext { + void *pCtx; + int (*xToken)(void*, const char*, int, int, int, int); + char *aBuf; +}; + +typedef struct PorterRule PorterRule; +struct PorterRule { + const char *zSuffix; + int nSuffix; + int (*xCond)(char *zStem, int nStem); + const char *zOutput; + int nOutput; +}; + +static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ + int ret = -1; + int nBuf = *pnBuf; + PorterRule *p; + + + for(p=aRule; p->zSuffix; p++){ + assert( strlen(p->zSuffix)==p->nSuffix ); + assert( strlen(p->zOutput)==p->nOutput ); + if( nBufnSuffix ) continue; + if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break; + } + + if( p->zSuffix ){ + int nStem = nBuf - p->nSuffix; + if( p->xCond==0 || p->xCond(aBuf, nStem) ){ + memcpy(&aBuf[nStem], p->zOutput, p->nOutput); + *pnBuf = nStem + p->nOutput; + ret = p - aRule; + } + } + + return ret; +} + +static int fts5PorterIsVowel(char c, int bYIsVowel){ + return ( + c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || (bYIsVowel && c=='y') + ); +} + +static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){ + int i; + int bCons = bPrevCons; + + /* Scan for a vowel */ + for(i=0; i 0) */ +static int fts5Porter_MGt0(char *zStem, int nStem){ + return !!fts5PorterGobbleVC(zStem, 
nStem, 0); +} + +/* porter rule condition: (m > 1) */ +static int fts5Porter_MGt1(char *zStem, int nStem){ + int n; + n = fts5PorterGobbleVC(zStem, nStem, 0); + if( n && fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){ + return 1; + } + return 0; +} + +/* porter rule condition: (m = 1) */ +static int fts5Porter_MEq1(char *zStem, int nStem){ + int n; + n = fts5PorterGobbleVC(zStem, nStem, 0); + if( n && 0==fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){ + return 1; + } + return 0; +} + +/* porter rule condition: (*o) */ +static int fts5Porter_Ostar(char *zStem, int nStem){ + if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){ + return 0; + }else{ + int i; + int mask = 0; + int bCons = 0; + for(i=0; i 1 and (*S or *T)) */ +static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){ + return nStem>0 + && (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') + && fts5Porter_MGt1(zStem, nStem); +} + +/* porter rule condition: (*v*) */ +static int fts5Porter_Vowel(char *zStem, int nStem){ + int i; + for(i=0; i0) ){ + return 1; + } + } + return 0; +} + +static int fts5PorterCb( + void *pCtx, + const char *pToken, + int nToken, + int iStart, + int iEnd, + int iPos +){ + PorterContext *p = (PorterContext*)pCtx; + + PorterRule aStep1A[] = { + { "sses", 4, 0, "ss", 2 }, + { "ies", 3, 0, "i", 1 }, + { "ss", 2, 0, "ss", 2 }, + { "s", 1, 0, "", 0 }, + { 0, 0, 0, 0 } + }; + + PorterRule aStep1B[] = { + { "eed", 3, fts5Porter_MGt0, "ee", 2 }, + { "ed", 2, fts5Porter_Vowel, "", 0 }, + { "ing", 3, fts5Porter_Vowel, "", 0 }, + { 0, 0, 0, 0 } + }; + + PorterRule aStep1B2[] = { + { "at", 2, 0, "ate", 3 }, + { "bl", 2, 0, "ble", 3 }, + { "iz", 2, 0, "ize", 3 }, + { 0, 0, 0, 0 } + }; + + PorterRule aStep1C[] = { + { "y", 1, fts5Porter_Vowel, "i", 1 }, + { 0, 0, 0, 0 } + }; + + PorterRule aStep2[] = { + { "ational", 7, fts5Porter_MGt0, "ate", 3}, + { "tional", 6, fts5Porter_MGt0, "tion", 4}, + { "enci", 4, fts5Porter_MGt0, "ence", 4}, + { "anci", 4, fts5Porter_MGt0, "ance", 4}, + 
{ "izer", 4, fts5Porter_MGt0, "ize", 3}, + { "logi", 4, fts5Porter_MGt0, "log", 3}, /* added post 1979 */ + { "bli", 3, fts5Porter_MGt0, "ble", 3}, /* modified post 1979 */ + { "alli", 4, fts5Porter_MGt0, "al", 2}, + { "entli", 5, fts5Porter_MGt0, "ent", 3}, + { "eli", 3, fts5Porter_MGt0, "e", 1}, + { "ousli", 5, fts5Porter_MGt0, "ous", 3}, + { "ization", 7, fts5Porter_MGt0, "ize", 3}, + { "ation", 5, fts5Porter_MGt0, "ate", 3}, + { "ator", 4, fts5Porter_MGt0, "ate", 3}, + { "alism", 5, fts5Porter_MGt0, "al", 2}, + { "iveness", 7, fts5Porter_MGt0, "ive", 3}, + { "fulness", 7, fts5Porter_MGt0, "ful", 3}, + { "ousness", 7, fts5Porter_MGt0, "ous", 3}, + { "aliti", 5, fts5Porter_MGt0, "al", 2}, + { "iviti", 5, fts5Porter_MGt0, "ive", 3}, + { "biliti", 6, fts5Porter_MGt0, "ble", 3}, + { 0, 0, 0, 0 } + }; + + PorterRule aStep3[] = { + { "icate", 5, fts5Porter_MGt0, "ic", 2}, + { "ative", 5, fts5Porter_MGt0, "", 0}, + { "alize", 5, fts5Porter_MGt0, "al", 2}, + { "iciti", 5, fts5Porter_MGt0, "ic", 2}, + { "ical", 4, fts5Porter_MGt0, "ic", 2}, + { "ful", 3, fts5Porter_MGt0, "", 0}, + { "ness", 4, fts5Porter_MGt0, "", 0}, + { 0, 0, 0, 0 } + }; + + PorterRule aStep4[] = { + { "al", 2, fts5Porter_MGt1, "", 0}, + { "ance", 4, fts5Porter_MGt1, "", 0}, + { "ence", 4, fts5Porter_MGt1, "", 0}, + { "er", 2, fts5Porter_MGt1, "", 0}, + { "ic", 2, fts5Porter_MGt1, "", 0}, + { "able", 4, fts5Porter_MGt1, "", 0}, + { "ible", 4, fts5Porter_MGt1, "", 0}, + { "ant", 3, fts5Porter_MGt1, "", 0}, + { "ement", 5, fts5Porter_MGt1, "", 0}, + { "ment", 4, fts5Porter_MGt1, "", 0}, + { "ent", 3, fts5Porter_MGt1, "", 0}, + { "ion", 3, fts5Porter_MGt1_and_S_or_T, "", 0}, + { "ou", 2, fts5Porter_MGt1, "", 0}, + { "ism", 3, fts5Porter_MGt1, "", 0}, + { "ate", 3, fts5Porter_MGt1, "", 0}, + { "iti", 3, fts5Porter_MGt1, "", 0}, + { "ous", 3, fts5Porter_MGt1, "", 0}, + { "ive", 3, fts5Porter_MGt1, "", 0}, + { "ize", 3, fts5Porter_MGt1, "", 0}, + { 0, 0, 0, 0 } + }; + + + char *aBuf; + int nBuf; + int n; + + 
if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through; + aBuf = p->aBuf; + nBuf = nToken; + memcpy(aBuf, pToken, nBuf); + + /* Step 1. */ + fts5PorterApply(aBuf, &nBuf, aStep1A); + n = fts5PorterApply(aBuf, &nBuf, aStep1B); + if( n==1 || n==2 ){ + if( fts5PorterApply(aBuf, &nBuf, aStep1B2)<0 ){ + char c = aBuf[nBuf-1]; + if( fts5PorterIsVowel(c, 0)==0 + && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] + ){ + nBuf--; + }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){ + aBuf[nBuf++] = 'e'; + } + } + } + fts5PorterApply(aBuf, &nBuf, aStep1C); + + /* Steps 2 through 4. */ + fts5PorterApply(aBuf, &nBuf, aStep2); + fts5PorterApply(aBuf, &nBuf, aStep3); + fts5PorterApply(aBuf, &nBuf, aStep4); + + /* Step 5a. */ + if( nBuf>0 && aBuf[nBuf-1]=='e' ){ + if( fts5Porter_MGt1(aBuf, nBuf-1) + || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) + ){ + nBuf--; + } + } + + /* Step 5b. */ + if( nBuf>1 && aBuf[nBuf-1]=='l' + && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) + ){ + nBuf--; + } + + return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd, iPos); + + pass_through: + return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd, iPos); +} + +/* +** Tokenize using the porter tokenizer. +*/ +static int fts5PorterTokenize( + Fts5Tokenizer *pTokenizer, + void *pCtx, + const char *pText, int nText, + int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) +){ + PorterTokenizer *p = (PorterTokenizer*)pTokenizer; + PorterContext sCtx; + sCtx.xToken = xToken; + sCtx.pCtx = pCtx; + sCtx.aBuf = p->aBuf; + return p->tokenizer.xTokenize( + p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb + ); +} + /* ** Register all built-in tokenizers with FTS5. 
*/ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ struct BuiltinTokenizer { const char *zName; - void *pUserData; fts5_tokenizer x; } aBuiltin[] = { - { "simple", 0, { fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize } } + { "porter", { fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize } }, + { "simple", { fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize } } }; int rc = SQLITE_OK; /* Return code */ @@ -133,7 +487,7 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ for(i=0; rc==SQLITE_OK && ixCreateTokenizer(pApi, aBuiltin[i].zName, - &aBuiltin[i].pUserData, + (void*)pApi, &aBuiltin[i].x, 0 ); diff --git a/ext/fts5/fts5porter.test b/ext/fts5/fts5porter.test new file mode 100644 index 0000000000..7c67f83dab --- /dev/null +++ b/ext/fts5/fts5porter.test @@ -0,0 +1,11803 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 porter stemmer implementation. +# +# http://tartarus.org/martin/PorterStemmer/ +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. 
test] +} +source $testdir/tester.tcl +set testprefix fts5porter + +set test_vocab { + a a aaron aaron + abaissiez abaissiez abandon abandon + abandoned abandon abase abas + abash abash abate abat + abated abat abatement abat + abatements abat abates abat + abbess abbess abbey abbei + abbeys abbei abbominable abbomin + abbot abbot abbots abbot + abbreviated abbrevi abed ab + abel abel aberga aberga + abergavenny abergavenni abet abet + abetting abet abhominable abhomin + abhor abhor abhorr abhorr + abhorred abhor abhorring abhor + abhors abhor abhorson abhorson + abide abid abides abid + abilities abil ability abil + abject abject abjectly abjectli + abjects abject abjur abjur + abjure abjur able abl + abler abler aboard aboard + abode abod aboded abod + abodements abod aboding abod + abominable abomin abominably abomin + abominations abomin abortive abort + abortives abort abound abound + abounding abound about about + above abov abr abr + abraham abraham abram abram + abreast abreast abridg abridg + abridge abridg abridged abridg + abridgment abridg abroach abroach + abroad abroad abrogate abrog + abrook abrook abrupt abrupt + abruption abrupt abruptly abruptli + absence absenc absent absent + absey absei absolute absolut + absolutely absolut absolv absolv + absolver absolv abstains abstain + abstemious abstemi abstinence abstin + abstract abstract absurd absurd + absyrtus absyrtu abundance abund + abundant abund abundantly abundantli + abus abu abuse abus + abused abus abuser abus + abuses abus abusing abus + abutting abut aby abi + abysm abysm ac ac + academe academ academes academ + accent accent accents accent + accept accept acceptable accept + acceptance accept accepted accept + accepts accept access access + accessary accessari accessible access + accidence accid accident accid + accidental accident accidentally accident + accidents accid accite accit + accited accit accites accit + acclamations acclam accommodate accommod + accommodated accommod 
accommodation accommod + accommodations accommod accommodo accommodo + accompanied accompani accompany accompani + accompanying accompani accomplices accomplic + accomplish accomplish accomplished accomplish + accomplishing accomplish accomplishment accomplish + accompt accompt accord accord + accordant accord accorded accord + accordeth accordeth according accord + accordingly accordingli accords accord + accost accost accosted accost + account account accountant account + accounted account accounts account + accoutred accoutr accoutrement accoutr + accoutrements accoutr accrue accru + accumulate accumul accumulated accumul + accumulation accumul accurs accur + accursed accurs accurst accurst + accus accu accusation accus + accusations accus accusative accus + accusativo accusativo accuse accus + accused accus accuser accus + accusers accus accuses accus + accuseth accuseth accusing accus + accustom accustom accustomed accustom + ace ac acerb acerb + ache ach acheron acheron + aches ach achiev achiev + achieve achiev achieved achiev + achievement achiev achievements achiev + achiever achiev achieves achiev + achieving achiev achilles achil + aching ach achitophel achitophel + acknowledg acknowledg acknowledge acknowledg + acknowledged acknowledg acknowledgment acknowledg + acknown acknown acold acold + aconitum aconitum acordo acordo + acorn acorn acquaint acquaint + acquaintance acquaint acquainted acquaint + acquaints acquaint acquir acquir + acquire acquir acquisition acquisit + acquit acquit acquittance acquitt + acquittances acquitt acquitted acquit + acre acr acres acr + across across act act + actaeon actaeon acted act + acting act action action + actions action actium actium + active activ actively activ + activity activ actor actor + actors actor acts act + actual actual acture actur + acute acut acutely acut + ad ad adage adag + adallas adalla adam adam + adamant adam add add + added ad adder adder + adders adder addeth addeth + addict addict addicted 
addict + addiction addict adding ad + addition addit additions addit + addle addl address address + addressing address addrest addrest + adds add adhere adher + adheres adher adieu adieu + adieus adieu adjacent adjac + adjoin adjoin adjoining adjoin + adjourn adjourn adjudg adjudg + adjudged adjudg adjunct adjunct + administer administ administration administr + admir admir admirable admir + admiral admir admiration admir + admire admir admired admir + admirer admir admiring admir + admiringly admiringli admission admiss + admit admit admits admit + admittance admitt admitted admit + admitting admit admonish admonish + admonishing admonish admonishment admonish + admonishments admonish admonition admonit + ado ado adonis adoni + adopt adopt adopted adopt + adoptedly adoptedli adoption adopt + adoptious adopti adopts adopt + ador ador adoration ador + adorations ador adore ador + adorer ador adores ador + adorest adorest adoreth adoreth + adoring ador adorn adorn + adorned adorn adornings adorn + adornment adorn adorns adorn + adown adown adramadio adramadio + adrian adrian adriana adriana + adriano adriano adriatic adriat + adsum adsum adulation adul + adulterate adulter adulterates adulter + adulterers adulter adulteress adulteress + adulteries adulteri adulterous adulter + adultery adulteri adultress adultress + advanc advanc advance advanc + advanced advanc advancement advanc + advancements advanc advances advanc + advancing advanc advantage advantag + advantageable advantag advantaged advantag + advantageous advantag advantages advantag + advantaging advantag advent advent + adventur adventur adventure adventur + adventures adventur adventuring adventur + adventurous adventur adventurously adventur + adversaries adversari adversary adversari + adverse advers adversely advers + adversities advers adversity advers + advertis adverti advertise advertis + advertised advertis advertisement advertis + advertising advertis advice advic + advis advi advise advis + 
advised advis advisedly advisedli + advises advis advisings advis + advocate advoc advocation advoc + aeacida aeacida aeacides aeacid + aedile aedil aediles aedil + aegeon aegeon aegion aegion + aegles aegl aemelia aemelia + aemilia aemilia aemilius aemiliu + aeneas aenea aeolus aeolu + aer aer aerial aerial + aery aeri aesculapius aesculapiu + aeson aeson aesop aesop + aetna aetna afar afar + afear afear afeard afeard + affability affabl affable affabl + affair affair affaire affair + affairs affair affect affect + affectation affect affectations affect + affected affect affectedly affectedli + affecteth affecteth affecting affect + affection affect affectionate affection + affectionately affection affections affect + affects affect affeer affeer + affianc affianc affiance affianc + affianced affianc affied affi + affin affin affined affin + affinity affin affirm affirm + affirmation affirm affirmatives affirm + afflict afflict afflicted afflict + affliction afflict afflictions afflict + afflicts afflict afford afford + affordeth affordeth affords afford + affray affrai affright affright + affrighted affright affrights affright + affront affront affronted affront + affy affi afield afield + afire afir afloat afloat + afoot afoot afore afor + aforehand aforehand aforesaid aforesaid + afraid afraid afresh afresh + afric afric africa africa + african african afront afront + after after afternoon afternoon + afterward afterward afterwards afterward + ag ag again again + against against agamemmon agamemmon + agamemnon agamemnon agate agat + agaz agaz age ag + aged ag agenor agenor + agent agent agents agent + ages ag aggravate aggrav + aggrief aggrief agile agil + agincourt agincourt agitation agit + aglet aglet agnize agniz + ago ago agone agon + agony agoni agree agre + agreed agre agreeing agre + agreement agreement agrees agre + agrippa agrippa aground aground + ague agu aguecheek aguecheek + agued agu agueface aguefac + agues agu ah ah + aha aha ahungry ahungri + 
ai ai aialvolio aialvolio + aiaria aiaria aid aid + aidance aidanc aidant aidant + aided aid aiding aid + aidless aidless aids aid + ail ail aim aim + aimed aim aimest aimest + aiming aim aims aim + ainsi ainsi aio aio + air air aired air + airless airless airs air + airy airi ajax ajax + akilling akil al al + alabaster alabast alack alack + alacrity alacr alarbus alarbu + alarm alarm alarms alarm + alarum alarum alarums alarum + alas ala alb alb + alban alban albans alban + albany albani albeit albeit + albion albion alchemist alchemist + alchemy alchemi alcibiades alcibiad + alcides alcid alder alder + alderman alderman aldermen aldermen + ale al alecto alecto + alehouse alehous alehouses alehous + alencon alencon alengon alengon + aleppo aleppo ales al + alewife alewif alexander alexand + alexanders alexand alexandria alexandria + alexandrian alexandrian alexas alexa + alias alia alice alic + alien alien aliena aliena + alight alight alighted alight + alights alight aliis alii + alike alik alisander alisand + alive aliv all all + alla alla allay allai + allayed allai allaying allai + allayment allay allayments allay + allays allai allegation alleg + allegations alleg allege alleg + alleged alleg allegiance allegi + allegiant allegi alley allei + alleys allei allhallowmas allhallowma + alliance allianc allicholy allicholi + allied alli allies alli + alligant allig alligator allig + allons allon allot allot + allots allot allotted allot + allottery allotteri allow allow + allowance allow allowed allow + allowing allow allows allow + allur allur allure allur + allurement allur alluring allur + allusion allus ally alli + allycholly allycholli almain almain + almanac almanac almanack almanack + almanacs almanac almighty almighti + almond almond almost almost + alms alm almsman almsman + aloes alo aloft aloft + alone alon along along + alonso alonso aloof aloof + aloud aloud alphabet alphabet + alphabetical alphabet alphonso alphonso + alps alp already alreadi + also 
also alt alt + altar altar altars altar + alter alter alteration alter + altered alter alters alter + althaea althaea although although + altitude altitud altogether altogeth + alton alton alway alwai + always alwai am am + amaimon amaimon amain amain + amaking amak amamon amamon + amaz amaz amaze amaz + amazed amaz amazedly amazedli + amazedness amazed amazement amaz + amazes amaz amazeth amazeth + amazing amaz amazon amazon + amazonian amazonian amazons amazon + ambassador ambassador ambassadors ambassador + amber amber ambiguides ambiguid + ambiguities ambigu ambiguous ambigu + ambition ambit ambitions ambit + ambitious ambiti ambitiously ambiti + amble ambl ambled ambl + ambles ambl ambling ambl + ambo ambo ambuscadoes ambuscado + ambush ambush amen amen + amend amend amended amend + amendment amend amends amend + amerce amerc america america + ames am amiable amiabl + amid amid amidst amidst + amiens amien amis ami + amiss amiss amities amiti + amity amiti amnipotent amnipot + among among amongst amongst + amorous amor amorously amor + amort amort amount amount + amounts amount amour amour + amphimacus amphimacu ample ampl + ampler ampler amplest amplest + amplified amplifi amplify amplifi + amply ampli ampthill ampthil + amurath amurath amyntas amynta + an an anatomiz anatomiz + anatomize anatom anatomy anatomi + ancestor ancestor ancestors ancestor + ancestry ancestri anchises anchis + anchor anchor anchorage anchorag + anchored anchor anchoring anchor + anchors anchor anchovies anchovi + ancient ancient ancientry ancientri + ancients ancient ancus ancu + and and andirons andiron + andpholus andpholu andren andren + andrew andrew andromache andromach + andronici andronici andronicus andronicu + anew anew ang ang + angel angel angelica angelica + angelical angel angelo angelo + angels angel anger anger + angerly angerli angers anger + anges ang angiers angier + angl angl anglais anglai + angle angl angler angler + angleterre angleterr angliae anglia + angling 
angl anglish anglish + angrily angrili angry angri + anguish anguish angus angu + animal anim animals anim + animis animi anjou anjou + ankle ankl anna anna + annals annal anne ann + annex annex annexed annex + annexions annexion annexment annex + annothanize annothan announces announc + annoy annoi annoyance annoy + annoying annoi annual annual + anoint anoint anointed anoint + anon anon another anoth + anselmo anselmo answer answer + answerable answer answered answer + answerest answerest answering answer + answers answer ant ant + ante ant antenor antenor + antenorides antenorid anteroom anteroom + anthem anthem anthems anthem + anthony anthoni anthropophagi anthropophagi + anthropophaginian anthropophaginian antiates antiat + antic antic anticipate anticip + anticipates anticip anticipatest anticipatest + anticipating anticip anticipation anticip + antick antick anticly anticli + antics antic antidote antidot + antidotes antidot antigonus antigonu + antiopa antiopa antipathy antipathi + antipholus antipholu antipholuses antipholus + antipodes antipod antiquary antiquari + antique antiqu antiquity antiqu + antium antium antoniad antoniad + antonio antonio antonius antoniu + antony antoni antres antr + anvil anvil any ani + anybody anybodi anyone anyon + anything anyth anywhere anywher + ap ap apace apac + apart apart apartment apart + apartments apart ape ap + apemantus apemantu apennines apennin + apes ap apiece apiec + apish apish apollinem apollinem + apollo apollo apollodorus apollodoru + apology apolog apoplex apoplex + apoplexy apoplexi apostle apostl + apostles apostl apostrophas apostropha + apoth apoth apothecary apothecari + appal appal appall appal + appalled appal appals appal + apparel apparel apparell apparel + apparelled apparel apparent appar + apparently appar apparition apparit + apparitions apparit appeach appeach + appeal appeal appeals appeal + appear appear appearance appear + appeared appear appeareth appeareth + appearing appear appears 
appear + appeas appea appease appeas + appeased appeas appelant appel + appele appel appelee appele + appeles appel appelez appelez + appellant appel appellants appel + appelons appelon appendix appendix + apperil apperil appertain appertain + appertaining appertain appertainings appertain + appertains appertain appertinent appertin + appertinents appertin appetite appetit + appetites appetit applaud applaud + applauded applaud applauding applaud + applause applaus applauses applaus + apple appl apples appl + appletart appletart appliance applianc + appliances applianc applications applic + applied appli applies appli + apply appli applying appli + appoint appoint appointed appoint + appointment appoint appointments appoint + appoints appoint apprehend apprehend + apprehended apprehend apprehends apprehend + apprehension apprehens apprehensions apprehens + apprehensive apprehens apprendre apprendr + apprenne apprenn apprenticehood apprenticehood + appris appri approach approach + approachers approach approaches approach + approacheth approacheth approaching approach + approbation approb approof approof + appropriation appropri approv approv + approve approv approved approv + approvers approv approves approv + appurtenance appurten appurtenances appurten + apricocks apricock april april + apron apron aprons apron + apt apt apter apter + aptest aptest aptly aptli + aptness apt aqua aqua + aquilon aquilon aquitaine aquitain + arabia arabia arabian arabian + araise arais arbitrate arbitr + arbitrating arbitr arbitrator arbitr + arbitrement arbitr arbors arbor + arbour arbour arc arc + arch arch archbishop archbishop + archbishopric archbishopr archdeacon archdeacon + arched arch archelaus archelau + archer archer archers archer + archery archeri archibald archibald + archidamus archidamu architect architect + arcu arcu arde ard + arden arden ardent ardent + ardour ardour are ar + argal argal argier argier + argo argo argosies argosi + argosy argosi argu argu + argue 
argu argued argu + argues argu arguing argu + argument argument arguments argument + argus argu ariachne ariachn + ariadne ariadn ariel ariel + aries ari aright aright + arinado arinado arinies arini + arion arion arise aris + arises aris ariseth ariseth + arising aris aristode aristod + aristotle aristotl arithmetic arithmet + arithmetician arithmetician ark ark + arm arm arma arma + armado armado armadoes armado + armagnac armagnac arme arm + armed arm armenia armenia + armies armi armigero armigero + arming arm armipotent armipot + armor armor armour armour + armourer armour armourers armour + armours armour armoury armouri + arms arm army armi + arn arn aroint aroint + arose aros arouse arous + aroused arous arragon arragon + arraign arraign arraigned arraign + arraigning arraign arraignment arraign + arrant arrant arras arra + array arrai arrearages arrearag + arrest arrest arrested arrest + arrests arrest arriv arriv + arrival arriv arrivance arriv + arrive arriv arrived arriv + arrives arriv arriving arriv + arrogance arrog arrogancy arrog + arrogant arrog arrow arrow + arrows arrow art art + artemidorus artemidoru arteries arteri + arthur arthur article articl + articles articl articulate articul + artificer artific artificial artifici + artillery artilleri artire artir + artist artist artists artist + artless artless artois artoi + arts art artus artu + arviragus arviragu as as + asaph asaph ascanius ascaniu + ascend ascend ascended ascend + ascendeth ascendeth ascends ascend + ascension ascens ascent ascent + ascribe ascrib ascribes ascrib + ash ash asham asham + ashamed asham asher asher + ashes ash ashford ashford + ashore ashor ashouting ashout + ashy ashi asia asia + aside asid ask ask + askance askanc asked ask + asker asker asketh asketh + asking ask asks ask + aslant aslant asleep asleep + asmath asmath asp asp + aspect aspect aspects aspect + aspen aspen aspersion aspers + aspic aspic aspicious aspici + aspics aspic aspir aspir + aspiration aspir 
aspire aspir + aspiring aspir asquint asquint + ass ass assail assail + assailable assail assailant assail + assailants assail assailed assail + assaileth assaileth assailing assail + assails assail assassination assassin + assault assault assaulted assault + assaults assault assay assai + assaying assai assays assai + assemblance assembl assemble assembl + assembled assembl assemblies assembl + assembly assembl assent assent + asses ass assez assez + assign assign assigned assign + assigns assign assinico assinico + assist assist assistance assist + assistances assist assistant assist + assistants assist assisted assist + assisting assist associate associ + associated associ associates associ + assuage assuag assubjugate assubjug + assum assum assume assum + assumes assum assumption assumpt + assur assur assurance assur + assure assur assured assur + assuredly assuredli assures assur + assyrian assyrian astonish astonish + astonished astonish astraea astraea + astray astrai astrea astrea + astronomer astronom astronomers astronom + astronomical astronom astronomy astronomi + asunder asund at at + atalanta atalanta ate at + ates at athenian athenian + athenians athenian athens athen + athol athol athversary athversari + athwart athwart atlas atla + atomies atomi atomy atomi + atone aton atonement aton + atonements aton atropos atropo + attach attach attached attach + attachment attach attain attain + attainder attaind attains attain + attaint attaint attainted attaint + attainture attaintur attempt attempt + attemptable attempt attempted attempt + attempting attempt attempts attempt + attend attend attendance attend + attendant attend attendants attend + attended attend attendents attend + attendeth attendeth attending attend + attends attend attent attent + attention attent attentive attent + attentivenes attentiven attest attest + attested attest attir attir + attire attir attired attir + attires attir attorney attornei + attorneyed attornei attorneys attornei + 
attorneyship attorneyship attract attract + attraction attract attractive attract + attracts attract attribute attribut + attributed attribut attributes attribut + attribution attribut attributive attribut + atwain atwain au au + aubrey aubrei auburn auburn + aucun aucun audacious audaci + audaciously audaci audacity audac + audible audibl audience audienc + audis audi audit audit + auditor auditor auditors auditor + auditory auditori audre audr + audrey audrei aufidius aufidiu + aufidiuses aufidius auger auger + aught aught augment augment + augmentation augment augmented augment + augmenting augment augurer augur + augurers augur augures augur + auguring augur augurs augur + augury auguri august august + augustus augustu auld auld + aumerle aumerl aunchient aunchient + aunt aunt aunts aunt + auricular auricular aurora aurora + auspicious auspici aussi aussi + austere auster austerely auster + austereness auster austerity auster + austria austria aut aut + authentic authent author author + authorities author authority author + authorized author authorizing author + authors author autolycus autolycu + autre autr autumn autumn + auvergne auvergn avail avail + avails avail avarice avaric + avaricious avarici avaunt avaunt + ave av aveng aveng + avenge aveng avenged aveng + averring aver avert avert + aves av avez avez + avis avi avoid avoid + avoided avoid avoiding avoid + avoids avoid avoirdupois avoirdupoi + avouch avouch avouched avouch + avouches avouch avouchment avouch + avow avow aw aw + await await awaits await + awak awak awake awak + awaked awak awaken awaken + awakened awaken awakens awaken + awakes awak awaking awak + award award awards award + awasy awasi away awai + awe aw aweary aweari + aweless aweless awful aw + awhile awhil awkward awkward + awl awl awooing awoo + awork awork awry awri + axe ax axle axl + axletree axletre ay ay + aye ay ayez ayez + ayli ayli azur azur + azure azur b b + ba ba baa baa + babbl babbl babble babbl + babbling babbl babe 
babe + babes babe babies babi + baboon baboon baboons baboon + baby babi babylon babylon + bacare bacar bacchanals bacchan + bacchus bacchu bach bach + bachelor bachelor bachelors bachelor + back back backbite backbit + backbitten backbitten backing back + backs back backward backward + backwardly backwardli backwards backward + bacon bacon bacons bacon + bad bad bade bade + badge badg badged badg + badges badg badly badli + badness bad baes bae + baffl baffl baffle baffl + baffled baffl bag bag + baggage baggag bagot bagot + bagpipe bagpip bags bag + bail bail bailiff bailiff + baillez baillez baily baili + baisant baisant baisees baise + baiser baiser bait bait + baited bait baiting bait + baitings bait baits bait + bajazet bajazet bak bak + bake bake baked bake + baker baker bakers baker + bakes bake baking bake + bal bal balanc balanc + balance balanc balcony balconi + bald bald baldrick baldrick + bale bale baleful bale + balk balk ball ball + ballad ballad ballads ballad + ballast ballast ballasting ballast + ballet ballet ballow ballow + balls ball balm balm + balms balm balmy balmi + balsam balsam balsamum balsamum + balth balth balthasar balthasar + balthazar balthazar bames bame + ban ban banbury banburi + band band bandied bandi + banding band bandit bandit + banditti banditti banditto banditto + bands band bandy bandi + bandying bandi bane bane + banes bane bang bang + bangor bangor banish banish + banished banish banishers banish + banishment banish banister banist + bank bank bankrout bankrout + bankrupt bankrupt bankrupts bankrupt + banks bank banner banner + bannerets banneret banners banner + banning ban banns bann + banquet banquet banqueted banquet + banqueting banquet banquets banquet + banquo banquo bans ban + baptism baptism baptista baptista + baptiz baptiz bar bar + barbarian barbarian barbarians barbarian + barbarism barbar barbarous barbar + barbary barbari barbason barbason + barbed barb barber barber + barbermonger barbermong bard bard + 
bardolph bardolph bards bard + bare bare bared bare + barefac barefac barefaced barefac + barefoot barefoot bareheaded barehead + barely bare bareness bare + barful bar bargain bargain + bargains bargain barge barg + bargulus bargulu baring bare + bark bark barking bark + barkloughly barkloughli barks bark + barky barki barley barlei + barm barm barn barn + barnacles barnacl barnardine barnardin + barne barn barnes barn + barnet barnet barns barn + baron baron barons baron + barony baroni barr barr + barrabas barraba barrel barrel + barrels barrel barren barren + barrenly barrenli barrenness barren + barricado barricado barricadoes barricado + barrow barrow bars bar + barson barson barter barter + bartholomew bartholomew bas ba + basan basan base base + baseless baseless basely base + baseness base baser baser + bases base basest basest + bashful bash bashfulness bash + basilisco basilisco basilisk basilisk + basilisks basilisk basimecu basimecu + basin basin basingstoke basingstok + basins basin basis basi + bask bask basket basket + baskets basket bass bass + bassanio bassanio basset basset + bassianus bassianu basta basta + bastard bastard bastardizing bastard + bastardly bastardli bastards bastard + bastardy bastardi basted bast + bastes bast bastinado bastinado + basting bast bat bat + batailles batail batch batch + bate bate bated bate + bates bate bath bath + bathe bath bathed bath + bathing bath baths bath + bating bate batler batler + bats bat batt batt + battalia battalia battalions battalion + batten batten batter batter + battering batter batters batter + battery batteri battle battl + battled battl battlefield battlefield + battlements battlement battles battl + batty batti bauble baubl + baubles baubl baubling baubl + baulk baulk bavin bavin + bawcock bawcock bawd bawd + bawdry bawdri bawds bawd + bawdy bawdi bawl bawl + bawling bawl bay bai + baying bai baynard baynard + bayonne bayonn bays bai + be be beach beach + beached beach beachy beachi + 
beacon beacon bead bead + beaded bead beadle beadl + beadles beadl beads bead + beadsmen beadsmen beagle beagl + beagles beagl beak beak + beaks beak beam beam + beamed beam beams beam + bean bean beans bean + bear bear beard beard + bearded beard beardless beardless + beards beard bearer bearer + bearers bearer bearest bearest + beareth beareth bearing bear + bears bear beast beast + beastliest beastliest beastliness beastli + beastly beastli beasts beast + beat beat beated beat + beaten beaten beating beat + beatrice beatric beats beat + beau beau beaufort beaufort + beaumond beaumond beaumont beaumont + beauteous beauteou beautied beauti + beauties beauti beautified beautifi + beautiful beauti beautify beautifi + beauty beauti beaver beaver + beavers beaver became becam + because becaus bechanc bechanc + bechance bechanc bechanced bechanc + beck beck beckon beckon + beckons beckon becks beck + becom becom become becom + becomed becom becomes becom + becoming becom becomings becom + bed bed bedabbled bedabbl + bedash bedash bedaub bedaub + bedazzled bedazzl bedchamber bedchamb + bedclothes bedcloth bedded bed + bedeck bedeck bedecking bedeck + bedew bedew bedfellow bedfellow + bedfellows bedfellow bedford bedford + bedlam bedlam bedrench bedrench + bedrid bedrid beds bed + bedtime bedtim bedward bedward + bee bee beef beef + beefs beef beehives beehiv + been been beer beer + bees bee beest beest + beetle beetl beetles beetl + beeves beev befall befal + befallen befallen befalls befal + befell befel befits befit + befitted befit befitting befit + befor befor before befor + beforehand beforehand befortune befortun + befriend befriend befriended befriend + befriends befriend beg beg + began began beget beget + begets beget begetting beget + begg begg beggar beggar + beggared beggar beggarly beggarli + beggarman beggarman beggars beggar + beggary beggari begging beg + begin begin beginners beginn + beginning begin beginnings begin + begins begin begnawn begnawn + 
begone begon begot begot + begotten begotten begrimed begrim + begs beg beguil beguil + beguile beguil beguiled beguil + beguiles beguil beguiling beguil + begun begun behalf behalf + behalfs behalf behav behav + behaved behav behavedst behavedst + behavior behavior behaviors behavior + behaviour behaviour behaviours behaviour + behead behead beheaded behead + beheld beheld behest behest + behests behest behind behind + behold behold beholder behold + beholders behold beholdest beholdest + beholding behold beholds behold + behoof behoof behooffull behoofful + behooves behoov behove behov + behoves behov behowls behowl + being be bel bel + belarius belariu belch belch + belching belch beldam beldam + beldame beldam beldams beldam + belee bele belgia belgia + belie beli belied beli + belief belief beliest beliest + believ believ believe believ + believed believ believes believ + believest believest believing believ + belike belik bell bell + bellario bellario belle bell + bellied belli bellies belli + bellman bellman bellona bellona + bellow bellow bellowed bellow + bellowing bellow bellows bellow + bells bell belly belli + bellyful belly belman belman + belmont belmont belock belock + belong belong belonging belong + belongings belong belongs belong + belov belov beloved belov + beloving belov below below + belt belt belzebub belzebub + bemadding bemad bemet bemet + bemete bemet bemoan bemoan + bemoaned bemoan bemock bemock + bemoil bemoil bemonster bemonst + ben ben bench bench + bencher bencher benches bench + bend bend bended bend + bending bend bends bend + bene bene beneath beneath + benedicite benedicit benedick benedick + benediction benedict benedictus benedictu + benefactors benefactor benefice benefic + beneficial benefici benefit benefit + benefited benefit benefits benefit + benetted benet benevolence benevol + benevolences benevol benied beni + benison benison bennet bennet + bent bent bentii bentii + bentivolii bentivolii bents bent + benumbed benumb 
benvolio benvolio + bepaint bepaint bepray beprai + bequeath bequeath bequeathed bequeath + bequeathing bequeath bequest bequest + ber ber berard berard + berattle berattl beray berai + bere bere bereave bereav + bereaved bereav bereaves bereav + bereft bereft bergamo bergamo + bergomask bergomask berhym berhym + berhyme berhym berkeley berkelei + bermoothes bermooth bernardo bernardo + berod berod berowne berown + berri berri berries berri + berrord berrord berry berri + bertram bertram berwick berwick + bescreen bescreen beseech beseech + beseeched beseech beseechers beseech + beseeching beseech beseek beseek + beseem beseem beseemeth beseemeth + beseeming beseem beseems beseem + beset beset beshrew beshrew + beside besid besides besid + besieg besieg besiege besieg + besieged besieg beslubber beslubb + besmear besmear besmeared besmear + besmirch besmirch besom besom + besort besort besotted besot + bespake bespak bespeak bespeak + bespice bespic bespoke bespok + bespotted bespot bess bess + bessy bessi best best + bestained bestain bested best + bestial bestial bestir bestir + bestirr bestirr bestow bestow + bestowed bestow bestowing bestow + bestows bestow bestraught bestraught + bestrew bestrew bestrid bestrid + bestride bestrid bestrides bestrid + bet bet betake betak + beteem beteem bethink bethink + bethought bethought bethrothed bethroth + bethump bethump betid betid + betide betid betideth betideth + betime betim betimes betim + betoken betoken betook betook + betossed betoss betray betrai + betrayed betrai betraying betrai + betrays betrai betrims betrim + betroth betroth betrothed betroth + betroths betroth bett bett + betted bet better better + bettered better bettering better + betters better betting bet + bettre bettr between between + betwixt betwixt bevel bevel + beverage beverag bevis bevi + bevy bevi bewail bewail + bewailed bewail bewailing bewail + bewails bewail beware bewar + bewasted bewast beweep beweep + bewept bewept bewet bewet + 
bewhored bewhor bewitch bewitch + bewitched bewitch bewitchment bewitch + bewray bewrai beyond beyond + bezonian bezonian bezonians bezonian + bianca bianca bianco bianco + bias bia bibble bibbl + bickerings bicker bid bid + bidden bidden bidding bid + biddings bid biddy biddi + bide bide bides bide + biding bide bids bid + bien bien bier bier + bifold bifold big big + bigamy bigami biggen biggen + bigger bigger bigness big + bigot bigot bilberry bilberri + bilbo bilbo bilboes bilbo + bilbow bilbow bill bill + billeted billet billets billet + billiards billiard billing bill + billow billow billows billow + bills bill bin bin + bind bind bindeth bindeth + binding bind binds bind + biondello biondello birch birch + bird bird birding bird + birdlime birdlim birds bird + birnam birnam birth birth + birthday birthdai birthdom birthdom + birthplace birthplac birthright birthright + birthrights birthright births birth + bis bi biscuit biscuit + bishop bishop bishops bishop + bisson bisson bit bit + bitch bitch bite bite + biter biter bites bite + biting bite bits bit + bitt bitt bitten bitten + bitter bitter bitterest bitterest + bitterly bitterli bitterness bitter + blab blab blabb blabb + blabbing blab blabs blab + black black blackamoor blackamoor + blackamoors blackamoor blackberries blackberri + blackberry blackberri blacker blacker + blackest blackest blackfriars blackfriar + blackheath blackheath blackmere blackmer + blackness black blacks black + bladder bladder bladders bladder + blade blade bladed blade + blades blade blains blain + blam blam blame blame + blamed blame blameful blame + blameless blameless blames blame + blanc blanc blanca blanca + blanch blanch blank blank + blanket blanket blanks blank + blaspheme blasphem blaspheming blasphem + blasphemous blasphem blasphemy blasphemi + blast blast blasted blast + blasting blast blastments blastment + blasts blast blaz blaz + blaze blaze blazes blaze + blazing blaze blazon blazon + blazoned blazon blazoning 
blazon + bleach bleach bleaching bleach + bleak bleak blear blear + bleared blear bleat bleat + bleated bleat bleats bleat + bled bled bleed bleed + bleedest bleedest bleedeth bleedeth + bleeding bleed bleeds bleed + blemish blemish blemishes blemish + blench blench blenches blench + blend blend blended blend + blent blent bless bless + blessed bless blessedly blessedli + blessedness blessed blesses bless + blesseth blesseth blessing bless + blessings bless blest blest + blew blew blind blind + blinded blind blindfold blindfold + blinding blind blindly blindli + blindness blind blinds blind + blink blink blinking blink + bliss bliss blist blist + blister blister blisters blister + blithe blith blithild blithild + bloat bloat block block + blockish blockish blocks block + blois bloi blood blood + blooded blood bloodhound bloodhound + bloodied bloodi bloodier bloodier + bloodiest bloodiest bloodily bloodili + bloodless bloodless bloods blood + bloodshed bloodsh bloodshedding bloodshed + bloodstained bloodstain bloody bloodi + bloom bloom blooms bloom + blossom blossom blossoming blossom + blossoms blossom blot blot + blots blot blotted blot + blotting blot blount blount + blow blow blowed blow + blowers blower blowest blowest + blowing blow blown blown + blows blow blowse blows + blubb blubb blubber blubber + blubbering blubber blue blue + bluecaps bluecap bluest bluest + blunt blunt blunted blunt + blunter blunter bluntest bluntest + blunting blunt bluntly bluntli + bluntness blunt blunts blunt + blur blur blurr blurr + blurs blur blush blush + blushes blush blushest blushest + blushing blush blust blust + bluster bluster blusterer bluster + blusters bluster bo bo + boar boar board board + boarded board boarding board + boards board boarish boarish + boars boar boast boast + boasted boast boastful boast + boasting boast boasts boast + boat boat boats boat + boatswain boatswain bob bob + bobb bobb boblibindo boblibindo + bobtail bobtail bocchus bocchu + bode bode 
boded bode + bodements bodement bodes bode + bodg bodg bodied bodi + bodies bodi bodiless bodiless + bodily bodili boding bode + bodkin bodkin body bodi + bodykins bodykin bog bog + boggle boggl boggler boggler + bogs bog bohemia bohemia + bohemian bohemian bohun bohun + boil boil boiling boil + boils boil boist boist + boisterous boister boisterously boister + boitier boitier bold bold + bolden bolden bolder bolder + boldest boldest boldly boldli + boldness bold bolds bold + bolingbroke bolingbrok bolster bolster + bolt bolt bolted bolt + bolter bolter bolters bolter + bolting bolt bolts bolt + bombard bombard bombards bombard + bombast bombast bon bon + bona bona bond bond + bondage bondag bonded bond + bondmaid bondmaid bondman bondman + bondmen bondmen bonds bond + bondslave bondslav bone bone + boneless boneless bones bone + bonfire bonfir bonfires bonfir + bonjour bonjour bonne bonn + bonnet bonnet bonneted bonnet + bonny bonni bonos bono + bonto bonto bonville bonvil + bood bood book book + bookish bookish books book + boon boon boor boor + boorish boorish boors boor + boot boot booted boot + booties booti bootless bootless + boots boot booty booti + bor bor bora bora + borachio borachio bordeaux bordeaux + border border bordered border + borderers border borders border + bore bore boreas borea + bores bore boring bore + born born borne born + borough borough boroughs borough + borrow borrow borrowed borrow + borrower borrow borrowing borrow + borrows borrow bosko bosko + boskos bosko bosky boski + bosom bosom bosoms bosom + boson boson boss boss + bosworth bosworth botch botch + botcher botcher botches botch + botchy botchi both both + bots bot bottle bottl + bottled bottl bottles bottl + bottom bottom bottomless bottomless + bottoms bottom bouciqualt bouciqualt + bouge boug bough bough + boughs bough bought bought + bounce bounc bouncing bounc + bound bound bounded bound + bounden bounden boundeth boundeth + bounding bound boundless boundless + bounds 
bound bounteous bounteou + bounteously bounteous bounties bounti + bountiful bounti bountifully bountifulli + bounty bounti bourbier bourbier + bourbon bourbon bourchier bourchier + bourdeaux bourdeaux bourn bourn + bout bout bouts bout + bove bove bow bow + bowcase bowcas bowed bow + bowels bowel bower bower + bowing bow bowl bowl + bowler bowler bowling bowl + bowls bowl bows bow + bowsprit bowsprit bowstring bowstr + box box boxes box + boy boi boyet boyet + boyish boyish boys boi + brabant brabant brabantio brabantio + brabble brabbl brabbler brabbler + brac brac brace brace + bracelet bracelet bracelets bracelet + brach brach bracy braci + brag brag bragg bragg + braggardism braggard braggards braggard + braggart braggart braggarts braggart + bragged brag bragging brag + bragless bragless brags brag + braid braid braided braid + brain brain brained brain + brainford brainford brainish brainish + brainless brainless brains brain + brainsick brainsick brainsickly brainsickli + brake brake brakenbury brakenburi + brakes brake brambles brambl + bran bran branch branch + branches branch branchless branchless + brand brand branded brand + brandish brandish brandon brandon + brands brand bras bra + brass brass brassy brassi + brat brat brats brat + brav brav brave brave + braved brave bravely brave + braver braver bravery braveri + braves brave bravest bravest + braving brave brawl brawl + brawler brawler brawling brawl + brawls brawl brawn brawn + brawns brawn bray brai + braying brai braz braz + brazen brazen brazier brazier + breach breach breaches breach + bread bread breadth breadth + break break breaker breaker + breakfast breakfast breaking break + breaks break breast breast + breasted breast breasting breast + breastplate breastplat breasts breast + breath breath breathe breath + breathed breath breather breather + breathers breather breathes breath + breathest breathest breathing breath + breathless breathless breaths breath + brecknock brecknock bred bred + 
breech breech breeches breech + breeching breech breed breed + breeder breeder breeders breeder + breeding breed breeds breed + breese brees breeze breez + breff breff bretagne bretagn + brethen brethen bretheren bretheren + brethren brethren brevis brevi + brevity breviti brew brew + brewage brewag brewer brewer + brewers brewer brewing brew + brews brew briareus briareu + briars briar brib brib + bribe bribe briber briber + bribes bribe brick brick + bricklayer bricklay bricks brick + bridal bridal bride bride + bridegroom bridegroom bridegrooms bridegroom + brides bride bridge bridg + bridgenorth bridgenorth bridges bridg + bridget bridget bridle bridl + bridled bridl brief brief + briefer briefer briefest briefest + briefly briefli briefness brief + brier brier briers brier + brigandine brigandin bright bright + brighten brighten brightest brightest + brightly brightli brightness bright + brim brim brimful brim + brims brim brimstone brimston + brinded brind brine brine + bring bring bringer bringer + bringeth bringeth bringing bring + bringings bring brings bring + brinish brinish brink brink + brisk brisk brisky briski + bristle bristl bristled bristl + bristly bristli bristol bristol + bristow bristow britain britain + britaine britain britaines britain + british british briton briton + britons briton brittany brittani + brittle brittl broach broach + broached broach broad broad + broader broader broadsides broadsid + brocas broca brock brock + brogues brogu broil broil + broiling broil broils broil + broke broke broken broken + brokenly brokenli broker broker + brokers broker brokes broke + broking broke brooch brooch + brooches brooch brood brood + brooded brood brooding brood + brook brook brooks brook + broom broom broomstaff broomstaff + broth broth brothel brothel + brother brother brotherhood brotherhood + brotherhoods brotherhood brotherly brotherli + brothers brother broths broth + brought brought brow brow + brown brown browner browner + brownist 
brownist browny browni + brows brow browse brows + browsing brows bruis brui + bruise bruis bruised bruis + bruises bruis bruising bruis + bruit bruit bruited bruit + brundusium brundusium brunt brunt + brush brush brushes brush + brute brute brutish brutish + brutus brutu bubble bubbl + bubbles bubbl bubbling bubbl + bubukles bubukl buck buck + bucket bucket buckets bucket + bucking buck buckingham buckingham + buckle buckl buckled buckl + buckler buckler bucklers buckler + bucklersbury bucklersburi buckles buckl + buckram buckram bucks buck + bud bud budded bud + budding bud budge budg + budger budger budget budget + buds bud buff buff + buffet buffet buffeting buffet + buffets buffet bug bug + bugbear bugbear bugle bugl + bugs bug build build + builded build buildeth buildeth + building build buildings build + builds build built built + bulk bulk bulks bulk + bull bull bullcalf bullcalf + bullen bullen bullens bullen + bullet bullet bullets bullet + bullocks bullock bulls bull + bully bulli bulmer bulmer + bulwark bulwark bulwarks bulwark + bum bum bumbast bumbast + bump bump bumper bumper + bums bum bunch bunch + bunches bunch bundle bundl + bung bung bunghole bunghol + bungle bungl bunting bunt + buoy buoi bur bur + burbolt burbolt burd burd + burden burden burdened burden + burdening burden burdenous burden + burdens burden burgh burgh + burgher burgher burghers burgher + burglary burglari burgomasters burgomast + burgonet burgonet burgundy burgundi + burial burial buried buri + burier burier buriest buriest + burly burli burn burn + burned burn burnet burnet + burneth burneth burning burn + burnish burnish burns burn + burnt burnt burr burr + burrows burrow burs bur + burst burst bursting burst + bursts burst burthen burthen + burthens burthen burton burton + bury buri burying buri + bush bush bushels bushel + bushes bush bushy bushi + busied busi busily busili + busines busin business busi + businesses busi buskin buskin + busky buski buss buss + busses 
buss bussing buss + bustle bustl bustling bustl + busy busi but but + butcheed butche butcher butcher + butchered butcher butcheries butcheri + butcherly butcherli butchers butcher + butchery butcheri butler butler + butt butt butter butter + buttered butter butterflies butterfli + butterfly butterfli butterwoman butterwoman + buttery butteri buttock buttock + buttocks buttock button button + buttonhole buttonhol buttons button + buttress buttress buttry buttri + butts butt buxom buxom + buy bui buyer buyer + buying bui buys bui + buzz buzz buzzard buzzard + buzzards buzzard buzzers buzzer + buzzing buzz by by + bye bye byzantium byzantium + c c ca ca + cabbage cabbag cabileros cabilero + cabin cabin cabins cabin + cable cabl cables cabl + cackling cackl cacodemon cacodemon + caddis caddi caddisses caddiss + cade cade cadence cadenc + cadent cadent cades cade + cadmus cadmu caduceus caduceu + cadwal cadwal cadwallader cadwallad + caelius caeliu caelo caelo + caesar caesar caesarion caesarion + caesars caesar cage cage + caged cage cagion cagion + cain cain caithness caith + caitiff caitiff caitiffs caitiff + caius caiu cak cak + cake cake cakes cake + calaber calab calais calai + calamities calam calamity calam + calchas calcha calculate calcul + calen calen calendar calendar + calendars calendar calf calf + caliban caliban calibans caliban + calipolis calipoli cality caliti + caliver caliv call call + callat callat called call + callet callet calling call + calls call calm calm + calmest calmest calmly calmli + calmness calm calms calm + calpurnia calpurnia calumniate calumni + calumniating calumni calumnious calumni + calumny calumni calve calv + calved calv calves calv + calveskins calveskin calydon calydon + cam cam cambio cambio + cambria cambria cambric cambric + cambrics cambric cambridge cambridg + cambyses cambys came came + camel camel camelot camelot + camels camel camest camest + camillo camillo camlet camlet + camomile camomil camp camp + campeius 
campeiu camping camp + camps camp can can + canakin canakin canaries canari + canary canari cancel cancel + cancell cancel cancelled cancel + cancelling cancel cancels cancel + cancer cancer candidatus candidatu + candied candi candle candl + candles candl candlesticks candlestick + candy candi canidius canidiu + cank cank canker canker + cankerblossom cankerblossom cankers canker + cannibally cannib cannibals cannib + cannon cannon cannoneer cannon + cannons cannon cannot cannot + canon canon canoniz canoniz + canonize canon canonized canon + canons canon canopied canopi + canopies canopi canopy canopi + canst canst canstick canstick + canterbury canterburi cantle cantl + cantons canton canus canu + canvas canva canvass canvass + canzonet canzonet cap cap + capability capabl capable capabl + capacities capac capacity capac + caparison caparison capdv capdv + cape cape capel capel + capels capel caper caper + capers caper capet capet + caphis caphi capilet capilet + capitaine capitain capital capit + capite capit capitol capitol + capitulate capitul capocchia capocchia + capon capon capons capon + capp capp cappadocia cappadocia + capriccio capriccio capricious caprici + caps cap capt capt + captain captain captains captain + captainship captainship captious captiou + captivate captiv captivated captiv + captivates captiv captive captiv + captives captiv captivity captiv + captum captum capucius capuciu + capulet capulet capulets capulet + car car carack carack + caracks carack carat carat + caraways carawai carbonado carbonado + carbuncle carbuncl carbuncled carbuncl + carbuncles carbuncl carcanet carcanet + carcase carcas carcases carcas + carcass carcass carcasses carcass + card card cardecue cardecu + carded card carders carder + cardinal cardin cardinally cardin + cardinals cardin cardmaker cardmak + cards card carduus carduu + care care cared care + career career careers career + careful care carefully carefulli + careless careless carelessly carelessli + 
carelessness careless cares care + caret caret cargo cargo + carl carl carlisle carlisl + carlot carlot carman carman + carmen carmen carnal carnal + carnally carnal carnarvonshire carnarvonshir + carnation carnat carnations carnat + carol carol carous carou + carouse carous caroused carous + carouses carous carousing carous + carp carp carpenter carpent + carper carper carpet carpet + carpets carpet carping carp + carriage carriag carriages carriag + carried carri carrier carrier + carriers carrier carries carri + carrion carrion carrions carrion + carry carri carrying carri + cars car cart cart + carters carter carthage carthag + carts cart carv carv + carve carv carved carv + carver carver carves carv + carving carv cas ca + casa casa casaer casaer + casca casca case case + casement casement casements casement + cases case cash cash + cashier cashier casing case + cask cask casket casket + casketed casket caskets casket + casque casqu casques casqu + cassado cassado cassandra cassandra + cassibelan cassibelan cassio cassio + cassius cassiu cassocks cassock + cast cast castalion castalion + castaway castawai castaways castawai + casted cast caster caster + castigate castig castigation castig + castile castil castiliano castiliano + casting cast castle castl + castles castl casts cast + casual casual casually casual + casualties casualti casualty casualti + cat cat cataian cataian + catalogue catalogu cataplasm cataplasm + cataracts cataract catarrhs catarrh + catastrophe catastroph catch catch + catcher catcher catches catch + catching catch cate cate + catechising catechis catechism catech + catechize catech cater cater + caterpillars caterpillar caters cater + caterwauling caterwaul cates cate + catesby catesbi cathedral cathedr + catlike catlik catling catl + catlings catl cato cato + cats cat cattle cattl + caucasus caucasu caudle caudl + cauf cauf caught caught + cauldron cauldron caus cau + cause caus caused caus + causeless causeless causer causer + causes 
caus causest causest + causeth causeth cautel cautel + cautelous cautel cautels cautel + cauterizing cauter caution caution + cautions caution cavaleiro cavaleiro + cavalery cavaleri cavaliers cavali + cave cave cavern cavern + caverns cavern caves cave + caveto caveto caviary caviari + cavil cavil cavilling cavil + cawdor cawdor cawdron cawdron + cawing caw ce ce + ceas cea cease ceas + ceases ceas ceaseth ceaseth + cedar cedar cedars cedar + cedius cediu celebrate celebr + celebrated celebr celebrates celebr + celebration celebr celerity celer + celestial celesti celia celia + cell cell cellar cellar + cellarage cellarag celsa celsa + cement cement censer censer + censor censor censorinus censorinu + censur censur censure censur + censured censur censurers censur + censures censur censuring censur + centaur centaur centaurs centaur + centre centr cents cent + centuries centuri centurion centurion + centurions centurion century centuri + cerberus cerberu cerecloth cerecloth + cerements cerement ceremonial ceremoni + ceremonies ceremoni ceremonious ceremoni + ceremoniously ceremoni ceremony ceremoni + ceres cere cerns cern + certain certain certainer certain + certainly certainli certainties certainti + certainty certainti certes cert + certificate certif certified certifi + certifies certifi certify certifi + ces ce cesario cesario + cess cess cesse cess + cestern cestern cetera cetera + cette cett chaces chace + chaf chaf chafe chafe + chafed chafe chafes chafe + chaff chaff chaffless chaffless + chafing chafe chain chain + chains chain chair chair + chairs chair chalic chalic + chalice chalic chalices chalic + chalk chalk chalks chalk + chalky chalki challeng challeng + challenge challeng challenged challeng + challenger challeng challengers challeng + challenges challeng cham cham + chamber chamber chamberers chamber + chamberlain chamberlain chamberlains chamberlain + chambermaid chambermaid chambermaids chambermaid + chambers chamber chameleon chameleon + 
champ champ champagne champagn + champain champain champains champain + champion champion champions champion + chanc chanc chance chanc + chanced chanc chancellor chancellor + chances chanc chandler chandler + chang chang change chang + changeable changeabl changed chang + changeful chang changeling changel + changelings changel changer changer + changes chang changest changest + changing chang channel channel + channels channel chanson chanson + chant chant chanticleer chanticl + chanting chant chantries chantri + chantry chantri chants chant + chaos chao chap chap + chape chape chapel chapel + chapeless chapeless chapels chapel + chaplain chaplain chaplains chaplain + chapless chapless chaplet chaplet + chapmen chapmen chaps chap + chapter chapter character charact + charactered charact characterless characterless + characters charact charactery characteri + characts charact charbon charbon + chare chare chares chare + charg charg charge charg + charged charg chargeful charg + charges charg chargeth chargeth + charging charg chariest chariest + chariness chari charing chare + chariot chariot chariots chariot + charitable charit charitably charit + charities chariti charity chariti + charlemain charlemain charles charl + charm charm charmed charm + charmer charmer charmeth charmeth + charmian charmian charming charm + charmingly charmingli charms charm + charneco charneco charnel charnel + charolois charoloi charon charon + charter charter charters charter + chartreux chartreux chary chari + charybdis charybdi chas cha + chase chase chased chase + chaser chaser chaseth chaseth + chasing chase chaste chast + chastely chast chastis chasti + chastise chastis chastised chastis + chastisement chastis chastity chastiti + chat chat chatham chatham + chatillon chatillon chats chat + chatt chatt chattels chattel + chatter chatter chattering chatter + chattles chattl chaud chaud + chaunted chaunt chaw chaw + chawdron chawdron che che + cheap cheap cheapen cheapen + cheaper 
cheaper cheapest cheapest + cheaply cheapli cheapside cheapsid + cheat cheat cheated cheat + cheater cheater cheaters cheater + cheating cheat cheats cheat + check check checked check + checker checker checking check + checks check cheek cheek + cheeks cheek cheer cheer + cheered cheer cheerer cheerer + cheerful cheer cheerfully cheerfulli + cheering cheer cheerless cheerless + cheerly cheerli cheers cheer + cheese chees chequer chequer + cher cher cherish cherish + cherished cherish cherisher cherish + cherishes cherish cherishing cherish + cherries cherri cherry cherri + cherrypit cherrypit chertsey chertsei + cherub cherub cherubims cherubim + cherubin cherubin cherubins cherubin + cheshu cheshu chess chess + chest chest chester chester + chestnut chestnut chestnuts chestnut + chests chest chetas cheta + chev chev cheval cheval + chevalier chevali chevaliers chevali + cheveril cheveril chew chew + chewed chew chewet chewet + chewing chew chez chez + chi chi chick chick + chicken chicken chickens chicken + chicurmurco chicurmurco chid chid + chidden chidden chide chide + chiders chider chides chide + chiding chide chief chief + chiefest chiefest chiefly chiefli + chien chien child child + childed child childeric childer + childhood childhood childhoods childhood + childing child childish childish + childishness childish childlike childlik + childness child children children + chill chill chilling chill + chime chime chimes chime + chimney chimnei chimneypiece chimneypiec + chimneys chimnei chimurcho chimurcho + chin chin china china + chine chine chines chine + chink chink chinks chink + chins chin chipp chipp + chipper chipper chips chip + chiron chiron chirping chirp + chirrah chirrah chirurgeonly chirurgeonli + chisel chisel chitopher chitoph + chivalrous chivalr chivalry chivalri + choice choic choicely choic + choicest choicest choir choir + choirs choir chok chok + choke choke choked choke + chokes choke choking choke + choler choler choleric choler + 
cholers choler chollors chollor + choose choos chooser chooser + chooses choos chooseth chooseth + choosing choos chop chop + chopine chopin choplogic choplog + chopp chopp chopped chop + chopping chop choppy choppi + chops chop chopt chopt + chor chor choristers chorist + chorus choru chose chose + chosen chosen chough chough + choughs chough chrish chrish + christ christ christen christen + christendom christendom christendoms christendom + christening christen christenings christen + christian christian christianlike christianlik + christians christian christmas christma + christom christom christopher christoph + christophero christophero chronicle chronicl + chronicled chronicl chronicler chronicl + chroniclers chronicl chronicles chronicl + chrysolite chrysolit chuck chuck + chucks chuck chud chud + chuffs chuff church church + churches church churchman churchman + churchmen churchmen churchyard churchyard + churchyards churchyard churl churl + churlish churlish churlishly churlishli + churls churl churn churn + chus chu cicatrice cicatric + cicatrices cicatric cicely cice + cicero cicero ciceter cicet + ciel ciel ciitzens ciitzen + cilicia cilicia cimber cimber + cimmerian cimmerian cinable cinabl + cincture cinctur cinders cinder + cine cine cinna cinna + cinque cinqu cipher cipher + ciphers cipher circa circa + circe circ circle circl + circled circl circlets circlet + circling circl circuit circuit + circum circum circumcised circumcis + circumference circumfer circummur circummur + circumscrib circumscrib circumscribed circumscrib + circumscription circumscript circumspect circumspect + circumstance circumst circumstanced circumstanc + circumstances circumst circumstantial circumstanti + circumvent circumv circumvention circumvent + cistern cistern citadel citadel + cital cital cite cite + cited cite cites cite + cities citi citing cite + citizen citizen citizens citizen + cittern cittern city citi + civet civet civil civil + civility civil civilly 
civilli + clack clack clad clad + claim claim claiming claim + claims claim clamb clamb + clamber clamber clammer clammer + clamor clamor clamorous clamor + clamors clamor clamour clamour + clamours clamour clang clang + clangor clangor clap clap + clapp clapp clapped clap + clapper clapper clapping clap + claps clap clare clare + clarence clarenc claret claret + claribel claribel clasp clasp + clasps clasp clatter clatter + claud claud claudio claudio + claudius claudiu clause claus + claw claw clawed claw + clawing claw claws claw + clay clai clays clai + clean clean cleanliest cleanliest + cleanly cleanli cleans clean + cleanse cleans cleansing cleans + clear clear clearer clearer + clearest clearest clearly clearli + clearness clear clears clear + cleave cleav cleaving cleav + clef clef cleft cleft + cleitus cleitu clemency clemenc + clement clement cleomenes cleomen + cleopatpa cleopatpa cleopatra cleopatra + clepeth clepeth clept clept + clerestories clerestori clergy clergi + clergyman clergyman clergymen clergymen + clerk clerk clerkly clerkli + clerks clerk clew clew + client client clients client + cliff cliff clifford clifford + cliffords clifford cliffs cliff + clifton clifton climate climat + climature climatur climb climb + climbed climb climber climber + climbeth climbeth climbing climb + climbs climb clime clime + cling cling clink clink + clinking clink clinquant clinquant + clip clip clipp clipp + clipper clipper clippeth clippeth + clipping clip clipt clipt + clitus clitu clo clo + cloak cloak cloakbag cloakbag + cloaks cloak clock clock + clocks clock clod clod + cloddy cloddi clodpole clodpol + clog clog clogging clog + clogs clog cloister cloister + cloistress cloistress cloquence cloquenc + clos clo close close + closed close closely close + closeness close closer closer + closes close closest closest + closet closet closing close + closure closur cloten cloten + clotens cloten cloth cloth + clothair clothair clotharius clothariu + clothe 
cloth clothes cloth + clothier clothier clothiers clothier + clothing cloth cloths cloth + clotpoles clotpol clotpoll clotpol + cloud cloud clouded cloud + cloudiness cloudi clouds cloud + cloudy cloudi clout clout + clouted clout clouts clout + cloven cloven clover clover + cloves clove clovest clovest + clowder clowder clown clown + clownish clownish clowns clown + cloy cloi cloyed cloi + cloying cloi cloyless cloyless + cloyment cloyment cloys cloi + club club clubs club + cluck cluck clung clung + clust clust clusters cluster + clutch clutch clyster clyster + cneius cneiu cnemies cnemi + co co coach coach + coaches coach coachmakers coachmak + coact coact coactive coactiv + coagulate coagul coal coal + coals coal coarse coars + coarsely coars coast coast + coasting coast coasts coast + coat coat coated coat + coats coat cobble cobbl + cobbled cobbl cobbler cobbler + cobham cobham cobloaf cobloaf + cobweb cobweb cobwebs cobweb + cock cock cockatrice cockatric + cockatrices cockatric cockle cockl + cockled cockl cockney cocknei + cockpit cockpit cocks cock + cocksure cocksur coctus coctu + cocytus cocytu cod cod + codding cod codling codl + codpiece codpiec codpieces codpiec + cods cod coelestibus coelestibu + coesar coesar coeur coeur + coffer coffer coffers coffer + coffin coffin coffins coffin + cog cog cogging cog + cogitation cogit cogitations cogit + cognition cognit cognizance cogniz + cogscomb cogscomb cohabitants cohabit + coher coher cohere coher + coherence coher coherent coher + cohorts cohort coif coif + coign coign coil coil + coin coin coinage coinag + coiner coiner coining coin + coins coin col col + colbrand colbrand colchos colcho + cold cold colder colder + coldest coldest coldly coldli + coldness cold coldspur coldspur + colebrook colebrook colic colic + collar collar collars collar + collateral collater colleagued colleagu + collect collect collected collect + collection collect college colleg + colleges colleg collied colli + collier collier 
colliers collier + collop collop collusion collus + colme colm colmekill colmekil + coloquintida coloquintida color color + colors color colossus colossu + colour colour colourable colour + coloured colour colouring colour + colours colour colt colt + colted colt colts colt + columbine columbin columbines columbin + colville colvil com com + comagene comagen comart comart + comb comb combat combat + combatant combat combatants combat + combated combat combating combat + combin combin combinate combin + combination combin combine combin + combined combin combless combless + combustion combust come come + comedian comedian comedians comedian + comedy comedi comeliness comeli + comely come comer comer + comers comer comes come + comest comest comet comet + cometh cometh comets comet + comfect comfect comfit comfit + comfits comfit comfort comfort + comfortable comfort comforted comfort + comforter comfort comforting comfort + comfortless comfortless comforts comfort + comic comic comical comic + coming come comings come + cominius cominiu comma comma + command command commande command + commanded command commander command + commanders command commanding command + commandment command commandments command + commands command comme comm + commenc commenc commence commenc + commenced commenc commencement commenc + commences commenc commencing commenc + commend commend commendable commend + commendation commend commendations commend + commended commend commending commend + commends commend comment comment + commentaries commentari commenting comment + comments comment commerce commerc + commingled commingl commiseration commiser + commission commiss commissioners commission + commissions commiss commit commit + commits commit committ committ + committed commit committing commit + commix commix commixed commix + commixtion commixt commixture commixtur + commodious commodi commodities commod + commodity commod common common + commonalty commonalti commoner common + commoners 
common commonly commonli + commons common commonweal commonw + commonwealth commonwealth commotion commot + commotions commot commune commun + communicat communicat communicate commun + communication commun communities commun + community commun comonty comonti + compact compact companies compani + companion companion companions companion + companionship companionship company compani + compar compar comparative compar + compare compar compared compar + comparing compar comparison comparison + comparisons comparison compartner compartn + compass compass compasses compass + compassing compass compassion compass + compassionate compassion compeers compeer + compel compel compell compel + compelled compel compelling compel + compels compel compensation compens + competence compet competency compet + competent compet competitor competitor + competitors competitor compil compil + compile compil compiled compil + complain complain complainer complain + complainest complainest complaining complain + complainings complain complains complain + complaint complaint complaints complaint + complement complement complements complement + complete complet complexion complexion + complexioned complexion complexions complexion + complices complic complies compli + compliment compliment complimental compliment + compliments compliment complot complot + complots complot complotted complot + comply compli compos compo + compose compos composed compos + composition composit compost compost + composture compostur composure composur + compound compound compounded compound + compounds compound comprehend comprehend + comprehended comprehend comprehends comprehend + compremises compremis compris compri + comprising compris compromis compromi + compromise compromis compt compt + comptible comptibl comptrollers comptrol + compulsatory compulsatori compulsion compuls + compulsive compuls compunctious compuncti + computation comput comrade comrad + comrades comrad comutual comutu + con con 
concave concav + concavities concav conceal conceal + concealed conceal concealing conceal + concealment conceal concealments conceal + conceals conceal conceit conceit + conceited conceit conceitless conceitless + conceits conceit conceiv conceiv + conceive conceiv conceived conceiv + conceives conceiv conceiving conceiv + conception concept conceptions concept + conceptious concepti concern concern + concernancy concern concerneth concerneth + concerning concern concernings concern + concerns concern conclave conclav + conclud conclud conclude conclud + concluded conclud concludes conclud + concluding conclud conclusion conclus + conclusions conclus concolinel concolinel + concord concord concubine concubin + concupiscible concupisc concupy concupi + concur concur concurring concur + concurs concur condemn condemn + condemnation condemn condemned condemn + condemning condemn condemns condemn + condescend condescend condign condign + condition condit conditionally condition + conditions condit condole condol + condolement condol condoling condol + conduce conduc conduct conduct + conducted conduct conducting conduct + conductor conductor conduit conduit + conduits conduit conected conect + coney conei confection confect + confectionary confectionari confections confect + confederacy confederaci confederate confeder + confederates confeder confer confer + conference confer conferr conferr + conferring confer confess confess + confessed confess confesses confess + confesseth confesseth confessing confess + confession confess confessions confess + confessor confessor confidence confid + confident confid confidently confid + confin confin confine confin + confined confin confineless confineless + confiners confin confines confin + confining confin confirm confirm + confirmation confirm confirmations confirm + confirmed confirm confirmer confirm + confirmers confirm confirming confirm + confirmities confirm confirms confirm + confiscate confisc confiscated confisc + 
confiscation confisc confixed confix + conflict conflict conflicting conflict + conflicts conflict confluence confluenc + conflux conflux conform conform + conformable conform confound confound + confounded confound confounding confound + confounds confound confront confront + confronted confront confus confu + confused confus confusedly confusedli + confusion confus confusions confus + confutation confut confutes confut + congeal congeal congealed congeal + congealment congeal congee conge + conger conger congest congest + congied congi congratulate congratul + congreeing congre congreeted congreet + congregate congreg congregated congreg + congregation congreg congregations congreg + congruent congruent congruing congru + conies coni conjectural conjectur + conjecture conjectur conjectures conjectur + conjoin conjoin conjoined conjoin + conjoins conjoin conjointly conjointli + conjunct conjunct conjunction conjunct + conjunctive conjunct conjur conjur + conjuration conjur conjurations conjur + conjure conjur conjured conjur + conjurer conjur conjurers conjur + conjures conjur conjuring conjur + conjuro conjuro conn conn + connected connect connive conniv + conqu conqu conquer conquer + conquered conquer conquering conquer + conqueror conqueror conquerors conqueror + conquers conquer conquest conquest + conquests conquest conquring conqur + conrade conrad cons con + consanguineous consanguin consanguinity consanguin + conscienc conscienc conscience conscienc + consciences conscienc conscionable conscion + consecrate consecr consecrated consecr + consecrations consecr consent consent + consented consent consenting consent + consents consent consequence consequ + consequences consequ consequently consequ + conserve conserv conserved conserv + conserves conserv consider consid + considerance consider considerate consider + consideration consider considerations consider + considered consid considering consid + considerings consid considers consid + consign consign 
consigning consign + consist consist consisteth consisteth + consisting consist consistory consistori + consists consist consolate consol + consolation consol consonancy conson + consonant conson consort consort + consorted consort consortest consortest + conspectuities conspectu conspir conspir + conspiracy conspiraci conspirant conspir + conspirator conspir conspirators conspir + conspire conspir conspired conspir + conspirers conspir conspires conspir + conspiring conspir constable constabl + constables constabl constance constanc + constancies constanc constancy constanc + constant constant constantine constantin + constantinople constantinopl constantly constantli + constellation constel constitution constitut + constrain constrain constrained constrain + constraineth constraineth constrains constrain + constraint constraint constring constr + construction construct construe constru + consul consul consuls consul + consulship consulship consulships consulship + consult consult consulting consult + consults consult consum consum + consume consum consumed consum + consumes consum consuming consum + consummate consumm consummation consumm + consumption consumpt consumptions consumpt + contagion contagion contagious contagi + contain contain containing contain + contains contain contaminate contamin + contaminated contamin contemn contemn + contemned contemn contemning contemn + contemns contemn contemplate contempl + contemplation contempl contemplative contempl + contempt contempt contemptible contempt + contempts contempt contemptuous contemptu + contemptuously contemptu contend contend + contended contend contending contend + contendon contendon content content + contenta contenta contented content + contenteth contenteth contention content + contentious contenti contentless contentless + contento contento contents content + contest contest contestation contest + continence contin continency contin + continent contin continents contin + continu continu 
continual continu + continually continu continuance continu + continuantly continuantli continuate continu + continue continu continued continu + continuer continu continues continu + continuing continu contract contract + contracted contract contracting contract + contraction contract contradict contradict + contradicted contradict contradiction contradict + contradicts contradict contraries contrari + contrarieties contrarieti contrariety contrarieti + contrarious contrari contrariously contrari + contrary contrari contre contr + contribution contribut contributors contributor + contrite contrit contriv contriv + contrive contriv contrived contriv + contriver contriv contrives contriv + contriving contriv control control + controll control controller control + controlling control controlment control + controls control controversy controversi + contumelious contumeli contumeliously contumeli + contumely contum contusions contus + convenience conveni conveniences conveni + conveniency conveni convenient conveni + conveniently conveni convented convent + conventicles conventicl convents convent + convers conver conversant convers + conversation convers conversations convers + converse convers conversed convers + converses convers conversing convers + conversion convers convert convert + converted convert convertest convertest + converting convert convertite convertit + convertites convertit converts convert + convey convei conveyance convey + conveyances convey conveyers convey + conveying convei convict convict + convicted convict convince convinc + convinced convinc convinces convinc + convive conviv convocation convoc + convoy convoi convulsions convuls + cony coni cook cook + cookery cookeri cooks cook + cool cool cooled cool + cooling cool cools cool + coop coop coops coop + cop cop copatain copatain + cope cope cophetua cophetua + copied copi copies copi + copious copiou copper copper + copperspur copperspur coppice coppic + copulation copul copulatives copul 
+ copy copi cor cor + coragio coragio coral coral + coram coram corambus corambu + coranto coranto corantos coranto + corbo corbo cord cord + corded cord cordelia cordelia + cordial cordial cordis cordi + cords cord core core + corin corin corinth corinth + corinthian corinthian coriolanus coriolanu + corioli corioli cork cork + corky corki cormorant cormor + corn corn cornelia cornelia + cornelius corneliu corner corner + corners corner cornerstone cornerston + cornets cornet cornish cornish + corns corn cornuto cornuto + cornwall cornwal corollary corollari + coronal coron coronation coron + coronet coronet coronets coronet + corporal corpor corporals corpor + corporate corpor corpse corps + corpulent corpul correct correct + corrected correct correcting correct + correction correct correctioner correction + corrects correct correspondence correspond + correspondent correspond corresponding correspond + corresponsive correspons corrigible corrig + corrival corriv corrivals corriv + corroborate corrobor corrosive corros + corrupt corrupt corrupted corrupt + corrupter corrupt corrupters corrupt + corruptible corrupt corruptibly corrupt + corrupting corrupt corruption corrupt + corruptly corruptli corrupts corrupt + corse cors corses cors + corslet corslet cosmo cosmo + cost cost costard costard + costermongers costermong costlier costlier + costly costli costs cost + cot cot cote cote + coted cote cotsall cotsal + cotsole cotsol cotswold cotswold + cottage cottag cottages cottag + cotus cotu couch couch + couched couch couching couch + couchings couch coude coud + cough cough coughing cough + could could couldst couldst + coulter coulter council council + councillor councillor councils council + counsel counsel counsell counsel + counsellor counsellor counsellors counsellor + counselor counselor counselors counselor + counsels counsel count count + counted count countenanc countenanc + countenance counten countenances counten + counter counter counterchange 
counterchang + countercheck countercheck counterfeit counterfeit + counterfeited counterfeit counterfeiting counterfeit + counterfeitly counterfeitli counterfeits counterfeit + countermand countermand countermands countermand + countermines countermin counterpart counterpart + counterpoints counterpoint counterpois counterpoi + counterpoise counterpois counters counter + countervail countervail countess countess + countesses countess counties counti + counting count countless countless + countries countri countrv countrv + country countri countryman countryman + countrymen countrymen counts count + county counti couper couper + couple coupl coupled coupl + couplement couplement couples coupl + couplet couplet couplets couplet + cour cour courage courag + courageous courag courageously courag + courages courag courier courier + couriers courier couronne couronn + cours cour course cours + coursed cours courser courser + coursers courser courses cours + coursing cours court court + courted court courteous courteou + courteously courteous courtesan courtesan + courtesies courtesi courtesy courtesi + courtezan courtezan courtezans courtezan + courtier courtier courtiers courtier + courtlike courtlik courtly courtli + courtney courtnei courts court + courtship courtship cousin cousin + cousins cousin couterfeit couterfeit + coutume coutum covenant coven + covenants coven covent covent + coventry coventri cover cover + covered cover covering cover + coverlet coverlet covers cover + covert covert covertly covertli + coverture covertur covet covet + coveted covet coveting covet + covetings covet covetous covet + covetously covet covetousness covet + covets covet cow cow + coward coward cowarded coward + cowardice cowardic cowardly cowardli + cowards coward cowardship cowardship + cowish cowish cowl cowl + cowslip cowslip cowslips cowslip + cox cox coxcomb coxcomb + coxcombs coxcomb coy coi + coystrill coystril coz coz + cozen cozen cozenage cozenag + cozened cozen cozener 
cozen + cozeners cozen cozening cozen + coziers cozier crab crab + crabbed crab crabs crab + crack crack cracked crack + cracker cracker crackers cracker + cracking crack cracks crack + cradle cradl cradled cradl + cradles cradl craft craft + crafted craft craftied crafti + craftier craftier craftily craftili + crafts craft craftsmen craftsmen + crafty crafti cram cram + cramm cramm cramp cramp + cramps cramp crams cram + cranking crank cranks crank + cranmer cranmer crannied cranni + crannies cranni cranny cranni + crants crant crare crare + crash crash crassus crassu + crav crav crave crave + craved crave craven craven + cravens craven craves crave + craveth craveth craving crave + crawl crawl crawling crawl + crawls crawl craz craz + crazed craze crazy crazi + creaking creak cream cream + create creat created creat + creates creat creating creat + creation creation creator creator + creature creatur creatures creatur + credence credenc credent credent + credible credibl credit credit + creditor creditor creditors creditor + credo credo credulity credul + credulous credul creed creed + creek creek creeks creek + creep creep creeping creep + creeps creep crept crept + crescent crescent crescive cresciv + cressets cresset cressid cressid + cressida cressida cressids cressid + cressy cressi crest crest + crested crest crestfall crestfal + crestless crestless crests crest + cretan cretan crete crete + crevice crevic crew crew + crews crew crib crib + cribb cribb cribs crib + cricket cricket crickets cricket + cried cri criedst criedst + crier crier cries cri + criest criest crieth crieth + crime crime crimeful crime + crimeless crimeless crimes crime + criminal crimin crimson crimson + cringe cring cripple crippl + crisp crisp crisped crisp + crispian crispian crispianus crispianu + crispin crispin critic critic + critical critic critics critic + croak croak croaking croak + croaks croak crocodile crocodil + cromer cromer cromwell cromwel + crone crone crook crook + 
crookback crookback crooked crook + crooking crook crop crop + cropp cropp crosby crosbi + cross cross crossed cross + crosses cross crossest crossest + crossing cross crossings cross + crossly crossli crossness cross + crost crost crotchets crotchet + crouch crouch crouching crouch + crow crow crowd crowd + crowded crowd crowding crowd + crowds crowd crowflowers crowflow + crowing crow crowkeeper crowkeep + crown crown crowned crown + crowner crowner crownet crownet + crownets crownet crowning crown + crowns crown crows crow + crudy crudi cruel cruel + cruell cruell crueller crueller + cruelly cruelli cruels cruel + cruelty cruelti crum crum + crumble crumbl crumbs crumb + crupper crupper crusadoes crusado + crush crush crushed crush + crushest crushest crushing crush + crust crust crusts crust + crusty crusti crutch crutch + crutches crutch cry cry + crying cry crystal crystal + crystalline crystallin crystals crystal + cub cub cubbert cubbert + cubiculo cubiculo cubit cubit + cubs cub cuckold cuckold + cuckoldly cuckoldli cuckolds cuckold + cuckoo cuckoo cucullus cucullu + cudgel cudgel cudgeled cudgel + cudgell cudgel cudgelling cudgel + cudgels cudgel cue cue + cues cue cuff cuff + cuffs cuff cuique cuiqu + cull cull culling cull + cullion cullion cullionly cullionli + cullions cullion culpable culpabl + culverin culverin cum cum + cumber cumber cumberland cumberland + cunning cun cunningly cunningli + cunnings cun cuore cuor + cup cup cupbearer cupbear + cupboarding cupboard cupid cupid + cupids cupid cuppele cuppel + cups cup cur cur + curan curan curate curat + curb curb curbed curb + curbing curb curbs curb + curd curd curdied curdi + curds curd cure cure + cured cure cureless cureless + curer curer cures cure + curfew curfew curing cure + curio curio curiosity curios + curious curiou curiously curious + curl curl curled curl + curling curl curls curl + currance curranc currants currant + current current currents current + currish currish curry curri + 
curs cur curse curs + cursed curs curses curs + cursies cursi cursing curs + cursorary cursorari curst curst + curster curster curstest curstest + curstness curst cursy cursi + curtail curtail curtain curtain + curtains curtain curtal curtal + curtis curti curtle curtl + curtsied curtsi curtsies curtsi + curtsy curtsi curvet curvet + curvets curvet cushes cush + cushion cushion cushions cushion + custalorum custalorum custard custard + custody custodi custom custom + customary customari customed custom + customer custom customers custom + customs custom custure custur + cut cut cutler cutler + cutpurse cutpurs cutpurses cutpurs + cuts cut cutter cutter + cutting cut cuttle cuttl + cxsar cxsar cyclops cyclop + cydnus cydnu cygnet cygnet + cygnets cygnet cym cym + cymbals cymbal cymbeline cymbelin + cyme cyme cynic cynic + cynthia cynthia cypress cypress + cypriot cypriot cyprus cypru + cyrus cyru cytherea cytherea + d d dabbled dabbl + dace dace dad dad + daedalus daedalu daemon daemon + daff daff daffed daf + daffest daffest daffodils daffodil + dagger dagger daggers dagger + dagonet dagonet daily daili + daintier daintier dainties dainti + daintiest daintiest daintily daintili + daintiness dainti daintry daintri + dainty dainti daisied daisi + daisies daisi daisy daisi + dale dale dalliance dallianc + dallied dalli dallies dalli + dally dalli dallying dalli + dalmatians dalmatian dam dam + damage damag damascus damascu + damask damask damasked damask + dame dame dames dame + damm damm damn damn + damnable damnabl damnably damnabl + damnation damnat damned damn + damns damn damoiselle damoisel + damon damon damosella damosella + damp damp dams dam + damsel damsel damsons damson + dan dan danc danc + dance danc dancer dancer + dances danc dancing danc + dandle dandl dandy dandi + dane dane dang dang + danger danger dangerous danger + dangerously danger dangers danger + dangling dangl daniel daniel + danish danish dank dank + dankish dankish danskers dansker + daphne 
daphn dappled dappl + dapples dappl dar dar + dardan dardan dardanian dardanian + dardanius dardaniu dare dare + dared dare dareful dare + dares dare darest darest + daring dare darius dariu + dark dark darken darken + darkening darken darkens darken + darker darker darkest darkest + darkling darkl darkly darkli + darkness dark darling darl + darlings darl darnel darnel + darraign darraign dart dart + darted dart darter darter + dartford dartford darting dart + darts dart dash dash + dashes dash dashing dash + dastard dastard dastards dastard + dat dat datchet datchet + date date dated date + dateless dateless dates date + daub daub daughter daughter + daughters daughter daunt daunt + daunted daunt dauntless dauntless + dauphin dauphin daventry daventri + davy davi daw daw + dawn dawn dawning dawn + daws daw day dai + daylight daylight days dai + dazzle dazzl dazzled dazzl + dazzling dazzl de de + dead dead deadly deadli + deaf deaf deafing deaf + deafness deaf deafs deaf + deal deal dealer dealer + dealers dealer dealest dealest + dealing deal dealings deal + deals deal dealt dealt + dean dean deanery deaneri + dear dear dearer dearer + dearest dearest dearly dearli + dearness dear dears dear + dearth dearth dearths dearth + death death deathbed deathb + deathful death deaths death + deathsman deathsman deathsmen deathsmen + debarred debar debase debas + debate debat debated debat + debatement debat debateth debateth + debating debat debauch debauch + debile debil debility debil + debitor debitor debonair debonair + deborah deborah debosh debosh + debt debt debted debt + debtor debtor debtors debtor + debts debt debuty debuti + decay decai decayed decai + decayer decay decaying decai + decays decai deceas decea + decease deceas deceased deceas + deceit deceit deceitful deceit + deceits deceit deceiv deceiv + deceivable deceiv deceive deceiv + deceived deceiv deceiver deceiv + deceivers deceiv deceives deceiv + deceivest deceivest deceiveth deceiveth + deceiving 
deceiv december decemb + decent decent deceptious decepti + decerns decern decide decid + decides decid decimation decim + decipher deciph deciphers deciph + decision decis decius deciu + deck deck decking deck + decks deck deckt deckt + declare declar declares declar + declension declens declensions declens + declin declin decline declin + declined declin declines declin + declining declin decoct decoct + decorum decorum decreas decrea + decrease decreas decreasing decreas + decree decre decreed decre + decrees decre decrepit decrepit + dedicate dedic dedicated dedic + dedicates dedic dedication dedic + deed deed deedless deedless + deeds deed deem deem + deemed deem deep deep + deeper deeper deepest deepest + deeply deepli deeps deep + deepvow deepvow deer deer + deesse deess defac defac + deface defac defaced defac + defacer defac defacers defac + defacing defac defam defam + default default defeat defeat + defeated defeat defeats defeat + defeatures defeatur defect defect + defective defect defects defect + defence defenc defences defenc + defend defend defendant defend + defended defend defender defend + defenders defend defending defend + defends defend defense defens + defensible defens defensive defens + defer defer deferr deferr + defiance defianc deficient defici + defied defi defies defi + defil defil defile defil + defiler defil defiles defil + defiling defil define defin + definement defin definite definit + definitive definit definitively definit + deflow deflow deflower deflow + deflowered deflow deform deform + deformed deform deformities deform + deformity deform deftly deftli + defunct defunct defunction defunct + defuse defus defy defi + defying defi degenerate degener + degraded degrad degree degre + degrees degre deified deifi + deifying deifi deign deign + deigned deign deiphobus deiphobu + deities deiti deity deiti + deja deja deject deject + dejected deject delabreth delabreth + delay delai delayed delai + delaying delai delays delai + 
delectable delect deliberate deliber + delicate delic delicates delic + delicious delici deliciousness delici + delight delight delighted delight + delightful delight delights delight + delinquents delinqu deliv deliv + deliver deliv deliverance deliver + delivered deliv delivering deliv + delivers deliv delivery deliveri + delphos delpho deluded delud + deluding delud deluge delug + delve delv delver delver + delves delv demand demand + demanded demand demanding demand + demands demand demean demean + demeanor demeanor demeanour demeanour + demerits demerit demesnes demesn + demetrius demetriu demi demi + demigod demigod demise demis + demoiselles demoisel demon demon + demonstrable demonstr demonstrate demonstr + demonstrated demonstr demonstrating demonstr + demonstration demonstr demonstrative demonstr + demure demur demurely demur + demuring demur den den + denay denai deni deni + denial denial denials denial + denied deni denier denier + denies deni deniest deniest + denis deni denmark denmark + dennis denni denny denni + denote denot denoted denot + denotement denot denounc denounc + denounce denounc denouncing denounc + dens den denunciation denunci + deny deni denying deni + deo deo depart depart + departed depart departest departest + departing depart departure departur + depeche depech depend depend + dependant depend dependants depend + depended depend dependence depend + dependences depend dependency depend + dependent depend dependents depend + depender depend depending depend + depends depend deplore deplor + deploring deplor depopulate depopul + depos depo depose depos + deposed depos deposing depos + depositaries depositari deprav deprav + depravation deprav deprave deprav + depraved deprav depraves deprav + depress depress depriv depriv + deprive depriv depth depth + depths depth deputation deput + depute deput deputed deput + deputies deputi deputing deput + deputy deputi deracinate deracin + derby derbi dercetas derceta + dere dere derides derid 
+ derision deris deriv deriv + derivation deriv derivative deriv + derive deriv derived deriv + derives deriv derogate derog + derogately derog derogation derog + des de desartless desartless + descant descant descend descend + descended descend descending descend + descends descend descension descens + descent descent descents descent + describe describ described describ + describes describ descried descri + description descript descriptions descript + descry descri desdemon desdemon + desdemona desdemona desert desert + deserts desert deserv deserv + deserve deserv deserved deserv + deservedly deservedli deserver deserv + deservers deserv deserves deserv + deservest deservest deserving deserv + deservings deserv design design + designment design designments design + designs design desir desir + desire desir desired desir + desirers desir desires desir + desirest desirest desiring desir + desirous desir desist desist + desk desk desolate desol + desolation desol desp desp + despair despair despairing despair + despairs despair despatch despatch + desperate desper desperately desper + desperation desper despis despi + despise despis despised despis + despiser despis despiseth despiseth + despising despis despite despit + despiteful despit despoiled despoil + dest dest destin destin + destined destin destinies destini + destiny destini destitute destitut + destroy destroi destroyed destroi + destroyer destroy destroyers destroy + destroying destroi destroys destroi + destruction destruct destructions destruct + det det detain detain + detains detain detect detect + detected detect detecting detect + detection detect detector detector + detects detect detention detent + determin determin determinate determin + determination determin determinations determin + determine determin determined determin + determines determin detest detest + detestable detest detested detest + detesting detest detests detest + detract detract detraction detract + detractions detract 
deucalion deucalion + deuce deuc deum deum + deux deux devant devant + devesting devest device devic + devices devic devil devil + devilish devilish devils devil + devis devi devise devis + devised devis devises devis + devising devis devoid devoid + devonshire devonshir devote devot + devoted devot devotion devot + devour devour devoured devour + devourers devour devouring devour + devours devour devout devout + devoutly devoutli dew dew + dewberries dewberri dewdrops dewdrop + dewlap dewlap dewlapp dewlapp + dews dew dewy dewi + dexter dexter dexteriously dexteri + dexterity dexter di di + diable diabl diablo diablo + diadem diadem dial dial + dialect dialect dialogue dialogu + dialogued dialogu dials dial + diameter diamet diamond diamond + diamonds diamond dian dian + diana diana diaper diaper + dibble dibbl dic dic + dice dice dicers dicer + dich dich dick dick + dickens dicken dickon dickon + dicky dicki dictator dictat + diction diction dictynna dictynna + did did diddle diddl + didest didest dido dido + didst didst die die + died di diedst diedst + dies di diest diest + diet diet dieted diet + dieter dieter dieu dieu + diff diff differ differ + difference differ differences differ + differency differ different differ + differing differ differs differ + difficile difficil difficult difficult + difficulties difficulti difficulty difficulti + diffidence diffid diffidences diffid + diffus diffu diffused diffus + diffusest diffusest dig dig + digest digest digested digest + digestion digest digestions digest + digg digg digging dig + dighton dighton dignified dignifi + dignifies dignifi dignify dignifi + dignities digniti dignity digniti + digress digress digressing digress + digression digress digs dig + digt digt dilate dilat + dilated dilat dilations dilat + dilatory dilatori dild dild + dildos dildo dilemma dilemma + dilemmas dilemma diligence dilig + diligent dilig diluculo diluculo + dim dim dimension dimens + dimensions dimens diminish diminish + 
diminishing diminish diminution diminut + diminutive diminut diminutives diminut + dimm dimm dimmed dim + dimming dim dimpled dimpl + dimples dimpl dims dim + din din dine dine + dined dine diner diner + dines dine ding ding + dining dine dinner dinner + dinners dinner dinnertime dinnertim + dint dint diomed diom + diomede diomed diomedes diomed + dion dion dip dip + dipp dipp dipping dip + dips dip dir dir + dire dire direct direct + directed direct directing direct + direction direct directions direct + directitude directitud directive direct + directly directli directs direct + direful dire direness dire + direst direst dirge dirg + dirges dirg dirt dirt + dirty dirti dis di + disability disabl disable disabl + disabled disabl disabling disabl + disadvantage disadvantag disagree disagre + disallow disallow disanimates disanim + disannul disannul disannuls disannul + disappointed disappoint disarm disarm + disarmed disarm disarmeth disarmeth + disarms disarm disaster disast + disasters disast disastrous disastr + disbench disbench disbranch disbranch + disburdened disburden disburs disbur + disburse disburs disbursed disburs + discandy discandi discandying discandi + discard discard discarded discard + discase discas discased discas + discern discern discerner discern + discerning discern discernings discern + discerns discern discharg discharg + discharge discharg discharged discharg + discharging discharg discipled discipl + disciples discipl disciplin disciplin + discipline disciplin disciplined disciplin + disciplines disciplin disclaim disclaim + disclaiming disclaim disclaims disclaim + disclos disclo disclose disclos + disclosed disclos discloses disclos + discolour discolour discoloured discolour + discolours discolour discomfit discomfit + discomfited discomfit discomfiture discomfitur + discomfort discomfort discomfortable discomfort + discommend discommend disconsolate disconsol + discontent discont discontented discont + discontentedly discontentedli 
discontenting discont + discontents discont discontinue discontinu + discontinued discontinu discord discord + discordant discord discords discord + discourse discours discoursed discours + discourser discours discourses discours + discoursive discours discourtesy discourtesi + discov discov discover discov + discovered discov discoverers discover + discoveries discoveri discovering discov + discovers discov discovery discoveri + discredit discredit discredited discredit + discredits discredit discreet discreet + discreetly discreetli discretion discret + discretions discret discuss discuss + disdain disdain disdained disdain + disdaineth disdaineth disdainful disdain + disdainfully disdainfulli disdaining disdain + disdains disdain disdnguish disdnguish + diseas disea disease diseas + diseased diseas diseases diseas + disedg disedg disembark disembark + disfigure disfigur disfigured disfigur + disfurnish disfurnish disgorge disgorg + disgrac disgrac disgrace disgrac + disgraced disgrac disgraceful disgrac + disgraces disgrac disgracing disgrac + disgracious disgraci disguis disgui + disguise disguis disguised disguis + disguiser disguis disguises disguis + disguising disguis dish dish + dishabited dishabit dishclout dishclout + dishearten dishearten disheartens dishearten + dishes dish dishonest dishonest + dishonestly dishonestli dishonesty dishonesti + dishonor dishonor dishonorable dishonor + dishonors dishonor dishonour dishonour + dishonourable dishonour dishonoured dishonour + dishonours dishonour disinherit disinherit + disinherited disinherit disjoin disjoin + disjoining disjoin disjoins disjoin + disjoint disjoint disjunction disjunct + dislik dislik dislike dislik + disliken disliken dislikes dislik + dislimns dislimn dislocate disloc + dislodg dislodg disloyal disloy + disloyalty disloyalti dismal dismal + dismantle dismantl dismantled dismantl + dismask dismask dismay dismai + dismayed dismai dismemb dismemb + dismember dismemb dismes dism + dismiss 
dismiss dismissed dismiss + dismissing dismiss dismission dismiss + dismount dismount dismounted dismount + disnatur disnatur disobedience disobedi + disobedient disobedi disobey disobei + disobeys disobei disorb disorb + disorder disord disordered disord + disorderly disorderli disorders disord + disparage disparag disparagement disparag + disparagements disparag dispark dispark + dispatch dispatch dispensation dispens + dispense dispens dispenses dispens + dispers disper disperse dispers + dispersed dispers dispersedly dispersedli + dispersing dispers dispiteous dispit + displac displac displace displac + displaced displac displant displant + displanting displant display displai + displayed displai displeas displea + displease displeas displeased displeas + displeasing displeas displeasure displeasur + displeasures displeasur disponge dispong + disport disport disports disport + dispos dispo dispose dispos + disposed dispos disposer dispos + disposing dispos disposition disposit + dispositions disposit dispossess dispossess + dispossessing dispossess disprais disprai + dispraise disprais dispraising disprais + dispraisingly dispraisingli dispropertied disproperti + disproportion disproport disproportioned disproport + disprov disprov disprove disprov + disproved disprov dispursed dispurs + disputable disput disputation disput + disputations disput dispute disput + disputed disput disputes disput + disputing disput disquantity disquant + disquiet disquiet disquietly disquietli + disrelish disrelish disrobe disrob + disseat disseat dissemble dissembl + dissembled dissembl dissembler dissembl + dissemblers dissembl dissembling dissembl + dissembly dissembl dissension dissens + dissensions dissens dissentious dissenti + dissever dissev dissipation dissip + dissolute dissolut dissolutely dissolut + dissolution dissolut dissolutions dissolut + dissolv dissolv dissolve dissolv + dissolved dissolv dissolves dissolv + dissuade dissuad dissuaded dissuad + distaff distaff 
distaffs distaff + distain distain distains distain + distance distanc distant distant + distaste distast distasted distast + distasteful distast distemp distemp + distemper distemp distemperature distemperatur + distemperatures distemperatur distempered distemp + distempering distemp distil distil + distill distil distillation distil + distilled distil distills distil + distilment distil distinct distinct + distinction distinct distinctly distinctli + distingue distingu distinguish distinguish + distinguishes distinguish distinguishment distinguish + distract distract distracted distract + distractedly distractedli distraction distract + distractions distract distracts distract + distrain distrain distraught distraught + distress distress distressed distress + distresses distress distressful distress + distribute distribut distributed distribut + distribution distribut distrust distrust + distrustful distrust disturb disturb + disturbed disturb disturbers disturb + disturbing disturb disunite disunit + disvalued disvalu disvouch disvouch + dit dit ditch ditch + ditchers ditcher ditches ditch + dites dite ditties ditti + ditty ditti diurnal diurnal + div div dive dive + diver diver divers diver + diversely divers diversity divers + divert divert diverted divert + diverts divert dives dive + divest divest dividable divid + dividant divid divide divid + divided divid divides divid + divideth divideth divin divin + divination divin divine divin + divinely divin divineness divin + diviner divin divines divin + divinest divinest divining divin + divinity divin division divis + divisions divis divorc divorc + divorce divorc divorced divorc + divorcement divorc divorcing divorc + divulg divulg divulge divulg + divulged divulg divulging divulg + dizy dizi dizzy dizzi + do do doating doat + dobbin dobbin dock dock + docks dock doct doct + doctor doctor doctors doctor + doctrine doctrin document document + dodge dodg doe doe + doer doer doers doer + does doe doest doest + 
doff doff dog dog + dogberry dogberri dogfish dogfish + dogg dogg dogged dog + dogs dog doigts doigt + doing do doings do + doit doit doits doit + dolabella dolabella dole dole + doleful dole doll doll + dollar dollar dollars dollar + dolor dolor dolorous dolor + dolour dolour dolours dolour + dolphin dolphin dolt dolt + dolts dolt domestic domest + domestics domest dominance domin + dominations domin dominator domin + domine domin domineer domin + domineering domin dominical domin + dominion dominion dominions dominion + domitius domitiu dommelton dommelton + don don donalbain donalbain + donation donat donc donc + doncaster doncast done done + dong dong donn donn + donne donn donner donner + donnerai donnerai doom doom + doomsday doomsdai door door + doorkeeper doorkeep doors door + dorcas dorca doreus doreu + doricles doricl dormouse dormous + dorothy dorothi dorset dorset + dorsetshire dorsetshir dost dost + dotage dotag dotant dotant + dotard dotard dotards dotard + dote dote doted dote + doters doter dotes dote + doteth doteth doth doth + doting dote double doubl + doubled doubl doubleness doubl + doubler doubler doublet doublet + doublets doublet doubling doubl + doubly doubli doubt doubt + doubted doubt doubtful doubt + doubtfully doubtfulli doubting doubt + doubtless doubtless doubts doubt + doug doug dough dough + doughty doughti doughy doughi + douglas dougla dout dout + doute dout douts dout + dove dove dovehouse dovehous + dover dover doves dove + dow dow dowager dowag + dowdy dowdi dower dower + dowerless dowerless dowers dower + dowlas dowla dowle dowl + down down downfall downfal + downright downright downs down + downstairs downstair downtrod downtrod + downward downward downwards downward + downy downi dowries dowri + dowry dowri dowsabel dowsabel + doxy doxi dozed doze + dozen dozen dozens dozen + dozy dozi drab drab + drabbing drab drabs drab + drachma drachma drachmas drachma + draff draff drag drag + dragg dragg dragged drag + dragging drag 
dragon dragon + dragonish dragonish dragons dragon + drain drain drained drain + drains drain drake drake + dram dram dramatis dramati + drank drank draught draught + draughts draught drave drave + draw draw drawbridge drawbridg + drawer drawer drawers drawer + draweth draweth drawing draw + drawling drawl drawn drawn + draws draw drayman drayman + draymen draymen dread dread + dreaded dread dreadful dread + dreadfully dreadfulli dreading dread + dreads dread dream dream + dreamer dreamer dreamers dreamer + dreaming dream dreams dream + dreamt dreamt drearning drearn + dreary dreari dreg dreg + dregs dreg drench drench + drenched drench dress dress + dressed dress dresser dresser + dressing dress dressings dress + drest drest drew drew + dribbling dribbl dried dri + drier drier dries dri + drift drift drily drili + drink drink drinketh drinketh + drinking drink drinkings drink + drinks drink driv driv + drive drive drivelling drivel + driven driven drives drive + driveth driveth driving drive + drizzle drizzl drizzled drizzl + drizzles drizzl droit droit + drollery drolleri dromio dromio + dromios dromio drone drone + drones drone droop droop + droopeth droopeth drooping droop + droops droop drop drop + dropheir dropheir droplets droplet + dropp dropp dropper dropper + droppeth droppeth dropping drop + droppings drop drops drop + dropsied dropsi dropsies dropsi + dropsy dropsi dropt dropt + dross dross drossy drossi + drought drought drove drove + droven droven drovier drovier + drown drown drowned drown + drowning drown drowns drown + drows drow drowse drows + drowsily drowsili drowsiness drowsi + drowsy drowsi drudge drudg + drudgery drudgeri drudges drudg + drug drug drugg drugg + drugs drug drum drum + drumble drumbl drummer drummer + drumming drum drums drum + drunk drunk drunkard drunkard + drunkards drunkard drunken drunken + drunkenly drunkenli drunkenness drunken + dry dry dryness dryness + dst dst du du + dub dub dubb dubb + ducat ducat ducats ducat + 
ducdame ducdam duchess duchess + duchies duchi duchy duchi + duck duck ducking duck + ducks duck dudgeon dudgeon + due due duellist duellist + duello duello duer duer + dues due duff duff + dug dug dugs dug + duke duke dukedom dukedom + dukedoms dukedom dukes duke + dulcet dulcet dulche dulch + dull dull dullard dullard + duller duller dullest dullest + dulling dull dullness dull + dulls dull dully dulli + dulness dul duly duli + dumain dumain dumb dumb + dumbe dumb dumbly dumbl + dumbness dumb dump dump + dumps dump dun dun + duncan duncan dung dung + dungeon dungeon dungeons dungeon + dunghill dunghil dunghills dunghil + dungy dungi dunnest dunnest + dunsinane dunsinan dunsmore dunsmor + dunstable dunstabl dupp dupp + durance duranc during dure + durst durst dusky duski + dust dust dusted dust + dusty dusti dutch dutch + dutchman dutchman duteous duteou + duties duti dutiful duti + duty duti dwarf dwarf + dwarfish dwarfish dwell dwell + dwellers dweller dwelling dwell + dwells dwell dwelt dwelt + dwindle dwindl dy dy + dye dye dyed dy + dyer dyer dying dy + e e each each + eager eager eagerly eagerli + eagerness eager eagle eagl + eagles eagl eaning ean + eanlings eanl ear ear + earing ear earl earl + earldom earldom earlier earlier + earliest earliest earliness earli + earls earl early earli + earn earn earned earn + earnest earnest earnestly earnestli + earnestness earnest earns earn + ears ear earth earth + earthen earthen earthlier earthlier + earthly earthli earthquake earthquak + earthquakes earthquak earthy earthi + eas ea ease eas + eased eas easeful eas + eases eas easier easier + easiest easiest easiliest easiliest + easily easili easiness easi + easing eas east east + eastcheap eastcheap easter easter + eastern eastern eastward eastward + easy easi eat eat + eaten eaten eater eater + eaters eater eating eat + eats eat eaux eaux + eaves eav ebb ebb + ebbing eb ebbs ebb + ebon ebon ebony eboni + ebrew ebrew ecce ecc + echapper echapp echo echo + echoes 
echo eclips eclip + eclipse eclips eclipses eclips + ecolier ecoli ecoutez ecoutez + ecstacy ecstaci ecstasies ecstasi + ecstasy ecstasi ecus ecu + eden eden edg edg + edgar edgar edge edg + edged edg edgeless edgeless + edges edg edict edict + edicts edict edifice edific + edifices edific edified edifi + edifies edifi edition edit + edm edm edmund edmund + edmunds edmund edmundsbury edmundsburi + educate educ educated educ + education educ edward edward + eel eel eels eel + effect effect effected effect + effectless effectless effects effect + effectual effectu effectually effectu + effeminate effemin effigies effigi + effus effu effuse effus + effusion effus eftest eftest + egal egal egally egal + eget eget egeus egeu + egg egg eggs egg + eggshell eggshel eglamour eglamour + eglantine eglantin egma egma + ego ego egregious egregi + egregiously egregi egress egress + egypt egypt egyptian egyptian + egyptians egyptian eie eie + eight eight eighteen eighteen + eighth eighth eightpenny eightpenni + eighty eighti eisel eisel + either either eject eject + eke ek el el + elbe elb elbow elbow + elbows elbow eld eld + elder elder elders elder + eldest eldest eleanor eleanor + elect elect elected elect + election elect elegancy eleg + elegies elegi element element + elements element elephant eleph + elephants eleph elevated elev + eleven eleven eleventh eleventh + elf elf elflocks elflock + eliads eliad elinor elinor + elizabeth elizabeth ell ell + elle ell ellen ellen + elm elm eloquence eloqu + eloquent eloqu else els + elsewhere elsewher elsinore elsinor + eltham eltham elves elv + elvish elvish ely eli + elysium elysium em em + emballing embal embalm embalm + embalms embalm embark embark + embarked embark embarquements embarqu + embassade embassad embassage embassag + embassies embassi embassy embassi + embattailed embattail embattl embattl + embattle embattl embay embai + embellished embellish embers ember + emblaze emblaz emblem emblem + emblems emblem embodied 
embodi + embold embold emboldens embolden + emboss emboss embossed emboss + embounded embound embowel embowel + embowell embowel embrac embrac + embrace embrac embraced embrac + embracement embrac embracements embrac + embraces embrac embracing embrac + embrasures embrasur embroider embroid + embroidery embroideri emhracing emhrac + emilia emilia eminence emin + eminent emin eminently emin + emmanuel emmanuel emnity emniti + empale empal emperal emper + emperess emperess emperial emperi + emperor emperor empery emperi + emphasis emphasi empire empir + empirics empir empiricutic empiricut + empleached empleach employ emploi + employed emploi employer employ + employment employ employments employ + empoison empoison empress empress + emptied empti emptier emptier + empties empti emptiness empti + empty empti emptying empti + emulate emul emulation emul + emulations emul emulator emul + emulous emul en en + enact enact enacted enact + enacts enact enactures enactur + enamell enamel enamelled enamel + enamour enamour enamoured enamour + enanmour enanmour encamp encamp + encamped encamp encave encav + enceladus enceladu enchaf enchaf + enchafed enchaf enchant enchant + enchanted enchant enchanting enchant + enchantingly enchantingli enchantment enchant + enchantress enchantress enchants enchant + enchas encha encircle encircl + encircled encircl enclos enclo + enclose enclos enclosed enclos + encloses enclos encloseth encloseth + enclosing enclos enclouded encloud + encompass encompass encompassed encompass + encompasseth encompasseth encompassment encompass + encore encor encorporal encorpor + encount encount encounter encount + encountered encount encounters encount + encourage encourag encouraged encourag + encouragement encourag encrimsoned encrimson + encroaching encroach encumb encumb + end end endamage endamag + endamagement endamag endanger endang + endart endart endear endear + endeared endear endeavour endeavour + endeavours endeavour ended end + ender ender 
ending end + endings end endite endit + endless endless endow endow + endowed endow endowments endow + endows endow ends end + endu endu endue endu + endur endur endurance endur + endure endur endured endur + endures endur enduring endur + endymion endymion eneas enea + enemies enemi enemy enemi + enernies enerni enew enew + enfeebled enfeebl enfeebles enfeebl + enfeoff enfeoff enfetter enfett + enfoldings enfold enforc enforc + enforce enforc enforced enforc + enforcedly enforcedli enforcement enforc + enforces enforc enforcest enforcest + enfranched enfranch enfranchis enfranchi + enfranchise enfranchis enfranchised enfranchis + enfranchisement enfranchis enfreed enfre + enfreedoming enfreedom engag engag + engage engag engaged engag + engagements engag engaging engag + engaol engaol engend engend + engender engend engenders engend + engilds engild engine engin + engineer engin enginer engin + engines engin engirt engirt + england england english english + englishman englishman englishmen englishmen + engluts englut englutted englut + engraffed engraf engraft engraft + engrafted engraft engrav engrav + engrave engrav engross engross + engrossed engross engrossest engrossest + engrossing engross engrossments engross + enguard enguard enigma enigma + enigmatical enigmat enjoin enjoin + enjoined enjoin enjoy enjoi + enjoyed enjoi enjoyer enjoy + enjoying enjoi enjoys enjoi + enkindle enkindl enkindled enkindl + enlard enlard enlarg enlarg + enlarge enlarg enlarged enlarg + enlargement enlarg enlargeth enlargeth + enlighten enlighten enlink enlink + enmesh enmesh enmities enmiti + enmity enmiti ennoble ennobl + ennobled ennobl enobarb enobarb + enobarbus enobarbu enon enon + enormity enorm enormous enorm + enough enough enow enow + enpatron enpatron enpierced enpierc + enquir enquir enquire enquir + enquired enquir enrag enrag + enrage enrag enraged enrag + enrages enrag enrank enrank + enrapt enrapt enrich enrich + enriched enrich enriches enrich + enridged enridg 
enrings enr + enrob enrob enrobe enrob + enroll enrol enrolled enrol + enrooted enroot enrounded enround + enschedul enschedul ensconce ensconc + ensconcing ensconc enseamed enseam + ensear ensear enseigne enseign + enseignez enseignez ensemble ensembl + enshelter enshelt enshielded enshield + enshrines enshrin ensign ensign + ensigns ensign enskied enski + ensman ensman ensnare ensnar + ensnared ensnar ensnareth ensnareth + ensteep ensteep ensu ensu + ensue ensu ensued ensu + ensues ensu ensuing ensu + enswathed enswath ent ent + entail entail entame entam + entangled entangl entangles entangl + entendre entendr enter enter + entered enter entering enter + enterprise enterpris enterprises enterpris + enters enter entertain entertain + entertained entertain entertainer entertain + entertaining entertain entertainment entertain + entertainments entertain enthrall enthral + enthralled enthral enthron enthron + enthroned enthron entice entic + enticements entic enticing entic + entire entir entirely entir + entitle entitl entitled entitl + entitling entitl entomb entomb + entombed entomb entrails entrail + entrance entranc entrances entranc + entrap entrap entrapp entrapp + entre entr entreat entreat + entreated entreat entreaties entreati + entreating entreat entreatments entreat + entreats entreat entreaty entreati + entrench entrench entry entri + entwist entwist envelop envelop + envenom envenom envenomed envenom + envenoms envenom envied envi + envies envi envious enviou + enviously envious environ environ + environed environ envoy envoi + envy envi envying envi + enwheel enwheel enwombed enwomb + enwraps enwrap ephesian ephesian + ephesians ephesian ephesus ephesu + epicure epicur epicurean epicurean + epicures epicur epicurism epicur + epicurus epicuru epidamnum epidamnum + epidaurus epidauru epigram epigram + epilepsy epilepsi epileptic epilept + epilogue epilogu epilogues epilogu + epistles epistl epistrophus epistrophu + epitaph epitaph epitaphs epitaph + 
epithet epithet epitheton epitheton + epithets epithet epitome epitom + equal equal equalities equal + equality equal equall equal + equally equal equalness equal + equals equal equinoctial equinocti + equinox equinox equipage equipag + equity equiti equivocal equivoc + equivocate equivoc equivocates equivoc + equivocation equivoc equivocator equivoc + er er erbear erbear + erbearing erbear erbears erbear + erbeat erbeat erblows erblow + erboard erboard erborne erborn + ercame ercam ercast ercast + ercharg ercharg ercharged ercharg + ercharging ercharg ercles ercl + ercome ercom ercover ercov + ercrows ercrow erdoing erdo + ere er erebus erebu + erect erect erected erect + erecting erect erection erect + erects erect erewhile erewhil + erflourish erflourish erflow erflow + erflowing erflow erflows erflow + erfraught erfraught erga erga + ergalled ergal erglanced erglanc + ergo ergo ergone ergon + ergrow ergrow ergrown ergrown + ergrowth ergrowth erhang erhang + erhanging erhang erhasty erhasti + erhear erhear erheard erheard + eringoes eringo erjoy erjoi + erleap erleap erleaps erleap + erleavens erleaven erlook erlook + erlooking erlook ermaster ermast + ermengare ermengar ermount ermount + ern ern ernight ernight + eros ero erpaid erpaid + erparted erpart erpast erpast + erpays erpai erpeer erpeer + erperch erperch erpicturing erpictur + erpingham erpingham erposting erpost + erpow erpow erpress erpress + erpressed erpress err err + errand errand errands errand + errant errant errate errat + erraught erraught erreaches erreach + erred er errest errest + erring er erroneous erron + error error errors error + errs err errule errul + errun errun erset erset + ershade ershad ershades ershad + ershine ershin ershot ershot + ersized ersiz erskip erskip + erslips erslip erspreads erspread + erst erst erstare erstar + erstep erstep erstunk erstunk + ersway erswai ersways erswai + erswell erswel erta erta + ertake ertak erteemed erteem + erthrow erthrow erthrown erthrown 
+ erthrows erthrow ertook ertook + ertop ertop ertopping ertop + ertrip ertrip erturn erturn + erudition erudit eruption erupt + eruptions erupt ervalues ervalu + erwalk erwalk erwatch erwatch + erween erween erweens erween + erweigh erweigh erweighs erweigh + erwhelm erwhelm erwhelmed erwhelm + erworn erworn es es + escalus escalu escap escap + escape escap escaped escap + escapes escap eschew eschew + escoted escot esill esil + especial especi especially especi + esperance esper espials espial + espied espi espies espi + espous espou espouse espous + espy espi esquire esquir + esquires esquir essay essai + essays essai essence essenc + essential essenti essentially essenti + esses ess essex essex + est est establish establish + established establish estate estat + estates estat esteem esteem + esteemed esteem esteemeth esteemeth + esteeming esteem esteems esteem + estimable estim estimate estim + estimation estim estimations estim + estime estim estranged estrang + estridge estridg estridges estridg + et et etc etc + etceteras etcetera ete et + eternal etern eternally etern + eterne etern eternity etern + eterniz eterniz etes et + ethiop ethiop ethiope ethiop + ethiopes ethiop ethiopian ethiopian + etna etna eton eton + etre etr eunuch eunuch + eunuchs eunuch euphrates euphrat + euphronius euphroniu euriphile euriphil + europa europa europe europ + ev ev evade evad + evades evad evans evan + evasion evas evasions evas + eve ev even even + evening even evenly evenli + event event eventful event + events event ever ever + everlasting everlast everlastingly everlastingli + evermore evermor every everi + everyone everyon everything everyth + everywhere everywher evidence evid + evidences evid evident evid + evil evil evilly evilli + evils evil evitate evit + ewe ew ewer ewer + ewers ewer ewes ew + exact exact exacted exact + exactest exactest exacting exact + exaction exact exactions exact + exactly exactli exacts exact + exalt exalt exalted exalt + examin examin 
examination examin + examinations examin examine examin + examined examin examines examin + exampl exampl example exampl + exampled exampl examples exampl + exasperate exasper exasperates exasper + exceed exce exceeded exceed + exceedeth exceedeth exceeding exceed + exceedingly exceedingli exceeds exce + excel excel excelled excel + excellence excel excellencies excel + excellency excel excellent excel + excellently excel excelling excel + excels excel except except + excepted except excepting except + exception except exceptions except + exceptless exceptless excess excess + excessive excess exchang exchang + exchange exchang exchanged exchang + exchequer exchequ exchequers exchequ + excite excit excited excit + excitements excit excites excit + exclaim exclaim exclaims exclaim + exclamation exclam exclamations exclam + excludes exclud excommunicate excommun + excommunication excommun excrement excrement + excrements excrement excursion excurs + excursions excurs excus excu + excusable excus excuse excus + excused excus excuses excus + excusez excusez excusing excus + execrable execr execrations execr + execute execut executed execut + executing execut execution execut + executioner execution executioners execution + executor executor executors executor + exempt exempt exempted exempt + exequies exequi exercise exercis + exercises exercis exeter exet + exeunt exeunt exhal exhal + exhalation exhal exhalations exhal + exhale exhal exhales exhal + exhaust exhaust exhibit exhibit + exhibiters exhibit exhibition exhibit + exhort exhort exhortation exhort + exigent exig exil exil + exile exil exiled exil + exion exion exist exist + exists exist exit exit + exits exit exorciser exorcis + exorcisms exorc exorcist exorcist + expect expect expectance expect + expectancy expect expectation expect + expectations expect expected expect + expecters expect expecting expect + expects expect expedience expedi + expedient expedi expediently expedi + expedition expedit expeditious 
expediti + expel expel expell expel + expelling expel expels expel + expend expend expense expens + expenses expens experienc experienc + experience experi experiences experi + experiment experi experimental experiment + experiments experi expert expert + expertness expert expiate expiat + expiation expiat expir expir + expiration expir expire expir + expired expir expires expir + expiring expir explication explic + exploit exploit exploits exploit + expos expo expose expos + exposing expos exposition exposit + expositor expositor expostulate expostul + expostulation expostul exposture expostur + exposure exposur expound expound + expounded expound express express + expressed express expresseth expresseth + expressing express expressive express + expressly expressli expressure expressur + expuls expul expulsion expuls + exquisite exquisit exsufflicate exsuffl + extant extant extemporal extempor + extemporally extempor extempore extempor + extend extend extended extend + extends extend extent extent + extenuate extenu extenuated extenu + extenuates extenu extenuation extenu + exterior exterior exteriorly exteriorli + exteriors exterior extermin extermin + extern extern external extern + extinct extinct extincted extinct + extincture extinctur extinguish extinguish + extirp extirp extirpate extirp + extirped extirp extol extol + extoll extol extolment extol + exton exton extort extort + extorted extort extortion extort + extortions extort extra extra + extract extract extracted extract + extracting extract extraordinarily extraordinarili + extraordinary extraordinari extraught extraught + extravagancy extravag extravagant extravag + extreme extrem extremely extrem + extremes extrem extremest extremest + extremities extrem extremity extrem + exuent exuent exult exult + exultation exult ey ey + eyas eya eyases eyas + eye ey eyeball eyebal + eyeballs eyebal eyebrow eyebrow + eyebrows eyebrow eyed ei + eyeless eyeless eyelid eyelid + eyelids eyelid eyes ey + eyesight 
eyesight eyestrings eyestr + eying ei eyne eyn + eyrie eyri fa fa + fabian fabian fable fabl + fables fabl fabric fabric + fabulous fabul fac fac + face face faced face + facere facer faces face + faciant faciant facile facil + facility facil facinerious facineri + facing face facit facit + fact fact faction faction + factionary factionari factions faction + factious factiou factor factor + factors factor faculties faculti + faculty faculti fade fade + faded fade fadeth fadeth + fadge fadg fading fade + fadings fade fadom fadom + fadoms fadom fagot fagot + fagots fagot fail fail + failing fail fails fail + fain fain faint faint + fainted faint fainter fainter + fainting faint faintly faintli + faintness faint faints faint + fair fair fairer fairer + fairest fairest fairies fairi + fairing fair fairings fair + fairly fairli fairness fair + fairs fair fairwell fairwel + fairy fairi fais fai + fait fait faites fait + faith faith faithful faith + faithfull faithful faithfully faithfulli + faithless faithless faiths faith + faitors faitor fal fal + falchion falchion falcon falcon + falconbridge falconbridg falconer falcon + falconers falcon fall fall + fallacy fallaci fallen fallen + falleth falleth falliable falliabl + fallible fallibl falling fall + fallow fallow fallows fallow + falls fall fally falli + falorous falor false fals + falsehood falsehood falsely fals + falseness fals falser falser + falsify falsifi falsing fals + falstaff falstaff falstaffs falstaff + falter falter fam fam + fame fame famed fame + familiar familiar familiarity familiar + familiarly familiarli familiars familiar + family famili famine famin + famish famish famished famish + famous famou famoused famous + famously famous fan fan + fanatical fanat fancies fanci + fancy fanci fane fane + fanes fane fang fang + fangled fangl fangless fangless + fangs fang fann fann + fanning fan fans fan + fantasied fantasi fantasies fantasi + fantastic fantast fantastical fantast + fantastically fantast 
fantasticoes fantastico + fantasy fantasi fap fap + far far farborough farborough + farced farc fardel fardel + fardels fardel fare fare + fares fare farewell farewel + farewells farewel fariner farin + faring fare farm farm + farmer farmer farmhouse farmhous + farms farm farre farr + farrow farrow farther farther + farthest farthest farthing farth + farthingale farthingal farthingales farthingal + farthings farth fartuous fartuou + fas fa fashion fashion + fashionable fashion fashioning fashion + fashions fashion fast fast + fasted fast fasten fasten + fastened fasten faster faster + fastest fastest fasting fast + fastly fastli fastolfe fastolf + fasts fast fat fat + fatal fatal fatally fatal + fate fate fated fate + fates fate father father + fathered father fatherless fatherless + fatherly fatherli fathers father + fathom fathom fathomless fathomless + fathoms fathom fatigate fatig + fatness fat fats fat + fatted fat fatter fatter + fattest fattest fatting fat + fatuus fatuu fauconbridge fauconbridg + faulconbridge faulconbridg fault fault + faultiness faulti faultless faultless + faults fault faulty faulti + fausse fauss fauste faust + faustuses faustus faut faut + favor favor favorable favor + favorably favor favors favor + favour favour favourable favour + favoured favour favouredly favouredli + favourer favour favourers favour + favouring favour favourite favourit + favourites favourit favours favour + favout favout fawn fawn + fawneth fawneth fawning fawn + fawns fawn fay fai + fe fe fealty fealti + fear fear feared fear + fearest fearest fearful fear + fearfull fearful fearfully fearfulli + fearfulness fear fearing fear + fearless fearless fears fear + feast feast feasted feast + feasting feast feasts feast + feat feat feated feat + feater feater feather feather + feathered feather feathers feather + featly featli feats feat + featur featur feature featur + featured featur featureless featureless + features featur february februari + fecks feck fed fed + 
fedary fedari federary federari + fee fee feeble feebl + feebled feebl feebleness feebl + feebling feebl feebly feebli + feed feed feeder feeder + feeders feeder feedeth feedeth + feeding feed feeds feed + feel feel feeler feeler + feeling feel feelingly feelingli + feels feel fees fee + feet feet fehemently fehement + feign feign feigned feign + feigning feign feil feil + feith feith felicitate felicit + felicity felic fell fell + fellest fellest fellies felli + fellow fellow fellowly fellowli + fellows fellow fellowship fellowship + fellowships fellowship fells fell + felon felon felonious feloni + felony feloni felt felt + female femal females femal + feminine feminin fen fen + fenc fenc fence fenc + fencer fencer fencing fenc + fends fend fennel fennel + fenny fenni fens fen + fenton fenton fer fer + ferdinand ferdinand fere fere + fernseed fernse ferrara ferrara + ferrers ferrer ferret ferret + ferry ferri ferryman ferryman + fertile fertil fertility fertil + fervency fervenc fervour fervour + fery feri fest fest + feste fest fester fester + festinate festin festinately festin + festival festiv festivals festiv + fet fet fetch fetch + fetches fetch fetching fetch + fetlock fetlock fetlocks fetlock + fett fett fetter fetter + fettering fetter fetters fetter + fettle fettl feu feu + feud feud fever fever + feverous fever fevers fever + few few fewer fewer + fewest fewest fewness few + fickle fickl fickleness fickl + fico fico fiction fiction + fiddle fiddl fiddler fiddler + fiddlestick fiddlestick fidele fidel + fidelicet fidelicet fidelity fidel + fidius fidiu fie fie + field field fielded field + fields field fiend fiend + fiends fiend fierce fierc + fiercely fierc fierceness fierc + fiery fieri fife fife + fifes fife fifteen fifteen + fifteens fifteen fifteenth fifteenth + fifth fifth fifty fifti + fiftyfold fiftyfold fig fig + fight fight fighter fighter + fightest fightest fighteth fighteth + fighting fight fights fight + figo figo figs fig + figur figur 
figure figur + figured figur figures figur + figuring figur fike fike + fil fil filberts filbert + filch filch filches filch + filching filch file file + filed file files file + filial filial filius filiu + fill fill filled fill + fillet fillet filling fill + fillip fillip fills fill + filly filli film film + fils fil filth filth + filths filth filthy filthi + fin fin finally final + finch finch find find + finder finder findeth findeth + finding find findings find + finds find fine fine + fineless fineless finely fine + finem finem fineness fine + finer finer fines fine + finest finest fing fing + finger finger fingering finger + fingers finger fingre fingr + fingres fingr finical finic + finish finish finished finish + finisher finish finless finless + finn finn fins fin + finsbury finsburi fir fir + firago firago fire fire + firebrand firebrand firebrands firebrand + fired fire fires fire + firework firework fireworks firework + firing fire firk firk + firm firm firmament firmament + firmly firmli firmness firm + first first firstlings firstl + fish fish fisher fisher + fishermen fishermen fishers fisher + fishes fish fishified fishifi + fishmonger fishmong fishpond fishpond + fisnomy fisnomi fist fist + fisting fist fists fist + fistula fistula fit fit + fitchew fitchew fitful fit + fitly fitli fitment fitment + fitness fit fits fit + fitted fit fitter fitter + fittest fittest fitteth fitteth + fitting fit fitzwater fitzwat + five five fivepence fivep + fives five fix fix + fixed fix fixes fix + fixeth fixeth fixing fix + fixture fixtur fl fl + flag flag flagging flag + flagon flagon flagons flagon + flags flag flail flail + flakes flake flaky flaki + flam flam flame flame + flamen flamen flamens flamen + flames flame flaming flame + flaminius flaminiu flanders flander + flannel flannel flap flap + flaring flare flash flash + flashes flash flashing flash + flask flask flat flat + flatly flatli flatness flat + flats flat flatt flatt + flatter flatter flattered 
flatter + flatterer flatter flatterers flatter + flatterest flatterest flatteries flatteri + flattering flatter flatters flatter + flattery flatteri flaunts flaunt + flavio flavio flavius flaviu + flaw flaw flaws flaw + flax flax flaxen flaxen + flay flai flaying flai + flea flea fleance fleanc + fleas flea flecked fleck + fled fled fledge fledg + flee flee fleec fleec + fleece fleec fleeces fleec + fleer fleer fleering fleer + fleers fleer fleet fleet + fleeter fleeter fleeting fleet + fleming fleme flemish flemish + flesh flesh fleshes flesh + fleshly fleshli fleshment fleshment + fleshmonger fleshmong flew flew + flexible flexibl flexure flexur + flibbertigibbet flibbertigibbet flickering flicker + flidge flidg fliers flier + flies fli flieth flieth + flight flight flights flight + flighty flighti flinch flinch + fling fling flint flint + flints flint flinty flinti + flirt flirt float float + floated float floating float + flock flock flocks flock + flood flood floodgates floodgat + floods flood floor floor + flora flora florence florenc + florentine florentin florentines florentin + florentius florentiu florizel florizel + flote flote floulish floulish + flour flour flourish flourish + flourishes flourish flourisheth flourisheth + flourishing flourish flout flout + flouted flout flouting flout + flouts flout flow flow + flowed flow flower flower + flowerets floweret flowers flower + flowing flow flown flown + flows flow fluellen fluellen + fluent fluent flung flung + flush flush flushing flush + fluster fluster flute flute + flutes flute flutter flutter + flux flux fluxive fluxiv + fly fly flying fly + fo fo foal foal + foals foal foam foam + foamed foam foaming foam + foams foam foamy foami + fob fob focative foc + fodder fodder foe foe + foeman foeman foemen foemen + foes foe fog fog + foggy foggi fogs fog + foh foh foi foi + foil foil foiled foil + foils foil foin foin + foining foin foins foin + fois foi foison foison + foisons foison foist foist + foix 
foix fold fold + folded fold folds fold + folio folio folk folk + folks folk follies folli + follow follow followed follow + follower follow followers follow + followest followest following follow + follows follow folly folli + fond fond fonder fonder + fondly fondli fondness fond + font font fontibell fontibel + food food fool fool + fooleries fooleri foolery fooleri + foolhardy foolhardi fooling fool + foolish foolish foolishly foolishli + foolishness foolish fools fool + foot foot football footbal + footboy footboi footboys footboi + footed foot footfall footfal + footing foot footman footman + footmen footmen footpath footpath + footsteps footstep footstool footstool + fopp fopp fopped fop + foppery fopperi foppish foppish + fops fop for for + forage forag foragers forag + forbade forbad forbear forbear + forbearance forbear forbears forbear + forbid forbid forbidden forbidden + forbiddenly forbiddenli forbids forbid + forbod forbod forborne forborn + forc forc force forc + forced forc forceful forc + forceless forceless forces forc + forcible forcibl forcibly forcibl + forcing forc ford ford + fordid fordid fordo fordo + fordoes fordo fordone fordon + fore fore forecast forecast + forefather forefath forefathers forefath + forefinger forefing forego forego + foregone foregon forehand forehand + forehead forehead foreheads forehead + forehorse forehors foreign foreign + foreigner foreign foreigners foreign + foreknowing foreknow foreknowledge foreknowledg + foremost foremost forenamed forenam + forenoon forenoon forerun forerun + forerunner forerunn forerunning forerun + foreruns forerun foresaid foresaid + foresaw foresaw foresay foresai + foresee forese foreseeing forese + foresees forese foreshow foreshow + foreskirt foreskirt forespent foresp + forest forest forestall forestal + forestalled forestal forester forest + foresters forest forests forest + foretell foretel foretelling foretel + foretells foretel forethink forethink + forethought forethought 
foretold foretold + forever forev foreward foreward + forewarn forewarn forewarned forewarn + forewarning forewarn forfeit forfeit + forfeited forfeit forfeiters forfeit + forfeiting forfeit forfeits forfeit + forfeiture forfeitur forfeitures forfeitur + forfend forfend forfended forfend + forg forg forgave forgav + forge forg forged forg + forgeries forgeri forgery forgeri + forges forg forget forget + forgetful forget forgetfulness forget + forgetive forget forgets forget + forgetting forget forgive forgiv + forgiven forgiven forgiveness forgiv + forgo forgo forgoing forgo + forgone forgon forgot forgot + forgotten forgotten fork fork + forked fork forks fork + forlorn forlorn form form + formal formal formally formal + formed form former former + formerly formerli formless formless + forms form fornication fornic + fornications fornic fornicatress fornicatress + forres forr forrest forrest + forsake forsak forsaken forsaken + forsaketh forsaketh forslow forslow + forsook forsook forsooth forsooth + forspent forspent forspoke forspok + forswear forswear forswearing forswear + forswore forswor forsworn forsworn + fort fort forted fort + forth forth forthcoming forthcom + forthlight forthlight forthright forthright + forthwith forthwith fortification fortif + fortifications fortif fortified fortifi + fortifies fortifi fortify fortifi + fortinbras fortinbra fortitude fortitud + fortnight fortnight fortress fortress + fortresses fortress forts fort + fortun fortun fortuna fortuna + fortunate fortun fortunately fortun + fortune fortun fortuned fortun + fortunes fortun fortward fortward + forty forti forum forum + forward forward forwarding forward + forwardness forward forwards forward + forwearied forweari fosset fosset + fost fost foster foster + fostered foster fought fought + foughten foughten foul foul + fouler fouler foulest foulest + foully foulli foulness foul + found found foundation foundat + foundations foundat founded found + founder founder fount fount + 
fountain fountain fountains fountain + founts fount four four + fourscore fourscor fourteen fourteen + fourth fourth foutra foutra + fowl fowl fowler fowler + fowling fowl fowls fowl + fox fox foxes fox + foxship foxship fracted fract + fraction fraction fractions fraction + fragile fragil fragment fragment + fragments fragment fragrant fragrant + frail frail frailer frailer + frailties frailti frailty frailti + fram fram frame frame + framed frame frames frame + frampold frampold fran fran + francais francai france franc + frances franc franchise franchis + franchised franchis franchisement franchis + franchises franchis franciae francia + francis franci francisca francisca + franciscan franciscan francisco francisco + frank frank franker franker + frankfort frankfort franklin franklin + franklins franklin frankly frankli + frankness frank frantic frantic + franticly franticli frateretto frateretto + fratrum fratrum fraud fraud + fraudful fraud fraught fraught + fraughtage fraughtag fraughting fraught + fray frai frays frai + freckl freckl freckled freckl + freckles freckl frederick frederick + free free freed freed + freedom freedom freedoms freedom + freehearted freeheart freelier freelier + freely freeli freeman freeman + freemen freemen freeness freeness + freer freer frees free + freestone freeston freetown freetown + freeze freez freezes freez + freezing freez freezings freez + french french frenchman frenchman + frenchmen frenchmen frenchwoman frenchwoman + frenzy frenzi frequent frequent + frequents frequent fresh fresh + fresher fresher freshes fresh + freshest freshest freshly freshli + freshness fresh fret fret + fretful fret frets fret + fretted fret fretten fretten + fretting fret friar friar + friars friar friday fridai + fridays fridai friend friend + friended friend friending friend + friendless friendless friendliness friendli + friendly friendli friends friend + friendship friendship friendships friendship + frieze friez fright fright + frighted 
fright frightened frighten + frightful fright frighting fright + frights fright fringe fring + fringed fring frippery fripperi + frisk frisk fritters fritter + frivolous frivol fro fro + frock frock frog frog + frogmore frogmor froissart froissart + frolic frolic from from + front front fronted front + frontier frontier frontiers frontier + fronting front frontlet frontlet + fronts front frost frost + frosts frost frosty frosti + froth froth froward froward + frown frown frowning frown + frowningly frowningli frowns frown + froze froze frozen frozen + fructify fructifi frugal frugal + fruit fruit fruiterer fruiter + fruitful fruit fruitfully fruitfulli + fruitfulness fruit fruition fruition + fruitless fruitless fruits fruit + frush frush frustrate frustrat + frutify frutifi fry fry + fubb fubb fuel fuel + fugitive fugit fulfil fulfil + fulfill fulfil fulfilling fulfil + fulfils fulfil full full + fullam fullam fuller fuller + fullers fuller fullest fullest + fullness full fully fulli + fulness ful fulsome fulsom + fulvia fulvia fum fum + fumble fumbl fumbles fumbl + fumblest fumblest fumbling fumbl + fume fume fumes fume + fuming fume fumiter fumit + fumitory fumitori fun fun + function function functions function + fundamental fundament funeral funer + funerals funer fur fur + furbish furbish furies furi + furious furiou furlongs furlong + furnace furnac furnaces furnac + furnish furnish furnished furnish + furnishings furnish furniture furnitur + furnival furniv furor furor + furr furr furrow furrow + furrowed furrow furrows furrow + furth furth further further + furtherance further furtherer further + furthermore furthermor furthest furthest + fury furi furze furz + furzes furz fust fust + fustian fustian fustilarian fustilarian + fusty fusti fut fut + future futur futurity futur + g g gabble gabbl + gaberdine gaberdin gabriel gabriel + gad gad gadding gad + gads gad gadshill gadshil + gag gag gage gage + gaged gage gagg gagg + gaging gage gagne gagn + gain 
gain gained gain + gainer gainer gaingiving gaingiv + gains gain gainsaid gainsaid + gainsay gainsai gainsaying gainsai + gainsays gainsai gainst gainst + gait gait gaited gait + galathe galath gale gale + galen galen gales gale + gall gall gallant gallant + gallantly gallantli gallantry gallantri + gallants gallant galled gall + gallery galleri galley gallei + galleys gallei gallia gallia + gallian gallian galliard galliard + galliasses galliass gallimaufry gallimaufri + galling gall gallons gallon + gallop gallop galloping gallop + gallops gallop gallow gallow + galloway gallowai gallowglasses gallowglass + gallows gallow gallowses gallows + galls gall gallus gallu + gam gam gambol gambol + gambold gambold gambols gambol + gamboys gamboi game game + gamers gamer games game + gamesome gamesom gamester gamest + gaming game gammon gammon + gamut gamut gan gan + gangren gangren ganymede ganymed + gaol gaol gaoler gaoler + gaolers gaoler gaols gaol + gap gap gape gape + gapes gape gaping gape + gar gar garb garb + garbage garbag garboils garboil + garcon garcon gard gard + garde gard garden garden + gardener garden gardeners garden + gardens garden gardez gardez + gardiner gardin gardon gardon + gargantua gargantua gargrave gargrav + garish garish garland garland + garlands garland garlic garlic + garment garment garments garment + garmet garmet garner garner + garners garner garnish garnish + garnished garnish garret garret + garrison garrison garrisons garrison + gart gart garter garter + garterd garterd gartering garter + garters garter gascony gasconi + gash gash gashes gash + gaskins gaskin gasp gasp + gasping gasp gasted gast + gastness gast gat gat + gate gate gated gate + gates gate gath gath + gather gather gathered gather + gathering gather gathers gather + gatories gatori gatory gatori + gaud gaud gaudeo gaudeo + gaudy gaudi gauge gaug + gaul gaul gaultree gaultre + gaunt gaunt gauntlet gauntlet + gauntlets gauntlet gav gav + gave gave gavest gavest + 
gawded gawd gawds gawd + gawsey gawsei gay gai + gayness gay gaz gaz + gaze gaze gazed gaze + gazer gazer gazers gazer + gazes gaze gazeth gazeth + gazing gaze gear gear + geck geck geese gees + geffrey geffrei geld geld + gelded geld gelding geld + gelida gelida gelidus gelidu + gelt gelt gem gem + geminy gemini gems gem + gen gen gender gender + genders gender general gener + generally gener generals gener + generation gener generations gener + generative gener generosity generos + generous gener genitive genit + genitivo genitivo genius geniu + gennets gennet genoa genoa + genoux genoux gens gen + gent gent gentilhomme gentilhomm + gentility gentil gentle gentl + gentlefolks gentlefolk gentleman gentleman + gentlemanlike gentlemanlik gentlemen gentlemen + gentleness gentl gentler gentler + gentles gentl gentlest gentlest + gentlewoman gentlewoman gentlewomen gentlewomen + gently gentli gentry gentri + george georg gerard gerard + germaines germain germains germain + german german germane german + germans german germany germani + gertrude gertrud gest gest + gests gest gesture gestur + gestures gestur get get + getrude getrud gets get + getter getter getting get + ghastly ghastli ghost ghost + ghosted ghost ghostly ghostli + ghosts ghost gi gi + giant giant giantess giantess + giantlike giantlik giants giant + gib gib gibber gibber + gibbet gibbet gibbets gibbet + gibe gibe giber giber + gibes gibe gibing gibe + gibingly gibingli giddily giddili + giddiness giddi giddy giddi + gift gift gifts gift + gig gig giglets giglet + giglot giglot gilbert gilbert + gild gild gilded gild + gilding gild gilliams gilliam + gillian gillian gills gill + gillyvors gillyvor gilt gilt + gimmal gimmal gimmers gimmer + gin gin ging ging + ginger ginger gingerbread gingerbread + gingerly gingerli ginn ginn + gins gin gioucestershire gioucestershir + gipes gipe gipsies gipsi + gipsy gipsi gird gird + girded gird girdle girdl + girdled girdl girdles girdl + girdling girdl girl girl + 
girls girl girt girt + girth girth gis gi + giv giv give give + given given giver giver + givers giver gives give + givest givest giveth giveth + giving give givings give + glad glad gladded glad + gladding glad gladly gladli + gladness glad glamis glami + glanc glanc glance glanc + glanced glanc glances glanc + glancing glanc glanders glander + glansdale glansdal glare glare + glares glare glass glass + glasses glass glassy glassi + glaz glaz glazed glaze + gleams gleam glean glean + gleaned glean gleaning glean + gleeful gleeful gleek gleek + gleeking gleek gleeks gleek + glend glend glendower glendow + glib glib glide glide + glided glide glides glide + glideth glideth gliding glide + glimmer glimmer glimmering glimmer + glimmers glimmer glimpse glimps + glimpses glimps glist glist + glistening glisten glister glister + glistering glister glisters glister + glitt glitt glittering glitter + globe globe globes globe + glooming gloom gloomy gloomi + glories glori glorified glorifi + glorify glorifi glorious gloriou + gloriously glorious glory glori + glose glose gloss gloss + glosses gloss glou glou + glouceste gloucest gloucester gloucest + gloucestershire gloucestershir glove glove + glover glover gloves glove + glow glow glowed glow + glowing glow glowworm glowworm + gloz gloz gloze gloze + glozes gloze glu glu + glue glue glued glu + glues glue glut glut + glutt glutt glutted glut + glutton glutton gluttoning glutton + gluttony gluttoni gnarled gnarl + gnarling gnarl gnat gnat + gnats gnat gnaw gnaw + gnawing gnaw gnawn gnawn + gnaws gnaw go go + goad goad goaded goad + goads goad goal goal + goat goat goatish goatish + goats goat gobbets gobbet + gobbo gobbo goblet goblet + goblets goblet goblin goblin + goblins goblin god god + godded god godden godden + goddess goddess goddesses goddess + goddild goddild godfather godfath + godfathers godfath godhead godhead + godlike godlik godliness godli + godly godli godmother godmoth + gods god godson godson + goer goer 
goers goer + goes goe goest goest + goeth goeth goffe goff + gogs gog going go + gold gold golden golden + goldenly goldenli goldsmith goldsmith + goldsmiths goldsmith golgotha golgotha + goliases golias goliath goliath + gon gon gondola gondola + gondolier gondoli gone gone + goneril goneril gong gong + gonzago gonzago gonzalo gonzalo + good good goodfellow goodfellow + goodlier goodlier goodliest goodliest + goodly goodli goodman goodman + goodness good goodnight goodnight + goodrig goodrig goods good + goodwife goodwif goodwill goodwil + goodwin goodwin goodwins goodwin + goodyear goodyear goodyears goodyear + goose goos gooseberry gooseberri + goosequills goosequil goot goot + gor gor gorbellied gorbelli + gorboduc gorboduc gordian gordian + gore gore gored gore + gorg gorg gorge gorg + gorgeous gorgeou gorget gorget + gorging gorg gorgon gorgon + gormandize gormand gormandizing gormand + gory gori gosling gosl + gospel gospel gospels gospel + goss goss gossamer gossam + gossip gossip gossiping gossip + gossiplike gossiplik gossips gossip + got got goth goth + goths goth gotten gotten + gourd gourd gout gout + gouts gout gouty gouti + govern govern governance govern + governed govern governess gover + government govern governor governor + governors governor governs govern + gower gower gown gown + gowns gown grac grac + grace grace graced grace + graceful grace gracefully gracefulli + graceless graceless graces grace + gracing grace gracious graciou + graciously gracious gradation gradat + graff graff graffing graf + graft graft grafted graft + grafters grafter grain grain + grained grain grains grain + gramercies gramerci gramercy gramerci + grammar grammar grand grand + grandam grandam grandame grandam + grandchild grandchild grande grand + grandeur grandeur grandfather grandfath + grandjurors grandjuror grandmother grandmoth + grandpre grandpr grandsir grandsir + grandsire grandsir grandsires grandsir + grange grang grant grant + granted grant granting grant 
+ grants grant grape grape + grapes grape grapple grappl + grapples grappl grappling grappl + grasp grasp grasped grasp + grasps grasp grass grass + grasshoppers grasshopp grassy grassi + grate grate grated grate + grateful grate grates grate + gratiano gratiano gratify gratifi + gratii gratii gratillity gratil + grating grate gratis grati + gratitude gratitud gratulate gratul + grav grav grave grave + gravediggers gravedigg gravel gravel + graveless graveless gravell gravel + gravely grave graven graven + graveness grave graver graver + graves grave gravest gravest + gravestone graveston gravities graviti + gravity graviti gravy gravi + gray grai graymalkin graymalkin + graz graz graze graze + grazed graze grazing graze + grease greas greases greas + greasily greasili greasy greasi + great great greater greater + greatest greatest greatly greatli + greatness great grecian grecian + grecians grecian gree gree + greece greec greed greed + greedily greedili greediness greedi + greedy greedi greeing gree + greek greek greekish greekish + greeks greek green green + greener greener greenly greenli + greens green greensleeves greensleev + greenwich greenwich greenwood greenwood + greet greet greeted greet + greeting greet greetings greet + greets greet greg greg + gregory gregori gremio gremio + grew grew grey grei + greybeard greybeard greybeards greybeard + greyhound greyhound greyhounds greyhound + grief grief griefs grief + griev griev grievance grievanc + grievances grievanc grieve griev + grieved griev grieves griev + grievest grievest grieving griev + grievingly grievingli grievous grievou + grievously grievous griffin griffin + griffith griffith grim grim + grime grime grimly grimli + grin grin grind grind + grinding grind grindstone grindston + grinning grin grip grip + gripe gripe gripes gripe + griping gripe grise grise + grisly grisli grissel grissel + grize grize grizzle grizzl + grizzled grizzl groan groan + groaning groan groans groan + groat groat groats 
groat + groin groin groom groom + grooms groom grop grop + groping grope gros gro + gross gross grosser grosser + grossly grossli grossness gross + ground ground grounded ground + groundlings groundl grounds ground + grove grove grovel grovel + grovelling grovel groves grove + grow grow groweth groweth + growing grow grown grown + grows grow growth growth + grub grub grubb grubb + grubs grub grudge grudg + grudged grudg grudges grudg + grudging grudg gruel gruel + grumble grumbl grumblest grumblest + grumbling grumbl grumblings grumbl + grumio grumio grund grund + grunt grunt gualtier gualtier + guard guard guardage guardag + guardant guardant guarded guard + guardian guardian guardians guardian + guards guard guardsman guardsman + gud gud gudgeon gudgeon + guerdon guerdon guerra guerra + guess guess guesses guess + guessingly guessingli guest guest + guests guest guiana guiana + guichard guichard guide guid + guided guid guider guider + guiderius guideriu guides guid + guiding guid guidon guidon + guienne guienn guil guil + guildenstern guildenstern guilders guilder + guildford guildford guildhall guildhal + guile guil guiled guil + guileful guil guilfords guilford + guilt guilt guiltian guiltian + guiltier guiltier guiltily guiltili + guiltiness guilti guiltless guiltless + guilts guilt guilty guilti + guinea guinea guinever guinev + guise guis gul gul + gules gule gulf gulf + gulfs gulf gull gull + gulls gull gum gum + gumm gumm gums gum + gun gun gunner gunner + gunpowder gunpowd guns gun + gurnet gurnet gurney gurnei + gust gust gusts gust + gusty gusti guts gut + gutter gutter guy gui + guynes guyn guysors guysor + gypsy gypsi gyve gyve + gyved gyve gyves gyve + h h ha ha + haberdasher haberdash habiliment habili + habiliments habili habit habit + habitation habit habited habit + habits habit habitude habitud + hack hack hacket hacket + hackney hacknei hacks hack + had had hadst hadst + haec haec haeres haer + hag hag hagar hagar + haggard haggard haggards 
haggard + haggish haggish haggled haggl + hags hag hail hail + hailed hail hailstone hailston + hailstones hailston hair hair + hairless hairless hairs hair + hairy hairi hal hal + halberd halberd halberds halberd + halcyon halcyon hale hale + haled hale hales hale + half half halfcan halfcan + halfpence halfpenc halfpenny halfpenni + halfpennyworth halfpennyworth halfway halfwai + halidom halidom hall hall + halloa halloa halloing hallo + hallond hallond halloo halloo + hallooing halloo hallow hallow + hallowed hallow hallowmas hallowma + hallown hallown hals hal + halt halt halter halter + halters halter halting halt + halts halt halves halv + ham ham hames hame + hamlet hamlet hammer hammer + hammered hammer hammering hammer + hammers hammer hamper hamper + hampton hampton hams ham + hamstring hamstr hand hand + handed hand handful hand + handicraft handicraft handicraftsmen handicraftsmen + handing hand handiwork handiwork + handkercher handkerch handkerchers handkerch + handkerchief handkerchief handle handl + handled handl handles handl + handless handless handlest handlest + handling handl handmaid handmaid + handmaids handmaid hands hand + handsaw handsaw handsome handsom + handsomely handsom handsomeness handsom + handwriting handwrit handy handi + hang hang hanged hang + hangers hanger hangeth hangeth + hanging hang hangings hang + hangman hangman hangmen hangmen + hangs hang hannibal hannib + hap hap hapless hapless + haply hapli happ happ + happen happen happened happen + happier happier happies happi + happiest happiest happily happili + happiness happi happy happi + haps hap harbinger harbing + harbingers harbing harbor harbor + harbour harbour harbourage harbourag + harbouring harbour harbours harbour + harcourt harcourt hard hard + harder harder hardest hardest + hardiest hardiest hardiment hardiment + hardiness hardi hardly hardli + hardness hard hardocks hardock + hardy hardi hare hare + harelip harelip hares hare + harfleur harfleur hark hark + 
harlot harlot harlotry harlotri + harlots harlot harm harm + harmed harm harmful harm + harming harm harmless harmless + harmonious harmoni harmony harmoni + harms harm harness har + harp harp harper harper + harpier harpier harping harp + harpy harpi harried harri + harrow harrow harrows harrow + harry harri harsh harsh + harshly harshli harshness harsh + hart hart harts hart + harum harum harvest harvest + has ha hast hast + haste hast hasted hast + hasten hasten hastes hast + hastily hastili hasting hast + hastings hast hasty hasti + hat hat hatch hatch + hatches hatch hatchet hatchet + hatching hatch hatchment hatchment + hate hate hated hate + hateful hate hater hater + haters hater hates hate + hateth hateth hatfield hatfield + hath hath hating hate + hatred hatr hats hat + haud haud hauf hauf + haught haught haughtiness haughti + haughty haughti haunch haunch + haunches haunch haunt haunt + haunted haunt haunting haunt + haunts haunt hautboy hautboi + hautboys hautboi have have + haven haven havens haven + haver haver having have + havings have havior havior + haviour haviour havoc havoc + hawk hawk hawking hawk + hawks hawk hawthorn hawthorn + hawthorns hawthorn hay hai + hazard hazard hazarded hazard + hazards hazard hazel hazel + hazelnut hazelnut he he + head head headborough headborough + headed head headier headier + heading head headland headland + headless headless headlong headlong + heads head headsman headsman + headstrong headstrong heady headi + heal heal healed heal + healing heal heals heal + health health healthful health + healths health healthsome healthsom + healthy healthi heap heap + heaping heap heaps heap + hear hear heard heard + hearer hearer hearers hearer + hearest hearest heareth heareth + hearing hear hearings hear + heark heark hearken hearken + hearkens hearken hears hear + hearsay hearsai hearse hears + hearsed hears hearst hearst + heart heart heartache heartach + heartbreak heartbreak heartbreaking heartbreak + hearted heart 
hearten hearten + hearth hearth hearths hearth + heartily heartili heartiness hearti + heartless heartless heartlings heartl + heartly heartli hearts heart + heartsick heartsick heartstrings heartstr + hearty hearti heat heat + heated heat heath heath + heathen heathen heathenish heathenish + heating heat heats heat + heauties heauti heav heav + heave heav heaved heav + heaven heaven heavenly heavenli + heavens heaven heaves heav + heavier heavier heaviest heaviest + heavily heavili heaviness heavi + heaving heav heavings heav + heavy heavi hebona hebona + hebrew hebrew hecate hecat + hectic hectic hector hector + hectors hector hecuba hecuba + hedg hedg hedge hedg + hedgehog hedgehog hedgehogs hedgehog + hedges hedg heed heed + heeded heed heedful heed + heedfull heedful heedfully heedfulli + heedless heedless heel heel + heels heel hefted heft + hefts heft heifer heifer + heifers heifer heigh heigh + height height heighten heighten + heinous heinou heinously heinous + heir heir heiress heiress + heirless heirless heirs heir + held held helen helen + helena helena helenus helenu + helias helia helicons helicon + hell hell hellespont hellespont + hellfire hellfir hellish hellish + helm helm helmed helm + helmet helmet helmets helmet + helms helm help help + helper helper helpers helper + helpful help helping help + helpless helpless helps help + helter helter hem hem + heme heme hemlock hemlock + hemm hemm hemp hemp + hempen hempen hems hem + hen hen hence henc + henceforth henceforth henceforward henceforward + henchman henchman henri henri + henricus henricu henry henri + hens hen hent hent + henton henton her her + herald herald heraldry heraldri + heralds herald herb herb + herbert herbert herblets herblet + herbs herb herculean herculean + hercules hercul herd herd + herds herd herdsman herdsman + herdsmen herdsmen here here + hereabout hereabout hereabouts hereabout + hereafter hereaft hereby herebi + hereditary hereditari hereford hereford + herefordshire 
herefordshir herein herein + hereof hereof heresies heresi + heresy heresi heretic heret + heretics heret hereto hereto + hereupon hereupon heritage heritag + heritier heriti hermes herm + hermia hermia hermione hermion + hermit hermit hermitage hermitag + hermits hermit herne hern + hero hero herod herod + herods herod heroes hero + heroic heroic heroical heroic + herring her herrings her + hers her herself herself + hesperides hesperid hesperus hesperu + hest hest hests hest + heure heur heureux heureux + hew hew hewgh hewgh + hewing hew hewn hewn + hews hew hey hei + heyday heydai hibocrates hibocr + hic hic hiccups hiccup + hick hick hid hid + hidden hidden hide hide + hideous hideou hideously hideous + hideousness hideous hides hide + hidest hidest hiding hide + hie hie hied hi + hiems hiem hies hi + hig hig high high + higher higher highest highest + highly highli highmost highmost + highness high hight hight + highway highwai highways highwai + hilding hild hildings hild + hill hill hillo hillo + hilloa hilloa hills hill + hilt hilt hilts hilt + hily hili him him + himself himself hinc hinc + hinckley hincklei hind hind + hinder hinder hindered hinder + hinders hinder hindmost hindmost + hinds hind hing hing + hinge hing hinges hing + hint hint hip hip + hipp hipp hipparchus hipparchu + hippolyta hippolyta hips hip + hir hir hire hire + hired hire hiren hiren + hirtius hirtiu his hi + hisperia hisperia hiss hiss + hisses hiss hissing hiss + hist hist historical histor + history histori hit hit + hither hither hitherto hitherto + hitherward hitherward hitherwards hitherward + hits hit hitting hit + hive hive hives hive + hizzing hizz ho ho + hoa hoa hoar hoar + hoard hoard hoarded hoard + hoarding hoard hoars hoar + hoarse hoars hoary hoari + hob hob hobbididence hobbidid + hobby hobbi hobbyhorse hobbyhors + hobgoblin hobgoblin hobnails hobnail + hoc hoc hod hod + hodge hodg hog hog + hogs hog hogshead hogshead + hogsheads hogshead hois hoi + hoise hois hoist 
hoist + hoisted hoist hoists hoist + holborn holborn hold hold + holden holden holder holder + holdeth holdeth holdfast holdfast + holding hold holds hold + hole hole holes hole + holidam holidam holidame holidam + holiday holidai holidays holidai + holier holier holiest holiest + holily holili holiness holi + holla holla holland holland + hollander holland hollanders holland + holloa holloa holloaing holloa + hollow hollow hollowly hollowli + hollowness hollow holly holli + holmedon holmedon holofernes holofern + holp holp holy holi + homage homag homager homag + home home homely home + homes home homespuns homespun + homeward homeward homewards homeward + homicide homicid homicides homicid + homily homili hominem hominem + hommes homm homo homo + honest honest honester honest + honestest honestest honestly honestli + honesty honesti honey honei + honeycomb honeycomb honeying honei + honeyless honeyless honeysuckle honeysuckl + honeysuckles honeysuckl honi honi + honneur honneur honor honor + honorable honor honorably honor + honorato honorato honorificabilitudinitatibus honorificabilitudinitatibu + honors honor honour honour + honourable honour honourably honour + honoured honour honourest honourest + honourible honour honouring honour + honours honour hoo hoo + hood hood hooded hood + hoodman hoodman hoods hood + hoodwink hoodwink hoof hoof + hoofs hoof hook hook + hooking hook hooks hook + hoop hoop hoops hoop + hoot hoot hooted hoot + hooting hoot hoots hoot + hop hop hope hope + hopeful hope hopeless hopeless + hopes hope hopest hopest + hoping hope hopkins hopkin + hoppedance hopped hor hor + horace horac horatio horatio + horizon horizon horn horn + hornbook hornbook horned horn + horner horner horning horn + hornpipes hornpip horns horn + horologe horolog horrible horribl + horribly horribl horrid horrid + horrider horrid horridly horridli + horror horror horrors horror + hors hor horse hors + horseback horseback horsed hors + horsehairs horsehair horseman 
horseman + horsemanship horsemanship horsemen horsemen + horses hors horseway horsewai + horsing hors hortensio hortensio + hortensius hortensiu horum horum + hose hose hospitable hospit + hospital hospit hospitality hospit + host host hostage hostag + hostages hostag hostess hostess + hostile hostil hostility hostil + hostilius hostiliu hosts host + hot hot hotly hotli + hotspur hotspur hotter hotter + hottest hottest hound hound + hounds hound hour hour + hourly hourli hours hour + hous hou house hous + household household householder household + householders household households household + housekeeper housekeep housekeepers housekeep + housekeeping housekeep houseless houseless + houses hous housewife housewif + housewifery housewiferi housewives housew + hovel hovel hover hover + hovered hover hovering hover + hovers hover how how + howbeit howbeit howe how + howeer howeer however howev + howl howl howled howl + howlet howlet howling howl + howls howl howsoe howso + howsoever howsoev howsome howsom + hoxes hox hoy hoi + hoyday hoydai hubert hubert + huddled huddl huddling huddl + hue hue hued hu + hues hue hug hug + huge huge hugely huge + hugeness huge hugg hugg + hugger hugger hugh hugh + hugs hug hujus huju + hulk hulk hulks hulk + hull hull hulling hull + hullo hullo hum hum + human human humane human + humanely human humanity human + humble humbl humbled humbl + humbleness humbl humbler humbler + humbles humbl humblest humblest + humbling humbl humbly humbl + hume hume humh humh + humidity humid humility humil + humming hum humor humor + humorous humor humors humor + humour humour humourists humourist + humours humour humphrey humphrei + humphry humphri hums hum + hundred hundr hundreds hundr + hundredth hundredth hung hung + hungarian hungarian hungary hungari + hunger hunger hungerford hungerford + hungerly hungerli hungry hungri + hunt hunt hunted hunt + hunter hunter hunters hunter + hunteth hunteth hunting hunt + huntington huntington huntress 
huntress + hunts hunt huntsman huntsman + huntsmen huntsmen hurdle hurdl + hurl hurl hurling hurl + hurls hurl hurly hurli + hurlyburly hurlyburli hurricano hurricano + hurricanoes hurricano hurried hurri + hurries hurri hurry hurri + hurt hurt hurting hurt + hurtled hurtl hurtless hurtless + hurtling hurtl hurts hurt + husband husband husbanded husband + husbandless husbandless husbandry husbandri + husbands husband hush hush + hushes hush husht husht + husks husk huswife huswif + huswifes huswif hutch hutch + hybla hybla hydra hydra + hyen hyen hymen hymen + hymenaeus hymenaeu hymn hymn + hymns hymn hyperboles hyperbol + hyperbolical hyperbol hyperion hyperion + hypocrisy hypocrisi hypocrite hypocrit + hypocrites hypocrit hyrcan hyrcan + hyrcania hyrcania hyrcanian hyrcanian + hyssop hyssop hysterica hysterica + i i iachimo iachimo + iaculis iaculi iago iago + iament iament ibat ibat + icarus icaru ice ic + iceland iceland ici ici + icicle icicl icicles icicl + icy ici idea idea + ideas idea idem idem + iden iden ides id + idiot idiot idiots idiot + idle idl idleness idl + idles idl idly idli + idol idol idolatrous idolatr + idolatry idolatri ield ield + if if ifs if + ignis igni ignoble ignobl + ignobly ignobl ignominious ignomini + ignominy ignomini ignomy ignomi + ignorance ignor ignorant ignor + ii ii iii iii + iiii iiii il il + ilbow ilbow ild ild + ilion ilion ilium ilium + ill ill illegitimate illegitim + illiterate illiter illness ill + illo illo ills ill + illume illum illumin illumin + illuminate illumin illumineth illumineth + illusion illus illusions illus + illustrate illustr illustrated illustr + illustrious illustri illyria illyria + illyrian illyrian ils il + im im image imag + imagery imageri images imag + imagin imagin imaginary imaginari + imagination imagin imaginations imagin + imagine imagin imagining imagin + imaginings imagin imbar imbar + imbecility imbecil imbrue imbru + imitari imitari imitate imit + imitated imit imitation imit + 
imitations imit immaculate immacul + immanity imman immask immask + immaterial immateri immediacy immediaci + immediate immedi immediately immedi + imminence immin imminent immin + immoderate immoder immoderately immoder + immodest immodest immoment immoment + immortal immort immortaliz immortaliz + immortally immort immur immur + immured immur immures immur + imogen imogen imp imp + impaint impaint impair impair + impairing impair impale impal + impaled impal impanelled impanel + impart impart imparted impart + impartial imparti impartment impart + imparts impart impasted impast + impatience impati impatient impati + impatiently impati impawn impawn + impeach impeach impeached impeach + impeachment impeach impeachments impeach + impedes imped impediment impedi + impediments impedi impenetrable impenetr + imperator imper imperceiverant imperceiver + imperfect imperfect imperfection imperfect + imperfections imperfect imperfectly imperfectli + imperial imperi imperious imperi + imperiously imperi impertinency impertin + impertinent impertin impeticos impetico + impetuosity impetuos impetuous impetu + impieties impieti impiety impieti + impious impiou implacable implac + implements implement implies impli + implor implor implorators implor + implore implor implored implor + imploring implor impon impon + import import importance import + importancy import important import + importantly importantli imported import + importeth importeth importing import + importless importless imports import + importun importun importunacy importunaci + importunate importun importune importun + importunes importun importunity importun + impos impo impose impos + imposed impos imposition imposit + impositions imposit impossibilities imposs + impossibility imposs impossible imposs + imposthume imposthum impostor impostor + impostors impostor impotence impot + impotent impot impounded impound + impregnable impregn imprese impres + impress impress impressed impress + impressest impressest 
impression impress + impressure impressur imprimendum imprimendum + imprimis imprimi imprint imprint + imprinted imprint imprison imprison + imprisoned imprison imprisoning imprison + imprisonment imprison improbable improb + improper improp improve improv + improvident improvid impudence impud + impudency impud impudent impud + impudently impud impudique impudiqu + impugn impugn impugns impugn + impure impur imputation imput + impute imput in in + inaccessible inaccess inaidable inaid + inaudible inaud inauspicious inauspici + incaged incag incantations incant + incapable incap incardinate incardin + incarnadine incarnadin incarnate incarn + incarnation incarn incens incen + incense incens incensed incens + incensement incens incenses incens + incensing incens incertain incertain + incertainties incertainti incertainty incertainti + incessant incess incessantly incessantli + incest incest incestuous incestu + inch inch incharitable incharit + inches inch incidency incid + incident incid incision incis + incite incit incites incit + incivil incivil incivility incivil + inclin inclin inclinable inclin + inclination inclin incline inclin + inclined inclin inclines inclin + inclining inclin inclips inclip + include includ included includ + includes includ inclusive inclus + incomparable incompar incomprehensible incomprehens + inconsiderate inconsider inconstancy inconst + inconstant inconst incontinency incontin + incontinent incontin incontinently incontin + inconvenience inconveni inconveniences inconveni + inconvenient inconveni incony inconi + incorporate incorpor incorps incorp + incorrect incorrect increas increa + increase increas increases increas + increaseth increaseth increasing increas + incredible incred incredulous incredul + incur incur incurable incur + incurr incurr incurred incur + incursions incurs ind ind + inde ind indebted indebt + indeed inde indent indent + indented indent indenture indentur + indentures indentur index index + indexes index 
india india + indian indian indict indict + indicted indict indictment indict + indies indi indifferency indiffer + indifferent indiffer indifferently indiffer + indigent indig indigest indigest + indigested indigest indign indign + indignation indign indignations indign + indigne indign indignities indign + indignity indign indirect indirect + indirection indirect indirections indirect + indirectly indirectli indiscreet indiscreet + indiscretion indiscret indispos indispo + indisposition indisposit indissoluble indissolubl + indistinct indistinct indistinguish indistinguish + indistinguishable indistinguish indited indit + individable individ indrench indrench + indu indu indubitate indubit + induc induc induce induc + induced induc inducement induc + induction induct inductions induct + indue indu indued indu + indues indu indulgence indulg + indulgences indulg indulgent indulg + indurance indur industrious industri + industriously industri industry industri + inequality inequ inestimable inestim + inevitable inevit inexecrable inexecr + inexorable inexor inexplicable inexplic + infallible infal infallibly infal + infamonize infamon infamous infam + infamy infami infancy infanc + infant infant infants infant + infect infect infected infect + infecting infect infection infect + infections infect infectious infecti + infectiously infecti infects infect + infer infer inference infer + inferior inferior inferiors inferior + infernal infern inferr inferr + inferreth inferreth inferring infer + infest infest infidel infidel + infidels infidel infinite infinit + infinitely infinit infinitive infinit + infirm infirm infirmities infirm + infirmity infirm infixed infix + infixing infix inflam inflam + inflame inflam inflaming inflam + inflammation inflamm inflict inflict + infliction inflict influence influenc + influences influenc infold infold + inform inform informal inform + information inform informations inform + informed inform informer inform + informs inform 
infortunate infortun + infring infr infringe infring + infringed infring infus infu + infuse infus infused infus + infusing infus infusion infus + ingener ingen ingenious ingeni + ingeniously ingeni inglorious inglori + ingots ingot ingraffed ingraf + ingraft ingraft ingrate ingrat + ingrated ingrat ingrateful ingrat + ingratitude ingratitud ingratitudes ingratitud + ingredient ingredi ingredients ingredi + ingross ingross inhabit inhabit + inhabitable inhabit inhabitants inhabit + inhabited inhabit inhabits inhabit + inhearse inhears inhearsed inhears + inherent inher inherit inherit + inheritance inherit inherited inherit + inheriting inherit inheritor inheritor + inheritors inheritor inheritrix inheritrix + inherits inherit inhibited inhibit + inhibition inhibit inhoop inhoop + inhuman inhuman iniquities iniqu + iniquity iniqu initiate initi + injointed injoint injunction injunct + injunctions injunct injur injur + injure injur injurer injur + injuries injuri injurious injuri + injury injuri injustice injustic + ink ink inkhorn inkhorn + inkle inkl inkles inkl + inkling inkl inky inki + inlaid inlaid inland inland + inlay inlai inly inli + inmost inmost inn inn + inner inner innkeeper innkeep + innocence innoc innocency innoc + innocent innoc innocents innoc + innovation innov innovator innov + inns inn innumerable innumer + inoculate inocul inordinate inordin + inprimis inprimi inquir inquir + inquire inquir inquiry inquiri + inquisition inquisit inquisitive inquisit + inroads inroad insane insan + insanie insani insatiate insati + insconce insconc inscrib inscrib + inscription inscript inscriptions inscript + inscroll inscrol inscrutable inscrut + insculp insculp insculpture insculptur + insensible insens inseparable insepar + inseparate insepar insert insert + inserted insert inset inset + inshell inshel inshipp inshipp + inside insid insinewed insinew + insinuate insinu insinuateth insinuateth + insinuating insinu insinuation insinu + insisted insist 
insisting insist + insisture insistur insociable insoci + insolence insol insolent insol + insomuch insomuch inspir inspir + inspiration inspir inspirations inspir + inspire inspir inspired inspir + install instal installed instal + instalment instal instance instanc + instances instanc instant instant + instantly instantli instate instat + instead instead insteeped insteep + instigate instig instigated instig + instigation instig instigations instig + instigator instig instinct instinct + instinctively instinct institute institut + institutions institut instruct instruct + instructed instruct instruction instruct + instructions instruct instructs instruct + instrument instrument instrumental instrument + instruments instrument insubstantial insubstanti + insufficience insuffici insufficiency insuffici + insult insult insulted insult + insulting insult insultment insult + insults insult insupportable insupport + insuppressive insuppress insurrection insurrect + insurrections insurrect int int + integer integ integritas integrita + integrity integr intellect intellect + intellects intellect intellectual intellectu + intelligence intellig intelligencer intelligenc + intelligencing intelligenc intelligent intellig + intelligis intelligi intelligo intelligo + intemperance intemper intemperate intemper + intend intend intended intend + intendeth intendeth intending intend + intendment intend intends intend + intenible inten intent intent + intention intent intentively intent + intents intent inter inter + intercept intercept intercepted intercept + intercepter intercept interception intercept + intercepts intercept intercession intercess + intercessors intercessor interchained interchain + interchang interchang interchange interchang + interchangeably interchang interchangement interchang + interchanging interchang interdiction interdict + interest interest interim interim + interims interim interior interior + interjections interject interjoin interjoin + interlude 
interlud intermingle intermingl + intermission intermiss intermissive intermiss + intermit intermit intermix intermix + intermixed intermix interpose interpos + interposer interpos interposes interpos + interpret interpret interpretation interpret + interpreted interpret interpreter interpret + interpreters interpret interprets interpret + interr interr interred inter + interrogatories interrogatori interrupt interrupt + interrupted interrupt interrupter interrupt + interruptest interruptest interruption interrupt + interrupts interrupt intertissued intertissu + intervallums intervallum interview interview + intestate intest intestine intestin + intil intil intimate intim + intimation intim intitled intitl + intituled intitul into into + intolerable intoler intoxicates intox + intreasured intreasur intreat intreat + intrench intrench intrenchant intrench + intricate intric intrinse intrins + intrinsicate intrins intrude intrud + intruder intrud intruding intrud + intrusion intrus inundation inund + inure inur inurn inurn + invade invad invades invad + invasion invas invasive invas + invectively invect invectives invect + inveigled inveigl invent invent + invented invent invention invent + inventions invent inventor inventor + inventorially inventori inventoried inventori + inventors inventor inventory inventori + inverness inver invert invert + invest invest invested invest + investing invest investments invest + inveterate inveter invincible invinc + inviolable inviol invised invis + invisible invis invitation invit + invite invit invited invit + invites invit inviting invit + invitis inviti invocate invoc + invocation invoc invoke invok + invoked invok invulnerable invulner + inward inward inwardly inwardli + inwardness inward inwards inward + ionia ionia ionian ionian + ipse ips ipswich ipswich + ira ira irae ira + iras ira ire ir + ireful ir ireland ireland + iris iri irish irish + irishman irishman irishmen irishmen + irks irk irksome irksom + iron iron irons 
iron + irreconcil irreconcil irrecoverable irrecover + irregular irregular irregulous irregul + irreligious irreligi irremovable irremov + irreparable irrepar irresolute irresolut + irrevocable irrevoc is is + isabel isabel isabella isabella + isbel isbel isbels isbel + iscariot iscariot ise is + ish ish isidore isidor + isis isi island island + islander island islanders island + islands island isle isl + isles isl israel israel + issu issu issue issu + issued issu issueless issueless + issues issu issuing issu + ist ist ista ista + it it italian italian + italy itali itch itch + itches itch itching itch + item item items item + iteration iter ithaca ithaca + its it itself itself + itshall itshal iv iv + ivory ivori ivy ivi + iwis iwi ix ix + j j jacet jacet + jack jack jackanapes jackanap + jacks jack jacksauce jacksauc + jackslave jackslav jacob jacob + jade jade jaded jade + jades jade jail jail + jakes jake jamany jamani + james jame jamy jami + jane jane jangled jangl + jangling jangl january januari + janus janu japhet japhet + jaquenetta jaquenetta jaques jaqu + jar jar jarring jar + jars jar jarteer jarteer + jasons jason jaunce jaunc + jauncing jaunc jaundice jaundic + jaundies jaundi jaw jaw + jawbone jawbon jaws jaw + jay jai jays jai + jc jc je je + jealous jealou jealousies jealousi + jealousy jealousi jeer jeer + jeering jeer jelly jelli + jenny jenni jeopardy jeopardi + jephtha jephtha jephthah jephthah + jerkin jerkin jerkins jerkin + jerks jerk jeronimy jeronimi + jerusalem jerusalem jeshu jeshu + jesses jess jessica jessica + jest jest jested jest + jester jester jesters jester + jesting jest jests jest + jesu jesu jesus jesu + jet jet jets jet + jew jew jewel jewel + jeweller jewel jewels jewel + jewess jewess jewish jewish + jewry jewri jews jew + jezebel jezebel jig jig + jigging jig jill jill + jills jill jingling jingl + joan joan job job + jockey jockei jocund jocund + jog jog jogging jog + john john johns john + join join joinder joinder + 
joined join joiner joiner + joineth joineth joins join + joint joint jointed joint + jointing joint jointly jointli + jointress jointress joints joint + jointure jointur jollity jolliti + jolly jolli jolt jolt + joltheads jolthead jordan jordan + joseph joseph joshua joshua + jot jot jour jour + jourdain jourdain journal journal + journey journei journeying journei + journeyman journeyman journeymen journeymen + journeys journei jove jove + jovem jovem jovial jovial + jowl jowl jowls jowl + joy joi joyed joi + joyful joy joyfully joyfulli + joyless joyless joyous joyou + joys joi juan juan + jud jud judas juda + judases judas jude jude + judg judg judge judg + judged judg judgement judgement + judges judg judgest judgest + judging judg judgment judgment + judgments judgment judicious judici + jug jug juggle juggl + juggled juggl juggler juggler + jugglers juggler juggling juggl + jugs jug juice juic + juiced juic jul jul + jule jule julia julia + juliet juliet julietta julietta + julio julio julius juliu + july juli jump jump + jumpeth jumpeth jumping jump + jumps jump june june + junes june junior junior + junius juniu junkets junket + juno juno jupiter jupit + jure jure jurement jurement + jurisdiction jurisdict juror juror + jurors juror jury juri + jurymen jurymen just just + justeius justeiu justest justest + justice justic justicer justic + justicers justic justices justic + justification justif justified justifi + justify justifi justle justl + justled justl justles justl + justling justl justly justli + justness just justs just + jutting jut jutty jutti + juvenal juven kam kam + kate kate kated kate + kates kate katharine katharin + katherina katherina katherine katherin + kecksies kecksi keech keech + keel keel keels keel + keen keen keenness keen + keep keep keepdown keepdown + keeper keeper keepers keeper + keepest keepest keeping keep + keeps keep keiser keiser + ken ken kendal kendal + kennel kennel kent kent + kentish kentish kentishman kentishman + 
kentishmen kentishmen kept kept + kerchief kerchief kerely kere + kern kern kernal kernal + kernel kernel kernels kernel + kerns kern kersey kersei + kettle kettl kettledrum kettledrum + kettledrums kettledrum key kei + keys kei kibe kibe + kibes kibe kick kick + kicked kick kickshaws kickshaw + kickshawses kickshaws kicky kicki + kid kid kidney kidnei + kikely kike kildare kildar + kill kill killed kill + killer killer killeth killeth + killing kill killingworth killingworth + kills kill kiln kiln + kimbolton kimbolton kin kin + kind kind kinder kinder + kindest kindest kindle kindl + kindled kindl kindless kindless + kindlier kindlier kindling kindl + kindly kindli kindness kind + kindnesses kind kindred kindr + kindreds kindr kinds kind + kine kine king king + kingdom kingdom kingdoms kingdom + kingly kingli kings king + kinred kinr kins kin + kinsman kinsman kinsmen kinsmen + kinswoman kinswoman kirtle kirtl + kirtles kirtl kiss kiss + kissed kiss kisses kiss + kissing kiss kitchen kitchen + kitchens kitchen kite kite + kites kite kitten kitten + kj kj kl kl + klll klll knack knack + knacks knack knapp knapp + knav knav knave knave + knaveries knaveri knavery knaveri + knaves knave knavish knavish + knead knead kneaded knead + kneading knead knee knee + kneel kneel kneeling kneel + kneels kneel knees knee + knell knell knew knew + knewest knewest knife knife + knight knight knighted knight + knighthood knighthood knighthoods knighthood + knightly knightli knights knight + knit knit knits knit + knitters knitter knitteth knitteth + knives knive knobs knob + knock knock knocking knock + knocks knock knog knog + knoll knoll knot knot + knots knot knotted knot + knotty knotti know know + knower knower knowest knowest + knowing know knowingly knowingli + knowings know knowledge knowledg + known known knows know + l l la la + laban laban label label + labell label labienus labienu + labio labio labor labor + laboring labor labors labor + labour labour laboured labour 
+ labourer labour labourers labour + labouring labour labours labour + laboursome laboursom labras labra + labyrinth labyrinth lac lac + lace lace laced lace + lacedaemon lacedaemon laces lace + lacies laci lack lack + lackbeard lackbeard lacked lack + lackey lackei lackeying lackei + lackeys lackei lacking lack + lacks lack lad lad + ladder ladder ladders ladder + lade lade laden laden + ladies ladi lading lade + lads lad lady ladi + ladybird ladybird ladyship ladyship + ladyships ladyship laer laer + laertes laert lafeu lafeu + lag lag lagging lag + laid laid lain lain + laissez laissez lake lake + lakes lake lakin lakin + lam lam lamb lamb + lambert lambert lambkin lambkin + lambkins lambkin lambs lamb + lame lame lamely lame + lameness lame lament lament + lamentable lament lamentably lament + lamentation lament lamentations lament + lamented lament lamenting lament + lamentings lament laments lament + lames lame laming lame + lammas lamma lammastide lammastid + lamound lamound lamp lamp + lampass lampass lamps lamp + lanc lanc lancaster lancast + lance lanc lances lanc + lanceth lanceth lanch lanch + land land landed land + landing land landless landless + landlord landlord landmen landmen + lands land lane lane + lanes lane langage langag + langley langlei langton langton + language languag languageless languageless + languages languag langues langu + languish languish languished languish + languishes languish languishing languish + languishings languish languishment languish + languor languor lank lank + lantern lantern lanterns lantern + lanthorn lanthorn lap lap + lapis lapi lapland lapland + lapp lapp laps lap + lapse laps lapsed laps + lapsing laps lapwing lapw + laquais laquai larded lard + larder larder larding lard + lards lard large larg + largely larg largeness larg + larger larger largess largess + largest largest lark lark + larks lark larron larron + lartius lartiu larum larum + larums larum las la + lascivious lascivi lash lash + lass lass 
lasses lass + last last lasted last + lasting last lastly lastli + lasts last latch latch + latches latch late late + lated late lately late + later later latest latest + lath lath latin latin + latten latten latter latter + lattice lattic laud laud + laudable laudabl laudis laudi + laugh laugh laughable laughabl + laughed laugh laugher laugher + laughest laughest laughing laugh + laughs laugh laughter laughter + launce launc launcelot launcelot + launces launc launch launch + laund laund laundress laundress + laundry laundri laur laur + laura laura laurel laurel + laurels laurel laurence laurenc + laus lau lavache lavach + lave lave lavee lave + lavender lavend lavina lavina + lavinia lavinia lavish lavish + lavishly lavishli lavolt lavolt + lavoltas lavolta law law + lawful law lawfully lawfulli + lawless lawless lawlessly lawlessli + lawn lawn lawns lawn + lawrence lawrenc laws law + lawyer lawyer lawyers lawyer + lay lai layer layer + layest layest laying lai + lays lai lazar lazar + lazars lazar lazarus lazaru + lazy lazi lc lc + ld ld ldst ldst + le le lead lead + leaden leaden leader leader + leaders leader leadest leadest + leading lead leads lead + leaf leaf leagu leagu + league leagu leagued leagu + leaguer leaguer leagues leagu + leah leah leak leak + leaky leaki lean lean + leander leander leaner leaner + leaning lean leanness lean + leans lean leap leap + leaped leap leaping leap + leaps leap leapt leapt + lear lear learn learn + learned learn learnedly learnedli + learning learn learnings learn + learns learn learnt learnt + leas lea lease leas + leases leas leash leash + leasing leas least least + leather leather leathern leathern + leav leav leave leav + leaven leaven leavening leaven + leaver leaver leaves leav + leaving leav leavy leavi + lecher lecher lecherous lecher + lechers lecher lechery lecheri + lecon lecon lecture lectur + lectures lectur led led + leda leda leech leech + leeches leech leek leek + leeks leek leer leer + leers leer lees 
lee + leese lees leet leet + leets leet left left + leg leg legacies legaci + legacy legaci legate legat + legatine legatin lege lege + legerity leger leges lege + legg legg legion legion + legions legion legitimate legitim + legitimation legitim legs leg + leicester leicest leicestershire leicestershir + leiger leiger leigers leiger + leisure leisur leisurely leisur + leisures leisur leman leman + lemon lemon lena lena + lend lend lender lender + lending lend lendings lend + lends lend length length + lengthen lengthen lengthens lengthen + lengths length lenity leniti + lennox lennox lent lent + lenten lenten lentus lentu + leo leo leon leon + leonardo leonardo leonati leonati + leonato leonato leonatus leonatu + leontes leont leopard leopard + leopards leopard leper leper + leperous leper lepidus lepidu + leprosy leprosi lequel lequel + lers ler les le + less less lessen lessen + lessens lessen lesser lesser + lesson lesson lessoned lesson + lessons lesson lest lest + lestrake lestrak let let + lethargied lethargi lethargies lethargi + lethargy lethargi lethe leth + lets let lett lett + letter letter letters letter + letting let lettuce lettuc + leur leur leve leve + level level levell level + levelled level levels level + leven leven levers lever + leviathan leviathan leviathans leviathan + levied levi levies levi + levity leviti levy levi + levying levi lewd lewd + lewdly lewdli lewdness lewd + lewdsters lewdster lewis lewi + liable liabl liar liar + liars liar libbard libbard + libelling libel libels libel + liberal liber liberality liber + liberte libert liberties liberti + libertine libertin libertines libertin + liberty liberti library librari + libya libya licence licenc + licens licen license licens + licentious licenti lichas licha + licio licio lick lick + licked lick licker licker + lictors lictor lid lid + lids lid lie lie + lied li lief lief + liefest liefest liege lieg + liegeman liegeman liegemen liegemen + lien lien lies li + liest liest lieth 
lieth + lieu lieu lieutenant lieuten + lieutenantry lieutenantri lieutenants lieuten + lieve liev life life + lifeblood lifeblood lifeless lifeless + lifelings lifel lift lift + lifted lift lifter lifter + lifteth lifteth lifting lift + lifts lift lig lig + ligarius ligariu liggens liggen + light light lighted light + lighten lighten lightens lighten + lighter lighter lightest lightest + lightly lightli lightness light + lightning lightn lightnings lightn + lights light lik lik + like like liked like + likeliest likeliest likelihood likelihood + likelihoods likelihood likely like + likeness like liker liker + likes like likest likest + likewise likewis liking like + likings like lilies lili + lily lili lim lim + limander limand limb limb + limbeck limbeck limbecks limbeck + limber limber limbo limbo + limbs limb lime lime + limed lime limehouse limehous + limekilns limekiln limit limit + limitation limit limited limit + limits limit limn limn + limp limp limping limp + limps limp lin lin + lincoln lincoln lincolnshire lincolnshir + line line lineal lineal + lineally lineal lineament lineament + lineaments lineament lined line + linen linen linens linen + lines line ling ling + lingare lingar linger linger + lingered linger lingers linger + linguist linguist lining line + link link links link + linsey linsei linstock linstock + linta linta lion lion + lionel lionel lioness lioness + lions lion lip lip + lipp lipp lips lip + lipsbury lipsburi liquid liquid + liquor liquor liquorish liquorish + liquors liquor lirra lirra + lisbon lisbon lisp lisp + lisping lisp list list + listen listen listening listen + lists list literatured literatur + lither lither litter litter + little littl littlest littlest + liv liv live live + lived live livelier liveli + livelihood livelihood livelong livelong + lively live liver liver + liveries liveri livers liver + livery liveri lives live + livest livest liveth liveth + livia livia living live + livings live lizard lizard + lizards 
lizard ll ll + lll lll llous llou + lnd lnd lo lo + loa loa loach loach + load load loaden loaden + loading load loads load + loaf loaf loam loam + loan loan loath loath + loathe loath loathed loath + loather loather loathes loath + loathing loath loathly loathli + loathness loath loathsome loathsom + loathsomeness loathsom loathsomest loathsomest + loaves loav lob lob + lobbies lobbi lobby lobbi + local local lochaber lochab + lock lock locked lock + locking lock lockram lockram + locks lock locusts locust + lode lode lodg lodg + lodge lodg lodged lodg + lodgers lodger lodges lodg + lodging lodg lodgings lodg + lodovico lodovico lodowick lodowick + lofty lofti log log + logger logger loggerhead loggerhead + loggerheads loggerhead loggets logget + logic logic logs log + loins loin loiter loiter + loiterer loiter loiterers loiter + loitering loiter lolling loll + lolls loll lombardy lombardi + london london londoners london + lone lone loneliness loneli + lonely lone long long + longaville longavil longboat longboat + longed long longer longer + longest longest longeth longeth + longing long longings long + longly longli longs long + longtail longtail loo loo + loof loof look look + looked look looker looker + lookers looker lookest lookest + looking look looks look + loon loon loop loop + loos loo loose loos + loosed loos loosely loos + loosen loosen loosing loos + lop lop lopp lopp + loquitur loquitur lord lord + lorded lord lording lord + lordings lord lordliness lordli + lordly lordli lords lord + lordship lordship lordships lordship + lorenzo lorenzo lorn lorn + lorraine lorrain lorship lorship + los lo lose lose + loser loser losers loser + loses lose losest losest + loseth loseth losing lose + loss loss losses loss + lost lost lot lot + lots lot lott lott + lottery lotteri loud loud + louder louder loudly loudli + lour lour loureth loureth + louring lour louse lous + louses lous lousy lousi + lout lout louted lout + louts lout louvre louvr + lov lov love love 
+ loved love lovedst lovedst + lovel lovel lovelier loveli + loveliness loveli lovell lovel + lovely love lover lover + lovered lover lovers lover + loves love lovest lovest + loveth loveth loving love + lovingly lovingli low low + lowe low lower lower + lowest lowest lowing low + lowliness lowli lowly lowli + lown lown lowness low + loyal loyal loyally loyal + loyalties loyalti loyalty loyalti + lozel lozel lt lt + lubber lubber lubberly lubberli + luc luc luccicos luccico + luce luce lucentio lucentio + luces luce lucetta lucetta + luciana luciana lucianus lucianu + lucifer lucif lucifier lucifi + lucilius luciliu lucina lucina + lucio lucio lucius luciu + luck luck luckier luckier + luckiest luckiest luckily luckili + luckless luckless lucky lucki + lucre lucr lucrece lucrec + lucretia lucretia lucullius luculliu + lucullus lucullu lucy luci + lud lud ludlow ludlow + lug lug lugg lugg + luggage luggag luke luke + lukewarm lukewarm lull lull + lulla lulla lullaby lullabi + lulls lull lumbert lumbert + lump lump lumpish lumpish + luna luna lunacies lunaci + lunacy lunaci lunatic lunat + lunatics lunat lunes lune + lungs lung lupercal luperc + lurch lurch lure lure + lurk lurk lurketh lurketh + lurking lurk lurks lurk + luscious lusciou lush lush + lust lust lusted lust + luster luster lustful lust + lustier lustier lustiest lustiest + lustig lustig lustihood lustihood + lustily lustili lustre lustr + lustrous lustrou lusts lust + lusty lusti lute lute + lutes lute lutestring lutestr + lutheran lutheran luxurious luxuri + luxuriously luxuri luxury luxuri + ly ly lycaonia lycaonia + lycurguses lycurgus lydia lydia + lye lye lyen lyen + lying ly lym lym + lymoges lymog lynn lynn + lysander lysand m m + ma ma maan maan + mab mab macbeth macbeth + maccabaeus maccabaeu macdonwald macdonwald + macduff macduff mace mace + macedon macedon maces mace + machiavel machiavel machination machin + machinations machin machine machin + mack mack macmorris macmorri + maculate macul 
maculation macul + mad mad madam madam + madame madam madams madam + madcap madcap madded mad + madding mad made made + madeira madeira madly madli + madman madman madmen madmen + madness mad madonna madonna + madrigals madrig mads mad + maecenas maecena maggot maggot + maggots maggot magic magic + magical magic magician magician + magistrate magistr magistrates magistr + magnanimity magnanim magnanimous magnanim + magni magni magnifi magnifi + magnificence magnific magnificent magnific + magnifico magnifico magnificoes magnifico + magnus magnu mahomet mahomet + mahu mahu maid maid + maiden maiden maidenhead maidenhead + maidenheads maidenhead maidenhood maidenhood + maidenhoods maidenhood maidenliest maidenliest + maidenly maidenli maidens maiden + maidhood maidhood maids maid + mail mail mailed mail + mails mail maim maim + maimed maim maims maim + main main maincourse maincours + maine main mainly mainli + mainmast mainmast mains main + maintain maintain maintained maintain + maintains maintain maintenance mainten + mais mai maison maison + majestas majesta majestee majeste + majestic majest majestical majest + majestically majest majesties majesti + majesty majesti major major + majority major mak mak + make make makeless makeless + maker maker makers maker + makes make makest makest + maketh maketh making make + makings make mal mal + mala mala maladies maladi + malady maladi malapert malapert + malcolm malcolm malcontent malcont + malcontents malcont male male + maledictions maledict malefactions malefact + malefactor malefactor malefactors malefactor + males male malevolence malevol + malevolent malevol malhecho malhecho + malice malic malicious malici + maliciously malici malign malign + malignancy malign malignant malign + malignantly malignantli malkin malkin + mall mall mallard mallard + mallet mallet mallows mallow + malmsey malmsei malt malt + maltworms maltworm malvolio malvolio + mamillius mamilliu mammering mammer + mammet mammet mammets mammet + 
mammock mammock man man + manacle manacl manacles manacl + manage manag managed manag + manager manag managing manag + manakin manakin manchus manchu + mandate mandat mandragora mandragora + mandrake mandrak mandrakes mandrak + mane mane manent manent + manes mane manet manet + manfully manfulli mangle mangl + mangled mangl mangles mangl + mangling mangl mangy mangi + manhood manhood manhoods manhood + manifest manifest manifested manifest + manifests manifest manifold manifold + manifoldly manifoldli manka manka + mankind mankind manlike manlik + manly manli mann mann + manna manna manner manner + mannerly mannerli manners manner + manningtree manningtre mannish mannish + manor manor manors manor + mans man mansion mansion + mansionry mansionri mansions mansion + manslaughter manslaught mantle mantl + mantled mantl mantles mantl + mantua mantua mantuan mantuan + manual manual manure manur + manured manur manus manu + many mani map map + mapp mapp maps map + mar mar marble marbl + marbled marbl marcade marcad + marcellus marcellu march march + marches march marcheth marcheth + marching march marchioness marchio + marchpane marchpan marcians marcian + marcius marciu marcus marcu + mardian mardian mare mare + mares mare marg marg + margarelon margarelon margaret margaret + marge marg margent margent + margery margeri maria maria + marian marian mariana mariana + maries mari marigold marigold + mariner marin mariners marin + maritime maritim marjoram marjoram + mark mark marked mark + market market marketable market + marketplace marketplac markets market + marking mark markman markman + marks mark marl marl + marle marl marmoset marmoset + marquess marquess marquis marqui + marr marr marriage marriag + marriages marriag married marri + marries marri marring mar + marrow marrow marrowless marrowless + marrows marrow marry marri + marrying marri mars mar + marseilles marseil marsh marsh + marshal marshal marshalsea marshalsea + marshalship marshalship mart mart + 
marted mart martem martem + martext martext martial martial + martin martin martino martino + martius martiu martlemas martlema + martlet martlet marts mart + martyr martyr martyrs martyr + marullus marullu marv marv + marvel marvel marvell marvel + marvellous marvel marvellously marvel + marvels marvel mary mari + mas ma masculine masculin + masham masham mask mask + masked mask masker masker + maskers masker masking mask + masks mask mason mason + masonry masonri masons mason + masque masqu masquers masquer + masques masqu masquing masqu + mass mass massacre massacr + massacres massacr masses mass + massy massi mast mast + mastcr mastcr master master + masterdom masterdom masterest masterest + masterless masterless masterly masterli + masterpiece masterpiec masters master + mastership mastership mastic mastic + mastiff mastiff mastiffs mastiff + masts mast match match + matches match matcheth matcheth + matching match matchless matchless + mate mate mated mate + mater mater material materi + mates mate mathematics mathemat + matin matin matron matron + matrons matron matter matter + matters matter matthew matthew + mattock mattock mattress mattress + mature matur maturity matur + maud maud maudlin maudlin + maugre maugr maul maul + maund maund mauri mauri + mauritania mauritania mauvais mauvai + maw maw maws maw + maxim maxim may mai + mayday maydai mayest mayest + mayor mayor maypole maypol + mayst mayst maz maz + maze maze mazed maze + mazes maze mazzard mazzard + me me meacock meacock + mead mead meadow meadow + meadows meadow meads mead + meagre meagr meal meal + meals meal mealy meali + mean mean meanders meander + meaner meaner meanest meanest + meaneth meaneth meaning mean + meanings mean meanly meanli + means mean meant meant + meantime meantim meanwhile meanwhil + measles measl measur measur + measurable measur measure measur + measured measur measureless measureless + measures measur measuring measur + meat meat meats meat + mechanic mechan mechanical 
mechan + mechanicals mechan mechanics mechan + mechante mechant med med + medal medal meddle meddl + meddler meddler meddling meddl + mede mede medea medea + media media mediation mediat + mediators mediat medice medic + medicinal medicin medicine medicin + medicines medicin meditate medit + meditates medit meditating medit + meditation medit meditations medit + mediterranean mediterranean mediterraneum mediterraneum + medlar medlar medlars medlar + meed meed meeds meed + meek meek meekly meekli + meekness meek meet meet + meeter meeter meetest meetest + meeting meet meetings meet + meetly meetli meetness meet + meets meet meg meg + mehercle mehercl meilleur meilleur + meiny meini meisen meisen + melancholies melancholi melancholy melancholi + melford melford mell mell + mellifluous melliflu mellow mellow + mellowing mellow melodious melodi + melody melodi melt melt + melted melt melteth melteth + melting melt melts melt + melun melun member member + members member memento memento + memorable memor memorandums memorandum + memorial memori memorials memori + memories memori memoriz memoriz + memorize memor memory memori + memphis memphi men men + menac menac menace menac + menaces menac menaphon menaphon + menas mena mend mend + mended mend mender mender + mending mend mends mend + menecrates menecr menelaus menelau + menenius meneniu mental mental + menteith menteith mention mention + mentis menti menton menton + mephostophilus mephostophilu mer mer + mercatante mercatant mercatio mercatio + mercenaries mercenari mercenary mercenari + mercer mercer merchandise merchandis + merchandized merchand merchant merchant + merchants merchant mercies merci + merciful merci mercifully mercifulli + merciless merciless mercurial mercuri + mercuries mercuri mercury mercuri + mercutio mercutio mercy merci + mere mere mered mere + merely mere merest merest + meridian meridian merit merit + merited merit meritorious meritori + merits merit merlin merlin + mermaid mermaid mermaids 
mermaid + merops merop merrier merrier + merriest merriest merrily merrili + merriman merriman merriment merriment + merriments merriment merriness merri + merry merri mervailous mervail + mes me mesh mesh + meshes mesh mesopotamia mesopotamia + mess mess message messag + messages messag messala messala + messaline messalin messenger messeng + messengers messeng messes mess + messina messina met met + metal metal metals metal + metamorphis metamorphi metamorphoses metamorphos + metaphor metaphor metaphysical metaphys + metaphysics metaphys mete mete + metellus metellu meteor meteor + meteors meteor meteyard meteyard + metheglin metheglin metheglins metheglin + methink methink methinks methink + method method methods method + methought methought methoughts methought + metre metr metres metr + metropolis metropoli mette mett + mettle mettl mettled mettl + meus meu mew mew + mewed mew mewling mewl + mexico mexico mi mi + mice mice michael michael + michaelmas michaelma micher micher + miching mich mickle mickl + microcosm microcosm mid mid + midas mida middest middest + middle middl middleham middleham + midnight midnight midriff midriff + midst midst midsummer midsumm + midway midwai midwife midwif + midwives midwiv mienne mienn + might might mightful might + mightier mightier mightiest mightiest + mightily mightili mightiness mighti + mightst mightst mighty mighti + milan milan milch milch + mild mild milder milder + mildest mildest mildew mildew + mildews mildew mildly mildli + mildness mild mile mile + miles mile milford milford + militarist militarist military militari + milk milk milking milk + milkmaid milkmaid milks milk + milksops milksop milky milki + mill mill mille mill + miller miller milliner millin + million million millioned million + millions million mills mill + millstones millston milo milo + mimic mimic minc minc + mince minc minces minc + mincing minc mind mind + minded mind minding mind + mindless mindless minds mind + mine mine mineral miner + 
minerals miner minerva minerva + mines mine mingle mingl + mingled mingl mingling mingl + minikin minikin minim minim + minime minim minimo minimo + minimus minimu mining mine + minion minion minions minion + minist minist minister minist + ministers minist ministration ministr + minnow minnow minnows minnow + minola minola minority minor + minos mino minotaurs minotaur + minstrel minstrel minstrels minstrel + minstrelsy minstrelsi mint mint + mints mint minute minut + minutely minut minutes minut + minx minx mio mio + mir mir mirable mirabl + miracle miracl miracles miracl + miraculous miracul miranda miranda + mire mire mirror mirror + mirrors mirror mirth mirth + mirthful mirth miry miri + mis mi misadventur misadventur + misadventure misadventur misanthropos misanthropo + misapplied misappli misbecame misbecam + misbecom misbecom misbecome misbecom + misbegot misbegot misbegotten misbegotten + misbeliever misbeliev misbelieving misbeliev + misbhav misbhav miscall miscal + miscalled miscal miscarried miscarri + miscarries miscarri miscarry miscarri + miscarrying miscarri mischance mischanc + mischances mischanc mischief mischief + mischiefs mischief mischievous mischiev + misconceived misconceiv misconst misconst + misconster misconst misconstruction misconstruct + misconstrued misconstru misconstrues misconstru + miscreant miscreant miscreate miscreat + misdeed misde misdeeds misde + misdemean misdemean misdemeanours misdemeanour + misdoubt misdoubt misdoubteth misdoubteth + misdoubts misdoubt misenum misenum + miser miser miserable miser + miserably miser misericorde misericord + miseries miseri misers miser + misery miseri misfortune misfortun + misfortunes misfortun misgive misgiv + misgives misgiv misgiving misgiv + misgoverned misgovern misgovernment misgovern + misgraffed misgraf misguide misguid + mishap mishap mishaps mishap + misheard misheard misinterpret misinterpret + mislead mislead misleader mislead + misleaders mislead misleading mislead + misled 
misl mislike mislik + misord misord misplac misplac + misplaced misplac misplaces misplac + mispris mispri misprised mispris + misprision mispris misprizing mispriz + misproud misproud misquote misquot + misreport misreport miss miss + missed miss misses miss + misshap misshap misshapen misshapen + missheathed missheath missing miss + missingly missingli missions mission + missive missiv missives missiv + misspoke misspok mist mist + mista mista mistak mistak + mistake mistak mistaken mistaken + mistakes mistak mistaketh mistaketh + mistaking mistak mistakings mistak + mistemp mistemp mistempered mistemp + misterm misterm mistful mist + misthink misthink misthought misthought + mistletoe mistleto mistook mistook + mistreadings mistread mistress mistress + mistresses mistress mistresss mistresss + mistriship mistriship mistrust mistrust + mistrusted mistrust mistrustful mistrust + mistrusting mistrust mists mist + misty misti misus misu + misuse misus misused misus + misuses misus mites mite + mithridates mithrid mitigate mitig + mitigation mitig mix mix + mixed mix mixture mixtur + mixtures mixtur mm mm + mnd mnd moan moan + moans moan moat moat + moated moat mobled mobl + mock mock mockable mockabl + mocker mocker mockeries mockeri + mockers mocker mockery mockeri + mocking mock mocks mock + mockvater mockvat mockwater mockwat + model model modena modena + moderate moder moderately moder + moderation moder modern modern + modest modest modesties modesti + modestly modestli modesty modesti + modicums modicum modo modo + module modul moe moe + moi moi moiety moieti + moist moist moisten moisten + moisture moistur moldwarp moldwarp + mole mole molehill molehil + moles mole molest molest + molestation molest mollification mollif + mollis molli molten molten + molto molto mome mome + moment moment momentary momentari + moming mome mon mon + monachum monachum monarch monarch + monarchies monarchi monarchize monarch + monarcho monarcho monarchs monarch + monarchy 
monarchi monast monast + monastery monasteri monastic monast + monday mondai monde mond + money monei moneys monei + mong mong monger monger + mongers monger monging mong + mongrel mongrel mongrels mongrel + mongst mongst monk monk + monkey monkei monkeys monkei + monks monk monmouth monmouth + monopoly monopoli mons mon + monsieur monsieur monsieurs monsieur + monster monster monsters monster + monstrous monstrou monstrously monstrous + monstrousness monstrous monstruosity monstruos + montacute montacut montage montag + montague montagu montagues montagu + montano montano montant montant + montez montez montferrat montferrat + montgomery montgomeri month month + monthly monthli months month + montjoy montjoi monument monument + monumental monument monuments monument + mood mood moods mood + moody moodi moon moon + moonbeams moonbeam moonish moonish + moonlight moonlight moons moon + moonshine moonshin moonshines moonshin + moor moor moorfields moorfield + moors moor moorship moorship + mop mop mope mope + moping mope mopping mop + mopsa mopsa moral moral + moraler moral morality moral + moralize moral mordake mordak + more more moreover moreov + mores more morgan morgan + mori mori morisco morisco + morn morn morning morn + mornings morn morocco morocco + morris morri morrow morrow + morrows morrow morsel morsel + morsels morsel mort mort + mortal mortal mortality mortal + mortally mortal mortals mortal + mortar mortar mortgaged mortgag + mortified mortifi mortifying mortifi + mortimer mortim mortimers mortim + mortis morti mortise mortis + morton morton mose mose + moss moss mossgrown mossgrown + most most mote mote + moth moth mother mother + mothers mother moths moth + motion motion motionless motionless + motions motion motive motiv + motives motiv motley motlei + mots mot mought mought + mould mould moulded mould + mouldeth mouldeth moulds mould + mouldy mouldi moult moult + moulten moulten mounch mounch + mounseur mounseur mounsieur mounsieur + mount mount 
mountain mountain + mountaineer mountain mountaineers mountain + mountainous mountain mountains mountain + mountant mountant mountanto mountanto + mountebank mountebank mountebanks mountebank + mounted mount mounteth mounteth + mounting mount mounts mount + mourn mourn mourned mourn + mourner mourner mourners mourner + mournful mourn mournfully mournfulli + mourning mourn mourningly mourningli + mournings mourn mourns mourn + mous mou mouse mous + mousetrap mousetrap mousing mous + mouth mouth mouthed mouth + mouths mouth mov mov + movables movabl move move + moveable moveabl moveables moveabl + moved move mover mover + movers mover moves move + moveth moveth moving move + movingly movingli movousus movousu + mow mow mowbray mowbrai + mower mower mowing mow + mows mow moy moi + moys moi moyses moys + mrs mr much much + muck muck mud mud + mudded mud muddied muddi + muddy muddi muffins muffin + muffl muffl muffle muffl + muffled muffl muffler muffler + muffling muffl mugger mugger + mugs mug mulberries mulberri + mulberry mulberri mule mule + mules mule muleteers mulet + mulier mulier mulieres mulier + muliteus muliteu mull mull + mulmutius mulmutiu multiplied multipli + multiply multipli multiplying multipli + multipotent multipot multitude multitud + multitudes multitud multitudinous multitudin + mum mum mumble mumbl + mumbling mumbl mummers mummer + mummy mummi mun mun + munch munch muniments muniment + munition munit murd murd + murder murder murdered murder + murderer murder murderers murder + murdering murder murderous murder + murders murder mure mure + murk murk murkiest murkiest + murky murki murmur murmur + murmurers murmur murmuring murmur + murrain murrain murray murrai + murrion murrion murther murther + murtherer murther murtherers murther + murthering murther murtherous murther + murthers murther mus mu + muscadel muscadel muscovites muscovit + muscovits muscovit muscovy muscovi + muse muse muses muse + mush mush mushrooms mushroom + music music 
musical music + musician musician musicians musician + musics music musing muse + musings muse musk musk + musket musket muskets musket + muskos musko muss muss + mussel mussel mussels mussel + must must mustachio mustachio + mustard mustard mustardseed mustardse + muster muster mustering muster + musters muster musty musti + mutability mutabl mutable mutabl + mutation mutat mutations mutat + mute mute mutes mute + mutest mutest mutine mutin + mutineer mutin mutineers mutin + mutines mutin mutinies mutini + mutinous mutin mutiny mutini + mutius mutiu mutter mutter + muttered mutter mutton mutton + muttons mutton mutual mutual + mutualities mutual mutually mutual + muzzl muzzl muzzle muzzl + muzzled muzzl mv mv + mww mww my my + mynheers mynheer myrmidon myrmidon + myrmidons myrmidon myrtle myrtl + myself myself myst myst + mysteries mysteri mystery mysteri + n n nag nag + nage nage nags nag + naiads naiad nail nail + nails nail nak nak + naked nake nakedness naked + nal nal nam nam + name name named name + nameless nameless namely name + names name namest namest + naming name nan nan + nance nanc nap nap + nape nape napes nape + napkin napkin napkins napkin + naples napl napless napless + napping nap naps nap + narbon narbon narcissus narcissu + narines narin narrow narrow + narrowly narrowli naso naso + nasty nasti nathaniel nathaniel + natifs natif nation nation + nations nation native nativ + nativity nativ natur natur + natural natur naturalize natur + naturally natur nature natur + natured natur natures natur + natus natu naught naught + naughtily naughtili naughty naughti + navarre navarr nave nave + navel navel navigation navig + navy navi nay nai + nayward nayward nayword nayword + nazarite nazarit ne ne + neaf neaf neamnoins neamnoin + neanmoins neanmoin neapolitan neapolitan + neapolitans neapolitan near near + nearer nearer nearest nearest + nearly nearli nearness near + neat neat neatly neatli + neb neb nebour nebour + nebuchadnezzar nebuchadnezzar nec 
nec + necessaries necessari necessarily necessarili + necessary necessari necessitied necess + necessities necess necessity necess + neck neck necklace necklac + necks neck nectar nectar + ned ned nedar nedar + need need needed need + needer needer needful need + needfull needful needing need + needle needl needles needl + needless needless needly needli + needs need needy needi + neer neer neeze neez + nefas nefa negation negat + negative neg negatives neg + neglect neglect neglected neglect + neglecting neglect neglectingly neglectingli + neglection neglect negligence neglig + negligent neglig negotiate negoti + negotiations negoti negro negro + neigh neigh neighbors neighbor + neighbour neighbour neighbourhood neighbourhood + neighbouring neighbour neighbourly neighbourli + neighbours neighbour neighing neigh + neighs neigh neither neither + nell nell nemean nemean + nemesis nemesi neoptolemus neoptolemu + nephew nephew nephews nephew + neptune neptun ner ner + nereides nereid nerissa nerissa + nero nero neroes nero + ners ner nerve nerv + nerves nerv nervii nervii + nervy nervi nessus nessu + nest nest nestor nestor + nests nest net net + nether nether netherlands netherland + nets net nettle nettl + nettled nettl nettles nettl + neuter neuter neutral neutral + nev nev never never + nevil nevil nevils nevil + new new newborn newborn + newer newer newest newest + newgate newgat newly newli + newness new news new + newsmongers newsmong newt newt + newts newt next next + nibbling nibbl nicanor nicanor + nice nice nicely nice + niceness nice nicer nicer + nicety niceti nicholas nichola + nick nick nickname nicknam + nicks nick niece niec + nieces niec niggard niggard + niggarding niggard niggardly niggardli + nigh nigh night night + nightcap nightcap nightcaps nightcap + nighted night nightgown nightgown + nightingale nightingal nightingales nightingal + nightly nightli nightmare nightmar + nights night nightwork nightwork + nihil nihil nile nile + nill nill nilus 
nilu + nimble nimbl nimbleness nimbl + nimbler nimbler nimbly nimbl + nine nine nineteen nineteen + ning ning ningly ningli + ninny ninni ninth ninth + ninus ninu niobe niob + niobes niob nip nip + nipp nipp nipping nip + nipple nippl nips nip + nit nit nly nly + nnight nnight nnights nnight + no no noah noah + nob nob nobility nobil + nobis nobi noble nobl + nobleman nobleman noblemen noblemen + nobleness nobl nobler nobler + nobles nobl noblesse nobless + noblest noblest nobly nobli + nobody nobodi noces noce + nod nod nodded nod + nodding nod noddle noddl + noddles noddl noddy noddi + nods nod noes noe + nointed noint nois noi + noise nois noiseless noiseless + noisemaker noisemak noises nois + noisome noisom nole nole + nominate nomin nominated nomin + nomination nomin nominativo nominativo + non non nonage nonag + nonce nonc none none + nonino nonino nonny nonni + nonpareil nonpareil nonsuits nonsuit + nony noni nook nook + nooks nook noon noon + noonday noondai noontide noontid + nor nor norbery norberi + norfolk norfolk norman norman + normandy normandi normans norman + north north northampton northampton + northamptonshire northamptonshir northerly northerli + northern northern northgate northgat + northumberland northumberland northumberlands northumberland + northward northward norway norwai + norways norwai norwegian norwegian + norweyan norweyan nos no + nose nose nosegays nosegai + noseless noseless noses nose + noster noster nostra nostra + nostril nostril nostrils nostril + not not notable notabl + notably notabl notary notari + notch notch note note + notebook notebook noted note + notedly notedli notes note + notest notest noteworthy noteworthi + nothing noth nothings noth + notice notic notify notifi + noting note notion notion + notorious notori notoriously notori + notre notr notwithstanding notwithstand + nought nought noun noun + nouns noun nourish nourish + nourished nourish nourisher nourish + nourishes nourish nourisheth nourisheth + 
nourishing nourish nourishment nourish + nous nou novel novel + novelties novelti novelty novelti + noverbs noverb novi novi + novice novic novices novic + novum novum now now + nowhere nowher noyance noyanc + ns ns nt nt + nubibus nubibu numa numa + numb numb number number + numbered number numbering number + numberless numberless numbers number + numbness numb nun nun + nuncio nuncio nuncle nuncl + nunnery nunneri nuns nun + nuntius nuntiu nuptial nuptial + nurs nur nurse nurs + nursed nurs nurser nurser + nursery nurseri nurses nurs + nurseth nurseth nursh nursh + nursing nurs nurtur nurtur + nurture nurtur nut nut + nuthook nuthook nutmeg nutmeg + nutmegs nutmeg nutriment nutriment + nuts nut nutshell nutshel + ny ny nym nym + nymph nymph nymphs nymph + o o oak oak + oaken oaken oaks oak + oared oar oars oar + oatcake oatcak oaten oaten + oath oath oathable oathabl + oaths oath oats oat + ob ob obduracy obduraci + obdurate obdur obedience obedi + obedient obedi obeisance obeis + oberon oberon obey obei + obeyed obei obeying obei + obeys obei obidicut obidicut + object object objected object + objections object objects object + oblation oblat oblations oblat + obligation oblig obligations oblig + obliged oblig oblique obliqu + oblivion oblivion oblivious oblivi + obloquy obloqui obscene obscen + obscenely obscen obscur obscur + obscure obscur obscured obscur + obscurely obscur obscures obscur + obscuring obscur obscurity obscur + obsequies obsequi obsequious obsequi + obsequiously obsequi observ observ + observance observ observances observ + observancy observ observant observ + observants observ observation observ + observe observ observed observ + observer observ observers observ + observing observ observingly observingli + obsque obsqu obstacle obstacl + obstacles obstacl obstinacy obstinaci + obstinate obstin obstinately obstin + obstruct obstruct obstruction obstruct + obstructions obstruct obtain obtain + obtained obtain obtaining obtain + occasion occas 
occasions occas + occident occid occidental occident + occulted occult occupat occupat + occupation occup occupations occup + occupied occupi occupies occupi + occupy occupi occurrence occurr + occurrences occurr occurrents occurr + ocean ocean oceans ocean + octavia octavia octavius octaviu + ocular ocular od od + odd odd oddest oddest + oddly oddli odds odd + ode od odes od + odious odiou odoriferous odorifer + odorous odor odour odour + odours odour ods od + oeillades oeillad oes oe + oeuvres oeuvr of of + ofephesus ofephesu off off + offal offal offence offenc + offenceful offenc offences offenc + offend offend offended offend + offendendo offendendo offender offend + offenders offend offendeth offendeth + offending offend offendress offendress + offends offend offense offens + offenseless offenseless offenses offens + offensive offens offer offer + offered offer offering offer + offerings offer offers offer + offert offert offic offic + office offic officed offic + officer offic officers offic + offices offic official offici + officious offici offspring offspr + oft oft often often + oftener often oftentimes oftentim + oh oh oil oil + oils oil oily oili + old old oldcastle oldcastl + olden olden older older + oldest oldest oldness old + olive oliv oliver oliv + olivers oliv olives oliv + olivia olivia olympian olympian + olympus olympu oman oman + omans oman omen omen + ominous omin omission omiss + omit omit omittance omitt + omitted omit omitting omit + omne omn omnes omn + omnipotent omnipot on on + once onc one on + ones on oneyers oney + ongles ongl onion onion + onions onion only onli + onset onset onward onward + onwards onward oo oo + ooze ooz oozes ooz + oozy oozi op op + opal opal ope op + open open opener open + opening open openly openli + openness open opens open + operant oper operate oper + operation oper operations oper + operative oper opes op + oph oph ophelia ophelia + opinion opinion opinions opinion + opportune opportun opportunities 
opportun + opportunity opportun oppos oppo + oppose oppos opposed oppos + opposeless opposeless opposer oppos + opposers oppos opposes oppos + opposing oppos opposite opposit + opposites opposit opposition opposit + oppositions opposit oppress oppress + oppressed oppress oppresses oppress + oppresseth oppresseth oppressing oppress + oppression oppress oppressor oppressor + opprest opprest opprobriously opprobri + oppugnancy oppugn opulency opul + opulent opul or or + oracle oracl oracles oracl + orange orang oration orat + orator orat orators orat + oratory oratori orb orb + orbed orb orbs orb + orchard orchard orchards orchard + ord ord ordain ordain + ordained ordain ordaining ordain + order order ordered order + ordering order orderless orderless + orderly orderli orders order + ordinance ordin ordinant ordin + ordinaries ordinari ordinary ordinari + ordnance ordnanc ords ord + ordure ordur ore or + organ organ organs organ + orgillous orgil orient orient + orifex orifex origin origin + original origin orisons orison + ork ork orlando orlando + orld orld orleans orlean + ornament ornament ornaments ornament + orodes orod orphan orphan + orphans orphan orpheus orpheu + orsino orsino ort ort + orthography orthographi orts ort + oscorbidulchos oscorbidulcho osier osier + osiers osier osprey osprei + osr osr osric osric + ossa ossa ost ost + ostent ostent ostentare ostentar + ostentation ostent ostents ostent + ostler ostler ostlers ostler + ostrich ostrich osw osw + oswald oswald othello othello + other other othergates otherg + others other otherwhere otherwher + otherwhiles otherwhil otherwise otherwis + otter otter ottoman ottoman + ottomites ottomit oublie oubli + ouches ouch ought ought + oui oui ounce ounc + ounces ounc ouphes ouph + our our ours our + ourself ourself ourselves ourselv + ousel ousel out out + outbids outbid outbrave outbrav + outbraves outbrav outbreak outbreak + outcast outcast outcries outcri + outcry outcri outdar outdar + outdare outdar 
outdares outdar + outdone outdon outfac outfac + outface outfac outfaced outfac + outfacing outfac outfly outfli + outfrown outfrown outgo outgo + outgoes outgo outgrown outgrown + outjest outjest outlaw outlaw + outlawry outlawri outlaws outlaw + outliv outliv outlive outliv + outlives outliv outliving outliv + outlook outlook outlustres outlustr + outpriz outpriz outrage outrag + outrageous outrag outrages outrag + outran outran outright outright + outroar outroar outrun outrun + outrunning outrun outruns outrun + outscold outscold outscorn outscorn + outsell outsel outsells outsel + outside outsid outsides outsid + outspeaks outspeak outsport outsport + outstare outstar outstay outstai + outstood outstood outstretch outstretch + outstretched outstretch outstrike outstrik + outstrip outstrip outstripped outstrip + outswear outswear outvenoms outvenom + outward outward outwardly outwardli + outwards outward outwear outwear + outweighs outweigh outwent outwent + outworn outworn outworths outworth + oven oven over over + overawe overaw overbear overbear + overblown overblown overboard overboard + overbold overbold overborne overborn + overbulk overbulk overbuys overbui + overcame overcam overcast overcast + overcharg overcharg overcharged overcharg + overcome overcom overcomes overcom + overdone overdon overearnest overearnest + overfar overfar overflow overflow + overflown overflown overglance overgl + overgo overgo overgone overgon + overgorg overgorg overgrown overgrown + overhead overhead overhear overhear + overheard overheard overhold overhold + overjoyed overjoi overkind overkind + overland overland overleather overleath + overlive overl overlook overlook + overlooking overlook overlooks overlook + overmaster overmast overmounting overmount + overmuch overmuch overpass overpass + overpeer overp overpeering overp + overplus overplu overrul overrul + overrun overrun overscutch overscutch + overset overset overshades overshad + overshine overshin overshines 
overshin + overshot overshot oversights oversight + overspread overspread overstain overstain + overswear overswear overt overt + overta overta overtake overtak + overtaketh overtaketh overthrow overthrow + overthrown overthrown overthrows overthrow + overtook overtook overtopp overtopp + overture overtur overturn overturn + overwatch overwatch overween overween + overweening overween overweigh overweigh + overwhelm overwhelm overwhelming overwhelm + overworn overworn ovid ovid + ovidius ovidiu ow ow + owe ow owed ow + owedst owedst owen owen + owes ow owest owest + oweth oweth owing ow + owl owl owls owl + own own owner owner + owners owner owning own + owns own owy owi + ox ox oxen oxen + oxford oxford oxfordshire oxfordshir + oxlips oxlip oyes oy + oyster oyster p p + pabble pabbl pabylon pabylon + pac pac pace pace + paced pace paces pace + pacified pacifi pacify pacifi + pacing pace pack pack + packet packet packets packet + packhorses packhors packing pack + packings pack packs pack + packthread packthread pacorus pacoru + paction paction pad pad + paddle paddl paddling paddl + paddock paddock padua padua + pagan pagan pagans pagan + page page pageant pageant + pageants pageant pages page + pah pah paid paid + pail pail pailfuls pail + pails pail pain pain + pained pain painful pain + painfully painfulli pains pain + paint paint painted paint + painter painter painting paint + paintings paint paints paint + pair pair paired pair + pairs pair pajock pajock + pal pal palabras palabra + palace palac palaces palac + palamedes palamed palate palat + palates palat palatine palatin + palating palat pale pale + paled pale paleness pale + paler paler pales pale + palestine palestin palfrey palfrei + palfreys palfrei palisadoes palisado + pall pall pallabris pallabri + pallas palla pallets pallet + palm palm palmer palmer + palmers palmer palms palm + palmy palmi palpable palpabl + palsied palsi palsies palsi + palsy palsi palt palt + palter palter paltry paltri + paly 
pali pamp pamp + pamper pamper pamphlets pamphlet + pan pan pancackes pancack + pancake pancak pancakes pancak + pandar pandar pandars pandar + pandarus pandaru pander pander + panderly panderli panders pander + pandulph pandulph panel panel + pang pang panging pang + pangs pang pannier pannier + pannonians pannonian pansa pansa + pansies pansi pant pant + pantaloon pantaloon panted pant + pantheon pantheon panther panther + panthino panthino panting pant + pantingly pantingli pantler pantler + pantry pantri pants pant + pap pap papal papal + paper paper papers paper + paphlagonia paphlagonia paphos papho + papist papist paps pap + par par parable parabl + paracelsus paracelsu paradise paradis + paradox paradox paradoxes paradox + paragon paragon paragons paragon + parallel parallel parallels parallel + paramour paramour paramours paramour + parapets parapet paraquito paraquito + parasite parasit parasites parasit + parca parca parcel parcel + parcell parcel parcels parcel + parch parch parched parch + parching parch parchment parchment + pard pard pardon pardon + pardona pardona pardoned pardon + pardoner pardon pardoning pardon + pardonne pardonn pardonner pardonn + pardonnez pardonnez pardons pardon + pare pare pared pare + parel parel parent parent + parentage parentag parents parent + parfect parfect paring pare + parings pare paris pari + parish parish parishioners parishion + parisians parisian paritors paritor + park park parks park + parle parl parler parler + parles parl parley parlei + parlez parlez parliament parliament + parlors parlor parlour parlour + parlous parlou parmacity parmac + parolles parol parricide parricid + parricides parricid parrot parrot + parrots parrot parsley parslei + parson parson part part + partake partak partaken partaken + partaker partak partakers partak + parted part parthia parthia + parthian parthian parthians parthian + parti parti partial partial + partialize partial partially partial + participate particip 
participation particip + particle particl particular particular + particularities particular particularize particular + particularly particularli particulars particular + parties parti parting part + partisan partisan partisans partisan + partition partit partizan partizan + partlet partlet partly partli + partner partner partners partner + partridge partridg parts part + party parti pas pa + pash pash pashed pash + pashful pash pass pass + passable passabl passado passado + passage passag passages passag + passant passant passed pass + passenger passeng passengers passeng + passes pass passeth passeth + passing pass passio passio + passion passion passionate passion + passioning passion passions passion + passive passiv passport passport + passy passi past past + paste past pasterns pastern + pasties pasti pastime pastim + pastimes pastim pastoral pastor + pastorals pastor pastors pastor + pastry pastri pasture pastur + pastures pastur pasty pasti + pat pat patay patai + patch patch patchery patcheri + patches patch pate pate + pated pate patent patent + patents patent paternal patern + pates pate path path + pathetical pathet paths path + pathway pathwai pathways pathwai + patience patienc patient patient + patiently patient patients patient + patines patin patrician patrician + patricians patrician patrick patrick + patrimony patrimoni patroclus patroclu + patron patron patronage patronag + patroness patro patrons patron + patrum patrum patter patter + pattern pattern patterns pattern + pattle pattl pauca pauca + paucas pauca paul paul + paulina paulina paunch paunch + paunches paunch pause paus + pauser pauser pauses paus + pausingly pausingli pauvres pauvr + pav pav paved pave + pavement pavement pavilion pavilion + pavilions pavilion pavin pavin + paw paw pawn pawn + pawns pawn paws paw + pax pax pay pai + payest payest paying pai + payment payment payments payment + pays pai paysan paysan + paysans paysan pe pe + peace peac peaceable peaceabl + peaceably 
peaceabl peaceful peac + peacemakers peacemak peaces peac + peach peach peaches peach + peacock peacock peacocks peacock + peak peak peaking peak + peal peal peals peal + pear pear peard peard + pearl pearl pearls pearl + pears pear peas pea + peasant peasant peasantry peasantri + peasants peasant peascod peascod + pease peas peaseblossom peaseblossom + peat peat peaten peaten + peating peat pebble pebbl + pebbled pebbl pebbles pebbl + peck peck pecks peck + peculiar peculiar pecus pecu + pedant pedant pedantical pedant + pedascule pedascul pede pede + pedestal pedest pedigree pedigre + pedlar pedlar pedlars pedlar + pedro pedro peds ped + peel peel peep peep + peeped peep peeping peep + peeps peep peer peer + peereth peereth peering peer + peerless peerless peers peer + peesel peesel peevish peevish + peevishly peevishli peflur peflur + peg peg pegasus pegasu + pegs peg peise peis + peised peis peize peiz + pelf pelf pelican pelican + pelion pelion pell pell + pella pella pelleted pellet + peloponnesus peloponnesu pelt pelt + pelting pelt pembroke pembrok + pen pen penalties penalti + penalty penalti penance penanc + pence penc pencil pencil + pencill pencil pencils pencil + pendant pendant pendent pendent + pendragon pendragon pendulous pendul + penelope penelop penetrable penetr + penetrate penetr penetrative penetr + penitence penit penitent penit + penitential penitenti penitently penit + penitents penit penker penker + penknife penknif penn penn + penned pen penning pen + pennons pennon penny penni + pennyworth pennyworth pennyworths pennyworth + pens pen pense pens + pension pension pensioners pension + pensive pensiv pensived pensiv + pensively pensiv pent pent + pentecost pentecost penthesilea penthesilea + penthouse penthous penurious penuri + penury penuri peopl peopl + people peopl peopled peopl + peoples peopl pepin pepin + pepper pepper peppercorn peppercorn + peppered pepper per per + peradventure peradventur peradventures peradventur + perceiv 
perceiv perceive perceiv + perceived perceiv perceives perceiv + perceiveth perceiveth perch perch + perchance perchanc percies perci + percussion percuss percy perci + perdie perdi perdita perdita + perdition perdit perdonato perdonato + perdu perdu perdurable perdur + perdurably perdur perdy perdi + pere pere peregrinate peregrin + peremptorily peremptorili peremptory peremptori + perfect perfect perfected perfect + perfecter perfect perfectest perfectest + perfection perfect perfections perfect + perfectly perfectli perfectness perfect + perfidious perfidi perfidiously perfidi + perforce perforc perform perform + performance perform performances perform + performed perform performer perform + performers perform performing perform + performs perform perfum perfum + perfume perfum perfumed perfum + perfumer perfum perfumes perfum + perge perg perhaps perhap + periapts periapt perigort perigort + perigouna perigouna peril peril + perilous peril perils peril + period period periods period + perish perish perished perish + perishest perishest perisheth perisheth + perishing perish periwig periwig + perjur perjur perjure perjur + perjured perjur perjuries perjuri + perjury perjuri perk perk + perkes perk permafoy permafoi + permanent perman permission permiss + permissive permiss permit permit + permitted permit pernicious pernici + perniciously pernici peroration peror + perpend perpend perpendicular perpendicular + perpendicularly perpendicularli perpetual perpetu + perpetually perpetu perpetuity perpetu + perplex perplex perplexed perplex + perplexity perplex pers per + persecuted persecut persecutions persecut + persecutor persecutor perseus perseu + persever persev perseverance persever + persevers persev persia persia + persian persian persist persist + persisted persist persistency persist + persistive persist persists persist + person person personae persona + personage personag personages personag + personal person personally person + personate person 
personated person + personates person personating person + persons person perspective perspect + perspectively perspect perspectives perspect + perspicuous perspicu persuade persuad + persuaded persuad persuades persuad + persuading persuad persuasion persuas + persuasions persuas pert pert + pertain pertain pertaining pertain + pertains pertain pertaunt pertaunt + pertinent pertin pertly pertli + perturb perturb perturbation perturb + perturbations perturb perturbed perturb + perus peru perusal perus + peruse perus perused perus + perusing perus perverse pervers + perversely pervers perverseness pervers + pervert pervert perverted pervert + peseech peseech pest pest + pester pester pestiferous pestifer + pestilence pestil pestilent pestil + pet pet petar petar + peter peter petit petit + petition petit petitionary petitionari + petitioner petition petitioners petition + petitions petit peto peto + petrarch petrarch petruchio petruchio + petter petter petticoat petticoat + petticoats petticoat pettiness petti + pettish pettish pettitoes pettito + petty petti peu peu + pew pew pewter pewter + pewterer pewter phaethon phaethon + phaeton phaeton phantasime phantasim + phantasimes phantasim phantasma phantasma + pharamond pharamond pharaoh pharaoh + pharsalia pharsalia pheasant pheasant + pheazar pheazar phebe phebe + phebes phebe pheebus pheebu + pheeze pheez phibbus phibbu + philadelphos philadelpho philario philario + philarmonus philarmonu philemon philemon + philip philip philippan philippan + philippe philipp philippi philippi + phillida phillida philo philo + philomel philomel philomela philomela + philosopher philosoph philosophers philosoph + philosophical philosoph philosophy philosophi + philostrate philostr philotus philotu + phlegmatic phlegmat phoebe phoeb + phoebus phoebu phoenicia phoenicia + phoenicians phoenician phoenix phoenix + phorbus phorbu photinus photinu + phrase phrase phraseless phraseless + phrases phrase phrygia phrygia + phrygian phrygian 
phrynia phrynia + physic physic physical physic + physician physician physicians physician + physics physic pia pia + pibble pibbl pible pibl + picardy picardi pick pick + pickaxe pickax pickaxes pickax + pickbone pickbon picked pick + pickers picker picking pick + pickle pickl picklock picklock + pickpurse pickpurs picks pick + pickt pickt pickthanks pickthank + pictur pictur picture pictur + pictured pictur pictures pictur + pid pid pie pie + piec piec piece piec + pieces piec piecing piec + pied pi piedness pied + pier pier pierc pierc + pierce pierc pierced pierc + pierces pierc pierceth pierceth + piercing pierc piercy pierci + piers pier pies pi + piety pieti pig pig + pigeon pigeon pigeons pigeon + pight pight pigmy pigmi + pigrogromitus pigrogromitu pike pike + pikes pike pil pil + pilate pilat pilates pilat + pilchers pilcher pile pile + piles pile pilf pilf + pilfering pilfer pilgrim pilgrim + pilgrimage pilgrimag pilgrims pilgrim + pill pill pillage pillag + pillagers pillag pillar pillar + pillars pillar pillicock pillicock + pillory pillori pillow pillow + pillows pillow pills pill + pilot pilot pilots pilot + pimpernell pimpernel pin pin + pinch pinch pinched pinch + pinches pinch pinching pinch + pindarus pindaru pine pine + pined pine pines pine + pinfold pinfold pining pine + pinion pinion pink pink + pinn pinn pinnace pinnac + pins pin pinse pins + pint pint pintpot pintpot + pioned pion pioneers pioneer + pioner pioner pioners pioner + pious piou pip pip + pipe pipe piper piper + pipers piper pipes pipe + piping pipe pippin pippin + pippins pippin pirate pirat + pirates pirat pisa pisa + pisanio pisanio pish pish + pismires pismir piss piss + pissing piss pistol pistol + pistols pistol pit pit + pitch pitch pitched pitch + pitcher pitcher pitchers pitcher + pitchy pitchi piteous piteou + piteously piteous pitfall pitfal + pith pith pithless pithless + pithy pithi pitie piti + pitied piti pities piti + pitiful piti pitifully pitifulli + pitiless 
pitiless pits pit + pittance pittanc pittie pitti + pittikins pittikin pity piti + pitying piti pius piu + plac plac place place + placed place placentio placentio + places place placeth placeth + placid placid placing place + plack plack placket placket + plackets placket plagu plagu + plague plagu plagued plagu + plagues plagu plaguing plagu + plaguy plagui plain plain + plainer plainer plainest plainest + plaining plain plainings plain + plainly plainli plainness plain + plains plain plainsong plainsong + plaintful plaint plaintiff plaintiff + plaintiffs plaintiff plaints plaint + planched planch planet planet + planetary planetari planets planet + planks plank plant plant + plantage plantag plantagenet plantagenet + plantagenets plantagenet plantain plantain + plantation plantat planted plant + planteth planteth plants plant + plash plash plashy plashi + plast plast plaster plaster + plasterer plaster plat plat + plate plate plated plate + plates plate platform platform + platforms platform plats plat + platted plat plausible plausibl + plausive plausiv plautus plautu + play plai played plai + player player players player + playeth playeth playfellow playfellow + playfellows playfellow playhouse playhous + playing plai plays plai + plea plea pleach pleach + pleached pleach plead plead + pleaded plead pleader pleader + pleaders pleader pleading plead + pleads plead pleas plea + pleasance pleasanc pleasant pleasant + pleasantly pleasantli please pleas + pleased pleas pleaser pleaser + pleasers pleaser pleases pleas + pleasest pleasest pleaseth pleaseth + pleasing pleas pleasure pleasur + pleasures pleasur plebeians plebeian + plebeii plebeii plebs pleb + pledge pledg pledges pledg + pleines plein plenitude plenitud + plenteous plenteou plenteously plenteous + plenties plenti plentiful plenti + plentifully plentifulli plenty plenti + pless pless plessed pless + plessing pless pliant pliant + plied pli plies pli + plight plight plighted plight + plighter plighter 
plod plod + plodded plod plodders plodder + plodding plod plods plod + plood plood ploody ploodi + plot plot plots plot + plotted plot plotter plotter + plough plough ploughed plough + ploughman ploughman ploughmen ploughmen + plow plow plows plow + pluck pluck plucked pluck + plucker plucker plucking pluck + plucks pluck plue plue + plum plum plume plume + plumed plume plumes plume + plummet plummet plump plump + plumpy plumpi plums plum + plung plung plunge plung + plunged plung plural plural + plurisy plurisi plus plu + pluto pluto plutus plutu + ply ply po po + pocket pocket pocketing pocket + pockets pocket pocky pocki + pody podi poem poem + poesy poesi poet poet + poetical poetic poetry poetri + poets poet poictiers poictier + poinards poinard poins poin + point point pointblank pointblank + pointed point pointing point + points point pois poi + poise pois poising pois + poison poison poisoned poison + poisoner poison poisoning poison + poisonous poison poisons poison + poke poke poking poke + pol pol polack polack + polacks polack poland poland + pold pold pole pole + poleaxe poleax polecat polecat + polecats polecat polemon polemon + poles pole poli poli + policies polici policy polici + polish polish polished polish + politic polit politician politician + politicians politician politicly politicli + polixenes polixen poll poll + polluted pollut pollution pollut + polonius poloniu poltroons poltroon + polusion polus polydamus polydamu + polydore polydor polyxena polyxena + pomander pomand pomegranate pomegran + pomewater pomewat pomfret pomfret + pomgarnet pomgarnet pommel pommel + pomp pomp pompeius pompeiu + pompey pompei pompion pompion + pompous pompou pomps pomp + pond pond ponder ponder + ponderous ponder ponds pond + poniard poniard poniards poniard + pont pont pontic pontic + pontifical pontif ponton ponton + pooh pooh pool pool + poole pool poop poop + poor poor poorer poorer + poorest poorest poorly poorli + pop pop pope pope + popedom popedom 
popilius popiliu + popingay popingai popish popish + popp popp poppy poppi + pops pop popular popular + popularity popular populous popul + porch porch porches porch + pore pore poring pore + pork pork porn porn + porpentine porpentin porridge porridg + porringer porring port port + portable portabl portage portag + portal portal portance portanc + portcullis portculli portend portend + portends portend portent portent + portentous portent portents portent + porter porter porters porter + portia portia portion portion + portly portli portotartarossa portotartarossa + portrait portrait portraiture portraitur + ports port portugal portug + pose pose posied posi + posies posi position posit + positive posit positively posit + posse poss possess possess + possessed possess possesses possess + possesseth possesseth possessing possess + possession possess possessions possess + possessor possessor posset posset + possets posset possibilities possibl + possibility possibl possible possibl + possibly possibl possitable possit + post post poste post + posted post posterior posterior + posteriors posterior posterity poster + postern postern posterns postern + posters poster posthorse posthors + posthorses posthors posthumus posthumu + posting post postmaster postmast + posts post postscript postscript + posture postur postures postur + posy posi pot pot + potable potabl potations potat + potato potato potatoes potato + potch potch potency potenc + potent potent potentates potent + potential potenti potently potent + potents potent pothecary pothecari + pother pother potion potion + potions potion potpan potpan + pots pot potter potter + potting pot pottle pottl + pouch pouch poulter poulter + poultice poultic poultney poultnei + pouncet pouncet pound pound + pounds pound pour pour + pourest pourest pouring pour + pourquoi pourquoi pours pour + pout pout poverty poverti + pow pow powd powd + powder powder power power + powerful power powerfully powerfulli + powerless powerless 
powers power + pox pox poys poi + poysam poysam prabbles prabbl + practic practic practice practic + practiced practic practicer practic + practices practic practicing practic + practis practi practisants practis + practise practis practiser practis + practisers practis practises practis + practising practis praeclarissimus praeclarissimu + praemunire praemunir praetor praetor + praetors praetor pragging prag + prague pragu prain prain + prains prain prais prai + praise prais praised prais + praises prais praisest praisest + praiseworthy praiseworthi praising prais + prancing pranc prank prank + pranks prank prat prat + prate prate prated prate + prater prater prating prate + prattle prattl prattler prattler + prattling prattl prave prave + prawls prawl prawns prawn + pray prai prayer prayer + prayers prayer praying prai + prays prai pre pre + preach preach preached preach + preachers preacher preaches preach + preaching preach preachment preachment + pread pread preambulate preambul + precedence preced precedent preced + preceding preced precept precept + preceptial precepti precepts precept + precinct precinct precious preciou + preciously precious precipice precipic + precipitating precipit precipitation precipit + precise precis precisely precis + preciseness precis precisian precisian + precor precor precurse precurs + precursors precursor predeceased predeceas + predecessor predecessor predecessors predecessor + predestinate predestin predicament predica + predict predict prediction predict + predictions predict predominance predomin + predominant predomin predominate predomin + preeches preech preeminence preemin + preface prefac prefer prefer + preferment prefer preferments prefer + preferr preferr preferreth preferreth + preferring prefer prefers prefer + prefiguring prefigur prefix prefix + prefixed prefix preformed preform + pregnancy pregnanc pregnant pregnant + pregnantly pregnantli prejudicates prejud + prejudice prejudic prejudicial prejudici + 
prelate prelat premeditated premedit + premeditation premedit premised premis + premises premis prenez prenez + prenominate prenomin prentice prentic + prentices prentic preordinance preordin + prepar prepar preparation prepar + preparations prepar prepare prepar + prepared prepar preparedly preparedli + prepares prepar preparing prepar + prepost prepost preposterous preposter + preposterously preposter prerogatifes prerogatif + prerogative prerog prerogatived prerogativ + presage presag presagers presag + presages presag presageth presageth + presaging presag prescience prescienc + prescribe prescrib prescript prescript + prescription prescript prescriptions prescript + prescripts prescript presence presenc + presences presenc present present + presentation present presented present + presenter present presenters present + presenteth presenteth presenting present + presently present presentment present + presents present preserv preserv + preservation preserv preservative preserv + preserve preserv preserved preserv + preserver preserv preservers preserv + preserving preserv president presid + press press pressed press + presser presser presses press + pressing press pressure pressur + pressures pressur prest prest + prester prester presume presum + presumes presum presuming presum + presumption presumpt presumptuous presumptu + presuppos presuppo pret pret + pretence pretenc pretences pretenc + pretend pretend pretended pretend + pretending pretend pretense pretens + pretext pretext pretia pretia + prettier prettier prettiest prettiest + prettily prettili prettiness pretti + pretty pretti prevail prevail + prevailed prevail prevaileth prevaileth + prevailing prevail prevailment prevail + prevails prevail prevent prevent + prevented prevent prevention prevent + preventions prevent prevents prevent + prey prei preyful prey + preys prei priam priam + priami priami priamus priamu + pribbles pribbl price price + prick prick pricked prick + pricket pricket pricking 
prick + pricks prick pricksong pricksong + pride pride prides pride + pridge pridg prie prie + pried pri prief prief + pries pri priest priest + priesthood priesthood priests priest + prig prig primal primal + prime prime primer primer + primero primero primest primest + primitive primit primo primo + primogenity primogen primrose primros + primroses primros primy primi + prince princ princely princ + princes princ princess princess + principal princip principalities princip + principality princip principle principl + principles principl princox princox + prings pring print print + printed print printing print + printless printless prints print + prioress prioress priories priori + priority prioriti priory priori + priscian priscian prison prison + prisoner prison prisoners prison + prisonment prison prisonnier prisonni + prisons prison pristine pristin + prithe prith prithee prithe + privacy privaci private privat + privately privat privates privat + privilage privilag privileg privileg + privilege privileg privileged privileg + privileges privileg privilegio privilegio + privily privili privity priviti + privy privi priz priz + prize prize prized prize + prizer prizer prizes prize + prizest prizest prizing prize + pro pro probable probabl + probal probal probation probat + proceed proce proceeded proceed + proceeders proceed proceeding proceed + proceedings proceed proceeds proce + process process procession process + proclaim proclaim proclaimed proclaim + proclaimeth proclaimeth proclaims proclaim + proclamation proclam proclamations proclam + proconsul proconsul procrastinate procrastin + procreant procreant procreants procreant + procreation procreat procrus procru + proculeius proculeiu procur procur + procurator procur procure procur + procured procur procures procur + procuring procur prodigal prodig + prodigality prodig prodigally prodig + prodigals prodig prodigies prodigi + prodigious prodigi prodigiously prodigi + prodigy prodigi proditor proditor + 
produc produc produce produc + produced produc produces produc + producing produc proface profac + profan profan profanation profan + profane profan profaned profan + profanely profan profaneness profan + profaners profan profaning profan + profess profess professed profess + professes profess profession profess + professions profess professors professor + proffer proffer proffered proffer + profferer proffer proffers proffer + proficient profici profit profit + profitable profit profitably profit + profited profit profiting profit + profitless profitless profits profit + profound profound profoundest profoundest + profoundly profoundli progenitors progenitor + progeny progeni progne progn + prognosticate prognost prognostication prognost + progress progress progression progress + prohibit prohibit prohibition prohibit + project project projection project + projects project prolixious prolixi + prolixity prolix prologue prologu + prologues prologu prolong prolong + prolongs prolong promethean promethean + prometheus prometheu promis promi + promise promis promised promis + promises promis promiseth promiseth + promising promis promontory promontori + promotion promot promotions promot + prompt prompt prompted prompt + promptement promptement prompter prompter + prompting prompt prompts prompt + prompture promptur promulgate promulg + prone prone prononcer prononc + prononcez prononcez pronoun pronoun + pronounc pronounc pronounce pronounc + pronounced pronounc pronouncing pronounc + pronouns pronoun proof proof + proofs proof prop prop + propagate propag propagation propag + propend propend propension propens + proper proper properer proper + properly properli propertied properti + properties properti property properti + prophecies propheci prophecy propheci + prophesied prophesi prophesier prophesi + prophesy prophesi prophesying prophesi + prophet prophet prophetess prophetess + prophetic prophet prophetically prophet + prophets prophet propinquity propinqu + 
propontic propont proportion proport + proportionable proportion proportions proport + propos propo propose propos + proposed propos proposer propos + proposes propos proposing propos + proposition proposit propositions proposit + propounded propound propp propp + propre propr propriety proprieti + props prop propugnation propugn + prorogue prorogu prorogued prorogu + proscription proscript proscriptions proscript + prose prose prosecute prosecut + prosecution prosecut proselytes proselyt + proserpina proserpina prosp prosp + prospect prospect prosper prosper + prosperity prosper prospero prospero + prosperous prosper prosperously prosper + prospers prosper prostitute prostitut + prostrate prostrat protect protect + protected protect protection protect + protector protector protectors protector + protectorship protectorship protectress protectress + protects protect protest protest + protestation protest protestations protest + protested protest protester protest + protesting protest protests protest + proteus proteu protheus protheu + protract protract protractive protract + proud proud prouder prouder + proudest proudest proudlier proudlier + proudly proudli prouds proud + prov prov provand provand + prove prove proved prove + provender provend proverb proverb + proverbs proverb proves prove + proveth proveth provide provid + provided provid providence provid + provident provid providently provid + provider provid provides provid + province provinc provinces provinc + provincial provinci proving prove + provision provis proviso proviso + provocation provoc provok provok + provoke provok provoked provok + provoker provok provokes provok + provoketh provoketh provoking provok + provost provost prowess prowess + prudence prudenc prudent prudent + prun prun prune prune + prunes prune pruning prune + pry pry prying pry + psalm psalm psalmist psalmist + psalms psalm psalteries psalteri + ptolemies ptolemi ptolemy ptolemi + public public publican publican + publication 
public publicly publicli + publicola publicola publish publish + published publish publisher publish + publishing publish publius publiu + pucelle pucel puck puck + pudder pudder pudding pud + puddings pud puddle puddl + puddled puddl pudency pudenc + pueritia pueritia puff puff + puffing puf puffs puff + pugging pug puis pui + puissance puissanc puissant puissant + puke puke puking puke + pulcher pulcher puling pule + pull pull puller puller + pullet pullet pulling pull + pulls pull pulpit pulpit + pulpiter pulpit pulpits pulpit + pulse puls pulsidge pulsidg + pump pump pumpion pumpion + pumps pump pun pun + punched punch punish punish + punished punish punishes punish + punishment punish punishments punish + punk punk punto punto + puny puni pupil pupil + pupils pupil puppet puppet + puppets puppet puppies puppi + puppy puppi pur pur + purblind purblind purchas purcha + purchase purchas purchased purchas + purchases purchas purchaseth purchaseth + purchasing purchas pure pure + purely pure purer purer + purest purest purg purg + purgation purgat purgative purg + purgatory purgatori purge purg + purged purg purgers purger + purging purg purifies purifi + purifying purifi puritan puritan + purity puriti purlieus purlieu + purple purpl purpled purpl + purples purpl purport purport + purpos purpo purpose purpos + purposed purpos purposely purpos + purposes purpos purposeth purposeth + purposing purpos purr purr + purs pur purse purs + pursents pursent purses purs + pursu pursu pursue pursu + pursued pursu pursuers pursuer + pursues pursu pursuest pursuest + pursueth pursueth pursuing pursu + pursuit pursuit pursuivant pursuiv + pursuivants pursuiv pursy pursi + purus puru purveyor purveyor + push push pushes push + pusillanimity pusillanim put put + putrefy putrefi putrified putrifi + puts put putter putter + putting put puttock puttock + puzzel puzzel puzzle puzzl + puzzled puzzl puzzles puzzl + py py pygmalion pygmalion + pygmies pygmi pygmy pygmi + pyramid pyramid 
pyramides pyramid + pyramids pyramid pyramis pyrami + pyramises pyramis pyramus pyramu + pyrenean pyrenean pyrrhus pyrrhu + pythagoras pythagora qu qu + quadrangle quadrangl quae quae + quaff quaff quaffing quaf + quagmire quagmir quail quail + quailing quail quails quail + quaint quaint quaintly quaintli + quak quak quake quak + quakes quak qualification qualif + qualified qualifi qualifies qualifi + qualify qualifi qualifying qualifi + qualite qualit qualities qualiti + quality qualiti qualm qualm + qualmish qualmish quam quam + quand quand quando quando + quantities quantiti quantity quantiti + quare quar quarrel quarrel + quarrell quarrel quarreller quarrel + quarrelling quarrel quarrelous quarrel + quarrels quarrel quarrelsome quarrelsom + quarries quarri quarry quarri + quart quart quarter quarter + quartered quarter quartering quarter + quarters quarter quarts quart + quasi quasi quat quat + quatch quatch quay quai + que que quean quean + queas quea queasiness queasi + queasy queasi queen queen + queens queen quell quell + queller queller quench quench + quenched quench quenching quench + quenchless quenchless quern quern + quest quest questant questant + question question questionable question + questioned question questioning question + questionless questionless questions question + questrists questrist quests quest + queubus queubu qui qui + quick quick quicken quicken + quickens quicken quicker quicker + quicklier quicklier quickly quickli + quickness quick quicksand quicksand + quicksands quicksand quicksilverr quicksilverr + quid quid quiddities quidditi + quiddits quiddit quier quier + quiet quiet quieter quieter + quietly quietli quietness quiet + quietus quietu quill quill + quillets quillet quills quill + quilt quilt quinapalus quinapalu + quince quinc quinces quinc + quintain quintain quintessence quintess + quintus quintu quip quip + quips quip quire quir + quiring quir quirk quirk + quirks quirk quis qui + quit quit quite quit + quits quit 
quittance quittanc + quitted quit quitting quit + quiver quiver quivering quiver + quivers quiver quo quo + quod quod quoifs quoif + quoint quoint quoit quoit + quoits quoit quondam quondam + quoniam quoniam quote quot + quoted quot quotes quot + quoth quoth quotidian quotidian + r r rabbit rabbit + rabble rabbl rabblement rabblement + race race rack rack + rackers racker racket racket + rackets racket racking rack + racks rack radiance radianc + radiant radiant radish radish + rafe rafe raft raft + rag rag rage rage + rages rage rageth rageth + ragg ragg ragged rag + raggedness ragged raging rage + ragozine ragozin rags rag + rah rah rail rail + railed rail railer railer + railest railest raileth raileth + railing rail rails rail + raiment raiment rain rain + rainbow rainbow raineth raineth + raining rain rainold rainold + rains rain rainy raini + rais rai raise rais + raised rais raises rais + raising rais raisins raisin + rak rak rake rake + rakers raker rakes rake + ral ral rald rald + ralph ralph ram ram + rambures rambur ramm ramm + rampallian rampallian rampant rampant + ramping ramp rampir rampir + ramps ramp rams ram + ramsey ramsei ramston ramston + ran ran rance ranc + rancorous rancor rancors rancor + rancour rancour random random + rang rang range rang + ranged rang rangers ranger + ranges rang ranging rang + rank rank ranker ranker + rankest rankest ranking rank + rankle rankl rankly rankli + rankness rank ranks rank + ransack ransack ransacking ransack + ransom ransom ransomed ransom + ransoming ransom ransomless ransomless + ransoms ransom rant rant + ranting rant rap rap + rape rape rapes rape + rapier rapier rapiers rapier + rapine rapin raps rap + rapt rapt rapture raptur + raptures raptur rar rar + rare rare rarely rare + rareness rare rarer rarer + rarest rarest rarities rariti + rarity rariti rascal rascal + rascalliest rascalliest rascally rascal + rascals rascal rased rase + rash rash rasher rasher + rashly rashli rashness rash + rat rat 
ratcatcher ratcatch + ratcliff ratcliff rate rate + rated rate rately rate + rates rate rather rather + ratherest ratherest ratified ratifi + ratifiers ratifi ratify ratifi + rating rate rational ration + ratolorum ratolorum rats rat + ratsbane ratsban rattle rattl + rattles rattl rattling rattl + rature ratur raught raught + rav rav rave rave + ravel ravel raven raven + ravening raven ravenous raven + ravens raven ravenspurgh ravenspurgh + raves rave ravin ravin + raving rave ravish ravish + ravished ravish ravisher ravish + ravishing ravish ravishments ravish + raw raw rawer rawer + rawly rawli rawness raw + ray rai rayed rai + rays rai raz raz + raze raze razed raze + razes raze razeth razeth + razing raze razor razor + razorable razor razors razor + razure razur re re + reach reach reaches reach + reacheth reacheth reaching reach + read read reader reader + readiest readiest readily readili + readiness readi reading read + readins readin reads read + ready readi real real + really realli realm realm + realms realm reap reap + reapers reaper reaping reap + reaps reap rear rear + rears rear rearward rearward + reason reason reasonable reason + reasonably reason reasoned reason + reasoning reason reasonless reasonless + reasons reason reave reav + rebate rebat rebato rebato + rebeck rebeck rebel rebel + rebell rebel rebelling rebel + rebellion rebellion rebellious rebelli + rebels rebel rebound rebound + rebuk rebuk rebuke rebuk + rebukeable rebuk rebuked rebuk + rebukes rebuk rebus rebu + recall recal recant recant + recantation recant recanter recant + recanting recant receipt receipt + receipts receipt receiv receiv + receive receiv received receiv + receiver receiv receives receiv + receivest receivest receiveth receiveth + receiving receiv receptacle receptacl + rechate rechat reciprocal reciproc + reciprocally reciproc recite recit + recited recit reciterai reciterai + reck reck recking reck + reckless reckless reckon reckon + reckoned reckon reckoning 
reckon + reckonings reckon recks reck + reclaim reclaim reclaims reclaim + reclusive reclus recognizance recogniz + recognizances recogniz recoil recoil + recoiling recoil recollected recollect + recomforted recomfort recomforture recomfortur + recommend recommend recommended recommend + recommends recommend recompens recompen + recompense recompens reconcil reconcil + reconcile reconcil reconciled reconcil + reconcilement reconcil reconciler reconcil + reconciles reconcil reconciliation reconcili + record record recordation record + recorded record recorder record + recorders record records record + recount recount recounted recount + recounting recount recountments recount + recounts recount recourse recours + recov recov recover recov + recoverable recover recovered recov + recoveries recoveri recovers recov + recovery recoveri recreant recreant + recreants recreant recreate recreat + recreation recreat rectify rectifi + rector rector rectorship rectorship + recure recur recured recur + red red redbreast redbreast + redder redder reddest reddest + rede rede redeem redeem + redeemed redeem redeemer redeem + redeeming redeem redeems redeem + redeliver redeliv redemption redempt + redime redim redness red + redoubled redoubl redoubted redoubt + redound redound redress redress + redressed redress redresses redress + reduce reduc reechy reechi + reed reed reeds reed + reek reek reeking reek + reeks reek reeky reeki + reel reel reeleth reeleth + reeling reel reels reel + refell refel refer refer + reference refer referr referr + referred refer refigured refigur + refin refin refined refin + reflect reflect reflecting reflect + reflection reflect reflex reflex + reform reform reformation reform + reformed reform refractory refractori + refrain refrain refresh refresh + refreshing refresh reft reft + refts reft refuge refug + refus refu refusal refus + refuse refus refused refus + refusest refusest refusing refus + reg reg regal regal + regalia regalia regan regan + 
regard regard regardance regard + regarded regard regardfully regardfulli + regarding regard regards regard + regenerate regener regent regent + regentship regentship regia regia + regiment regiment regiments regiment + regina regina region region + regions region regist regist + register regist registers regist + regreet regreet regreets regreet + regress regress reguerdon reguerdon + regular regular rehears rehear + rehearsal rehears rehearse rehears + reign reign reigned reign + reignier reignier reigning reign + reigns reign rein rein + reinforc reinforc reinforce reinforc + reinforcement reinforc reins rein + reiterate reiter reject reject + rejected reject rejoic rejoic + rejoice rejoic rejoices rejoic + rejoiceth rejoiceth rejoicing rejoic + rejoicingly rejoicingli rejoindure rejoindur + rejourn rejourn rel rel + relapse relaps relate relat + relates relat relation relat + relations relat relative rel + releas relea release releas + released releas releasing releas + relent relent relenting relent + relents relent reliances relianc + relics relic relief relief + reliev reliev relieve reliev + relieved reliev relieves reliev + relieving reliev religion religion + religions religion religious religi + religiously religi relinquish relinquish + reliques reliqu reliquit reliquit + relish relish relume relum + rely reli relying reli + remain remain remainder remaind + remainders remaind remained remain + remaineth remaineth remaining remain + remains remain remark remark + remarkable remark remediate remedi + remedied remedi remedies remedi + remedy remedi rememb rememb + remember rememb remembered rememb + remembers rememb remembrance remembr + remembrancer remembranc remembrances remembr + remercimens remercimen remiss remiss + remission remiss remissness remiss + remit remit remnant remnant + remnants remnant remonstrance remonstr + remorse remors remorseful remors + remorseless remorseless remote remot + remotion remot remov remov + remove remov removed remov 
+ removedness removed remover remov + removes remov removing remov + remunerate remuner remuneration remuner + rence renc rend rend + render render rendered render + renders render rendezvous rendezv + renegado renegado renege reneg + reneges reneg renew renew + renewed renew renewest renewest + renounce renounc renouncement renounc + renouncing renounc renowmed renowm + renown renown renowned renown + rent rent rents rent + repaid repaid repair repair + repaired repair repairing repair + repairs repair repass repass + repast repast repasture repastur + repay repai repaying repai + repays repai repeal repeal + repealing repeal repeals repeal + repeat repeat repeated repeat + repeating repeat repeats repeat + repel repel repent repent + repentance repent repentant repent + repented repent repenting repent + repents repent repetition repetit + repetitions repetit repin repin + repine repin repining repin + replant replant replenish replenish + replenished replenish replete replet + replication replic replied repli + replies repli repliest repliest + reply repli replying repli + report report reported report + reporter report reportest reportest + reporting report reportingly reportingli + reports report reposal repos + repose repos reposeth reposeth + reposing repos repossess repossess + reprehend reprehend reprehended reprehend + reprehending reprehend represent repres + representing repres reprieve repriev + reprieves repriev reprisal repris + reproach reproach reproaches reproach + reproachful reproach reproachfully reproachfulli + reprobate reprob reprobation reprob + reproof reproof reprov reprov + reprove reprov reproveable reprov + reproves reprov reproving reprov + repugn repugn repugnancy repugn + repugnant repugn repulse repuls + repulsed repuls repurchas repurcha + repured repur reputation reput + repute reput reputed reput + reputeless reputeless reputes reput + reputing reput request request + requested request requesting request + requests request 
requiem requiem + requir requir require requir + required requir requires requir + requireth requireth requiring requir + requisite requisit requisites requisit + requit requit requital requit + requite requit requited requit + requites requit rer rer + rere rere rers rer + rescu rescu rescue rescu + rescued rescu rescues rescu + rescuing rescu resemblance resembl + resemble resembl resembled resembl + resembles resembl resembleth resembleth + resembling resembl reserv reserv + reservation reserv reserve reserv + reserved reserv reserves reserv + reside resid residence resid + resident resid resides resid + residing resid residue residu + resign resign resignation resign + resist resist resistance resist + resisted resist resisting resist + resists resist resolute resolut + resolutely resolut resolutes resolut + resolution resolut resolv resolv + resolve resolv resolved resolv + resolvedly resolvedli resolves resolv + resolveth resolveth resort resort + resorted resort resounding resound + resounds resound respeaking respeak + respect respect respected respect + respecting respect respective respect + respectively respect respects respect + respice respic respite respit + respites respit responsive respons + respose respos ress ress + rest rest rested rest + resteth resteth restful rest + resting rest restitution restitut + restless restless restor restor + restoration restor restorative restor + restore restor restored restor + restores restor restoring restor + restrain restrain restrained restrain + restraining restrain restrains restrain + restraint restraint rests rest + resty resti resum resum + resume resum resumes resum + resurrections resurrect retail retail + retails retail retain retain + retainers retain retaining retain + retell retel retention retent + retentive retent retinue retinu + retir retir retire retir + retired retir retirement retir + retires retir retiring retir + retold retold retort retort + retorts retort retourne retourn + retract 
retract retreat retreat + retrograde retrograd rets ret + return return returned return + returnest returnest returneth returneth + returning return returns return + revania revania reveal reveal + reveals reveal revel revel + reveler revel revell revel + reveller revel revellers revel + revelling revel revelry revelri + revels revel reveng reveng + revenge reveng revenged reveng + revengeful reveng revengement reveng + revenger reveng revengers reveng + revenges reveng revenging reveng + revengingly revengingli revenue revenu + revenues revenu reverb reverb + reverberate reverber reverbs reverb + reverenc reverenc reverence rever + reverend reverend reverent rever + reverently rever revers rever + reverse revers reversion revers + reverted revert review review + reviewest reviewest revil revil + revile revil revisits revisit + reviv reviv revive reviv + revives reviv reviving reviv + revok revok revoke revok + revokement revok revolt revolt + revolted revolt revolting revolt + revolts revolt revolution revolut + revolutions revolut revolve revolv + revolving revolv reward reward + rewarded reward rewarder reward + rewarding reward rewards reward + reword reword reworded reword + rex rex rey rei + reynaldo reynaldo rford rford + rful rful rfull rfull + rhapsody rhapsodi rheims rheim + rhenish rhenish rhesus rhesu + rhetoric rhetor rheum rheum + rheumatic rheumat rheums rheum + rheumy rheumi rhinoceros rhinocero + rhodes rhode rhodope rhodop + rhubarb rhubarb rhym rhym + rhyme rhyme rhymers rhymer + rhymes rhyme rhyming rhyme + rialto rialto rib rib + ribald ribald riband riband + ribands riband ribaudred ribaudr + ribb ribb ribbed rib + ribbon ribbon ribbons ribbon + ribs rib rice rice + rich rich richard richard + richer richer riches rich + richest richest richly richli + richmond richmond richmonds richmond + rid rid riddance riddanc + ridden ridden riddle riddl + riddles riddl riddling riddl + ride ride rider rider + riders rider rides ride + ridest ridest 
rideth rideth + ridge ridg ridges ridg + ridiculous ridicul riding ride + rids rid rien rien + ries ri rifle rifl + rift rift rifted rift + rig rig rigg rigg + riggish riggish right right + righteous righteou righteously righteous + rightful right rightfully rightfulli + rightly rightli rights right + rigol rigol rigorous rigor + rigorously rigor rigour rigour + ril ril rim rim + rin rin rinaldo rinaldo + rind rind ring ring + ringing ring ringleader ringlead + ringlets ringlet rings ring + ringwood ringwood riot riot + rioter rioter rioting riot + riotous riotou riots riot + rip rip ripe ripe + ripely ripe ripen ripen + ripened ripen ripeness ripe + ripening ripen ripens ripen + riper riper ripest ripest + riping ripe ripp ripp + ripping rip rise rise + risen risen rises rise + riseth riseth rish rish + rising rise rite rite + rites rite rivage rivag + rival rival rivality rival + rivall rival rivals rival + rive rive rived rive + rivelled rivel river river + rivers river rivet rivet + riveted rivet rivets rivet + rivo rivo rj rj + rless rless road road + roads road roam roam + roaming roam roan roan + roar roar roared roar + roarers roarer roaring roar + roars roar roast roast + roasted roast rob rob + roba roba robas roba + robb robb robbed rob + robber robber robbers robber + robbery robberi robbing rob + robe robe robed robe + robert robert robes robe + robin robin robs rob + robustious robusti rochester rochest + rochford rochford rock rock + rocks rock rocky rocki + rod rod rode rode + roderigo roderigo rods rod + roe roe roes roe + roger roger rogero rogero + rogue rogu roguery rogueri + rogues rogu roguish roguish + roi roi roisting roist + roll roll rolled roll + rolling roll rolls roll + rom rom romage romag + roman roman romano romano + romanos romano romans roman + rome rome romeo romeo + romish romish rondure rondur + ronyon ronyon rood rood + roof roof roofs roof + rook rook rooks rook + rooky rooki room room + rooms room root root + rooted root 
rootedly rootedli + rooteth rooteth rooting root + roots root rope rope + ropery roperi ropes rope + roping rope ros ro + rosalind rosalind rosalinda rosalinda + rosalinde rosalind rosaline rosalin + roscius rosciu rose rose + rosed rose rosemary rosemari + rosencrantz rosencrantz roses rose + ross ross rosy rosi + rot rot rote rote + roted rote rother rother + rotherham rotherham rots rot + rotted rot rotten rotten + rottenness rotten rotting rot + rotundity rotund rouen rouen + rough rough rougher rougher + roughest roughest roughly roughli + roughness rough round round + rounded round roundel roundel + rounder rounder roundest roundest + rounding round roundly roundli + rounds round roundure roundur + rous rou rouse rous + roused rous rousillon rousillon + rously rousli roussi roussi + rout rout routed rout + routs rout rove rove + rover rover row row + rowel rowel rowland rowland + rowlands rowland roy roi + royal royal royalize royal + royally royal royalties royalti + royalty royalti roynish roynish + rs rs rt rt + rub rub rubb rubb + rubbing rub rubbish rubbish + rubies rubi rubious rubiou + rubs rub ruby rubi + rud rud rudand rudand + rudder rudder ruddiness ruddi + ruddock ruddock ruddy ruddi + rude rude rudely rude + rudeness rude ruder ruder + rudesby rudesbi rudest rudest + rudiments rudiment rue rue + rued ru ruff ruff + ruffian ruffian ruffians ruffian + ruffle ruffl ruffling ruffl + ruffs ruff rug rug + rugby rugbi rugemount rugemount + rugged rug ruin ruin + ruinate ruinat ruined ruin + ruining ruin ruinous ruinou + ruins ruin rul rul + rule rule ruled rule + ruler ruler rulers ruler + rules rule ruling rule + rumble rumbl ruminaies ruminai + ruminat ruminat ruminate rumin + ruminated rumin ruminates rumin + rumination rumin rumor rumor + rumour rumour rumourer rumour + rumours rumour rump rump + run run runagate runag + runagates runag runaway runawai + runaways runawai rung rung + runn runn runner runner + runners runner running run + runs run 
rupture ruptur + ruptures ruptur rural rural + rush rush rushes rush + rushing rush rushling rushl + rushy rushi russet russet + russia russia russian russian + russians russian rust rust + rusted rust rustic rustic + rustically rustic rustics rustic + rustle rustl rustling rustl + rusts rust rusty rusti + rut rut ruth ruth + ruthful ruth ruthless ruthless + rutland rutland ruttish ruttish + ry ry rye rye + rything ryth s s + sa sa saba saba + sabbath sabbath sable sabl + sables sabl sack sack + sackbuts sackbut sackcloth sackcloth + sacked sack sackerson sackerson + sacks sack sacrament sacrament + sacred sacr sacrific sacrif + sacrifice sacrific sacrificers sacrific + sacrifices sacrific sacrificial sacrifici + sacrificing sacrif sacrilegious sacrilegi + sacring sacr sad sad + sadder sadder saddest saddest + saddle saddl saddler saddler + saddles saddl sadly sadli + sadness sad saf saf + safe safe safeguard safeguard + safely safe safer safer + safest safest safeties safeti + safety safeti saffron saffron + sag sag sage sage + sagittary sagittari said said + saidst saidst sail sail + sailing sail sailmaker sailmak + sailor sailor sailors sailor + sails sail sain sain + saint saint sainted saint + saintlike saintlik saints saint + saith saith sake sake + sakes sake sala sala + salad salad salamander salamand + salary salari sale sale + salerio salerio salicam salicam + salique saliqu salisbury salisburi + sall sall sallet sallet + sallets sallet sallies salli + sallow sallow sally salli + salmon salmon salmons salmon + salt salt salter salter + saltiers saltier saltness salt + saltpetre saltpetr salutation salut + salutations salut salute salut + saluted salut salutes salut + saluteth saluteth salv salv + salvation salvat salve salv + salving salv same same + samingo samingo samp samp + sampire sampir sample sampl + sampler sampler sampson sampson + samson samson samsons samson + sancta sancta sanctified sanctifi + sanctifies sanctifi sanctify sanctifi + 
sanctimonies sanctimoni sanctimonious sanctimoni + sanctimony sanctimoni sanctities sanctiti + sanctity sanctiti sanctuarize sanctuar + sanctuary sanctuari sand sand + sandal sandal sandbag sandbag + sanded sand sands sand + sandy sandi sandys sandi + sang sang sanguine sanguin + sanguis sangui sanity saniti + sans san santrailles santrail + sap sap sapient sapient + sapit sapit sapless sapless + sapling sapl sapphire sapphir + sapphires sapphir saracens saracen + sarcenet sarcenet sard sard + sardians sardian sardinia sardinia + sardis sardi sarum sarum + sat sat satan satan + satchel satchel sate sate + sated sate satiate satiat + satiety satieti satin satin + satire satir satirical satir + satis sati satisfaction satisfact + satisfied satisfi satisfies satisfi + satisfy satisfi satisfying satisfi + saturday saturdai saturdays saturdai + saturn saturn saturnine saturnin + saturninus saturninu satyr satyr + satyrs satyr sauc sauc + sauce sauc sauced sauc + saucers saucer sauces sauc + saucily saucili sauciness sauci + saucy sauci sauf sauf + saunder saunder sav sav + savage savag savagely savag + savageness savag savagery savageri + savages savag save save + saved save saves save + saving save saviour saviour + savory savori savour savour + savouring savour savours savour + savoury savouri savoy savoi + saw saw sawed saw + sawest sawest sawn sawn + sawpit sawpit saws saw + sawyer sawyer saxons saxon + saxony saxoni saxton saxton + say sai sayest sayest + saying sai sayings sai + says sai sayst sayst + sblood sblood sc sc + scab scab scabbard scabbard + scabs scab scaffold scaffold + scaffoldage scaffoldag scal scal + scald scald scalded scald + scalding scald scale scale + scaled scale scales scale + scaling scale scall scall + scalp scalp scalps scalp + scaly scali scamble scambl + scambling scambl scamels scamel + scan scan scandal scandal + scandaliz scandaliz scandalous scandal + scandy scandi scann scann + scant scant scanted scant + scanter scanter scanting 
scant + scantling scantl scants scant + scap scap scape scape + scaped scape scapes scape + scapeth scapeth scar scar + scarce scarc scarcely scarc + scarcity scarciti scare scare + scarecrow scarecrow scarecrows scarecrow + scarf scarf scarfed scarf + scarfs scarf scaring scare + scarlet scarlet scarr scarr + scarre scarr scars scar + scarus scaru scath scath + scathe scath scathful scath + scatt scatt scatter scatter + scattered scatter scattering scatter + scatters scatter scelera scelera + scelerisque scelerisqu scene scene + scenes scene scent scent + scented scent scept scept + scepter scepter sceptre sceptr + sceptred sceptr sceptres sceptr + schedule schedul schedules schedul + scholar scholar scholarly scholarli + scholars scholar school school + schoolboy schoolboi schoolboys schoolboi + schoolfellows schoolfellow schooling school + schoolmaster schoolmast schoolmasters schoolmast + schools school sciatica sciatica + sciaticas sciatica science scienc + sciences scienc scimitar scimitar + scion scion scions scion + scissors scissor scoff scoff + scoffer scoffer scoffing scof + scoffs scoff scoggin scoggin + scold scold scolding scold + scolds scold sconce sconc + scone scone scope scope + scopes scope scorch scorch + scorched scorch score score + scored score scores score + scoring score scorn scorn + scorned scorn scornful scorn + scornfully scornfulli scorning scorn + scorns scorn scorpion scorpion + scorpions scorpion scot scot + scotch scotch scotches scotch + scotland scotland scots scot + scottish scottish scoundrels scoundrel + scour scour scoured scour + scourg scourg scourge scourg + scouring scour scout scout + scouts scout scowl scowl + scrap scrap scrape scrape + scraping scrape scraps scrap + scratch scratch scratches scratch + scratching scratch scream scream + screams scream screech screech + screeching screech screen screen + screens screen screw screw + screws screw scribbl scribbl + scribbled scribbl scribe scribe + scribes scribe 
scrimers scrimer + scrip scrip scrippage scrippag + scripture scriptur scriptures scriptur + scrivener scriven scroll scroll + scrolls scroll scroop scroop + scrowl scrowl scroyles scroyl + scrubbed scrub scruple scrupl + scruples scrupl scrupulous scrupul + scuffles scuffl scuffling scuffl + scullion scullion sculls scull + scum scum scurril scurril + scurrility scurril scurrilous scurril + scurvy scurvi scuse scuse + scut scut scutcheon scutcheon + scutcheons scutcheon scylla scylla + scythe scyth scythed scyth + scythia scythia scythian scythian + sdeath sdeath se se + sea sea seacoal seacoal + seafaring seafar seal seal + sealed seal sealing seal + seals seal seam seam + seamen seamen seamy seami + seaport seaport sear sear + searce searc search search + searchers searcher searches search + searcheth searcheth searching search + seared sear seas sea + seasick seasick seaside seasid + season season seasoned season + seasons season seat seat + seated seat seats seat + sebastian sebastian second second + secondarily secondarili secondary secondari + seconded second seconds second + secrecy secreci secret secret + secretaries secretari secretary secretari + secretly secretli secrets secret + sect sect sectary sectari + sects sect secundo secundo + secure secur securely secur + securing secur security secur + sedg sedg sedge sedg + sedges sedg sedgy sedgi + sedition sedit seditious sediti + seduc seduc seduce seduc + seduced seduc seducer seduc + seducing seduc see see + seed seed seeded seed + seedness seed seeds seed + seedsman seedsman seein seein + seeing see seek seek + seeking seek seeks seek + seel seel seeling seel + seely seeli seem seem + seemed seem seemers seemer + seemest seemest seemeth seemeth + seeming seem seemingly seemingli + seemly seemli seems seem + seen seen seer seer + sees see seese sees + seest seest seethe seeth + seethes seeth seething seeth + seeting seet segregation segreg + seigneur seigneur seigneurs seigneur + seiz seiz seize seiz + 
seized seiz seizes seiz + seizeth seizeth seizing seiz + seizure seizur seld seld + seldom seldom select select + seleucus seleucu self self + selfsame selfsam sell sell + seller seller selling sell + sells sell selves selv + semblable semblabl semblably semblabl + semblance semblanc semblances semblanc + semblative sembl semi semi + semicircle semicircl semiramis semirami + semper semper sempronius semproniu + senate senat senator senat + senators senat send send + sender sender sendeth sendeth + sending send sends send + seneca seneca senior senior + seniory seniori senis seni + sennet sennet senoys senoi + sense sens senseless senseless + senses sens sensible sensibl + sensibly sensibl sensual sensual + sensuality sensual sent sent + sentenc sentenc sentence sentenc + sentences sentenc sententious sententi + sentinel sentinel sentinels sentinel + separable separ separate separ + separated separ separates separ + separation separ septentrion septentrion + sepulchre sepulchr sepulchres sepulchr + sepulchring sepulchr sequel sequel + sequence sequenc sequent sequent + sequest sequest sequester sequest + sequestration sequestr sere sere + serenis sereni serge serg + sergeant sergeant serious seriou + seriously serious sermon sermon + sermons sermon serpent serpent + serpentine serpentin serpents serpent + serpigo serpigo serv serv + servant servant servanted servant + servants servant serve serv + served serv server server + serves serv serveth serveth + service servic serviceable servic + services servic servile servil + servility servil servilius serviliu + serving serv servingman servingman + servingmen servingmen serviteur serviteur + servitor servitor servitors servitor + servitude servitud sessa sessa + session session sessions session + sestos sesto set set + setebos setebo sets set + setter setter setting set + settle settl settled settl + settlest settlest settling settl + sev sev seven seven + sevenfold sevenfold sevennight sevennight + seventeen seventeen 
seventh seventh + seventy seventi sever sever + several sever severally sever + severals sever severe sever + severed sever severely sever + severest severest severing sever + severity sever severn severn + severs sever sew sew + seward seward sewer sewer + sewing sew sex sex + sexes sex sexton sexton + sextus sextu seymour seymour + seyton seyton sfoot sfoot + sh sh shackle shackl + shackles shackl shade shade + shades shade shadow shadow + shadowed shadow shadowing shadow + shadows shadow shadowy shadowi + shady shadi shafalus shafalu + shaft shaft shafts shaft + shag shag shak shak + shake shake shaked shake + shaken shaken shakes shake + shaking shake shales shale + shall shall shallenge shalleng + shallow shallow shallowest shallowest + shallowly shallowli shallows shallow + shalt shalt sham sham + shambles shambl shame shame + shamed shame shameful shame + shamefully shamefulli shameless shameless + shames shame shamest shamest + shaming shame shank shank + shanks shank shap shap + shape shape shaped shape + shapeless shapeless shapen shapen + shapes shape shaping shape + shar shar shard shard + sharded shard shards shard + share share shared share + sharers sharer shares share + sharing share shark shark + sharp sharp sharpen sharpen + sharpened sharpen sharpens sharpen + sharper sharper sharpest sharpest + sharply sharpli sharpness sharp + sharps sharp shatter shatter + shav shav shave shave + shaven shaven shaw shaw + she she sheaf sheaf + sheal sheal shear shear + shearers shearer shearing shear + shearman shearman shears shear + sheath sheath sheathe sheath + sheathed sheath sheathes sheath + sheathing sheath sheaved sheav + sheaves sheav shed shed + shedding shed sheds shed + sheen sheen sheep sheep + sheepcote sheepcot sheepcotes sheepcot + sheeps sheep sheepskins sheepskin + sheer sheer sheet sheet + sheeted sheet sheets sheet + sheffield sheffield shelf shelf + shell shell shells shell + shelt shelt shelter shelter + shelters shelter shelves shelv + 
shelving shelv shelvy shelvi + shent shent shepherd shepherd + shepherdes shepherd shepherdess shepherdess + shepherdesses shepherdess shepherds shepherd + sher sher sheriff sheriff + sherris sherri shes she + sheweth sheweth shield shield + shielded shield shields shield + shift shift shifted shift + shifting shift shifts shift + shilling shill shillings shill + shin shin shine shine + shines shine shineth shineth + shining shine shins shin + shiny shini ship ship + shipboard shipboard shipman shipman + shipmaster shipmast shipmen shipmen + shipp shipp shipped ship + shipping ship ships ship + shipt shipt shipwreck shipwreck + shipwrecking shipwreck shipwright shipwright + shipwrights shipwright shire shire + shirley shirlei shirt shirt + shirts shirt shive shive + shiver shiver shivering shiver + shivers shiver shoal shoal + shoals shoal shock shock + shocks shock shod shod + shoe shoe shoeing shoe + shoemaker shoemak shoes shoe + shog shog shone shone + shook shook shoon shoon + shoot shoot shooter shooter + shootie shooti shooting shoot + shoots shoot shop shop + shops shop shore shore + shores shore shorn shorn + short short shortcake shortcak + shorten shorten shortened shorten + shortens shorten shorter shorter + shortly shortli shortness short + shot shot shotten shotten + shoughs shough should should + shoulder shoulder shouldering shoulder + shoulders shoulder shouldst shouldst + shout shout shouted shout + shouting shout shouts shout + shov shov shove shove + shovel shovel shovels shovel + show show showed show + shower shower showers shower + showest showest showing show + shown shown shows show + shreds shred shrew shrew + shrewd shrewd shrewdly shrewdli + shrewdness shrewd shrewish shrewish + shrewishly shrewishli shrewishness shrewish + shrews shrew shrewsbury shrewsburi + shriek shriek shrieking shriek + shrieks shriek shrieve shriev + shrift shrift shrill shrill + shriller shriller shrills shrill + shrilly shrilli shrimp shrimp + shrine shrine 
shrink shrink + shrinking shrink shrinks shrink + shriv shriv shrive shrive + shriver shriver shrives shrive + shriving shrive shroud shroud + shrouded shroud shrouding shroud + shrouds shroud shrove shrove + shrow shrow shrows shrow + shrub shrub shrubs shrub + shrug shrug shrugs shrug + shrunk shrunk shudd shudd + shudders shudder shuffl shuffl + shuffle shuffl shuffled shuffl + shuffling shuffl shun shun + shunless shunless shunn shunn + shunned shun shunning shun + shuns shun shut shut + shuts shut shuttle shuttl + shy shy shylock shylock + si si sibyl sibyl + sibylla sibylla sibyls sibyl + sicil sicil sicilia sicilia + sicilian sicilian sicilius siciliu + sicils sicil sicily sicili + sicinius siciniu sick sick + sicken sicken sickens sicken + sicker sicker sickle sickl + sicklemen sicklemen sicklied sickli + sickliness sickli sickly sickli + sickness sick sicles sicl + sicyon sicyon side side + sided side sides side + siege sieg sieges sieg + sienna sienna sies si + sieve siev sift sift + sifted sift sigeia sigeia + sigh sigh sighed sigh + sighing sigh sighs sigh + sight sight sighted sight + sightless sightless sightly sightli + sights sight sign sign + signal signal signet signet + signieur signieur significant signific + significants signific signified signifi + signifies signifi signify signifi + signifying signifi signior signior + signiories signiori signiors signior + signiory signiori signor signor + signories signori signs sign + signum signum silenc silenc + silence silenc silenced silenc + silencing silenc silent silent + silently silent silius siliu + silk silk silken silken + silkman silkman silks silk + silliest silliest silliness silli + silling sill silly silli + silva silva silver silver + silvered silver silverly silverli + silvia silvia silvius silviu + sima sima simile simil + similes simil simois simoi + simon simon simony simoni + simp simp simpcox simpcox + simple simpl simpleness simpl + simpler simpler simples simpl + simplicity 
simplic simply simpli + simular simular simulation simul + sin sin since sinc + sincere sincer sincerely sincer + sincerity sincer sinel sinel + sinew sinew sinewed sinew + sinews sinew sinewy sinewi + sinful sin sinfully sinfulli + sing sing singe sing + singeing sing singer singer + singes sing singeth singeth + singing sing single singl + singled singl singleness singl + singly singli sings sing + singular singular singulariter singularit + singularities singular singularity singular + singuled singul sinister sinist + sink sink sinking sink + sinks sink sinn sinn + sinner sinner sinners sinner + sinning sin sinon sinon + sins sin sip sip + sipping sip sir sir + sire sire siren siren + sirrah sirrah sirs sir + sist sist sister sister + sisterhood sisterhood sisterly sisterli + sisters sister sit sit + sith sith sithence sithenc + sits sit sitting sit + situate situat situation situat + situations situat siward siward + six six sixpence sixpenc + sixpences sixpenc sixpenny sixpenni + sixteen sixteen sixth sixth + sixty sixti siz siz + size size sizes size + sizzle sizzl skains skain + skamble skambl skein skein + skelter skelter skies ski + skilful skil skilfully skilfulli + skill skill skilless skilless + skillet skillet skillful skill + skills skill skim skim + skimble skimbl skin skin + skinker skinker skinny skinni + skins skin skip skip + skipp skipp skipper skipper + skipping skip skirmish skirmish + skirmishes skirmish skirr skirr + skirted skirt skirts skirt + skittish skittish skulking skulk + skull skull skulls skull + sky sky skyey skyei + skyish skyish slab slab + slack slack slackly slackli + slackness slack slain slain + slake slake sland sland + slander slander slandered slander + slanderer slander slanderers slander + slandering slander slanderous slander + slanders slander slash slash + slaught slaught slaughter slaughter + slaughtered slaughter slaughterer slaughter + slaughterman slaughterman slaughtermen slaughtermen + slaughterous slaughter 
slaughters slaughter + slave slave slaver slaver + slavery slaveri slaves slave + slavish slavish slay slai + slayeth slayeth slaying slai + slays slai sleave sleav + sledded sled sleek sleek + sleekly sleekli sleep sleep + sleeper sleeper sleepers sleeper + sleepest sleepest sleeping sleep + sleeps sleep sleepy sleepi + sleeve sleev sleeves sleev + sleid sleid sleided sleid + sleight sleight sleights sleight + slender slender slenderer slender + slenderly slenderli slept slept + slew slew slewest slewest + slice slice slid slid + slide slide slides slide + sliding slide slight slight + slighted slight slightest slightest + slightly slightli slightness slight + slights slight slily slili + slime slime slimy slimi + slings sling slink slink + slip slip slipp slipp + slipper slipper slippers slipper + slippery slipperi slips slip + slish slish slit slit + sliver sliver slobb slobb + slomber slomber slop slop + slope slope slops slop + sloth sloth slothful sloth + slough slough slovenly slovenli + slovenry slovenri slow slow + slower slower slowly slowli + slowness slow slubber slubber + slug slug sluggard sluggard + sluggardiz sluggardiz sluggish sluggish + sluic sluic slumb slumb + slumber slumber slumbers slumber + slumbery slumberi slunk slunk + slut slut sluts slut + sluttery slutteri sluttish sluttish + sluttishness sluttish sly sly + slys sly smack smack + smacking smack smacks smack + small small smaller smaller + smallest smallest smallness small + smalus smalu smart smart + smarting smart smartly smartli + smatch smatch smatter smatter + smear smear smell smell + smelling smell smells smell + smelt smelt smil smil + smile smile smiled smile + smiles smile smilest smilest + smilets smilet smiling smile + smilingly smilingli smirch smirch + smirched smirch smit smit + smite smite smites smite + smith smith smithfield smithfield + smock smock smocks smock + smok smok smoke smoke + smoked smoke smokes smoke + smoking smoke smoky smoki + smooth smooth smoothed 
smooth + smoothing smooth smoothly smoothli + smoothness smooth smooths smooth + smote smote smoth smoth + smother smother smothered smother + smothering smother smug smug + smulkin smulkin smutch smutch + snaffle snaffl snail snail + snails snail snake snake + snakes snake snaky snaki + snap snap snapp snapp + snapper snapper snar snar + snare snare snares snare + snarl snarl snarleth snarleth + snarling snarl snatch snatch + snatchers snatcher snatches snatch + snatching snatch sneak sneak + sneaking sneak sneap sneap + sneaping sneap sneck sneck + snip snip snipe snipe + snipt snipt snore snore + snores snore snoring snore + snorting snort snout snout + snow snow snowballs snowbal + snowed snow snowy snowi + snuff snuff snuffs snuff + snug snug so so + soak soak soaking soak + soaks soak soar soar + soaring soar soars soar + sob sob sobbing sob + sober sober soberly soberli + sobriety sobrieti sobs sob + sociable sociabl societies societi + society societi socks sock + socrates socrat sod sod + sodden sodden soe soe + soever soever soft soft + soften soften softens soften + softer softer softest softest + softly softli softness soft + soil soil soiled soil + soilure soilur soit soit + sojourn sojourn sol sol + sola sola solace solac + solanio solanio sold sold + soldat soldat solder solder + soldest soldest soldier soldier + soldiers soldier soldiership soldiership + sole sole solely sole + solem solem solemn solemn + solemness solem solemnities solemn + solemnity solemn solemniz solemniz + solemnize solemn solemnized solemn + solemnly solemnli soles sole + solicit solicit solicitation solicit + solicited solicit soliciting solicit + solicitings solicit solicitor solicitor + solicits solicit solid solid + solidares solidar solidity solid + solinus solinu solitary solitari + solomon solomon solon solon + solum solum solus solu + solyman solyman some some + somebody somebodi someone someon + somerset somerset somerville somervil + something someth sometime sometim 
+ sometimes sometim somever somev + somewhat somewhat somewhere somewher + somewhither somewhith somme somm + son son sonance sonanc + song song songs song + sonnet sonnet sonneting sonnet + sonnets sonnet sons son + sont sont sonties sonti + soon soon sooner sooner + soonest soonest sooth sooth + soothe sooth soothers soother + soothing sooth soothsay soothsai + soothsayer soothsay sooty sooti + sop sop sophister sophist + sophisticated sophist sophy sophi + sops sop sorcerer sorcer + sorcerers sorcer sorceress sorceress + sorceries sorceri sorcery sorceri + sore sore sorel sorel + sorely sore sorer sorer + sores sore sorrier sorrier + sorriest sorriest sorrow sorrow + sorrowed sorrow sorrowest sorrowest + sorrowful sorrow sorrowing sorrow + sorrows sorrow sorry sorri + sort sort sortance sortanc + sorted sort sorting sort + sorts sort sossius sossiu + sot sot soto soto + sots sot sottish sottish + soud soud sought sought + soul soul sould sould + soulless soulless souls soul + sound sound sounded sound + sounder sounder soundest soundest + sounding sound soundless soundless + soundly soundli soundness sound + soundpost soundpost sounds sound + sour sour source sourc + sources sourc sourest sourest + sourly sourli sours sour + sous sou souse sous + south south southam southam + southampton southampton southerly southerli + southern southern southward southward + southwark southwark southwell southwel + souviendrai souviendrai sov sov + sovereign sovereign sovereignest sovereignest + sovereignly sovereignli sovereignty sovereignti + sovereignvours sovereignvour sow sow + sowing sow sowl sowl + sowter sowter space space + spaces space spacious spaciou + spade spade spades spade + spain spain spak spak + spake spake spakest spakest + span span spangle spangl + spangled spangl spaniard spaniard + spaniel spaniel spaniels spaniel + spanish spanish spann spann + spans span spar spar + spare spare spares spare + sparing spare sparingly sparingli + spark spark sparkle 
sparkl + sparkles sparkl sparkling sparkl + sparks spark sparrow sparrow + sparrows sparrow sparta sparta + spartan spartan spavin spavin + spavins spavin spawn spawn + speak speak speaker speaker + speakers speaker speakest speakest + speaketh speaketh speaking speak + speaks speak spear spear + speargrass speargrass spears spear + special special specialities special + specially special specialties specialti + specialty specialti specify specifi + speciously specious spectacle spectacl + spectacled spectacl spectacles spectacl + spectators spectat spectatorship spectatorship + speculation specul speculations specul + speculative specul sped sped + speech speech speeches speech + speechless speechless speed speed + speeded speed speedier speedier + speediest speediest speedily speedili + speediness speedi speeding speed + speeds speed speedy speedi + speens speen spell spell + spelling spell spells spell + spelt spelt spencer spencer + spend spend spendest spendest + spending spend spends spend + spendthrift spendthrift spent spent + sperato sperato sperm sperm + spero spero sperr sperr + spher spher sphere sphere + sphered sphere spheres sphere + spherical spheric sphery spheri + sphinx sphinx spice spice + spiced spice spicery spiceri + spices spice spider spider + spiders spider spied spi + spies spi spieth spieth + spightfully spightfulli spigot spigot + spill spill spilling spill + spills spill spilt spilt + spilth spilth spin spin + spinii spinii spinners spinner + spinster spinster spinsters spinster + spire spire spirit spirit + spirited spirit spiritless spiritless + spirits spirit spiritual spiritu + spiritualty spiritualti spirt spirt + spit spit spital spital + spite spite spited spite + spiteful spite spites spite + spits spit spitted spit + spitting spit splay splai + spleen spleen spleenful spleen + spleens spleen spleeny spleeni + splendour splendour splenitive splenit + splinter splinter splinters splinter + split split splits split + splitted 
split splitting split + spoil spoil spoils spoil + spok spok spoke spoke + spoken spoken spokes spoke + spokesman spokesman sponge spong + spongy spongi spoon spoon + spoons spoon sport sport + sportful sport sporting sport + sportive sportiv sports sport + spot spot spotless spotless + spots spot spotted spot + spousal spousal spouse spous + spout spout spouting spout + spouts spout sprag sprag + sprang sprang sprat sprat + sprawl sprawl spray sprai + sprays sprai spread spread + spreading spread spreads spread + sprighted spright sprightful spright + sprightly sprightli sprigs sprig + spring spring springe spring + springes spring springeth springeth + springhalt springhalt springing spring + springs spring springtime springtim + sprinkle sprinkl sprinkles sprinkl + sprite sprite sprited sprite + spritely sprite sprites sprite + spriting sprite sprout sprout + spruce spruce sprung sprung + spun spun spur spur + spurio spurio spurn spurn + spurns spurn spurr spurr + spurrer spurrer spurring spur + spurs spur spy spy + spying spy squabble squabbl + squadron squadron squadrons squadron + squand squand squar squar + square squar squarer squarer + squares squar squash squash + squeak squeak squeaking squeak + squeal squeal squealing squeal + squeezes squeez squeezing squeez + squele squel squier squier + squints squint squiny squini + squire squir squires squir + squirrel squirrel st st + stab stab stabb stabb + stabbed stab stabbing stab + stable stabl stableness stabl + stables stabl stablish stablish + stablishment stablish stabs stab + stacks stack staff staff + stafford stafford staffords stafford + staffordshire staffordshir stag stag + stage stage stages stage + stagger stagger staggering stagger + staggers stagger stags stag + staid staid staider staider + stain stain stained stain + staines stain staineth staineth + staining stain stainless stainless + stains stain stair stair + stairs stair stake stake + stakes stake stale stale + staled stale stalk stalk + 
stalking stalk stalks stalk + stall stall stalling stall + stalls stall stamford stamford + stammer stammer stamp stamp + stamped stamp stamps stamp + stanch stanch stanchless stanchless + stand stand standard standard + standards standard stander stander + standers stander standest standest + standeth standeth standing stand + stands stand staniel staniel + stanley stanlei stanze stanz + stanzo stanzo stanzos stanzo + staple stapl staples stapl + star star stare stare + stared stare stares stare + staring stare starings stare + stark stark starkly starkli + starlight starlight starling starl + starr starr starry starri + stars star start start + started start starting start + startingly startingli startle startl + startles startl starts start + starv starv starve starv + starved starv starvelackey starvelackei + starveling starvel starveth starveth + starving starv state state + statelier stateli stately state + states state statesman statesman + statesmen statesmen statilius statiliu + station station statist statist + statists statist statue statu + statues statu stature statur + statures statur statute statut + statutes statut stave stave + staves stave stay stai + stayed stai stayest stayest + staying stai stays stai + stead stead steaded stead + steadfast steadfast steadier steadier + steads stead steal steal + stealer stealer stealers stealer + stealing steal steals steal + stealth stealth stealthy stealthi + steed steed steeds steed + steel steel steeled steel + steely steeli steep steep + steeped steep steeple steepl + steeples steepl steeps steep + steepy steepi steer steer + steerage steerag steering steer + steers steer stelled stell + stem stem stemming stem + stench stench step step + stepdame stepdam stephano stephano + stephen stephen stepmothers stepmoth + stepp stepp stepping step + steps step sterile steril + sterility steril sterling sterl + stern stern sternage sternag + sterner sterner sternest sternest + sternness stern steterat steterat + 
stew stew steward steward + stewards steward stewardship stewardship + stewed stew stews stew + stick stick sticking stick + stickler stickler sticks stick + stiff stiff stiffen stiffen + stiffly stiffli stifle stifl + stifled stifl stifles stifl + stigmatic stigmat stigmatical stigmat + stile stile still still + stiller stiller stillest stillest + stillness still stilly stilli + sting sting stinging sting + stingless stingless stings sting + stink stink stinking stink + stinkingly stinkingli stinks stink + stint stint stinted stint + stints stint stir stir + stirr stirr stirred stir + stirrer stirrer stirrers stirrer + stirreth stirreth stirring stir + stirrup stirrup stirrups stirrup + stirs stir stitchery stitcheri + stitches stitch stithied stithi + stithy stithi stoccadoes stoccado + stoccata stoccata stock stock + stockfish stockfish stocking stock + stockings stock stockish stockish + stocks stock stog stog + stogs stog stoics stoic + stokesly stokesli stol stol + stole stole stolen stolen + stolest stolest stomach stomach + stomachers stomach stomaching stomach + stomachs stomach ston ston + stone stone stonecutter stonecutt + stones stone stonish stonish + stony stoni stood stood + stool stool stools stool + stoop stoop stooping stoop + stoops stoop stop stop + stope stope stopp stopp + stopped stop stopping stop + stops stop stor stor + store store storehouse storehous + storehouses storehous stores store + stories stori storm storm + stormed storm storming storm + storms storm stormy stormi + story stori stoup stoup + stoups stoup stout stout + stouter stouter stoutly stoutli + stoutness stout stover stover + stow stow stowage stowag + stowed stow strachy strachi + stragglers straggler straggling straggl + straight straight straightest straightest + straightway straightwai strain strain + strained strain straining strain + strains strain strait strait + straited strait straiter straiter + straitly straitli straitness strait + straits strait strand strand 
+ strange strang strangely strang + strangeness strang stranger stranger + strangers stranger strangest strangest + strangle strangl strangled strangl + strangler strangler strangles strangl + strangling strangl strappado strappado + straps strap stratagem stratagem + stratagems stratagem stratford stratford + strato strato straw straw + strawberries strawberri strawberry strawberri + straws straw strawy strawi + stray strai straying strai + strays strai streak streak + streaks streak stream stream + streamers streamer streaming stream + streams stream streching strech + street street streets street + strength strength strengthen strengthen + strengthened strengthen strengthless strengthless + strengths strength stretch stretch + stretched stretch stretches stretch + stretching stretch strew strew + strewing strew strewings strew + strewments strewment stricken stricken + strict strict stricter stricter + strictest strictest strictly strictli + stricture strictur stride stride + strides stride striding stride + strife strife strifes strife + strik strik strike strike + strikers striker strikes strike + strikest strikest striking strike + string string stringless stringless + strings string strip strip + stripes stripe stripling stripl + striplings stripl stripp stripp + stripping strip striv striv + strive strive strives strive + striving strive strok strok + stroke stroke strokes stroke + strond strond stronds strond + strong strong stronger stronger + strongest strongest strongly strongli + strooke strook strossers strosser + strove strove strown strown + stroy stroi struck struck + strucken strucken struggle struggl + struggles struggl struggling struggl + strumpet strumpet strumpeted strumpet + strumpets strumpet strung strung + strut strut struts strut + strutted strut strutting strut + stubble stubbl stubborn stubborn + stubbornest stubbornest stubbornly stubbornli + stubbornness stubborn stuck stuck + studded stud student student + students student studied 
studi + studies studi studious studiou + studiously studious studs stud + study studi studying studi + stuff stuff stuffing stuf + stuffs stuff stumble stumbl + stumbled stumbl stumblest stumblest + stumbling stumbl stump stump + stumps stump stung stung + stupefy stupefi stupid stupid + stupified stupifi stuprum stuprum + sturdy sturdi sty sty + styga styga stygian stygian + styl styl style style + styx styx su su + sub sub subcontracted subcontract + subdu subdu subdue subdu + subdued subdu subduements subduement + subdues subdu subduing subdu + subject subject subjected subject + subjection subject subjects subject + submerg submerg submission submiss + submissive submiss submit submit + submits submit submitting submit + suborn suborn subornation suborn + suborned suborn subscrib subscrib + subscribe subscrib subscribed subscrib + subscribes subscrib subscription subscript + subsequent subsequ subsidies subsidi + subsidy subsidi subsist subsist + subsisting subsist substance substanc + substances substanc substantial substanti + substitute substitut substituted substitut + substitutes substitut substitution substitut + subtile subtil subtilly subtilli + subtle subtl subtleties subtleti + subtlety subtleti subtly subtli + subtractors subtractor suburbs suburb + subversion subvers subverts subvert + succedant succed succeed succe + succeeded succeed succeeders succeed + succeeding succeed succeeds succe + success success successantly successantli + successes success successful success + successfully successfulli succession success + successive success successively success + successor successor successors successor + succour succour succours succour + such such suck suck + sucker sucker suckers sucker + sucking suck suckle suckl + sucks suck sudden sudden + suddenly suddenli sue sue + sued su suerly suerli + sues sue sueth sueth + suff suff suffer suffer + sufferance suffer sufferances suffer + suffered suffer suffering suffer + suffers suffer suffic suffic + 
suffice suffic sufficed suffic + suffices suffic sufficeth sufficeth + sufficiency suffici sufficient suffici + sufficiently suffici sufficing suffic + sufficit sufficit suffigance suffig + suffocate suffoc suffocating suffoc + suffocation suffoc suffolk suffolk + suffrage suffrag suffrages suffrag + sug sug sugar sugar + sugarsop sugarsop suggest suggest + suggested suggest suggesting suggest + suggestion suggest suggestions suggest + suggests suggest suis sui + suit suit suitable suitabl + suited suit suiting suit + suitor suitor suitors suitor + suits suit suivez suivez + sullen sullen sullens sullen + sullied sulli sullies sulli + sully sulli sulph sulph + sulpherous sulpher sulphur sulphur + sulphurous sulphur sultan sultan + sultry sultri sum sum + sumless sumless summ summ + summa summa summary summari + summer summer summers summer + summit summit summon summon + summoners summon summons summon + sumpter sumpter sumptuous sumptuou + sumptuously sumptuous sums sum + sun sun sunbeams sunbeam + sunburning sunburn sunburnt sunburnt + sund sund sunday sundai + sundays sundai sunder sunder + sunders sunder sundry sundri + sung sung sunk sunk + sunken sunken sunny sunni + sunrising sunris suns sun + sunset sunset sunshine sunshin + sup sup super super + superficial superfici superficially superfici + superfluity superflu superfluous superflu + superfluously superflu superflux superflux + superior superior supernal supern + supernatural supernatur superpraise superprais + superscript superscript superscription superscript + superserviceable superservic superstition superstit + superstitious superstiti superstitiously superstiti + supersubtle supersubtl supervise supervis + supervisor supervisor supp supp + supper supper suppers supper + suppertime suppertim supping sup + supplant supplant supple suppl + suppler suppler suppliance supplianc + suppliant suppliant suppliants suppliant + supplicant supplic supplication supplic + supplications supplic supplie suppli + 
supplied suppli supplies suppli + suppliest suppliest supply suppli + supplyant supplyant supplying suppli + supplyment supplyment support support + supportable support supportance support + supported support supporter support + supporters support supporting support + supportor supportor suppos suppo + supposal suppos suppose suppos + supposed suppos supposes suppos + supposest supposest supposing suppos + supposition supposit suppress suppress + suppressed suppress suppresseth suppresseth + supremacy supremaci supreme suprem + sups sup sur sur + surance suranc surcease surceas + surd surd sure sure + surecard surecard surely sure + surer surer surest surest + sureties sureti surety sureti + surfeit surfeit surfeited surfeit + surfeiter surfeit surfeiting surfeit + surfeits surfeit surge surg + surgeon surgeon surgeons surgeon + surgere surger surgery surgeri + surges surg surly surli + surmis surmi surmise surmis + surmised surmis surmises surmis + surmount surmount surmounted surmount + surmounts surmount surnam surnam + surname surnam surnamed surnam + surpasseth surpasseth surpassing surpass + surplice surplic surplus surplu + surpris surpri surprise surpris + surprised surpris surrender surrend + surrey surrei surreys surrei + survey survei surveyest surveyest + surveying survei surveyor surveyor + surveyors surveyor surveys survei + survive surviv survives surviv + survivor survivor susan susan + suspect suspect suspected suspect + suspecting suspect suspects suspect + suspend suspend suspense suspens + suspicion suspicion suspicions suspicion + suspicious suspici suspiration suspir + suspire suspir sust sust + sustain sustain sustaining sustain + sutler sutler sutton sutton + suum suum swabber swabber + swaddling swaddl swag swag + swagg swagg swagger swagger + swaggerer swagger swaggerers swagger + swaggering swagger swain swain + swains swain swallow swallow + swallowed swallow swallowing swallow + swallows swallow swam swam + swan swan swans swan + sward 
sward sware sware + swarm swarm swarming swarm + swart swart swarth swarth + swarths swarth swarthy swarthi + swashers swasher swashing swash + swath swath swathing swath + swathling swathl sway swai + swaying swai sways swai + swear swear swearer swearer + swearers swearer swearest swearest + swearing swear swearings swear + swears swear sweat sweat + sweaten sweaten sweating sweat + sweats sweat sweaty sweati + sweep sweep sweepers sweeper + sweeps sweep sweet sweet + sweeten sweeten sweetens sweeten + sweeter sweeter sweetest sweetest + sweetheart sweetheart sweeting sweet + sweetly sweetli sweetmeats sweetmeat + sweetness sweet sweets sweet + swell swell swelling swell + swellings swell swells swell + swelter swelter sweno sweno + swept swept swerve swerv + swerver swerver swerving swerv + swift swift swifter swifter + swiftest swiftest swiftly swiftli + swiftness swift swill swill + swills swill swim swim + swimmer swimmer swimmers swimmer + swimming swim swims swim + swine swine swineherds swineherd + swing swing swinge swing + swinish swinish swinstead swinstead + switches switch swits swit + switzers switzer swol swol + swoll swoll swoln swoln + swoon swoon swooned swoon + swooning swoon swoons swoon + swoop swoop swoopstake swoopstak + swor swor sword sword + sworder sworder swords sword + swore swore sworn sworn + swounded swound swounds swound + swum swum swung swung + sy sy sycamore sycamor + sycorax sycorax sylla sylla + syllable syllabl syllables syllabl + syllogism syllog symbols symbol + sympathise sympathis sympathiz sympathiz + sympathize sympath sympathized sympath + sympathy sympathi synagogue synagogu + synod synod synods synod + syracuse syracus syracusian syracusian + syracusians syracusian syria syria + syrups syrup t t + ta ta taber taber + table tabl tabled tabl + tables tabl tablet tablet + tabor tabor taborer tabor + tabors tabor tabourines tabourin + taciturnity taciturn tack tack + tackle tackl tackled tackl + tackles tackl tackling 
tackl + tacklings tackl taddle taddl + tadpole tadpol taffeta taffeta + taffety taffeti tag tag + tagrag tagrag tah tah + tail tail tailor tailor + tailors tailor tails tail + taint taint tainted taint + tainting taint taints taint + tainture taintur tak tak + take take taken taken + taker taker takes take + takest takest taketh taketh + taking take tal tal + talbot talbot talbotites talbotit + talbots talbot tale tale + talent talent talents talent + taleporter taleport tales tale + talk talk talked talk + talker talker talkers talker + talkest talkest talking talk + talks talk tall tall + taller taller tallest tallest + tallies talli tallow tallow + tally talli talons talon + tam tam tambourines tambourin + tame tame tamed tame + tamely tame tameness tame + tamer tamer tames tame + taming tame tamora tamora + tamworth tamworth tan tan + tang tang tangle tangl + tangled tangl tank tank + tanlings tanl tann tann + tanned tan tanner tanner + tanquam tanquam tanta tanta + tantaene tantaen tap tap + tape tape taper taper + tapers taper tapestries tapestri + tapestry tapestri taphouse taphous + tapp tapp tapster tapster + tapsters tapster tar tar + tardied tardi tardily tardili + tardiness tardi tardy tardi + tarentum tarentum targe targ + targes targ target target + targets target tarpeian tarpeian + tarquin tarquin tarquins tarquin + tarr tarr tarre tarr + tarriance tarrianc tarried tarri + tarries tarri tarry tarri + tarrying tarri tart tart + tartar tartar tartars tartar + tartly tartli tartness tart + task task tasker tasker + tasking task tasks task + tassel tassel taste tast + tasted tast tastes tast + tasting tast tatt tatt + tatter tatter tattered tatter + tatters tatter tattle tattl + tattling tattl tattlings tattl + taught taught taunt taunt + taunted taunt taunting taunt + tauntingly tauntingli taunts taunt + taurus tauru tavern tavern + taverns tavern tavy tavi + tawdry tawdri tawny tawni + tax tax taxation taxat + taxations taxat taxes tax + taxing tax tc 
tc + te te teach teach + teacher teacher teachers teacher + teaches teach teachest teachest + teacheth teacheth teaching teach + team team tear tear + tearful tear tearing tear + tears tear tearsheet tearsheet + teat teat tedious tediou + tediously tedious tediousness tedious + teem teem teeming teem + teems teem teen teen + teeth teeth teipsum teipsum + telamon telamon telamonius telamoniu + tell tell teller teller + telling tell tells tell + tellus tellu temp temp + temper temper temperality temper + temperance temper temperate temper + temperately temper tempers temper + tempest tempest tempests tempest + tempestuous tempestu temple templ + temples templ temporal tempor + temporary temporari temporiz temporiz + temporize tempor temporizer tempor + temps temp tempt tempt + temptation temptat temptations temptat + tempted tempt tempter tempter + tempters tempter tempteth tempteth + tempting tempt tempts tempt + ten ten tenable tenabl + tenant tenant tenantius tenantiu + tenantless tenantless tenants tenant + tench tench tend tend + tendance tendanc tended tend + tender tender tendered tender + tenderly tenderli tenderness tender + tenders tender tending tend + tends tend tenedos tenedo + tenement tenement tenements tenement + tenfold tenfold tennis tenni + tenour tenour tenours tenour + tens ten tent tent + tented tent tenth tenth + tenths tenth tents tent + tenure tenur tenures tenur + tercel tercel tereus tereu + term term termagant termag + termed term terminations termin + termless termless terms term + terra terra terrace terrac + terram terram terras terra + terre terr terrene terren + terrestrial terrestri terrible terribl + terribly terribl territories territori + territory territori terror terror + terrors terror tertian tertian + tertio tertio test test + testament testament tested test + tester tester testern testern + testify testifi testimonied testimoni + testimonies testimoni testimony testimoni + testiness testi testril testril + testy testi tetchy 
tetchi + tether tether tetter tetter + tevil tevil tewksbury tewksburi + text text tgv tgv + th th thaes thae + thames thame than than + thane thane thanes thane + thank thank thanked thank + thankful thank thankfully thankfulli + thankfulness thank thanking thank + thankings thank thankless thankless + thanks thank thanksgiving thanksgiv + thasos thaso that that + thatch thatch thaw thaw + thawing thaw thaws thaw + the the theatre theatr + theban theban thebes thebe + thee thee theft theft + thefts theft thein thein + their their theirs their + theise theis them them + theme theme themes theme + themselves themselv then then + thence thenc thenceforth thenceforth + theoric theoric there there + thereabout thereabout thereabouts thereabout + thereafter thereaft thereat thereat + thereby therebi therefore therefor + therein therein thereof thereof + thereon thereon thereto thereto + thereunto thereunto thereupon thereupon + therewith therewith therewithal therewith + thersites thersit these these + theseus theseu thessalian thessalian + thessaly thessali thetis theti + thews thew they thei + thick thick thicken thicken + thickens thicken thicker thicker + thickest thickest thicket thicket + thickskin thickskin thief thief + thievery thieveri thieves thiev + thievish thievish thigh thigh + thighs thigh thimble thimbl + thimbles thimbl thin thin + thine thine thing thing + things thing think think + thinkest thinkest thinking think + thinkings think thinks think + thinkst thinkst thinly thinli + third third thirdly thirdli + thirds third thirst thirst + thirsting thirst thirsts thirst + thirsty thirsti thirteen thirteen + thirties thirti thirtieth thirtieth + thirty thirti this thi + thisby thisbi thisne thisn + thistle thistl thistles thistl + thither thither thitherward thitherward + thoas thoa thomas thoma + thorn thorn thorns thorn + thorny thorni thorough thorough + thoroughly thoroughli those those + thou thou though though + thought thought thoughtful thought + 
thoughts thought thousand thousand + thousands thousand thracian thracian + thraldom thraldom thrall thrall + thralled thrall thralls thrall + thrash thrash thrasonical thrason + thread thread threadbare threadbar + threaden threaden threading thread + threat threat threaten threaten + threatening threaten threatens threaten + threatest threatest threats threat + three three threefold threefold + threepence threepenc threepile threepil + threes three threescore threescor + thresher thresher threshold threshold + threw threw thrice thrice + thrift thrift thriftless thriftless + thrifts thrift thrifty thrifti + thrill thrill thrilling thrill + thrills thrill thrive thrive + thrived thrive thrivers thriver + thrives thrive thriving thrive + throat throat throats throat + throbbing throb throbs throb + throca throca throe throe + throes throe thromuldo thromuldo + thron thron throne throne + throned throne thrones throne + throng throng thronging throng + throngs throng throstle throstl + throttle throttl through through + throughfare throughfar throughfares throughfar + throughly throughli throughout throughout + throw throw thrower thrower + throwest throwest throwing throw + thrown thrown throws throw + thrum thrum thrumm thrumm + thrush thrush thrust thrust + thrusteth thrusteth thrusting thrust + thrusts thrust thumb thumb + thumbs thumb thump thump + thund thund thunder thunder + thunderbolt thunderbolt thunderbolts thunderbolt + thunderer thunder thunders thunder + thunderstone thunderston thunderstroke thunderstrok + thurio thurio thursday thursdai + thus thu thwack thwack + thwart thwart thwarted thwart + thwarting thwart thwartings thwart + thy thy thyme thyme + thymus thymu thyreus thyreu + thyself thyself ti ti + tib tib tiber tiber + tiberio tiberio tibey tibei + ticed tice tick tick + tickl tickl tickle tickl + tickled tickl tickles tickl + tickling tickl ticklish ticklish + tiddle tiddl tide tide + tides tide tidings tide + tidy tidi tie tie + tied ti 
ties ti + tiff tiff tiger tiger + tigers tiger tight tight + tightly tightli tike tike + til til tile tile + till till tillage tillag + tilly tilli tilt tilt + tilter tilter tilth tilth + tilting tilt tilts tilt + tiltyard tiltyard tim tim + timandra timandra timber timber + time time timeless timeless + timelier timeli timely time + times time timon timon + timor timor timorous timor + timorously timor tinct tinct + tincture tinctur tinctures tinctur + tinder tinder tingling tingl + tinker tinker tinkers tinker + tinsel tinsel tiny tini + tip tip tipp tipp + tippling tippl tips tip + tipsy tipsi tiptoe tipto + tir tir tire tire + tired tire tires tire + tirest tirest tiring tire + tirra tirra tirrits tirrit + tis ti tish tish + tisick tisick tissue tissu + titan titan titania titania + tithe tith tithed tith + tithing tith titinius titiniu + title titl titled titl + titleless titleless titles titl + tittle tittl tittles tittl + titular titular titus titu + tn tn to to + toad toad toads toad + toadstool toadstool toast toast + toasted toast toasting toast + toasts toast toaze toaz + toby tobi tock tock + tod tod today todai + todpole todpol tods tod + toe toe toes toe + tofore tofor toge toge + toged toge together togeth + toil toil toiled toil + toiling toil toils toil + token token tokens token + told told toledo toledo + tolerable toler toll toll + tolling toll tom tom + tomb tomb tombe tomb + tombed tomb tombless tombless + tomboys tomboi tombs tomb + tomorrow tomorrow tomyris tomyri + ton ton tongs tong + tongu tongu tongue tongu + tongued tongu tongueless tongueless + tongues tongu tonight tonight + too too took took + tool tool tools tool + tooth tooth toothache toothach + toothpick toothpick toothpicker toothpick + top top topas topa + topful top topgallant topgal + topless topless topmast topmast + topp topp topping top + topple toppl topples toppl + tops top topsail topsail + topsy topsi torch torch + torchbearer torchbear torchbearers torchbear + torcher 
torcher torches torch + torchlight torchlight tore tore + torment torment tormenta tormenta + tormente torment tormented torment + tormenting torment tormentors tormentor + torments torment torn torn + torrent torrent tortive tortiv + tortoise tortois tortur tortur + torture tortur tortured tortur + torturer tortur torturers tortur + tortures tortur torturest torturest + torturing tortur toryne toryn + toss toss tossed toss + tosseth tosseth tossing toss + tot tot total total + totally total tott tott + tottered totter totters totter + tou tou touch touch + touched touch touches touch + toucheth toucheth touching touch + touchstone touchston tough tough + tougher tougher toughness tough + touraine tourain tournaments tournament + tours tour tous tou + tout tout touze touz + tow tow toward toward + towardly towardli towards toward + tower tower towering tower + towers tower town town + towns town township township + townsman townsman townsmen townsmen + towton towton toy toi + toys toi trace trace + traces trace track track + tract tract tractable tractabl + trade trade traded trade + traders trader trades trade + tradesman tradesman tradesmen tradesmen + trading trade tradition tradit + traditional tradit traduc traduc + traduced traduc traducement traduc + traffic traffic traffickers traffick + traffics traffic tragedian tragedian + tragedians tragedian tragedies tragedi + tragedy tragedi tragic tragic + tragical tragic trail trail + train train trained train + training train trains train + trait trait traitor traitor + traitorly traitorli traitorous traitor + traitorously traitor traitors traitor + traitress traitress traject traject + trammel trammel trample trampl + trampled trampl trampling trampl + tranc tranc trance tranc + tranio tranio tranquil tranquil + tranquillity tranquil transcendence transcend + transcends transcend transferred transfer + transfigur transfigur transfix transfix + transform transform transformation transform + transformations 
transform transformed transform + transgress transgress transgresses transgress + transgressing transgress transgression transgress + translate translat translated translat + translates translat translation translat + transmigrates transmigr transmutation transmut + transparent transpar transport transport + transportance transport transported transport + transporting transport transports transport + transpose transpos transshape transshap + trap trap trapp trapp + trappings trap traps trap + trash trash travail travail + travails travail travel travel + traveler travel traveling travel + travell travel travelled travel + traveller travel travellers travel + travellest travellest travelling travel + travels travel travers traver + traverse travers tray trai + treacherous treacher treacherously treacher + treachers treacher treachery treacheri + tread tread treading tread + treads tread treason treason + treasonable treason treasonous treason + treasons treason treasure treasur + treasurer treasur treasures treasur + treasuries treasuri treasury treasuri + treat treat treaties treati + treatise treatis treats treat + treaty treati treble trebl + trebled trebl trebles trebl + trebonius treboniu tree tree + trees tree tremble trembl + trembled trembl trembles trembl + tremblest tremblest trembling trembl + tremblingly tremblingli tremor tremor + trempling trempl trench trench + trenchant trenchant trenched trench + trencher trencher trenchering trencher + trencherman trencherman trenchers trencher + trenches trench trenching trench + trent trent tres tre + trespass trespass trespasses trespass + tressel tressel tresses tress + treys trei trial trial + trials trial trib trib + tribe tribe tribes tribe + tribulation tribul tribunal tribun + tribune tribun tribunes tribun + tributaries tributari tributary tributari + tribute tribut tributes tribut + trice trice trick trick + tricking trick trickling trickl + tricks trick tricksy tricksi + trident trident tried tri + 
trier trier trifle trifl + trifled trifl trifler trifler + trifles trifl trifling trifl + trigon trigon trill trill + trim trim trimly trimli + trimm trimm trimmed trim + trimming trim trims trim + trinculo trinculo trinculos trinculo + trinkets trinket trip trip + tripartite tripartit tripe tripe + triple tripl triplex triplex + tripoli tripoli tripolis tripoli + tripp tripp tripping trip + trippingly trippingli trips trip + tristful trist triton triton + triumph triumph triumphant triumphant + triumphantly triumphantli triumpher triumpher + triumphers triumpher triumphing triumph + triumphs triumph triumvir triumvir + triumvirate triumvir triumvirs triumvir + triumviry triumviri trivial trivial + troat troat trod trod + trodden trodden troiant troiant + troien troien troilus troilu + troiluses troilus trojan trojan + trojans trojan troll troll + tromperies tromperi trompet trompet + troop troop trooping troop + troops troop trop trop + trophies trophi trophy trophi + tropically tropic trot trot + troth troth trothed troth + troths troth trots trot + trotting trot trouble troubl + troubled troubl troubler troubler + troubles troubl troublesome troublesom + troublest troublest troublous troublou + trough trough trout trout + trouts trout trovato trovato + trow trow trowel trowel + trowest trowest troy troi + troyan troyan troyans troyan + truant truant truce truce + truckle truckl trudge trudg + true true trueborn trueborn + truepenny truepenni truer truer + truest truest truie truie + trull trull trulls trull + truly truli trump trump + trumpery trumperi trumpet trumpet + trumpeter trumpet trumpeters trumpet + trumpets trumpet truncheon truncheon + truncheoners truncheon trundle trundl + trunk trunk trunks trunk + trust trust trusted trust + truster truster trusters truster + trusting trust trusts trust + trusty trusti truth truth + truths truth try try + ts ts tu tu + tuae tuae tub tub + tubal tubal tubs tub + tuck tuck tucket tucket + tuesday tuesdai tuft tuft + 
tufts tuft tug tug + tugg tugg tugging tug + tuition tuition tullus tullu + tully tulli tumble tumbl + tumbled tumbl tumbler tumbler + tumbling tumbl tumult tumult + tumultuous tumultu tun tun + tune tune tuneable tuneabl + tuned tune tuners tuner + tunes tune tunis tuni + tuns tun tupping tup + turban turban turbans turban + turbulence turbul turbulent turbul + turd turd turf turf + turfy turfi turk turk + turkey turkei turkeys turkei + turkish turkish turks turk + turlygod turlygod turmoil turmoil + turmoiled turmoil turn turn + turnbull turnbul turncoat turncoat + turncoats turncoat turned turn + turneth turneth turning turn + turnips turnip turns turn + turph turph turpitude turpitud + turquoise turquois turret turret + turrets turret turtle turtl + turtles turtl turvy turvi + tuscan tuscan tush tush + tut tut tutor tutor + tutored tutor tutors tutor + tutto tutto twain twain + twang twang twangling twangl + twas twa tway twai + tweaks tweak tween tween + twelfth twelfth twelve twelv + twelvemonth twelvemonth twentieth twentieth + twenty twenti twere twere + twice twice twig twig + twiggen twiggen twigs twig + twilight twilight twill twill + twilled twill twin twin + twine twine twink twink + twinkle twinkl twinkled twinkl + twinkling twinkl twinn twinn + twins twin twire twire + twist twist twisted twist + twit twit twits twit + twitting twit twixt twixt + two two twofold twofold + twopence twopenc twopences twopenc + twos two twould twould + tyb tyb tybalt tybalt + tybalts tybalt tyburn tyburn + tying ty tyke tyke + tymbria tymbria type type + types type typhon typhon + tyrannical tyrann tyrannically tyrann + tyrannize tyrann tyrannous tyrann + tyranny tyranni tyrant tyrant + tyrants tyrant tyrian tyrian + tyrrel tyrrel u u + ubique ubiqu udders udder + udge udg uds ud + uglier uglier ugliest ugliest + ugly ugli ulcer ulcer + ulcerous ulcer ulysses ulyss + um um umber umber + umbra umbra umbrage umbrag + umfrevile umfrevil umpire umpir + umpires umpir un un + 
unable unabl unaccommodated unaccommod + unaccompanied unaccompani unaccustom unaccustom + unaching unach unacquainted unacquaint + unactive unact unadvis unadvi + unadvised unadvis unadvisedly unadvisedli + unagreeable unagre unanel unanel + unanswer unansw unappeas unappea + unapproved unapprov unapt unapt + unaptness unapt unarm unarm + unarmed unarm unarms unarm + unassail unassail unassailable unassail + unattainted unattaint unattempted unattempt + unattended unattend unauspicious unauspici + unauthorized unauthor unavoided unavoid + unawares unawar unback unback + unbak unbak unbanded unband + unbar unbar unbarb unbarb + unbashful unbash unbated unbat + unbatter unbatt unbecoming unbecom + unbefitting unbefit unbegot unbegot + unbegotten unbegotten unbelieved unbeliev + unbend unbend unbent unbent + unbewail unbewail unbid unbid + unbidden unbidden unbind unbind + unbinds unbind unbitted unbit + unbless unbless unblest unblest + unbloodied unbloodi unblown unblown + unbodied unbodi unbolt unbolt + unbolted unbolt unbonneted unbonnet + unbookish unbookish unborn unborn + unbosom unbosom unbound unbound + unbounded unbound unbow unbow + unbowed unbow unbrac unbrac + unbraced unbrac unbraided unbraid + unbreathed unbreath unbred unbr + unbreech unbreech unbridled unbridl + unbroke unbrok unbruis unbrui + unbruised unbruis unbuckle unbuckl + unbuckles unbuckl unbuckling unbuckl + unbuild unbuild unburden unburden + unburdens unburden unburied unburi + unburnt unburnt unburthen unburthen + unbutton unbutton unbuttoning unbutton + uncapable uncap uncape uncap + uncase uncas uncasing uncas + uncaught uncaught uncertain uncertain + uncertainty uncertainti unchain unchain + unchanging unchang uncharge uncharg + uncharged uncharg uncharitably uncharit + unchary unchari unchaste unchast + uncheck uncheck unchilded unchild + uncivil uncivil unclaim unclaim + unclasp unclasp uncle uncl + unclean unclean uncleanliness uncleanli + uncleanly uncleanli uncleanness unclean + 
uncles uncl unclew unclew + unclog unclog uncoined uncoin + uncolted uncolt uncomeliness uncomeli + uncomfortable uncomfort uncompassionate uncompassion + uncomprehensive uncomprehens unconfinable unconfin + unconfirm unconfirm unconfirmed unconfirm + unconquer unconqu unconquered unconqu + unconsidered unconsid unconstant unconst + unconstrain unconstrain unconstrained unconstrain + uncontemn uncontemn uncontroll uncontrol + uncorrected uncorrect uncounted uncount + uncouple uncoupl uncourteous uncourt + uncouth uncouth uncover uncov + uncovered uncov uncropped uncrop + uncross uncross uncrown uncrown + unction unction unctuous unctuou + uncuckolded uncuckold uncurable uncur + uncurbable uncurb uncurbed uncurb + uncurls uncurl uncurrent uncurr + uncurse uncurs undaunted undaunt + undeaf undeaf undeck undeck + undeeded undeed under under + underbearing underbear underborne underborn + undercrest undercrest underfoot underfoot + undergo undergo undergoes undergo + undergoing undergo undergone undergon + underground underground underhand underhand + underlings underl undermine undermin + underminers undermin underneath underneath + underprizing underpr underprop underprop + understand understand understandeth understandeth + understanding understand understandings understand + understands understand understood understood + underta underta undertake undertak + undertakeing undertak undertaker undertak + undertakes undertak undertaking undertak + undertakings undertak undertook undertook + undervalu undervalu undervalued undervalu + underwent underw underwrit underwrit + underwrite underwrit undescried undescri + undeserved undeserv undeserver undeserv + undeservers undeserv undeserving undeserv + undetermin undetermin undid undid + undinted undint undiscernible undiscern + undiscover undiscov undishonoured undishonour + undispos undispo undistinguishable undistinguish + undistinguished undistinguish undividable undivid + undivided undivid undivulged undivulg + undo 
undo undoes undo + undoing undo undone undon + undoubted undoubt undoubtedly undoubtedli + undream undream undress undress + undressed undress undrown undrown + unduteous undut undutiful unduti + une un uneared unear + unearned unearn unearthly unearthli + uneasines uneasin uneasy uneasi + uneath uneath uneducated uneduc + uneffectual uneffectu unelected unelect + unequal unequ uneven uneven + unexamin unexamin unexecuted unexecut + unexpected unexpect unexperienc unexperienc + unexperient unexperi unexpressive unexpress + unfair unfair unfaithful unfaith + unfallible unfal unfam unfam + unfashionable unfashion unfasten unfasten + unfather unfath unfathered unfath + unfed unf unfeed unfe + unfeeling unfeel unfeigned unfeign + unfeignedly unfeignedli unfellowed unfellow + unfelt unfelt unfenced unfenc + unfilial unfili unfill unfil + unfinish unfinish unfirm unfirm + unfit unfit unfitness unfit + unfix unfix unfledg unfledg + unfold unfold unfolded unfold + unfoldeth unfoldeth unfolding unfold + unfolds unfold unfool unfool + unforc unforc unforced unforc + unforfeited unforfeit unfortified unfortifi + unfortunate unfortun unfought unfought + unfrequented unfrequ unfriended unfriend + unfurnish unfurnish ungain ungain + ungalled ungal ungart ungart + ungarter ungart ungenitur ungenitur + ungentle ungentl ungentleness ungentl + ungently ungent ungird ungird + ungodly ungodli ungor ungor + ungot ungot ungotten ungotten + ungovern ungovern ungracious ungraci + ungrateful ungrat ungravely ungrav + ungrown ungrown unguarded unguard + unguem unguem unguided unguid + unhack unhack unhair unhair + unhallow unhallow unhallowed unhallow + unhand unhand unhandled unhandl + unhandsome unhandsom unhang unhang + unhappied unhappi unhappily unhappili + unhappiness unhappi unhappy unhappi + unhardened unharden unharm unharm + unhatch unhatch unheard unheard + unhearts unheart unheedful unheed + unheedfully unheedfulli unheedy unheedi + unhelpful unhelp unhidden unhidden + unholy 
unholi unhop unhop + unhopefullest unhopefullest unhorse unhors + unhospitable unhospit unhous unhou + unhoused unhous unhurtful unhurt + unicorn unicorn unicorns unicorn + unimproved unimprov uninhabitable uninhabit + uninhabited uninhabit unintelligent unintellig + union union unions union + unite unit united unit + unity uniti universal univers + universe univers universities univers + university univers unjointed unjoint + unjust unjust unjustice unjustic + unjustly unjustli unkennel unkennel + unkept unkept unkind unkind + unkindest unkindest unkindly unkindli + unkindness unkind unking unk + unkinglike unkinglik unkiss unkiss + unknit unknit unknowing unknow + unknown unknown unlace unlac + unlaid unlaid unlawful unlaw + unlawfully unlawfulli unlearn unlearn + unlearned unlearn unless unless + unlesson unlesson unletter unlett + unlettered unlett unlick unlick + unlike unlik unlikely unlik + unlimited unlimit unlineal unlin + unlink unlink unload unload + unloaded unload unloading unload + unloads unload unlock unlock + unlocks unlock unlook unlook + unlooked unlook unloos unloo + unloose unloos unlov unlov + unloving unlov unluckily unluckili + unlucky unlucki unmade unmad + unmake unmak unmanly unmanli + unmann unmann unmanner unmann + unmannerd unmannerd unmannerly unmannerli + unmarried unmarri unmask unmask + unmasked unmask unmasking unmask + unmasks unmask unmast unmast + unmatch unmatch unmatchable unmatch + unmatched unmatch unmeasurable unmeasur + unmeet unmeet unmellowed unmellow + unmerciful unmerci unmeritable unmerit + unmeriting unmerit unminded unmind + unmindfull unmindful unmingled unmingl + unmitigable unmitig unmitigated unmitig + unmix unmix unmoan unmoan + unmov unmov unmoved unmov + unmoving unmov unmuffles unmuffl + unmuffling unmuffl unmusical unmus + unmuzzle unmuzzl unmuzzled unmuzzl + unnatural unnatur unnaturally unnatur + unnaturalness unnatur unnecessarily unnecessarili + unnecessary unnecessari unneighbourly unneighbourli + 
unnerved unnerv unnoble unnobl + unnoted unnot unnumb unnumb + unnumber unnumb unowed unow + unpack unpack unpaid unpaid + unparagon unparagon unparallel unparallel + unpartial unparti unpath unpath + unpaved unpav unpay unpai + unpeaceable unpeac unpeg unpeg + unpeople unpeopl unpeopled unpeopl + unperfect unperfect unperfectness unperfect + unpick unpick unpin unpin + unpink unpink unpitied unpiti + unpitifully unpitifulli unplagu unplagu + unplausive unplaus unpleas unplea + unpleasant unpleas unpleasing unpleas + unpolicied unpolici unpolish unpolish + unpolished unpolish unpolluted unpollut + unpossess unpossess unpossessing unpossess + unpossible unposs unpractis unpracti + unpregnant unpregn unpremeditated unpremedit + unprepar unprepar unprepared unprepar + unpress unpress unprevailing unprevail + unprevented unprev unpriz unpriz + unprizable unpriz unprofitable unprofit + unprofited unprofit unproper unprop + unproperly unproperli unproportion unproport + unprovide unprovid unprovided unprovid + unprovident unprovid unprovokes unprovok + unprun unprun unpruned unprun + unpublish unpublish unpurged unpurg + unpurpos unpurpo unqualitied unqual + unqueen unqueen unquestion unquest + unquestionable unquestion unquiet unquiet + unquietly unquietli unquietness unquiet + unraised unrais unrak unrak + unread unread unready unreadi + unreal unreal unreasonable unreason + unreasonably unreason unreclaimed unreclaim + unreconciled unreconcil unreconciliable unreconcili + unrecounted unrecount unrecuring unrecur + unregarded unregard unregist unregist + unrelenting unrel unremovable unremov + unremovably unremov unreprievable unrepriev + unresolv unresolv unrespected unrespect + unrespective unrespect unrest unrest + unrestor unrestor unrestrained unrestrain + unreveng unreveng unreverend unreverend + unreverent unrever unrevers unrev + unrewarded unreward unrighteous unright + unrightful unright unripe unrip + unripp unripp unrivall unrival + unroll unrol unroof 
unroof + unroosted unroost unroot unroot + unrough unrough unruly unruli + unsafe unsaf unsaluted unsalut + unsanctified unsanctifi unsatisfied unsatisfi + unsavoury unsavouri unsay unsai + unscalable unscal unscann unscann + unscarr unscarr unschool unschool + unscorch unscorch unscour unscour + unscratch unscratch unseal unseal + unseam unseam unsearch unsearch + unseason unseason unseasonable unseason + unseasonably unseason unseasoned unseason + unseconded unsecond unsecret unsecret + unseduc unseduc unseeing unse + unseeming unseem unseemly unseemli + unseen unseen unseminar unseminar + unseparable unsepar unserviceable unservic + unset unset unsettle unsettl + unsettled unsettl unsever unsev + unsex unsex unshak unshak + unshaked unshak unshaken unshaken + unshaped unshap unshapes unshap + unsheath unsheath unsheathe unsheath + unshorn unshorn unshout unshout + unshown unshown unshrinking unshrink + unshrubb unshrubb unshunn unshunn + unshunnable unshunn unsifted unsift + unsightly unsightli unsinew unsinew + unsisting unsist unskilful unskil + unskilfully unskilfulli unskillful unskil + unslipping unslip unsmirched unsmirch + unsoil unsoil unsolicited unsolicit + unsorted unsort unsought unsought + unsound unsound unsounded unsound + unspeak unspeak unspeakable unspeak + unspeaking unspeak unsphere unspher + unspoke unspok unspoken unspoken + unspotted unspot unsquar unsquar + unstable unstabl unstaid unstaid + unstain unstain unstained unstain + unstanched unstanch unstate unstat + unsteadfast unsteadfast unstooping unstoop + unstringed unstring unstuff unstuff + unsubstantial unsubstanti unsuitable unsuit + unsuiting unsuit unsullied unsulli + unsunn unsunn unsur unsur + unsure unsur unsuspected unsuspect + unsway unswai unswayable unsway + unswayed unswai unswear unswear + unswept unswept unsworn unsworn + untainted untaint untalk untalk + untangle untangl untangled untangl + untasted untast untaught untaught + untempering untemp untender untend + untent 
untent untented untent + unthankful unthank unthankfulness unthank + unthink unthink unthought unthought + unthread unthread unthrift unthrift + unthrifts unthrift unthrifty unthrifti + untie unti untied unti + until until untimber untimb + untimely untim untir untir + untirable untir untired untir + untitled untitl unto unto + untold untold untouch untouch + untoward untoward untowardly untowardli + untraded untrad untrain untrain + untrained untrain untread untread + untreasur untreasur untried untri + untrimmed untrim untrod untrod + untrodden untrodden untroubled untroubl + untrue untru untrussing untruss + untruth untruth untruths untruth + untucked untuck untun untun + untune untun untuneable untun + untutor untutor untutored untutor + untwine untwin unurg unurg + unus unu unused unus + unusual unusu unvalued unvalu + unvanquish unvanquish unvarnish unvarnish + unveil unveil unveiling unveil + unvenerable unvener unvex unvex + unviolated unviol unvirtuous unvirtu + unvisited unvisit unvulnerable unvulner + unwares unwar unwarily unwarili + unwash unwash unwatch unwatch + unwearied unweari unwed unw + unwedgeable unwedg unweeded unweed + unweighed unweigh unweighing unweigh + unwelcome unwelcom unwept unwept + unwhipp unwhipp unwholesome unwholesom + unwieldy unwieldi unwilling unwil + unwillingly unwillingli unwillingness unwilling + unwind unwind unwiped unwip + unwise unwis unwisely unwis + unwish unwish unwished unwish + unwitted unwit unwittingly unwittingli + unwonted unwont unwooed unwoo + unworthier unworthi unworthiest unworthiest + unworthily unworthili unworthiness unworthi + unworthy unworthi unwrung unwrung + unyok unyok unyoke unyok + up up upbraid upbraid + upbraided upbraid upbraidings upbraid + upbraids upbraid uphoarded uphoard + uphold uphold upholdeth upholdeth + upholding uphold upholds uphold + uplift uplift uplifted uplift + upmost upmost upon upon + upper upper uprear uprear + upreared uprear upright upright + uprighteously upright 
uprightness upright + uprise upris uprising upris + uproar uproar uproars uproar + uprous uprou upshoot upshoot + upshot upshot upside upsid + upspring upspr upstairs upstair + upstart upstart upturned upturn + upward upward upwards upward + urchin urchin urchinfield urchinfield + urchins urchin urg urg + urge urg urged urg + urgent urgent urges urg + urgest urgest urging urg + urinal urin urinals urin + urine urin urn urn + urns urn urs ur + ursa ursa ursley urslei + ursula ursula urswick urswick + us us usage usag + usance usanc usances usanc + use us used us + useful us useless useless + user user uses us + usest usest useth useth + usher usher ushered usher + ushering usher ushers usher + using us usual usual + usually usual usurer usur + usurers usur usuries usuri + usuring usur usurp usurp + usurpation usurp usurped usurp + usurper usurp usurpers usurp + usurping usurp usurpingly usurpingli + usurps usurp usury usuri + ut ut utensil utensil + utensils utensil utility util + utmost utmost utt utt + utter utter utterance utter + uttered utter uttereth uttereth + uttering utter utterly utterli + uttermost uttermost utters utter + uy uy v v + va va vacancy vacanc + vacant vacant vacation vacat + vade vade vagabond vagabond + vagabonds vagabond vagram vagram + vagrom vagrom vail vail + vailed vail vailing vail + vaillant vaillant vain vain + vainer vainer vainglory vainglori + vainly vainli vainness vain + vais vai valanc valanc + valance valanc vale vale + valence valenc valentine valentin + valentinus valentinu valentio valentio + valeria valeria valerius valeriu + vales vale valiant valiant + valiantly valiantli valiantness valiant + validity valid vallant vallant + valley vallei valleys vallei + vally valli valor valor + valorous valor valorously valor + valour valour valu valu + valuation valuat value valu + valued valu valueless valueless + values valu valuing valu + vane vane vanish vanish + vanished vanish vanishes vanish + vanishest vanishest vanishing 
vanish + vanities vaniti vanity vaniti + vanquish vanquish vanquished vanquish + vanquisher vanquish vanquishest vanquishest + vanquisheth vanquisheth vant vant + vantage vantag vantages vantag + vantbrace vantbrac vapians vapian + vapor vapor vaporous vapor + vapour vapour vapours vapour + vara vara variable variabl + variance varianc variation variat + variations variat varied vari + variest variest variety varieti + varld varld varlet varlet + varletry varletri varlets varlet + varletto varletto varnish varnish + varrius varriu varro varro + vary vari varying vari + vassal vassal vassalage vassalag + vassals vassal vast vast + vastidity vastid vasty vasti + vat vat vater vater + vaudemont vaudemont vaughan vaughan + vault vault vaultages vaultag + vaulted vault vaulting vault + vaults vault vaulty vaulti + vaumond vaumond vaunt vaunt + vaunted vaunt vaunter vaunter + vaunting vaunt vauntingly vauntingli + vaunts vaunt vauvado vauvado + vaux vaux vaward vaward + ve ve veal veal + vede vede vehemence vehem + vehemency vehem vehement vehement + vehor vehor veil veil + veiled veil veiling veil + vein vein veins vein + vell vell velure velur + velutus velutu velvet velvet + vendible vendibl venerable vener + venereal vener venetia venetia + venetian venetian venetians venetian + veneys venei venge veng + vengeance vengeanc vengeances vengeanc + vengeful veng veni veni + venial venial venice venic + venison venison venit venit + venom venom venomous venom + venomously venom vent vent + ventages ventag vented vent + ventidius ventidiu ventricle ventricl + vents vent ventur ventur + venture ventur ventured ventur + ventures ventur venturing ventur + venturous ventur venue venu + venus venu venuto venuto + ver ver verb verb + verba verba verbal verbal + verbatim verbatim verbosity verbos + verdict verdict verdun verdun + verdure verdur vere vere + verefore verefor verg verg + verge verg vergers verger + verges verg verier verier + veriest veriest verified verifi + verify 
verifi verily verili + veritable verit verite verit + verities veriti verity veriti + vermilion vermilion vermin vermin + vernon vernon verona verona + veronesa veronesa versal versal + verse vers verses vers + versing vers vert vert + very veri vesper vesper + vessel vessel vessels vessel + vestal vestal vestments vestment + vesture vestur vetch vetch + vetches vetch veux veux + vex vex vexation vexat + vexations vexat vexed vex + vexes vex vexest vexest + vexeth vexeth vexing vex + vi vi via via + vial vial vials vial + viand viand viands viand + vic vic vicar vicar + vice vice vicegerent viceger + vicentio vicentio viceroy viceroi + viceroys viceroi vices vice + vici vici vicious viciou + viciousness vicious vict vict + victims victim victor victor + victoress victoress victories victori + victorious victori victors victor + victory victori victual victual + victuall victual victuals victual + videlicet videlicet video video + vides vide videsne videsn + vidi vidi vie vie + vied vi vienna vienna + view view viewest viewest + vieweth vieweth viewing view + viewless viewless views view + vigil vigil vigilance vigil + vigilant vigil vigitant vigit + vigour vigour vii vii + viii viii vile vile + vilely vile vileness vile + viler viler vilest vilest + vill vill village villag + villager villag villagery villageri + villages villag villain villain + villainies villaini villainous villain + villainously villain villains villain + villainy villaini villanies villani + villanous villan villany villani + villiago villiago villian villian + villianda villianda villians villian + vinaigre vinaigr vincentio vincentio + vincere vincer vindicative vindic + vine vine vinegar vinegar + vines vine vineyard vineyard + vineyards vineyard vint vint + vintner vintner viol viol + viola viola violate violat + violated violat violates violat + violation violat violator violat + violence violenc violent violent + violenta violenta violenteth violenteth + violently violent violet violet + 
violets violet viper viper + viperous viper vipers viper + vir vir virgilia virgilia + virgin virgin virginal virgin + virginalling virginal virginity virgin + virginius virginiu virgins virgin + virgo virgo virtue virtu + virtues virtu virtuous virtuou + virtuously virtuous visag visag + visage visag visages visag + visard visard viscount viscount + visible visibl visibly visibl + vision vision visions vision + visit visit visitation visit + visitations visit visited visit + visiting visit visitings visit + visitor visitor visitors visitor + visits visit visor visor + vita vita vitae vita + vital vital vitement vitement + vitruvio vitruvio vitx vitx + viva viva vivant vivant + vive vive vixen vixen + viz viz vizaments vizament + vizard vizard vizarded vizard + vizards vizard vizor vizor + vlouting vlout vocation vocat + vocativo vocativo vocatur vocatur + voce voce voic voic + voice voic voices voic + void void voided void + voiding void voke voke + volable volabl volant volant + volivorco volivorco volley vollei + volquessen volquessen volsce volsc + volsces volsc volscian volscian + volscians volscian volt volt + voltemand voltemand volubility volubl + voluble volubl volume volum + volumes volum volumnia volumnia + volumnius volumniu voluntaries voluntari + voluntary voluntari voluptuously voluptu + voluptuousness voluptu vomissement vomiss + vomit vomit vomits vomit + vor vor vore vore + vortnight vortnight vot vot + votaries votari votarist votarist + votarists votarist votary votari + votre votr vouch vouch + voucher voucher vouchers voucher + vouches vouch vouching vouch + vouchsaf vouchsaf vouchsafe vouchsaf + vouchsafed vouchsaf vouchsafes vouchsaf + vouchsafing vouchsaf voudrais voudrai + vour vour vous vou + voutsafe voutsaf vow vow + vowed vow vowel vowel + vowels vowel vowing vow + vows vow vox vox + voyage voyag voyages voyag + vraiment vraiment vulcan vulcan + vulgar vulgar vulgarly vulgarli + vulgars vulgar vulgo vulgo + vulnerable vulner vulture 
vultur + vultures vultur vurther vurther + w w wad wad + waddled waddl wade wade + waded wade wafer wafer + waft waft waftage waftag + wafting waft wafts waft + wag wag wage wage + wager wager wagers wager + wages wage wagging wag + waggish waggish waggling waggl + waggon waggon waggoner waggon + wagon wagon wagoner wagon + wags wag wagtail wagtail + wail wail wailful wail + wailing wail wails wail + wain wain wainropes wainrop + wainscot wainscot waist waist + wait wait waited wait + waiter waiter waiteth waiteth + waiting wait waits wait + wak wak wake wake + waked wake wakefield wakefield + waken waken wakened waken + wakes wake wakest wakest + waking wake wales wale + walk walk walked walk + walking walk walks walk + wall wall walled wall + wallet wallet wallets wallet + wallon wallon walloon walloon + wallow wallow walls wall + walnut walnut walter walter + wan wan wand wand + wander wander wanderer wander + wanderers wander wandering wander + wanders wander wands wand + wane wane waned wane + wanes wane waning wane + wann wann want want + wanted want wanteth wanteth + wanting want wanton wanton + wantonly wantonli wantonness wanton + wantons wanton wants want + wappen wappen war war + warble warbl warbling warbl + ward ward warded ward + warden warden warder warder + warders warder wardrobe wardrob + wardrop wardrop wards ward + ware ware wares ware + warily warili warkworth warkworth + warlike warlik warm warm + warmed warm warmer warmer + warming warm warms warm + warmth warmth warn warn + warned warn warning warn + warnings warn warns warn + warp warp warped warp + warr warr warrant warrant + warranted warrant warranteth warranteth + warrantise warrantis warrantize warrant + warrants warrant warranty warranti + warren warren warrener warren + warring war warrior warrior + warriors warrior wars war + wart wart warwick warwick + warwickshire warwickshir wary wari + was wa wash wash + washed wash washer washer + washes wash washford washford + washing wash 
wasp wasp + waspish waspish wasps wasp + wassail wassail wassails wassail + wast wast waste wast + wasted wast wasteful wast + wasters waster wastes wast + wasting wast wat wat + watch watch watched watch + watchers watcher watches watch + watchful watch watching watch + watchings watch watchman watchman + watchmen watchmen watchword watchword + water water waterdrops waterdrop + watered water waterfly waterfli + waterford waterford watering water + waterish waterish waterpots waterpot + waterrugs waterrug waters water + waterton waterton watery wateri + wav wav wave wave + waved wave waver waver + waverer waver wavering waver + waves wave waving wave + waw waw wawl wawl + wax wax waxed wax + waxen waxen waxes wax + waxing wax way wai + waylaid waylaid waylay waylai + ways wai wayward wayward + waywarder wayward waywardness wayward + we we weak weak + weaken weaken weakens weaken + weaker weaker weakest weakest + weakling weakl weakly weakli + weakness weak weal weal + wealsmen wealsmen wealth wealth + wealthiest wealthiest wealthily wealthili + wealthy wealthi wealtlly wealtlli + wean wean weapon weapon + weapons weapon wear wear + wearer wearer wearers wearer + wearied weari wearies weari + weariest weariest wearily wearili + weariness weari wearing wear + wearisome wearisom wears wear + weary weari weasel weasel + weather weather weathercock weathercock + weathers weather weav weav + weave weav weaver weaver + weavers weaver weaves weav + weaving weav web web + wed wed wedded wed + wedding wed wedg wedg + wedged wedg wedges wedg + wedlock wedlock wednesday wednesdai + weed weed weeded weed + weeder weeder weeding weed + weeds weed weedy weedi + week week weeke week + weekly weekli weeks week + ween ween weening ween + weep weep weeper weeper + weeping weep weepingly weepingli + weepings weep weeps weep + weet weet weigh weigh + weighed weigh weighing weigh + weighs weigh weight weight + weightier weightier weightless weightless + weights weight weighty weighti + 
weird weird welcom welcom + welcome welcom welcomer welcom + welcomes welcom welcomest welcomest + welfare welfar welkin welkin + well well wells well + welsh welsh welshman welshman + welshmen welshmen welshwomen welshwomen + wench wench wenches wench + wenching wench wend wend + went went wept wept + weraday weradai were were + wert wert west west + western western westminster westminst + westmoreland westmoreland westward westward + wet wet wether wether + wetting wet wezand wezand + whale whale whales whale + wharf wharf wharfs wharf + what what whate whate + whatever whatev whatsoe whatso + whatsoever whatsoev whatsome whatsom + whe whe wheat wheat + wheaten wheaten wheel wheel + wheeling wheel wheels wheel + wheer wheer wheeson wheeson + wheezing wheez whelk whelk + whelks whelk whelm whelm + whelp whelp whelped whelp + whelps whelp when when + whenas whena whence whenc + whencesoever whencesoev whene whene + whenever whenev whensoever whensoev + where where whereabout whereabout + whereas wherea whereat whereat + whereby wherebi wherefore wherefor + wherein wherein whereinto whereinto + whereof whereof whereon whereon + whereout whereout whereso whereso + wheresoe whereso wheresoever wheresoev + wheresome wheresom whereto whereto + whereuntil whereuntil whereunto whereunto + whereupon whereupon wherever wherev + wherewith wherewith wherewithal wherewith + whet whet whether whether + whetstone whetston whetted whet + whew whew whey whei + which which whiff whiff + whiffler whiffler while while + whiles while whilst whilst + whin whin whine whine + whined whine whinid whinid + whining whine whip whip + whipp whipp whippers whipper + whipping whip whips whip + whipster whipster whipstock whipstock + whipt whipt whirl whirl + whirled whirl whirligig whirligig + whirling whirl whirlpool whirlpool + whirls whirl whirlwind whirlwind + whirlwinds whirlwind whisp whisp + whisper whisper whispering whisper + whisperings whisper whispers whisper + whist whist whistle 
whistl + whistles whistl whistling whistl + whit whit white white + whitehall whitehal whitely white + whiteness white whiter whiter + whites white whitest whitest + whither whither whiting white + whitmore whitmor whitsters whitster + whitsun whitsun whittle whittl + whizzing whizz who who + whoa whoa whoe whoe + whoever whoever whole whole + wholesom wholesom wholesome wholesom + wholly wholli whom whom + whoobub whoobub whoop whoop + whooping whoop whor whor + whore whore whoremaster whoremast + whoremasterly whoremasterli whoremonger whoremong + whores whore whoreson whoreson + whoresons whoreson whoring whore + whorish whorish whose whose + whoso whoso whosoe whoso + whosoever whosoev why why + wi wi wick wick + wicked wick wickednes wickedn + wickedness wicked wicket wicket + wicky wicki wid wid + wide wide widens widen + wider wider widow widow + widowed widow widower widow + widowhood widowhood widows widow + wield wield wife wife + wight wight wights wight + wild wild wildcats wildcat + wilder wilder wilderness wilder + wildest wildest wildfire wildfir + wildly wildli wildness wild + wilds wild wiles wile + wilful wil wilfull wilful + wilfully wilfulli wilfulnes wilfuln + wilfulness wil will will + willed will willers willer + willeth willeth william william + williams william willing will + willingly willingli willingness willing + willoughby willoughbi willow willow + wills will wilt wilt + wiltshire wiltshir wimpled wimpl + win win wince winc + winch winch winchester winchest + wincot wincot wind wind + winded wind windgalls windgal + winding wind windlasses windlass + windmill windmil window window + windows window windpipe windpip + winds wind windsor windsor + windy windi wine wine + wing wing winged wing + wingfield wingfield wingham wingham + wings wing wink wink + winking wink winks wink + winner winner winners winner + winning win winnow winnow + winnowed winnow winnows winnow + wins win winter winter + winterly winterli winters winter + wip wip 
wipe wipe + wiped wipe wipes wipe + wiping wipe wire wire + wires wire wiry wiri + wisdom wisdom wisdoms wisdom + wise wise wiselier wiseli + wisely wise wiser wiser + wisest wisest wish wish + wished wish wisher wisher + wishers wisher wishes wish + wishest wishest wisheth wisheth + wishful wish wishing wish + wishtly wishtli wisp wisp + wist wist wit wit + witb witb witch witch + witchcraft witchcraft witches witch + witching witch with with + withal withal withdraw withdraw + withdrawing withdraw withdrawn withdrawn + withdrew withdrew wither wither + withered wither withering wither + withers wither withheld withheld + withhold withhold withholds withhold + within within withold withold + without without withstand withstand + withstanding withstand withstood withstood + witless witless witness wit + witnesses wit witnesseth witnesseth + witnessing wit wits wit + witted wit wittenberg wittenberg + wittiest wittiest wittily wittili + witting wit wittingly wittingli + wittol wittol wittolly wittolli + witty witti wiv wiv + wive wive wived wive + wives wive wiving wive + wizard wizard wizards wizard + wo wo woe woe + woeful woeful woefull woeful + woefullest woefullest woes woe + woful woful wolf wolf + wolfish wolfish wolsey wolsei + wolves wolv wolvish wolvish + woman woman womanhood womanhood + womanish womanish womankind womankind + womanly womanli womb womb + wombs womb womby wombi + women women won won + woncot woncot wond wond + wonder wonder wondered wonder + wonderful wonder wonderfully wonderfulli + wondering wonder wonders wonder + wondrous wondrou wondrously wondrous + wont wont wonted wont + woo woo wood wood + woodbine woodbin woodcock woodcock + woodcocks woodcock wooden wooden + woodland woodland woodman woodman + woodmonger woodmong woods wood + woodstock woodstock woodville woodvil + wooed woo wooer wooer + wooers wooer wooes wooe + woof woof wooing woo + wooingly wooingli wool wool + woollen woollen woolly woolli + woolsack woolsack woolsey 
woolsei + woolward woolward woos woo + wor wor worcester worcest + word word words word + wore wore worins worin + work work workers worker + working work workings work + workman workman workmanly workmanli + workmanship workmanship workmen workmen + works work worky worki + world world worldlings worldl + worldly worldli worlds world + worm worm worms worm + wormwood wormwood wormy wormi + worn worn worried worri + worries worri worry worri + worrying worri worse wors + worser worser worship worship + worshipful worship worshipfully worshipfulli + worshipp worshipp worshipper worshipp + worshippers worshipp worshippest worshippest + worships worship worst worst + worsted worst wort wort + worth worth worthied worthi + worthier worthier worthies worthi + worthiest worthiest worthily worthili + worthiness worthi worthless worthless + worths worth worthy worthi + worts wort wot wot + wots wot wotting wot + wouid wouid would would + wouldest wouldest wouldst wouldst + wound wound wounded wound + wounding wound woundings wound + woundless woundless wounds wound + wouns woun woven woven + wow wow wrack wrack + wrackful wrack wrangle wrangl + wrangler wrangler wranglers wrangler + wrangling wrangl wrap wrap + wrapp wrapp wraps wrap + wrapt wrapt wrath wrath + wrathful wrath wrathfully wrathfulli + wraths wrath wreak wreak + wreakful wreak wreaks wreak + wreath wreath wreathed wreath + wreathen wreathen wreaths wreath + wreck wreck wrecked wreck + wrecks wreck wren wren + wrench wrench wrenching wrench + wrens wren wrest wrest + wrested wrest wresting wrest + wrestle wrestl wrestled wrestl + wrestler wrestler wrestling wrestl + wretch wretch wretchcd wretchcd + wretched wretch wretchedness wretched + wretches wretch wring wring + wringer wringer wringing wring + wrings wring wrinkle wrinkl + wrinkled wrinkl wrinkles wrinkl + wrist wrist wrists wrist + writ writ write write + writer writer writers writer + writes write writhled writhl + writing write writings write + writs 
writ written written + wrong wrong wronged wrong + wronger wronger wrongful wrong + wrongfully wrongfulli wronging wrong + wrongly wrongli wrongs wrong + wronk wronk wrote wrote + wroth wroth wrought wrought + wrung wrung wry wry + wrying wry wt wt + wul wul wye wye + x x xanthippe xanthipp + xi xi xii xii + xiii xiii xiv xiv + xv xv y y + yard yard yards yard + yare yare yarely yare + yarn yarn yaughan yaughan + yaw yaw yawn yawn + yawning yawn ycleped yclepe + ycliped yclipe ye ye + yea yea yead yead + year year yearly yearli + yearn yearn yearns yearn + years year yeas yea + yeast yeast yedward yedward + yell yell yellow yellow + yellowed yellow yellowing yellow + yellowness yellow yellows yellow + yells yell yelping yelp + yeoman yeoman yeomen yeomen + yerk yerk yes ye + yesterday yesterdai yesterdays yesterdai + yesternight yesternight yesty yesti + yet yet yew yew + yicld yicld yield yield + yielded yield yielder yielder + yielders yielder yielding yield + yields yield yok yok + yoke yoke yoked yoke + yokefellow yokefellow yokes yoke + yoketh yoketh yon yon + yond yond yonder yonder + yongrey yongrei yore yore + yorick yorick york york + yorkists yorkist yorks york + yorkshire yorkshir you you + young young younger younger + youngest youngest youngling youngl + younglings youngl youngly youngli + younker younker your your + yours your yourself yourself + yourselves yourselv youth youth + youthful youth youths youth + youtli youtli zanies zani + zany zani zeal zeal + zealous zealou zeals zeal + zed zed zenelophon zenelophon + zenith zenith zephyrs zephyr + zir zir zo zo + zodiac zodiac zodiacs zodiac + zone zone zounds zound + zwagger zwagger +} + + +set i 0 +foreach {in out} $test_vocab { + do_test "1.$i.($in -> $out)" { + lindex [sqlite3_fts5_tokenize db porter $in] 0 + } $out + incr i +} + + +finish_test + diff --git a/ext/fts5/fts5tokenizer.test b/ext/fts5/fts5tokenizer.test new file mode 100644 index 0000000000..9fa853c84c --- /dev/null +++ 
b/ext/fts5/fts5tokenizer.test @@ -0,0 +1,82 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 tokenizers +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. test] +} +source $testdir/tester.tcl +set testprefix fts5tokenizer + + + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); + DROP TABLE ft1; +} +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize='porter'); + DROP TABLE ft1; +} +do_execsql_test 1.2 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = porter); + DROP TABLE ft1; +} +do_execsql_test 1.3 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter'); + DROP TABLE ft1; +} +do_execsql_test 1.4 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter simple'); + DROP TABLE ft1; +} + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); + INSERT INTO ft1 VALUES('embedded databases'); +} +do_execsql_test 2.1 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding' } 1 +do_execsql_test 2.2 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database' } 1 +do_execsql_test 2.3 { + SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding' +} 1 + + +proc tcl_create {args} { + set ::targs $args + error "failed" +} +sqlite3_fts5_create_tokenizer db tcl tcl_create + +foreach {tn directive expected} { + 1 {tokenize='tcl a b c'} {a b c} + 2 {tokenize='tcl ''d'' ''e'' ''f'''} {d e f} + 3 {tokenize="tcl 'g' 'h' 'i'"} {g h i} + 4 {tokenize = tcl} {} +} { + do_catchsql_test 3.$tn.1 " + CREATE VIRTUAL TABLE ft2 USING fts5(x, $directive) + " {1 {error in tokenizer constructor}} + do_test 3.$tn.2 { set ::targs } 
$expected +} + + +do_catchsql_test 4.1 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc); +} {1 {parse error in "tokenize = tcl abc"}} +do_catchsql_test 4.2 { + CREATE VIRTUAL TABLE ft2 USING fts5(x y) +} {1 {parse error in "x y"}} + +finish_test + diff --git a/manifest b/manifest index 4fbaaa8a3d..e0c9552b7b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\sfts5\sbm25()\sfunction\sso\sthat\sit\smatches\sthe\sdocumentation. -D 2014-12-23T19:18:34.426 +C Fixes\sto\sbuilt-in\stokenizers. +D 2014-12-29T11:24:46.773 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,20 +104,22 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 6dc8a8504d84aef13d922db06faa8fbcf8c11424 -F ext/fts5/fts5.h 7598f4b55b888890650829124717874973c52649 -F ext/fts5/fts5Int.h 36054b1dfc4881a9b94f945b348ab6cc01c0c7a5 +F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74 +F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b +F ext/fts5/fts5Int.h b5dfed6a1b256ff21d11898f14ab337205844469 F ext/fts5/fts5_aux.c 445e54031ff94174673f4f5aac6c064df20a2a6b F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c 5caeb4e77680d635be25b899f97a29cf26fb45ce +F ext/fts5/fts5_config.c 73774e37a99218833b767f96bb5af35ebe43b77c F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853 -F ext/fts5/fts5_tcl.c 
4392e74421d24cc37c370732e8b48217cd2c1777 -F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b +F ext/fts5/fts5_tcl.c ce11e46589986b957b89809aabd3936d898d501b +F ext/fts5/fts5_tokenize.c 5d6e785345b0d87d174fcc0653bfacd0d9fd7f2e F ext/fts5/fts5auxdata.test 3844d0f098441cedf75b9cc96d5e6e94d1a3bef4 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 +F ext/fts5/fts5porter.test bb0fba17ce825527addec33fdb12b0062aabd588 +F ext/fts5/fts5tokenizer.test 09096cebc17a41650b52eec6e45c9a29923bfdcd w ext/fts5/fts5porter2.test F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1210,7 +1212,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ca5d44042aa7461dcc8b700b0763df4df9d4a891 -R 62a7bacc31f6964d59aeb316de8e27d6 +P 1ac7a8d0af9a71ddf6a1421033dcb9fa67c6120c +R c89483d1ebbc9d631471515ddc2ac098 U dan -Z 38f208724902306f1118acd017f9d3d1 +Z 32745924be785a3126a9785b730c9992 diff --git a/manifest.uuid b/manifest.uuid index c18189239b..c3d3f24818 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1ac7a8d0af9a71ddf6a1421033dcb9fa67c6120c \ No newline at end of file +b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e \ No newline at end of file From e716aca24b79c52cac5a08f218ec29279c5dda42 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 29 Dec 2014 15:59:36 +0000 Subject: [PATCH 060/206] Move all fts5 test files to new directory "ext/fts5/test". 
FossilOrigin-Name: 7f148edb30103c5f4fee20cd08e38537f9615bf2 --- {test => ext/fts5/test}/fts5aa.test | 4 ++- {test => ext/fts5/test}/fts5ab.test | 4 ++- {test => ext/fts5/test}/fts5ac.test | 4 ++- {test => ext/fts5/test}/fts5ad.test | 4 ++- {test => ext/fts5/test}/fts5ae.test | 4 ++- {test => ext/fts5/test}/fts5af.test | 4 ++- {test => ext/fts5/test}/fts5ag.test | 4 ++- {test => ext/fts5/test}/fts5ah.test | 4 ++- {test => ext/fts5/test}/fts5ai.test | 4 ++- {test => ext/fts5/test}/fts5aj.test | 4 ++- {test => ext/fts5/test}/fts5ak.test | 4 ++- {test => ext/fts5/test}/fts5al.test | 4 ++- ext/fts5/{ => test}/fts5auxdata.test | 2 +- {test => ext/fts5/test}/fts5ea.test | 4 ++- {test => ext/fts5/test}/fts5fault1.test | 4 ++- ext/fts5/{ => test}/fts5porter.test | 2 +- ext/fts5/{ => test}/fts5tokenizer.test | 2 +- manifest | 46 ++++++++++++------------- manifest.uuid | 2 +- test/permutations.test | 9 +---- 20 files changed, 70 insertions(+), 49 deletions(-) rename {test => ext/fts5/test}/fts5aa.test (98%) rename {test => ext/fts5/test}/fts5ab.test (97%) rename {test => ext/fts5/test}/fts5ac.test (99%) rename {test => ext/fts5/test}/fts5ad.test (99%) rename {test => ext/fts5/test}/fts5ae.test (98%) rename {test => ext/fts5/test}/fts5af.test (97%) rename {test => ext/fts5/test}/fts5ag.test (98%) rename {test => ext/fts5/test}/fts5ah.test (96%) rename {test => ext/fts5/test}/fts5ai.test (92%) rename {test => ext/fts5/test}/fts5aj.test (94%) rename {test => ext/fts5/test}/fts5ak.test (97%) rename {test => ext/fts5/test}/fts5al.test (98%) rename ext/fts5/{ => test}/fts5auxdata.test (97%) rename {test => ext/fts5/test}/fts5ea.test (96%) rename {test => ext/fts5/test}/fts5fault1.test (96%) rename ext/fts5/{ => test}/fts5porter.test (99%) rename ext/fts5/{ => test}/fts5tokenizer.test (96%) diff --git a/test/fts5aa.test b/ext/fts5/test/fts5aa.test similarity index 98% rename from test/fts5aa.test rename to ext/fts5/test/fts5aa.test index b9440c5c90..7b9bd77b67 100644 --- 
a/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -12,7 +12,9 @@ # focus of this script is testing the FTS5 module. # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5aa diff --git a/test/fts5ab.test b/ext/fts5/test/fts5ab.test similarity index 97% rename from test/fts5ab.test rename to ext/fts5/test/fts5ab.test index 88b8692787..1b744ebcce 100644 --- a/test/fts5ab.test +++ b/ext/fts5/test/fts5ab.test @@ -13,7 +13,9 @@ # # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5ab diff --git a/test/fts5ac.test b/ext/fts5/test/fts5ac.test similarity index 99% rename from test/fts5ac.test rename to ext/fts5/test/fts5ac.test index 1044a81932..cd51f31756 100644 --- a/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -13,7 +13,9 @@ # # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5ac diff --git a/test/fts5ad.test b/ext/fts5/test/fts5ad.test similarity index 99% rename from test/fts5ad.test rename to ext/fts5/test/fts5ad.test index bdf71265cf..8af8345efe 100644 --- a/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -13,7 +13,9 @@ # # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. 
test] +} source $testdir/tester.tcl set testprefix fts5ad diff --git a/test/fts5ae.test b/ext/fts5/test/fts5ae.test similarity index 98% rename from test/fts5ae.test rename to ext/fts5/test/fts5ae.test index b6475d8bbf..644ef99d21 100644 --- a/test/fts5ae.test +++ b/ext/fts5/test/fts5ae.test @@ -13,7 +13,9 @@ # # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5ae diff --git a/test/fts5af.test b/ext/fts5/test/fts5af.test similarity index 97% rename from test/fts5af.test rename to ext/fts5/test/fts5af.test index ca56c0ec1c..3e7ff918b8 100644 --- a/test/fts5af.test +++ b/ext/fts5/test/fts5af.test @@ -15,7 +15,9 @@ # snippet() function. # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5af diff --git a/test/fts5ag.test b/ext/fts5/test/fts5ag.test similarity index 98% rename from test/fts5ag.test rename to ext/fts5/test/fts5ag.test index 52b4774d55..e3659a71d5 100644 --- a/test/fts5ag.test +++ b/ext/fts5/test/fts5ag.test @@ -12,7 +12,9 @@ # focus of this script is testing the FTS5 module. # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5ag diff --git a/test/fts5ah.test b/ext/fts5/test/fts5ah.test similarity index 96% rename from test/fts5ah.test rename to ext/fts5/test/fts5ah.test index b1dffc78fa..c01d966704 100644 --- a/test/fts5ah.test +++ b/ext/fts5/test/fts5ah.test @@ -12,7 +12,9 @@ # focus of this script is testing the FTS5 module. # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. 
test] +} source $testdir/tester.tcl set testprefix fts5ah diff --git a/test/fts5ai.test b/ext/fts5/test/fts5ai.test similarity index 92% rename from test/fts5ai.test rename to ext/fts5/test/fts5ai.test index eba9d09d71..78a346b04a 100644 --- a/test/fts5ai.test +++ b/ext/fts5/test/fts5ai.test @@ -14,7 +14,9 @@ # Specifically, it tests transactions and savepoints # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5ai diff --git a/test/fts5aj.test b/ext/fts5/test/fts5aj.test similarity index 94% rename from test/fts5aj.test rename to ext/fts5/test/fts5aj.test index 49386f57bc..3e12934c05 100644 --- a/test/fts5aj.test +++ b/ext/fts5/test/fts5aj.test @@ -16,7 +16,9 @@ # and deleted, # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5aj diff --git a/test/fts5ak.test b/ext/fts5/test/fts5ak.test similarity index 97% rename from test/fts5ak.test rename to ext/fts5/test/fts5ak.test index 53977ab70c..c16056b72e 100644 --- a/test/fts5ak.test +++ b/ext/fts5/test/fts5ak.test @@ -14,7 +14,9 @@ # Specifically, the auxiliary function "highlight". # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5ak diff --git a/test/fts5al.test b/ext/fts5/test/fts5al.test similarity index 98% rename from test/fts5al.test rename to ext/fts5/test/fts5al.test index 236fdf8521..926ad951cf 100644 --- a/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -14,7 +14,9 @@ # Specifically, this function tests the %_config table. # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. 
test] +} source $testdir/tester.tcl set testprefix fts5al diff --git a/ext/fts5/fts5auxdata.test b/ext/fts5/test/fts5auxdata.test similarity index 97% rename from ext/fts5/fts5auxdata.test rename to ext/fts5/test/fts5auxdata.test index 158e393c32..661a78526a 100644 --- a/ext/fts5/fts5auxdata.test +++ b/ext/fts5/test/fts5auxdata.test @@ -13,7 +13,7 @@ # if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] + set testdir [file join [file dirname [info script]] .. .. .. test] } source $testdir/tester.tcl set testprefix fts5auxdata diff --git a/test/fts5ea.test b/ext/fts5/test/fts5ea.test similarity index 96% rename from test/fts5ea.test rename to ext/fts5/test/fts5ea.test index 1518b8892c..19e1903433 100644 --- a/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -10,7 +10,9 @@ #************************************************************************* # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} source $testdir/tester.tcl set testprefix fts5ea diff --git a/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test similarity index 96% rename from test/fts5fault1.test rename to ext/fts5/test/fts5fault1.test index 8a493e323f..5f340e0fd7 100644 --- a/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -12,7 +12,9 @@ # focus of this script is testing the FTS5 module. # -set testdir [file dirname $argv0] +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. 
test] +} source $testdir/tester.tcl source $testdir/malloc_common.tcl set testprefix fts5fault1 diff --git a/ext/fts5/fts5porter.test b/ext/fts5/test/fts5porter.test similarity index 99% rename from ext/fts5/fts5porter.test rename to ext/fts5/test/fts5porter.test index 7c67f83dab..9254626471 100644 --- a/ext/fts5/fts5porter.test +++ b/ext/fts5/test/fts5porter.test @@ -15,7 +15,7 @@ # if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] + set testdir [file join [file dirname [info script]] .. .. .. test] } source $testdir/tester.tcl set testprefix fts5porter diff --git a/ext/fts5/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test similarity index 96% rename from ext/fts5/fts5tokenizer.test rename to ext/fts5/test/fts5tokenizer.test index 9fa853c84c..bd2eb7aae2 100644 --- a/ext/fts5/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -13,7 +13,7 @@ # if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] + set testdir [file join [file dirname [info script]] .. .. .. test] } source $testdir/tester.tcl set testprefix fts5tokenizer diff --git a/manifest b/manifest index e0c9552b7b..d8bf03435a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sto\sbuilt-in\stokenizers. -D 2014-12-29T11:24:46.773 +C Move\sall\sfts5\stest\sfiles\sto\snew\sdirectory\s"ext/fts5/test". 
+D 2014-12-29T15:59:36.706 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -116,10 +116,24 @@ F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853 F ext/fts5/fts5_tcl.c ce11e46589986b957b89809aabd3936d898d501b F ext/fts5/fts5_tokenize.c 5d6e785345b0d87d174fcc0653bfacd0d9fd7f2e -F ext/fts5/fts5auxdata.test 3844d0f098441cedf75b9cc96d5e6e94d1a3bef4 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 -F ext/fts5/fts5porter.test bb0fba17ce825527addec33fdb12b0062aabd588 -F ext/fts5/fts5tokenizer.test 09096cebc17a41650b52eec6e45c9a29923bfdcd w ext/fts5/fts5porter2.test +F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf w test/fts5aa.test +F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 w test/fts5ab.test +F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b w test/fts5ac.test +F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 w test/fts5ad.test +F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 w test/fts5ae.test +F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd w test/fts5af.test +F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 w test/fts5ag.test +F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d w test/fts5ah.test +F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f w test/fts5ai.test +F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 w test/fts5aj.test +F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f w test/fts5ak.test +F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 w test/fts5al.test +F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 w ext/fts5/fts5auxdata.test +F 
ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 w test/fts5ea.test +F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 w test/fts5fault1.test +F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 w ext/fts5/fts5porter.test +F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca w ext/fts5/fts5tokenizer.test F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -603,20 +617,6 @@ F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 F test/fts5_common.tcl 2488117cd80b7a4de7c20054b89f082b77b4189c -F test/fts5aa.test 27c7d3c865e144a0501dcbfbd6d2ae87f77602ea -F test/fts5ab.test 52f6b9223372ff70b0edb5a3054fbd7bc7fcfefc -F test/fts5ac.test 021e175b809d2baa23792807caae5dfc6bc706f4 -F test/fts5ad.test ff518db6b0d7750b51ee6531ffebf82e57094bfd -F test/fts5ae.test 0877873a2b9df6b3a2d832ed5ea928f838d19faf -F test/fts5af.test 355d2048bd9ddc2f8f4e80a4cb1e70c6204422a0 -F test/fts5ag.test 8b2bb67cf2a3245eaad5e49ab8daa6be6e64332b -F test/fts5ah.test 788e923e60b5e7a559f672cfbf262b8b260ea176 -F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d -F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba -F test/fts5ak.test 26187e57ba56a9e10e6da894a038b07588e7249d -F test/fts5al.test 61b067f3b0b61679ab164a8a855882dfd313988d -F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 -F test/fts5fault1.test ba59b6f0897a4fe510c446b98968ec1e8800a56b F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -783,7 +783,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 
b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54 -F test/permutations.test a762abd3f97809c877c93e6b526ec07bb2a75b96 +F test/permutations.test f26508e576234a5875041dd1632c5eed9fc495d9 F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552 @@ -1212,7 +1212,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1ac7a8d0af9a71ddf6a1421033dcb9fa67c6120c -R c89483d1ebbc9d631471515ddc2ac098 +P b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e +R c65f16b94aeceea9cda28cb8f092d4a9 U dan -Z 32745924be785a3126a9785b730c9992 +Z 822a98c34fd542b912bf890d737a0e9f diff --git a/manifest.uuid b/manifest.uuid index c3d3f24818..7b2535c49c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e \ No newline at end of file +7f148edb30103c5f4fee20cd08e38537f9615bf2 \ No newline at end of file diff --git a/test/permutations.test b/test/permutations.test index 4e366ca36f..c5b83cb821 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -224,14 +224,7 @@ test_suite "fts3" -prefix "" -description { test_suite "fts5" -prefix "" -description { All FTS5 tests. -} -files { - fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test - fts5af.test fts5ag.test fts5ah.test fts5ai.test fts5aj.test - fts5ak.test fts5al.test - fts5ea.test - - fts5fault1.test -} +} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
From 6024772ba292a9abc6810dd0b12767d02b47ccf1 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 1 Jan 2015 16:46:10 +0000 Subject: [PATCH 061/206] Add a version of the unicode61 tokenizer to fts5. FossilOrigin-Name: d09f7800cf14f73ea86d037107ef80295b2c173a --- ext/fts3/unicode/mkunicode.tcl | 50 ++- ext/fts5/fts5_tcl.c | 66 +++- ext/fts5/fts5_tokenize.c | 283 +++++++++++++++- ext/fts5/fts5_unicode2.c | 363 ++++++++++++++++++++ ext/fts5/test/fts5unicode.test | 39 +++ ext/fts5/test/fts5unicode2.test | 567 ++++++++++++++++++++++++++++++++ main.mk | 4 + manifest | 55 ++-- manifest.uuid | 2 +- 9 files changed, 1369 insertions(+), 60 deletions(-) create mode 100644 ext/fts5/fts5_unicode2.c create mode 100644 ext/fts5/test/fts5unicode.test create mode 100644 ext/fts5/test/fts5unicode2.test diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 2da17c51a5..f1adb5ffde 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -732,8 +732,12 @@ proc print_fileheader {} { */ }] puts "" - puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)" - puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" + if {$::generate_fts5_code} { + puts "#if defined(SQLITE_ENABLE_FTS5)" + } else { + puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)" + puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" + } puts "" puts "#include " puts "" @@ -760,22 +764,38 @@ proc print_test_main {} { # our liking. # proc usage {} { - puts -nonewline stderr "Usage: $::argv0 ?-test? " + puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? 
" puts stderr " " exit 1 } -if {[llength $argv]!=2 && [llength $argv]!=3} usage -if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage +if {[llength $argv]<2} usage set unicodedata.txt [lindex $argv end] set casefolding.txt [lindex $argv end-1] -set generate_test_code [expr {[llength $argv]==3}] + +set generate_test_code 0 +set generate_fts5_code 0 +set function_prefix "sqlite3Fts" +for {set i 0} {$i < [llength $argv]-2} {incr i} { + switch -- [lindex $argv $i] { + -test { + set generate_test_code 1 + } + -fts5 { + set function_prefix sqlite3Fts5 + set generate_fts5_code 1 + } + default { + usage + } + } +} print_fileheader # Print the isalnum() function to stdout. # set lRange [an_load_separator_ranges] -print_isalnum sqlite3FtsUnicodeIsalnum $lRange +print_isalnum ${function_prefix}UnicodeIsalnum $lRange # Leave a gap between the two generated C functions. # @@ -790,22 +810,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}] print_rd $mappings puts "" puts "" -print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings +print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings puts "" puts "" # Print the fold() function to stdout. # -print_fold sqlite3FtsUnicodeFold +print_fold ${function_prefix}UnicodeFold # Print the test routines and main() function to stdout, if -test # was specified. 
# if {$::generate_test_code} { - print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange - print_fold_test sqlite3FtsUnicodeFold $mappings + print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange + print_fold_test ${function_prefix}UnicodeFold $mappings print_test_main } -puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" -puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */" +if {$generate_fts5_code} { + puts "#endif /* defined(SQLITE_ENABLE_FTS5) */" +} else { + puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" + puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */" +} diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 575f4f871a..1ce1bba49d 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -518,16 +518,31 @@ static int f5tCreateFunction( return TCL_OK; } +typedef struct F5tTokenizeCtx F5tTokenizeCtx; +struct F5tTokenizeCtx { + Tcl_Obj *pRet; + int bSubst; + const char *zInput; +}; + static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd, int iPos ){ - Tcl_Obj *pRet = (Tcl_Obj*)pCtx; - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iStart)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iEnd)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); + F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; + if( p->bSubst ){ + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); + Tcl_ListObjAppendElement( + 0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart) + ); + }else{ + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd)); + Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos)); + } return SQLITE_OK; } @@ -543,7 +558,6 @@ static int 
f5tTokenize( int objc, Tcl_Obj *CONST objv[] ){ - char *zName; char *zText; int nText; sqlite3 *db = 0; @@ -554,21 +568,39 @@ static int f5tTokenize( void *pUserdata; int rc; - if( objc!=4 ){ - Tcl_WrongNumArgs(interp, 1, objv, "DB NAME TEXT"); + int nArg; + const char **azArg; + F5tTokenizeCtx ctx; + + if( objc!=4 && objc!=5 ){ + Tcl_WrongNumArgs(interp, 1, objv, "?-subst? DB NAME TEXT"); return TCL_ERROR; } - if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; - zName = Tcl_GetString(objv[2]); - zText = Tcl_GetStringFromObj(objv[3], &nText); + if( objc==5 ){ + char *zOpt = Tcl_GetString(objv[1]); + if( strcmp("-subst", zOpt) ){ + Tcl_AppendResult(interp, "unrecognized option: ", zOpt, 0); + return TCL_ERROR; + } + } + if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ) return TCL_ERROR; + if( Tcl_SplitList(interp, Tcl_GetString(objv[objc-2]), &nArg, &azArg) ){ + return TCL_ERROR; + } + if( nArg==0 ){ + Tcl_AppendResult(interp, "no such tokenizer: ", 0); + Tcl_Free((void*)azArg); + return TCL_ERROR; + } + zText = Tcl_GetStringFromObj(objv[objc-1], &nText); - rc = pApi->xFindTokenizer(pApi, zName, &pUserdata, &tokenizer); + rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer); if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "no such tokenizer: ", zName, 0); + Tcl_AppendResult(interp, "no such tokenizer: ", azArg[0], 0); return TCL_ERROR; } - rc = tokenizer.xCreate(pUserdata, 0, 0, &pTok); + rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0); return TCL_ERROR; @@ -576,7 +608,10 @@ static int f5tTokenize( pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); - rc = tokenizer.xTokenize(pTok, pRet, zText, nText, xTokenizeCb2); + ctx.bSubst = (objc==5); + ctx.pRet = pRet; + ctx.zInput = zText; + rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2); tokenizer.xDelete(pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in 
tokenizer.xTokenize()", 0); @@ -585,6 +620,7 @@ static int f5tTokenize( } + Tcl_Free((void*)azArg); Tcl_SetObjResult(interp, pRet); Tcl_DecrRefCount(pRet); return TCL_OK; diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 5352faa2c6..b23eccd97f 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -15,6 +15,9 @@ #include #include +/************************************************************************** +** Start of unicode61 tokenizer implementation. +*/ /* ** Create a "simple" tokenizer. @@ -69,7 +72,7 @@ static int fts5SimpleTokenize( const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) ){ - int rc; + int rc = SQLITE_OK; int ie; int is = 0; int iPos = 0; @@ -78,7 +81,7 @@ static int fts5SimpleTokenize( int nFold = sizeof(aFold); char *pFold = aFold; - do { + while( is=0xc0 ){ \ + c = sqlite3Utf8Trans1[c-0xc0]; \ + while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + if( c<0x80 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + } + +#define WRITE_UTF8(zOut, c) { \ + if( c<0x00080 ){ \ + *zOut++ = (unsigned char)(c&0xFF); \ + } \ + else if( c<0x00800 ){ \ + *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F); \ + *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ + } \ + else if( c<0x10000 ){ \ + *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F); \ + *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ + }else{ \ + *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07); \ + *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F); \ + *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ + } \ +} + +#endif /* ifndef SQLITE_AMALGAMATION */ + +typedef struct Unicode61Tokenizer Unicode61Tokenizer; +struct Unicode61Tokenizer { + int bRemoveDiacritic; /* True if remove_diacritics=1 is set */ + int nException; + int *aiException; +}; + +static int 
fts5UnicodeAddExceptions( + Unicode61Tokenizer *p, /* Tokenizer object */ + const char *z, /* Characters to treat as exceptions */ + int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */ +){ + int rc = SQLITE_OK; + int n = strlen(z); + int *aNew; + + if( n>0 ){ + aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int)); + if( aNew ){ + int nNew = p->nException; + const unsigned char *zCsr = (const unsigned char*)z; + const unsigned char *zTerm = (const unsigned char*)&z[n]; + while( zCsriCode ) break; + } + memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); + aNew[i] = iCode; + nNew++; + } + } + p->aiException = aNew; + p->nException = nNew; + }else{ + rc = SQLITE_NOMEM; + } + } + + return rc; +} + +/* +** Return true if the p->aiException[] array contains the value iCode. +*/ +static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ + if( p->nException>0 ){ + int *a = p->aiException; + int iLo = 0; + int iHi = p->nException-1; + + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( iCode==a[iTest] ){ + return 1; + }else if( iCode>a[iTest] ){ + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + } + + return 0; +} + +/* +** Create a "unicode61" tokenizer. 
+*/ +static int fts5UnicodeCreate( + void *pCtx, + const char **azArg, int nArg, + Fts5Tokenizer **ppOut +){ + int rc = SQLITE_OK; /* Return code */ + Unicode61Tokenizer *p = 0; /* New tokenizer object */ + + if( nArg%2 ){ + rc = SQLITE_ERROR; + }else{ + p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer)); + if( p ){ + int i; + memset(p, 0, sizeof(Unicode61Tokenizer)); + p->bRemoveDiacritic = 1; + for(i=0; rc==SQLITE_OK && ibRemoveDiacritic = (zArg[0]=='1'); + }else + if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ + rc = fts5UnicodeAddExceptions(p, zArg, 1); + }else + if( 0==sqlite3_stricmp(azArg[i], "separators") ){ + rc = fts5UnicodeAddExceptions(p, zArg, 0); + }else{ + rc = SQLITE_ERROR; + } + } + }else{ + rc = SQLITE_NOMEM; + } + *ppOut = (Fts5Tokenizer*)p; + } + return rc; +} + +/* +** Delete a "unicode61" tokenizer. +*/ +static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ + Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; + sqlite3_free(p->aiException); + sqlite3_free(p); + return; +} + +/* +** Return true if, for the purposes of tokenizing with the tokenizer +** passed as the first argument, codepoint iCode is considered a token +** character (not a separator). +*/ +static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ + assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); + return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); +} + +/* +** Tokenize some text using a unicode61 tokenizer. 
+*/ +static int fts5UnicodeTokenize( + Fts5Tokenizer *pTokenizer, + void *pCtx, + const char *pText, int nText, + int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) +){ + Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; + const unsigned char *zInput = (const unsigned char*)pText; + const unsigned char *zTerm = &zInput[nText]; + const unsigned char *z = zInput; + int rc = SQLITE_OK; + int nBuf = 0; + unsigned char *zBuf = 0; + unsigned char *zOut = 0; + int iPos = 0; + + while( rc==SQLITE_OK && zzBuf ){ + bAlnum = sqlite3Fts5UnicodeIsdiacritic(iCode); + } + + if( bAlnum ){ + int iOut; + + /* Grow the output buffer if required */ + while( (zOut-zBuf)+4>=nBuf ){ + unsigned char *zNew; + nBuf = (nBuf ? nBuf*2 : 128); + zNew = sqlite3_realloc(zBuf, nBuf); + if( zNew==0 ){ + rc = SQLITE_NOMEM; + goto tokenize_finished; + }else{ + zOut = &zNew[zOut-zBuf]; + zBuf = zNew; + } + } + + /* Write the new character to it */ + iOut = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic); + if( iOut ) WRITE_UTF8(zOut, iOut); + } + + if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){ + int ie = (bAlnum ? z : zCode) - zInput; + rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie, iPos); + zOut = zBuf; + iPos++; + } + } + + tokenize_finished: + sqlite3_free(zBuf); + return rc; +} + /************************************************************************** ** Start of porter2 stemmer implementation. 
*/ @@ -477,8 +749,9 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ const char *zName; fts5_tokenizer x; } aBuiltin[] = { - { "porter", { fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize } }, - { "simple", { fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize } } + { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, + { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, + { "simple", {fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize }} }; int rc = SQLITE_OK; /* Return code */ diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c new file mode 100644 index 0000000000..5692bf2b39 --- /dev/null +++ b/ext/fts5/fts5_unicode2.c @@ -0,0 +1,363 @@ +/* +** 2012 May 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +/* +** DO NOT EDIT THIS MACHINE GENERATED FILE. +*/ + +#if defined(SQLITE_ENABLE_FTS5) + +#include + +/* +** Return true if the argument corresponds to a unicode codepoint +** classified as either a letter or a number. Otherwise false. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +int sqlite3Fts5UnicodeIsalnum(int c){ + /* Each unsigned integer in the following array corresponds to a contiguous + ** range of unicode codepoints that are not either letters or numbers (i.e. + ** codepoints for which this function should return 0). + ** + ** The most significant 22 bits in each 32-bit value contain the first + ** codepoint in the range. The least significant 10 bits are used to store + ** the size of the range (always at least 1). 
In other words, the value + ** ((C<<22) + N) represents a range of N codepoints starting with codepoint + ** C. It is not possible to represent a range larger than 1023 codepoints + ** using this format. + */ + const static unsigned int aEntry[] = { + 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, + 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, + 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, + 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, + 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, + 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, + 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, + 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, + 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, + 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, + 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, + 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, + 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, + 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, + 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, + 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, + 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, + 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, + 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, + 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, + 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, + 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, + 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, + 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, + 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, + 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, + 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, + 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, + 0x00412806, 
0x00415804, 0x00417803, 0x00418803, 0x00419C07, + 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, + 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, + 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, + 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, + 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, + 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, + 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, + 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, + 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, + 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, + 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, + 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, + 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, + 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, + 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, + 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, + 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, + 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, + 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, + 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, + 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, + 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, + 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, + 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, + 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, + 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, + 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, + 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, + 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, + 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, + 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, + 0x02ABD402, 0x02AF8C0B, 
0x03600001, 0x036DFC02, 0x036FFC02, + 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, + 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, + 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, + 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, + 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, + 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, + 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, + 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, + 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, + 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, + 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, + 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, + 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, + 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, + 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, + 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, + 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, + 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, + 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, + 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, + 0x380400F0, + }; + static const unsigned int aAscii[4] = { + 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, + }; + + if( c<128 ){ + return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); + }else if( c<(1<<22) ){ + unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; + int iRes; + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aEntry[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( aEntry[0]=aEntry[iRes] ); + return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + } + return 1; +} + + +/* +** If the argument is a codepoint corresponding to a lowercase letter +** in the 
ASCII range with a diacritic added, return the codepoint +** of the ASCII letter only. For example, if passed 235 - "LATIN +** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER +** E"). The results of passing a codepoint that corresponds to an +** uppercase letter are undefined. +*/ +static int remove_diacritic(int c){ + unsigned short aDia[] = { + 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, + 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, + 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, + 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, + 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, + 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, + 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, + 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, + 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, + 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, + 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, + 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, + 62924, 63050, 63082, 63274, 63390, + }; + char aChar[] = { + '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', + 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', + 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', + 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', + 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', + 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', + 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', + 'e', 'i', 'o', 'u', 'y', + }; + + unsigned int key = (((unsigned int)c)<<3) | 0x00000007; + int iRes = 0; + int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aDia[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( key>=aDia[iRes] ); + return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? 
c : (int)aChar[iRes]); +}; + + +/* +** Return true if the argument interpreted as a unicode codepoint +** is a diacritical modifier character. +*/ +int sqlite3Fts5UnicodeIsdiacritic(int c){ + unsigned int mask0 = 0x08029FDF; + unsigned int mask1 = 0x000361F8; + if( c<768 || c>817 ) return 0; + return (c < 768+32) ? + (mask0 & (1 << (c-768))) : + (mask1 & (1 << (c-768-32))); +} + + +/* +** Interpret the argument as a unicode codepoint. If the codepoint +** is an upper case character that has a lower case equivalent, +** return the codepoint corresponding to the lower case version. +** Otherwise, return a copy of the argument. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ + /* Each entry in the following array defines a rule for folding a range + ** of codepoints to lower case. The rule applies to a range of nRange + ** codepoints starting at codepoint iCode. + ** + ** If the least significant bit in flags is clear, then the rule applies + ** to all nRange codepoints (i.e. all nRange codepoints are upper case and + ** need to be folded). Or, if it is set, then the rule only applies to + ** every second codepoint in the range, starting with codepoint C. + ** + ** The 7 most significant bits in flags are an index into the aiOff[] + ** array. If a specific codepoint C does require folding, then its lower + ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). + ** + ** The contents of this array are generated by parsing the CaseFolding.txt + ** file distributed as part of the "Unicode Character Database". See + ** http://www.unicode.org for details. 
+ */ + static const struct TableEntry { + unsigned short iCode; + unsigned char flags; + unsigned char nRange; + } aEntry[] = { + {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, + {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, + {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, + {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, + {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, + {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, + {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, + {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, + {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, + {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, + {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, + {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, + {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, + {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, + {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, + {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, + {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, + {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, + {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, + {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, + {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, + {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, + {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, + {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, + {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, + {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, + {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, + {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, + {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, + {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, + {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, + {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, + {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, + {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, + {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, + {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, + {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, + {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, + {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, + {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, + {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, + {8172, 
152, 1}, {8184, 112, 2}, {8186, 114, 2}, + {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, + {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, + {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, + {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, + {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, + {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, + {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, + {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, + {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, + {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, + {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, + {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, + {65313, 14, 26}, + }; + static const unsigned short aiOff[] = { + 1, 2, 8, 15, 16, 26, 28, 32, + 37, 38, 40, 48, 63, 64, 69, 71, + 79, 80, 116, 202, 203, 205, 206, 207, + 209, 210, 211, 213, 214, 217, 218, 219, + 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, + 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, + 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, + 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, + 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, + 65514, 65521, 65527, 65528, 65529, + }; + + int ret = c; + + assert( c>=0 ); + assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); + + if( c<128 ){ + if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); + }else if( c<65536 ){ + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + int iRes = -1; + + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + int cmp = (c - aEntry[iTest].iCode); + if( cmp>=0 ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( iRes<0 || c>=aEntry[iRes].iCode ); + + if( iRes>=0 ){ + const struct TableEntry *p = &aEntry[iRes]; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; + assert( ret>0 ); + } + } + + if( bRemoveDiacritic ) ret = remove_diacritic(ret); + } + + else if( c>=66560 && c<66600 ){ + ret = c + 40; + } + 
+ return ret; +} +#endif /* defined(SQLITE_ENABLE_FTS5) */ diff --git a/ext/fts5/test/fts5unicode.test b/ext/fts5/test/fts5unicode.test new file mode 100644 index 0000000000..22082b9cde --- /dev/null +++ b/ext/fts5/test/fts5unicode.test @@ -0,0 +1,39 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 tokenizers +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} +source $testdir/tester.tcl +set testprefix fts5unicode + +proc tokenize_test {tn tokenizer input output} { + uplevel [list do_test $tn [subst -nocommands { + set ret {} + foreach {z s e p} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { + lappend ret [set z] + } + set ret + }] [list {*}$output]] +} + +foreach {tn t} {1 simple 2 unicode61} { + tokenize_test 1.$tn.0 $t {A B C D} {a b c d} + tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely} + tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely} + tokenize_test 1.$tn.3 $t {} {} +} + +finish_test + diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test new file mode 100644 index 0000000000..b26795f8a9 --- /dev/null +++ b/ext/fts5/test/fts5unicode2.test @@ -0,0 +1,567 @@ +# 2012 May 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# The tests in this file focus on testing the "unicode" FTS tokenizer. 
+# +# This is a modified copy of FTS4 test file "fts4_unicode.test". +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. test] +} +source $testdir/tester.tcl +set testprefix fts5unicode2 + +proc do_unicode_token_test {tn input res} { + uplevel [list do_test $tn [list \ + sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input + ] [list {*}$res]] +} + +proc do_unicode_token_test2 {tn input res} { + uplevel [list do_test $tn [list \ + sqlite3_fts5_tokenize -subst db "unicode61" $input + ] [list {*}$res]] +} + +proc do_unicode_token_test3 {tn args} { + set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]] + set input [lindex $args end-1] + set res [lindex $args end] + uplevel [list do_test $tn [list \ + sqlite3_fts5_tokenize -subst db $tokenizer $input + ] [list {*}$res]] +} + +do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D} + +do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \ + "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC" + +do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \ + "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx" + +# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s. +do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF" +do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E" + +do_unicode_token_test 1.5 "The quick brown fox" { + 0 the The 1 quick quick 2 brown brown 3 fox fox +} +do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" { + 0 the The 1 quick quick 2 brown brown 3 fox fox +} + +do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D} +do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC" + +do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \ + "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx" + +# Check that diacritics are removed if remove_diacritics=1 is specified. +# And that they do not break tokens. 
+do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx" + +# Title-case mappings work +do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5" + +#------------------------------------------------------------------------- +# +set docs [list { + Enhance the INSERT syntax to allow multiple rows to be inserted via the + VALUES clause. +} { + Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause. +} { + Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp(). +} { + Added the sqlite3_db_readonly() interface. +} { + Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the + ability to add new PRAGMA statements or to override built-in PRAGMAs. +} { + Queries of the form: "SELECT max(x), y FROM table" returns the value of y on + the same row that contains the maximum x value. +} { + Added support for the FTS4 languageid option. +} { + Documented support for the FTS4 content option. This feature has actually + been in the code since version 3.7.9 but is only now considered to be + officially supported. +} { + Pending statements no longer block ROLLBACK. Instead, the pending statement + will return SQLITE_ABORT upon next access after the ROLLBACK. +} { + Improvements to the handling of CSV inputs in the command-line shell +} { + Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be + incorrectly converted into an INNER JOIN if the WHERE clause indexable terms + connected by OR. 
+}] + +set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS +set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS +set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS +set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS +set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS +set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS +set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS +set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS +set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS +foreach k [array names map] { + lappend mappings [string toupper $k] [lindex $map($k) 0] + lappend mappings $k [lindex $map($k) 1] +} +proc mapdoc {doc} { + set doc [regsub -all {[[:space:]]+} $doc " "] + string map $::mappings [string trim $doc] +} + +do_test 2.0 { + execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); } + foreach doc $docs { + set d [mapdoc $doc] + execsql { INSERT INTO t2 VALUES($d) } + } +} {} + +do_test 2.1 { + set q [mapdoc "row"] + execsql { SELECT * FROM t2 WHERE t2 MATCH $q } +} [list [mapdoc { + Queries of the form: "SELECT max(x), y FROM table" returns the value of y on + the same row that contains the maximum x value. +}]] + +foreach {tn query snippet} { + 2 "row" { + ...returns the value of y on the same [row] that contains + the maximum x value. + } + 3 "ROW" { + ...returns the value of y on the same [row] that contains + the maximum x value. + } + 4 "rollback" { + ...[ROLLBACK]. Instead, the pending statement + will return SQLITE_ABORT upon next access after the [ROLLBACK]. + } + 5 "rOllback" { + ...[ROLLBACK]. Instead, the pending statement + will return SQLITE_ABORT upon next access after the [ROLLBACK]. + } + 6 "lang*" { + Added support for the FTS4 [languageid] option. 
+ } +} { + do_test 2.$tn { + set q [mapdoc $query] + execsql { + SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q + } + } [list [mapdoc $snippet]] +} + +#------------------------------------------------------------------------- +# Make sure the unicode61 tokenizer does not crash if it is passed a +# NULL pointer. +reset_db +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y); + INSERT INTO t1 VALUES(NULL, 'a b c'); +} + +do_execsql_test 3.2 { + SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b' +} {{a [b] c}} + +do_execsql_test 3.3 { + BEGIN; + DELETE FROM t1; + INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b'); + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 SELECT * FROM t1; + INSERT INTO t1 VALUES('a b c', NULL); + INSERT INTO t1 VALUES('a x c', NULL); + COMMIT; +} + +do_execsql_test 3.4 { + SELECT * FROM t1 WHERE t1 MATCH 'a b'; +} {{a b c} {}} + +#------------------------------------------------------------------------- +# +reset_db + +do_test 4.1 { + set a "abc\uFFFEdef" + set b "abc\uD800def" + set c "\uFFFEdef" + set d "\uD800def" + execsql { + CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x); + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + } +} {} + +do_test 4.2 { + set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}] + set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}] + set c 
[binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] + set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] + execsql { + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + } +} {} + +do_test 4.3 { + set a [binary format c* {0xF7 0xBF 0xBF 0xBF}] + set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}] + set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}] + set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}] + execsql { + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + } +} {} + + +#------------------------------------------------------------------------- + +breakpoint +do_unicode_token_test3 5.1 {tokenchars {}} { + sqlite3_reset sqlite3_column_int +} { + 0 sqlite3 sqlite3 + 1 reset reset + 2 sqlite3 sqlite3 + 3 column column + 4 int int +} + +do_unicode_token_test3 5.2 {tokenchars _} { + sqlite3_reset sqlite3_column_int +} { + 0 sqlite3_reset sqlite3_reset + 1 sqlite3_column_int sqlite3_column_int +} + +do_unicode_token_test3 5.3 {separators xyz} { + Laotianxhorseyrunszfast +} { + 0 laotian Laotian + 1 horse horse + 2 runs runs + 3 fast fast +} + +do_unicode_token_test3 5.4 {tokenchars xyz} { + Laotianxhorseyrunszfast +} { + 0 laotianxhorseyrunszfast Laotianxhorseyrunszfast +} + +do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} { + sqlite3_resetxsqlite3_column_intyhonda_phantom +} { + 0 sqlite3_reset sqlite3_reset + 1 sqlite3_column_int sqlite3_column_int + 2 honda_phantom honda_phantom +} + +do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" { + 0 abc abc 1 def def +} + +do_unicode_token_test3 5.7 \ + "tokenchars \u2444\u2445" \ + "separators \u05D0\u05D1\u05D2" \ + "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \ + [list \ + 0 \u2444fre\u2445sh \u2444fre\u2445sh \ + 1 water water \ + 2 fish fish \ + 3 \u2445timer \u2445timer \ + ] + +# Check that it is not possible 
to add a standalone diacritic codepoint +# to either separators or tokenchars. +do_unicode_token_test3 5.8 "separators \u0301" \ + "hello\u0301world \u0301helloworld" \ + "0 helloworld hello\u0301world 1 helloworld helloworld" + +do_unicode_token_test3 5.9 "tokenchars \u0301" \ + "hello\u0301world \u0301helloworld" \ + "0 helloworld hello\u0301world 1 helloworld helloworld" + +do_unicode_token_test3 5.10 "separators \u0301" \ + "remove_diacritics 0" \ + "hello\u0301world \u0301helloworld" \ + "0 hello\u0301world hello\u0301world 1 helloworld helloworld" + +do_unicode_token_test3 5.11 "tokenchars \u0301" \ + "remove_diacritics 0" \ + "hello\u0301world \u0301helloworld" \ + "0 hello\u0301world hello\u0301world 1 helloworld helloworld" + + +#------------------------------------------------------------------------- + +proc do_tokenize {tokenizer txt} { + set res [list] + foreach {a b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] { + lappend res $b + } + set res +} + +# Argument $lCp must be a list of codepoints (integers) that +# correspond to whitespace characters. This command creates a string +# $W from the codepoints, then tokenizes "${W}hello${W}world${W}" +# using tokenizer $tokenizer. The test passes if the tokenizer successfully +# extracts the two 5 character tokens. +# +proc do_isspace_test {tn tokenizer lCp} { + set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp] + set txt "${whitespace}hello${whitespace}world${whitespace}" + uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}] +} + +set tokenizers [list unicode61] +ifcapable icu { lappend tokenizers icu } + +# Some tests to check that the tokenizers can both identify white-space +# codepoints. All codepoints tested below are of type "Zs" in the +# UnicodeData.txt file.
+foreach T $tokenizers { + do_isspace_test 6.$T.1 $T 32 + do_isspace_test 6.$T.2 $T 160 + do_isspace_test 6.$T.3 $T 5760 + do_isspace_test 6.$T.4 $T 6158 + do_isspace_test 6.$T.5 $T 8192 + do_isspace_test 6.$T.6 $T 8193 + do_isspace_test 6.$T.7 $T 8194 + do_isspace_test 6.$T.8 $T 8195 + do_isspace_test 6.$T.9 $T 8196 + do_isspace_test 6.$T.10 $T 8197 + do_isspace_test 6.$T.11 $T 8198 + do_isspace_test 6.$T.12 $T 8199 + do_isspace_test 6.$T.13 $T 8200 + do_isspace_test 6.$T.14 $T 8201 + do_isspace_test 6.$T.15 $T 8202 + do_isspace_test 6.$T.16 $T 8239 + do_isspace_test 6.$T.17 $T 8287 + do_isspace_test 6.$T.18 $T 12288 + + do_isspace_test 6.$T.19 $T {32 160 5760 6158} + do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} + do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} + do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} + do_isspace_test 6.$T.23 $T {8287 12288} +} + +#------------------------------------------------------------------------- +# Test that the private use ranges are treated as alphanumeric. +# +foreach {tn1 c} { + 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff +} { + foreach {tn2 config res} { + 1 "" "0 hello*world hello*world" + 2 "separators *" "0 hello hello 1 world world" + } { + set config [string map [list * $c] $config] + set input [string map [list * $c] "hello*world"] + set output [string map [list * $c] $res] + do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output + } +} + +#------------------------------------------------------------------------- +# Cursory test of remove_diacritics=0. 
+# +# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS +# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS +# 00E4;LATIN SMALL LETTER A WITH DIAERESIS +# 00F6;LATIN SMALL LETTER O WITH DIAERESIS +# +do_execsql_test 8.1.1 " + CREATE VIRTUAL TABLE t3 USING fts5( + content, tokenize='unicode61 remove_diacritics 1' + ); + INSERT INTO t3 VALUES('o'); + INSERT INTO t3 VALUES('a'); + INSERT INTO t3 VALUES('O'); + INSERT INTO t3 VALUES('A'); + INSERT INTO t3 VALUES('\xD6'); + INSERT INTO t3 VALUES('\xC4'); + INSERT INTO t3 VALUES('\xF6'); + INSERT INTO t3 VALUES('\xE4'); +" +do_execsql_test 8.1.2 { + SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC; +} {1 3 5 7} +do_execsql_test 8.1.3 { + SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC; +} {2 4 6 8} +do_execsql_test 8.2.1 { + CREATE VIRTUAL TABLE t4 USING fts5( + content, tokenize='unicode61 remove_diacritics 0' + ); + INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC; +} +do_execsql_test 8.2.2 { + SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC; +} {1 3} +do_execsql_test 8.2.3 { + SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC; +} {2 4} + +#------------------------------------------------------------------------- +# +if 0 { +foreach {tn sql} { + 1 { + CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]); + CREATE VIRTUAL TABLE t6 USING fts4( + tokenize=unicode61 [tokenchars=="] "tokenchars=[]"); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]); + } + 2 { + CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= ."); + CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]"); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4"); + } + 3 { + CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .'); + CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]'); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4'); + } + 4 { + CREATE VIRTUAL TABLE t5 
USING fts4(tokenize=unicode61 `tokenchars= .`); + CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`); + CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`); + } +} { + do_execsql_test 9.$tn.0 { + DROP TABLE IF EXISTS t5; + DROP TABLE IF EXISTS t5aux; + DROP TABLE IF EXISTS t6; + DROP TABLE IF EXISTS t6aux; + DROP TABLE IF EXISTS t7; + DROP TABLE IF EXISTS t7aux; + } + do_execsql_test 9.$tn.1 $sql + + do_execsql_test 9.$tn.2 { + CREATE VIRTUAL TABLE t5aux USING fts4aux(t5); + INSERT INTO t5 VALUES('one two three/four.five.six'); + SELECT * FROM t5aux; + } { + four.five.six * 1 1 four.five.six 0 1 1 + {one two three} * 1 1 {one two three} 0 1 1 + } + + do_execsql_test 9.$tn.3 { + CREATE VIRTUAL TABLE t6aux USING fts4aux(t6); + INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta'); + SELECT * FROM t6aux; + } { + {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1 + {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1 + } + + do_execsql_test 9.$tn.4 { + CREATE VIRTUAL TABLE t7aux USING fts4aux(t7); + INSERT INTO t7 VALUES('alephxbeth\xC4gimel'); + SELECT * FROM t7aux; + } { + aleph * 1 1 aleph 0 1 1 + beth * 1 1 beth 0 1 1 + gimel * 1 1 gimel 0 1 1 + } +} + +# Check that multiple options are handled correctly. +# +do_execsql_test 10.1 { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61 + "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy" + "separators=a" "separators=a" "tokenchars=a" "tokenchars=a" + ); + + INSERT INTO t1 VALUES('oneatwoxthreeyfour'); + INSERT INTO t1 VALUES('a.single=word'); + CREATE VIRTUAL TABLE t1aux USING fts4aux(t1); + SELECT * FROM t1aux; +} { + .single=word * 1 1 .single=word 0 1 1 + four * 1 1 four 0 1 1 + one * 1 1 one 0 1 1 + three * 1 1 three 0 1 1 + two * 1 1 two 0 1 1 +} + +# Test that case folding happens after tokenization, not before. 
+# +do_execsql_test 10.2 { + DROP TABLE IF EXISTS t2; + CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB"); + INSERT INTO t2 VALUES('oneatwoBthree'); + INSERT INTO t2 VALUES('onebtwoAthree'); + CREATE VIRTUAL TABLE t2aux USING fts4aux(t2); + SELECT * FROM t2aux; +} { + one * 1 1 one 0 1 1 + onebtwoathree * 1 1 onebtwoathree 0 1 1 + three * 1 1 three 0 1 1 + two * 1 1 two 0 1 1 +} + +# Test that the tokenchars and separators options work with the +# fts3tokenize table. +# +do_execsql_test 11.1 { + CREATE VIRTUAL TABLE ft1 USING fts3tokenize( + "unicode61", "tokenchars=@.", "separators=1234567890" + ); + SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road'; +} { + berlin@street sydney.road +} + +} + +finish_test diff --git a/main.mk b/main.mk index 58044218a7..7a26313b12 100644 --- a/main.mk +++ b/main.mk @@ -81,6 +81,7 @@ LIBOBJ += fts5_hash.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5_tokenize.o +LIBOBJ += fts5_unicode2.o LIBOBJ += fts5parse.o @@ -616,6 +617,9 @@ fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_tokenize.c +fts5_unicode2.o: $(TOP)/ext/fts5/fts5_unicode2.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c + fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h diff --git a/manifest b/manifest index d8bf03435a..49fabbb73c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Move\sall\sfts5\stest\sfiles\sto\snew\sdirectory\s"ext/fts5/test". -D 2014-12-29T15:59:36.706 +C Add\sa\sversion\sof\sthe\sunicode61\stokenizer\sto\sfts5. 
+D 2015-01-01T16:46:10.851 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -102,7 +102,7 @@ F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 +F ext/fts3/unicode/mkunicode.tcl 2fa92b916b17ee0fc94129d36969972d463bc016 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74 F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b @@ -114,26 +114,29 @@ F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853 -F ext/fts5/fts5_tcl.c ce11e46589986b957b89809aabd3936d898d501b -F ext/fts5/fts5_tokenize.c 5d6e785345b0d87d174fcc0653bfacd0d9fd7f2e +F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f +F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb +F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 -F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf w test/fts5aa.test -F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 w test/fts5ab.test -F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b w test/fts5ac.test -F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 w test/fts5ad.test -F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 w test/fts5ae.test -F 
ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd w test/fts5af.test -F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 w test/fts5ag.test -F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d w test/fts5ah.test -F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f w test/fts5ai.test -F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 w test/fts5aj.test -F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f w test/fts5ak.test -F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 w test/fts5al.test -F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 w ext/fts5/fts5auxdata.test -F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 w test/fts5ea.test -F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 w test/fts5fault1.test -F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 w ext/fts5/fts5porter.test -F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca w ext/fts5/fts5tokenizer.test +F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf +F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 +F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b +F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 +F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 +F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd +F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 +F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d +F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f +F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 +F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f +F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 +F ext/fts5/test/fts5auxdata.test 
fec4c9113176d351e567eab65fe9917e5ea0ab05 +F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 +F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 +F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 +F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca +F ext/fts5/test/fts5unicode.test b9c7bb982e0ee242a0774e636e1888ca32947a83 +F ext/fts5/test/fts5unicode2.test 7b0d64bbb7bfb7b5080e032e068404b42432ee02 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -177,7 +180,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 863a6f5cdcc3a47a9dcbedc9af37d3c0d4172935 +F main.mk 602303f3596d10237f25da030ee1d96065e2e5a8 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1212,7 +1215,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e -R c65f16b94aeceea9cda28cb8f092d4a9 +P 7f148edb30103c5f4fee20cd08e38537f9615bf2 +R d01caf1e8e04bd7c1b6e26fb465c90b6 U dan -Z 822a98c34fd542b912bf890d737a0e9f +Z 5c3f4d7bf4502327dfa6eb630b5a26ec diff --git a/manifest.uuid b/manifest.uuid index 7b2535c49c..f67937770d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7f148edb30103c5f4fee20cd08e38537f9615bf2 \ No newline at end of file +d09f7800cf14f73ea86d037107ef80295b2c173a \ No newline at end of file From 
ade921c3ad001b0397eae849975572c07cfa2f96 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 2 Jan 2015 14:55:22 +0000 Subject: [PATCH 062/206] Allow the rank column to be remapped on a per-query basis by including a term similar to "rank match 'bm25(10,2)'" in a where clause. FossilOrigin-Name: 1cd15a1759004d5d321056905dbb6acff20dc7d9 --- ext/fts5/fts5.c | 104 ++++++++++++++++++++++++++++++-------- ext/fts5/fts5Int.h | 2 + ext/fts5/fts5_config.c | 4 +- ext/fts5/test/fts5al.test | 57 ++++++++++++++++++++- manifest | 18 +++---- manifest.uuid | 2 +- 6 files changed, 152 insertions(+), 35 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 604d5c7cb6..6c69da97b3 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -160,6 +160,8 @@ struct Fts5Cursor { char *zSpecial; /* Result of special query */ /* "rank" function. Populated on demand from vtab.xColumn(). */ + char *zRank; /* Custom rank function */ + char *zRankArgs; /* Custom rank function args */ Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ int nRankArg; /* Number of trailing arguments for rank() */ sqlite3_value **apRankArg; /* Array of trailing arguments */ @@ -181,6 +183,7 @@ struct Fts5Cursor { #define FTS5CSR_REQUIRE_CONTENT 0x01 #define FTS5CSR_REQUIRE_DOCSIZE 0x02 #define FTS5CSR_EOF 0x04 +#define FTS5CSR_FREE_ZRANK 0x08 /* ** Macros to Set(), Clear() and Test() cursor flags. @@ -418,6 +421,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts5Config *pConfig = pTab->pConfig; int iCons; int ePlan = FTS5_PLAN_SCAN; + int iRankMatch; iCons = fts5FindConstraint(pInfo,SQLITE_INDEX_CONSTRAINT_MATCH,pConfig->nCol); if( iCons>=0 ){ @@ -453,6 +457,14 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ ePlan |= pInfo->aOrderBy[0].desc ? 
FTS5_ORDER_DESC : FTS5_ORDER_ASC; } } + + iRankMatch = fts5FindConstraint( + pInfo, SQLITE_INDEX_CONSTRAINT_MATCH, pConfig->nCol+1 + ); + if( iRankMatch>=0 ){ + pInfo->aConstraintUsage[iRankMatch].argvIndex = 1 + (iCons>=0); + pInfo->aConstraintUsage[iRankMatch].omit = 1; + } pInfo->idxNum = ePlan; return SQLITE_OK; @@ -543,6 +555,10 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ sqlite3_free(pCsr->apRankArg); sqlite3_free(pCsr->zSpecial); + if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){ + sqlite3_free(pCsr->zRank); + sqlite3_free(pCsr->zRankArgs); + } sqlite3_free(pCsr); return SQLITE_OK; } @@ -636,7 +652,8 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ int nByte; int rc = SQLITE_OK; char *zSql; - const char *zRank = pConfig->zRank ? pConfig->zRank : FTS5_DEFAULT_RANK; + const char *zRank = pCsr->zRank; + const char *zRankArgs = pCsr->zRankArgs; nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase; @@ -654,8 +671,8 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ ** If SQLite a built-in statement cache, this wouldn't be a problem. */ zSql = sqlite3_mprintf("SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s", pConfig->zDb, pConfig->zName, zRank, pConfig->zName, - (pConfig->zRankArgs ? ", " : ""), - (pConfig->zRankArgs ? pConfig->zRankArgs : ""), + (zRankArgs ? ", " : ""), + (zRankArgs ? zRankArgs : ""), bAsc ? 
"ASC" : "DESC" ); if( zSql==0 ){ @@ -747,14 +764,13 @@ static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){ static int fts5FindRankFunction(Fts5Cursor *pCsr){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); Fts5Config *pConfig = pTab->pConfig; - const char *zRank = pConfig->zRank; int rc = SQLITE_OK; Fts5Auxiliary *pAux; + const char *zRank = pCsr->zRank; + const char *zRankArgs = pCsr->zRankArgs; - if( zRank==0 ) zRank = FTS5_DEFAULT_RANK; - - if( pTab->pConfig->zRankArgs ){ - char *zSql = sqlite3_mprintf("SELECT %s", pTab->pConfig->zRankArgs); + if( zRankArgs ){ + char *zSql = sqlite3_mprintf("SELECT %s", zRankArgs); if( zSql==0 ){ rc = SQLITE_NOMEM; }else{ @@ -796,10 +812,50 @@ static int fts5FindRankFunction(Fts5Cursor *pCsr){ return rc; } + +static int fts5CursorParseRank( + Fts5Config *pConfig, + Fts5Cursor *pCsr, + sqlite3_value *pRank +){ + int rc = SQLITE_OK; + if( pRank ){ + const char *z = (const char*)sqlite3_value_text(pRank); + char *zRank = 0; + char *zRankArgs = 0; + + rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); + if( rc==SQLITE_OK ){ + pCsr->zRank = zRank; + pCsr->zRankArgs = zRankArgs; + CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK); + }else if( rc==SQLITE_ERROR ){ + pCsr->base.pVtab->zErrMsg = sqlite3_mprintf( + "parse error in rank function: %s", z + ); + } + }else{ + if( pConfig->zRank ){ + pCsr->zRank = (char*)pConfig->zRank; + pCsr->zRankArgs = (char*)pConfig->zRankArgs; + }else{ + pCsr->zRank = (char*)FTS5_DEFAULT_RANK; + pCsr->zRankArgs = 0; + } + } + return rc; +} + /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional ** information. +** +** There are three possible query strategies: +** +** 1. Full-text search using a MATCH operator. +** 2. A by-rowid lookup. +** 3. A full-table scan. 
*/ static int fts5FilterMethod( sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ @@ -813,10 +869,13 @@ static int fts5FilterMethod( int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0); int rc = SQLITE_OK; + assert( nVal<=2 ); assert( pCsr->pStmt==0 ); assert( pCsr->pExpr==0 ); assert( pCsr->csrflags==0 ); assert( pCsr->pRank==0 ); + assert( pCsr->zRank==0 ); + assert( pCsr->zRankArgs==0 ); if( pTab->pSortCsr ){ /* If pSortCsr is non-NULL, then this call is being made as part of @@ -835,19 +894,22 @@ static int fts5FilterMethod( if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){ const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); - if( zExpr[0]=='*' ){ - /* The user has issued a query of the form "MATCH '*...'". This - ** indicates that the MATCH expression is not a full text query, - ** but a request for an internal parameter. */ - rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); - }else{ - char **pzErr = &pTab->base.zErrMsg; - rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); - if( rc==SQLITE_OK ){ - if( ePlan==FTS5_PLAN_MATCH ){ - rc = fts5CursorFirst(pTab, pCsr, bAsc); - }else{ - rc = fts5CursorFirstSorted(pTab, pCsr, bAsc); + rc = fts5CursorParseRank(pTab->pConfig, pCsr, (nVal==2 ? apVal[1] : 0)); + if( rc==SQLITE_OK ){ + if( zExpr[0]=='*' ){ + /* The user has issued a query of the form "MATCH '*...'". This + ** indicates that the MATCH expression is not a full text query, + ** but a request for an internal parameter. 
*/ + rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); + }else{ + char **pzErr = &pTab->base.zErrMsg; + rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); + if( rc==SQLITE_OK ){ + if( ePlan==FTS5_PLAN_MATCH ){ + rc = fts5CursorFirst(pTab, pCsr, bAsc); + }else{ + rc = fts5CursorFirstSorted(pTab, pCsr, bAsc); + } } } } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 1fffcbfe51..371c99d745 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -109,6 +109,8 @@ int sqlite3Fts5ConfigLoad(Fts5Config*, int); /* Set the value of a single config attribute */ int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); +int sqlite3Fts5ConfigParseRank(const char*, char**, char**); + /* ** End of interface to code in fts5_config.c. **************************************************************************/ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index fc3fe73bba..fd6b051a08 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -552,7 +552,7 @@ static const char *fts5ConfigSkipArgs(const char *pIn){ ** + Zero or more SQL literals in a comma separated list ** + Close parenthesis - ")" */ -static int fts5ConfigParseRank( +int sqlite3Fts5ConfigParseRank( const char *zIn, /* Input string */ char **pzRank, /* OUT: Rank function name */ char **pzRankArgs /* OUT: Rank function arguments */ @@ -647,7 +647,7 @@ int sqlite3Fts5ConfigSetValue( const char *zIn = (const char*)sqlite3_value_text(pVal); char *zRank; char *zRankArgs; - rc = fts5ConfigParseRank(zIn, &zRank, &zRankArgs); + rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); if( rc==SQLITE_OK ){ sqlite3_free(pConfig->zRank); sqlite3_free(pConfig->zRankArgs); diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 926ad951cf..2cf291af64 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -178,6 +178,23 @@ do_execsql_test 4.1.1 { } do_execsql_test 4.1.2 { + SELECT rowid, rank FROM t2 + WHERE t2 MATCH 'a' 
AND rank MATCH 'firstinst()' + ORDER BY rowid ASC +} { + 1 0 2 4 3 6 5 103 + 6 9 7 0 9 102 10 8 +} + +do_execsql_test 4.1.3 { + SELECT rowid, rank FROM t2 + WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()' + ORDER BY rank DESC +} { + 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 +} + +do_execsql_test 4.1.4 { INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()'); SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC } { @@ -185,13 +202,13 @@ do_execsql_test 4.1.2 { 6 9 7 0 9 102 10 8 } -do_execsql_test 4.1.3 { +do_execsql_test 4.1.5 { SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC } { 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 } -do_execsql_test 4.1.4 { +do_execsql_test 4.1.6 { INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) '); SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC } { @@ -216,6 +233,42 @@ do_execsql_test 4.2.2 { 10 121 } +do_execsql_test 4.2.3 { + SELECT rowid, rank FROM t2 + WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)' +} { + 10 122 +} + +proc rowidmod {cmd imod} { + expr [$cmd xRowid] % $imod +} +sqlite3_fts5_create_function db rowidmod rowidmod +do_execsql_test 4.3.1 { + CREATE VIRTUAL TABLE t3 USING fts5(x); + INSERT INTO t3 VALUES('a one'); + INSERT INTO t3 VALUES('a two'); + INSERT INTO t3 VALUES('a three'); + INSERT INTO t3 VALUES('a four'); + INSERT INTO t3 VALUES('a five'); + INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()'); +} +breakpoint + +do_execsql_test 4.3.2 { + SELECT * FROM t3 + WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)' + ORDER BY rank ASC +} { + {a four} {a five} {a one} {a two} {a three} +} +do_execsql_test 4.3.3 { + SELECT *, rank FROM t3 + WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)' + ORDER BY rank ASC +} { + {a three} 0 {a four} 1 {a one} 1 {a five} 2 {a two} 2 +} finish_test diff --git a/manifest b/manifest index 73bbea9d02..c472027698 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. 
-D 2015-01-01T18:03:49.016 +C Allow\sthe\srank\scolumn\sto\sbe\sremapped\son\sa\sper-query\sbasis\sby\sincluding\sa\sterm\ssimilar\sto\s"rank\smatch\s'bm25(10,2)'"\sin\sa\swhere\sclause. +D 2015-01-02T14:55:22.175 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,12 +104,12 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74 +F ext/fts5/fts5.c a80283dca24506f1c748fffbba8d87ae4d348b50 F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b -F ext/fts5/fts5Int.h b5dfed6a1b256ff21d11898f14ab337205844469 +F ext/fts5/fts5Int.h b5d7970b851d2b4f1745cd2d5c95216c9847aef2 F ext/fts5/fts5_aux.c 445e54031ff94174673f4f5aac6c064df20a2a6b F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c 73774e37a99218833b767f96bb5af35ebe43b77c +F ext/fts5/fts5_config.c 74a860e10c5583831f04d0088c4a49a3c6eca43d F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f @@ -129,7 +129,7 @@ F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f -F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 +F ext/fts5/test/fts5al.test bc873766fec3baae05ba6e76b379bc2f5e8eaf75 F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 
F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 @@ -1270,7 +1270,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d09f7800cf14f73ea86d037107ef80295b2c173a 66269d0d8e49eb3dc7f508714753584f648bb022 -R 547c6b40048a436817ff50668d0f1e7c +P 4b3651677e7132c4c45605bc1f216fc08ef31198 +R 691df06fdaf9c3542bb10cf702e4a0f8 U dan -Z dab366ead758ca084f8be9b0cb1bbb1d +Z 12262406c5f3f18d2ab88add956e21a6 diff --git a/manifest.uuid b/manifest.uuid index 4878666f57..9ca9c0f833 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4b3651677e7132c4c45605bc1f216fc08ef31198 \ No newline at end of file +1cd15a1759004d5d321056905dbb6acff20dc7d9 \ No newline at end of file From 0fbc269feffed7a4bc453b0be75e81d9c4c6ac88 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 3 Jan 2015 20:44:58 +0000 Subject: [PATCH 063/206] Add support for external content tables to fts5. 
FossilOrigin-Name: 17ef5b59f789e9fa35c4f053246d819987fd06f8 --- ext/fts5/fts5.c | 32 +++++-- ext/fts5/fts5Int.h | 5 ++ ext/fts5/fts5_config.c | 96 ++++++++++++++++---- ext/fts5/fts5_expr.c | 26 +++--- ext/fts5/fts5_storage.c | 188 ++++++++++++++++++++++++++++++---------- manifest | 20 ++--- manifest.uuid | 2 +- 7 files changed, 283 insertions(+), 86 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 6c69da97b3..7ad9176f4e 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1056,6 +1056,20 @@ static int fts5SpecialInsert( return rc; } +static int fts5SpecialDelete( + Fts5Table *pTab, + sqlite3_value **apVal, + sqlite3_int64 *piRowid +){ + int rc = SQLITE_OK; + int eType1 = sqlite3_value_type(apVal[1]); + if( eType1==SQLITE_INTEGER ){ + sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]); + rc = sqlite3Fts5StorageSpecialDelete(pTab->pStorage, iDel, &apVal[2]); + } + return rc; +} + /* ** This function is the implementation of the xUpdate callback used by ** FTS3 virtual tables. 
It is invoked by SQLite each time a row is to be @@ -1086,17 +1100,25 @@ static int fts5UpdateMethod( */ assert( nArg==1 || nArg==(2 + pConfig->nCol + 2) ); - if( nArg>1 && SQLITE_NULL!=sqlite3_value_type(apVal[2 + pConfig->nCol]) ){ - return fts5SpecialInsert(pTab, - apVal[2 + pConfig->nCol], apVal[2 + pConfig->nCol + 1] - ); + if( nArg>1 ){ + sqlite3_value *pCmd = sqlite3_value_type(apVal[2 + pConfig->nCol]); + if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ + const char *z = sqlite3_value_text(pCmd); + if( pConfig->bExternalContent && sqlite3_stricmp("delete", z) ){ + return fts5SpecialDelete(pTab, apVal, pRowid); + }else{ + return fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]); + } + } } eType0 = sqlite3_value_type(apVal[0]); eConflict = sqlite3_vtab_on_conflict(pConfig->db); assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); - if( eType0==SQLITE_INTEGER ){ + assert( pVtab->zErrMsg==0 ); + + if( rc==SQLITE_OK && eType0==SQLITE_INTEGER ){ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 371c99d745..ef5b9e56c7 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -76,6 +76,9 @@ struct Fts5Config { char **azCol; /* Column names */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ + int bExternalContent; /* Content is external */ + char *zContent; /* "content=" option value (or NULL) */ + char *zContentRowid; /* "content_rowid=" option value (or NULL) */ Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; @@ -410,6 +413,8 @@ int sqlite3Fts5StorageRollback(Fts5Storage *p); int sqlite3Fts5StorageConfigValue(Fts5Storage *p, const char*, sqlite3_value*); +int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**); + /* ** End of interface to code in fts5_storage.c. 
**************************************************************************/ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index fd6b051a08..07255c400a 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -192,6 +192,47 @@ static char *fts5TrimString(char *z){ return z; } +/* +** Duplicate the string passed as the only argument into a buffer allocated +** by sqlite3_malloc(). +** +** Return 0 if an OOM error is encountered. +*/ +static char *fts5Strdup(int *pRc, const char *z){ + char *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3_mprintf("%s", z); + if( pRet==0 ) *pRc = SQLITE_NOMEM; + } + return pRet; +} + +/* +** Argument z points to a nul-terminated string containing an SQL identifier. +** This function returns a copy of the identifier enclosed in backtick +** quotes. +*/ +static char *fts5EscapeName(int *pRc, const char *z){ + char *pRet = 0; + if( *pRc==SQLITE_OK ){ + int n = strlen(z); + pRet = (char*)sqlite3_malloc(2 * 2*n + 1); + if( pRet==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + int i; + char *p = pRet; + for(i=0; izContent ){ + *pzErr = sqlite3_mprintf("multiple content=... directives"); + rc = SQLITE_ERROR; + }else{ + pConfig->zContent = sqlite3_mprintf("%Q.%Q", pConfig->zDb, zArg); + pConfig->bExternalContent = 1; + if( pConfig->zContent==0 ) rc = SQLITE_NOMEM; + } + return rc; + } + + if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ + int rc = SQLITE_OK; + if( pConfig->zContentRowid ){ + *pzErr = sqlite3_mprintf("multiple content_rowid=... directives"); + rc = SQLITE_ERROR; + }else{ + pConfig->zContentRowid = fts5EscapeName(&rc, zArg); + } + return rc; + } + *pzErr = sqlite3_mprintf("unrecognized directive: \"%s\"", zCmd); return SQLITE_ERROR; } -/* -** Duplicate the string passed as the only argument into a buffer allocated -** by sqlite3_malloc(). -** -** Return 0 if an OOM error is encountered. 
-*/ -static char *fts5Strdup(int *pRc, const char *z){ - char *pRet = 0; - if( *pRc==SQLITE_OK ){ - pRet = sqlite3_mprintf("%s", z); - if( pRet==0 ) *pRc = SQLITE_NOMEM; - } - return pRet; -} - /* ** Allocate an instance of the default tokenizer ("simple") at ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error @@ -422,6 +472,20 @@ int sqlite3Fts5ConfigParse( rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); } + /* If no zContent option was specified, fill in the default values. */ + if( rc==SQLITE_OK && pRet->zContent==0 ){ + pRet->zContent = sqlite3_mprintf("%Q.'%q_content'", pRet->zDb, pRet->zName); + if( pRet->zContent==0 ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3_free(pRet->zContentRowid); + pRet->zContentRowid = 0; + } + } + if( rc==SQLITE_OK && pRet->zContentRowid==0 ){ + pRet->zContentRowid = fts5Strdup(&rc, "rowid"); + } + if( rc!=SQLITE_OK ){ sqlite3Fts5ConfigFree(pRet); *ppOut = 0; @@ -447,6 +511,8 @@ void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ sqlite3_free(pConfig->aPrefix); sqlite3_free(pConfig->zRank); sqlite3_free(pConfig->zRankArgs); + sqlite3_free(pConfig->zContent); + sqlite3_free(pConfig->zContentRowid); sqlite3_free(pConfig); } } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 830af586b3..0d0c5bae29 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1041,19 +1041,24 @@ i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ ** It is the responsibility of the caller to eventually free the returned ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. 
*/ -static char *fts5Strndup(const char *pIn, int nIn){ - char *zRet = (char*)sqlite3_malloc(nIn+1); - if( zRet ){ - memcpy(zRet, pIn, nIn); - zRet[nIn] = '\0'; +static char *fts5Strndup(int *pRc, const char *pIn, int nIn){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + zRet = (char*)sqlite3_malloc(nIn+1); + if( zRet ){ + memcpy(zRet, pIn, nIn); + zRet[nIn] = '\0'; + }else{ + *pRc = SQLITE_NOMEM; + } } return zRet; } static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ - *pz = fts5Strndup(pToken->p, pToken->n); - if( *pz==0 ) return SQLITE_NOMEM; - return SQLITE_OK; + int rc = SQLITE_OK; + *pz = fts5Strndup(&rc, pToken->p, pToken->n); + return rc; } /* @@ -1139,6 +1144,7 @@ static int fts5ParseTokenize( int iEnd, /* End offset of token */ int iPos /* Position offset of token */ ){ + int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; @@ -1159,9 +1165,9 @@ static int fts5ParseTokenize( pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = fts5Strndup(pToken, nToken); + pTerm->zTerm = fts5Strndup(&rc, pToken, nToken); - return pTerm->zTerm ? 
SQLITE_OK : SQLITE_NOMEM; + return rc; } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 0a9ba0c8ad..0bbf25fdab 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -61,9 +61,9 @@ static int fts5StorageGetStmt( assert( eStmt>=0 && eStmtaStmt) ); if( p->aStmt[eStmt]==0 ){ const char *azStmt[] = { - "SELECT * FROM %Q.'%q_content' ORDER BY id ASC", /* SCAN_ASC */ - "SELECT * FROM %Q.'%q_content' ORDER BY id DESC", /* SCAN_DESC */ - "SELECT * FROM %Q.'%q_content' WHERE rowid=?", /* LOOKUP */ + "SELECT * FROM %s ORDER BY id ASC", /* SCAN_ASC */ + "SELECT * FROM %s ORDER BY id DESC", /* SCAN_DESC */ + "SELECT * FROM %s WHERE %s=?", /* LOOKUP */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ @@ -75,32 +75,47 @@ static int fts5StorageGetStmt( "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ }; - Fts5Config *pConfig = p->pConfig; + Fts5Config *pC = p->pConfig; char *zSql = 0; - if( eStmt==FTS5_STMT_INSERT_CONTENT || eStmt==FTS5_STMT_REPLACE_CONTENT ){ - int nCol = pConfig->nCol + 1; - char *zBind; - int i; + switch( eStmt ){ + case FTS5_STMT_SCAN_ASC: + case FTS5_STMT_SCAN_DESC: + zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContent); + break; - zBind = sqlite3_malloc(1 + nCol*2); - if( zBind ){ - for(i=0; izContent, pC->zContentRowid); + break; + + case FTS5_STMT_INSERT_CONTENT: + case FTS5_STMT_REPLACE_CONTENT: { + int nCol = pC->nCol + 1; + char *zBind; + int i; + + zBind = sqlite3_malloc(1 + nCol*2); + if( zBind ){ + for(i=0; izDb, pC->zName, zBind); + sqlite3_free(zBind); } - zBind[i*2-1] = '\0'; - zSql = sqlite3_mprintf(azStmt[eStmt],pConfig->zDb,pConfig->zName,zBind); - sqlite3_free(zBind); + break; } - }else{ - zSql = sqlite3_mprintf(azStmt[eStmt], pConfig->zDb, pConfig->zName); + + default: + zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName); + break; } if( zSql==0 ){ rc = SQLITE_NOMEM; }else{ - rc = 
sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->aStmt[eStmt], 0); + rc = sqlite3_prepare_v2(pC->db, zSql, -1, &p->aStmt[eStmt], 0); sqlite3_free(zSql); } } @@ -190,18 +205,21 @@ int sqlite3Fts5StorageOpen( p->pIndex = pIndex; if( bCreate ){ - int i; - char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10); - if( zDefn==0 ){ - rc = SQLITE_NOMEM; - }else{ - int iOff = sprintf(zDefn, "id INTEGER PRIMARY KEY"); - for(i=0; i<pConfig->nCol; i++){ - iOff += sprintf(&zDefn[iOff], ", c%d", i); + if( pConfig->bExternalContent==0 ){ + char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10); + if( zDefn==0 ){ + rc = SQLITE_NOMEM; + }else{ + int i; + int iOff = sprintf(zDefn, "id INTEGER PRIMARY KEY"); + for(i=0; i<pConfig->nCol; i++){ + iOff += sprintf(&zDefn[iOff], ", c%d", i); + } + rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); } - rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); + sqlite3_free(zDefn); } - sqlite3_free(zDefn); + if( rc==SQLITE_OK ){ rc = sqlite3Fts5CreateTable( pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr @@ -432,6 +450,78 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ return rc; } +int sqlite3Fts5StorageSpecialDelete( + Fts5Storage *p, + i64 iDel, + sqlite3_value **apVal +){ + Fts5Config *pConfig = p->pConfig; + int rc; + sqlite3_stmt *pDel; + + assert( p->pConfig->bExternalContent ); + rc = fts5StorageLoadTotals(p, 1); + + /* Delete the index records */ + if( rc==SQLITE_OK ){ + int iCol; + Fts5InsertCtx ctx; + ctx.pStorage = p; + ctx.iCol = -1; + + rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); + for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ + rc = sqlite3Fts5Tokenize(pConfig, + (const char*)sqlite3_value_text(apVal[iCol]), + sqlite3_value_bytes(apVal[iCol]), + (void*)&ctx, + fts5StorageInsertCallback + ); + p->aTotalSize[iCol-1] -= (i64)ctx.szCol; + } + p->nTotalRow--; + } + + /* Delete the %_docsize record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel); + } + if( 
rc==SQLITE_OK ){ + sqlite3_bind_int64(pDel, 1, iDel); + sqlite3_step(pDel); + rc = sqlite3_reset(pDel); + } + + /* Write the averages record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageSaveTotals(p); + } + + return rc; + +} + +/* +** Allocate a new rowid. This is used for "external content" tables when +** a NULL value is inserted into the rowid column. The new rowid is allocated +** by inserting a dummy row into the %_docsize table. The dummy will be +** overwritten later. +*/ +static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ + sqlite3_stmt *pReplace = 0; + int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace); + if( rc==SQLITE_OK ){ + sqlite3_bind_null(pReplace, 1); + sqlite3_bind_null(pReplace, 2); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + } + if( rc==SQLITE_OK ){ + *piRowid = sqlite3_last_insert_rowid(p->pConfig->db); + } + return rc; +} + /* ** Insert a new row into the FTS table. */ @@ -453,27 +543,35 @@ int sqlite3Fts5StorageInsert( rc = fts5StorageLoadTotals(p, 1); /* Insert the new row into the %_content table. 
*/ - if( rc==SQLITE_OK ){ - if( eConflict==SQLITE_REPLACE ){ - eStmt = FTS5_STMT_REPLACE_CONTENT; + if( rc==SQLITE_OK && pConfig->bExternalContent==0 ){ + if( pConfig->bExternalContent ){ if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ - rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1])); + *piRowid = sqlite3_value_int64(apVal[1]); + }else{ + rc = fts5StorageNewRowid(p, piRowid); } }else{ - eStmt = FTS5_STMT_INSERT_CONTENT; + if( eConflict==SQLITE_REPLACE ){ + eStmt = FTS5_STMT_REPLACE_CONTENT; + if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ + rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1])); + } + }else{ + eStmt = FTS5_STMT_INSERT_CONTENT; + } + if( rc==SQLITE_OK ){ + rc = fts5StorageGetStmt(p, eStmt, &pInsert); + } + for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ + rc = sqlite3_bind_value(pInsert, i, apVal[i]); + } + if( rc==SQLITE_OK ){ + sqlite3_step(pInsert); + rc = sqlite3_reset(pInsert); + } + *piRowid = sqlite3_last_insert_rowid(pConfig->db); } } - if( rc==SQLITE_OK ){ - rc = fts5StorageGetStmt(p, eStmt, &pInsert); - } - for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ - rc = sqlite3_bind_value(pInsert, i, apVal[i]); - } - if( rc==SQLITE_OK ){ - sqlite3_step(pInsert); - rc = sqlite3_reset(pInsert); - } - *piRowid = sqlite3_last_insert_rowid(pConfig->db); /* Add new entries to the FTS index */ if( rc==SQLITE_OK ){ diff --git a/manifest b/manifest index c472027698..90fe542994 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Allow\sthe\srank\scolumn\sto\sbe\sremapped\son\sa\sper-query\sbasis\sby\sincluding\sa\sterm\ssimilar\sto\s"rank\smatch\s'bm25(10,2)'"\sin\sa\swhere\sclause. -D 2015-01-02T14:55:22.175 +C Add\ssupport\sfor\sexternal\scontent\stables\sto\sfts5. 
+D 2015-01-03T20:44:58.134 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c a80283dca24506f1c748fffbba8d87ae4d348b50 +F ext/fts5/fts5.c 16177d7f81af1852cf7f477b5ae119215ad6044a F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b -F ext/fts5/fts5Int.h b5d7970b851d2b4f1745cd2d5c95216c9847aef2 +F ext/fts5/fts5Int.h 8062dc2363c863dc8a5b2e5651cb8c966bd6c4cb F ext/fts5/fts5_aux.c 445e54031ff94174673f4f5aac6c064df20a2a6b F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c 74a860e10c5583831f04d0088c4a49a3c6eca43d -F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 +F ext/fts5/fts5_config.c 16d647c7bfe50d4e823267188e12e2d001d655e0 +F ext/fts5/fts5_expr.c 317093f00a2ccdaaee0a5290f9f228c600189c41 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f -F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853 +F ext/fts5/fts5_storage.c b95fcca70f94656854e7afcfbb9896455f6b034d F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 @@ -1270,7 +1270,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 
4b3651677e7132c4c45605bc1f216fc08ef31198 -R 691df06fdaf9c3542bb10cf702e4a0f8 +P 1cd15a1759004d5d321056905dbb6acff20dc7d9 +R d21eb6bee3b06e51f22d32d1e0bd7016 U dan -Z 12262406c5f3f18d2ab88add956e21a6 +Z e88e77f44b464406d3184a89736eaa7d diff --git a/manifest.uuid b/manifest.uuid index 9ca9c0f833..49cb51dbf0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1cd15a1759004d5d321056905dbb6acff20dc7d9 \ No newline at end of file +17ef5b59f789e9fa35c4f053246d819987fd06f8 \ No newline at end of file From ded4f41d1a1dbedfa381cda102e77018236a4eff Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 5 Jan 2015 20:41:39 +0000 Subject: [PATCH 064/206] Tests and fixes for fts5 external content tables. FossilOrigin-Name: 047aaf830d1e72f0fdad3832a0b617e769d66468 --- ext/fts5/fts5.c | 70 ++++++++++++++++-------- ext/fts5/fts5Int.h | 14 +++-- ext/fts5/fts5_aux.c | 85 +++++++++++++++-------------- ext/fts5/fts5_config.c | 15 ++++-- ext/fts5/fts5_storage.c | 49 ++++++++++------- ext/fts5/test/fts5content.test | 99 ++++++++++++++++++++++++++++++++++ manifest | 21 ++++---- manifest.uuid | 2 +- 8 files changed, 254 insertions(+), 101 deletions(-) create mode 100644 ext/fts5/test/fts5content.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 7ad9176f4e..ab2216e25d 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -256,6 +256,12 @@ static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){ # define fts5CheckTransactionState(x,y,z) #endif +/* +** Return true if pTab is a contentless table. +*/ +static int fts5IsContentless(Fts5Table *pTab){ + return pTab->pConfig->eContent==FTS5_CONTENT_NONE; +} /* ** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy @@ -917,7 +923,9 @@ static int fts5FilterMethod( /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup ** by rowid (ePlan==FTS5_PLAN_ROWID). 
*/ int eStmt = fts5StmtType(idxNum); - rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); + rc = sqlite3Fts5StorageStmt( + pTab->pStorage, eStmt, &pCsr->pStmt, &pTab->base.zErrMsg + ); if( rc==SQLITE_OK ){ if( ePlan==FTS5_PLAN_ROWID ){ sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); @@ -995,7 +1003,9 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ if( pCsr->pStmt==0 ){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); int eStmt = fts5StmtType(pCsr->idxNum); - rc = sqlite3Fts5StorageStmt(pTab->pStorage, eStmt, &pCsr->pStmt); + rc = sqlite3Fts5StorageStmt( + pTab->pStorage, eStmt, &pCsr->pStmt, &pTab->base.zErrMsg + ); assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ); } @@ -1100,18 +1110,6 @@ static int fts5UpdateMethod( */ assert( nArg==1 || nArg==(2 + pConfig->nCol + 2) ); - if( nArg>1 ){ - sqlite3_value *pCmd = sqlite3_value_type(apVal[2 + pConfig->nCol]); - if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ - const char *z = sqlite3_value_text(pCmd); - if( pConfig->bExternalContent && sqlite3_stricmp("delete", z) ){ - return fts5SpecialDelete(pTab, apVal, pRowid); - }else{ - return fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]); - } - } - } - eType0 = sqlite3_value_type(apVal[0]); eConflict = sqlite3_vtab_on_conflict(pConfig->db); @@ -1119,10 +1117,31 @@ static int fts5UpdateMethod( assert( pVtab->zErrMsg==0 ); if( rc==SQLITE_OK && eType0==SQLITE_INTEGER ){ - i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); + if( fts5IsContentless(pTab) ){ + pTab->base.zErrMsg = sqlite3_mprintf( + "cannot %s contentless fts5 table: %s", + (nArg>1 ? 
"UPDATE" : "DELETE from"), pConfig->zName + ); + rc = SQLITE_ERROR; + }else{ + i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); + } + }else if( nArg>1 ){ + sqlite3_value *pCmd = apVal[2 + pConfig->nCol]; + if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ + const char *z = sqlite3_value_text(pCmd); + if( pConfig->eContent!=FTS5_CONTENT_NORMAL + && 0==sqlite3_stricmp("delete", z) + ){ + return fts5SpecialDelete(pTab, apVal, pRowid); + }else{ + return fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]); + } + } } + if( rc==SQLITE_OK && nArg>1 ){ rc = sqlite3Fts5StorageInsert(pTab->pStorage, apVal, eConflict, pRowid); } @@ -1328,11 +1347,17 @@ static int fts5ApiColumnText( const char **pz, int *pn ){ + int rc = SQLITE_OK; Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; - int rc = fts5SeekCursor(pCsr); - if( rc==SQLITE_OK ){ - *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); - *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){ + *pz = 0; + *pn = 0; + }else{ + rc = fts5SeekCursor(pCsr); + if( rc==SQLITE_OK ){ + *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); + *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + } } return rc; } @@ -1566,7 +1591,8 @@ static int fts5ColumnMethod( sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ - Fts5Config *pConfig = ((Fts5Table*)(pCursor->pVtab))->pConfig; + Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); + Fts5Config *pConfig = pTab->pConfig; Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int rc = SQLITE_OK; @@ -1597,7 +1623,7 @@ static int fts5ColumnMethod( fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); } } - }else{ + }else if( !fts5IsContentless(pTab) ){ rc = fts5SeekCursor(pCsr); if( rc==SQLITE_OK ){ sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); diff --git a/ext/fts5/fts5Int.h 
b/ext/fts5/fts5Int.h index ef5b9e56c7..30dafe9fff 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -76,9 +76,9 @@ struct Fts5Config { char **azCol; /* Column names */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ - int bExternalContent; /* Content is external */ - char *zContent; /* "content=" option value (or NULL) */ - char *zContentRowid; /* "content_rowid=" option value (or NULL) */ + int eContent; /* An FTS5_CONTENT value */ + char *zContent; /* content table */ + char *zContentRowid; /* "content_rowid=" option value */ Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; @@ -90,6 +90,12 @@ struct Fts5Config { char *zRankArgs; /* Arguments to rank function */ }; +#define FTS5_CONTENT_NORMAL 0 +#define FTS5_CONTENT_NONE 1 +#define FTS5_CONTENT_EXTERNAL 2 + + + int sqlite3Fts5ConfigParse( Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** ); @@ -401,7 +407,7 @@ int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*); int sqlite3Fts5StorageIntegrity(Fts5Storage *p); -int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); +int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 64904210f6..9ddb589085 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -222,21 +222,23 @@ static void fts5HighlightFunction( ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); - if( rc==SQLITE_OK ){ - rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); - } + if( ctx.zIn ){ + if( rc==SQLITE_OK ){ + rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); + } - if( rc==SQLITE_OK ){ - rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb); - } - 
fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); + if( rc==SQLITE_OK ){ + rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); + } + fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); - if( rc==SQLITE_OK ){ - sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); - }else{ - sqlite3_result_error_code(pCtx, rc); + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_code(pCtx, rc); + } + sqlite3_free(ctx.zOut); } - sqlite3_free(ctx.zOut); } /* ** End of highlight() implementation. @@ -275,7 +277,6 @@ static void fts5SnippetFunction( memset(&ctx, 0, sizeof(HighlightContext)); iCol = sqlite3_value_int(apVal[0]); - rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); zEllips = (const char*)sqlite3_value_text(apVal[3]); @@ -328,39 +329,41 @@ static void fts5SnippetFunction( if( rc==SQLITE_OK ){ rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); } - if( rc==SQLITE_OK ){ - rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); - } + if( ctx.zIn ){ + if( rc==SQLITE_OK ){ + rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); + } - if( (iBestStart+nToken-1)>iBestLast ){ - iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; - } - if( iBestStart+nToken>nColSize ){ - iBestStart = nColSize - nToken; - } - if( iBestStart<0 ) iBestStart = 0; + if( (iBestStart+nToken-1)>iBestLast ){ + iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; + } + if( iBestStart+nToken>nColSize ){ + iBestStart = nColSize - nToken; + } + if( iBestStart<0 ) iBestStart = 0; - ctx.iRangeStart = iBestStart; - ctx.iRangeEnd = iBestStart + nToken - 1; + ctx.iRangeStart = iBestStart; + ctx.iRangeEnd = iBestStart + nToken - 1; - if( iBestStart>0 ){ - fts5HighlightAppend(&rc, &ctx, zEllips, -1); - } - if( rc==SQLITE_OK ){ - rc = 
pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb); - } - if( ctx.iRangeEnd>=(nColSize-1) ){ - fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); - }else{ - fts5HighlightAppend(&rc, &ctx, zEllips, -1); - } + if( iBestStart>0 ){ + fts5HighlightAppend(&rc, &ctx, zEllips, -1); + } + if( rc==SQLITE_OK ){ + rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); + } + if( ctx.iRangeEnd>=(nColSize-1) ){ + fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); + }else{ + fts5HighlightAppend(&rc, &ctx, zEllips, -1); + } - if( rc==SQLITE_OK ){ - sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); - }else{ - sqlite3_result_error_code(pCtx, rc); + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_code(pCtx, rc); + } + sqlite3_free(ctx.zOut); } - sqlite3_free(ctx.zOut); sqlite3_free(aSeen); } diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 07255c400a..7a2eba27ae 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -334,12 +334,19 @@ static int fts5ConfigParseSpecial( if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){ int rc = SQLITE_OK; - if( pConfig->zContent ){ + if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ *pzErr = sqlite3_mprintf("multiple content=... directives"); rc = SQLITE_ERROR; }else{ - pConfig->zContent = sqlite3_mprintf("%Q.%Q", pConfig->zDb, zArg); - pConfig->bExternalContent = 1; + if( zArg[0] ){ + pConfig->eContent = FTS5_CONTENT_EXTERNAL; + pConfig->zContent = sqlite3_mprintf("%Q.%Q", pConfig->zDb, zArg); + }else{ + pConfig->eContent = FTS5_CONTENT_NONE; + pConfig->zContent = sqlite3_mprintf( + "%Q.'%q_docsize'", pConfig->zDb, pConfig->zName + ); + } if( pConfig->zContent==0 ) rc = SQLITE_NOMEM; } return rc; @@ -473,7 +480,7 @@ int sqlite3Fts5ConfigParse( } /* If no zContent option was specified, fill in the default values. 
*/ - if( rc==SQLITE_OK && pRet->zContent==0 ){ + if( rc==SQLITE_OK && pRet->eContent==FTS5_CONTENT_NORMAL ){ pRet->zContent = sqlite3_mprintf("%Q.'%q_content'", pRet->zDb, pRet->zName); if( pRet->zContent==0 ){ rc = SQLITE_NOMEM; diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 0bbf25fdab..e3119c7de4 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -54,7 +54,8 @@ struct Fts5Storage { static int fts5StorageGetStmt( Fts5Storage *p, /* Storage handle */ int eStmt, /* FTS5_STMT_XXX constant */ - sqlite3_stmt **ppStmt /* OUT: Prepared statement handle */ + sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */ + char **pzErrMsg /* OUT: Error message (if any) */ ){ int rc = SQLITE_OK; @@ -117,6 +118,9 @@ static int fts5StorageGetStmt( }else{ rc = sqlite3_prepare_v2(pC->db, zSql, -1, &p->aStmt[eStmt], 0); sqlite3_free(zSql); + if( rc!=SQLITE_OK && pzErrMsg ){ + *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db)); + } } } @@ -205,7 +209,7 @@ int sqlite3Fts5StorageOpen( p->pIndex = pIndex; if( bCreate ){ - if( pConfig->bExternalContent==0 ){ + if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10); if( zDefn==0 ){ rc = SQLITE_NOMEM; @@ -254,7 +258,9 @@ int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy){ /* If required, remove the shadow tables from the database */ if( bDestroy ){ - rc = sqlite3Fts5DropTable(p->pConfig, "content"); + if( p->pConfig->eContent==FTS5_CONTENT_NORMAL ){ + rc = sqlite3Fts5DropTable(p->pConfig, "content"); + } if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "docsize"); if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "config"); } @@ -298,7 +304,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ sqlite3_stmt *pSeek; /* SELECT to read row iDel from %_data */ int rc; /* Return code */ - rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek); + rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0); if( 
rc==SQLITE_OK ){ int rc2; sqlite3_bind_int64(pSeek, 1, iDel); @@ -338,7 +344,7 @@ static int fts5StorageInsertDocsize( Fts5Buffer *pBuf /* sz value */ ){ sqlite3_stmt *pReplace = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace); + int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int64(pReplace, 1, iRowid); sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); @@ -424,7 +430,7 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ /* Delete the %_docsize record */ if( rc==SQLITE_OK ){ - rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel); + rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); } if( rc==SQLITE_OK ){ sqlite3_bind_int64(pDel, 1, iDel); @@ -434,7 +440,7 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ /* Delete the %_content record */ if( rc==SQLITE_OK ){ - rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel); + rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0); } if( rc==SQLITE_OK ){ sqlite3_bind_int64(pDel, 1, iDel); @@ -459,7 +465,7 @@ int sqlite3Fts5StorageSpecialDelete( int rc; sqlite3_stmt *pDel; - assert( p->pConfig->bExternalContent ); + assert( p->pConfig->eContent!=FTS5_CONTENT_NORMAL ); rc = fts5StorageLoadTotals(p, 1); /* Delete the index records */ @@ -477,14 +483,14 @@ int sqlite3Fts5StorageSpecialDelete( (void*)&ctx, fts5StorageInsertCallback ); - p->aTotalSize[iCol-1] -= (i64)ctx.szCol; + p->aTotalSize[iCol] -= (i64)ctx.szCol; } p->nTotalRow--; } /* Delete the %_docsize record */ if( rc==SQLITE_OK ){ - rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel); + rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); } if( rc==SQLITE_OK ){ sqlite3_bind_int64(pDel, 1, iDel); @@ -509,7 +515,7 @@ int sqlite3Fts5StorageSpecialDelete( */ static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ sqlite3_stmt *pReplace = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, 
&pReplace); + int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); if( rc==SQLITE_OK ){ sqlite3_bind_null(pReplace, 1); sqlite3_bind_null(pReplace, 2); @@ -543,8 +549,8 @@ int sqlite3Fts5StorageInsert( rc = fts5StorageLoadTotals(p, 1); /* Insert the new row into the %_content table. */ - if( rc==SQLITE_OK && pConfig->bExternalContent==0 ){ - if( pConfig->bExternalContent ){ + if( rc==SQLITE_OK ){ + if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ *piRowid = sqlite3_value_int64(apVal[1]); }else{ @@ -560,7 +566,7 @@ int sqlite3Fts5StorageInsert( eStmt = FTS5_STMT_INSERT_CONTENT; } if( rc==SQLITE_OK ){ - rc = fts5StorageGetStmt(p, eStmt, &pInsert); + rc = fts5StorageGetStmt(p, eStmt, &pInsert, 0); } for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ rc = sqlite3_bind_value(pInsert, i, apVal[i]); @@ -682,7 +688,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ /* Generate the expected index checksum based on the contents of the ** %_content table. This block stores the checksum in ctx.cksum. */ - rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_ASC, &pScan); + rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_ASC, &pScan, 0); if( rc==SQLITE_OK ){ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ @@ -745,13 +751,18 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ ** Obtain an SQLite statement handle that may be used to read data from the ** %_content table. 
*/ -int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **pp){ +int sqlite3Fts5StorageStmt( + Fts5Storage *p, + int eStmt, + sqlite3_stmt **pp, + char **pzErrMsg +){ int rc; assert( eStmt==FTS5_STMT_SCAN_ASC || eStmt==FTS5_STMT_SCAN_DESC || eStmt==FTS5_STMT_LOOKUP ); - rc = fts5StorageGetStmt(p, eStmt, pp); + rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); if( rc==SQLITE_OK ){ assert( p->aStmt[eStmt]==*pp ); p->aStmt[eStmt] = 0; @@ -805,7 +816,7 @@ static int fts5StorageDecodeSizeArray( int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ int nCol = p->pConfig->nCol; sqlite3_stmt *pLookup = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup); + int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); if( rc==SQLITE_OK ){ int bCorrupt = 1; sqlite3_bind_int64(pLookup, 1, iRowid); @@ -873,7 +884,7 @@ int sqlite3Fts5StorageConfigValue( sqlite3_value *pVal ){ sqlite3_stmt *pReplace = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace); + int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0); if( rc==SQLITE_OK ){ sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_TRANSIENT); sqlite3_bind_value(pReplace, 2, pVal); diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test new file mode 100644 index 0000000000..4940a6b03c --- /dev/null +++ b/ext/fts5/test/fts5content.test @@ -0,0 +1,99 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. 
test] +} +source $testdir/tester.tcl +set testprefix fts5content + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE f1 USING fts5(a, b, content=''); + INSERT INTO f1(rowid, a, b) VALUES(1, 'one', 'o n e'); + INSERT INTO f1(rowid, a, b) VALUES(2, 'two', 't w o'); + INSERT INTO f1(rowid, a, b) VALUES(3, 'three', 't h r e e'); +} + +do_execsql_test 1.2 { + SELECT rowid FROM f1 WHERE f1 MATCH 'o'; +} {2 1} + +do_execsql_test 1.3 { + INSERT INTO f1(a, b) VALUES('four', 'f o u r'); + SELECT rowid FROM f1 WHERE f1 MATCH 'o'; +} {4 2 1} + +do_execsql_test 1.4 { + SELECT rowid, a, b FROM f1 WHERE f1 MATCH 'o'; +} {4 {} {} 2 {} {} 1 {} {}} + +do_execsql_test 1.5 { + SELECT rowid, highlight(f1, 0, '[', ']') FROM f1 WHERE f1 MATCH 'o'; +} {4 {} 2 {} 1 {}} + +do_execsql_test 1.6 { + SELECT rowid, highlight(f1, 0, '[', ']') IS NULL FROM f1 WHERE f1 MATCH 'o'; +} {4 1 2 1 1 1} + +do_execsql_test 1.7 { + SELECT rowid, snippet(f1, -1, '[', ']', '...', 5) IS NULL + FROM f1 WHERE f1 MATCH 'o'; +} {4 1 2 1 1 1} + +do_execsql_test 1.8 { + SELECT rowid, snippet(f1, 1, '[', ']', '...', 5) IS NULL + FROM f1 WHERE f1 MATCH 'o'; +} {4 1 2 1 1 1} + +do_execsql_test 1.9 { + SELECT rowid FROM f1; +} {4 3 2 1} + +do_execsql_test 1.10 { + SELECT * FROM f1; +} {{} {} {} {} {} {} {} {}} + +do_execsql_test 1.11 { + SELECT rowid, a, b FROM f1 ORDER BY rowid ASC; +} {1 {} {} 2 {} {} 3 {} {} 4 {} {}} + +do_execsql_test 1.12 { + SELECT a IS NULL FROM f1; +} {1 1 1 1} + +do_catchsql_test 1.13 { + DELETE FROM f1 WHERE rowid = 2; +} {1 {cannot DELETE from contentless fts5 table: f1}} + +do_catchsql_test 1.14 { + UPDATE f1 SET a = 'a b c' WHERE rowid = 2; +} {1 {cannot UPDATE contentless fts5 table: f1}} + +do_execsql_test 1.15 { + INSERT INTO f1(f1, rowid, a, b) VALUES('delete', 2, 'two', 't w o'); +} {} + +db eval { SELECT fts5_decode(id, block) AS d FROM f1_data } { puts $d } + +breakpoint +do_execsql_test 1.16 { + SELECT rowid FROM f1 WHERE f1 MATCH 'o'; +} {4 1} +do_execsql_test 1.17 { + SELECT rowid 
FROM f1; +} {4 3 1} + + + + +finish_test diff --git a/manifest b/manifest index 90fe542994..8c6f633d3c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\sexternal\scontent\stables\sto\sfts5. -D 2015-01-03T20:44:58.134 +C Tests\sand\sfixes\sfor\sfts5\sexternal\scontent\stables. +D 2015-01-05T20:41:39.791 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 16177d7f81af1852cf7f477b5ae119215ad6044a +F ext/fts5/fts5.c e2c19b2c5ab96650732bb6904892a6fb9a27ab42 F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b -F ext/fts5/fts5Int.h 8062dc2363c863dc8a5b2e5651cb8c966bd6c4cb -F ext/fts5/fts5_aux.c 445e54031ff94174673f4f5aac6c064df20a2a6b +F ext/fts5/fts5Int.h 9aafe97064e9c3380991abad4f51bee51021d18d +F ext/fts5/fts5_aux.c a74523025a553f57c99c699b9e2d83c4506503b4 F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c 16d647c7bfe50d4e823267188e12e2d001d655e0 +F ext/fts5/fts5_config.c 630f92bb0a301c0b4e37a05ec4e38dc51ceeba37 F ext/fts5/fts5_expr.c 317093f00a2ccdaaee0a5290f9f228c600189c41 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f -F ext/fts5/fts5_storage.c b95fcca70f94656854e7afcfbb9896455f6b034d +F ext/fts5/fts5_storage.c 68ce8ec98b009cbd350ff73df06a97b1a012e122 F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb F ext/fts5/fts5_unicode2.c 
9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 @@ -131,6 +131,7 @@ F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f F ext/fts5/test/fts5al.test bc873766fec3baae05ba6e76b379bc2f5e8eaf75 F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 +F ext/fts5/test/fts5content.test 0f267ba2086f2dff81484c8ee71fa0d3990c41f7 F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 @@ -1270,7 +1271,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1cd15a1759004d5d321056905dbb6acff20dc7d9 -R d21eb6bee3b06e51f22d32d1e0bd7016 +P 17ef5b59f789e9fa35c4f053246d819987fd06f8 +R 6bfe2a49f6feaf1db299d5e29da25a24 U dan -Z e88e77f44b464406d3184a89736eaa7d +Z 3d2200ed8057fd64a39f743bdc333945 diff --git a/manifest.uuid b/manifest.uuid index 49cb51dbf0..ccfd516ce0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -17ef5b59f789e9fa35c4f053246d819987fd06f8 \ No newline at end of file +047aaf830d1e72f0fdad3832a0b617e769d66468 \ No newline at end of file From 2a28e507f72743f6d507c18243a3e5b5f2cc131f Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 6 Jan 2015 14:38:34 +0000 Subject: [PATCH 065/206] Further fixes and test cases related to external content tables. 
FossilOrigin-Name: ce6a899baff7265a60c880098a9a57ea352b5415 --- ext/fts5/fts5_config.c | 3 +- ext/fts5/fts5_index.c | 66 +++++++++++++++++++++++++++------- ext/fts5/fts5_storage.c | 9 ++--- ext/fts5/test/fts5aa.test | 21 +++++++++++ ext/fts5/test/fts5content.test | 59 ++++++++++++++++++++++++++++-- manifest | 20 +++++------ manifest.uuid | 2 +- 7 files changed, 147 insertions(+), 33 deletions(-) diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 7a2eba27ae..45c10310fe 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -216,12 +216,13 @@ static char *fts5EscapeName(int *pRc, const char *z){ char *pRet = 0; if( *pRc==SQLITE_OK ){ int n = strlen(z); - pRet = (char*)sqlite3_malloc(2 * 2*n + 1); + pRet = (char*)sqlite3_malloc(2 + 2*n + 1); if( pRet==0 ){ *pRc = SQLITE_NOMEM; }else{ int i; char *p = pRet; + *p++ = '`'; for(i=0; ipLeaf; + if( p->rc==SQLITE_OK && pLeaf ){ + if( pIter->iLeafOffset<pLeaf->n ){ + bRet = (pLeaf->p[pIter->iLeafOffset]==0x00); + }else{ + Fts5Data *pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( + pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno + )); + if( pNew ){ + bRet = (pNew->p[4]==0x00); + fts5DataRelease(pNew); + } + } + } + return bRet; +} + /* ** Advance iterator pIter to the next entry.
** @@ -2094,14 +2122,20 @@ static void fts5MultiIterNext( i64 iFrom /* Advance at least as far as this */ ){ if( p->rc==SQLITE_OK ){ - int iFirst = pIter->aFirst[1]; - Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; - if( bFrom && pSeg->pDlidx ){ - fts5SegIterNextFrom(p, pSeg, iFrom); - }else{ - fts5SegIterNext(p, pSeg); - } - fts5MultiIterAdvanced(p, pIter, iFirst, 1); + int bUseFrom = bFrom; + do { + int iFirst = pIter->aFirst[1]; + Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + if( bUseFrom && pSeg->pDlidx ){ + fts5SegIterNextFrom(p, pSeg, iFrom); + }else{ + fts5SegIterNext(p, pSeg); + } + fts5MultiIterAdvanced(p, pIter, iFirst, 1); + bUseFrom = 0; + }while( pIter->bSkipEmpty + && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1]]) + ); } } @@ -2120,6 +2154,7 @@ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ + int bSkipEmpty, int flags, /* True for >= */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ @@ -2152,6 +2187,7 @@ static void fts5MultiIterNew( pNew->aSeg = (Fts5SegIter*)&pNew[1]; pNew->aFirst = (u16*)&pNew->aSeg[nSlot]; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_ASC)); + pNew->bSkipEmpty = bSkipEmpty; /* Initialize each of the component segment iterators. 
*/ if( iLevel<0 ){ @@ -2187,6 +2223,12 @@ static void fts5MultiIterNew( fts5MultiIterAdvanced(p, pNew, iEq, iIter); } } + + if( pNew->bSkipEmpty + && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1]]) + ){ + fts5MultiIterNext(p, pNew, 0, 0); + } }else{ fts5MultiIterFree(p, pNew); *ppOut = 0; @@ -2958,7 +3000,7 @@ fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); #endif - for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ @@ -3689,7 +3731,7 @@ static void fts5SetupPrefixIter( Fts5Buffer doclist; memset(&doclist, 0, sizeof(doclist)); - for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1); + for(fts5MultiIterNew(p, pStruct, 0, 1, 1, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; fts5MultiIterNext(p, p1, 0, 0) ){ @@ -3770,7 +3812,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ Fts5MultiSegIter *pIter; Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ @@ -4031,7 +4073,7 @@ int sqlite3Fts5IndexQuery( pRet->pStruct = fts5StructureRead(p, iIdx); if( pRet->pStruct ){ fts5MultiIterNew(p, pRet->pStruct, - iIdx, flags, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + iIdx, 1, flags, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti ); } }else{ diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index e3119c7de4..3ab4911e61 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -62,8 +62,8 @@ static int fts5StorageGetStmt( assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) ); if( p->aStmt[eStmt]==0 ){ const char *azStmt[] = { - "SELECT * FROM %s ORDER BY id ASC", /* SCAN_ASC */ - "SELECT
* FROM %s ORDER BY id DESC", /* SCAN_DESC */ + "SELECT * FROM %s ORDER BY %s ASC", /* SCAN_ASC */ + "SELECT * FROM %s ORDER BY %s DESC", /* SCAN_DESC */ "SELECT * FROM %s WHERE %s=?", /* LOOKUP */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ @@ -82,9 +82,6 @@ static int fts5StorageGetStmt( switch( eStmt ){ case FTS5_STMT_SCAN_ASC: case FTS5_STMT_SCAN_DESC: - zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContent); - break; - case FTS5_STMT_LOOKUP: zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContent, pC->zContentRowid); break; @@ -725,7 +722,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ /* Check that the %_docsize and %_content tables contain the expected ** number of rows. */ - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB; diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 7b9bd77b67..58bc785c28 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -303,5 +303,26 @@ do_test 12.3 { string is integer $res } {1} +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 13.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o'); +} {} + +do_execsql_test 13.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'o'; +} {2 1} + +do_execsql_test 13.4 { + DELETE FROM t1 WHERE rowid=2; +} {} + +do_execsql_test 13.5 { + SELECT rowid FROM t1 WHERE t1 MATCH 'o'; +} {1} + finish_test + diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index 4940a6b03c..1c8e3d068f 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -17,6 +17,9 @@ if {![info exists testdir]} { source $testdir/tester.tcl set testprefix fts5content +#------------------------------------------------------------------------- +# Contentless tables +# 
do_execsql_test 1.1 { CREATE VIRTUAL TABLE f1 USING fts5(a, b, content=''); INSERT INTO f1(rowid, a, b) VALUES(1, 'one', 'o n e'); @@ -83,17 +86,67 @@ do_execsql_test 1.15 { INSERT INTO f1(f1, rowid, a, b) VALUES('delete', 2, 'two', 't w o'); } {} -db eval { SELECT fts5_decode(id, block) AS d FROM f1_data } { puts $d } - -breakpoint do_execsql_test 1.16 { SELECT rowid FROM f1 WHERE f1 MATCH 'o'; } {4 1} + do_execsql_test 1.17 { SELECT rowid FROM f1; } {4 3 1} +#------------------------------------------------------------------------- +# External content tables +# +reset_db +do_execsql_test 2.1 { + -- Create a table. And an external content fts5 table to index it. + CREATE TABLE tbl(a INTEGER PRIMARY KEY, b, c); + CREATE VIRTUAL TABLE fts_idx USING fts5(b, c, content='tbl', content_rowid='a'); + -- Triggers to keep the FTS index up to date. + CREATE TRIGGER tbl_ai AFTER INSERT ON tbl BEGIN + INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c); + END; + CREATE TRIGGER tbl_ad AFTER DELETE ON tbl BEGIN + INSERT INTO fts_idx(fts_idx, rowid, b, c) + VALUES('delete', old.a, old.b, old.c); + END; + CREATE TRIGGER tbl_au AFTER UPDATE ON tbl BEGIN + INSERT INTO fts_idx(fts_idx, rowid, b, c) + VALUES('delete', old.a, old.b, old.c); + INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c); + END; +} + +do_execsql_test 2.2 { + INSERT INTO tbl VALUES(1, 'one', 'o n e'); + INSERT INTO tbl VALUES(NULL, 'two', 't w o'); + INSERT INTO tbl VALUES(3, 'three', 't h r e e'); +} + +do_execsql_test 2.3 { + INSERT INTO fts_idx(fts_idx) VALUES('integrity-check'); +} + +do_execsql_test 2.4 { + DELETE FROM tbl WHERE rowid=2; + INSERT INTO fts_idx(fts_idx) VALUES('integrity-check'); +} + +do_execsql_test 2.5 { + UPDATE tbl SET c = c || ' x y z'; + INSERT INTO fts_idx(fts_idx) VALUES('integrity-check'); +} + +do_execsql_test 2.6 { + SELECT * FROM fts_idx WHERE fts_idx MATCH 't AND x'; +} {three {t h r e e x y z}} + +do_execsql_test 2.7 { + SELECT highlight(fts_idx, 1, '[', ']') 
FROM fts_idx + WHERE fts_idx MATCH 't AND x'; +} {{[t] h r e e [x] y z}} finish_test + diff --git a/manifest b/manifest index 8c6f633d3c..b653385b4f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Tests\sand\sfixes\sfor\sfts5\sexternal\scontent\stables. -D 2015-01-05T20:41:39.791 +C Further\sfixes\sand\stest\scases\srelated\sto\sexternal\scontent\stables. +D 2015-01-06T14:38:34.378 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -109,16 +109,16 @@ F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b F ext/fts5/fts5Int.h 9aafe97064e9c3380991abad4f51bee51021d18d F ext/fts5/fts5_aux.c a74523025a553f57c99c699b9e2d83c4506503b4 F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c 630f92bb0a301c0b4e37a05ec4e38dc51ceeba37 +F ext/fts5/fts5_config.c ecd2f2efca1cda58525087a1a0e0bc1d34aad7a0 F ext/fts5/fts5_expr.c 317093f00a2ccdaaee0a5290f9f228c600189c41 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f -F ext/fts5/fts5_storage.c 68ce8ec98b009cbd350ff73df06a97b1a012e122 +F ext/fts5/fts5_index.c a0f370b7843183c040dbbf724e1080a615ee05cc +F ext/fts5/fts5_storage.c 9b6b8afde63ccc7e8f2f37252bf47a0ea00f468c F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 -F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf +F ext/fts5/test/fts5aa.test 2affb47c0efa9cd39e1589ff8d8d78bcc7792952 F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 
@@ -131,7 +131,7 @@ F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f F ext/fts5/test/fts5al.test bc873766fec3baae05ba6e76b379bc2f5e8eaf75 F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 -F ext/fts5/test/fts5content.test 0f267ba2086f2dff81484c8ee71fa0d3990c41f7 +F ext/fts5/test/fts5content.test 55f760043ab3b066b9d91a9bf5f518198d31cc1f F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 @@ -1271,7 +1271,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 17ef5b59f789e9fa35c4f053246d819987fd06f8 -R 6bfe2a49f6feaf1db299d5e29da25a24 +P 047aaf830d1e72f0fdad3832a0b617e769d66468 +R 4c2c7726c7891be9cd96464f52b4b676 U dan -Z 3d2200ed8057fd64a39f743bdc333945 +Z b55a8c4b3246d78dc3224ac9cef3d20c diff --git a/manifest.uuid b/manifest.uuid index ccfd516ce0..ea64e9f9d3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -047aaf830d1e72f0fdad3832a0b617e769d66468 \ No newline at end of file +ce6a899baff7265a60c880098a9a57ea352b5415 \ No newline at end of file From aacf3d1a3b72f5dd6c9a294fbd1ba2251710f715 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 6 Jan 2015 19:08:26 +0000 Subject: [PATCH 066/206] Remove the iPos parameter from the tokenizer callback. Fix the "tokenchars" and "separators" options on the simple tokenizer. 
FossilOrigin-Name: 65f0262fb82dbfd9f80233ac7c3108e2f2716c0a --- ext/fts5/fts5.c | 4 +- ext/fts5/fts5.h | 5 +- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_aux.c | 12 +-- ext/fts5/fts5_buffer.c | 1 - ext/fts5/fts5_config.c | 4 +- ext/fts5/fts5_expr.c | 5 +- ext/fts5/fts5_index.c | 2 + ext/fts5/fts5_storage.c | 13 +-- ext/fts5/fts5_tcl.c | 21 ++--- ext/fts5/fts5_tokenize.c | 116 ++++++++++++++++-------- {test => ext/fts5/test}/fts5_common.tcl | 7 +- ext/fts5/test/fts5aa.test | 5 +- ext/fts5/test/fts5ab.test | 5 +- ext/fts5/test/fts5ac.test | 5 +- ext/fts5/test/fts5ad.test | 5 +- ext/fts5/test/fts5ae.test | 5 +- ext/fts5/test/fts5af.test | 5 +- ext/fts5/test/fts5ag.test | 5 +- ext/fts5/test/fts5ah.test | 5 +- ext/fts5/test/fts5ai.test | 5 +- ext/fts5/test/fts5aj.test | 5 +- ext/fts5/test/fts5ak.test | 5 +- ext/fts5/test/fts5al.test | 5 +- ext/fts5/test/fts5auxdata.test | 5 +- ext/fts5/test/fts5content.test | 5 +- ext/fts5/test/fts5ea.test | 5 +- ext/fts5/test/fts5fault1.test | 5 +- ext/fts5/test/fts5porter.test | 5 +- ext/fts5/test/fts5tokenizer.test | 29 ++++-- ext/fts5/test/fts5unicode.test | 7 +- ext/fts5/test/fts5unicode2.test | 85 +++++++++-------- manifest | 76 ++++++++-------- manifest.uuid | 2 +- test/tester.tcl | 1 - 35 files changed, 234 insertions(+), 243 deletions(-) rename {test => ext/fts5/test}/fts5_common.tcl (93%) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index ab2216e25d..3ddd23997e 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1130,7 +1130,7 @@ static int fts5UpdateMethod( }else if( nArg>1 ){ sqlite3_value *pCmd = apVal[2 + pConfig->nCol]; if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ - const char *z = sqlite3_value_text(pCmd); + const char *z = (const char*)sqlite3_value_text(pCmd); if( pConfig->eContent!=FTS5_CONTENT_NORMAL && 0==sqlite3_stricmp("delete", z) ){ @@ -1220,7 +1220,7 @@ static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, - int (*xToken)(void*, const char*, int, int, int, int) + int 
(*xToken)(void*, const char*, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 6ccbebc283..0908cc1248 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -195,7 +195,7 @@ struct Fts5ExtensionApi { int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ + int (*xToken)(void*, const char*, int, int, int) /* Callback */ ); int (*xPhraseCount)(Fts5Context*); @@ -291,8 +291,7 @@ struct fts5_tokenizer { const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ - int iEnd, /* Byte offset of end of token within input text */ - int iPos /* Position of token in input (first token is 0) */ + int iEnd /* Byte offset of end of token within input text */ ) ); }; diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 30dafe9fff..e6ff083b5b 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -107,7 +107,7 @@ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ + int (*xToken)(void*, const char*, int, int, int) /* Callback */ ); void sqlite3Fts5Dequote(char *z); diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 9ddb589085..22bdbb2344 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -46,13 +46,6 @@ struct CInstIter { int iEnd; /* Last token in coalesced phrase instance */ }; -/* -** Return non-zero if the iterator is at EOF, or zero otherwise. -*/ -static int fts5CInstIterEof(CInstIter *pIter){ - return (pIter->iStart < 0); -} - /* ** Advance the iterator to the next coalesced phrase instance. 
Return ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. @@ -117,6 +110,7 @@ static int fts5CInstIterInit( typedef struct HighlightContext HighlightContext; struct HighlightContext { CInstIter iter; /* Coalesced Instance Iterator */ + int iPos; /* Current token offset in zIn[] */ int iRangeStart; /* First token to include */ int iRangeEnd; /* If non-zero, last token to include */ const char *zOpen; /* Opening highlight */ @@ -156,11 +150,11 @@ static int fts5HighlightCb( const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ - int iEndOff, /* End offset of token */ - int iPos /* Position offset of token */ + int iEndOff /* End offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; + int iPos = p->iPos++; if( p->iRangeEnd>0 ){ if( iPosiRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 59eb1b8752..b8f55694d8 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -58,7 +58,6 @@ int sqlite3Fts5Get32(const u8 *aBuf){ } void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){ - char *a; if( sqlite3Fts5BufferGrow(pRc, pBuf, 4) ) return; sqlite3Fts5Put32(&pBuf->p[pBuf->n], iVal); pBuf->n += 4; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 45c10310fe..ecf24dcd48 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -364,7 +364,7 @@ static int fts5ConfigParseSpecial( return rc; } - *pzErr = sqlite3_mprintf("unrecognized directive: \"%s\"", zCmd); + *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } @@ -588,7 +588,7 @@ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ - int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ + int 
(*xToken)(void*, const char*, int, int, int) /* Callback */ ){ return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 0d0c5bae29..0a12adaa4c 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -457,9 +457,11 @@ static int fts5LookaheadReaderInit( return fts5LookaheadReaderNext(p); } +#if 0 static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){ return (p->iPos==FTS5_LOOKAHEAD_EOF); } +#endif typedef struct Fts5NearTrimmer Fts5NearTrimmer; struct Fts5NearTrimmer { @@ -1141,8 +1143,7 @@ static int fts5ParseTokenize( const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ - int iEnd, /* End offset of token */ - int iPos /* Position offset of token */ + int iEnd /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index b661c4e6f5..e447169dc8 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -842,12 +842,14 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ ** And discard any cached reads. This function is called at the end of ** a read transaction or when any sub-transaction is rolled back. */ +#if 0 static void fts5DataReset(Fts5Index *p){ if( p->pReader ){ sqlite3_blob_close(p->pReader); p->pReader = 0; } } +#endif /* ** Remove all records associated with segment iSegid in index iIdx. 
diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 3ab4911e61..177de8185b 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -282,12 +282,11 @@ static int fts5StorageInsertCallback( const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ - int iEnd, /* End offset of token */ - int iPos /* Position offset of token */ + int iEnd /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; - pCtx->szCol = iPos+1; + int iPos = pCtx->szCol++; return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); } @@ -312,6 +311,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ + ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), @@ -474,6 +474,7 @@ int sqlite3Fts5StorageSpecialDelete( rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iColnCol; iCol++){ + ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), @@ -651,14 +652,13 @@ static int fts5StorageIntegrityCallback( const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ - int iEnd, /* End offset of token */ - int iPos /* Position offset of token */ + int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; + int iPos = pCtx->szCol++; pCtx->cksum ^= sqlite3Fts5IndexCksum( pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken ); - pCtx->szCol = iPos+1; return SQLITE_OK; } @@ -695,6 +695,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; 
rc==SQLITE_OK && inCol; i++){ ctx.iCol = i; + ctx.szCol = 0; rc = sqlite3Fts5Tokenize( pConfig, (const char*)sqlite3_column_text(pScan, i+1), diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 1ce1bba49d..f560590720 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -112,7 +112,7 @@ struct F5tAuxData { static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, - int iStart, int iEnd, int iPos + int iStart, int iEnd ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); @@ -122,7 +122,6 @@ static int xTokenizeCb( Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iStart)); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iEnd)); - Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iPos)); rc = Tcl_EvalObjEx(p->interp, pEval, 0); Tcl_DecrRefCount(pEval); @@ -528,11 +527,10 @@ struct F5tTokenizeCtx { static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, - int iStart, int iEnd, int iPos + int iStart, int iEnd ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ - Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement( 0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart) @@ -541,7 +539,6 @@ static int xTokenizeCb2( Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd)); - Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos)); } return SQLITE_OK; } @@ -637,7 +634,7 @@ typedef struct F5tTokenizerModule F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; - int (*xToken)(void*, const char*, int, int, int, int); + int (*xToken)(void*, const char*, int, int, int); }; struct F5tTokenizerModule { @@ -693,11 +690,11 @@ static int 
f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, const char *pText, int nText, - int (*xToken)(void*, const char*, int, int, int, int) + int (*xToken)(void*, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; void *pOldCtx; - int (*xOldToken)(void*, const char*, int, int, int, int); + int (*xOldToken)(void*, const char*, int, int, int); Tcl_Obj *pEval; int rc; @@ -733,14 +730,13 @@ static int f5tTokenizerReturn( F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; int iStart; int iEnd; - int iPos; int nToken; char *zToken; int rc; assert( p ); - if( objc!=5 ){ - Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END POS"); + if( objc!=4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END"); return TCL_ERROR; } if( p->xToken==0 ){ @@ -753,12 +749,11 @@ static int f5tTokenizerReturn( zToken = Tcl_GetStringFromObj(objv[1], &nToken); if( Tcl_GetIntFromObj(interp, objv[2], &iStart) || Tcl_GetIntFromObj(interp, objv[3], &iEnd) - || Tcl_GetIntFromObj(interp, objv[4], &iPos) ){ return TCL_ERROR; } - rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd, iPos); + rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd); Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_OK; } diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index b23eccd97f..c3f3e5aaa8 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -16,28 +16,9 @@ #include /************************************************************************** -** Start of unicode61 tokenizer implementation. +** Start of simple tokenizer implementation. */ -/* -** Create a "simple" tokenizer. -*/ -static int fts5SimpleCreate( - void *pCtx, - const char **azArg, int nArg, - Fts5Tokenizer **ppOut -){ - *ppOut = 0; - return SQLITE_OK; -} - -/* -** Delete a "simple" tokenizer. 
-*/ -static void fts5SimpleDelete(Fts5Tokenizer *p){ - return; -} - /* ** For tokenizers with no "unicode" modifier, the set of token characters ** is the same as the set of ASCII range alphanumeric characters. @@ -53,6 +34,69 @@ static unsigned char aSimpleTokenChar[128] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */ }; +typedef struct SimpleTokenizer SimpleTokenizer; +struct SimpleTokenizer { + unsigned char aTokenChar[128]; +}; + +static void fts5SimpleAddExceptions( + SimpleTokenizer *p, + const char *zArg, + int bTokenChars +){ + int i; + for(i=0; zArg[i]; i++){ + if( (zArg[i] & 0x80)==0 ){ + p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; + } + } +} + +/* +** Create a "simple" tokenizer. +*/ +static int fts5SimpleCreate( + void *pCtx, + const char **azArg, int nArg, + Fts5Tokenizer **ppOut +){ + int rc = SQLITE_OK; + SimpleTokenizer *p = 0; + if( nArg%2 ){ + rc = SQLITE_ERROR; + }else{ + p = sqlite3_malloc(sizeof(SimpleTokenizer)); + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + int i; + memset(p, 0, sizeof(SimpleTokenizer)); + memcpy(p->aTokenChar, aSimpleTokenChar, sizeof(aSimpleTokenChar)); + for(i=0; rc==SQLITE_OK && iaTokenChar; while( iszBuf && (bAlnum==0 || z>=zTerm) ){ int ie = (bAlnum ? z : zCode) - zInput; - rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie, iPos); + rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie); zOut = zBuf; - iPos++; } } @@ -390,7 +432,7 @@ static int fts5UnicodeTokenize( } /************************************************************************** -** Start of porter2 stemmer implementation. +** Start of porter stemmer implementation. 
*/ /* Any tokens larger than this (in bytes) are passed through without @@ -452,7 +494,7 @@ static int fts5PorterCreate( typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; - int (*xToken)(void*, const char*, int, int, int, int); + int (*xToken)(void*, const char*, int, int, int); char *aBuf; }; @@ -470,7 +512,6 @@ static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ int nBuf = *pnBuf; PorterRule *p; - for(p=aRule; p->zSuffix; p++){ assert( strlen(p->zSuffix)==p->nSuffix ); assert( strlen(p->zOutput)==p->nOutput ); @@ -577,8 +618,7 @@ static int fts5PorterCb( const char *pToken, int nToken, int iStart, - int iEnd, - int iPos + int iEnd ){ PorterContext *p = (PorterContext*)pCtx; @@ -716,10 +756,10 @@ static int fts5PorterCb( nBuf--; } - return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd, iPos); + return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd); pass_through: - return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd, iPos); + return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd); } /* @@ -729,7 +769,7 @@ static int fts5PorterTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, - int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) + int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; PorterContext sCtx; diff --git a/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl similarity index 93% rename from test/fts5_common.tcl rename to ext/fts5/test/fts5_common.tcl index 78f561ac95..c8c6a8a944 100644 --- a/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -10,7 +10,10 @@ #*********************************************************************** # - +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. .. 
test] +} +source $testdir/tester.tcl proc fts5_test_poslist {cmd} { @@ -45,7 +48,7 @@ proc fts5_test_columntotalsize {cmd} { set res } -proc test_append_token {varname token iStart iEnd iPos} { +proc test_append_token {varname token iStart iEnd} { upvar $varname var lappend var $token } diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 58bc785c28..48c880a694 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -12,10 +12,7 @@ # focus of this script is testing the FTS5 module. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5aa # If SQLITE_ENABLE_FTS3 is defined, omit this file. diff --git a/ext/fts5/test/fts5ab.test b/ext/fts5/test/fts5ab.test index 1b744ebcce..0d96ea47af 100644 --- a/ext/fts5/test/fts5ab.test +++ b/ext/fts5/test/fts5ab.test @@ -13,10 +13,7 @@ # # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ab # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index cd51f31756..76f663ac5f 100644 --- a/ext/fts5/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -13,10 +13,7 @@ # # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ac # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5ad.test b/ext/fts5/test/fts5ad.test index 8af8345efe..9bc694f784 100644 --- a/ext/fts5/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -13,10 +13,7 @@ # # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. 
.. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ad # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5ae.test b/ext/fts5/test/fts5ae.test index 644ef99d21..f327bc9207 100644 --- a/ext/fts5/test/fts5ae.test +++ b/ext/fts5/test/fts5ae.test @@ -13,10 +13,7 @@ # # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ae # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5af.test b/ext/fts5/test/fts5af.test index 3e7ff918b8..8c50f84866 100644 --- a/ext/fts5/test/fts5af.test +++ b/ext/fts5/test/fts5af.test @@ -15,10 +15,7 @@ # snippet() function. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5af # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5ag.test b/ext/fts5/test/fts5ag.test index e3659a71d5..42a588f56c 100644 --- a/ext/fts5/test/fts5ag.test +++ b/ext/fts5/test/fts5ag.test @@ -12,10 +12,7 @@ # focus of this script is testing the FTS5 module. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ag # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5ah.test b/ext/fts5/test/fts5ah.test index c01d966704..37c014fcf1 100644 --- a/ext/fts5/test/fts5ah.test +++ b/ext/fts5/test/fts5ah.test @@ -12,10 +12,7 @@ # focus of this script is testing the FTS5 module. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. 
test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ah # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5ai.test b/ext/fts5/test/fts5ai.test index 78a346b04a..63c46fd042 100644 --- a/ext/fts5/test/fts5ai.test +++ b/ext/fts5/test/fts5ai.test @@ -14,10 +14,7 @@ # Specifically, it tests transactions and savepoints # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ai # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5aj.test b/ext/fts5/test/fts5aj.test index 3e12934c05..6b9dddd8b0 100644 --- a/ext/fts5/test/fts5aj.test +++ b/ext/fts5/test/fts5aj.test @@ -16,10 +16,7 @@ # and deleted, # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5aj # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5ak.test b/ext/fts5/test/fts5ak.test index c16056b72e..756ae0a898 100644 --- a/ext/fts5/test/fts5ak.test +++ b/ext/fts5/test/fts5ak.test @@ -14,10 +14,7 @@ # Specifically, the auxiliary function "highlight". # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ak # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 2cf291af64..31d51713af 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -14,10 +14,7 @@ # Specifically, this function tests the %_config table. 
# -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5al # If SQLITE_ENABLE_FTS5 is defined, omit this file. diff --git a/ext/fts5/test/fts5auxdata.test b/ext/fts5/test/fts5auxdata.test index 661a78526a..ee408a0645 100644 --- a/ext/fts5/test/fts5auxdata.test +++ b/ext/fts5/test/fts5auxdata.test @@ -12,10 +12,7 @@ # Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5auxdata do_execsql_test 1.0 { diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index 1c8e3d068f..934af7b3b2 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -11,10 +11,7 @@ # # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5content #------------------------------------------------------------------------- diff --git a/ext/fts5/test/fts5ea.test b/ext/fts5/test/fts5ea.test index 19e1903433..f913006530 100644 --- a/ext/fts5/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -10,10 +10,7 @@ #************************************************************************* # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ea # If SQLITE_ENABLE_FTS5 is defined, omit this file. 
diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index 5f340e0fd7..7685b8bb79 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -12,10 +12,7 @@ # focus of this script is testing the FTS5 module. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault1 diff --git a/ext/fts5/test/fts5porter.test b/ext/fts5/test/fts5porter.test index 9254626471..83ca852305 100644 --- a/ext/fts5/test/fts5porter.test +++ b/ext/fts5/test/fts5porter.test @@ -14,10 +14,7 @@ # http://tartarus.org/martin/PorterStemmer/ # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5porter set test_vocab { diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index bd2eb7aae2..e45f7fd89a 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -12,14 +12,10 @@ # Tests focusing on the fts5 tokenizers # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. 
test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5tokenizer - do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); DROP TABLE ft1; @@ -51,7 +47,6 @@ do_execsql_test 2.3 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding' } 1 - proc tcl_create {args} { set ::targs $args error "failed" @@ -70,7 +65,6 @@ foreach {tn directive expected} { do_test 3.$tn.2 { set ::targs } $expected } - do_catchsql_test 4.1 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc); } {1 {parse error in "tokenize = tcl abc"}} @@ -78,5 +72,26 @@ do_catchsql_test 4.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x y) } {1 {parse error in "x y"}} +#------------------------------------------------------------------------- +# Test the "separators" and "tokenchars" options a bit. +# +foreach {tn tokenizer} {1 simple 2 unicode61} { + reset_db + set T "$tokenizer tokenchars ',.:' separators 'xyz'" + execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")" + do_execsql_test 5.$tn.1 { + INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz'); + } + foreach {tn2 token res} { + 1 abc 1 2 def 1 3 ghi 1 4 jkl {} + 5 mno {} 6 pqr {} 7 stu {} 8 jkl.mno,pqr:stu 1 + 9 vw 1 + } { + do_execsql_test 5.$tn.2.$tn2 " + SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"' + " $res + } +} + finish_test diff --git a/ext/fts5/test/fts5unicode.test b/ext/fts5/test/fts5unicode.test index 22082b9cde..737604c1f1 100644 --- a/ext/fts5/test/fts5unicode.test +++ b/ext/fts5/test/fts5unicode.test @@ -12,16 +12,13 @@ # Tests focusing on the fts5 tokenizers # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. 
test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unicode proc tokenize_test {tn tokenizer input output} { uplevel [list do_test $tn [subst -nocommands { set ret {} - foreach {z s e p} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { + foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { lappend ret [set z] } set ret diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test index b26795f8a9..056106e18e 100644 --- a/ext/fts5/test/fts5unicode2.test +++ b/ext/fts5/test/fts5unicode2.test @@ -14,10 +14,7 @@ # This is a modified copy of FTS4 test file "fts4_unicode.test". # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unicode2 proc do_unicode_token_test {tn input res} { @@ -41,37 +38,37 @@ proc do_unicode_token_test3 {tn args} { ] [list {*}$res]] } -do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D} +do_unicode_token_test 1.0 {a B c D} {a a b B c c d D} do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \ - "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC" + "\uE4 \uC4 \uF6 \uD6 \uFC \uDC" do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \ - "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx" + "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx" # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s. 
-do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF" -do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E" +do_unicode_token_test 1.3 "\uDF" "\uDF \uDF" +do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E" do_unicode_token_test 1.5 "The quick brown fox" { - 0 the The 1 quick quick 2 brown brown 3 fox fox + the The quick quick brown brown fox fox } do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" { - 0 the The 1 quick quick 2 brown brown 3 fox fox + the The quick quick brown brown fox fox } -do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D} -do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC" +do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D} +do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC" do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \ - "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx" + "xax x\uC4x xox x\uD6x xux x\uDCx" # Check that diacritics are removed if remove_diacritics=1 is specified. # And that they do not break tokens. 
-do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx" +do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx" # Title-case mappings work -do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5" +do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5" #------------------------------------------------------------------------- # @@ -263,45 +260,45 @@ breakpoint do_unicode_token_test3 5.1 {tokenchars {}} { sqlite3_reset sqlite3_column_int } { - 0 sqlite3 sqlite3 - 1 reset reset - 2 sqlite3 sqlite3 - 3 column column - 4 int int + sqlite3 sqlite3 + reset reset + sqlite3 sqlite3 + column column + int int } do_unicode_token_test3 5.2 {tokenchars _} { sqlite3_reset sqlite3_column_int } { - 0 sqlite3_reset sqlite3_reset - 1 sqlite3_column_int sqlite3_column_int + sqlite3_reset sqlite3_reset + sqlite3_column_int sqlite3_column_int } do_unicode_token_test3 5.3 {separators xyz} { Laotianxhorseyrunszfast } { - 0 laotian Laotian - 1 horse horse - 2 runs runs - 3 fast fast + laotian Laotian + horse horse + runs runs + fast fast } do_unicode_token_test3 5.4 {tokenchars xyz} { Laotianxhorseyrunszfast } { - 0 laotianxhorseyrunszfast Laotianxhorseyrunszfast + laotianxhorseyrunszfast Laotianxhorseyrunszfast } do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} { sqlite3_resetxsqlite3_column_intyhonda_phantom } { - 0 sqlite3_reset sqlite3_reset - 1 sqlite3_column_int sqlite3_column_int - 2 honda_phantom honda_phantom + sqlite3_reset sqlite3_reset + sqlite3_column_int sqlite3_column_int + honda_phantom honda_phantom } do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" { - 0 abc abc 1 def def + abc abc def def } do_unicode_token_test3 5.7 \ @@ -309,38 +306,37 @@ do_unicode_token_test3 5.7 \ "separators \u05D0\u05D1\u05D2" \ "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \ [list \ - 0 \u2444fre\u2445sh \u2444fre\u2445sh \ - 1 water water \ - 2 fish fish \ - 3 \u2445timer \u2445timer \ + \u2444fre\u2445sh \u2444fre\u2445sh \ + water water \ + fish fish \ + 
\u2445timer \u2445timer \ ] # Check that it is not possible to add a standalone diacritic codepoint # to either separators or tokenchars. do_unicode_token_test3 5.8 "separators \u0301" \ "hello\u0301world \u0301helloworld" \ - "0 helloworld hello\u0301world 1 helloworld helloworld" + "helloworld hello\u0301world helloworld helloworld" do_unicode_token_test3 5.9 "tokenchars \u0301" \ "hello\u0301world \u0301helloworld" \ - "0 helloworld hello\u0301world 1 helloworld helloworld" + "helloworld hello\u0301world helloworld helloworld" do_unicode_token_test3 5.10 "separators \u0301" \ "remove_diacritics 0" \ "hello\u0301world \u0301helloworld" \ - "0 hello\u0301world hello\u0301world 1 helloworld helloworld" + "hello\u0301world hello\u0301world helloworld helloworld" do_unicode_token_test3 5.11 "tokenchars \u0301" \ "remove_diacritics 0" \ "hello\u0301world \u0301helloworld" \ - "0 hello\u0301world hello\u0301world 1 helloworld helloworld" - + "hello\u0301world hello\u0301world helloworld helloworld" #------------------------------------------------------------------------- proc do_tokenize {tokenizer txt} { set res [list] - foreach {a b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] { + foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] { lappend res $b } set res @@ -391,6 +387,7 @@ foreach T $tokenizers { do_isspace_test 6.$T.23 $T {8287 12288} } + #------------------------------------------------------------------------- # Test that the private use ranges are treated as alphanumeric. 
# @@ -398,8 +395,8 @@ foreach {tn1 c} { 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff } { foreach {tn2 config res} { - 1 "" "0 hello*world hello*world" - 2 "separators *" "0 hello hello 1 world world" + 1 "" "hello*world hello*world" + 2 "separators *" "hello hello world world" } { set config [string map [list * $c] $config] set input [string map [list * $c] "hello*world"] diff --git a/manifest b/manifest index b653385b4f..5c31f642e1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Further\sfixes\sand\stest\scases\srelated\sto\sexternal\scontent\stables. -D 2015-01-06T14:38:34.378 +C Remove\sthe\siPos\sparameter\sfrom\sthe\stokenizer\scallback.\sFix\sthe\s"tokenchars"\sand\s"separators"\soptions\son\sthe\ssimple\stokenizer. +D 2015-01-06T19:08:26.571 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,40 +104,41 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c e2c19b2c5ab96650732bb6904892a6fb9a27ab42 -F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b -F ext/fts5/fts5Int.h 9aafe97064e9c3380991abad4f51bee51021d18d -F ext/fts5/fts5_aux.c a74523025a553f57c99c699b9e2d83c4506503b4 -F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c ecd2f2efca1cda58525087a1a0e0bc1d34aad7a0 -F ext/fts5/fts5_expr.c 317093f00a2ccdaaee0a5290f9f228c600189c41 +F ext/fts5/fts5.c 9f6f6597410d9fe76db385955ad6be171c454331 +F ext/fts5/fts5.h cfafdf6f43f9402f999334382085e46f89d85ecf +F ext/fts5/fts5Int.h 8b338037a968da542a98bbbcdbb10bcf361ee2fe +F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f +F ext/fts5/fts5_buffer.c 
32dd3c950392346ca69a0f1803501766c5c954f9 +F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 +F ext/fts5/fts5_expr.c 0320ae948e82cf7dca800463de7f5b6a808ba7c3 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c a0f370b7843183c040dbbf724e1080a615ee05cc -F ext/fts5/fts5_storage.c 9b6b8afde63ccc7e8f2f37252bf47a0ea00f468c -F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f -F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb +F ext/fts5/fts5_index.c b58bcfba3fe4e53fbf2dc525ec25aa37b77ac9f0 +F ext/fts5/fts5_storage.c cd72f2839049d5277df0edd0cf5c801f33542b07 +F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 +F ext/fts5/fts5_tokenize.c 4c30cf32c63e59bec5b38533e0a65987df262851 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 -F ext/fts5/test/fts5aa.test 2affb47c0efa9cd39e1589ff8d8d78bcc7792952 -F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 -F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b -F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 -F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 -F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd -F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 -F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d -F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f -F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 -F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f -F ext/fts5/test/fts5al.test bc873766fec3baae05ba6e76b379bc2f5e8eaf75 -F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 -F ext/fts5/test/fts5content.test 55f760043ab3b066b9d91a9bf5f518198d31cc1f -F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 -F ext/fts5/test/fts5fault1.test 
b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 -F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 -F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca -F ext/fts5/test/fts5unicode.test b9c7bb982e0ee242a0774e636e1888ca32947a83 -F ext/fts5/test/fts5unicode2.test 7b0d64bbb7bfb7b5080e032e068404b42432ee02 +F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc w test/fts5_common.tcl +F ext/fts5/test/fts5aa.test 3941b54d7585153be0c5cf0026f7dd8cfef13ea9 +F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536 +F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 +F ext/fts5/test/fts5ad.test 3b01eec8516d5631909716514e2e585a45ef0eb1 +F ext/fts5/test/fts5ae.test 014d5be2f5f70407fb032d4f27704116254797c3 +F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a +F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 +F ext/fts5/test/fts5ah.test 749855d1f457ecbf8e54b25a92e55a84cc689151 +F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 +F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 +F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce +F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a +F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b +F ext/fts5/test/fts5content.test ed6a141b1fcaa8fc1cf719492a9e38b29f2a830b +F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c +F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c +F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e +F ext/fts5/test/fts5tokenizer.test f951bb9be29232bd057b0ac4d535b879d9cd9a89 +F ext/fts5/test/fts5unicode.test 9ae93296e59917c1210336388f6d3b98051b50c9 +F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F 
ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -646,7 +647,6 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5_common.tcl 2488117cd80b7a4de7c20054b89f082b77b4189c F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -939,7 +939,7 @@ F test/tclsqlite.test 37a61c2da7e3bfe3b8c1a2867199f6b860df5d43 F test/tempdb.test 19d0f66e2e3eeffd68661a11c83ba5e6ace9128c F test/temptable.test d2c9b87a54147161bcd1822e30c1d1cd891e5b30 F test/temptrigger.test 8ec228b0db5d7ebc4ee9b458fc28cb9e7873f5e1 -F test/tester.tcl a9cb43af36b13ec12587e3579bc13eda98cfb6b2 +F test/tester.tcl ed77454e6c7b40eb501db7e79d1c6fbfd3eebbff F test/thread001.test 9f22fd3525a307ff42a326b6bc7b0465be1745a5 F test/thread002.test e630504f8a06c00bf8bbe68528774dd96aeb2e58 F test/thread003.test ee4c9efc3b86a6a2767516a37bd64251272560a7 @@ -1271,7 +1271,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 047aaf830d1e72f0fdad3832a0b617e769d66468 -R 4c2c7726c7891be9cd96464f52b4b676 +P ce6a899baff7265a60c880098a9a57ea352b5415 +R ee708c3acc09a58536cb486296b83967 U dan -Z b55a8c4b3246d78dc3224ac9cef3d20c +Z 63a8748a3e94829622d4a3b2bf209e0e diff --git a/manifest.uuid b/manifest.uuid index ea64e9f9d3..ccd2f86400 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ce6a899baff7265a60c880098a9a57ea352b5415 \ No newline at end of file +65f0262fb82dbfd9f80233ac7c3108e2f2716c0a \ No newline at end of file diff --git a/test/tester.tcl b/test/tester.tcl index 
48931cb915..dad22661bd 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -1921,4 +1921,3 @@ database_never_corrupt source $testdir/thread_common.tcl source $testdir/malloc_common.tcl -source $testdir/fts5_common.tcl From 96a32bb51ce15ee759eafc010086f1ccdcc2436d Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 7 Jan 2015 17:11:11 +0000 Subject: [PATCH 067/206] Add the 'rebuild' and 'delete-all' commands. FossilOrigin-Name: 0cb2fed525778d96237b5b0943047665e1f636d1 --- ext/fts5/fts5.c | 31 +++++++- ext/fts5/fts5.h | 21 ------ ext/fts5/fts5Int.h | 5 ++ ext/fts5/fts5_index.c | 31 +++++--- ext/fts5/fts5_storage.c | 134 ++++++++++++++++++++++++++------- ext/fts5/test/fts5content.test | 41 ++++++++++ ext/fts5/test/fts5rebuild.test | 50 ++++++++++++ manifest | 25 +++--- manifest.uuid | 2 +- 9 files changed, 268 insertions(+), 72 deletions(-) create mode 100644 ext/fts5/test/fts5rebuild.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 3ddd23997e..c52b80f0ea 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1027,6 +1027,15 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ return rc; } +static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){ + int rc; + va_list ap; /* ... printf arguments */ + va_start(ap, zFormat); + assert( p->base.zErrMsg==0 ); + p->base.zErrMsg = sqlite3_vmprintf(zFormat, ap); + va_end(ap); +} + /* ** This function is called to handle an FTS INSERT command. 
In other words, ** an INSERT statement of the form: @@ -1047,11 +1056,31 @@ static int fts5SpecialInsert( sqlite3_value *pCmd, /* Value inserted into special column */ sqlite3_value *pVal /* Value inserted into rowid column */ ){ + Fts5Config *pConfig = pTab->pConfig; const char *z = (const char*)sqlite3_value_text(pCmd); int rc = SQLITE_OK; int bError = 0; - if( 0==sqlite3_stricmp("integrity-check", z) ){ + if( 0==sqlite3_stricmp("delete-all", z) ){ + if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ + fts5SetVtabError(pTab, + "'delete-all' may only be used with a " + "contentless or external content fts5 table" + ); + rc = SQLITE_ERROR; + }else{ + rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); + } + }else if( 0==sqlite3_stricmp("rebuild", z) ){ + if( pConfig->eContent==FTS5_CONTENT_NONE ){ + fts5SetVtabError(pTab, + "'rebuild' may not be used with a contentless fts5 table" + ); + rc = SQLITE_ERROR; + }else{ + rc = sqlite3Fts5StorageRebuild(pTab->pStorage); + } + }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); }else{ rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 0908cc1248..1b9160f05b 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -93,30 +93,9 @@ typedef void (*fts5_extension_function)( ** xRowid: ** Returns the rowid of the current row. ** -** xPoslist: -** Iterate through phrase instances in the current row. If the iPhrase -** argument is 0 or greater, then only instances of phrase iPhrase are -** visited. If it is less than 0, instances of all phrases are visited. -** -** At EOF, -1 is returned and output variable iPos set to -1. -** -** -** sqlite3_int64 iPos; -** int iPhrase; -** int ii = 0; -** -** while( (iPhrase = pFts->xPoslist(pFts, -1, &ii, &iPos) >= 0 ){ -** int iCol = FTS5_POS2COLUMN(iPos); -** int iOff = FTS5_POS2OFFSET(iPos); -** // An instance of phrase iPhrase at offset iOff of column iCol. 
-** } -** -** -** ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. ** -** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index e6ff083b5b..893d743cda 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -340,6 +340,8 @@ int sqlite3Fts5IndexSetCookie(Fts5Index*, int); */ int sqlite3Fts5IndexReads(Fts5Index *p); +int sqlite3Fts5IndexReinit(Fts5Index *p); + /* ** End of interface to code in fts5_index.c. **************************************************************************/ @@ -421,6 +423,9 @@ int sqlite3Fts5StorageConfigValue(Fts5Storage *p, const char*, sqlite3_value*); int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**); +int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); +int sqlite3Fts5StorageRebuild(Fts5Storage *p); + /* ** End of interface to code in fts5_storage.c. **************************************************************************/ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e447169dc8..8552a35792 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3899,6 +3899,26 @@ int sqlite3Fts5IndexRollback(Fts5Index *p){ return SQLITE_OK; } +/* +** The %_data table is completely empty when this function is called. This +** function populates it with the initial structure objects for each index, +** and the initial version of the "averages" record (a zero-byte blob). +*/ +int sqlite3Fts5IndexReinit(Fts5Index *p){ + int i; + Fts5Structure s; + + memset(&s, 0, sizeof(Fts5Structure)); + for(i=0; ipConfig->nPrefix+1; i++){ + fts5StructureWrite(p, i, &s); + } + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); + } + + return fts5IndexReturn(p); +} + /* ** Open a new Fts5Index handle. 
If the bCreate argument is true, create ** and initialize the underlying %_data table. @@ -3927,20 +3947,11 @@ int sqlite3Fts5IndexOpen( if( p->zDataTbl==0 ){ rc = SQLITE_NOMEM; }else if( bCreate ){ - int i; - Fts5Structure s; rc = sqlite3Fts5CreateTable( pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr ); if( rc==SQLITE_OK ){ - memset(&s, 0, sizeof(Fts5Structure)); - for(i=0; inPrefix+1; i++){ - fts5StructureWrite(p, i, &s); - } - rc = p->rc; - } - if( rc==SQLITE_OK ){ - rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); + rc = sqlite3Fts5IndexReinit(p); } } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 177de8185b..8431f6dd4c 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -125,22 +125,37 @@ static int fts5StorageGetStmt( return rc; } + +static int fts5ExecPrintf( + sqlite3 *db, + char **pzErr, + const char *zFormat, + ... +){ + int rc; + va_list ap; /* ... printf arguments */ + va_start(ap, zFormat); + char *zSql = sqlite3_vmprintf(zFormat, ap); + + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_exec(db, zSql, 0, 0, pzErr); + sqlite3_free(zSql); + } + + va_end(ap); + return rc; +} + /* ** Drop the shadow table with the postfix zPost (e.g. "content"). Return ** SQLITE_OK if successful or an SQLite error code otherwise. */ int sqlite3Fts5DropTable(Fts5Config *pConfig, const char *zPost){ - int rc; - char *zSql = sqlite3_mprintf("DROP TABLE IF EXISTS %Q.'%q_%q'", + return fts5ExecPrintf(pConfig->db, 0, "DROP TABLE IF EXISTS %Q.'%q_%q'", pConfig->zDb, pConfig->zName, zPost ); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_exec(pConfig->db, zSql, 0, 0, 0); - sqlite3_free(zSql); - } - return rc; } /* @@ -155,25 +170,19 @@ int sqlite3Fts5CreateTable( char **pzErr /* OUT: Error message */ ){ int rc; - char *zSql = sqlite3_mprintf("CREATE TABLE %Q.'%q_%q'(%s)%s", - pConfig->zDb, pConfig->zName, zPost, zDefn, - (bWithout ? 
" WITHOUT ROWID" :"") + char *zErr = 0; + + rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s", + pConfig->zDb, pConfig->zName, zPost, zDefn, bWithout?" WITHOUT ROWID":"" ); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ - char *zErr = 0; - assert( *pzErr==0 ); - rc = sqlite3_exec(pConfig->db, zSql, 0, 0, &zErr); - if( zErr ){ - *pzErr = sqlite3_mprintf( - "fts5: error creating shadow table %q_%s: %s", - pConfig->zName, zPost, zErr - ); - sqlite3_free(zErr); - } - sqlite3_free(zSql); + if( zErr ){ + *pzErr = sqlite3_mprintf( + "fts5: error creating shadow table %q_%s: %s", + pConfig->zName, zPost, zErr + ); + sqlite3_free(zErr); } + return rc; } @@ -462,7 +471,7 @@ int sqlite3Fts5StorageSpecialDelete( int rc; sqlite3_stmt *pDel; - assert( p->pConfig->eContent!=FTS5_CONTENT_NORMAL ); + assert( pConfig->eContent!=FTS5_CONTENT_NORMAL ); rc = fts5StorageLoadTotals(p, 1); /* Delete the index records */ @@ -502,7 +511,78 @@ int sqlite3Fts5StorageSpecialDelete( } return rc; +} +/* +** Delete all entries in the FTS5 index. +*/ +int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ + Fts5Config *pConfig = p->pConfig; + int rc; + + /* Delete the contents of the %_data and %_docsize tables. */ + rc = fts5ExecPrintf(pConfig->db, 0, + "DELETE FROM %Q.'%q_data';" + "DELETE FROM %Q.'%q_docsize';", + pConfig->zDb, pConfig->zName, + pConfig->zDb, pConfig->zName + ); + + /* Reinitialize the %_data table. This call creates the initial structure + ** and averages records. 
*/ + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexReinit(p->pIndex); + } + return rc; +} + +int sqlite3Fts5StorageRebuild(Fts5Storage *p){ + Fts5Buffer buf = {0,0,0}; + Fts5Config *pConfig = p->pConfig; + sqlite3_stmt *pScan = 0; + Fts5InsertCtx ctx; + int rc; + + memset(&ctx, 0, sizeof(Fts5InsertCtx)); + ctx.pStorage = p; + rc = sqlite3Fts5StorageDeleteAll(p); + if( rc==SQLITE_OK ){ + rc = fts5StorageLoadTotals(p, 1); + } + + if( rc==SQLITE_OK ){ + rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_ASC, &pScan, 0); + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){ + i64 iRowid = sqlite3_column_int64(pScan, 0); + + sqlite3Fts5BufferZero(&buf); + rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); + for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ + ctx.szCol = 0; + rc = sqlite3Fts5Tokenize(pConfig, + (const char*)sqlite3_column_text(pScan, ctx.iCol+1), + sqlite3_column_bytes(pScan, ctx.iCol+1), + (void*)&ctx, + fts5StorageInsertCallback + ); + sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); + p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; + } + p->nTotalRow++; + + if( rc==SQLITE_OK ){ + rc = fts5StorageInsertDocsize(p, iRowid, &buf); + } + } + sqlite3_free(buf.p); + + /* Write the averages record */ + if( rc==SQLITE_OK ){ + rc = fts5StorageSaveTotals(p); + } + return rc; } /* diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index 934af7b3b2..948a4308b6 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -144,6 +144,47 @@ do_execsql_test 2.7 { WHERE fts_idx MATCH 't AND x'; } {{[t] h r e e [x] y z}} +#------------------------------------------------------------------------- +# Quick tests of the 'delete-all' command. 
+# +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE t3 USING fts5(x, content=''); + INSERT INTO t3 VALUES('a b c'); + INSERT INTO t3 VALUES('d e f'); +} + +do_execsql_test 3.2 { + SELECT count(*) FROM t3_docsize; + SELECT count(*) FROM t3_data; +} {2 4} + +do_execsql_test 3.3 { + INSERT INTO t3(t3) VALUES('delete-all'); + SELECT count(*) FROM t3_docsize; + SELECT count(*) FROM t3_data; +} {0 2} + +do_execsql_test 3.4 { + INSERT INTO t3 VALUES('a b c'); + INSERT INTO t3 VALUES('d e f'); + SELECT rowid FROM t3 WHERE t3 MATCH 'e'; +} {2} + +do_execsql_test 3.5 { + SELECT rowid FROM t3 WHERE t3 MATCH 'c'; +} {1} + +do_execsql_test 3.6 { + SELECT count(*) FROM t3_docsize; + SELECT count(*) FROM t3_data; +} {2 4} + +do_execsql_test 3.7 { + CREATE VIRTUAL TABLE t4 USING fts5(x); +} {} +do_catchsql_test 3.8 { + INSERT INTO t4(t4) VALUES('delete-all'); +} {1 {'delete-all' may only be used with a contentless or external content fts5 table}} finish_test diff --git a/ext/fts5/test/fts5rebuild.test b/ext/fts5/test/fts5rebuild.test new file mode 100644 index 0000000000..dfaf28bc6e --- /dev/null +++ b/ext/fts5/test/fts5rebuild.test @@ -0,0 +1,50 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5rebuild + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE f1 USING fts5(a, b); + INSERT INTO f1(a, b) VALUES('one', 'o n e'); + INSERT INTO f1(a, b) VALUES('two', 't w o'); + INSERT INTO f1(a, b) VALUES('three', 't h r e e'); +} + +do_execsql_test 1.2 { + INSERT INTO f1(f1) VALUES('integrity-check'); +} {} + +do_execsql_test 1.3 { + INSERT INTO f1(f1) VALUES('rebuild'); +} {} + +do_execsql_test 1.4 { + INSERT INTO f1(f1) VALUES('integrity-check'); +} {} + +do_execsql_test 1.5 { + DELETE FROM f1_data; +} {} + +do_catchsql_test 1.6 { + INSERT INTO f1(f1) VALUES('integrity-check'); +} {1 {SQL logic error or missing database}} + +do_execsql_test 1.7 { + INSERT INTO f1(f1) VALUES('rebuild'); + INSERT INTO f1(f1) VALUES('integrity-check'); +} {} + +finish_test + diff --git a/manifest b/manifest index 5c31f642e1..a7855d306f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sthe\siPos\sparameter\sfrom\sthe\stokenizer\scallback.\sFix\sthe\s"tokenchars"\sand\s"separators"\soptions\son\sthe\ssimple\stokenizer. -D 2015-01-06T19:08:26.571 +C Add\sthe\s'rebuild'\sand\s'delete-all'\scommands. 
+D 2015-01-07T17:11:11.301 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,21 +104,21 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 9f6f6597410d9fe76db385955ad6be171c454331 -F ext/fts5/fts5.h cfafdf6f43f9402f999334382085e46f89d85ecf -F ext/fts5/fts5Int.h 8b338037a968da542a98bbbcdbb10bcf361ee2fe +F ext/fts5/fts5.c 66ca4324ea89dc727f01ea77eb48e5ba311be032 +F ext/fts5/fts5.h 0f8563e21ffa69cb87be4c2e24652fc41b441850 +F ext/fts5/fts5Int.h 00a8770e34b56f3db7eb29e5b110d2f7623ca959 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 0320ae948e82cf7dca800463de7f5b6a808ba7c3 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c b58bcfba3fe4e53fbf2dc525ec25aa37b77ac9f0 -F ext/fts5/fts5_storage.c cd72f2839049d5277df0edd0cf5c801f33542b07 +F ext/fts5/fts5_index.c 4e612b2c91a57ec770869b6cc89caeec0f658107 +F ext/fts5/fts5_storage.c 844b9667030370e9bb1daf3f9e862716cddb1a22 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 4c30cf32c63e59bec5b38533e0a65987df262851 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 -F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc w test/fts5_common.tcl +F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc F ext/fts5/test/fts5aa.test 
3941b54d7585153be0c5cf0026f7dd8cfef13ea9 F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536 F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 @@ -132,10 +132,11 @@ F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b -F ext/fts5/test/fts5content.test ed6a141b1fcaa8fc1cf719492a9e38b29f2a830b +F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e +F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 F ext/fts5/test/fts5tokenizer.test f951bb9be29232bd057b0ac4d535b879d9cd9a89 F ext/fts5/test/fts5unicode.test 9ae93296e59917c1210336388f6d3b98051b50c9 F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee @@ -1271,7 +1272,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ce6a899baff7265a60c880098a9a57ea352b5415 -R ee708c3acc09a58536cb486296b83967 +P 65f0262fb82dbfd9f80233ac7c3108e2f2716c0a +R 3ced0a5a3507e3fd1f8ff1a5ee996e45 U dan -Z 63a8748a3e94829622d4a3b2bf209e0e +Z 06bf9328f4305b86890a4adcb16c3d2f diff --git a/manifest.uuid b/manifest.uuid index ccd2f86400..5026d1fa16 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -65f0262fb82dbfd9f80233ac7c3108e2f2716c0a \ No newline at end of file +0cb2fed525778d96237b5b0943047665e1f636d1 \ No newline at end of file From d6b1c880d2f9486c0a92f1f2f709de8203378fc6 Mon Sep 17 00:00:00 
2001 From: dan Date: Wed, 7 Jan 2015 19:33:11 +0000 Subject: [PATCH 068/206] Add the fts5 'optimize' command. FossilOrigin-Name: e749be563d8e738af113bd301770e2f22763ab77 --- ext/fts5/fts5.c | 2 + ext/fts5/fts5Int.h | 2 + ext/fts5/fts5_index.c | 73 ++++++++++++++++++++++++++++++--- ext/fts5/fts5_storage.c | 4 ++ ext/fts5/test/fts5optimize.test | 60 +++++++++++++++++++++++++++ manifest | 19 +++++---- manifest.uuid | 2 +- 7 files changed, 146 insertions(+), 16 deletions(-) create mode 100644 ext/fts5/test/fts5optimize.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index c52b80f0ea..1dd026bb4b 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1080,6 +1080,8 @@ static int fts5SpecialInsert( }else{ rc = sqlite3Fts5StorageRebuild(pTab->pStorage); } + }else if( 0==sqlite3_stricmp("optimize", z) ){ + rc = sqlite3Fts5StorageOptimize(pTab->pStorage); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); }else{ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 893d743cda..6148ec7f6c 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -341,6 +341,7 @@ int sqlite3Fts5IndexSetCookie(Fts5Index*, int); int sqlite3Fts5IndexReads(Fts5Index *p); int sqlite3Fts5IndexReinit(Fts5Index *p); +int sqlite3Fts5IndexOptimize(Fts5Index *p); /* ** End of interface to code in fts5_index.c. @@ -425,6 +426,7 @@ int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**); int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); int sqlite3Fts5StorageRebuild(Fts5Storage *p); +int sqlite3Fts5StorageOptimize(Fts5Storage *p); /* ** End of interface to code in fts5_storage.c. 
diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 8552a35792..4d22c41467 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -41,6 +41,7 @@ ** */ +#define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ @@ -3164,6 +3165,12 @@ static void fts5IndexCrisisMerge( *ppStruct = pStruct; } +static int fts5IndexReturn(Fts5Index *p){ + int rc = p->rc; + p->rc = SQLITE_OK; + return rc; +} + typedef struct Fts5FlushCtx Fts5FlushCtx; struct Fts5FlushCtx { Fts5Index *pIdx; @@ -3277,6 +3284,66 @@ static void fts5IndexFlush(Fts5Index *p){ p->nPendingData = 0; } + +int sqlite3Fts5IndexOptimize(Fts5Index *p){ + Fts5Config *pConfig = p->pConfig; + int i; + + fts5IndexFlush(p); + for(i=0; i<=pConfig->nPrefix; i++){ + Fts5Structure *pStruct = fts5StructureRead(p, i); + Fts5Structure *pNew = 0; + int nSeg = 0; + if( pStruct ){ + nSeg = fts5StructureCountSegments(pStruct); + if( nSeg>1 ){ + int nByte = sizeof(Fts5Structure); + nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); + pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); + } + } + if( pNew ){ + Fts5StructureLevel *pLvl; + int nByte = nSeg * sizeof(Fts5StructureSegment); + pNew->nLevel = pStruct->nLevel+1; + pNew->nWriteCounter = pStruct->nWriteCounter; + pLvl = &pNew->aLevel[pStruct->nLevel]; + pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( pLvl->aSeg ){ + int iLvl, iSeg; + int iSegOut = 0; + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; + iSegOut++; + } + } + pLvl->nSeg = nSeg; + }else{ + sqlite3_free(pNew); + pNew = 0; + } + } + + if( pNew ){ + int iLvl = pNew->nLevel-1; + while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ + int nRem = FTS5_OPT_WORK_UNIT; + fts5IndexMergeLevel(p, i, &pNew, iLvl, 
&nRem); + } + + fts5StructureWrite(p, i, pNew); + fts5StructureRelease(pNew); + } + + fts5StructureRelease(pStruct); + } + + return fts5IndexReturn(p); +} + + + /* ** Return a simple checksum value based on the arguments. */ @@ -3788,12 +3855,6 @@ static void fts5SetupPrefixIter( sqlite3_free(aBuf); } -static int fts5IndexReturn(Fts5Index *p){ - int rc = p->rc; - p->rc = SQLITE_OK; - return rc; -} - /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 8431f6dd4c..b82db3e4a4 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -585,6 +585,10 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){ return rc; } +int sqlite3Fts5StorageOptimize(Fts5Storage *p){ + return sqlite3Fts5IndexOptimize(p->pIndex); +} + /* ** Allocate a new rowid. This is used for "external content" tables when ** a NULL value is inserted into the rowid column. The new rowid is allocated diff --git a/ext/fts5/test/fts5optimize.test b/ext/fts5/test/fts5optimize.test new file mode 100644 index 0000000000..068cf4c225 --- /dev/null +++ b/ext/fts5/test/fts5optimize.test @@ -0,0 +1,60 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5optimize + +proc rnddoc {nWord} { + set vocab {a b c d e f g h i j k l m n o p q r s t u v w x y z} + set nVocab [llength $vocab] + set ret [list] + for {set i 0} {$i < $nWord} {incr i} { + lappend ret [lindex $vocab [expr {int(rand() * $nVocab)}]] + } + return $ret +} + + +foreach {tn nStep} { + 1 2 + 2 10 + 3 50 + 4 500 +} { +if {$tn!=4} continue + reset_db + db func rnddoc rnddoc + do_execsql_test 1.$tn.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x, y); + } + do_test 1.$tn.2 { + for {set i 0} {$i < $nStep} {incr i} { + execsql { INSERT INTO t1 VALUES( rnddoc(5), rnddoc(5) ) } + } + } {} + + do_execsql_test 1.$tn.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); + } + + do_execsql_test 1.$tn.4 { + INSERT INTO t1(t1) VALUES('optimize'); + } + + do_execsql_test 1.$tn.5 { + INSERT INTO t1(t1) VALUES('integrity-check'); + } +} + +finish_test + diff --git a/manifest b/manifest index a7855d306f..adf58144e8 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\s'rebuild'\sand\s'delete-all'\scommands. -D 2015-01-07T17:11:11.301 +C Add\sthe\sfts5\s'optimize'\scommand. 
+D 2015-01-07T19:33:11.551 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 66ca4324ea89dc727f01ea77eb48e5ba311be032 +F ext/fts5/fts5.c c90004f4a91ce4f4dfad2fc980ade0d9314ebb10 F ext/fts5/fts5.h 0f8563e21ffa69cb87be4c2e24652fc41b441850 -F ext/fts5/fts5Int.h 00a8770e34b56f3db7eb29e5b110d2f7623ca959 +F ext/fts5/fts5Int.h 0142ba4c3c70e1976578604c0e738670f7689726 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 0320ae948e82cf7dca800463de7f5b6a808ba7c3 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 4e612b2c91a57ec770869b6cc89caeec0f658107 -F ext/fts5/fts5_storage.c 844b9667030370e9bb1daf3f9e862716cddb1a22 +F ext/fts5/fts5_index.c ea36c1e42aaf8038b6139be95575eb7fe01f34e4 +F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 4c30cf32c63e59bec5b38533e0a65987df262851 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 @@ -135,6 +135,7 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c +F ext/fts5/test/fts5optimize.test 
0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 F ext/fts5/test/fts5tokenizer.test f951bb9be29232bd057b0ac4d535b879d9cd9a89 @@ -1272,7 +1273,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 65f0262fb82dbfd9f80233ac7c3108e2f2716c0a -R 3ced0a5a3507e3fd1f8ff1a5ee996e45 +P 0cb2fed525778d96237b5b0943047665e1f636d1 +R b413984e0011c860316df7bca0fa936a U dan -Z 06bf9328f4305b86890a4adcb16c3d2f +Z ad35ce36f519fcc615b0ece9f543df9d diff --git a/manifest.uuid b/manifest.uuid index 5026d1fa16..0a08358e38 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0cb2fed525778d96237b5b0943047665e1f636d1 \ No newline at end of file +e749be563d8e738af113bd301770e2f22763ab77 \ No newline at end of file From 27277c4e3c0fa0a05ae87b7b6be6a8380c52ecaf Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 10 Jan 2015 20:34:27 +0000 Subject: [PATCH 069/206] Fix some documentation issues in fts5. 
FossilOrigin-Name: 512e1bdb4093b59d1494dfc63391476eadd52aea --- ext/fts5/extract_api_docs.tcl | 165 ++++++++++++++++++++++++++-------- ext/fts5/fts5.h | 22 +++-- ext/fts5/fts5_expr.c | 5 +- ext/fts5/test/fts5near.test | 65 ++++++++++++++ manifest | 17 ++-- manifest.uuid | 2 +- 6 files changed, 219 insertions(+), 57 deletions(-) create mode 100644 ext/fts5/test/fts5near.test diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl index 81fe4cde53..27f136a99b 100644 --- a/ext/fts5/extract_api_docs.tcl +++ b/ext/fts5/extract_api_docs.tcl @@ -17,14 +17,17 @@ set ::fts5_docs_output "" if {[info commands hd_putsnl]==""} { + if {[llength $argv]>0} { set ::extract_api_docs_mode [lindex $argv 0] } proc output {text} { puts $text } } else { proc output {text} { - append ::fts5_docs_output $text + append ::fts5_docs_output "$text\n" } } +if {[info exists ::extract_api_docs_mode]==0} {set ::extract_api_docs_mode api} + set input_file [file join [file dir [info script]] fts5.h] set fd [open $input_file] @@ -41,7 +44,7 @@ close $fd proc get_struct_members {data} { # Extract the structure definition from the fts5.h file. - regexp "struct Fts5ExtensionApi {(.*)};" $data -> defn + regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn # Remove all comments from the structure definition regsub -all {/[*].*?[*]/} $defn {} defn2 @@ -95,45 +98,137 @@ proc get_struct_docs {data names} { set res } -# Initialize global array M as a map from Fts5StructureApi member name -# to member definition. i.e. -# -# iVersion -> {int iVersion} -# xUserData -> {void *(*xUserData)(Fts5Context*)} -# ... -# -array set M [get_struct_members $data] +proc get_tokenizer_docs {data} { + regexp {(xCreate:.*?)[*]/} $data -> docs -# Initialize global list D as a map from section name to documentation -# text. Most (all?) section names are structure member names. 
-# -set D [get_struct_docs $data [array names M]] - -foreach {hdr docs} $D { - if {[info exists M($hdr)]} { - set hdr $M($hdr) - } - output "
  $hdr
" - - set mode "" - set bEmpty 1 + set res "
\n" foreach line [split [string trim $docs] "\n"] { - if {[string trim $line]==""} { - if {$mode != ""} {output ""} - set mode "" - } elseif {$mode == ""} { - if {[regexp {^ } $line]} { - set mode codeblock - } else { - set mode p - } - output "<$mode>" + regexp {[*][*](.*)} $line -> line + if {[regexp {^ ?x.*:} $line]} { + append res "
$line

\n" + continue + } + if {[string trim $line] == ""} { + append res "

\n" + } else { + append res "$line\n" } - output $line } - if {$mode != ""} {output ""} + append res "

\n" + + set res } +proc get_api_docs {data} { + # Initialize global array M as a map from Fts5StructureApi member name + # to member definition. i.e. + # + # iVersion -> {int iVersion} + # xUserData -> {void *(*xUserData)(Fts5Context*)} + # ... + # + array set M [get_struct_members $data] + + # Initialize global list D as a map from section name to documentation + # text. Most (all?) section names are structure member names. + # + set D [get_struct_docs $data [array names M]] + + foreach {sub docs} $D { + if {[info exists M($sub)]} { + set hdr $M($sub) + set link " id=$sub" + } else { + set link "" + } + + output "
" + set style "padding-left:6ex;font-size:1.4em;display:block" + output "
$hdr
" + + set mode "" + set bEmpty 1 + foreach line [split [string trim $docs] "\n"] { + if {[string trim $line]==""} { + if {$mode != ""} {output ""} + set mode "" + } elseif {$mode == ""} { + if {[regexp {^ } $line]} { + set mode codeblock + } else { + set mode p + } + output "<$mode>" + } + output $line + } + if {$mode != ""} {output ""} + } +} + +proc get_fts5_struct {data start end} { + set res "" + set bOut 0 + foreach line [split $data "\n"] { + if {$bOut==0} { + if {[regexp $start $line]} { + set bOut 1 + } + } + + if {$bOut} { + append res "$line\n" + } + + if {$bOut} { + if {[regexp $end $line]} { + set bOut 0 + } + } + } + + set map [list /* /* */ */] + string map $map $res +} + +proc main {data} { + switch $::extract_api_docs_mode { + fts5_api { + output [get_fts5_struct $data "typedef struct fts5_api" "^\};"] + } + + fts5_tokenizer { + output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"] + } + + fts5_extension { + output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"] + } + + Fts5ExtensionApi { + set struct [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"] + set map [list] + foreach {k v} [get_struct_members $data] { + if {[string match x* $k]==0} continue + lappend map $k "$k" + } + output [string map $map $struct] + } + + api { + get_api_docs $data + } + + tokenizer_api { + output [get_tokenizer_docs $data] + } + + default { + } + } +} +main $data + set ::fts5_docs_output diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 1b9160f05b..8e244f3992 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -100,7 +100,7 @@ typedef void (*fts5_extension_function)( ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** -** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY DESC +** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid DESC ** ** with $p set to a phrase equivalent to the phrase iPhrase of the ** current query is executed. 
For each row visited, the callback function @@ -212,10 +212,11 @@ struct Fts5ExtensionApi { ** ** The first argument passed to this function is a copy of the (void*) ** pointer provided by the application when the fts5_tokenizer object -** was registered with SQLite. The second and third arguments are an -** array of nul-terminated strings containing the tokenizer arguments, -** if any, specified as part of the CREATE VIRTUAL TABLE statement used -** to create the fts5 table. +** was registered with FTS5 (the third argument to xCreateTokenizer()). +** The second and third arguments are an array of nul-terminated strings +** containing the tokenizer arguments, if any, specified following the +** tokenizer name as part of the CREATE VIRTUAL TABLE statement used +** to create the FTS5 table. ** ** The final argument is an output variable. If successful, (*ppOut) ** should be set to point to the new tokenizer handle and SQLITE_OK @@ -240,12 +241,10 @@ struct Fts5ExtensionApi { ** are a pointer to a buffer containing the token text, and the size of ** the token in bytes. The 4th and 5th arguments are the byte offsets of ** the first byte of and first byte immediately following the text from -** which the token is derived within the input. The final argument is the -** token position - the total number of tokens that appear before this one -** in the input buffer. +** which the token is derived within the input. ** -** The xToken() callback must be invoked with non-decreasing values of -** the iPos parameter. +** FTS5 assumes the xToken() callback is invoked for each token in the +** order that they occur within the input text. ** ** If an xToken() callback returns any value other than SQLITE_OK, then ** the tokenization should be abandoned and the xTokenize() method should @@ -256,9 +255,8 @@ struct Fts5ExtensionApi { ** SQLITE_OK or SQLITE_DONE. 
** */ -typedef struct fts5_tokenizer fts5_tokenizer; typedef struct Fts5Tokenizer Fts5Tokenizer; - +typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 0a12adaa4c..3fe34463f1 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -703,7 +703,7 @@ static int fts5ExprNearNextRowidMatch( */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ - Fts5ExprNode *pNode, + Fts5ExprNode *pNode, /* The "NEAR" node (FTS5_STRING) */ int bFromValid, i64 iFrom ){ @@ -716,6 +716,9 @@ static int fts5ExprNearNextMatch( rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); if( pNode->bEof || rc!=SQLITE_OK ) break; + /* Check that each phrase in the nearset matches the current row. + ** Populate the pPhrase->poslist buffers at the same time. If any + ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ diff --git a/ext/fts5/test/fts5near.test b/ext/fts5/test/fts5near.test new file mode 100644 index 0000000000..7425a4f24e --- /dev/null +++ b/ext/fts5/test/fts5near.test @@ -0,0 +1,65 @@ +# 2014 Jan 08 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on the NEAR operator. 
+# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5near + +proc do_near_test {tn doc near res} { + uplevel [list do_execsql_test $tn " + DELETE FROM t1; + INSERT INTO t1 VALUES('$doc'); + SELECT count(*) FROM t1 WHERE t1 MATCH '$near'; + " $res] +} + +execsql { + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'simple tokenchars .') +} + +do_near_test 1.1 ". . a . . . b . ." { NEAR(a b, 5) } 1 +do_near_test 1.2 ". . a . . . b . ." { NEAR(a b, 4) } 1 +do_near_test 1.3 ". . a . . . b . ." { NEAR(a b, 3) } 1 +do_near_test 1.4 ". . a . . . b . ." { NEAR(a b, 2) } 0 + +do_near_test 1.5 ". . a . . . b . ." { NEAR(b a, 5) } 1 +do_near_test 1.6 ". . a . . . b . ." { NEAR(b a, 4) } 1 +do_near_test 1.7 ". . a . . . b . ." { NEAR(b a, 3) } 1 +do_near_test 1.8 ". . a . . . b . ." { NEAR(b a, 2) } 0 + +do_near_test 1.9 ". a b . . . c . ." { NEAR("a b" c, 3) } 1 +do_near_test 1.10 ". a b . . . c . ." { NEAR("a b" c, 2) } 0 +do_near_test 1.11 ". a b . . . c . ." { NEAR(c "a b", 3) } 1 +do_near_test 1.12 ". a b . . . c . ." { NEAR(c "a b", 2) } 0 + +do_near_test 1.13 ". a b . . . c d ." { NEAR(a+b c+d, 3) } 1 +do_near_test 1.14 ". a b . . . c d ." { NEAR(a+b c+d, 2) } 0 +do_near_test 1.15 ". a b . . . c d ." { NEAR(c+d a+b, 3) } 1 +do_near_test 1.16 ". a b . . . c d ." { NEAR(c+d a+b, 2) } 0 + +do_near_test 1.17 ". a b . . . c d ." { NEAR(a b c d, 5) } 1 +do_near_test 1.18 ". a b . . . c d ." { NEAR(a b c d, 4) } 0 +do_near_test 1.19 ". a b . . . c d ." 
{ NEAR(a+b c d, 4) } 1 + +do_near_test 1.20 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 5) } 1 +do_near_test 1.21 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 4) } 0 + +do_near_test 1.22 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 5) } 1 +do_near_test 1.23 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 4) } 0 + +do_near_test 1.24 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 5) } 1 +do_near_test 1.25 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 4) } 0 + + +finish_test + diff --git a/manifest b/manifest index adf58144e8..d065fd8d57 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sfts5\s'optimize'\scommand. -D 2015-01-07T19:33:11.551 +C Fix\ssome\sdocumentation\sissues\sin\sfts5. +D 2015-01-10T20:34:27.199 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,14 +103,14 @@ F ext/fts3/tool/fts3view.c 3986531f2fc0ceca0c89c31ec7d0589b6adb19d6 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e -F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 +F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c c90004f4a91ce4f4dfad2fc980ade0d9314ebb10 -F ext/fts5/fts5.h 0f8563e21ffa69cb87be4c2e24652fc41b441850 +F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a F ext/fts5/fts5Int.h 0142ba4c3c70e1976578604c0e738670f7689726 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 -F ext/fts5/fts5_expr.c 0320ae948e82cf7dca800463de7f5b6a808ba7c3 +F ext/fts5/fts5_expr.c 6ba7a2e34a80989cca509bd295de1bc9f8e739a3 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 
F ext/fts5/fts5_index.c ea36c1e42aaf8038b6139be95575eb7fe01f34e4 F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 @@ -135,6 +135,7 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c +F ext/fts5/test/fts5near.test 70a568a1211a5b6d5a17282790d5f8cbbe086ce0 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 @@ -1273,7 +1274,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 0cb2fed525778d96237b5b0943047665e1f636d1 -R b413984e0011c860316df7bca0fa936a +P e749be563d8e738af113bd301770e2f22763ab77 +R 5c59d3558d2a230e6048c600760933d7 U dan -Z ad35ce36f519fcc615b0ece9f543df9d +Z 6c17e3ae4cf92b8841424ff4d00c314d diff --git a/manifest.uuid b/manifest.uuid index 0a08358e38..de97eaee58 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e749be563d8e738af113bd301770e2f22763ab77 \ No newline at end of file +512e1bdb4093b59d1494dfc63391476eadd52aea \ No newline at end of file From 73f7d6ed75ede36611aa3a69d70d802eecf900fa Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 12 Jan 2015 17:58:04 +0000 Subject: [PATCH 070/206] Optimize the unicode61 tokenizer so that it handles ascii text faster. Make it the default tokenizer. Change the name of the simple tokenizer to "ascii". 
FossilOrigin-Name: f22dbccad9499624880ddd48df1b07fb42b1ad66 --- ext/fts5/fts5.c | 18 ++- ext/fts5/fts5_tokenize.c | 231 +++++++++++++++++++------------ ext/fts5/test/fts5near.test | 2 +- ext/fts5/test/fts5tokenizer.test | 4 +- ext/fts5/test/fts5unicode.test | 22 ++- manifest | 20 +-- manifest.uuid | 2 +- 7 files changed, 190 insertions(+), 109 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 1dd026bb4b..054851cba2 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -72,6 +72,7 @@ struct Fts5Global { i64 iNextId; /* Used to allocate unique cursor ids */ Fts5Auxiliary *pAux; /* First in list of all aux. functions */ Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ + Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ Fts5Cursor *pCsr; /* First in list of all open cursors */ }; @@ -771,7 +772,7 @@ static int fts5FindRankFunction(Fts5Cursor *pCsr){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); Fts5Config *pConfig = pTab->pConfig; int rc = SQLITE_OK; - Fts5Auxiliary *pAux; + Fts5Auxiliary *pAux = 0; const char *zRank = pCsr->zRank; const char *zRankArgs = pCsr->zRankArgs; @@ -1028,7 +1029,6 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ } static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){ - int rc; va_list ap; /* ... 
printf arguments */ va_start(ap, zFormat); assert( p->base.zErrMsg==0 ); @@ -1796,6 +1796,9 @@ static int fts5CreateTokenizer( pNew->xDestroy = xDestroy; pNew->pNext = pGlobal->pTok; pGlobal->pTok = pNew; + if( pNew->pNext==0 ){ + pGlobal->pDfltTok = pNew; + } }else{ rc = SQLITE_NOMEM; } @@ -1817,8 +1820,12 @@ static int fts5FindTokenizer( int rc = SQLITE_OK; Fts5TokenizerModule *pTok; - for(pTok=pGlobal->pTok; pTok; pTok=pTok->pNext){ - if( sqlite3_stricmp(zName, pTok->zName)==0 ) break; + if( zName==0 ){ + pTok = pGlobal->pDfltTok; + }else{ + for(pTok=pGlobal->pTok; pTok; pTok=pTok->pNext){ + if( sqlite3_stricmp(zName, pTok->zName)==0 ) break; + } } if( pTok ){ @@ -1841,8 +1848,9 @@ int sqlite3Fts5GetTokenizer( ){ Fts5TokenizerModule *pMod = 0; int rc = SQLITE_OK; + if( nArg==0 ){ - pMod = pGlobal->pTok; + pMod = pGlobal->pDfltTok; }else{ for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ if( sqlite3_stricmp(azArg[0], pMod->zName)==0 ) break; diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index c3f3e5aaa8..feb3513a46 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -16,14 +16,14 @@ #include /************************************************************************** -** Start of simple tokenizer implementation. +** Start of ascii tokenizer implementation. */ /* ** For tokenizers with no "unicode" modifier, the set of token characters ** is the same as the set of ASCII range alphanumeric characters. 
*/ -static unsigned char aSimpleTokenChar[128] = { +static unsigned char aAsciiTokenChar[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */ @@ -34,13 +34,13 @@ static unsigned char aSimpleTokenChar[128] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */ }; -typedef struct SimpleTokenizer SimpleTokenizer; -struct SimpleTokenizer { +typedef struct AsciiTokenizer AsciiTokenizer; +struct AsciiTokenizer { unsigned char aTokenChar[128]; }; -static void fts5SimpleAddExceptions( - SimpleTokenizer *p, +static void fts5AsciiAddExceptions( + AsciiTokenizer *p, const char *zArg, int bTokenChars ){ @@ -53,32 +53,32 @@ static void fts5SimpleAddExceptions( } /* -** Create a "simple" tokenizer. +** Create a "ascii" tokenizer. */ -static int fts5SimpleCreate( +static int fts5AsciiCreate( void *pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut ){ int rc = SQLITE_OK; - SimpleTokenizer *p = 0; + AsciiTokenizer *p = 0; if( nArg%2 ){ rc = SQLITE_ERROR; }else{ - p = sqlite3_malloc(sizeof(SimpleTokenizer)); + p = sqlite3_malloc(sizeof(AsciiTokenizer)); if( p==0 ){ rc = SQLITE_NOMEM; }else{ int i; - memset(p, 0, sizeof(SimpleTokenizer)); - memcpy(p->aTokenChar, aSimpleTokenChar, sizeof(aSimpleTokenChar)); + memset(p, 0, sizeof(AsciiTokenizer)); + memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); for(i=0; rc==SQLITE_OK && iiCode ) break; + if( iCode<128 ){ + p->aTokenChar[iCode] = bTokenChars; + }else{ + bToken = sqlite3Fts5UnicodeIsalnum(iCode); + assert( (bToken==0 || bToken==1) ); + assert( (bTokenChars==0 || bTokenChars==1) ); + if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){ + int i; + for(i=0; iiCode ) break; + } + memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); + aNew[i] = iCode; + nNew++; } - memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); - aNew[i] = iCode; - 
nNew++; } } p->aiException = aNew; @@ -301,6 +309,19 @@ static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ return 0; } +/* +** Delete a "unicode61" tokenizer. +*/ +static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ + if( pTok ){ + Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; + sqlite3_free(p->aiException); + sqlite3_free(p->aFold); + sqlite3_free(p); + } + return; +} + /* ** Create a "unicode61" tokenizer. */ @@ -319,7 +340,13 @@ static int fts5UnicodeCreate( if( p ){ int i; memset(p, 0, sizeof(Unicode61Tokenizer)); + memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); p->bRemoveDiacritic = 1; + p->nFold = 64; + p->aFold = sqlite3_malloc(p->nFold * sizeof(char)); + if( p->aFold==0 ){ + rc = SQLITE_NOMEM; + } for(i=0; rc==SQLITE_OK && iaiException); - sqlite3_free(p); - return; -} - /* ** Return true if, for the purposes of tokenizing with the tokenizer ** passed as the first argument, codepoint iCode is considered a token @@ -365,9 +386,6 @@ static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); } -/* -** Tokenize some text using a unicode61 tokenizer. -*/ static int fts5UnicodeTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, @@ -375,59 +393,94 @@ static int fts5UnicodeTokenize( int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) ){ Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; - const unsigned char *zInput = (const unsigned char*)pText; - const unsigned char *zTerm = &zInput[nText]; - const unsigned char *z = zInput; int rc = SQLITE_OK; - int nBuf = 0; - unsigned char *zBuf = 0; - unsigned char *zOut = 0; + unsigned char *a = p->aTokenChar; - while( rc==SQLITE_OK && zzBuf ){ - bAlnum = sqlite3Fts5UnicodeIsdiacritic(iCode); + /* Output buffer */ + char *aFold = p->aFold; + int nFold = p->nFold; + + /* Each iteration of this loop gobbles up a contiguous run of separators, + ** then the next token. 
*/ + while( rc==SQLITE_OK ){ + int iCode; /* non-ASCII codepoint read from input */ + char *zOut = aFold; + int is; + int ie; + + /* Skip any separator characters. */ + while( 1 ){ + if( zCsr>=zTerm ) goto tokenize_done; + if( *zCsr & 0x80 ) { + /* A character outside of the ascii range. Skip past it if it is + ** a separator character. Or break out of the loop if it is not. */ + is = zCsr - (unsigned char*)pText; + READ_UTF8(zCsr, zTerm, iCode); + if( fts5UnicodeIsAlnum(p, iCode) ){ + goto non_ascii_tokenchar; + } + }else{ + if( a[*zCsr] ){ + is = zCsr - (unsigned char*)pText; + goto ascii_tokenchar; + } + zCsr++; + } } - if( bAlnum ){ - int iOut; + /* Run through the tokenchars. Fold them into the output buffer along + ** the way. */ + while( zCsr=nBuf ){ - unsigned char *zNew; - nBuf = (nBuf ? nBuf*2 : 128); - zNew = sqlite3_realloc(zBuf, nBuf); - if( zNew==0 ){ + /* Grow the output buffer so that there is sufficient space to fit the + ** largest possible utf-8 character. */ + if( (zOut-aFold)+6>nFold ){ + aFold = sqlite3_malloc(nFold*2); + if( aFold==0 ){ rc = SQLITE_NOMEM; - goto tokenize_finished; - }else{ - zOut = &zNew[zOut-zBuf]; - zBuf = zNew; + goto tokenize_done; } + memcpy(aFold, p->aFold, nFold); + sqlite3_free(p->aFold); + p->aFold = aFold; + p->nFold = nFold = nFold*2; } - /* Write the new character to it */ - iOut = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic); - if( iOut ) WRITE_UTF8(zOut, iOut); + if( *zCsr & 0x80 ){ + /* An non-ascii-range character. Fold it into the output buffer if + ** it is a token character, or break out of the loop if it is not. */ + READ_UTF8(zCsr, zTerm, iCode); + if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){ + non_ascii_tokenchar: + iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic); + if( iCode ) WRITE_UTF8(zOut, iCode); + }else{ + break; + } + }else if( a[*zCsr]==0 ){ + /* An ascii-range separator character. End of token. 
*/ + break; + }else{ + ascii_tokenchar: + if( *zCsr>='A' && *zCsr<='Z' ){ + *zOut++ = *zCsr + 32; + }else{ + *zOut++ = *zCsr; + } + zCsr++; + } + ie = zCsr - (unsigned char*)pText; } - if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){ - int ie = (bAlnum ? z : zCode) - zInput; - rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie); - zOut = zBuf; - } + /* Invoke the token callback */ + rc = xToken(pCtx, aFold, zOut-aFold, is, ie); } - - tokenize_finished: - sqlite3_free(zBuf); + + tokenize_done: + if( rc==SQLITE_DONE ) rc = SQLITE_OK; return rc; } @@ -475,7 +528,7 @@ static int fts5PorterCreate( pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); if( pRet ){ memset(pRet, 0, sizeof(PorterTokenizer)); - rc = pApi->xFindTokenizer(pApi, "simple", &pUserdata, &pRet->tokenizer); + rc = pApi->xFindTokenizer(pApi, "ascii", &pUserdata, &pRet->tokenizer); }else{ rc = SQLITE_NOMEM; } @@ -789,9 +842,9 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ const char *zName; fts5_tokenizer x; } aBuiltin[] = { - { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, - { "simple", {fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize }} + { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, + { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, }; int rc = SQLITE_OK; /* Return code */ diff --git a/ext/fts5/test/fts5near.test b/ext/fts5/test/fts5near.test index 7425a4f24e..f545447e6f 100644 --- a/ext/fts5/test/fts5near.test +++ b/ext/fts5/test/fts5near.test @@ -24,7 +24,7 @@ proc do_near_test {tn doc near res} { } execsql { - CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'simple tokenchars .') + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'ascii tokenchars .') } do_near_test 1.1 ". . a . . . b . ." 
{ NEAR(a b, 5) } 1 diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index e45f7fd89a..d8c4f20f0e 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -33,7 +33,7 @@ do_execsql_test 1.3 { DROP TABLE ft1; } do_execsql_test 1.4 { - CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter simple'); + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter ascii'); DROP TABLE ft1; } @@ -75,7 +75,7 @@ do_catchsql_test 4.2 { #------------------------------------------------------------------------- # Test the "separators" and "tokenchars" options a bit. # -foreach {tn tokenizer} {1 simple 2 unicode61} { +foreach {tn tokenizer} {1 ascii 2 unicode61} { reset_db set T "$tokenizer tokenchars ',.:' separators 'xyz'" execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")" diff --git a/ext/fts5/test/fts5unicode.test b/ext/fts5/test/fts5unicode.test index 737604c1f1..0018a49030 100644 --- a/ext/fts5/test/fts5unicode.test +++ b/ext/fts5/test/fts5unicode.test @@ -25,12 +25,32 @@ proc tokenize_test {tn tokenizer input output} { }] [list {*}$output]] } -foreach {tn t} {1 simple 2 unicode61} { +foreach {tn t} {1 ascii 2 unicode61} { tokenize_test 1.$tn.0 $t {A B C D} {a b c d} tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely} tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely} tokenize_test 1.$tn.3 $t {} {} } +#------------------------------------------------------------------------- +# Check that "unicode61" really is the default tokenizer. 
+# + +do_execsql_test 2.0 " + CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = unicode61); + CREATE VIRTUAL TABLE t3 USING fts5(x, tokenize = ascii); + INSERT INTO t1 VALUES('\xC0\xC8\xCC'); + INSERT INTO t2 VALUES('\xC0\xC8\xCC'); + INSERT INTO t3 VALUES('\xC0\xC8\xCC'); +" +breakpoint +do_execsql_test 2.1 " + SELECT 't1' FROM t1 WHERE t1 MATCH '\xE0\xE8\xEC'; + SELECT 't2' FROM t2 WHERE t2 MATCH '\xE0\xE8\xEC'; + SELECT 't3' FROM t3 WHERE t3 MATCH '\xE0\xE8\xEC'; +" {t1 t2} + + finish_test diff --git a/manifest b/manifest index d065fd8d57..2c25d46d59 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssome\sdocumentation\sissues\sin\sfts5. -D 2015-01-10T20:34:27.199 +C Optimize\sthe\sunicode61\stokenizer\sso\sthat\sit\shandles\sascii\stext\sfaster.\sMake\sit\sthe\sdefault\stokenizer.\sChange\sthe\sname\sof\sthe\ssimple\stokenizer\sto\s"ascii". +D 2015-01-12T17:58:04.627 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,7 +104,7 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c c90004f4a91ce4f4dfad2fc980ade0d9314ebb10 +F ext/fts5/fts5.c 790880afffb249c79f9a36b38f9d774515f5cf7b F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a F ext/fts5/fts5Int.h 0142ba4c3c70e1976578604c0e738670f7689726 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f @@ -115,7 +115,7 @@ F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c ea36c1e42aaf8038b6139be95575eb7fe01f34e4 F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 F ext/fts5/fts5_tcl.c 
1293fac2bb26903fd3d5cdee59c5885ba7e620d5 -F ext/fts5/fts5_tokenize.c 4c30cf32c63e59bec5b38533e0a65987df262851 +F ext/fts5/fts5_tokenize.c bdb6a1f599a94ec6e9c1cad037d1071e823dcb5d F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc @@ -135,12 +135,12 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c -F ext/fts5/test/fts5near.test 70a568a1211a5b6d5a17282790d5f8cbbe086ce0 +F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 -F ext/fts5/test/fts5tokenizer.test f951bb9be29232bd057b0ac4d535b879d9cd9a89 -F ext/fts5/test/fts5unicode.test 9ae93296e59917c1210336388f6d3b98051b50c9 +F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c +F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1274,7 +1274,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e749be563d8e738af113bd301770e2f22763ab77 -R 5c59d3558d2a230e6048c600760933d7 +P 512e1bdb4093b59d1494dfc63391476eadd52aea +R 30a0c3c40d1701cf92ddf5b1410b6af9 U 
dan -Z 6c17e3ae4cf92b8841424ff4d00c314d +Z 9b7b348d489cfd6e15d4a8bf3e2c22e9 diff --git a/manifest.uuid b/manifest.uuid index de97eaee58..4575836fe1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -512e1bdb4093b59d1494dfc63391476eadd52aea \ No newline at end of file +f22dbccad9499624880ddd48df1b07fb42b1ad66 \ No newline at end of file From 851ca6e715d167bfec4a982daf56337c089aded5 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 13 Jan 2015 17:25:08 +0000 Subject: [PATCH 071/206] Fix prefix indexes so that they work in characters, not bytes. FossilOrigin-Name: af8d43a4a08528bbae25ee38fe25de8a86f8a21c --- ext/fts5/fts5_index.c | 107 +++++++++++++++++++++++++++++++--- ext/fts5/test/fts5prefix.test | 60 +++++++++++++++++++ manifest | 13 +++-- manifest.uuid | 2 +- 4 files changed, 167 insertions(+), 15 deletions(-) create mode 100644 ext/fts5/test/fts5prefix.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 4d22c41467..a7394a84e5 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -81,7 +81,7 @@ ** + for each segment from oldest to newest: ** + segment id (always > 0) ** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.) -** + first leaf page number (often 1) +** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** ** 2. The Averages Record: @@ -4049,6 +4049,39 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ return rc; } +/* +** Argument p points to a buffer containing utf-8 text that is n bytes in +** size. Return the number of bytes in the nChar character prefix of the +** buffer, or 0 if there are less than nChar characters in total. +*/ +static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ + int n = 0; + int i; + for(i=0; i=nByte ) return 0; /* Input contains fewer than nChar chars */ + if( (unsigned char)p[n++]>=0xc0 ){ + while( (p[n] & 0xc0)==0x80 ) n++; + } + } + return n; +} + +/* +** pIn is a UTF-8 encoded string, nIn bytes in size. 
Return the number of +** unicode characters in the string. +*/ +int fts5IndexCharlen(const char *pIn, int nIn){ + int nChar = 0; + int i = 0; + while( i=0xc0 ){ + while( inPrefix; iIdx++){ - int n = ((iIdx==pConfig->nPrefix) ? nTerm : pConfig->aPrefix[iIdx]); - if( n<=nTerm ){ - ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, n); + ret = fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nTerm); + + for(iIdx=0; iIdxnPrefix; iIdx++){ + int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]); + if( nByte ){ + ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nByte); } } @@ -4107,8 +4142,9 @@ int sqlite3Fts5IndexWrite( ** prefix hash tables that it is large enough for. */ fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); for(i=0; inPrefix; i++){ - if( nToken>=pConfig->aPrefix[i] ){ - fts5AddTermToHash(p, i+1, iCol, iPos, pToken, pConfig->aPrefix[i]); + int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); + if( nByte ){ + fts5AddTermToHash(p, i+1, iCol, iPos, pToken, nByte); } } @@ -4130,8 +4166,9 @@ int sqlite3Fts5IndexQuery( if( flags & FTS5INDEX_QUERY_PREFIX ){ Fts5Config *pConfig = p->pConfig; + int nChar = fts5IndexCharlen(pToken, nToken); for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ - if( pConfig->aPrefix[iIdx-1]==nToken ) break; + if( pConfig->aPrefix[iIdx-1]==nChar ) break; } if( iIdx>pConfig->nPrefix ){ iIdx = -1; @@ -4602,6 +4639,55 @@ static void fts5DecodeFunction( fts5BufferFree(&s); } +/* +** The implementation of user-defined scalar function fts5_rowid(). 
+*/ +static void fts5RowidFunction( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args (always 2) */ + sqlite3_value **apVal /* Function arguments */ +){ + const char *zArg; + if( nArg==0 ){ + sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); + }else{ + zArg = (const char*)sqlite3_value_text(apVal[0]); + if( 0==sqlite3_stricmp(zArg, "segment") ){ + i64 iRowid; + int idx, segid, height, pgno; + if( nArg!=5 ){ + sqlite3_result_error(pCtx, + "should be: fts5_rowid('segment', idx, segid, height, pgno))", -1 + ); + }else{ + idx = sqlite3_value_int(apVal[1]); + segid = sqlite3_value_int(apVal[2]); + height = sqlite3_value_int(apVal[3]); + pgno = sqlite3_value_int(apVal[4]); + iRowid = FTS5_SEGMENT_ROWID(idx, segid, height, pgno); + sqlite3_result_int64(pCtx, iRowid); + } + }else if( 0==sqlite3_stricmp(zArg, "start-of-index") ){ + i64 iRowid; + int idx; + if( nArg!=2 ){ + sqlite3_result_error(pCtx, + "should be: fts5_rowid('start-of-index', idx)", -1 + ); + }else{ + idx = sqlite3_value_int(apVal[1]); + iRowid = FTS5_SEGMENT_ROWID(idx, 1, 0, 0); + sqlite3_result_int64(pCtx, iRowid); + } + }else { + sqlite3_result_error(pCtx, + "first arg to fts5_rowid() must be 'segment' " + "or 'start-of-index' ..." + , -1 + ); + } + } +} /* ** This is called as part of registering the FTS5 module with database @@ -4615,6 +4701,11 @@ int sqlite3Fts5IndexInit(sqlite3 *db){ int rc = sqlite3_create_function( db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 + ); + } return rc; } diff --git a/ext/fts5/test/fts5prefix.test b/ext/fts5/test/fts5prefix.test new file mode 100644 index 0000000000..44c21a744c --- /dev/null +++ b/ext/fts5/test/fts5prefix.test @@ -0,0 +1,60 @@ +# 2015 Jan 13 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5prefix + + +#------------------------------------------------------------------------- +# Check that prefix indexes really do index n-character prefixes, not +# n-byte prefixes. Use the ascii tokenizer so as not to be confused by +# diacritic removal. +# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) +} + +do_test 1.2 { + foreach {rowid string} { + 1 "\xCA\xCB\xCC\xCD" + 2 "\u1234\u5678\u4321\u8765" + } { + execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) } + } +} {} + +do_execsql_test 1.1.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +#db eval { select fts5_decode(id, block) AS d FROM t1_data; } { puts $d } + +foreach o {1 2} { + if {$o==2} breakpoint + foreach {tn q res} { + 1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1 + 2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2 + } { + do_execsql_test 1.$o.$tn $q $res + } + + execsql { + DELETE FROM t1_data WHERE + rowid>=fts5_rowid('start-of-index', 0) AND + rowid Date: Sat, 17 Jan 2015 17:48:10 +0000 Subject: [PATCH 072/206] Improve the performance of the fts5 porter tokenizer implementation. 
FossilOrigin-Name: 96ea600440de05ee663e71c3f0d0de2c64108bf9 --- ext/fts5/fts5_tokenize.c | 552 ++++++++++++++++++++++++++++++------- ext/fts5/mkportersteps.tcl | 222 +++++++++++++++ ext/fts5/tool/loadfts5.tcl | 71 +++++ manifest | 14 +- manifest.uuid | 2 +- 5 files changed, 757 insertions(+), 104 deletions(-) create mode 100644 ext/fts5/mkportersteps.tcl create mode 100644 ext/fts5/tool/loadfts5.tcl diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index feb3513a46..b62f50bf82 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -443,6 +443,7 @@ static int fts5UnicodeTokenize( rc = SQLITE_NOMEM; goto tokenize_done; } + zOut = &aFold[zOut - p->aFold]; memcpy(aFold, p->aFold, nFold); sqlite3_free(p->aFold); p->aFold = aFold; @@ -528,7 +529,7 @@ static int fts5PorterCreate( pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); if( pRet ){ memset(pRet, 0, sizeof(PorterTokenizer)); - rc = pApi->xFindTokenizer(pApi, "ascii", &pUserdata, &pRet->tokenizer); + rc = pApi->xFindTokenizer(pApi, "unicode61", &pUserdata, &pRet->tokenizer); }else{ rc = SQLITE_NOMEM; } @@ -666,6 +667,448 @@ static int fts5Porter_Vowel(char *zStem, int nStem){ return 0; } + +/************************************************************************** +*************************************************************************** +** GENERATED CODE STARTS HERE (mkportersteps.tcl) +*/ + +static int fts5PorterStep4(char *aBuf, int *pnBuf){ + int ret = 0; + int nBuf = *pnBuf; + switch( aBuf[nBuf-2] ){ + + case 'a': + if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){ + if( fts5Porter_MGt1(aBuf, nBuf-2) ){ + *pnBuf = nBuf - 2; + } + } + break; + + case 'c': + if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt1(aBuf, nBuf-4) ){ + *pnBuf = nBuf - 4; + } + }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt1(aBuf, nBuf-4) ){ + *pnBuf = nBuf - 4; + } + } + break; + + case 'e': + if( nBuf>2 && 0==memcmp("er", 
&aBuf[nBuf-2], 2) ){ + if( fts5Porter_MGt1(aBuf, nBuf-2) ){ + *pnBuf = nBuf - 2; + } + } + break; + + case 'i': + if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){ + if( fts5Porter_MGt1(aBuf, nBuf-2) ){ + *pnBuf = nBuf - 2; + } + } + break; + + case 'l': + if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt1(aBuf, nBuf-4) ){ + *pnBuf = nBuf - 4; + } + }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt1(aBuf, nBuf-4) ){ + *pnBuf = nBuf - 4; + } + } + break; + + case 'n': + if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt1(aBuf, nBuf-5) ){ + *pnBuf = nBuf - 5; + } + }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt1(aBuf, nBuf-4) ){ + *pnBuf = nBuf - 4; + } + }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + } + break; + + case 'o': + if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){ + if( fts5Porter_MGt1(aBuf, nBuf-2) ){ + *pnBuf = nBuf - 2; + } + } + break; + + case 's': + if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + } + break; + + case 't': + if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + } + break; + + case 'u': + if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + } + break; + + case 'v': + if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; 
+ } + } + break; + + case 'z': + if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt1(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + } + break; + + } + return ret; +} + + +static int fts5PorterStep1B2(char *aBuf, int *pnBuf){ + int ret = 0; + int nBuf = *pnBuf; + switch( aBuf[nBuf-2] ){ + + case 'a': + if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){ + memcpy(&aBuf[nBuf-2], "ate", 3); + *pnBuf = nBuf - 2 + 3; + ret = 1; + } + break; + + case 'b': + if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){ + memcpy(&aBuf[nBuf-2], "ble", 3); + *pnBuf = nBuf - 2 + 3; + ret = 1; + } + break; + + case 'i': + if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){ + memcpy(&aBuf[nBuf-2], "ize", 3); + *pnBuf = nBuf - 2 + 3; + ret = 1; + } + break; + + } + return ret; +} + + +static int fts5PorterStep2(char *aBuf, int *pnBuf){ + int ret = 0; + int nBuf = *pnBuf; + switch( aBuf[nBuf-2] ){ + + case 'a': + if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){ + if( fts5Porter_MGt0(aBuf, nBuf-7) ){ + memcpy(&aBuf[nBuf-7], "ate", 3); + *pnBuf = nBuf - 7 + 3; + } + }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){ + if( fts5Porter_MGt0(aBuf, nBuf-6) ){ + memcpy(&aBuf[nBuf-6], "tion", 4); + *pnBuf = nBuf - 6 + 4; + } + } + break; + + case 'c': + if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + memcpy(&aBuf[nBuf-4], "ence", 4); + *pnBuf = nBuf - 4 + 4; + } + }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + memcpy(&aBuf[nBuf-4], "ance", 4); + *pnBuf = nBuf - 4 + 4; + } + } + break; + + case 'e': + if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + memcpy(&aBuf[nBuf-4], "ize", 3); + *pnBuf = nBuf - 4 + 3; + } + } + break; + + case 'g': + if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + memcpy(&aBuf[nBuf-4], "log", 3); + *pnBuf = nBuf - 4 + 3; + } + } + break; + + case 'l': + if( 
nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt0(aBuf, nBuf-3) ){ + memcpy(&aBuf[nBuf-3], "ble", 3); + *pnBuf = nBuf - 3 + 3; + } + }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + memcpy(&aBuf[nBuf-4], "al", 2); + *pnBuf = nBuf - 4 + 2; + } + }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "ent", 3); + *pnBuf = nBuf - 5 + 3; + } + }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt0(aBuf, nBuf-3) ){ + memcpy(&aBuf[nBuf-3], "e", 1); + *pnBuf = nBuf - 3 + 1; + } + }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "ous", 3); + *pnBuf = nBuf - 5 + 3; + } + } + break; + + case 'o': + if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){ + if( fts5Porter_MGt0(aBuf, nBuf-7) ){ + memcpy(&aBuf[nBuf-7], "ize", 3); + *pnBuf = nBuf - 7 + 3; + } + }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "ate", 3); + *pnBuf = nBuf - 5 + 3; + } + }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + memcpy(&aBuf[nBuf-4], "ate", 3); + *pnBuf = nBuf - 4 + 3; + } + } + break; + + case 's': + if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "al", 2); + *pnBuf = nBuf - 5 + 2; + } + }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){ + if( fts5Porter_MGt0(aBuf, nBuf-7) ){ + memcpy(&aBuf[nBuf-7], "ive", 3); + *pnBuf = nBuf - 7 + 3; + } + }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){ + if( fts5Porter_MGt0(aBuf, nBuf-7) ){ + memcpy(&aBuf[nBuf-7], "ful", 3); + *pnBuf = nBuf - 7 + 3; + } + }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){ + if( fts5Porter_MGt0(aBuf, nBuf-7) ){ + memcpy(&aBuf[nBuf-7], "ous", 3); + *pnBuf = nBuf - 7 
+ 3; + } + } + break; + + case 't': + if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "al", 2); + *pnBuf = nBuf - 5 + 2; + } + }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "ive", 3); + *pnBuf = nBuf - 5 + 3; + } + }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){ + if( fts5Porter_MGt0(aBuf, nBuf-6) ){ + memcpy(&aBuf[nBuf-6], "ble", 3); + *pnBuf = nBuf - 6 + 3; + } + } + break; + + } + return ret; +} + + +static int fts5PorterStep3(char *aBuf, int *pnBuf){ + int ret = 0; + int nBuf = *pnBuf; + switch( aBuf[nBuf-2] ){ + + case 'a': + if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + memcpy(&aBuf[nBuf-4], "ic", 2); + *pnBuf = nBuf - 4 + 2; + } + } + break; + + case 's': + if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){ + if( fts5Porter_MGt0(aBuf, nBuf-4) ){ + *pnBuf = nBuf - 4; + } + } + break; + + case 't': + if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "ic", 2); + *pnBuf = nBuf - 5 + 2; + } + }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "ic", 2); + *pnBuf = nBuf - 5 + 2; + } + } + break; + + case 'u': + if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt0(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + } + } + break; + + case 'v': + if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + *pnBuf = nBuf - 5; + } + } + break; + + case 'z': + if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){ + if( fts5Porter_MGt0(aBuf, nBuf-5) ){ + memcpy(&aBuf[nBuf-5], "al", 2); + *pnBuf = nBuf - 5 + 2; + } + } + break; + + } + return ret; +} + + +static int fts5PorterStep1B(char *aBuf, int *pnBuf){ + int ret = 0; + int nBuf = *pnBuf; + switch( aBuf[nBuf-2] ){ + + case 'e': + if( 
nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_MGt0(aBuf, nBuf-3) ){ + memcpy(&aBuf[nBuf-3], "ee", 2); + *pnBuf = nBuf - 3 + 2; + } + }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){ + if( fts5Porter_Vowel(aBuf, nBuf-2) ){ + *pnBuf = nBuf - 2; + ret = 1; + } + } + break; + + case 'n': + if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){ + if( fts5Porter_Vowel(aBuf, nBuf-3) ){ + *pnBuf = nBuf - 3; + ret = 1; + } + } + break; + + } + return ret; +} + +/* +** GENERATED CODE ENDS HERE (mkportersteps.tcl) +*************************************************************************** +**************************************************************************/ + +static void fts5PorterStep1A(char *aBuf, int *pnBuf){ + int nBuf = *pnBuf; + if( aBuf[nBuf-1]=='s' ){ + if( aBuf[nBuf-2]=='e' ){ + if( (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s') + || (nBuf>3 && aBuf[nBuf-3]=='i' ) + ){ + *pnBuf = nBuf-2; + }else{ + *pnBuf = nBuf-1; + } + } + else if( aBuf[nBuf-2]!='s' ){ + *pnBuf = nBuf-1; + } + } +} + static int fts5PorterCb( void *pCtx, const char *pToken, @@ -675,96 +1118,8 @@ static int fts5PorterCb( ){ PorterContext *p = (PorterContext*)pCtx; - PorterRule aStep1A[] = { - { "sses", 4, 0, "ss", 2 }, - { "ies", 3, 0, "i", 1 }, - { "ss", 2, 0, "ss", 2 }, - { "s", 1, 0, "", 0 }, - { 0, 0, 0, 0 } - }; - - PorterRule aStep1B[] = { - { "eed", 3, fts5Porter_MGt0, "ee", 2 }, - { "ed", 2, fts5Porter_Vowel, "", 0 }, - { "ing", 3, fts5Porter_Vowel, "", 0 }, - { 0, 0, 0, 0 } - }; - - PorterRule aStep1B2[] = { - { "at", 2, 0, "ate", 3 }, - { "bl", 2, 0, "ble", 3 }, - { "iz", 2, 0, "ize", 3 }, - { 0, 0, 0, 0 } - }; - - PorterRule aStep1C[] = { - { "y", 1, fts5Porter_Vowel, "i", 1 }, - { 0, 0, 0, 0 } - }; - - PorterRule aStep2[] = { - { "ational", 7, fts5Porter_MGt0, "ate", 3}, - { "tional", 6, fts5Porter_MGt0, "tion", 4}, - { "enci", 4, fts5Porter_MGt0, "ence", 4}, - { "anci", 4, fts5Porter_MGt0, "ance", 4}, - { "izer", 4, fts5Porter_MGt0, "ize", 3}, - { 
"logi", 4, fts5Porter_MGt0, "log", 3}, /* added post 1979 */ - { "bli", 3, fts5Porter_MGt0, "ble", 3}, /* modified post 1979 */ - { "alli", 4, fts5Porter_MGt0, "al", 2}, - { "entli", 5, fts5Porter_MGt0, "ent", 3}, - { "eli", 3, fts5Porter_MGt0, "e", 1}, - { "ousli", 5, fts5Porter_MGt0, "ous", 3}, - { "ization", 7, fts5Porter_MGt0, "ize", 3}, - { "ation", 5, fts5Porter_MGt0, "ate", 3}, - { "ator", 4, fts5Porter_MGt0, "ate", 3}, - { "alism", 5, fts5Porter_MGt0, "al", 2}, - { "iveness", 7, fts5Porter_MGt0, "ive", 3}, - { "fulness", 7, fts5Porter_MGt0, "ful", 3}, - { "ousness", 7, fts5Porter_MGt0, "ous", 3}, - { "aliti", 5, fts5Porter_MGt0, "al", 2}, - { "iviti", 5, fts5Porter_MGt0, "ive", 3}, - { "biliti", 6, fts5Porter_MGt0, "ble", 3}, - { 0, 0, 0, 0 } - }; - - PorterRule aStep3[] = { - { "icate", 5, fts5Porter_MGt0, "ic", 2}, - { "ative", 5, fts5Porter_MGt0, "", 0}, - { "alize", 5, fts5Porter_MGt0, "al", 2}, - { "iciti", 5, fts5Porter_MGt0, "ic", 2}, - { "ical", 4, fts5Porter_MGt0, "ic", 2}, - { "ful", 3, fts5Porter_MGt0, "", 0}, - { "ness", 4, fts5Porter_MGt0, "", 0}, - { 0, 0, 0, 0 } - }; - - PorterRule aStep4[] = { - { "al", 2, fts5Porter_MGt1, "", 0}, - { "ance", 4, fts5Porter_MGt1, "", 0}, - { "ence", 4, fts5Porter_MGt1, "", 0}, - { "er", 2, fts5Porter_MGt1, "", 0}, - { "ic", 2, fts5Porter_MGt1, "", 0}, - { "able", 4, fts5Porter_MGt1, "", 0}, - { "ible", 4, fts5Porter_MGt1, "", 0}, - { "ant", 3, fts5Porter_MGt1, "", 0}, - { "ement", 5, fts5Porter_MGt1, "", 0}, - { "ment", 4, fts5Porter_MGt1, "", 0}, - { "ent", 3, fts5Porter_MGt1, "", 0}, - { "ion", 3, fts5Porter_MGt1_and_S_or_T, "", 0}, - { "ou", 2, fts5Porter_MGt1, "", 0}, - { "ism", 3, fts5Porter_MGt1, "", 0}, - { "ate", 3, fts5Porter_MGt1, "", 0}, - { "iti", 3, fts5Porter_MGt1, "", 0}, - { "ous", 3, fts5Porter_MGt1, "", 0}, - { "ive", 3, fts5Porter_MGt1, "", 0}, - { "ize", 3, fts5Porter_MGt1, "", 0}, - { 0, 0, 0, 0 } - }; - - char *aBuf; int nBuf; - int n; if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto 
pass_through; aBuf = p->aBuf; @@ -772,10 +1127,9 @@ static int fts5PorterCb( memcpy(aBuf, pToken, nBuf); /* Step 1. */ - fts5PorterApply(aBuf, &nBuf, aStep1A); - n = fts5PorterApply(aBuf, &nBuf, aStep1B); - if( n==1 || n==2 ){ - if( fts5PorterApply(aBuf, &nBuf, aStep1B2)<0 ){ + fts5PorterStep1A(aBuf, &nBuf); + if( fts5PorterStep1B(aBuf, &nBuf) ){ + if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){ char c = aBuf[nBuf-1]; if( fts5PorterIsVowel(c, 0)==0 && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] @@ -786,12 +1140,16 @@ static int fts5PorterCb( } } } - fts5PorterApply(aBuf, &nBuf, aStep1C); + + /* Step 1C. */ + if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){ + aBuf[nBuf-1] = 'i'; + } /* Steps 2 through 4. */ - fts5PorterApply(aBuf, &nBuf, aStep2); - fts5PorterApply(aBuf, &nBuf, aStep3); - fts5PorterApply(aBuf, &nBuf, aStep4); + fts5PorterStep2(aBuf, &nBuf); + fts5PorterStep3(aBuf, &nBuf); + fts5PorterStep4(aBuf, &nBuf); /* Step 5a. */ if( nBuf>0 && aBuf[nBuf-1]=='e' ){ diff --git a/ext/fts5/mkportersteps.tcl b/ext/fts5/mkportersteps.tcl new file mode 100644 index 0000000000..b6214c6bf7 --- /dev/null +++ b/ext/fts5/mkportersteps.tcl @@ -0,0 +1,222 @@ +# +# 2014 Jun 09 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#------------------------------------------------------------------------- +# +# This script generates the implementations of the following C functions, +# which are part of the porter tokenizer implementation: +# +# static int fts5PorterStep1B(char *aBuf, int *pnBuf); +# static int fts5PorterStep1B2(char *aBuf, int *pnBuf); +# static int fts5PorterStep2(char *aBuf, int *pnBuf); +# static int fts5PorterStep3(char *aBuf, int *pnBuf); +# static int fts5PorterStep4(char *aBuf, int *pnBuf); +# + +set O(Step1B2) { + { at {} ate 1 } + { bl {} ble 1 } + { iz {} ize 1 } +} + +set O(Step1B) { + { "eed" fts5Porter_MGt0 "ee" 0 } + { "ed" fts5Porter_Vowel "" 1 } + { "ing" fts5Porter_Vowel "" 1 } +} + +set O(Step2) { + { "ational" fts5Porter_MGt0 "ate" } + { "tional" fts5Porter_MGt0 "tion" } + { "enci" fts5Porter_MGt0 "ence" } + { "anci" fts5Porter_MGt0 "ance" } + { "izer" fts5Porter_MGt0 "ize" } + { "logi" fts5Porter_MGt0 "log" } + { "bli" fts5Porter_MGt0 "ble" } + { "alli" fts5Porter_MGt0 "al" } + { "entli" fts5Porter_MGt0 "ent" } + { "eli" fts5Porter_MGt0 "e" } + { "ousli" fts5Porter_MGt0 "ous" } + { "ization" fts5Porter_MGt0 "ize" } + { "ation" fts5Porter_MGt0 "ate" } + { "ator" fts5Porter_MGt0 "ate" } + { "alism" fts5Porter_MGt0 "al" } + { "iveness" fts5Porter_MGt0 "ive" } + { "fulness" fts5Porter_MGt0 "ful" } + { "ousness" fts5Porter_MGt0 "ous" } + { "aliti" fts5Porter_MGt0 "al" } + { "iviti" fts5Porter_MGt0 "ive" } + { "biliti" fts5Porter_MGt0 "ble" } +} + +set O(Step3) { + { "icate" fts5Porter_MGt0 "ic" } + { "ative" fts5Porter_MGt0 "" } + { "alize" fts5Porter_MGt0 "al" } + { "iciti" fts5Porter_MGt0 "ic" } + { "ical" fts5Porter_MGt0 "ic" } + { "ful" fts5Porter_MGt0 "" } + { "ness" fts5Porter_MGt0 "" } +} + +set O(Step4) { + { "al" fts5Porter_MGt1 "" } + { "ance" fts5Porter_MGt1 "" } + { "ence" fts5Porter_MGt1 "" } + { "er" fts5Porter_MGt1 "" } + { "ic" fts5Porter_MGt1 "" } + { "able" fts5Porter_MGt1 "" } + { "ible" fts5Porter_MGt1 "" } + { "ant" fts5Porter_MGt1 "" } 
+ { "ement" fts5Porter_MGt1 "" } + { "ment" fts5Porter_MGt1 "" } + { "ent" fts5Porter_MGt1 "" } + { "ion" fts5Porter_MGt1_and_S_or_T "" } + { "ou" fts5Porter_MGt1 "" } + { "ism" fts5Porter_MGt1 "" } + { "ate" fts5Porter_MGt1 "" } + { "iti" fts5Porter_MGt1 "" } + { "ous" fts5Porter_MGt1 "" } + { "ive" fts5Porter_MGt1 "" } + { "ize" fts5Porter_MGt1 "" } +} + +proc sort_cb {lhs rhs} { + set L [string range [lindex $lhs 0] end-1 end-1] + set R [string range [lindex $rhs 0] end-1 end-1] + string compare $L $R +} + +proc create_step_function {name data} { + + set T(function) { +static int fts5Porter${name}(char *aBuf, int *pnBuf){ + int ret = 0; + int nBuf = *pnBuf; + switch( aBuf[nBuf-2] ){ + ${switchbody} + } + return ret; +} + } + + set T(case) { + case '${k}': + ${ifstmts} + break; + } + + set T(if_0_0_0) { + if( ${match} ){ + *pnBuf = nBuf - $n; + } + } + set T(if_1_0_0) { + if( ${match} ){ + if( ${cond} ){ + *pnBuf = nBuf - $n; + } + } + } + set T(if_0_1_0) { + if( ${match} ){ + ${memcpy} + *pnBuf = nBuf - $n + $nRep; + } + } + set T(if_1_1_0) { + if( ${match} ){ + if( ${cond} ){ + ${memcpy} + *pnBuf = nBuf - $n + $nRep; + } + } + } + set T(if_1_0_1) { + if( ${match} ){ + if( ${cond} ){ + *pnBuf = nBuf - $n; + ret = 1; + } + } + } + set T(if_0_1_1) { + if( ${match} ){ + ${memcpy} + *pnBuf = nBuf - $n + $nRep; + ret = 1; + } + } + set T(if_1_1_1) { + if( ${match} ){ + if( ${cond} ){ + ${memcpy} + *pnBuf = nBuf - $n + $nRep; + ret = 1; + } + } + } + + set switchbody "" + + foreach I $data { + set k [string range [lindex $I 0] end-1 end-1] + lappend aCase($k) $I + } + foreach k [lsort [array names aCase]] { + set ifstmts "" + foreach I $aCase($k) { + set zSuffix [lindex $I 0] ;# Suffix text for this rule + set zRep [lindex $I 2] ;# Replacement text for rule + set xCond [lindex $I 1] ;# Condition callback (or "") + + set n [string length $zSuffix] + set nRep [string length $zRep] + + set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)" + set memcpy 
"memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);" + set cond "${xCond}(aBuf, nBuf-$n)" + + set bMemcpy [expr {$nRep>0}] + set bCond [expr {$xCond!=""}] + set bRet [expr {[llength $I]>3 && [lindex $I 3]}] + + set t $T(if_${bCond}_${bMemcpy}_${bRet}) + lappend ifstmts [string trim [subst -nocommands $t]] + } + + set ifstmts [join $ifstmts "else "] + + append switchbody [subst -nocommands $T(case)] + } + + + puts [subst -nocommands $T(function)] +} + + +puts [string trim { +/************************************************************************** +*************************************************************************** +** GENERATED CODE STARTS HERE (mkportersteps.tcl) +*/ +}] +foreach step [array names O] { + create_step_function $step $O($step) +} +puts [string trim { +/* +** GENERATED CODE ENDS HERE (mkportersteps.tcl) +*************************************************************************** +**************************************************************************/ +}] + + + diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl new file mode 100644 index 0000000000..2572e38aea --- /dev/null +++ b/ext/fts5/tool/loadfts5.tcl @@ -0,0 +1,71 @@ + + +proc loadfile {f} { + set fd [open $f] + set data [read $fd] + close $fd + return $data +} + +set ::nRow 0 +proc load_hierachy {dir} { + foreach f [glob -nocomplain -dir $dir *] { + if {$::O(limit) && $::nRow>=$::O(limit)} break + if {[file isdir $f]} { + load_hierachy $f + } else { + db eval { INSERT INTO t1 VALUES($f, loadfile($f)) } + incr ::nRow + } + } +} + +proc usage {} { + puts stderr "Usage: $::argv0 ?SWITCHES? 
DATABASE PATH" + puts stderr "" + puts stderr "Switches are:" + puts stderr " -fts4 (use fts4 instead of fts5)" + exit 1 +} + +set O(vtab) fts5 +set O(tok) "" +set O(limit) 0 + +if {[llength $argv]<2} usage +for {set i 0} {$i < [llength $argv]-2} {incr i} { + set arg [lindex $argv $i] + switch -- [lindex $argv $i] { + -fts4 { + set O(vtab) fts4 + } + + -fts5 { + set O(vtab) fts5 + } + + -porter { + set O(tok) ", tokenize=porter" + } + + -limit { + incr i + set O(limit) [lindex $argv $i] + } + + default { + usage + } + } +} + +sqlite3 db [lindex $argv end-1] +db func loadfile loadfile + +db transaction { + db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" + load_hierachy [lindex $argv end] +} + + + diff --git a/manifest b/manifest index 8bfbf04553..7615e891da 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sprefix\sindexes\sso\sthat\sthey\swork\sin\scharacters,\snot\sbytes. -D 2015-01-13T17:25:08.235 +C Improve\sthe\sperformance\sof\sthe\sfts5\sporter\stokenizer\simplementation. 
+D 2015-01-17T17:48:10.103 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -115,9 +115,10 @@ F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 6f9f98875b2ee5a16255911e1dc1b0b32cb1c350 F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 -F ext/fts5/fts5_tokenize.c bdb6a1f599a94ec6e9c1cad037d1071e823dcb5d +F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 +F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc F ext/fts5/test/fts5aa.test 3941b54d7585153be0c5cf0026f7dd8cfef13ea9 F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536 @@ -143,6 +144,7 @@ F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee +F ext/fts5/tool/loadfts5.tcl 55c1f3ebf3f4b4f54be5bbdc823e36d59fc5e2dd F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1275,7 +1277,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f22dbccad9499624880ddd48df1b07fb42b1ad66 -R 8d592e678c3bea0440cf749de24705b7 +P 
af8d43a4a08528bbae25ee38fe25de8a86f8a21c +R bbc2aaea254f25294ae3538c1336787c U dan -Z 3408fdf2714814208d88a4779f5de9eb +Z 3ca0ddccabcad41dd9682a0c32f2940d diff --git a/manifest.uuid b/manifest.uuid index f2f6111639..5130bde3f4 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -af8d43a4a08528bbae25ee38fe25de8a86f8a21c \ No newline at end of file +96ea600440de05ee663e71c3f0d0de2c64108bf9 \ No newline at end of file From d8736bc3b89d1825cd57b75079b1a336225ec780 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 17 Jan 2015 20:01:52 +0000 Subject: [PATCH 073/206] Ensure an up to date copy of the fts5 configuration has been loaded into memory before attempting to modify the same configuration. FossilOrigin-Name: f30afd209aa4ce42766b1493750c4f5b5f1e9502 --- ext/fts5/fts5.c | 5 ++++- ext/fts5/fts5Int.h | 2 ++ ext/fts5/fts5_index.c | 7 +++++++ ext/fts5/tool/loadfts5.tcl | 30 ++++++++++++++++++++++++------ manifest | 18 +++++++++--------- manifest.uuid | 2 +- 6 files changed, 47 insertions(+), 17 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 054851cba2..cc6457df6a 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1085,7 +1085,10 @@ static int fts5SpecialInsert( }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); }else{ - rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); + rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); + } if( rc==SQLITE_OK ){ if( bError ){ rc = SQLITE_ERROR; diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 6148ec7f6c..afe1911180 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -343,6 +343,8 @@ int sqlite3Fts5IndexReads(Fts5Index *p); int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); +int sqlite3Fts5IndexLoadConfig(Fts5Index *p); + /* ** End of interface to code in fts5_index.c. 
**************************************************************************/ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index a7394a84e5..570ff429de 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4368,6 +4368,13 @@ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ return rc; } +int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ + Fts5Structure *pStruct; + pStruct = fts5StructureRead(p, 0); + fts5StructureRelease(pStruct); + return fts5IndexReturn(p); +} + /************************************************************************* ************************************************************************** ** Below this point is the implementation of the fts5_decode() scalar diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index 2572e38aea..c63564cd0a 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -24,16 +24,22 @@ proc usage {} { puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH" puts stderr "" puts stderr "Switches are:" - puts stderr " -fts4 (use fts4 instead of fts5)" + puts stderr " -fts4 (use fts4 instead of fts5)" + puts stderr " -fts5 (use fts5)" + puts stderr " -porter (use porter tokenizer)" + puts stderr " -limit N (load no more than N documents)" + puts stderr " -automerge N (set the automerge parameter to N)" exit 1 } -set O(vtab) fts5 -set O(tok) "" -set O(limit) 0 +set O(vtab) fts5 +set O(tok) "" +set O(limit) 0 +set O(automerge) -1 if {[llength $argv]<2} usage -for {set i 0} {$i < [llength $argv]-2} {incr i} { +set nOpt [expr {[llength $argv]-2}] +for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] switch -- [lindex $argv $i] { -fts4 { @@ -49,9 +55,14 @@ for {set i 0} {$i < [llength $argv]-2} {incr i} { } -limit { - incr i + if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] } + + -automerge { + if { [incr i]>=$nOpt } usage + set O(automerge) [lindex $argv $i] + } default { usage @@ -64,6 +75,13 @@ db func loadfile loadfile db transaction { db 
eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" + if {$O(automerge)>=0} { + if {$O(vtab) == "fts5"} { + db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) } + } else { + db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) } + } + } load_hierachy [lindex $argv end] } diff --git a/manifest b/manifest index 7615e891da..991f99eb4f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\sthe\sperformance\sof\sthe\sfts5\sporter\stokenizer\simplementation. -D 2015-01-17T17:48:10.103 +C Ensure\san\sup\sto\sdate\scopy\sof\sthe\sfts5\sconfiguration\shas\sbeen\sloaded\sinto\smemory\sbefore\sattempting\sto\smodify\sthe\ssame\sconfiguration. +D 2015-01-17T20:01:52.023 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,15 +104,15 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 790880afffb249c79f9a36b38f9d774515f5cf7b +F ext/fts5/fts5.c 0ba5a8f27e1aa4deab82f0fc295d55f67dfe7f34 F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h 0142ba4c3c70e1976578604c0e738670f7689726 +F ext/fts5/fts5Int.h b593d5ff5f0cc6493778f88bc19db1dea42e003b F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 6ba7a2e34a80989cca509bd295de1bc9f8e739a3 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 6f9f98875b2ee5a16255911e1dc1b0b32cb1c350 +F ext/fts5/fts5_index.c 
33473b527bc0a20fe4d262c2b7b4b67d6c4db5a2 F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -144,7 +144,7 @@ F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee -F ext/fts5/tool/loadfts5.tcl 55c1f3ebf3f4b4f54be5bbdc823e36d59fc5e2dd +F ext/fts5/tool/loadfts5.tcl 17c9771fb225b6b7ddd02a698fc7f320eadd7b15 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1277,7 +1277,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P af8d43a4a08528bbae25ee38fe25de8a86f8a21c -R bbc2aaea254f25294ae3538c1336787c +P 96ea600440de05ee663e71c3f0d0de2c64108bf9 +R 026d50b0e36062cf8629af1a1c3f509d U dan -Z 3ca0ddccabcad41dd9682a0c32f2940d +Z 986e1351ec9614d6453e829ee3d25fd7 diff --git a/manifest.uuid b/manifest.uuid index 5130bde3f4..a1cc5de558 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -96ea600440de05ee663e71c3f0d0de2c64108bf9 \ No newline at end of file +f30afd209aa4ce42766b1493750c4f5b5f1e9502 \ No newline at end of file From aa4d380a42f868d81e870affd2a8971cb48d4e30 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 19 Jan 2015 11:15:36 +0000 Subject: [PATCH 074/206] Handle the case where a tokenizer determines that there are zero tokens in an fts5 query term. 
FossilOrigin-Name: 75f3d17f864072dfa2caee182b86cc4b9972d691 --- ext/fts5/fts5_expr.c | 71 ++++++++++++++++++++++----------------- ext/fts5/test/fts5aa.test | 8 +++++ ext/fts5/test/fts5eb.test | 53 +++++++++++++++++++++++++++++ manifest | 15 +++++---- manifest.uuid | 2 +- 5 files changed, 111 insertions(+), 38 deletions(-) create mode 100644 ext/fts5/test/fts5eb.test diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 3fe34463f1..71f8b48069 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -208,7 +208,7 @@ int sqlite3Fts5ExprNew( }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); sqlite3Fts5ParserFree(pEngine, fts5ParseFree); - assert( sParse.pExpr==0 || (sParse.rc==SQLITE_OK && sParse.zErr==0) ); + assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 ); if( sParse.rc==SQLITE_OK ){ *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); if( pNew==0 ){ @@ -1011,10 +1011,12 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ ** is not considered an error if the query does not match any documents. 
*/ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bAsc){ - int rc; - p->pIndex = pIdx; - p->bAsc = bAsc; - rc = fts5ExprNodeFirst(p, p->pRoot); + int rc = SQLITE_OK; + if( p->pRoot ){ + p->pIndex = pIdx; + p->bAsc = bAsc; + rc = fts5ExprNodeFirst(p, p->pRoot); + } return rc; } @@ -1031,7 +1033,7 @@ int sqlite3Fts5ExprNext(Fts5Expr *p){ } int sqlite3Fts5ExprEof(Fts5Expr *p){ - return p->pRoot->bEof; + return (p->pRoot==0 || p->pRoot->bEof); } i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ @@ -1101,6 +1103,9 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5ExprNearset *pRet = 0; if( pParse->rc==SQLITE_OK ){ + if( pPhrase==0 ){ + return pNear; + } if( pNear==0 ){ int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); pRet = sqlite3_malloc(nByte); @@ -1207,7 +1212,7 @@ void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ */ Fts5ExprPhrase *sqlite3Fts5ParseTerm( Fts5Parse *pParse, /* Parse context */ - Fts5ExprPhrase *pPhrase, /* Phrase to append to */ + Fts5ExprPhrase *pAppend, /* Phrase to append to */ Fts5Token *pToken, /* String to tokenize */ int bPrefix /* True if there is a trailing "*" */ ){ @@ -1217,39 +1222,40 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( char *z = 0; memset(&sCtx, 0, sizeof(TokenCtx)); - sCtx.pPhrase = pPhrase; - - if( pPhrase==0 ){ - if( (pParse->nPhrase % 8)==0 ){ - int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); - Fts5ExprPhrase **apNew; - apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte); - if( apNew==0 ){ - pParse->rc = SQLITE_NOMEM; - fts5ExprPhraseFree(pPhrase); - return 0; - } - pParse->apPhrase = apNew; - } - pParse->nPhrase++; - } + sCtx.pPhrase = pAppend; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ sqlite3Fts5Dequote(z); rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize); } + sqlite3_free(z); if( rc ){ pParse->rc = rc; fts5ExprPhraseFree(sCtx.pPhrase); sCtx.pPhrase = 0; - }else if( sCtx.pPhrase->nTerm>0 ){ + }else if( 
sCtx.pPhrase ){ + + if( pAppend==0 ){ + if( (pParse->nPhrase % 8)==0 ){ + int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); + Fts5ExprPhrase **apNew; + apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte); + if( apNew==0 ){ + pParse->rc = SQLITE_NOMEM; + fts5ExprPhraseFree(sCtx.pPhrase); + return 0; + } + pParse->apPhrase = apNew; + } + pParse->nPhrase++; + } + + pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; + assert( sCtx.pPhrase->nTerm>0 ); sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; } - - pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; - sqlite3_free(z); return sCtx.pPhrase; } @@ -1331,9 +1337,12 @@ Fts5ExprNode *sqlite3Fts5ParseNode( Fts5ExprNode *pRet = 0; if( pParse->rc==SQLITE_OK ){ - assert( (eType!=FTS5_STRING && pLeft && pRight && !pNear) - || (eType==FTS5_STRING && !pLeft && !pRight && pNear) + assert( (eType!=FTS5_STRING && !pNear) + || (eType==FTS5_STRING && !pLeft && !pRight) ); + if( eType==FTS5_STRING && pNear==0 ) return 0; + if( eType!=FTS5_STRING && pLeft==0 ) return pRight; + if( eType!=FTS5_STRING && pRight==0 ) return pLeft; pRet = (Fts5ExprNode*)sqlite3_malloc(sizeof(Fts5ExprNode)); if( pRet==0 ){ pParse->rc = SQLITE_NOMEM; @@ -1589,7 +1598,9 @@ static void fts5ExprFunction( } if( rc==SQLITE_OK ){ char *zText; - if( bTcl ){ + if( pExpr->pRoot==0 ){ + zText = sqlite3_mprintf(""); + }else if( bTcl ){ zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); }else{ zText = fts5ExprPrint(pConfig, pExpr->pRoot); diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 48c880a694..8f4f92833d 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -21,6 +21,8 @@ ifcapable !fts5 { return } +if 0 { + do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); SELECT name, sql FROM sqlite_master; @@ -300,6 +302,8 @@ do_test 12.3 { string is integer $res } {1} +} + #------------------------------------------------------------------------- # reset_db @@ 
-320,6 +324,10 @@ do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} +do_execsql_test 13.6 { + SELECT rowid FROM t1 WHERE t1 MATCH '.'; +} {} + finish_test diff --git a/ext/fts5/test/fts5eb.test b/ext/fts5/test/fts5eb.test new file mode 100644 index 0000000000..987cb5ef19 --- /dev/null +++ b/ext/fts5/test/fts5eb.test @@ -0,0 +1,53 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5eb + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +proc do_syntax_error_test {tn expr err} { + set ::se_expr $expr + do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err] +} + +proc do_syntax_test {tn expr res} { + set ::se_expr $expr + do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] +} + +foreach {tn expr res} { + 1 {abc} {"abc"} + 2 {abc .} {"abc"} + 3 {.} {} + 4 {abc OR .} {"abc"} + 5 {abc NOT .} {"abc"} + 6 {abc AND .} {"abc"} + 7 {. OR abc} {"abc"} + 8 {. NOT abc} {"abc"} + 9 {. AND abc} {"abc"} + 10 {abc + . + def} {"abc" + "def"} + 11 {abc . def} {"abc" AND "def"} + 12 {r+e OR w} {"r" + "e" OR "w"} +} { + do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] +} + + +finish_test + + + diff --git a/manifest b/manifest index 991f99eb4f..00449bb1e2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Ensure\san\sup\sto\sdate\scopy\sof\sthe\sfts5\sconfiguration\shas\sbeen\sloaded\sinto\smemory\sbefore\sattempting\sto\smodify\sthe\ssame\sconfiguration. -D 2015-01-17T20:01:52.023 +C Handle\sthe\scase\swhere\sa\stokenizer\sdetermines\sthat\sthere\sare\szero\stokens\sin\san\sfts5\squery\sterm. 
+D 2015-01-19T11:15:36.619 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5Int.h b593d5ff5f0cc6493778f88bc19db1dea42e003b F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 -F ext/fts5/fts5_expr.c 6ba7a2e34a80989cca509bd295de1bc9f8e739a3 +F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 33473b527bc0a20fe4d262c2b7b4b67d6c4db5a2 F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 @@ -120,7 +120,7 @@ F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc -F ext/fts5/test/fts5aa.test 3941b54d7585153be0c5cf0026f7dd8cfef13ea9 +F ext/fts5/test/fts5aa.test 59f5f2ca67338eb1755c96ef18881d7bcb1ff78c F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536 F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 F ext/fts5/test/fts5ad.test 3b01eec8516d5631909716514e2e585a45ef0eb1 @@ -135,6 +135,7 @@ F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c +F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F 
ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 @@ -1277,7 +1278,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 96ea600440de05ee663e71c3f0d0de2c64108bf9 -R 026d50b0e36062cf8629af1a1c3f509d +P f30afd209aa4ce42766b1493750c4f5b5f1e9502 +R 135edaa34728516c82a7de0e97543966 U dan -Z 986e1351ec9614d6453e829ee3d25fd7 +Z 1a9767fbfdd382ad5aed7027717883f2 diff --git a/manifest.uuid b/manifest.uuid index a1cc5de558..4caf7a4ed4 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f30afd209aa4ce42766b1493750c4f5b5f1e9502 \ No newline at end of file +75f3d17f864072dfa2caee182b86cc4b9972d691 \ No newline at end of file From fd0b436e1044867a5788c58d79c3a7976aa46874 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 20 Jan 2015 20:34:17 +0000 Subject: [PATCH 075/206] Add extra fault injection tests to fts5. 
FossilOrigin-Name: f45a0dc0a8911c8aac5a1028ac4f543a709656e7 --- ext/fts5/fts5_index.c | 32 +++++----- ext/fts5/test/fts5aa.test | 4 -- ext/fts5/test/fts5fault1.test | 114 +++++++++++++++++++++++++++++++++- manifest | 16 ++--- manifest.uuid | 2 +- 5 files changed, 139 insertions(+), 29 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 570ff429de..b490ef2454 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -708,26 +708,29 @@ sqlite3_free(buf.p); } if( rc==SQLITE_OK ){ + u8 *aOut; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ fts5BufferZero(pBuf); - if( SQLITE_OK==fts5BufferGrow(&rc, pBuf, nByte) ){ - rc = sqlite3_blob_read(p->pReader, pBuf->p, nByte, 0); - if( rc==SQLITE_OK ) pBuf->n = nByte; - } + fts5BufferGrow(&rc, pBuf, nByte); + aOut = pBuf->p; + pBuf->n = nByte; }else{ - pRet = (Fts5Data*)fts5IdxMalloc(p, sizeof(Fts5Data) + nByte); - if( !pRet ) return 0; - - pRet->n = nByte; - pRet->p = (u8*)&pRet[1]; - pRet->nRef = 1; - rc = sqlite3_blob_read(p->pReader, pRet->p, nByte, 0); - if( rc!=SQLITE_OK ){ - sqlite3_free(pRet); - pRet = 0; + pRet = (Fts5Data*)sqlite3Fts5MallocZero(&rc, nByte+sizeof(Fts5Data)); + if( pRet ){ + pRet->n = nByte; + aOut = pRet->p = (u8*)&pRet[1]; + pRet->nRef = 1; } } + + if( rc==SQLITE_OK ){ + rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); + } + if( rc!=SQLITE_OK ){ + sqlite3_free(pRet); + pRet = 0; + } } p->rc = rc; p->nRead++; @@ -2981,6 +2984,7 @@ static void fts5IndexMergeLevel( pStruct = *ppStruct; } fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); + if( p->rc ) return; pLvl = &pStruct->aLevel[iLvl]; pLvlOut = &pStruct->aLevel[iLvl+1]; diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 8f4f92833d..c7e1169301 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -21,8 +21,6 @@ ifcapable !fts5 { return } -if 0 { - do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); SELECT 
name, sql FROM sqlite_master; @@ -302,8 +300,6 @@ do_test 12.3 { string is integer $res } {1} -} - #------------------------------------------------------------------------- # reset_db diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index 7685b8bb79..8f24368336 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -29,6 +29,7 @@ ifcapable !fts5 { # 3: DELETE statement # 4: MATCH expressions # +# if 1 { @@ -70,8 +71,6 @@ do_faultsim_test 3 -prep { faultsim_test_result {0 {}} } -} - reset_db do_execsql_test 4.0 { CREATE VIRTUAL TABLE t2 USING fts5(a, b); @@ -108,5 +107,116 @@ foreach {tn expr res} { " } +#------------------------------------------------------------------------- +# The following tests use a larger database populated with random data. +# +# The database page size is set to 512 bytes and the FTS5 page size left +# at the default 1000 bytes. This means that reading a node may require +# pulling an overflow page from disk, which is an extra opportunity for +# an error to occur. 
+# +reset_db +do_execsql_test 5.0.1 { + PRAGMA main.page_size = 512; + CREATE VIRTUAL TABLE x1 USING fts5(a, b); + PRAGMA main.page_size; +} {512} + +proc rnddoc {n} { + set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] + set doc [list] + for {set i 0} {$i < $n} {incr i} { + lappend doc [string map $map [format %.3d [expr int(rand()*1000)]]] + } + set doc +} +db func rnddoc rnddoc + +do_execsql_test 5.0.2 { + WITH r(a, b) AS ( + SELECT rnddoc(6), rnddoc(6) UNION ALL + SELECT rnddoc(6), rnddoc(6) FROM r + ) + INSERT INTO x1 SELECT * FROM r LIMIT 10000; +} + +set res [db one { + SELECT count(*) FROM x1 WHERE x1.a LIKE '%abc%' OR x1.b LIKE '%abc%'} +] + +do_faultsim_test 5.1 -faults oom* -body { + execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abc' } +} -test { + faultsim_test_result [list 0 $::res] +} +do_faultsim_test 5.2 -faults oom* -body { + execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abcd' } +} -test { + faultsim_test_result [list 0 0] +} + +proc test_astar {a b} { + return [expr { [regexp {a[^ ][^ ]} $a] || [regexp {a[^ ][^ ]} $b] }] +} +db func test_astar test_astar + +set res [db one { SELECT count(*) FROM x1 WHERE test_astar(a, b) } ] +do_faultsim_test 5.3 -faults oom* -body { + execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'a*' } +} -test { + faultsim_test_result [list 0 $::res] +} + +do_faultsim_test 5.4 -faults oom* -prep { + db close + sqlite3 db test.db +} -body { + execsql { INSERT INTO x1 VALUES('a b c d', 'e f g h') } +} -test { + faultsim_test_result [list 0 {}] +} + +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + INSERT INTO x1(x1, rank) VALUES('automerge', 0); + + INSERT INTO x1 VALUES('a b c'); -- 1 + INSERT INTO x1 VALUES('a b c'); -- 2 + INSERT INTO x1 VALUES('a b c'); -- 3 + INSERT INTO x1 VALUES('a b c'); -- 4 + INSERT INTO x1 VALUES('a b c'); -- 5 + INSERT INTO x1 VALUES('a b c'); -- 6 + INSERT INTO x1 VALUES('a b 
c'); -- 7 + INSERT INTO x1 VALUES('a b c'); -- 8 + INSERT INTO x1 VALUES('a b c'); -- 9 + INSERT INTO x1 VALUES('a b c'); -- 10 + INSERT INTO x1 VALUES('a b c'); -- 11 + INSERT INTO x1 VALUES('a b c'); -- 12 + INSERT INTO x1 VALUES('a b c'); -- 13 + INSERT INTO x1 VALUES('a b c'); -- 14 + INSERT INTO x1 VALUES('a b c'); -- 15 + + SELECT count(*) FROM x1_data; +} {17} + +faultsim_save_and_close + +do_faultsim_test 6.1 -faults oom-tr* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO x1 VALUES('d e f') } +} -test { + faultsim_test_result [list 0 {}] + if {$testrc==0} { + set nCnt [db one {SELECT count(*) FROM x1_data}] + if {$nCnt!=3} { error "expected 3 entries but there are $nCnt" } + } +} + finish_test diff --git a/manifest b/manifest index 00449bb1e2..dc57aa2d28 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Handle\sthe\scase\swhere\sa\stokenizer\sdetermines\sthat\sthere\sare\szero\stokens\sin\san\sfts5\squery\sterm. -D 2015-01-19T11:15:36.619 +C Add\sextra\sfault\sinjection\stests\sto\sfts5. 
+D 2015-01-20T20:34:17.734 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 33473b527bc0a20fe4d262c2b7b4b67d6c4db5a2 +F ext/fts5/fts5_index.c 8c34dd95f780ff4010af75643a1960bbde70045d F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -120,7 +120,7 @@ F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc -F ext/fts5/test/fts5aa.test 59f5f2ca67338eb1755c96ef18881d7bcb1ff78c +F ext/fts5/test/fts5aa.test ad7eb7ace215e999a0bda9aa078dbdade20afa1e F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536 F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 F ext/fts5/test/fts5ad.test 3b01eec8516d5631909716514e2e585a45ef0eb1 @@ -136,7 +136,7 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e -F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c +F ext/fts5/test/fts5fault1.test 2c077402045f0b4e69ae1de6cfa51b0c4c5044c2 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F 
ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e @@ -1278,7 +1278,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f30afd209aa4ce42766b1493750c4f5b5f1e9502 -R 135edaa34728516c82a7de0e97543966 +P 75f3d17f864072dfa2caee182b86cc4b9972d691 +R 2673080f53da5e5812b6adb8f1a6e570 U dan -Z 1a9767fbfdd382ad5aed7027717883f2 +Z 0775369c7a1d21779f81072190cb8e04 diff --git a/manifest.uuid b/manifest.uuid index 4caf7a4ed4..0d30b38a60 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -75f3d17f864072dfa2caee182b86cc4b9972d691 \ No newline at end of file +f45a0dc0a8911c8aac5a1028ac4f543a709656e7 \ No newline at end of file From b461380333846a00840824002691f004f4d81437 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 21 Jan 2015 16:10:59 +0000 Subject: [PATCH 076/206] Add further tests for fts5 backend. FossilOrigin-Name: 09dabb3b9e140eec6cfda83bcb86b6b9f5cf54b6 --- ext/fts5/fts5_index.c | 34 ++++++++-- ext/fts5/test/fts5fault1.test | 50 +++++++++++++- ext/fts5/test/fts5rowid.test | 124 ++++++++++++++++++++++++++++++++++ manifest | 17 ++--- manifest.uuid | 2 +- 5 files changed, 210 insertions(+), 17 deletions(-) create mode 100644 ext/fts5/test/fts5rowid.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index b490ef2454..0f3d3ddb72 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -264,6 +264,13 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } #endif +/* +** Each time a blob is read from the %_data table, it is padded with this +** many zero bytes. This makes it easier to decode the various record formats +** without overreading if the records are corrupt. 
+*/ +#define FTS5_DATA_ZERO_PADDING 8 + typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; @@ -716,7 +723,8 @@ sqlite3_free(buf.p); aOut = pBuf->p; pBuf->n = nByte; }else{ - pRet = (Fts5Data*)sqlite3Fts5MallocZero(&rc, nByte+sizeof(Fts5Data)); + int nSpace = nByte + FTS5_DATA_ZERO_PADDING; + pRet = (Fts5Data*)sqlite3Fts5MallocZero(&rc, nSpace+sizeof(Fts5Data)); if( pRet ){ pRet->n = nByte; aOut = pRet->p = (u8*)&pRet[1]; @@ -4539,15 +4547,22 @@ static void fts5DecodeFunction( ){ i64 iRowid; /* Rowid for record being decoded */ int iIdx,iSegid,iHeight,iPgno; /* Rowid components */ - const u8 *a; int n; /* Record to decode */ + const u8 *aBlob; int n; /* Record to decode */ + u8 *a = 0; Fts5Buffer s; /* Build up text to return here */ int rc = SQLITE_OK; /* Return code */ + int nSpace = 0; assert( nArg==2 ); memset(&s, 0, sizeof(Fts5Buffer)); iRowid = sqlite3_value_int64(apVal[0]); n = sqlite3_value_bytes(apVal[1]); - a = sqlite3_value_blob(apVal[1]); + aBlob = sqlite3_value_blob(apVal[1]); + + nSpace = n + FTS5_DATA_ZERO_PADDING; + a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); + if( a==0 ) goto decode_out; + memcpy(a, aBlob, n); fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); @@ -4587,8 +4602,13 @@ static void fts5DecodeFunction( int iOff; int nKeep = 0; - iRowidOff = fts5GetU16(&a[0]); - iTermOff = fts5GetU16(&a[2]); + if( n>=4 ){ + iRowidOff = fts5GetU16(&a[0]); + iTermOff = fts5GetU16(&a[2]); + }else{ + sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); + goto decode_out; + } if( iRowidOff ){ iOff = iRowidOff; @@ -4642,6 +4662,8 @@ static void fts5DecodeFunction( } } + decode_out: + sqlite3_free(a); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); }else{ @@ -4693,7 +4715,7 @@ static void fts5RowidFunction( }else { sqlite3_result_error(pCtx, "first arg to fts5_rowid() must be 'segment' " 
- "or 'start-of-index' ..." + "or 'start-of-index'" , -1 ); } diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index 8f24368336..310f5caf9f 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -31,7 +31,7 @@ ifcapable !fts5 { # # -if 1 { +if 0 { faultsim_save_and_close do_faultsim_test 1 -prep { @@ -107,6 +107,9 @@ foreach {tn expr res} { " } + +} + #------------------------------------------------------------------------- # The following tests use a larger database populated with random data. # @@ -176,7 +179,40 @@ do_faultsim_test 5.4 -faults oom* -prep { faultsim_test_result [list 0 {}] } +do_faultsim_test 5.5.1 -faults oom* -body { + execsql { + SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=1 + } +} -test { + faultsim_test_result [list 0 1] } +do_faultsim_test 5.5.2 -faults oom* -body { + execsql { + SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=10 + } +} -test { + faultsim_test_result [list 0 1] +} +do_faultsim_test 5.5.3 -faults oom* -body { + execsql { + SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = ( + SELECT min(rowid) FROM x1_data WHERE rowid>20 + ) + } +} -test { + faultsim_test_result [list 0 1] +} +do_faultsim_test 5.5.4 -faults oom* -body { + execsql { + SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = ( + SELECT max(rowid) FROM x1_data + ) + } +} -test { + faultsim_test_result [list 0 1] +} + +finish_test #------------------------------------------------------------------------- # @@ -206,7 +242,7 @@ do_execsql_test 6.0 { faultsim_save_and_close -do_faultsim_test 6.1 -faults oom-tr* -prep { +do_faultsim_test 6.1 -faults oom* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO x1 VALUES('d e f') } @@ -218,5 +254,15 @@ do_faultsim_test 6.1 -faults oom-tr* -prep { } } +#------------------------------------------------------------------------- +do_faultsim_test 7.0 -faults oom* -prep { + catch { db close } +} 
-body { + sqlite3 db test.db +} -test { + faultsim_test_result [list 0 {}] [list 1 {}] +} + + finish_test diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test new file mode 100644 index 0000000000..f804d99e8d --- /dev/null +++ b/ext/fts5/test/fts5rowid.test @@ -0,0 +1,124 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests of the scalar fts5_rowid() and fts5_decode() functions. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5rowid + +do_catchsql_test 1.1 { + SELECT fts5_rowid() +} {1 {should be: fts5_rowid(subject, ....)}} + +do_catchsql_test 1.2 { + SELECT fts5_rowid('segment') +} {1 {should be: fts5_rowid('segment', idx, segid, height, pgno))}} + +do_execsql_test 1.3 { + SELECT fts5_rowid('segment', 1, 1, 1, 1) +} {4503670494330881} + +do_catchsql_test 1.4 { + SELECT fts5_rowid('start-of-index'); +} {1 {should be: fts5_rowid('start-of-index', idx)}} + +do_execsql_test 1.5 { + SELECT fts5_rowid('start-of-index', 1); +} {4503668346847232} + +do_catchsql_test 1.4 { + SELECT fts5_rowid('nosucharg'); +} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} + + +#------------------------------------------------------------------------- +# Tests of the fts5_decode() function. 
+# +reset_db +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE x1 USING fts5(a, b); + INSERT INTO x1(x1, rank) VALUES('pgsz', 32); +} {} + +proc rnddoc {n} { + set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] + set doc [list] + for {set i 0} {$i < $n} {incr i} { + lappend doc [string map $map [format %.3d [expr int(rand()*100)]]] + } + set doc +} +db func rnddoc rnddoc + +do_execsql_test 2.2 { + WITH r(a, b) AS ( + SELECT rnddoc(6), rnddoc(6) UNION ALL + SELECT rnddoc(6), rnddoc(6) FROM r + ) + INSERT INTO x1 SELECT * FROM r LIMIT 10000; +} + +set res [db one {SELECT count(*) FROM x1_data}] +do_execsql_test 2.3 { + SELECT count(fts5_decode(rowid, block)) FROM x1_data; +} $res +do_execsql_test 2.4 { + UPDATE x1_data SET block = X''; + SELECT count(fts5_decode(rowid, block)) FROM x1_data; +} $res + +do_execsql_test 2.5 { + INSERT INTO x1(x1, rank) VALUES('pgsz', 1024); + INSERT INTO x1(x1) VALUES('rebuild'); +} + +set res [db one {SELECT count(*) FROM x1_data}] +do_execsql_test 2.6 { + SELECT count(fts5_decode(rowid, block)) FROM x1_data; +} $res +do_execsql_test 2.7 { + UPDATE x1_data SET block = X''; + SELECT count(fts5_decode(rowid, block)) FROM x1_data; +} $res + +#------------------------------------------------------------------------- +# Tests with very large tokens. 
+# +set strlist [list \ + "[string repeat x 400]" \ + "[string repeat x 300][string repeat w 100]" \ + "[string repeat x 300][string repeat y 100]" \ + "[string repeat x 300][string repeat z 600]" \ +] +do_test 3.0 { + execsql { + BEGIN; + CREATE VIRTUAL TABLE x2 USING fts5(a); + } + foreach str $strlist { execsql { INSERT INTO x2 VALUES($str) } } + execsql COMMIT +} {} + +for {set tn 0} {$tn<[llength $strlist]} {incr tn} { + set str [lindex $strlist $tn] + do_execsql_test 3.1.$tn { + SELECT rowid FROM x2 WHERE x2 MATCH $str + } [expr $tn+1] +} + +set res [db one {SELECT count(*) FROM x2_data}] +do_execsql_test 3.2 { + SELECT count(fts5_decode(rowid, block)) FROM x2_data; +} $res + +finish_test + diff --git a/manifest b/manifest index e80bf58412..5aa435069e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. -D 2015-01-21T06:36:07.043 +C Add\sfurther\stests\sfor\sfts5\sbackend. +D 2015-01-21T16:10:59.815 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -26,7 +26,7 @@ F autoconf/missing d7c9981a81af13370d4ed152b24c0a82b7028585 x F autoconf/tea/Makefile.in d55bcc63832caf0309c2ff80358756116618cfca F autoconf/tea/README 3e9a3c060f29a44344ab50aec506f4db903fb873 F autoconf/tea/aclocal.m4 52c47aac44ce0ddb1f918b6993e8beb8eee88f43 -F autoconf/tea/configure.ac 93d43c79e936fb16556e22498177d7e8571efa04 w autoconf/tea/configure.in +F autoconf/tea/configure.ac 93d43c79e936fb16556e22498177d7e8571efa04 F autoconf/tea/doc/sqlite3.n e1fe45d4f5286ee3d0ccc877aca2a0def488e9bb F autoconf/tea/license.terms 13bd403c9610fd2b76ece0ab50c4c5eda933d523 F autoconf/tea/pkgIndex.tcl.in 3ef61715cf1c7bdcff56947ffadb26bc991ca39d @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 
8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 8c34dd95f780ff4010af75643a1960bbde70045d +F ext/fts5/fts5_index.c 49e470a6380e572d2e7dcce48399f6dc294d7247 F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -136,12 +136,13 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e -F ext/fts5/test/fts5fault1.test 2c077402045f0b4e69ae1de6cfa51b0c4c5044c2 +F ext/fts5/test/fts5fault1.test 73b55c5b777f6dfb18022abd5cec2e569464e21c F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 +F ext/fts5/test/fts5rowid.test 44f90d91538f037470f6a438f21bfe65916129f4 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee @@ -1281,7 +1282,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f45a0dc0a8911c8aac5a1028ac4f543a709656e7 0077f64510f9b9ce90032df2696cb242d097ab84 -R e06147bbdf5a4901c5e35690744486d9 +P b3348b1e07e168b156636a29fc8c6d6afb3129c2 +R 
7d4ce350ba9859353b62d149a7c21faf U dan -Z a4fb835870b87f88f9fea880f3a15d66 +Z 7c0fe174f4df11b7c7390d0e1d1bdddb diff --git a/manifest.uuid b/manifest.uuid index a1753d4834..1d83cad7d6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b3348b1e07e168b156636a29fc8c6d6afb3129c2 \ No newline at end of file +09dabb3b9e140eec6cfda83bcb86b6b9f5cf54b6 \ No newline at end of file From dd8a501019f2b0581cc4d3b5f8d3f5b5486e140b Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 21 Jan 2015 18:23:25 +0000 Subject: [PATCH 077/206] Fix an fts5 issue with loading doclist-indexes for a term that is the last thing on its leaf page. FossilOrigin-Name: e0d614425f7f5ffe266fdc03642931b1ec19ad25 --- ext/fts5/fts5_index.c | 24 +++++++++++++----------- ext/fts5/test/fts5rowid.test | 31 +++++++++++++++++++++++++++++++ manifest | 14 +++++++------- manifest.uuid | 2 +- 4 files changed, 52 insertions(+), 19 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 0f3d3ddb72..cabae111d2 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1825,7 +1825,7 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ ** (pIter->pDlidx). */ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ - int iSegid = pIter->pSeg->iSegid; + int iSeg = pIter->pSeg->iSegid; int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ int iOff = pIter->iLeafOffset; /* Byte offset within current leaf */ @@ -1836,21 +1836,23 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ /* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different ** term. */ - while( iOffn ){ - i64 iDelta; - int nPoslist; + if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ + while( iOffn ){ + i64 iDelta; + int nPoslist; - /* iOff is currently the offset of the size field of a position list. 
*/ - iOff += getVarint32(&pLeaf->p[iOff], nPoslist); - iOff += nPoslist; + /* iOff is currently the offset of the size field of a position list. */ + iOff += getVarint32(&pLeaf->p[iOff], nPoslist); + iOff += nPoslist; - if( iOffn ){ - iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) return; + if( iOffn ){ + iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) return; + } } } - fts5DlidxIterInit(p, bRev, iIdx, iSegid, pIter->iLeafPgno, &pIter->pDlidx); + fts5DlidxIterInit(p, bRev, iIdx, iSeg, pIter->iTermLeafPgno, &pIter->pDlidx); } /* diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test index f804d99e8d..e828e521ac 100644 --- a/ext/fts5/test/fts5rowid.test +++ b/ext/fts5/test/fts5rowid.test @@ -120,5 +120,36 @@ do_execsql_test 3.2 { SELECT count(fts5_decode(rowid, block)) FROM x2_data; } $res +#------------------------------------------------------------------------- +# Leaf pages with no terms or rowids at all. +# +set strlist [list \ + "[string repeat {w } 400]" \ + "[string repeat {x } 400]" \ + "[string repeat {y } 400]" \ + "[string repeat {z } 400]" \ +] +do_test 4.0 { + execsql { + BEGIN; + CREATE VIRTUAL TABLE x3 USING fts5(a); + INSERT INTO x3(x3, rank) VALUES('pgsz', 32); + } + foreach str $strlist { execsql { INSERT INTO x3 VALUES($str) } } + execsql COMMIT +} {} + +for {set tn 0} {$tn<[llength $strlist]} {incr tn} { + set str [lindex $strlist $tn] + do_execsql_test 4.1.$tn { + SELECT rowid FROM x3 WHERE x3 MATCH $str + } [expr $tn+1] +} + +set res [db one {SELECT count(*) FROM x3_data}] +do_execsql_test 4.2 { + SELECT count(fts5_decode(rowid, block)) FROM x3_data; +} $res + finish_test diff --git a/manifest b/manifest index 5beacde35d..4052b8bf9d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\strunk\schanges\swith\sthis\sbranch. -D 2015-01-21T17:20:21.383 +C Fix\san\sfts5\sissue\swith\sloading\sdoclist-indexes\sfor\sa\sterm\sthat\sis\sthe\slast\sthing\son\sits\sleaf\spage. 
+D 2015-01-21T18:23:25.625 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 49e470a6380e572d2e7dcce48399f6dc294d7247 +F ext/fts5/fts5_index.c 7decfde6535f73c50acb009e41946ade38e9c1b7 F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -142,7 +142,7 @@ F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 -F ext/fts5/test/fts5rowid.test 44f90d91538f037470f6a438f21bfe65916129f4 +F ext/fts5/test/fts5rowid.test bdff2ee094bf66bd163e0cdfb36a7fc7715a12d3 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee @@ -1282,7 +1282,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 09dabb3b9e140eec6cfda83bcb86b6b9f5cf54b6 5f592359d6d41708da3b3ac9d987a1631bfa3d88 -R 8c898c775e75cc6f7d7e1cf6ea13418f +P f8699a1a3b0f3bb612aff87a2729f912d709c534 +R 17960da1ca932a330a75bf04374dfc20 U dan 
-Z fd361ca13b44b2a851b6664735786aee +Z c1930a6e112ceedd9aced2b05e742853 diff --git a/manifest.uuid b/manifest.uuid index eb23626a44..721ef32452 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f8699a1a3b0f3bb612aff87a2729f912d709c534 \ No newline at end of file +e0d614425f7f5ffe266fdc03642931b1ec19ad25 \ No newline at end of file From 4c2871bead03c2d95e0765a5b3c6dca99b05cefa Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 21 Jan 2015 20:30:14 +0000 Subject: [PATCH 078/206] Further tests and fixes for fts5. FossilOrigin-Name: c020a291ed293a66d21c5885e50a7fee04aa6366 --- ext/fts5/fts5_index.c | 18 ++++++++---------- ext/fts5/fts5_storage.c | 10 ++++++---- ext/fts5/test/fts5aa.test | 10 ++++++++++ ext/fts5/test/fts5ah.test | 10 +++++++++- ext/fts5/test/fts5fault1.test | 31 +++++++++++++++++++++++++++---- ext/fts5/test/fts5rowid.test | 28 ++++++++++++++++++++++++++++ manifest | 22 +++++++++++----------- manifest.uuid | 2 +- 8 files changed, 100 insertions(+), 31 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index cabae111d2..780e8e268b 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1056,10 +1056,11 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){ */ static int fts5StructureCountSegments(Fts5Structure *pStruct){ int nSegment = 0; /* Total number of segments */ - int iLvl; /* Used to iterate through levels */ - - for(iLvl=0; iLvlnLevel; iLvl++){ - nSegment += pStruct->aLevel[iLvl].nSeg; + if( pStruct ){ + int iLvl; /* Used to iterate through levels */ + for(iLvl=0; iLvlnLevel; iLvl++){ + nSegment += pStruct->aLevel[iLvl].nSeg; + } } return nSegment; @@ -3882,7 +3883,6 @@ static void fts5SetupPrefixIter( int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ Fts5Config *pConfig = p->pConfig; int iIdx; /* Used to iterate through indexes */ - int rc; /* Return code */ u64 cksum2 = 0; /* Checksum based on contents of indexes */ /* Check that the checksum of the index matches the argument checksum 
*/ @@ -3915,11 +3915,10 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ fts5MultiIterFree(p, pIter); fts5StructureRelease(pStruct); } - rc = p->rc; - if( rc==SQLITE_OK && cksum!=cksum2 ) rc = FTS5_CORRUPT; + if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; /* Check that the internal nodes of each segment match the leaves */ - for(iIdx=0; rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ + for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ Fts5Structure *pStruct = fts5StructureRead(p, iIdx); if( pStruct ){ int iLvl, iSeg; @@ -3931,10 +3930,9 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ } } fts5StructureRelease(pStruct); - rc = p->rc; } - return rc; + return fts5IndexReturn(p); } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index b82db3e4a4..8e31721977 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -706,10 +706,12 @@ static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ }else{ sqlite3_stmt *pCnt = 0; rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); - if( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pCnt) ){ - *pnRow = sqlite3_column_int64(pCnt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pCnt) ){ + *pnRow = sqlite3_column_int64(pCnt, 0); + } + rc = sqlite3_finalize(pCnt); } - rc = sqlite3_finalize(pCnt); } sqlite3_free(zSql); @@ -968,7 +970,7 @@ int sqlite3Fts5StorageConfigValue( sqlite3_stmt *pReplace = 0; int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_TRANSIENT); + sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC); sqlite3_bind_value(pReplace, 2, pVal); sqlite3_step(pReplace); rc = sqlite3_reset(pReplace); diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 2360122dd8..450539e889 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -369,6 +369,16 @@ do_test 14.3 { set nRow } {200} 
+do_execsql_test 15.0 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} +do_execsql_test 15.1 { + UPDATE t1_content SET c1 = 'xyz xyz xyz xyz xyz abc' WHERE rowid = 1; +} +do_catchsql_test 15.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {1 {database disk image is malformed}} + finish_test diff --git a/ext/fts5/test/fts5ah.test b/ext/fts5/test/fts5ah.test index 37c014fcf1..69cdb38299 100644 --- a/ext/fts5/test/fts5ah.test +++ b/ext/fts5/test/fts5ah.test @@ -28,18 +28,26 @@ ifcapable !fts5 { do_test 1.0 { execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) } execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 128) } + set v {w w w w w w w w w w w w w w w w w w w w} + execsql { INSERT INTO t1(rowid, a) VALUES(0, $v) } for {set i 1} {$i <= 10000} {incr i} { set v {x x x x x x x x x x x x x x x x x x x x} if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i} if {($i % 1577)==0} {lset v 5 W ; lappend W $i} execsql { INSERT INTO t1 VALUES($v) } } + set v {w w w w w w w w w w w w w w w w w w w w} + execsql { INSERT INTO t1 VALUES($v) } } {} -do_execsql_test 1.1 { +do_execsql_test 1.1.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' } [lsort -integer -decr $W] +do_execsql_test 1.1.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*' +} [lsort -integer -decr $W] + do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [lsort -integer -decr $Y] diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index 310f5caf9f..4392f13c97 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -31,7 +31,7 @@ ifcapable !fts5 { # # -if 0 { +if 1 { faultsim_save_and_close do_faultsim_test 1 -prep { @@ -108,8 +108,6 @@ foreach {tn expr res} { } -} - #------------------------------------------------------------------------- # The following tests use a larger database populated with random data. 
# @@ -212,7 +210,7 @@ do_faultsim_test 5.5.4 -faults oom* -body { faultsim_test_result [list 0 1] } -finish_test +} #------------------------------------------------------------------------- # @@ -254,6 +252,31 @@ do_faultsim_test 6.1 -faults oom* -prep { } } +do_faultsim_test 6.2 -faults oom* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO x1(x1, rank) VALUES('pgsz', 32) } +} -test { + faultsim_test_result [list 0 {}] +} + +do_faultsim_test 6.3 -faults oom-* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO x1(x1) VALUES('integrity-check') } +} -test { + faultsim_test_result [list 0 {}] +} + +do_faultsim_test 6.4 -faults oom-* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO x1(x1) VALUES('optimize') } +} -test { + faultsim_test_result [list 0 {}] +} + + #------------------------------------------------------------------------- do_faultsim_test 7.0 -faults oom* -prep { catch { db close } diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test index e828e521ac..c33c9adea3 100644 --- a/ext/fts5/test/fts5rowid.test +++ b/ext/fts5/test/fts5rowid.test @@ -151,5 +151,33 @@ do_execsql_test 4.2 { SELECT count(fts5_decode(rowid, block)) FROM x3_data; } $res +#------------------------------------------------------------------------- +# Position lists with large values. 
+# +set strlist [list \ + "[string repeat {w } 400]a" \ + "[string repeat {x } 400]a" \ + "[string repeat {y } 400]a" \ + "[string repeat {z } 400]a" \ +] +do_test 5.0 { + execsql { + BEGIN; + CREATE VIRTUAL TABLE x4 USING fts5(a); + INSERT INTO x4(x4, rank) VALUES('pgsz', 32); + } + foreach str $strlist { execsql { INSERT INTO x4 VALUES($str) } } + execsql COMMIT +} {} + +do_execsql_test 5.1 { + SELECT rowid FROM x4 WHERE x4 MATCH 'a' +} {4 3 2 1} + +set res [db one {SELECT count(*) FROM x4_data}] +do_execsql_test 5.2 { + SELECT count(fts5_decode(rowid, block)) FROM x4_data; +} $res + finish_test diff --git a/manifest b/manifest index 4052b8bf9d..7f398a24bf 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sissue\swith\sloading\sdoclist-indexes\sfor\sa\sterm\sthat\sis\sthe\slast\sthing\son\sits\sleaf\spage. -D 2015-01-21T18:23:25.625 +C Further\stests\sand\sfixes\sfor\sfts5. +D 2015-01-21T20:30:14.830 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,22 +112,22 @@ F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c 7decfde6535f73c50acb009e41946ade38e9c1b7 -F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664 +F ext/fts5/fts5_index.c baf26bfee5bd776194c0e508e3c101964ae851c6 +F ext/fts5/fts5_storage.c d56722960982d0c48ba1b88d9001fefed8cff1a4 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 
5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc -F ext/fts5/test/fts5aa.test 770dbb37108a8d3531ec1240b71a404121adb554 +F ext/fts5/test/fts5aa.test 8ddbbcbedab67101dc9a86fd5c39d78b0e06515f F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536 F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 F ext/fts5/test/fts5ad.test 3b01eec8516d5631909716514e2e585a45ef0eb1 F ext/fts5/test/fts5ae.test 014d5be2f5f70407fb032d4f27704116254797c3 F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 -F ext/fts5/test/fts5ah.test 749855d1f457ecbf8e54b25a92e55a84cc689151 +F ext/fts5/test/fts5ah.test 17ba8e197a781ca10548b7260e39ed8269d24b93 F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce @@ -136,13 +136,13 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e -F ext/fts5/test/fts5fault1.test 73b55c5b777f6dfb18022abd5cec2e569464e21c +F ext/fts5/test/fts5fault1.test 405886f4ecd255fa7a7077c021a65c9f8eaa4804 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 -F ext/fts5/test/fts5rowid.test bdff2ee094bf66bd163e0cdfb36a7fc7715a12d3 +F ext/fts5/test/fts5rowid.test db482328fe9bf78bb6a09f2dbf055e2caeaac00a F 
ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee @@ -1282,7 +1282,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f8699a1a3b0f3bb612aff87a2729f912d709c534 -R 17960da1ca932a330a75bf04374dfc20 +P e0d614425f7f5ffe266fdc03642931b1ec19ad25 +R c0a23f610b9ed738a5e0737db6938cae U dan -Z c1930a6e112ceedd9aced2b05e742853 +Z 2e4179b4673c6b65260289e1eab737a6 diff --git a/manifest.uuid b/manifest.uuid index 721ef32452..0492abfca8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e0d614425f7f5ffe266fdc03642931b1ec19ad25 \ No newline at end of file +c020a291ed293a66d21c5885e50a7fee04aa6366 \ No newline at end of file From 0b520cc5f925f3d990b0eb2f2479603ac9414d57 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 22 Jan 2015 19:13:08 +0000 Subject: [PATCH 079/206] Add further tests and fixes for fts5. 
FossilOrigin-Name: 5b295897153e9b26cd0d2e7ea112a4d461d0a665 --- ext/fts5/fts5Int.h | 3 + ext/fts5/fts5_hash.c | 9 +- ext/fts5/fts5_index.c | 190 ++++++++++++++++++++++------------ ext/fts5/test/fts5_common.tcl | 17 +++ ext/fts5/test/fts5ab.test | 85 +++++++++++++++ ext/fts5/test/fts5fault1.test | 72 ++++++++++++- manifest | 22 ++-- manifest.uuid | 2 +- 8 files changed, 316 insertions(+), 84 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index afe1911180..7b7a86d8eb 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -345,6 +345,9 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); +int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); +#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) + /* ** End of interface to code in fts5_index.c. **************************************************************************/ diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 97dd0dc0ec..61eed74be4 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -111,11 +111,14 @@ void sqlite3Fts5HashFree(Fts5Hash *pHash){ void sqlite3Fts5HashClear(Fts5Hash *pHash){ int i; for(i=0; inSlot; i++){ - if( pHash->aSlot[i] ){ - sqlite3_free(pHash->aSlot[i]); - pHash->aSlot[i] = 0; + Fts5HashEntry *pNext; + Fts5HashEntry *pSlot; + for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ + pNext = pSlot->pNext; + sqlite3_free(pSlot); } } + memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); pHash->nEntry = 0; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 780e8e268b..d7cc3da1db 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -604,6 +604,72 @@ static u16 fts5GetU16(const u8 *aIn){ return ((u16)aIn[0] << 8) + aIn[1]; } +/* +** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. +** Except, this version does handle the single byte case that the core +** version depends on being handled before its function is called. 
+*/ +int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ + u32 a,b; + + /* The 1-byte case. Overwhelmingly the most common. */ + a = *p; + /* a: p0 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 0 and 127 */ + *v = a; + return 1; + } + + /* The 2-byte case */ + p++; + b = *p; + /* b: p1 (unmasked) */ + if (!(b&0x80)) + { + /* Values between 128 and 16383 */ + a &= 0x7f; + a = a<<7; + *v = a | b; + return 2; + } + + /* The 3-byte case */ + p++; + a = a<<14; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 16384 and 2097151 */ + a &= (0x7f<<14)|(0x7f); + b &= 0x7f; + b = b<<7; + *v = a | b; + return 3; + } + + /* A 32-bit varint is used to store size information in btrees. + ** Objects are rarely larger than 2MiB limit of a 3-byte varint. + ** A 3-byte varint is sufficient, for example, to record the size + ** of a 1048569-byte BLOB or string. + ** + ** We only unroll the first 1-, 2-, and 3- byte cases. The very + ** rare larger cases can be handled by the slower 64-bit varint + ** routine. + */ + { + u64 v64; + u8 n; + p -= 2; + n = sqlite3GetVarint(p, &v64); + *v = (u32)v64; + assert( n>3 && n<=9 ); + return n; + } +} + /* ** Allocate and return a buffer at least nByte bytes in size. ** @@ -918,8 +984,8 @@ static int fts5StructureDecode( /* Read the total number of levels and segments from the start of the ** structure record. 
*/ - i += getVarint32(&pData[i], nLevel); - i += getVarint32(&pData[i], nSegment); + i += fts5GetVarint32(&pData[i], nLevel); + i += fts5GetVarint32(&pData[i], nSegment); nByte = ( sizeof(Fts5Structure) + /* Main structure */ sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */ @@ -935,8 +1001,8 @@ static int fts5StructureDecode( int nTotal; int iSeg; - i += getVarint32(&pData[i], pLvl->nMerge); - i += getVarint32(&pData[i], nTotal); + i += fts5GetVarint32(&pData[i], pLvl->nMerge); + i += fts5GetVarint32(&pData[i], nTotal); assert( nTotal>=pLvl->nMerge ); pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, nTotal * sizeof(Fts5StructureSegment) @@ -945,10 +1011,10 @@ static int fts5StructureDecode( if( rc==SQLITE_OK ){ pLvl->nSeg = nTotal; for(iSeg=0; iSegaSeg[iSeg].iSegid); - i += getVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); - i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); - i += getVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); } }else{ fts5StructureRelease(pRet); @@ -1144,6 +1210,7 @@ static void fts5StructurePromoteTo( for(il=iPromote+1; ilnLevel; il++){ Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; + if( pLvl->nMerge ) return; for(is=pLvl->nSeg-1; is>=0; is--){ int sz = fts5SegmentSize(&pLvl->aSeg[is]); if( sz>szPromote ) return; @@ -1193,7 +1260,8 @@ static void fts5StructurePromote( /* Check for condition (a) */ for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); pTst = &pStruct->aLevel[iTst]; - if( iTst>=0 && pTst->nMerge==0 ){ + assert( pTst->nMerge==0 ); + if( iTst>=0 ){ int i; int szMax = 0; for(i=0; inSeg; i++){ @@ -1208,31 +1276,13 @@ static void fts5StructurePromote( } } - /* Check for condition (b) */ + /* If condition (a) is not met, assume (b) is true. 
StructurePromoteTo() + ** is a no-op if it is not. */ if( iPromote<0 ){ - Fts5StructureLevel *pTst; - for(iTst=iLvl+1; iTstnLevel; iTst++){ - pTst = &pStruct->aLevel[iTst]; - if( pTst->nSeg ) break; - } - if( iTstnLevel && pTst->nMerge==0 ){ - Fts5StructureSegment *pSeg2 = &pTst->aSeg[pTst->nSeg-1]; - int sz = pSeg2->pgnoLast - pSeg2->pgnoFirst + 1; - if( sz<=szSeg ){ - iPromote = iLvl; - szPromote = szSeg; - } - } - } - - /* If iPromote is greater than or equal to zero at this point, then it - ** is the level number of a level to which segments that consist of - ** szPromote or fewer pages should be promoted. */ - if( iPromote>=0 ){ - fts5PrintStructure("BEFORE", pStruct); - fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); - fts5PrintStructure("AFTER", pStruct); + iPromote = iLvl; + szPromote = szSeg; } + fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); } } @@ -1246,7 +1296,7 @@ static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; pIter->iOff++; - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); + pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); }else{ pIter->nEmpty = 0; pIter->bDlidx = 0; @@ -1262,8 +1312,8 @@ static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ pIter->iChild += pIter->nEmpty; }else{ int nPre, nNew; - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nPre); - pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], nNew); + pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre); + pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew); pIter->term.n = nPre-2; fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); pIter->iOff += nNew; @@ -1282,7 +1332,7 @@ static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){ memset(pIter, 0, sizeof(*pIter)); pIter->aData = aData; pIter->nData = nData; - 
pIter->iOff = getVarint32(aData, pIter->iChild); + pIter->iOff = fts5GetVarint32(aData, pIter->iChild); fts5NodeIterGobbleNEmpty(pIter); } @@ -1466,7 +1516,7 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ int iOff = pIter->iLeafOffset; /* Offset to read at */ int nNew; /* Bytes of new data */ - iOff += getVarint32(&a[iOff], nNew); + iOff += fts5GetVarint32(&a[iOff], nNew); pIter->term.n = nKeep; fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); iOff += nNew; @@ -1548,7 +1598,7 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ i64 iDelta = 0; int nPos; - i += getVarint32(&a[i], nPos); + i += fts5GetVarint32(&a[i], nPos); i += nPos; if( i>=n ) break; i += getVarint(&a[i], (u64*)&iDelta); @@ -1665,7 +1715,7 @@ static void fts5SegIterNext( pIter->iRowidOffset--; pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; - iOff += getVarint32(&a[iOff], nPos); + iOff += fts5GetVarint32(&a[iOff], nPos); iOff += nPos; getVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid += iDelta; @@ -1685,7 +1735,7 @@ static void fts5SegIterNext( iOff = pIter->iLeafOffset; if( iOffiLeafOffset = 4; }else if( iOff!=fts5GetU16(&a[2]) ){ - pIter->iLeafOffset += getVarint32(&a[iOff], nKeep); + pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); } }else{ pIter->iRowid -= iDelta; @@ -1760,7 +1810,7 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ i64 iDelta; /* Position list size in bytes */ - iOff += getVarint32(&pLeaf->p[iOff], nPos); + iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos); iOff += nPos; if( iOff>=pLeaf->n ) break; @@ -1843,7 +1893,7 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ int nPoslist; /* iOff is currently the offset of the size field of a position list. 
*/ - iOff += getVarint32(&pLeaf->p[iOff], nPoslist); + iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist); iOff += nPoslist; if( iOffn ){ @@ -2353,7 +2403,7 @@ static void fts5ChunkIterInit( pLeaf = pIter->pLeaf; } - iOff += getVarint32(&pLeaf->p[iOff], pIter->nRem); + iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem); pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); pIter->p = pLeaf->p + iOff; @@ -2383,7 +2433,7 @@ static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ if( fts5ChunkIterEof(p, &pIter->chunk) ) return 0; pIter->iOff = 0; } - pIter->iOff += getVarint32(&pIter->chunk.p[pIter->iOff], iVal); + pIter->iOff += fts5GetVarint32(&pIter->chunk.p[pIter->iOff], iVal); } return iVal; } @@ -2546,20 +2596,25 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ } static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ - Fts5PageWriter *aNew; - Fts5PageWriter *pNew; - int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); + if( p->rc==SQLITE_OK ){ + Fts5PageWriter *aNew; + Fts5PageWriter *pNew; + int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); - aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); - if( aNew==0 ) return; + aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } - pNew = &aNew[pWriter->nWriter]; - memset(pNew, 0, sizeof(Fts5PageWriter)); - pNew->pgno = 1; - fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); + pNew = &aNew[pWriter->nWriter]; + memset(pNew, 0, sizeof(Fts5PageWriter)); + pNew->pgno = 1; + fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); - pWriter->nWriter++; - pWriter->aWriter = aNew; + pWriter->nWriter++; + pWriter->aWriter = aNew; + } } /* @@ -3150,13 +3205,15 @@ static void fts5IndexWork( #endif if( nBestpConfig->nAutomerge - && pStruct->aLevel[iBestLvl].nMerge==0 - ){ + && pStruct->aLevel[iBestLvl].nMerge==0 + ){ break; } fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); - 
fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); + if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ + fts5StructurePromote(p, iBestLvl+1, pStruct); + } *ppStruct = pStruct; } } @@ -3272,6 +3329,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; } + fts5StructurePromote(p, 0, pStruct); } if( p->pConfig->nAutomerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); @@ -3543,7 +3601,7 @@ static void fts5IndexIntegrityCheckSegment( }else{ int nTerm; /* Size of term on leaf in bytes */ int res; /* Comparison of term and split-key */ - iOff += getVarint32(&pLeaf->p[iOff], nTerm); + iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); if( res==0 ) res = nTerm - iter.term.n; if( res<0 ){ @@ -3667,7 +3725,7 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ }else{ pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } - pIter->i += getVarint32(&pIter->a[pIter->i], pIter->nPoslist); + pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist); pIter->aPoslist = &pIter->a[pIter->i]; pIter->i += pIter->nPoslist; }else{ @@ -3829,7 +3887,7 @@ static void fts5SetupPrefixIter( && ((!bAsc && iRowid>=iLastRowid) || (bAsc && iRowid<=iLastRowid)) ){ - for(i=0; doclist.n && p->rc==SQLITE_OK; i++){ + for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ assert( i0} {incr i -1} { + execsql { INSERT INTO x1 VALUES( rnddoc(50) ) } + lappend ::res $i + } +} {} + +do_faultsim_test 8.1 -faults oom* -prep { +} -body { + execsql { + SELECT rowid FROM x1 WHERE x1 MATCH 'x*' + } +} -test { + faultsim_test_result [list 0 $::res] +} + +} + +#------------------------------------------------------------------------- +# Segment promotion. 
+# +do_test 9.0 { + reset_db + db func rnddoc fts5_rnddoc + execsql { + CREATE VIRTUAL TABLE s2 USING fts5(x); + INSERT INTO s2(s2, rank) VALUES('pgsz', 32); + INSERT INTO s2(s2, rank) VALUES('automerge', 0); + } + + for {set i 1} {$i <= 16} {incr i} { + execsql { INSERT INTO s2 VALUES(rnddoc(5)) } + } + fts5_level_segs s2 +} {0 1} +faultsim_save_and_close + +do_faultsim_test 9.1 -faults oom-* -prep { + faultsim_restore_and_reopen + db func rnddoc fts5_rnddoc +} -body { + execsql { INSERT INTO s2 VALUES(rnddoc(160)) } +} -test { + faultsim_test_result {0 {}} + if {$testrc==0} { + set ls [fts5_level_segs s2] + if {$ls != "2 0"} { error "fts5_level_segs says {$ls}" } + } +} + + finish_test diff --git a/manifest b/manifest index 7f398a24bf..ae6189124b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Further\stests\sand\sfixes\sfor\sfts5. -D 2015-01-21T20:30:14.830 +C Add\sfurther\stests\sand\sfixes\sfor\sfts5. +D 2015-01-22T19:13:08.439 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,22 +106,22 @@ F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 0ba5a8f27e1aa4deab82f0fc295d55f67dfe7f34 F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h b593d5ff5f0cc6493778f88bc19db1dea42e003b +F ext/fts5/fts5Int.h 99da8551098bb23fd94d0aa3f4ae1a411ee630b4 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 -F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 -F ext/fts5/fts5_index.c baf26bfee5bd776194c0e508e3c101964ae851c6 +F ext/fts5/fts5_hash.c 
7a87f9f2eae2216c710064821fa0621ac6a8ce7b +F ext/fts5/fts5_index.c ee7b141adde3dbdaa56f1e198c06a0786d298126 F ext/fts5/fts5_storage.c d56722960982d0c48ba1b88d9001fefed8cff1a4 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc +F ext/fts5/test/fts5_common.tcl 7db772d34fa0139d4b58d2b321928c9ccd30f699 F ext/fts5/test/fts5aa.test 8ddbbcbedab67101dc9a86fd5c39d78b0e06515f -F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536 +F ext/fts5/test/fts5ab.test 3f3ad2fb9ed60a0df57b626fa6fe6ef41d4deee0 F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 F ext/fts5/test/fts5ad.test 3b01eec8516d5631909716514e2e585a45ef0eb1 F ext/fts5/test/fts5ae.test 014d5be2f5f70407fb032d4f27704116254797c3 @@ -136,7 +136,7 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e -F ext/fts5/test/fts5fault1.test 405886f4ecd255fa7a7077c021a65c9f8eaa4804 +F ext/fts5/test/fts5fault1.test f9bafb61b40061ad19b61d15003c5faeea4a57b5 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e @@ -1282,7 +1282,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix 
deb315d026cc8400325c5863eef847784a219a2f -P e0d614425f7f5ffe266fdc03642931b1ec19ad25 -R c0a23f610b9ed738a5e0737db6938cae +P c020a291ed293a66d21c5885e50a7fee04aa6366 +R 244beb886a9d1f5f10328b67a9ad3f5b U dan -Z 2e4179b4673c6b65260289e1eab737a6 +Z 4d510e0e441ea9491ac2e8425faae5c2 diff --git a/manifest.uuid b/manifest.uuid index 0492abfca8..75fced3560 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c020a291ed293a66d21c5885e50a7fee04aa6366 \ No newline at end of file +5b295897153e9b26cd0d2e7ea112a4d461d0a665 \ No newline at end of file From 626d9e306298bc2fa401268f8c917322504d3246 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 23 Jan 2015 06:50:33 +0000 Subject: [PATCH 080/206] Remove some redundant code from fts5. FossilOrigin-Name: 939b7a5de25e064bdf08e03864c35ab718da6f6f --- ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_config.c | 15 +++++++ ext/fts5/fts5_index.c | 82 ++++++++++++++++++-------------------- ext/fts5/tool/loadfts5.tcl | 13 ++++++ manifest | 18 ++++----- manifest.uuid | 2 +- 6 files changed, 77 insertions(+), 54 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 7b7a86d8eb..d7172e315e 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -86,6 +86,7 @@ struct Fts5Config { int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ int nAutomerge; /* 'automerge' setting */ + int nCrisisMerge; /* Maximum allowed segments per level */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ }; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index ecf24dcd48..438cdf370a 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -17,6 +17,7 @@ #define FTS5_DEFAULT_PAGE_SIZE 1000 #define FTS5_DEFAULT_AUTOMERGE 4 +#define FTS5_DEFAULT_CRISISMERGE 16 /* Maximum allowed page size */ #define FTS5_MAX_PAGE_SIZE (128*1024) @@ -717,6 +718,19 @@ int sqlite3Fts5ConfigSetValue( } } + else if( 0==sqlite3_stricmp(zKey, "crisismerge") 
){ + int nCrisisMerge = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + nCrisisMerge = sqlite3_value_int(pVal); + } + if( nCrisisMerge<0 ){ + if( pbBadkey ) *pbBadkey = 1; + }else{ + if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; + pConfig->nCrisisMerge = nCrisisMerge; + } + } + else if( 0==sqlite3_stricmp(zKey, "rank") ){ const char *zIn = (const char*)sqlite3_value_text(pVal); char *zRank; @@ -749,6 +763,7 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ /* Set default values */ pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; + pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; zSql = sqlite3_mprintf(zSelect, pConfig->zDb, pConfig->zName); if( zSql==0 ){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index d7cc3da1db..3cd4892a59 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -43,7 +43,6 @@ #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ -#define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ @@ -293,7 +292,6 @@ typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ char *zDataTbl; /* Name of %_data table */ - int nCrisisMerge; /* Maximum allowed segments per level */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* @@ -1105,8 +1103,8 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){ if( !pData ) return 0; p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); - if( p->rc==SQLITE_OK && p->pConfig->iCookie!=iCookie ){ - p->rc = sqlite3Fts5ConfigLoad(p->pConfig, iCookie); + if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ + p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } fts5DataRelease(pData); @@ -1250,7 +1248,6 @@ static void fts5StructurePromote( int 
iPromote = -1; int szPromote; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ - Fts5StructureLevel *pTst; int szSeg; /* Size of segment just written */ @@ -1259,11 +1256,11 @@ static void fts5StructurePromote( /* Check for condition (a) */ for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); - pTst = &pStruct->aLevel[iTst]; - assert( pTst->nMerge==0 ); if( iTst>=0 ){ int i; int szMax = 0; + Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; + assert( pTst->nMerge==0 ); for(i=0; inSeg; i++){ int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; if( sz>szMax ) szMax = sz; @@ -2483,28 +2480,6 @@ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ return (p->rc || pIter->chunk.pLeaf==0); } -/* -** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken) -** in hash table for index iIdx. If iIdx is zero, this is the main terms -** index. Values of 1 and greater for iIdx are prefix indexes. -** -** If an OOM error is encountered, set the Fts5Index.rc error code -** accordingly. -*/ -static void fts5AddTermToHash( - Fts5Index *p, /* Index object to write to */ - int iIdx, /* Entry in p->aHash[] to update */ - int iCol, /* Column token appears in (-ve -> delete) */ - int iPos, /* Position of token within column */ - const char *pToken, int nToken /* Token to add or remove to or from index */ -){ - if( p->rc==SQLITE_OK ){ - p->rc = sqlite3Fts5HashWrite( - p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken - ); - } -} - /* ** Allocate a new segment-id for the structure pStruct. 
** @@ -3228,7 +3203,7 @@ static void fts5IndexCrisisMerge( int iLvl = 0; while( p->rc==SQLITE_OK && iLvlnLevel - && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge + && pStruct->aLevel[iLvl].nSeg>=p->pConfig->nCrisisMerge ){ fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); fts5StructurePromote(p, iLvl+1, pStruct); @@ -4000,6 +3975,29 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ */ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ assert( p->rc==SQLITE_OK ); + + /* Allocate hash tables if they have not already been allocated */ + if( p->apHash==0 ){ + int i; + int rc = SQLITE_OK; + int nHash = p->pConfig->nPrefix + 1; + Fts5Hash **apNew; + + apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash); + for(i=0; rc==SQLITE_OK && inPendingData); + } + if( rc==SQLITE_OK ){ + p->apHash = apNew; + }else{ + for(i=0; iiWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ fts5IndexFlush(p); } @@ -4071,7 +4069,6 @@ int sqlite3Fts5IndexOpen( memset(p, 0, sizeof(Fts5Index)); p->pConfig = pConfig; - p->nCrisisMerge = FTS5_CRISIS_MERGE; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); @@ -4196,29 +4193,26 @@ int sqlite3Fts5IndexWrite( const char *pToken, int nToken /* Token to add or remove to or from index */ ){ int i; /* Used to iterate through indexes */ + int rc; /* Return code */ Fts5Config *pConfig = p->pConfig; - assert( p->rc==SQLITE_OK ); - /* Allocate hash tables if they have not already been allocated */ - if( p->apHash==0 ){ - int nHash = pConfig->nPrefix + 1; - p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash); - for(i=0; p->rc==SQLITE_OK && irc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData); - } - } + assert( p->rc==SQLITE_OK ); /* Add the new token to the main terms hash table. And to each of the ** prefix hash tables that it is large enough for. 
*/ - fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); - for(i=0; inPrefix; i++){ + rc = sqlite3Fts5HashWrite( + p->apHash[0], p->iWriteRowid, iCol, iPos, pToken, nToken + ); + for(i=0; inPrefix && rc==SQLITE_OK; i++){ int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); if( nByte ){ - fts5AddTermToHash(p, i+1, iCol, iPos, pToken, nByte); + rc = sqlite3Fts5HashWrite( + p->apHash[i+1], p->iWriteRowid, iCol, iPos, pToken, nByte + ); } } - return fts5IndexReturn(p); + return rc; } /* diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index c63564cd0a..034286bc60 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -29,6 +29,7 @@ proc usage {} { puts stderr " -porter (use porter tokenizer)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" + puts stderr " -crisismerge N (set the crisismerge parameter to N)" exit 1 } @@ -36,6 +37,7 @@ set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(automerge) -1 +set O(crisismerge) -1 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] @@ -64,6 +66,11 @@ for {set i 0} {$i < $nOpt} {incr i} { set O(automerge) [lindex $argv $i] } + -crisismerge { + if { [incr i]>=$nOpt } usage + set O(crisismerge) [lindex $argv $i] + } + default { usage } @@ -82,6 +89,12 @@ db transaction { db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) } } } + if {$O(crisismerge)>=0} { + if {$O(vtab) == "fts5"} { + db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))} + } else { + } + } load_hierachy [lindex $argv end] } diff --git a/manifest b/manifest index ae6189124b..5ed6b12d4c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sfurther\stests\sand\sfixes\sfor\sfts5. -D 2015-01-22T19:13:08.439 +C Remove\ssome\sredundant\scode\sfrom\sfts5. 
+D 2015-01-23T06:50:33.338 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 0ba5a8f27e1aa4deab82f0fc295d55f67dfe7f34 F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h 99da8551098bb23fd94d0aa3f4ae1a411ee630b4 +F ext/fts5/fts5Int.h da4ad7558c2284fdf3297f907e2c5454a2237e15 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 -F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399 +F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 7a87f9f2eae2216c710064821fa0621ac6a8ce7b -F ext/fts5/fts5_index.c ee7b141adde3dbdaa56f1e198c06a0786d298126 +F ext/fts5/fts5_index.c 604e346f7a04b87f11090b91a80afa50bc74f88b F ext/fts5/fts5_storage.c d56722960982d0c48ba1b88d9001fefed8cff1a4 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -146,7 +146,7 @@ F ext/fts5/test/fts5rowid.test db482328fe9bf78bb6a09f2dbf055e2caeaac00a F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee -F ext/fts5/tool/loadfts5.tcl 17c9771fb225b6b7ddd02a698fc7f320eadd7b15 +F ext/fts5/tool/loadfts5.tcl 0d39b916550097a3b714060bfc1164a4a9b73f4c F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ 
-1282,7 +1282,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c020a291ed293a66d21c5885e50a7fee04aa6366 -R 244beb886a9d1f5f10328b67a9ad3f5b +P 5b295897153e9b26cd0d2e7ea112a4d461d0a665 +R 02a6ea9937d0b3e40d0a3982c5f888d6 U dan -Z 4d510e0e441ea9491ac2e8425faae5c2 +Z 8b5a907ba25d2b7d394f8c2162c154cb diff --git a/manifest.uuid b/manifest.uuid index 75fced3560..63b8637677 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5b295897153e9b26cd0d2e7ea112a4d461d0a665 \ No newline at end of file +939b7a5de25e064bdf08e03864c35ab718da6f6f \ No newline at end of file From 641cb4360a22a8dc0340bcbec187ea23464d2b0b Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 23 Jan 2015 17:43:21 +0000 Subject: [PATCH 081/206] Fix compression of keys stored on internal segment b-tree nodes by fts5. FossilOrigin-Name: 51444f67c0cc58a3023eb1cd78e7cf889da6c80f --- ext/fts5/fts5.c | 2 +- ext/fts5/fts5Int.h | 7 +++ ext/fts5/fts5_index.c | 85 +++++++++++++++++++++------------- ext/fts5/fts5_storage.c | 10 ++-- ext/fts5/test/fts5_common.tcl | 14 ++++++ ext/fts5/test/fts5corrupt.test | 75 ++++++++++++++++++++++++++++++ manifest | 21 +++++---- manifest.uuid | 2 +- 8 files changed, 167 insertions(+), 49 deletions(-) create mode 100644 ext/fts5/test/fts5corrupt.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index cc6457df6a..c61598e477 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1021,7 +1021,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ }else{ rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK ){ - rc = SQLITE_CORRUPT_VTAB; + rc = FTS5_CORRUPT; } } } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index d7172e315e..728b6461a1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -33,6 +33,13 @@ #define FTS5_RANK_NAME "rank" #define FTS5_ROWID_NAME "rowid" 
+#ifdef SQLITE_DEBUG +# define FTS5_CORRUPT sqlite3Fts5Corrupt() +int sqlite3Fts5Corrupt(void); +#else +# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB +#endif + /************************************************************************** ** Interface to code in fts5.c. */ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 3cd4892a59..0cb2c2ee31 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -256,10 +256,7 @@ FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno) #ifdef SQLITE_DEBUG -static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } -# define FTS5_CORRUPT fts5Corrupt() -#else -# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB +int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } #endif @@ -373,7 +370,7 @@ struct Fts5Structure { struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ - Fts5Buffer term; /* Buffer containing previous term on page */ + Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5SegWriter { int iIdx; /* Index to write to */ @@ -383,6 +380,7 @@ struct Fts5SegWriter { i64 iPrevRowid; /* Previous docid written to current leaf */ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ u8 bFirstRowidInPage; /* True if next rowid is first in page */ + u8 bFirstTermInPage; /* True if next term will be first in leaf */ int nLeafWritten; /* Number of leaf pages written */ int nEmpty; /* Number of contiguous term-less nodes */ Fts5Buffer dlidx; /* Doclist index */ @@ -2677,7 +2675,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ Fts5PageWriter *pPage = &pWriter->aWriter[0]; i64 iRowid; - if( pPage->term.n==0 ){ + if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5WriteBtreeNoTerm(p, pWriter); @@ -2689,12 +2687,14 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ /* Initialize the next page. 
*/ fts5BufferZero(&pPage->buf); - fts5BufferZero(&pPage->term); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; + + /* The new leaf holds no terms */ + pWriter->bFirstTermInPage = 1; } /* @@ -2717,23 +2717,38 @@ static void fts5WriteAppendTerm( /* Zero the first term and first docid fields */ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); - assert( pPage->term.n==0 ); + assert( pWriter->bFirstTermInPage ); } if( p->rc ) return; - if( pPage->term.n==0 ){ + if( pWriter->bFirstTermInPage ){ /* Update the "first term" field of the page header. */ assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); fts5PutU16(&pPage->buf.p[2], pPage->buf.n); nPrefix = 0; - if( pWriter->aWriter[0].pgno!=1 ){ - fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm); + if( pPage->pgno!=1 ){ + /* This is the first term on a leaf that is not the leftmost leaf in + ** the segment b-tree. In this case it is necessary to add a term to + ** the b-tree hierarchy that is (a) larger than the largest term + ** already written to the segment and (b) smaller than or equal to + ** this term. In other words, a prefix of (pTerm/nTerm) that is one + ** byte longer than the longest prefix (pTerm/nTerm) shares with the + ** previous term. + ** + ** Usually, the previous term is available in pPage->term. The exception + ** is if this is the first term written in an incremental-merge step. + ** In this case the previous term is not available, so just write a + ** copy of (pTerm/nTerm) into the parent node. This is slightly + ** inefficient, but still correct. 
*/ + int n = nTerm; + if( pPage->term.n ){ + n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); + } + fts5WriteBtreeTerm(p, pWriter, n, pTerm); pPage = &pWriter->aWriter[0]; } }else{ - nPrefix = fts5PrefixCompress( - pPage->term.n, pPage->term.p, nTerm, pTerm - ); + nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); } @@ -2744,6 +2759,7 @@ static void fts5WriteAppendTerm( /* Update the Fts5PageWriter.term field. */ fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); + pWriter->bFirstTermInPage = 0; pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; @@ -2900,6 +2916,7 @@ static void fts5WriteInit( if( pWriter->aWriter==0 ) return; pWriter->nWriter = 1; pWriter->aWriter[0].pgno = 1; + pWriter->bFirstTermInPage = 1; } static void fts5WriteInitForAppend( @@ -2937,6 +2954,8 @@ static void fts5WriteInitForAppend( pWriter->nEmpty = pSeg->pgnoLast-1; } assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); + pWriter->bFirstTermInPage = 1; + assert( pWriter->aWriter[0].term.n==0 ); } } @@ -3918,6 +3937,21 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ int iIdx; /* Used to iterate through indexes */ u64 cksum2 = 0; /* Checksum based on contents of indexes */ + /* Check that the internal nodes of each segment match the leaves */ + for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ + Fts5Structure *pStruct = fts5StructureRead(p, iIdx); + if( pStruct ){ + int iLvl, iSeg; + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); + } + } + } + fts5StructureRelease(pStruct); + } + /* Check that the checksum of the index matches the argument checksum */ for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ Fts5MultiSegIter *pIter; @@ -3950,21 +3984,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ } if( 
p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; - /* Check that the internal nodes of each segment match the leaves */ - for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ - Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - if( pStruct ){ - int iLvl, iSeg; - for(iLvl=0; iLvlnLevel; iLvl++){ - for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ - Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; - fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); - } - } - } - fts5StructureRelease(pStruct); - } - return fts5IndexReturn(p); } @@ -3990,10 +4009,12 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ if( rc==SQLITE_OK ){ p->apHash = apNew; }else{ - for(i=0; inCol; i++){ - if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB; + if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT; } } @@ -812,12 +812,12 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); - if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB; + if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } if( rc==SQLITE_OK ){ i64 nRow; rc = fts5StorageCount(p, "docsize", &nRow); - if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB; + if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } /* Pass the expected checksum down to the FTS index module. 
It will @@ -913,7 +913,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ } rc = sqlite3_reset(pLookup); if( bCorrupt && rc==SQLITE_OK ){ - rc = SQLITE_CORRUPT_VTAB; + rc = FTS5_CORRUPT; } } return rc; diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 94e82d8684..9c612d202d 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -123,6 +123,20 @@ proc fts5_level_segs {tbl} { set ret } +proc fts5_level_segids {tbl} { + set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" + set ret [list] + foreach L [lrange [db one $sql] 1 end] { + set lvl [list] + foreach S [lrange $L 2 end] { + regexp {id=([1234567890]*)} $S -> segid + lappend lvl $segid + } + lappend ret $lvl + } + set ret +} + proc fts5_rnddoc {n} { set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] set doc [list] diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test new file mode 100644 index 0000000000..052563f779 --- /dev/null +++ b/ext/fts5/test/fts5corrupt.test @@ -0,0 +1,75 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5corrupt + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); +} + +do_test 1.1 { + db transaction { + for {set i 1} {$i < 200} {incr i} { + set doc [list [string repeat x $i] [string repeat y $i]] + execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) } + } + } + fts5_level_segs t1 +} {1} +db_save + +do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +set segid [lindex [fts5_level_segids t1] 0] + +do_test 1.3 { + execsql { + DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4); + } + catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {1 {SQL logic error or missing database}} + +do_test 1.4 { + db_restore_and_reopen + execsql { + UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE + rowid = fts5_rowid('segment', 0, $segid, 0, 4); + } + catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {1 {database disk image is malformed}} + +db_restore_and_reopen +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} + + + +#-------------------------------------------------------------------- +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t2 USING fts5(x); + INSERT INTO t2(t2, rank) VALUES('pgsz', 32); +} +do_test 2.1 { + db transaction { + for {set i 0} {$i < 20} {incr i} { + execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') } + } + for {set i 0} {$i < 20} {incr i} { + execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') } + } + } +} {} +db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r} + +finish_test + diff --git a/manifest b/manifest index 5ed6b12d4c..9fdcf2e5e5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\ssome\sredundant\scode\sfrom\sfts5. 
-D 2015-01-23T06:50:33.338 +C Fix\scompression\sof\skeys\sstored\son\sinternal\ssegment\sb-tree\snodes\sby\sfts5. +D 2015-01-23T17:43:21.454 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,22 +104,22 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 0ba5a8f27e1aa4deab82f0fc295d55f67dfe7f34 +F ext/fts5/fts5.c 41b852b654f79f522668bc7ba292755fb261f855 F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h da4ad7558c2284fdf3297f907e2c5454a2237e15 +F ext/fts5/fts5Int.h 1d8f968b8ff71de15176acf8f4b14a2bdebcb6e3 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 7a87f9f2eae2216c710064821fa0621ac6a8ce7b -F ext/fts5/fts5_index.c 604e346f7a04b87f11090b91a80afa50bc74f88b -F ext/fts5/fts5_storage.c d56722960982d0c48ba1b88d9001fefed8cff1a4 +F ext/fts5/fts5_index.c dda2ed8dab9910aedd8de0169ca029c5336b9e42 +F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl 7db772d34fa0139d4b58d2b321928c9ccd30f699 +F ext/fts5/test/fts5_common.tcl 
d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 F ext/fts5/test/fts5aa.test 8ddbbcbedab67101dc9a86fd5c39d78b0e06515f F ext/fts5/test/fts5ab.test 3f3ad2fb9ed60a0df57b626fa6fe6ef41d4deee0 F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 @@ -134,6 +134,7 @@ F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 +F ext/fts5/test/fts5corrupt.test b81ed310018ddffb34da7802f74018d94a2b3961 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test f9bafb61b40061ad19b61d15003c5faeea4a57b5 @@ -1282,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 5b295897153e9b26cd0d2e7ea112a4d461d0a665 -R 02a6ea9937d0b3e40d0a3982c5f888d6 +P 939b7a5de25e064bdf08e03864c35ab718da6f6f +R 7096f8b96e0f85e1680222eb9ee6721b U dan -Z 8b5a907ba25d2b7d394f8c2162c154cb +Z 0515045012673cdccd49d82241057133 diff --git a/manifest.uuid b/manifest.uuid index 63b8637677..324db551da 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -939b7a5de25e064bdf08e03864c35ab718da6f6f \ No newline at end of file +51444f67c0cc58a3023eb1cd78e7cf889da6c80f \ No newline at end of file From 8ac3025419ba812747fdc685723946778ee54c54 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 24 Jan 2015 19:57:03 +0000 Subject: [PATCH 082/206] Have fts5 store rowids in ascending order. Query speed is virtually the same regardless of rowid order, and ascending order makes some insert optimizations easier. 
FossilOrigin-Name: 5206ca6005bfa9dfc7346d4b89430c9748d32c10 --- ext/fts5/fts5.c | 18 ++-- ext/fts5/fts5Int.h | 18 +++- ext/fts5/fts5_expr.c | 50 +++++----- ext/fts5/fts5_hash.c | 91 +++++++++--------- ext/fts5/fts5_index.c | 171 ++++++++++++++++----------------- ext/fts5/test/fts5aa.test | 4 +- ext/fts5/test/fts5ab.test | 36 +++++-- ext/fts5/test/fts5ac.test | 9 +- ext/fts5/test/fts5ad.test | 8 +- ext/fts5/test/fts5ae.test | 4 +- ext/fts5/test/fts5ak.test | 26 ++--- ext/fts5/test/fts5al.test | 19 ++-- ext/fts5/test/fts5content.test | 20 ++-- ext/fts5/test/fts5corrupt.test | 19 ++-- ext/fts5/test/fts5fault1.test | 10 +- ext/fts5/test/fts5rowid.test | 2 +- manifest | 42 ++++---- manifest.uuid | 2 +- 18 files changed, 290 insertions(+), 259 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index c61598e477..d748a8b40f 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -505,7 +505,7 @@ static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ static int fts5StmtType(int idxNum){ if( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ){ - return (idxNum&FTS5_ORDER_ASC) ? FTS5_STMT_SCAN_ASC : FTS5_STMT_SCAN_DESC; + return (idxNum&FTS5_ORDER_DESC) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC; } return FTS5_STMT_LOOKUP; } @@ -652,7 +652,7 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ return rc; } -static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ +static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ Fts5Config *pConfig = pTab->pConfig; Fts5Sorter *pSorter; int nPhrase; @@ -680,7 +680,7 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ pConfig->zDb, pConfig->zName, zRank, pConfig->zName, (zRankArgs ? ", " : ""), (zRankArgs ? zRankArgs : ""), - bAsc ? "ASC" : "DESC" + bDesc ? 
"DESC" : "ASC" ); if( zSql==0 ){ rc = SQLITE_NOMEM; @@ -706,9 +706,9 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ return rc; } -static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ +static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ int rc; - rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); + rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bDesc); if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ CsrFlagSet(pCsr, FTS5CSR_EOF); } @@ -873,7 +873,7 @@ static int fts5FilterMethod( ){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0); + int bDesc = ((idxNum & FTS5_ORDER_DESC) ? 1 : 0); int rc = SQLITE_OK; assert( nVal<=2 ); @@ -894,7 +894,7 @@ static int fts5FilterMethod( assert( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ); pCsr->idxNum = FTS5_PLAN_SOURCE; pCsr->pExpr = pTab->pSortCsr->pExpr; - rc = fts5CursorFirst(pTab, pCsr, bAsc); + rc = fts5CursorFirst(pTab, pCsr, bDesc); }else{ int ePlan = FTS5_PLAN(idxNum); pCsr->idxNum = idxNum; @@ -913,9 +913,9 @@ static int fts5FilterMethod( rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ if( ePlan==FTS5_PLAN_MATCH ){ - rc = fts5CursorFirst(pTab, pCsr, bAsc); + rc = fts5CursorFirst(pTab, pCsr, bDesc); }else{ - rc = fts5CursorFirstSorted(pTab, pCsr, bAsc); + rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); } } } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 728b6461a1..50003632d1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -229,7 +229,7 @@ typedef struct Fts5IndexIter Fts5IndexIter; ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ -#define FTS5INDEX_QUERY_ASC 0x0002 /* Docs in ascending rowid order */ +#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ /* ** Create/destroy an Fts5Index object. 
@@ -365,6 +365,13 @@ int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); */ typedef struct Fts5Hash Fts5Hash; +typedef struct Fts5Data Fts5Data; +struct Fts5Data { + u8 *p; /* Pointer to buffer containing record */ + int n; /* Size of record in bytes */ + int nRef; /* Ref count */ +}; + /* ** Create a hash table, free a hash table. */ @@ -395,6 +402,11 @@ int sqlite3Fts5HashIterate( int (*xTermDone)(void*) ); +int sqlite3Fts5HashQuery( + Fts5Hash*, /* Hash table to query */ + const char *pTerm, int nTerm, /* Query term */ + Fts5Data **ppData /* OUT: Query result */ +); /* @@ -470,7 +482,7 @@ int sqlite3Fts5ExprNew( ); /* -** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bAsc); +** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bDesc); ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); ** rc = sqlite3Fts5ExprNext(pExpr) ** ){ @@ -478,7 +490,7 @@ int sqlite3Fts5ExprNew( ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); ** } */ -int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, int bAsc); +int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, int bDesc); int sqlite3Fts5ExprNext(Fts5Expr*); int sqlite3Fts5ExprEof(Fts5Expr*); i64 sqlite3Fts5ExprRowid(Fts5Expr*); diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 71f8b48069..bbf8c68acb 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -32,7 +32,7 @@ void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); struct Fts5Expr { Fts5Index *pIndex; Fts5ExprNode *pRoot; - int bAsc; + int bDesc; /* Iterate in descending docid order */ int nPhrase; /* Number of phrases in expression */ Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ }; @@ -600,9 +600,9 @@ static int fts5ExprNearAdvanceAll( } /* -** Advance iterator pIter until it points to a value equal to or smaller -** than the initial value of *piMin. If this means the iterator points -** to a value smaller than *piMin, update *piMin to the new smallest value. 
+** Advance iterator pIter until it points to a value equal to or laster +** than the initial value of *piLast. If this means the iterator points +** to a value laster than *piLast, update *piLast to the new lastest value. ** ** If the iterator reaches EOF, set *pbEof to true before returning. If ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc @@ -610,7 +610,7 @@ static int fts5ExprNearAdvanceAll( */ static int fts5ExprAdvanceto( Fts5IndexIter *pIter, /* Iterator to advance */ - int bAsc, /* True if iterator is "rowid ASC" */ + int bDesc, /* True if iterator is "rowid DESC" */ i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ int *pRc, /* OUT: Error code */ int *pbEof /* OUT: Set to true if EOF */ @@ -619,14 +619,14 @@ static int fts5ExprAdvanceto( i64 iRowid; iRowid = sqlite3Fts5IterRowid(pIter); - if( (bAsc==0 && iRowid>iLast) || (bAsc && iRowidiRowid) || (bDesc && iLast=iLast) ); + assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); } *piLast = iRowid; @@ -656,12 +656,13 @@ static int fts5ExprNearNextRowidMatch( i64 iLast; /* Lastest rowid any iterator points to */ int bMatch; /* True if all terms are at the same rowid */ - /* Set iLast, the lastest rowid any iterator points to. If the iterator - ** skips through rowids in the default descending order, this means the - ** minimum rowid. Or, if the iterator is "ORDER BY rowid ASC", then it - ** means the maximum rowid. */ + /* Initialize iLast, the "lastest" rowid any iterator points to. If the + ** iterator skips through rowids in the default ascending order, this means + ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it + ** means the minimum rowid. 
*/ iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); - if( bFromValid && (iFrom>iLast)==(pExpr->bAsc!=0) ){ + if( bFromValid && (iFrom>iLast)==(pExpr->bDesc==0) ){ + assert( pExpr->bDesc || iFrom>=iLast ); iLast = iFrom; } @@ -673,7 +674,7 @@ static int fts5ExprNearNextRowidMatch( Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; i64 iRowid = sqlite3Fts5IterRowid(pIter); if( iRowid!=iLast ) bMatch = 0; - if( fts5ExprAdvanceto(pIter, pExpr->bAsc, &iLast, &rc, &pNode->bEof) ){ + if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast, &rc, &pNode->bEof) ){ return rc; } } @@ -774,7 +775,7 @@ static int fts5ExprNearInitAll( rc = sqlite3Fts5IndexQuery( pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0), + (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), &pTerm->pIter ); assert( rc==SQLITE_OK || pTerm->pIter==0 ); @@ -810,7 +811,7 @@ static int fts5NodeCompare( ){ if( p2->bEof ) return -1; if( p1->bEof ) return +1; - if( pExpr->bAsc ){ + if( pExpr->bDesc==0 ){ if( p1->iRowidiRowid ) return -1; return (p1->iRowid > p2->iRowid); }else{ @@ -911,18 +912,17 @@ static int fts5ExprNodeNextMatch( Fts5ExprNode *p1 = pNode->pLeft; Fts5ExprNode *p2 = pNode->pRight; - while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){ Fts5ExprNode *pAdv; - assert( pExpr->bAsc==0 || pExpr->bAsc==1 ); - if( pExpr->bAsc==(p1->iRowid < p2->iRowid) ){ + assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); + if( pExpr->bDesc==(p1->iRowid > p2->iRowid) ){ pAdv = p1; - if( bFromValid==0 || pExpr->bAsc==(p2->iRowid > iFrom) ){ + if( bFromValid==0 || pExpr->bDesc==(p2->iRowid < iFrom) ){ iFrom = p2->iRowid; } }else{ pAdv = p2; - if( bFromValid==0 || pExpr->bAsc==(p1->iRowid > iFrom) ){ + if( bFromValid==0 || pExpr->bDesc==(p1->iRowid < iFrom) ){ iFrom = p1->iRowid; } } @@ -1003,18 +1003,18 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ /* ** Begin iterating through the set of documents in 
index pIdx matched by -** the MATCH expression passed as the first argument. If the "bAsc" parameter -** is passed a non-zero value, iteration is in ascending rowid order. Or, -** if it is zero, in descending order. +** the MATCH expression passed as the first argument. If the "bDesc" parameter +** is passed a non-zero value, iteration is in descending rowid order. Or, +** if it is zero, in ascending order. ** ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It ** is not considered an error if the query does not match any documents. */ -int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bAsc){ +int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ int rc = SQLITE_OK; if( p->pRoot ){ p->pIndex = pIdx; - p->bAsc = bAsc; + p->bDesc = bDesc; rc = fts5ExprNodeFirst(p, p->pRoot); } return rc; diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 61eed74be4..41b2eb774a 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -55,7 +55,7 @@ struct Fts5HashEntry { Fts5HashEntry *pNext; /* Next hash entry with same hash-key */ int nAlloc; /* Total size of allocation */ - int iRowidOff; /* Offset of last rowid written */ + int iSzPoslist; /* Offset of space for 4-byte poslist size */ int nData; /* Total bytes of data (incl. structure) */ int iCol; /* Column of last value written */ @@ -64,6 +64,16 @@ struct Fts5HashEntry { char zKey[0]; /* Nul-terminated entry key */ }; +/* +** Format value iVal as a 4-byte varint and write it to buffer a[]. 4 bytes +** are used even if the value could fit in a smaller amount of space. +*/ +static void fts5Put4ByteVarint(u8 *a, int iVal){ + a[0] = (0x80 | (u8)(iVal >> 21)); + a[1] = (0x80 | (u8)(iVal >> 14)); + a[2] = (0x80 | (u8)(iVal >> 7)); + a[3] = (0x7F & (u8)(iVal)); +} /* ** Allocate a new hash table. @@ -161,25 +171,6 @@ static int fts5HashResize(Fts5Hash *pHash){ return SQLITE_OK; } -/* -** Store the 32-bit integer passed as the second argument in buffer p. 
-*/ -static int fts5PutNativeInt(u8 *p, int i){ - assert( sizeof(i)==4 ); - memcpy(p, &i, sizeof(i)); - return sizeof(i); -} - -/* -** Read and return the 32-bit integer stored in buffer p. -*/ -static int fts5GetNativeU32(u8 *p){ - int i; - assert( sizeof(i)==4 ); - memcpy(&i, p, sizeof(i)); - return i; -} - int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ @@ -192,7 +183,7 @@ int sqlite3Fts5HashWrite( u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ - /* Attempt to locate an existing hash object */ + /* Attempt to locate an existing hash entry */ for(p=pHash->aSlot[iHash]; p; p=p->pNext){ if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break; } @@ -214,26 +205,27 @@ int sqlite3Fts5HashWrite( p->nAlloc = nByte; memcpy(p->zKey, pToken, nToken); p->zKey[nToken] = '\0'; - p->iRowidOff = p->nData = nToken + 1 + sizeof(Fts5HashEntry); + p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); + p->iSzPoslist = p->nData; + p->nData += 4; p->iRowid = iRowid; p->pNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; - nIncr += p->nData; } /* Check there is enough space to append a new entry. Worst case scenario ** is: ** - ** + 4 bytes for the previous entry size field, ** + 9 bytes for a new rowid, + ** + 4 bytes reserved for the "poslist size" varint. ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max). */ - if( (p->nAlloc - p->nData) < (4 + 9 + 1 + 3 + 5) ){ + if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ int nNew = p->nAlloc * 2; Fts5HashEntry *pNew; Fts5HashEntry **pp; @@ -250,9 +242,11 @@ int sqlite3Fts5HashWrite( /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. 
*/ if( iRowid!=p->iRowid ){ - p->nData += fts5PutNativeInt(&pPtr[p->nData], p->nData - p->iRowidOff); - p->iRowidOff = p->nData; - p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid); + assert( p->iSzPoslist>0 ); + fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid); + p->iSzPoslist = p->nData; + p->nData += 4; p->iCol = 0; p->iPos = 0; p->iRowid = iRowid; @@ -379,28 +373,31 @@ int sqlite3Fts5HashIterate( while( pList ){ Fts5HashEntry *pNext = pList->pNext; if( rc==SQLITE_OK ){ + const int nSz = pList->nData - pList->iSzPoslist - 4; + const int nKey = strlen(pList->zKey); + i64 iRowid = 0; u8 *pPtr = (u8*)pList; - int nKey = strlen(pList->zKey); - int iOff = pList->iRowidOff; - int iEnd = sizeof(Fts5HashEntry) + nKey + 1; - int nByte = pList->nData - pList->iRowidOff; + int iOff = sizeof(Fts5HashEntry) + nKey + 1; + /* Fill in the final poslist size field */ + fts5Put4ByteVarint(&pPtr[pList->iSzPoslist], nSz); + + /* Issue the new-term callback */ rc = xTerm(pCtx, pList->zKey, nKey); - while( rc==SQLITE_OK && iOff ){ - int nVarint; - i64 iRowid; - nVarint = getVarint(&pPtr[iOff], (u64*)&iRowid); - rc = xEntry(pCtx, iRowid, &pPtr[iOff+nVarint], nByte-nVarint); - if( iOff==iEnd ){ - iOff = 0; - }else{ - nByte = fts5GetNativeU32(&pPtr[iOff-sizeof(int)]); - iOff = iOff - sizeof(int) - nByte; - } - } - if( rc==SQLITE_OK ){ - rc = xTermDone(pCtx); + + /* Issue the xEntry callbacks */ + while( rc==SQLITE_OK && iOffnData ){ + i64 iDelta; /* Rowid delta value */ + int nPoslist; /* Size of position list in bytes */ + iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); + iRowid += iDelta; + iOff += fts5GetVarint32(&pPtr[iOff], nPoslist); + rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist); + iOff += nPoslist; } + + /* Issue the term-done callback */ + if( rc==SQLITE_OK ) rc = xTermDone(pCtx); } sqlite3_free(pList); pList = pNext; @@ -409,5 +406,3 @@ int sqlite3Fts5HashIterate( return rc; } - - 
diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 0cb2c2ee31..602046a302 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -44,7 +44,7 @@ #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ -#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ +#define FTS5_MIN_DLIDX_SIZE 4000 /* Add dlidx if this many empty pages */ /* ** Details: @@ -270,7 +270,6 @@ int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; -typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; @@ -311,7 +310,7 @@ struct Fts5Index { }; struct Fts5DoclistIter { - int bAsc; + int bDesc; /* True for DESC order, false for ASC */ u8 *a; int n; int i; @@ -333,15 +332,6 @@ struct Fts5IndexIter { Fts5Buffer poslist; /* Buffer containing current poslist */ }; -/* -** A single record read from the %_data table. -*/ -struct Fts5Data { - u8 *p; /* Pointer to buffer containing record */ - int n; /* Size of record in bytes */ - int nRef; /* Ref count */ -}; - /* ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the @@ -1483,6 +1473,11 @@ static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ } } +static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ + *piRowid = (int)fts5GetU16(&pLeaf->p[0]); + *piTerm = (int)fts5GetU16(&pLeaf->p[2]); +} + /* ** Load the next leaf page into the segment iterator. */ @@ -1503,8 +1498,15 @@ static void fts5SegIterNextPage( } /* -** Leave pIter->iLeafOffset as the offset to the size field of the first -** position list. The position list belonging to document pIter->iRowid. 
+** Fts5SegIter.iLeafOffset currently points to the first byte of the +** "nSuffix" field of a term. Function parameter nKeep contains the value +** of the "nPrefix" field (if there was one - it is passed 0 if this is +** the first term in the segment). +** +** This function populates (Fts5SegIter.term) and (Fts5SegIter.iRowid) +** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the offset to +** the size field of the first position list. The position list belonging +** to document (Fts5SegIter.iRowid). */ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ @@ -1569,11 +1571,6 @@ static void fts5SegIterInit( } } -static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ - *piRowid = (int)fts5GetU16(&pLeaf->p[0]); - *piTerm = (int)fts5GetU16(&pLeaf->p[2]); -} - /* ** This function is only ever called on iterators created by calls to ** Fts5IndexQuery() with the FTS5INDEX_QUERY_ASC flag set. 
@@ -1598,7 +1595,7 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ if( i>=n ) break; i += getVarint(&a[i], (u64*)&iDelta); if( iDelta==0 ) break; - pIter->iRowid -= iDelta; + pIter->iRowid += iDelta; if( iRowidOffset>=pIter->nRowidOffset ){ int nNew = pIter->nRowidOffset + 8; @@ -1678,7 +1675,7 @@ static int fts5SegIterIsDelete( bRet = (pLeaf->p[pIter->iLeafOffset]==0x00); }else{ Fts5Data *pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( - pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno + pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno+1 )); if( pNew ){ bRet = (pNew->p[4]==0x00); @@ -1713,7 +1710,7 @@ static void fts5SegIterNext( iOff += fts5GetVarint32(&a[iOff], nPos); iOff += nPos; getVarint(&a[iOff], (u64*)&iDelta); - pIter->iRowid += iDelta; + pIter->iRowid -= iDelta; }else{ fts5SegIterReverseNewPage(p, pIter); } @@ -1748,7 +1745,7 @@ static void fts5SegIterNext( pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); } }else{ - pIter->iRowid -= iDelta; + pIter->iRowid += iDelta; } }else{ iOff = 0; @@ -1922,7 +1919,7 @@ static void fts5SegIterSeekInit( int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0); int bDlidx = 0; /* True if there is a doclist-index */ - assert( bGe==0 || (flags & FTS5INDEX_QUERY_ASC)==0 ); + assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; @@ -1980,13 +1977,13 @@ static void fts5SegIterSeekInit( if( p->rc==SQLITE_OK && bGe==0 ){ pIter->flags |= FTS5_SEGITER_ONETERM; if( pIter->pLeaf ){ - if( flags & FTS5INDEX_QUERY_ASC ){ + if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; } if( bDlidx ){ fts5SegIterLoadDlidx(p, iIdx, pIter); } - if( flags & FTS5INDEX_QUERY_ASC ){ + if( flags & FTS5INDEX_QUERY_DESC ){ fts5SegIterReverse(p, iIdx, pIter); } } @@ -2042,7 +2039,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ assert( i2>i1 ); assert( i2!=0 ); if( p1->iRowid==p2->iRowid 
) return i2; - res = ((p1->iRowid < p2->iRowid)==pIter->bRev) ? -1 : +1; + res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; } assert( res!=0 ); if( res<0 ){ @@ -2056,35 +2053,6 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ return 0; } -/* -** Free the iterator object passed as the second argument. -*/ -static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){ - if( pIter ){ - int i; - for(i=0; inSeg; i++){ - fts5SegIterClear(&pIter->aSeg[i]); - } - sqlite3_free(pIter); - } -} - -static void fts5MultiIterAdvanced( - Fts5Index *p, /* FTS5 backend to iterate within */ - Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ - int iChanged, /* Index of sub-iterator just advanced */ - int iMinset /* Minimum entry in aFirst[] to set */ -){ - int i; - for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ - int iEq; - if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ - fts5SegIterNext(p, &pIter->aSeg[iEq]); - i = pIter->nSeg + iEq; - } - } -} - /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. ** It is an error if leaf iLeafPgno contains no rowid. @@ -2170,6 +2138,36 @@ static void fts5SegIterNextFrom( } } + +/* +** Free the iterator object passed as the second argument. 
+*/ +static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){ + if( pIter ){ + int i; + for(i=0; inSeg; i++){ + fts5SegIterClear(&pIter->aSeg[i]); + } + sqlite3_free(pIter); + } +} + +static void fts5MultiIterAdvanced( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ + int iChanged, /* Index of sub-iterator just advanced */ + int iMinset /* Minimum entry in aFirst[] to set */ +){ + int i; + for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ + fts5SegIterNext(p, &pIter->aSeg[iEq]); + i = pIter->nSeg + iEq; + } + } +} + /* ** Move the iterator to the next entry. ** @@ -2248,7 +2246,7 @@ static void fts5MultiIterNew( pNew->nSeg = nSlot; pNew->aSeg = (Fts5SegIter*)&pNew[1]; pNew->aFirst = (u16*)&pNew->aSeg[nSlot]; - pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_ASC)); + pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); pNew->bSkipEmpty = bSkipEmpty; /* Initialize each of the component segment iterators. 
*/ @@ -2328,8 +2326,8 @@ static void fts5MultiIterNextFrom( fts5MultiIterNext(p, pIter, 1, iMatch); if( fts5MultiIterEof(p, pIter) ) break; iRowid = fts5MultiIterRowid(pIter); - if( pIter->bRev==0 && iRowid<=iMatch ) break; - if( pIter->bRev!=0 && iRowid>=iMatch ) break; + if( pIter->bRev==0 && iRowid>=iMatch ) break; + if( pIter->bRev!=0 && iRowid<=iMatch ) break; } } @@ -2794,8 +2792,8 @@ static void fts5WriteAppendRowid( if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); }else{ - assert( p->rc || iRowidiPrevRowid ); - fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->iPrevRowid - iRowid); + assert( p->rc || iRowid>pWriter->iPrevRowid ); + fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; @@ -3711,10 +3709,10 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ if( pIter->i ){ i64 iDelta; pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); - if( pIter->bAsc ){ - pIter->iRowid += iDelta; - }else{ + if( pIter->bDesc ){ pIter->iRowid -= iDelta; + }else{ + pIter->iRowid += iDelta; } }else{ pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); @@ -3729,13 +3727,13 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ static void fts5DoclistIterInit( Fts5Buffer *pBuf, - int bAsc, + int bDesc, Fts5DoclistIter *pIter ){ memset(pIter, 0, sizeof(*pIter)); pIter->a = pBuf->p; pIter->n = pBuf->n; - pIter->bAsc = bAsc; + pIter->bDesc = bDesc; fts5DoclistIterNext(pIter); } @@ -3744,14 +3742,14 @@ static void fts5DoclistIterInit( */ static void fts5MergeAppendDocid( int *pRc, /* IN/OUT: Error code */ - int bAsc, + int bDesc, Fts5Buffer *pBuf, /* Buffer to write to */ i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ i64 iRowid /* Rowid to append */ ){ if( pBuf->n==0 ){ fts5BufferAppendVarint(pRc, pBuf, iRowid); - }else if( bAsc==0 ){ + }else if( bDesc ){ 
fts5BufferAppendVarint(pRc, pBuf, *piLastRowid - iRowid); }else{ fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); @@ -3769,7 +3767,7 @@ static void fts5MergeAppendDocid( */ static void fts5MergePrefixLists( Fts5Index *p, /* FTS5 backend object */ - int bAsc, + int bDesc, Fts5Buffer *p1, /* First list to merge */ Fts5Buffer *p2 /* Second list to merge */ ){ @@ -3782,21 +3780,21 @@ static void fts5MergePrefixLists( memset(&out, 0, sizeof(out)); memset(&tmp, 0, sizeof(tmp)); - fts5DoclistIterInit(p1, bAsc, &i1); - fts5DoclistIterInit(p2, bAsc, &i2); + fts5DoclistIterInit(p1, bDesc, &i1); + fts5DoclistIterInit(p2, bDesc, &i2); while( i1.aPoslist!=0 || i2.aPoslist!=0 ){ if( i2.aPoslist==0 || (i1.aPoslist && - ( (!bAsc && i1.iRowid>i2.iRowid) || (bAsc && i1.iRowidi2.iRowid) || (!bDesc && i1.iRowidrc, bAsc, &out, &iLastRowid, i1.iRowid); + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i1.iRowid); fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist); fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); fts5DoclistIterNext(&i1); } else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ /* Copy entry from i2 */ - fts5MergeAppendDocid(&p->rc, bAsc, &out, &iLastRowid, i2.iRowid); + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist); fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); fts5DoclistIterNext(&i2); @@ -3809,7 +3807,7 @@ static void fts5MergePrefixLists( memset(&writer, 0, sizeof(writer)); /* Merge the two position lists. 
*/ - fts5MergeAppendDocid(&p->rc, bAsc, &out, &iLastRowid, i2.iRowid); + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); fts5BufferZero(&tmp); sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); @@ -3847,7 +3845,7 @@ static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ - int bAsc, /* True for "ORDER BY rowid ASC" */ + int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5IndexIter *pIter /* Populate this object */ @@ -3878,7 +3876,7 @@ static void fts5SetupPrefixIter( if( nTerm0 - && ((!bAsc && iRowid>=iLastRowid) || (bAsc && iRowid<=iLastRowid)) + && ((!bDesc && iRowid<=iLastRowid) || (bDesc && iRowid>=iLastRowid)) ){ for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ @@ -3887,14 +3885,14 @@ static void fts5SetupPrefixIter( fts5BufferSwap(&doclist, &aBuf[i]); fts5BufferZero(&doclist); }else{ - fts5MergePrefixLists(p, bAsc, &doclist, &aBuf[i]); + fts5MergePrefixLists(p, bDesc, &doclist, &aBuf[i]); fts5BufferZero(&aBuf[i]); } } } if( doclist.n==0 ){ fts5BufferAppendVarint(&p->rc, &doclist, iRowid); - }else if( bAsc==0 ){ + }else if( bDesc ){ fts5BufferAppendVarint(&p->rc, &doclist, iLastRowid - iRowid); }else{ fts5BufferAppendVarint(&p->rc, &doclist, iRowid - iLastRowid); @@ -3904,7 +3902,7 @@ static void fts5SetupPrefixIter( } for(i=0; ipDoclist = pDoclist; - fts5DoclistIterInit(&doclist, bAsc, pIter->pDoclist); + fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist); } } @@ -4273,8 +4271,8 @@ int sqlite3Fts5IndexQuery( ); } }else{ - int bAsc = (flags & FTS5INDEX_QUERY_ASC)!=0; - fts5SetupPrefixIter(p, bAsc, (const u8*)pToken, nToken, pRet); + int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; + fts5SetupPrefixIter(p, bDesc, (const u8*)pToken, nToken, pRet); } } @@ -4321,8 +4319,8 @@ int 
sqlite3Fts5IterNext(Fts5IndexIter *pIter){ static void fts5DoclistIterNextFrom(Fts5DoclistIter *p, i64 iMatch){ do{ i64 iRowid = p->iRowid; - if( p->bAsc!=0 && iRowid>=iMatch ) break; - if( p->bAsc==0 && iRowid<=iMatch ) break; + if( p->bDesc==0 && iRowid>=iMatch ) break; + if( p->bDesc!=0 && iRowid<=iMatch ) break; fts5DoclistIterNext(p); }while( p->aPoslist ); } @@ -4602,7 +4600,7 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDelta; iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta); if( iDelta==0 ) return iOff; - iDocid -= iDelta; + iDocid += iDelta; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); } } @@ -4692,7 +4690,6 @@ static void fts5DecodeFunction( } fts5DecodePoslist(&rc, &s, &a[4], iOff-4); - assert( iRowidOff==0 || iOff==iRowidOff ); if( iRowidOff ){ iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 450539e889..cd3de2655b 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -185,7 +185,9 @@ for {set i 1} {$i <= 10} {incr i} { } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} + if {$i==2} break } +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} #------------------------------------------------------------------------- # @@ -312,7 +314,7 @@ do_execsql_test 13.1 { do_execsql_test 13.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; -} {2 1} +} {1 2} do_execsql_test 13.4 { DELETE FROM t1 WHERE rowid=2; diff --git a/ext/fts5/test/fts5ab.test b/ext/fts5/test/fts5ab.test index 79ebc7eaa5..d68240901e 100644 --- a/ext/fts5/test/fts5ab.test +++ b/ext/fts5/test/fts5ab.test @@ -30,11 +30,11 @@ do_execsql_test 1.0 { } do_execsql_test 1.1 { - SELECT * FROM t1; + SELECT * FROM t1 ORDER BY rowid DESC; } { forty five {one two} {three four} hello world } do_execsql_test 1.2 { - SELECT rowid FROM t1; + SELECT rowid FROM t1 ORDER BY rowid DESC; } {45 2 1} do_execsql_test 1.3 { @@ 
-90,7 +90,12 @@ foreach {tn expr res} { 9 y {6} 10 z {6} } { - do_execsql_test 2.7.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res + do_execsql_test 2.7.$tn.1 { + SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC + } $res + do_execsql_test 2.7.$tn.2 { + SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid ASC + } [lsort -integer $res] } #------------------------------------------------------------------------- @@ -127,7 +132,7 @@ foreach {tn expr res} { 7 {"abashing abases abasement abaft abashing"} {8} } { do_execsql_test 3.2.$tn { - SELECT rowid FROM t1 WHERE t1 MATCH $expr + SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC } $res } @@ -145,7 +150,7 @@ foreach {tn expr res} { 7 {"abashing abases abasement abaft abashing"} {8} } { do_execsql_test 3.4.$tn { - SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid ASC + SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res } @@ -165,11 +170,11 @@ foreach {tn doc} [list \ do_execsql_test 4.3 { SELECT rowid FROM s1 WHERE s1 MATCH 'x' -} {2 1} +} {1 2} do_execsql_test 4.4 { SELECT rowid FROM s1 WHERE s1 MATCH '"a x"' -} {2 1} +} {1 2} #------------------------------------------------------------------------- # Check that a special case of segment promotion works. 
The case is where @@ -233,6 +238,23 @@ do_test 5.4 { fts5_level_segs s2 } {2 0} +#------------------------------------------------------------------------- +# +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE s3 USING fts5(x); + BEGIN; + INSERT INTO s3 VALUES('a b c'); + INSERT INTO s3 VALUES('A B C'); +} + +do_execsql_test 6.1 { + SELECT rowid FROM s3 WHERE s3 MATCH 'a' +} {2 1} + +do_execsql_test 6.2 { + COMMIT; + SELECT rowid FROM s3 WHERE s3 MATCH 'a' +} {2 1} finish_test diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index 76f663ac5f..713e70dcd4 100644 --- a/ext/fts5/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -134,6 +134,7 @@ do_test 1.1 { foreach {id x y} $data { execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) } } + execsql { INSERT INTO xx(xx) VALUES('integrity-check') } } {} # Usage: @@ -253,7 +254,7 @@ proc instcompare {lhs rhs} { # where each element is a list of phrase matches in the # same form as returned by auxiliary scalar function fts5_test(). # -proc matchdata {bPos expr {bAsc 0}} { +proc matchdata {bPos expr {bAsc 1}} { set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] set res [list] @@ -307,6 +308,8 @@ sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist # Test phrase queries. 
# foreach {tn phrase} { + 8 "c" + 1 "o" 2 "b q" 3 "e a e" @@ -400,8 +403,8 @@ do_execsql_test 6.integrity { } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r} foreach {bAsc sql} { - 0 {SELECT rowid FROM xx WHERE xx MATCH $expr} - 1 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid ASC} + 1 {SELECT rowid FROM xx WHERE xx MATCH $expr} + 0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC} } { foreach {tn expr} { 0.1 x diff --git a/ext/fts5/test/fts5ad.test b/ext/fts5/test/fts5ad.test index 9bc694f784..89a5078816 100644 --- a/ext/fts5/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -36,7 +36,7 @@ foreach {tn match res} { 4 {r*} {3 1} } { do_execsql_test 1.$tn { - SELECT rowid FROM yy WHERE yy MATCH $match + SELECT rowid FROM yy WHERE yy MATCH $match ORDER BY rowid DESC } $res } @@ -47,7 +47,7 @@ foreach {tn match res} { 8 {r*} {1 3} } { do_execsql_test 1.$tn { - SELECT rowid FROM yy WHERE yy MATCH $match ORDER BY rowid ASC + SELECT rowid FROM yy WHERE yy MATCH $match } $res } @@ -194,8 +194,8 @@ foreach {T create} { } foreach {bAsc sql} { - 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} - 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid ASC} + 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} + 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC} } { foreach {tn prefix} { 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} diff --git a/ext/fts5/test/fts5ae.test b/ext/fts5/test/fts5ae.test index f327bc9207..c9c3fcce30 100644 --- a/ext/fts5/test/fts5ae.test +++ b/ext/fts5/test/fts5ae.test @@ -109,8 +109,8 @@ do_execsql_test 3.3 { SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'a OR b AND c'; } { - 3 0.0.5 1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15 2.1.2} + 3 0.0.5 } #------------------------------------------------------------------------- @@ -190,8 +190,8 @@ do_execsql_test 6.1 { do_execsql_test 6.2 { SELECT rowid, fts5_test_tokenize(t6) FROM t6 WHERE t6 MATCH 't*' } { - 2 {{horatio than 
are} {dreamt of in your philosophy}} 1 {{there are more} {things in heaven and earth}} + 2 {{horatio than are} {dreamt of in your philosophy}} } #------------------------------------------------------------------------- diff --git a/ext/fts5/test/fts5ak.test b/ext/fts5/test/fts5ak.test index 756ae0a898..4eb28324c9 100644 --- a/ext/fts5/test/fts5ak.test +++ b/ext/fts5/test/fts5ak.test @@ -40,19 +40,19 @@ do_execsql_test 1.1 { do_execsql_test 1.2 { SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e'; } { - {g d a [e] h a b c f j} - {i c c f a d g h j [e]} - {j f c [e] d a h j d b} - {i a d [e] g j g d a a} - {d c j d c j b c g [e]} {[e] j a [e] f h b f h h} + {d c j d c j b c g [e]} + {i a d [e] g j g d a a} + {j f c [e] d a h j d b} + {i c c f a d g h j [e]} + {g d a [e] h a b c f j} } do_execsql_test 1.3 { SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d'; } { - {j f [h d] g h i b d f} {[h d] b j c c g a c a} + {j f [h d] g h i b d f} } do_execsql_test 1.4 { @@ -64,12 +64,12 @@ do_execsql_test 1.4 { do_execsql_test 1.5 { SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e' } { - {g d a [e] h a b c f j} - {i c c f a d g h j [e]} - {j f c [e] d a h j d b} - {i a d [e] g j g d a a} - {d c j d c j b c g [e]} {[e] j a [e] f h b f h h} + {d c j d c j b c g [e]} + {i a d [e] g j g d a a} + {j f c [e] d a h j d b} + {i c c f a d g h j [e]} + {g d a [e] h a b c f j} } do_execsql_test 1.6 { @@ -133,9 +133,9 @@ do_execsql_test 3.1 { -- '[a b c d e]' SELECT highlight(ft, 0, '[', ']') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e'; } { - {[a b c d e]} - {[a b c] [c d e]} {[a b c] x [c d e]} + {[a b c] [c d e]} + {[a b c d e]} } diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 31d51713af..36402d6f6b 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -104,7 +104,7 @@ sqlite3_fts5_create_function db rowidtest rowidtest do_execsql_test 3.3.1 { SELECT rowidtest(t1) FROM t1 WHERE t1 MATCH 'q' -} {2 
1} +} {1 2} proc insttest {cmd} { set res [list] @@ -118,15 +118,15 @@ sqlite3_fts5_create_function db insttest insttest do_execsql_test 3.4.1 { SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'q' } { - {{0 0 5}} {{0 0 0}} + {{0 0 5}} } do_execsql_test 3.4.2 { SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w' } { - {{0 0 2} {1 0 4}} {{1 0 1}} + {{0 0 2} {1 0 4}} } proc coltest {cmd} { @@ -137,7 +137,8 @@ sqlite3_fts5_create_function db coltest coltest do_execsql_test 3.5.1 { SELECT coltest(t1) FROM t1 WHERE t1 MATCH 'q' } { - {6 {y t r e w q}} {6 {q w e r t y}} + {6 {q w e r t y}} + {6 {y t r e w q}} } #------------------------------------------------------------------------- @@ -188,7 +189,7 @@ do_execsql_test 4.1.3 { WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()' ORDER BY rank DESC } { - 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 + 5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0 } do_execsql_test 4.1.4 { @@ -202,14 +203,14 @@ do_execsql_test 4.1.4 { do_execsql_test 4.1.5 { SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC } { - 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 + 5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0 } do_execsql_test 4.1.6 { INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) '); SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC } { - 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 + 5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0 } proc rowidplus {cmd ival} { @@ -257,14 +258,14 @@ do_execsql_test 4.3.2 { WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)' ORDER BY rank ASC } { - {a four} {a five} {a one} {a two} {a three} + {a four} {a one} {a five} {a two} {a three} } do_execsql_test 4.3.3 { SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)' ORDER BY rank ASC } { - {a three} 0 {a four} 1 {a one} 1 {a five} 2 {a two} 2 + {a three} 0 {a one} 1 {a four} 1 {a two} 2 {a five} 2 } diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index 948a4308b6..145fa4b6a9 100644 --- a/ext/fts5/test/fts5content.test +++ 
b/ext/fts5/test/fts5content.test @@ -26,38 +26,38 @@ do_execsql_test 1.1 { do_execsql_test 1.2 { SELECT rowid FROM f1 WHERE f1 MATCH 'o'; -} {2 1} +} {1 2} do_execsql_test 1.3 { INSERT INTO f1(a, b) VALUES('four', 'f o u r'); SELECT rowid FROM f1 WHERE f1 MATCH 'o'; -} {4 2 1} +} {1 2 4} do_execsql_test 1.4 { SELECT rowid, a, b FROM f1 WHERE f1 MATCH 'o'; -} {4 {} {} 2 {} {} 1 {} {}} +} {1 {} {} 2 {} {} 4 {} {}} do_execsql_test 1.5 { SELECT rowid, highlight(f1, 0, '[', ']') FROM f1 WHERE f1 MATCH 'o'; -} {4 {} 2 {} 1 {}} +} {1 {} 2 {} 4 {}} do_execsql_test 1.6 { SELECT rowid, highlight(f1, 0, '[', ']') IS NULL FROM f1 WHERE f1 MATCH 'o'; -} {4 1 2 1 1 1} +} {1 1 2 1 4 1} do_execsql_test 1.7 { SELECT rowid, snippet(f1, -1, '[', ']', '...', 5) IS NULL FROM f1 WHERE f1 MATCH 'o'; -} {4 1 2 1 1 1} +} {1 1 2 1 4 1} do_execsql_test 1.8 { SELECT rowid, snippet(f1, 1, '[', ']', '...', 5) IS NULL FROM f1 WHERE f1 MATCH 'o'; -} {4 1 2 1 1 1} +} {1 1 2 1 4 1} do_execsql_test 1.9 { SELECT rowid FROM f1; -} {4 3 2 1} +} {1 2 3 4} do_execsql_test 1.10 { SELECT * FROM f1; @@ -85,11 +85,11 @@ do_execsql_test 1.15 { do_execsql_test 1.16 { SELECT rowid FROM f1 WHERE f1 MATCH 'o'; -} {4 1} +} {1 4} do_execsql_test 1.17 { SELECT rowid FROM f1; -} {4 3 1} +} {1 3 4} #------------------------------------------------------------------------- # External content tables diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index 052563f779..57473afe65 100644 --- a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -53,23 +53,22 @@ db_restore_and_reopen #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} - #-------------------------------------------------------------------- +# do_execsql_test 2.0 { CREATE VIRTUAL TABLE t2 USING fts5(x); - INSERT INTO t2(t2, rank) VALUES('pgsz', 32); + INSERT INTO t2(t2, rank) VALUES('pgsz', 64); } +db func rnddoc fts5_rnddoc do_test 2.1 { - db transaction { - for {set i 0} {$i < 20} {incr 
i} { - execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') } - } - for {set i 0} {$i < 20} {incr i} { - execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') } - } + for {set i 0} {$i < 500} {incr i} { + execsql { INSERT INTO t2 VALUES(rnddoc(50)) } + execsql { INSERT INTO t2(t2) VALUES('integrity-check') } } } {} -db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r} + +#-------------------------------------------------------------------- +# finish_test diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index 68aed258ec..838bf3cc4b 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -90,13 +90,13 @@ faultsim_save_and_close foreach {tn expr res} { 1 { dk } 7 2 { m f } 1 - 3 { f* } {10 9 8 6 5 4 3 1} - 4 { m OR f } {10 9 8 5 4 1} + 3 { f* } {1 3 4 5 6 8 9 10} + 4 { m OR f } {1 4 5 8 9 10} 5 { sn + gh } {5} 6 { "sn gh" } {5} 7 { NEAR(r a, 5) } {9} - 8 { m* f* } {10 9 8 6 4 1} - 9 { m* + f* } {8 1} + 8 { m* f* } {1 4 6 8 9 10} + 9 { m* + f* } {1 8} } { do_faultsim_test 4.$tn -prep { faultsim_restore_and_reopen @@ -302,7 +302,7 @@ db func rnddoc rnddoc do_test 8.0 { execsql { CREATE VIRTUAL TABLE x1 USING fts5(a) } set ::res [list] - for {set i 100} {$i>0} {incr i -1} { + for {set i 1} {$i<100} {incr i 1} { execsql { INSERT INTO x1 VALUES( rnddoc(50) ) } lappend ::res $i } diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test index c33c9adea3..7ffd2977bf 100644 --- a/ext/fts5/test/fts5rowid.test +++ b/ext/fts5/test/fts5rowid.test @@ -172,7 +172,7 @@ do_test 5.0 { do_execsql_test 5.1 { SELECT rowid FROM x4 WHERE x4 MATCH 'a' -} {4 3 2 1} +} {1 2 3 4} set res [db one {SELECT count(*) FROM x4_data}] do_execsql_test 5.2 { diff --git a/manifest b/manifest index 9fdcf2e5e5..d15cc908b2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\scompression\sof\skeys\sstored\son\sinternal\ssegment\sb-tree\snodes\sby\sfts5. 
-D 2015-01-23T17:43:21.454 +C Have\sfts5\sstore\srowids\sin\sascending\sorder.\sQuery\sspeed\sis\svirtually\sthe\ssame\sregardless\sof\srowid\sorder,\sand\sascending\sorder\smakes\ssome\sinsert\soptimizations\seasier. +D 2015-01-24T19:57:03.097 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,15 +104,15 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 41b852b654f79f522668bc7ba292755fb261f855 +F ext/fts5/fts5.c f2e899fba27ca33c8897635752c4c83a40dcb18d F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h 1d8f968b8ff71de15176acf8f4b14a2bdebcb6e3 +F ext/fts5/fts5Int.h e3b9344d8209c9639825c711662d5d039eb70322 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 -F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 -F ext/fts5/fts5_hash.c 7a87f9f2eae2216c710064821fa0621ac6a8ce7b -F ext/fts5/fts5_index.c dda2ed8dab9910aedd8de0169ca029c5336b9e42 +F ext/fts5/fts5_expr.c 473e3428a9a637fa6e61d64d8ca3796ec57a58e9 +F ext/fts5/fts5_hash.c 4ab952b75f27d5ed3ef0f3b4f7fa1464744483e8 +F ext/fts5/fts5_index.c b3e8e38c70178a638f4b0a183694db60ecde5366 F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -120,30 +120,30 @@ F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 
777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 -F ext/fts5/test/fts5aa.test 8ddbbcbedab67101dc9a86fd5c39d78b0e06515f -F ext/fts5/test/fts5ab.test 3f3ad2fb9ed60a0df57b626fa6fe6ef41d4deee0 -F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 -F ext/fts5/test/fts5ad.test 3b01eec8516d5631909716514e2e585a45ef0eb1 -F ext/fts5/test/fts5ae.test 014d5be2f5f70407fb032d4f27704116254797c3 +F ext/fts5/test/fts5aa.test e77e28ac85c70891fc2603ff4b15de571eca628f +F ext/fts5/test/fts5ab.test 127769288519ed549c57d7e11628dbe5b9952ad5 +F ext/fts5/test/fts5ac.test 1dfa0751bcf32fd9cfaad1557b7729950e5cc930 +F ext/fts5/test/fts5ad.test 6c970531caf865b65f4e1dd9d6d43bd6ea37d754 +F ext/fts5/test/fts5ae.test 347c96db06aab23ff00cf6a6b4064a8dbb182e42 F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 F ext/fts5/test/fts5ah.test 17ba8e197a781ca10548b7260e39ed8269d24b93 F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 -F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce -F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a +F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 +F ext/fts5/test/fts5al.test 6a5717faaf7f1e0e866360022d284903f3a4eede F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b -F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 -F ext/fts5/test/fts5corrupt.test b81ed310018ddffb34da7802f74018d94a2b3961 +F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 +F ext/fts5/test/fts5corrupt.test 78eb076867e750a013b46b3bc06065870bc93c22 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 
728a1f23f263548f5c29b29dfb851b5f2dbe723e -F ext/fts5/test/fts5fault1.test f9bafb61b40061ad19b61d15003c5faeea4a57b5 +F ext/fts5/test/fts5fault1.test fbd8612889234849ff041f5b36f8e390feeed46e F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 -F ext/fts5/test/fts5rowid.test db482328fe9bf78bb6a09f2dbf055e2caeaac00a +F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee @@ -1283,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 939b7a5de25e064bdf08e03864c35ab718da6f6f -R 7096f8b96e0f85e1680222eb9ee6721b +P 51444f67c0cc58a3023eb1cd78e7cf889da6c80f +R bde0099a6ffad2afb653ac6add38295f U dan -Z 0515045012673cdccd49d82241057133 +Z 8b04510bfa3b18ba6ca879f4b4c9a36e diff --git a/manifest.uuid b/manifest.uuid index 324db551da..fb905f2048 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -51444f67c0cc58a3023eb1cd78e7cf889da6c80f \ No newline at end of file +5206ca6005bfa9dfc7346d4b89430c9748d32c10 \ No newline at end of file From 900f1922ac0aabd954c6976d8bc399f651e53bd8 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 27 Jan 2015 20:41:00 +0000 Subject: [PATCH 083/206] Fix a problem with fts5 doclist-indexes that occured if the first rowid of the first non-term page of a doclist is zero. 
FossilOrigin-Name: f704bc059e06b01f1d68fa7dad89e33eace6c389 --- ext/fts5/fts5_index.c | 149 +++++++++++++++++++++++--------------- ext/fts5/test/fts5aa.test | 3 +- ext/fts5/test/fts5ac.test | 3 +- ext/fts5/test/fts5ah.test | 10 +-- manifest | 18 ++--- manifest.uuid | 2 +- 6 files changed, 109 insertions(+), 76 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 602046a302..01ad055b0d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -44,7 +44,7 @@ #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ -#define FTS5_MIN_DLIDX_SIZE 4000 /* Add dlidx if this many empty pages */ +#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ /* ** Details: @@ -192,11 +192,15 @@ ** ** 5. Segment doclist indexes: ** -** A list of varints - the first docid on each page (starting with the -** first termless page) of the doclist. First element in the list is a -** literal docid. Each docid thereafter is a (negative) delta. If there -** are no docids at all on a page, a 0x00 byte takes the place of the -** delta value. +** A list of varints. If the first termless page contains at least one +** docid, the list begins with that docid as a varint followed by the +** value 1 (0x01). Or, if the first termless page contains no docids, +** a varint containing the last docid stored on the term page followed +** by a 0 (0x00) value. +** +** For each subsequent page in the doclist, either a 0x00 byte if the +** page contains no terms, or a delta-encoded docid (always +ve) +** representing the first docid on the page otherwise. 
*/ /* @@ -373,7 +377,7 @@ struct Fts5SegWriter { u8 bFirstTermInPage; /* True if next term will be first in leaf */ int nLeafWritten; /* Number of leaf pages written */ int nEmpty; /* Number of contiguous term-less nodes */ - Fts5Buffer dlidx; /* Doclist index */ + Fts5Buffer cdlidx; /* Doclist index */ i64 iDlidxPrev; /* Previous rowid appended to dlidx */ int bDlidxPrevValid; /* True if iDlidxPrev is valid */ }; @@ -1335,28 +1339,42 @@ static void fts5NodeIterFree(Fts5NodeIter *pIter){ ** ** pData: pointer to doclist-index record, ** iLeafPgno: page number that this doclist-index is associated with. +** +** When this function is called pIter->iLeafPgno is the page number the +** doclist is associated with (the one featuring the term). */ static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ Fts5Data *pData = pIter->pData; int i; + int bPresent; assert( pIter->pData ); assert( pIter->iLeafPgno>0 ); - /* Count the number of leading 0x00 bytes. Then set iLeafPgno. */ - for(i=0; in; i++){ - if( pData->p[i] ) break; + /* Read the first rowid value. And the "present" flag that follows it. */ + pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid); + bPresent = pData->p[pIter->iOff++]; + if( bPresent ){ + i = 0; + }else{ + /* Count the number of leading 0x00 bytes. */ + for(i=1; pIter->iOffn; i++){ + if( pData->p[pIter->iOff] ) break; + pIter->iOff++; + } + + /* Unless we are already at the end of the doclist-index, load the first + ** rowid value. */ + if( pIter->iOffn ){ + i64 iVal; + pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal); + pIter->iRowid += iVal; + }else{ + pIter->bEof = 1; + } } pIter->iLeafPgno += (i+1); - pIter->iOff = i; - /* Unless we are already at the end of the doclist-index, load the first - ** rowid value. 
*/ - if( pIter->iOffn ){ - pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&pIter->iRowid); - }else{ - pIter->bEof = 1; - } pIter->iFirstOff = pIter->iOff; return pIter->bEof; } @@ -1376,7 +1394,7 @@ static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ i64 iVal; pIter->iLeafPgno += (iOff - pIter->iOff) + 1; iOff += getVarint(&pData->p[iOff], (u64*)&iVal); - pIter->iRowid -= iVal; + pIter->iRowid += iVal; pIter->iOff = iOff; }else{ pIter->bEof = 1; @@ -1417,7 +1435,7 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ } getVarint(&a[iOff], (u64*)&iVal); - pIter->iRowid += iVal; + pIter->iRowid -= iVal; pIter->iLeafPgno--; while( iOff>pIter->iFirstOff @@ -1432,18 +1450,15 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ return pIter->bEof; } -static void fts5DlidxIterInit( +static void fts5DlidxIterInitFromData( Fts5Index *p, /* Fts5 Backend to iterate within */ int bRev, /* True for ORDER BY ASC */ - int iIdx, int iSegid, /* Segment iSegid within index iIdx */ - int iLeafPgno, /* Leaf page number to load dlidx for */ + int iLeafPgno, /* Leaf page number dlidx is for */ + Fts5Data *pDlidx, /* Leaf index data */ Fts5DlidxIter **ppIter /* OUT: Populated iterator */ ){ Fts5DlidxIter *pIter = *ppIter; - Fts5Data *pDlidx; - pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno)); - if( pDlidx==0 ) return; if( pIter==0 ){ *ppIter = pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); if( pIter==0 ){ @@ -1463,6 +1478,19 @@ static void fts5DlidxIterInit( } } +static void fts5DlidxIterInit( + Fts5Index *p, /* Fts5 Backend to iterate within */ + int bRev, /* True for ORDER BY ASC */ + int iIdx, int iSegid, /* Segment iSegid within index iIdx */ + int iLeafPgno, /* Leaf page number to load dlidx for */ + Fts5DlidxIter **ppIter /* OUT: Populated iterator */ +){ + Fts5Data *pDlidx; + pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno)); + if( pDlidx==0 ) return; + fts5DlidxIterInitFromData(p, bRev, iLeafPgno, 
pDlidx, ppIter); +} + /* ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). */ @@ -2104,7 +2132,7 @@ static void fts5SegIterNextFrom( assert( pIter->pLeaf ); if( bRev==0 ){ - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatchiRowid ){ + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ iLeafPgno = pDlidx->iLeafPgno; fts5DlidxIterNext(pDlidx); } @@ -2114,8 +2142,8 @@ static void fts5SegIterNextFrom( bMove = 0; } }else{ - assert( iMatch>pIter->iRowid ); - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ + assert( iMatchiRowid ); + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatchiRowid ){ fts5DlidxIterPrev(pDlidx); } iLeafPgno = pDlidx->iLeafPgno; @@ -2132,8 +2160,8 @@ static void fts5SegIterNextFrom( while( 1 ){ if( bMove ) fts5SegIterNext(p, pIter); if( pIter->pLeaf==0 ) break; - if( bRev==0 && pIter->iRowid<=iMatch ) break; - if( bRev!=0 && pIter->iRowid>=iMatch ) break; + if( bRev==0 && pIter->iRowid>=iMatch ) break; + if( bRev!=0 && pIter->iRowid<=iMatch ) break; bMove = 1; } } @@ -2551,8 +2579,8 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ pWriter->iIdx, pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty ); - assert( pWriter->dlidx.n>0 ); - fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n); + assert( pWriter->cdlidx.n>0 ); + fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n); bFlag = 1; } fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); @@ -2562,7 +2590,7 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ /* Whether or not it was written to disk, zero the doclist index at this ** point */ - sqlite3Fts5BufferZero(&pWriter->dlidx); + sqlite3Fts5BufferZero(&pWriter->cdlidx); pWriter->bDlidxPrevValid = 0; } @@ -2643,7 +2671,13 @@ static void fts5WriteBtreeNoTerm( if( pWriter->bFirstRowidInPage ){ /* No rowids on this page. 
Append an 0x00 byte to the current ** doclist-index */ - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, 0); + if( pWriter->bDlidxPrevValid==0 ){ + i64 iRowid = pWriter->iPrevRowid; + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); + pWriter->bDlidxPrevValid = 1; + pWriter->iDlidxPrev = iRowid; + } + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0); } pWriter->nEmpty++; } @@ -2659,11 +2693,12 @@ static void fts5WriteDlidxAppend( ){ i64 iVal; if( pWriter->bDlidxPrevValid ){ - iVal = pWriter->iDlidxPrev - iRowid; + iVal = iRowid - pWriter->iDlidxPrev; }else{ - iVal = iRowid; + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); + iVal = 1; } - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, iVal); + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal); pWriter->bDlidxPrevValid = 1; pWriter->iDlidxPrev = iRowid; } @@ -2898,7 +2933,7 @@ static void fts5WriteFinish( } } sqlite3_free(pWriter->aWriter); - sqlite3Fts5BufferFree(&pWriter->dlidx); + sqlite3Fts5BufferFree(&pWriter->cdlidx); } static void fts5WriteInit( @@ -3661,7 +3696,7 @@ static void fts5IndexIntegrityCheckSegment( } fts5DlidxIterFree(pDlidx); - fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); + // fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); } } @@ -4638,25 +4673,23 @@ static void fts5DecodeFunction( fts5DebugRowid(&rc, &s, iRowid); if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ - int i = 0; - i64 iPrev; - if( n>0 ){ - i = getVarint(&a[i], (u64*)&iPrev); - sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev); - } - while( i Date: Thu, 29 Jan 2015 20:59:34 +0000 Subject: [PATCH 084/206] Fix some problems with transactions that both read and write an fts5 table. 
FossilOrigin-Name: 0e225b15357765f132c3364b222f9931a608a5b2 --- ext/fts5/fts5Int.h | 31 ++--- ext/fts5/fts5_buffer.c | 1 + ext/fts5/fts5_hash.c | 120 ++++++++++++++---- ext/fts5/fts5_index.c | 100 ++++++++++++++- ext/fts5/test/fts5ab.test | 11 +- ext/fts5/test/fts5ac.test | 254 ++++++++++++++++++++------------------ ext/fts5/test/fts5ad.test | 17 ++- manifest | 24 ++-- manifest.uuid | 2 +- 9 files changed, 382 insertions(+), 178 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 50003632d1..d31ac07210 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -228,7 +228,7 @@ typedef struct Fts5IndexIter Fts5IndexIter; /* ** Values used as part of the flags argument passed to IndexQuery(). */ -#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ +#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ /* @@ -259,16 +259,13 @@ int sqlite3Fts5IndexQuery( ); /* -** Docid list iteration. +** The various operations on open token or token prefix iterators opened +** using sqlite3Fts5IndexQuery(). */ int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); - -/* -** Obtain the position list that corresponds to the current position. -*/ int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn); /* @@ -365,13 +362,6 @@ int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); */ typedef struct Fts5Hash Fts5Hash; -typedef struct Fts5Data Fts5Data; -struct Fts5Data { - u8 *p; /* Pointer to buffer containing record */ - int n; /* Size of record in bytes */ - int nRef; /* Ref count */ -}; - /* ** Create a hash table, free a hash table. 
*/ @@ -405,7 +395,20 @@ int sqlite3Fts5HashIterate( int sqlite3Fts5HashQuery( Fts5Hash*, /* Hash table to query */ const char *pTerm, int nTerm, /* Query term */ - Fts5Data **ppData /* OUT: Query result */ + const char **ppDoclist, /* OUT: Pointer to doclist for pTerm */ + int *pnDoclist /* OUT: Size of doclist in bytes */ +); + +void sqlite3Fts5HashScanInit( + Fts5Hash*, /* Hash table to query */ + const char *pTerm, int nTerm /* Query prefix */ +); +void sqlite3Fts5HashScanNext(Fts5Hash*); +int sqlite3Fts5HashScanEof(Fts5Hash*); +void sqlite3Fts5HashScanEntry(Fts5Hash *, + const char **pzTerm, /* OUT: term (nul-terminated) */ + const char **ppDoclist, /* OUT: pointer to doclist */ + int *pnDoclist /* OUT: size of doclist in bytes */ ); diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index b8f55694d8..53cb02f521 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -74,6 +74,7 @@ void sqlite3Fts5BufferAppendBlob( int nData, const u8 *pData ){ + assert( nData>=0 ); if( sqlite3Fts5BufferGrow(pRc, pBuf, nData) ) return; memcpy(&pBuf->p[pBuf->n], pData, nData); pBuf->n += nData; diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 41b2eb774a..78ab2083c3 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -27,6 +27,7 @@ struct Fts5Hash { int *pnByte; /* Pointer to bytes counter */ int nEntry; /* Number of entries currently in hash */ int nSlot; /* Size of aSlot[] array */ + Fts5HashEntry *pScan; /* Current ordered scan item */ Fts5HashEntry **aSlot; /* Array of hash slots */ }; @@ -52,7 +53,8 @@ struct Fts5Hash { ** Bytes of data written since iRowidOff. 
*/ struct Fts5HashEntry { - Fts5HashEntry *pNext; /* Next hash entry with same hash-key */ + Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ + Fts5HashEntry *pScanNext; /* Next entry in sorted order */ int nAlloc; /* Total size of allocation */ int iSzPoslist; /* Offset of space for 4-byte poslist size */ @@ -124,7 +126,7 @@ void sqlite3Fts5HashClear(Fts5Hash *pHash){ Fts5HashEntry *pNext; Fts5HashEntry *pSlot; for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ - pNext = pSlot->pNext; + pNext = pSlot->pHashNext; sqlite3_free(pSlot); } } @@ -158,9 +160,9 @@ static int fts5HashResize(Fts5Hash *pHash){ while( apOld[i] ){ int iHash; Fts5HashEntry *p = apOld[i]; - apOld[i] = p->pNext; + apOld[i] = p->pHashNext; iHash = fts5HashKey(nNew, p->zKey, strlen(p->zKey)); - p->pNext = apNew[iHash]; + p->pHashNext = apNew[iHash]; apNew[iHash] = p; } } @@ -184,7 +186,7 @@ int sqlite3Fts5HashWrite( int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ /* Attempt to locate an existing hash entry */ - for(p=pHash->aSlot[iHash]; p; p=p->pNext){ + for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break; } @@ -210,7 +212,7 @@ int sqlite3Fts5HashWrite( p->iSzPoslist = p->nData; p->nData += 4; p->iRowid = iRowid; - p->pNext = pHash->aSlot[iHash]; + p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; nIncr += p->nData; @@ -232,7 +234,7 @@ int sqlite3Fts5HashWrite( pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); if( pNew==0 ) return SQLITE_NOMEM; pNew->nAlloc = nNew; - for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pNext); + for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); *pp = pNew; p = pNew; } @@ -301,13 +303,13 @@ static Fts5HashEntry *fts5HashEntryMerge( if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){ /* p2 is smaller */ *ppOut = p2; - ppOut = &p2->pNext; - p2 = p2->pNext; + ppOut = &p2->pScanNext; + p2 = p2->pScanNext; }else{ /* p1 is smaller */ *ppOut = p1; - ppOut = 
&p1->pNext; - p1 = p1->pNext; + ppOut = &p1->pScanNext; + p1 = p1->pScanNext; } *ppOut = 0; } @@ -322,7 +324,11 @@ static Fts5HashEntry *fts5HashEntryMerge( ** the responsibility of the caller to free the elements of the returned ** list. */ -static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){ +static int fts5HashEntrySort( + Fts5Hash *pHash, + const char *pTerm, int nTerm, /* Query prefix, if any */ + Fts5HashEntry **ppSorted +){ const int nMergeSlot = 32; Fts5HashEntry **ap; Fts5HashEntry *pList; @@ -335,15 +341,17 @@ static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){ memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); for(iSlot=0; iSlotnSlot; iSlot++){ - while( pHash->aSlot[iSlot] ){ - Fts5HashEntry *pEntry = pHash->aSlot[iSlot]; - pHash->aSlot[iSlot] = pEntry->pNext; - pEntry->pNext = 0; - for(i=0; ap[i]; i++){ - pEntry = fts5HashEntryMerge(pEntry, ap[i]); - ap[i] = 0; + Fts5HashEntry *pIter; + for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ + if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){ + Fts5HashEntry *pEntry = pIter; + pEntry->pScanNext = 0; + for(i=0; ap[i]; i++){ + pEntry = fts5HashEntryMerge(pEntry, ap[i]); + ap[i] = 0; + } + ap[i] = pEntry; } - ap[i] = pEntry; } } @@ -368,10 +376,11 @@ int sqlite3Fts5HashIterate( Fts5HashEntry *pList; int rc; - rc = fts5HashEntrySort(pHash, &pList); + rc = fts5HashEntrySort(pHash, 0, 0, &pList); if( rc==SQLITE_OK ){ + memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); while( pList ){ - Fts5HashEntry *pNext = pList->pNext; + Fts5HashEntry *pNext = pList->pScanNext; if( rc==SQLITE_OK ){ const int nSz = pList->nData - pList->iSzPoslist - 4; const int nKey = strlen(pList->zKey); @@ -406,3 +415,70 @@ int sqlite3Fts5HashIterate( return rc; } +/* +** Query the hash table for a doclist associated with term pTerm/nTerm. 
+*/ +int sqlite3Fts5HashQuery( + Fts5Hash *pHash, /* Hash table to query */ + const char *pTerm, int nTerm, /* Query term */ + const char **ppDoclist, /* OUT: Pointer to doclist for pTerm */ + int *pnDoclist /* OUT: Size of doclist in bytes */ +){ + unsigned int iHash = fts5HashKey(pHash->nSlot, pTerm, nTerm); + Fts5HashEntry *p; + + for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ + if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; + } + + if( p ){ + u8 *pPtr = (u8*)p; + fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + *ppDoclist = &p->zKey[nTerm+1]; + *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); + }else{ + *ppDoclist = 0; + *pnDoclist = 0; + } + + return SQLITE_OK; +} + +void sqlite3Fts5HashScanInit( + Fts5Hash *p, /* Hash table to query */ + const char *pTerm, int nTerm /* Query prefix */ +){ + fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); +} + +void sqlite3Fts5HashScanNext(Fts5Hash *p){ + if( p->pScan ){ + p->pScan = p->pScan->pScanNext; + } +} + +int sqlite3Fts5HashScanEof(Fts5Hash *p){ + return (p->pScan==0); +} + +void sqlite3Fts5HashScanEntry( + Fts5Hash *pHash, + const char **pzTerm, /* OUT: term (nul-terminated) */ + const char **ppDoclist, /* OUT: pointer to doclist */ + int *pnDoclist /* OUT: size of doclist in bytes */ +){ + Fts5HashEntry *p; + if( (p = pHash->pScan) ){ + u8 *pPtr = (u8*)p; + int nTerm = strlen(p->zKey); + fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + *pzTerm = p->zKey; + *ppDoclist = &p->zKey[nTerm+1]; + *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); + }else{ + *pzTerm = 0; + *ppDoclist = 0; + *pnDoclist = 0; + } +} + diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 01ad055b0d..1731d3ed70 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -274,6 +274,7 @@ int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct 
Fts5ChunkIter Fts5ChunkIter; +typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; @@ -286,6 +287,12 @@ typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; +struct Fts5Data { + u8 *p; /* Pointer to buffer containing record */ + int n; /* Size of record in bytes */ + int nRef; /* Ref count */ +}; + /* ** One object per %_data table. */ @@ -1514,7 +1521,7 @@ static void fts5SegIterNextPage( Fts5SegIter *pIter /* Iterator to advance to next page */ ){ Fts5StructureSegment *pSeg = pIter->pSeg; - if( pIter->pLeaf ) fts5DataRelease(pIter->pLeaf); + fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, @@ -1775,6 +1782,26 @@ static void fts5SegIterNext( }else{ pIter->iRowid += iDelta; } + }else if( pIter->pSeg==0 ){ + const char *pList = 0; + const char *zTerm; + int nList; + if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ + sqlite3Fts5HashScanNext(p->apHash[0]); + sqlite3Fts5HashScanEntry(p->apHash[0], &zTerm, &pList, &nList); + } + if( pList==0 ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + pIter->pLeaf->p = (u8*)pList; + pIter->pLeaf->n = nList; + sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); + pIter->iLeafOffset = getVarint((u8*)pList, (u64*)&pIter->iRowid); + if( pIter->flags & FTS5_SEGITER_REVERSE ){ + fts5SegIterReverseInitPage(p, pIter); + } + } }else{ iOff = 0; /* Next entry is not on the current page */ @@ -2018,6 +2045,58 @@ static void fts5SegIterSeekInit( } } +/* +** Initialize the object pIter to point to term pTerm/nTerm within the +** in-memory hash table iIdx. If there is no such term in the table, the +** iterator is set to EOF. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. 
If +** an error has already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterHashInit( + Fts5Index *p, /* FTS5 backend */ + int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to */ + int flags, /* Mask of FTS5INDEX_XXX flags */ + Fts5SegIter *pIter /* Object to populate */ +){ + Fts5Hash *pHash = p->apHash[iIdx]; + const char *pList = 0; + int nList = 0; + const u8 *z = 0; + int n = 0; + + assert( pHash ); + + if( pTerm==0 || (iIdx==0 && (flags & FTS5INDEX_QUERY_PREFIX)) ){ + sqlite3Fts5HashScanInit(pHash, (const char*)pTerm, nTerm); + sqlite3Fts5HashScanEntry(pHash, (const char**)&z, &pList, &nList); + n = (z ? strlen((const char*)z) : 0); + }else{ + pIter->flags |= FTS5_SEGITER_ONETERM; + sqlite3Fts5HashQuery(pHash, (const char*)pTerm, nTerm, &pList, &nList); + z = pTerm; + n = nTerm; + } + + if( pList ){ + Fts5Data *pLeaf; + sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); + pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); + if( pLeaf==0 ) return; + pLeaf->nRef = 1; + pLeaf->p = (u8*)pList; + pLeaf->n = nList; + pIter->pLeaf = pLeaf; + pIter->iLeafOffset = getVarint(pLeaf->p, (u64*)&pIter->iRowid); + + if( flags & FTS5INDEX_QUERY_DESC ){ + pIter->flags |= FTS5_SEGITER_REVERSE; + fts5SegIterReverseInitPage(p, pIter); + } + } +} + /* ** Zero the iterator passed as the only argument. */ @@ -2261,6 +2340,7 @@ static void fts5MultiIterNew( /* Allocate space for the new multi-seg-iterator. */ if( iLevel<0 ){ nSeg = fts5StructureCountSegments(pStruct); + nSeg += (p->apHash ? 1 : 0); }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } @@ -2280,6 +2360,11 @@ static void fts5MultiIterNew( /* Initialize each of the component segment iterators. */ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; + if( p->apHash ){ + /* Add a segment iterator for the current contents of the hash table. 
*/ + Fts5SegIter *pIter = &pNew->aSeg[iIter++]; + fts5SegIterHashInit(p, iIdx, pTerm, nTerm, flags, pIter); + } for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; @@ -2406,13 +2491,19 @@ static void fts5ChunkIterInit( Fts5SegIter *pSeg, /* Segment iterator to read poslist from */ Fts5ChunkIter *pIter /* Initialize this object */ ){ - int iId = pSeg->pSeg->iSegid; - i64 rowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); Fts5Data *pLeaf = pSeg->pLeaf; int iOff = pSeg->iLeafOffset; memset(pIter, 0, sizeof(*pIter)); - pIter->iLeafRowid = rowid; + /* If Fts5SegIter.pSeg is NULL, then this iterator iterates through data + ** currently stored in a hash table. In this case there is no leaf-rowid + ** to calculate. */ + if( pSeg->pSeg ){ + int iId = pSeg->pSeg->iSegid; + i64 rowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); + pIter->iLeafRowid = rowid; + } + if( iOffn ){ fts5DataReference(pLeaf); pIter->pLeaf = pLeaf; @@ -3100,6 +3191,7 @@ fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); fflush(stdout); #endif + assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) diff --git a/ext/fts5/test/fts5ab.test b/ext/fts5/test/fts5ab.test index d68240901e..23fdec0dfa 100644 --- a/ext/fts5/test/fts5ab.test +++ b/ext/fts5/test/fts5ab.test @@ -247,14 +247,21 @@ do_execsql_test 6.0 { INSERT INTO s3 VALUES('A B C'); } -do_execsql_test 6.1 { +do_execsql_test 6.1.1 { SELECT rowid FROM s3 WHERE s3 MATCH 'a' +} {1 2} + +do_execsql_test 6.1.2 { + SELECT rowid FROM s3 WHERE s3 MATCH 'a' ORDER BY rowid DESC } {2 1} do_execsql_test 6.2 { COMMIT; +} + +do_execsql_test 6.3 { SELECT rowid FROM s3 WHERE s3 MATCH 'a' -} {2 1} +} {1 2} finish_test diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index 999b0788f0..b061d0bf6f 100644 --- a/ext/fts5/test/fts5ac.test +++ 
b/ext/fts5/test/fts5ac.test @@ -22,11 +22,6 @@ ifcapable !fts5 { return } -do_execsql_test 1.0 { - CREATE VIRTUAL TABLE xx USING fts5(x,y); - INSERT INTO xx(xx, rank) VALUES('pgsz', 32); -} - set data { 0 {p o q e z k z p n f y u z y n y} {l o o l v v k} 1 {p k h h p y l l h i p v n} {p p l u r i f a j g e r r x w} @@ -130,13 +125,6 @@ set data { 99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o} } -do_test 1.1 { - foreach {id x y} $data { - execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) } - } - execsql { INSERT INTO xx(xx) VALUES('integrity-check') } -} {} - # Usage: # # poslist aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... @@ -302,112 +290,94 @@ proc fts5_test_poslist {cmd} { set res } -sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist - -#------------------------------------------------------------------------- -# Test phrase queries. -# -foreach {tn phrase} { - 1 "o" - 2 "b q" - 3 "e a e" - 4 "m d g q q b k b w f q q p p" - 5 "l o o l v v k" - 6 "a" - 7 "b" - 8 "c" - 9 "no" - 10 "L O O L V V K" +foreach {tn2 sql} { + 1 {} + 2 {BEGIN} } { - set expr "\"$phrase\"" - set res [matchdata 1 $expr] + reset_db + sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist - do_execsql_test 1.2.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res -} + do_execsql_test 1.0 { + CREATE VIRTUAL TABLE xx USING fts5(x,y); + INSERT INTO xx(xx, rank) VALUES('pgsz', 32); + } -#------------------------------------------------------------------------- -# Test some AND and OR queries. 
-# -foreach {tn expr} { - 1.1 "a AND b" - 1.2 "a+b AND c" - 1.3 "d+c AND u" - 1.4 "d+c AND u+d" + execsql $sql - 2.1 "a OR b" - 2.2 "a+b OR c" - 2.3 "d+c OR u" - 2.4 "d+c OR u+d" + do_test $tn2.1.1 { + foreach {id x y} $data { + execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) } + } + execsql { INSERT INTO xx(xx) VALUES('integrity-check') } + } {} - 3.1 { a AND b AND c } -} { - set res [matchdata 1 $expr] - do_execsql_test 2.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res -} + #------------------------------------------------------------------------- + # Test phrase queries. + # + foreach {tn phrase} { + 1 "o" + 2 "b q" + 3 "e a e" + 4 "m d g q q b k b w f q q p p" + 5 "l o o l v v k" + 6 "a" + 7 "b" + 8 "c" + 9 "no" + 10 "L O O L V V K" + } { + set expr "\"$phrase\"" + set res [matchdata 1 $expr] -#------------------------------------------------------------------------- -# Queries on a specific column. -# -foreach {tn expr} { - 1 "x:a" - 2 "y:a" - 3 "x:b" - 4 "y:b" -} { - set res [matchdata 1 $expr] - do_execsql_test 3.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res -} + do_execsql_test $tn2.1.2.$tn.[llength $res] { + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr + } $res + } -#------------------------------------------------------------------------- -# Some NEAR queries. 
-# -foreach {tn expr} { - 1 "NEAR(a b)" - 2 "NEAR(r c)" - 2 { NEAR(r c, 5) } - 3 { NEAR(r c, 3) } - 4 { NEAR(r c, 2) } - 5 { NEAR(r c, 0) } - 6 { NEAR(a b c) } - 7 { NEAR(a b c, 8) } - 8 { x : NEAR(r c) } - 9 { y : NEAR(r c) } -} { - set res [matchdata 1 $expr] - do_execsql_test 4.1.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res -} - -do_test 4.1 { poslist {{a b c}} -- a } {0.0.0} -do_test 4.2 { poslist {{a b c}} -- c } {0.0.2} - -foreach {tn expr tclexpr} { - 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} -} { - do_execsql_test 5.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr] -} - -#------------------------------------------------------------------------- -# -do_execsql_test 6.integrity { - INSERT INTO xx(xx) VALUES('integrity-check'); -} -#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r} -foreach {bAsc sql} { - 1 {SELECT rowid FROM xx WHERE xx MATCH $expr} - 0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC} -} { + #------------------------------------------------------------------------- + # Test some AND and OR queries. + # foreach {tn expr} { - 0.1 x - 1 { NEAR(r c) } + 1.1 "a AND b" + 1.2 "a+b AND c" + 1.3 "d+c AND u" + 1.4 "d+c AND u+d" + + 2.1 "a OR b" + 2.2 "a+b OR c" + 2.3 "d+c OR u" + 2.4 "d+c OR u+d" + + 3.1 { a AND b AND c } + } { + set res [matchdata 1 $expr] + do_execsql_test $tn2.2.$tn.[llength $res] { + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr + } $res + } + + #------------------------------------------------------------------------- + # Queries on a specific column. + # + foreach {tn expr} { + 1 "x:a" + 2 "y:a" + 3 "x:b" + 4 "y:b" + } { + set res [matchdata 1 $expr] + do_execsql_test $tn2.3.$tn.[llength $res] { + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr + } $res + } + + #------------------------------------------------------------------------- + # Some NEAR queries. 
+ # + foreach {tn expr} { + 1 "NEAR(a b)" + 2 "NEAR(r c)" 2 { NEAR(r c, 5) } 3 { NEAR(r c, 3) } 4 { NEAR(r c, 2) } @@ -416,20 +386,60 @@ foreach {bAsc sql} { 7 { NEAR(a b c, 8) } 8 { x : NEAR(r c) } 9 { y : NEAR(r c) } - 10 { x : "r c" } - 11 { y : "r c" } - 12 { a AND b } - 13 { a AND b AND c } - 14a { a } - 14b { a OR b } - 15 { a OR b AND c } - 16 { c AND b OR a } - 17 { c AND (b OR a) } - 18 { c NOT (b OR a) } - 19 { c NOT b OR a AND d } } { - set res [matchdata 0 $expr $bAsc] - do_execsql_test 6.$bAsc.$tn.[llength $res] $sql $res + set res [matchdata 1 $expr] + do_execsql_test $tn2.4.1.$tn.[llength $res] { + SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr + } $res + } + + do_test $tn2.4.1 { poslist {{a b c}} -- a } {0.0.0} + do_test $tn2.4.2 { poslist {{a b c}} -- c } {0.0.2} + + foreach {tn expr tclexpr} { + 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} + } { + do_execsql_test $tn2.5.$tn { + SELECT fts5_expr_tcl($expr, 'N $x') + } [list $tclexpr] + } + + #------------------------------------------------------------------------- + # + do_execsql_test $tn2.6.integrity { + INSERT INTO xx(xx) VALUES('integrity-check'); + } + #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r} + foreach {bAsc sql} { + 1 {SELECT rowid FROM xx WHERE xx MATCH $expr} + 0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC} + } { + foreach {tn expr} { + 0.1 x + 1 { NEAR(r c) } + 2 { NEAR(r c, 5) } + 3 { NEAR(r c, 3) } + 4 { NEAR(r c, 2) } + 5 { NEAR(r c, 0) } + 6 { NEAR(a b c) } + 7 { NEAR(a b c, 8) } + 8 { x : NEAR(r c) } + 9 { y : NEAR(r c) } + 10 { x : "r c" } + 11 { y : "r c" } + 12 { a AND b } + 13 { a AND b AND c } + 14a { a } + 14b { a OR b } + 15 { a OR b AND c } + 16 { c AND b OR a } + 17 { c AND (b OR a) } + 18 { c NOT (b OR a) } + 19 { c NOT b OR a AND d } + } { + set res [matchdata 0 $expr $bAsc] + do_execsql_test $tn2.6.$bAsc.$tn.[llength $res] $sql $res + } } } diff --git a/ext/fts5/test/fts5ad.test 
b/ext/fts5/test/fts5ad.test index 89a5078816..461fe41e50 100644 --- a/ext/fts5/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -62,6 +62,18 @@ foreach {T create} { INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } + 4 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + BEGIN; + } + + 5 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + BEGIN; + } + } { do_test $T.1 { @@ -194,8 +206,8 @@ foreach {T create} { } foreach {bAsc sql} { - 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC} + 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} } { foreach {tn prefix} { 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} @@ -211,9 +223,12 @@ foreach {T create} { set res [lsort -integer -increasing $res] } set n [llength $res] + if {$T==5} breakpoint do_execsql_test $T.$bAsc.$tn.$n $sql $res } } + + catchsql COMMIT } finish_test diff --git a/manifest b/manifest index 6b87d2a44b..d807680b8c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sfts5\sdoclist-indexes\sthat\soccured\sif\sthe\sfirst\srowid\sof\sthe\sfirst\snon-term\spage\sof\sa\sdoclist\sis\szero. -D 2015-01-27T20:41:00.681 +C Fix\ssome\sproblems\swith\stransactions\sthat\sboth\sread\sand\swrite\san\sfts5\stable. 
+D 2015-01-29T20:59:34.380 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c f2e899fba27ca33c8897635752c4c83a40dcb18d F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h e3b9344d8209c9639825c711662d5d039eb70322 +F ext/fts5/fts5Int.h f7cf9331f34c5a5a83a88f43148161daa4cc0233 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f -F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 +F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c 473e3428a9a637fa6e61d64d8ca3796ec57a58e9 -F ext/fts5/fts5_hash.c 4ab952b75f27d5ed3ef0f3b4f7fa1464744483e8 -F ext/fts5/fts5_index.c ef6c7764a9f4968465936839c8f7e7423d8458c2 +F ext/fts5/fts5_hash.c b54822ca901fb76d79c6a09daecbc464e5fe02c1 +F ext/fts5/fts5_index.c 1550befd9622d009520fdadfa0b42154e0ac54c0 F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -121,9 +121,9 @@ F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 F ext/fts5/test/fts5aa.test 065767c60ad301f77ad95f24369305e13347aa00 -F ext/fts5/test/fts5ab.test 127769288519ed549c57d7e11628dbe5b9952ad5 -F ext/fts5/test/fts5ac.test cc39f7debda6f10ca2422e17163f9b6f078d5560 -F ext/fts5/test/fts5ad.test 6c970531caf865b65f4e1dd9d6d43bd6ea37d754 +F ext/fts5/test/fts5ab.test 5da2e92a8047860b9e22b6fd3990549639d631b1 +F 
ext/fts5/test/fts5ac.test 8b3c2938840da8f3f6a53b1324fb03e0bac12d1e +F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d F ext/fts5/test/fts5ae.test 347c96db06aab23ff00cf6a6b4064a8dbb182e42 F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 @@ -1283,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 5206ca6005bfa9dfc7346d4b89430c9748d32c10 -R dc023966ceb63b949d8070662e553f89 +P f704bc059e06b01f1d68fa7dad89e33eace6c389 +R f45496311c450f4a551203517eb9c071 U dan -Z 99344f3fa1c5e2c02514e48da6c76a56 +Z d89173c476e3f912e9f3a6ccba8c9b1b diff --git a/manifest.uuid b/manifest.uuid index 10e9759b14..a041d59a2d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f704bc059e06b01f1d68fa7dad89e33eace6c389 \ No newline at end of file +0e225b15357765f132c3364b222f9931a608a5b2 \ No newline at end of file From 50821403113ce3e2289a549c4c9e57484943bd40 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 31 Jan 2015 15:23:44 +0000 Subject: [PATCH 085/206] Minor optimizations to fts5 writes. 
FossilOrigin-Name: 1fffe51fa92f1784365140d5b163ab6c690981ae --- ext/fts5/fts5_hash.c | 17 +++++++++++++++-- ext/fts5/fts5_index.c | 11 ++++++++--- ext/fts5/tool/loadfts5.tcl | 13 ++++++++++++- manifest | 16 ++++++++-------- manifest.uuid | 2 +- 5 files changed, 44 insertions(+), 15 deletions(-) diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 78ab2083c3..841c4d3709 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -77,6 +77,17 @@ static void fts5Put4ByteVarint(u8 *a, int iVal){ a[3] = (0x7F & (u8)(iVal)); } +static int fts5Get4ByteVarint(u8 *a, int *pnVarint){ + int iRet = ((int)(a[0] & 0x7F) << 21) + ((int)(a[1] & 0x7F) << 14) + + ((int)(a[2] & 0x7F) << 7) + ((int)(a[3])); + *pnVarint = ( + (iRet & 0xFFFFFF80)==0 ? 1 : + (iRet & 0xFFFFC000)==0 ? 2 : + (iRet & 0xFFE00000)==0 ? 3 : 4 + ); + return iRet; +} + /* ** Allocate a new hash table. */ @@ -398,10 +409,12 @@ int sqlite3Fts5HashIterate( while( rc==SQLITE_OK && iOffnData ){ i64 iDelta; /* Rowid delta value */ int nPoslist; /* Size of position list in bytes */ + int nVarint; iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); iRowid += iDelta; - iOff += fts5GetVarint32(&pPtr[iOff], nPoslist); - rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist); + nPoslist = fts5Get4ByteVarint(&pPtr[iOff], &nVarint); + iOff += 4; + rc = xEntry(pCtx, iRowid, &pPtr[iOff-nVarint], nPoslist+nVarint); iOff += nPoslist; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 1731d3ed70..ed7692bef6 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3392,12 +3392,17 @@ static int fts5FlushNewEntry( Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; Fts5Index *pIdx = p->pIdx; +#ifdef SQLITE_DEBUG + /* The poslist-size varint should already be at the start of the + ** aPoslist/nPoslist buffer. This assert verifies that. 
*/ + int n, i; + i = fts5GetVarint32(aPoslist, n); + assert( nPoslist==(n+i) ); +#endif + /* Append the rowid itself */ fts5WriteAppendRowid(pIdx, &p->writer, iRowid); - /* Append the size of the position list in bytes */ - fts5WriteAppendPoslistInt(pIdx, &p->writer, nPoslist); - /* And the poslist data */ fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist); return pIdx->rc; diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index 034286bc60..97353ffa3b 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -8,6 +8,8 @@ proc loadfile {f} { } set ::nRow 0 +set ::nRowPerDot 1000 + proc load_hierachy {dir} { foreach f [glob -nocomplain -dir $dir *] { if {$::O(limit) && $::nRow>=$::O(limit)} break @@ -16,6 +18,13 @@ proc load_hierachy {dir} { } else { db eval { INSERT INTO t1 VALUES($f, loadfile($f)) } incr ::nRow + + if {($::nRow % $::nRowPerDot)==0} { + puts -nonewline . + if {($::nRow % (65*$::nRowPerDot))==0} { puts "" } + flush stdout + } + } } } @@ -81,7 +90,9 @@ sqlite3 db [lindex $argv end-1] db func loadfile loadfile db transaction { - db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" + catch { + db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" + } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) } diff --git a/manifest b/manifest index d807680b8c..980bed6d09 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssome\sproblems\swith\stransactions\sthat\sboth\sread\sand\swrite\san\sfts5\stable. -D 2015-01-29T20:59:34.380 +C Minor\soptimizations\sto\sfts5\swrites. 
+D 2015-01-31T15:23:44.132 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,8 +111,8 @@ F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c 473e3428a9a637fa6e61d64d8ca3796ec57a58e9 -F ext/fts5/fts5_hash.c b54822ca901fb76d79c6a09daecbc464e5fe02c1 -F ext/fts5/fts5_index.c 1550befd9622d009520fdadfa0b42154e0ac54c0 +F ext/fts5/fts5_hash.c 57febfb06e59ae419ee9ba31667635f70d7c4dd0 +F ext/fts5/fts5_index.c 3239fa41b002230dd47f92937b72e7fa2caa0f90 F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 @@ -147,7 +147,7 @@ F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee -F ext/fts5/tool/loadfts5.tcl 0d39b916550097a3b714060bfc1164a4a9b73f4c +F ext/fts5/tool/loadfts5.tcl 310cb6556b74eda5ce7829a539298c3f35003523 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1283,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f704bc059e06b01f1d68fa7dad89e33eace6c389 -R f45496311c450f4a551203517eb9c071 +P 
0e225b15357765f132c3364b222f9931a608a5b2 +R bf6974c43532ef38693ab05d4629d2ec U dan -Z d89173c476e3f912e9f3a6ccba8c9b1b +Z 8f2a2071b0090e43c31fbea210c6dce0 diff --git a/manifest.uuid b/manifest.uuid index a041d59a2d..734bdb3e9c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0e225b15357765f132c3364b222f9931a608a5b2 \ No newline at end of file +1fffe51fa92f1784365140d5b163ab6c690981ae \ No newline at end of file From 57fec54b53c1c04ba868a327c5ab5ea9cf929a77 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 2 Feb 2015 11:32:20 +0000 Subject: [PATCH 086/206] Fix some problems with building fts5 and fts3 together using the amalgamation. FossilOrigin-Name: fb10bbb9f9c4481e6043d323a3018a4ec68eb0ff --- ext/fts3/unicode/mkunicode.tcl | 10 ++++++---- ext/fts5/fts5Int.h | 1 - ext/fts5/fts5_expr.c | 4 ++-- ext/fts5/fts5_index.c | 2 +- ext/fts5/fts5_tokenize.c | 2 ++ ext/fts5/fts5_unicode2.c | 10 +++++----- main.mk | 6 ++++-- manifest | 26 +++++++++++++------------- manifest.uuid | 2 +- tool/mksqlite3c.tcl | 1 + 10 files changed, 35 insertions(+), 29 deletions(-) diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index b33b05c781..692ba72bf8 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -117,7 +117,7 @@ proc print_rd {map} { puts "** E\"). The resuls of passing a codepoint that corresponds to an" puts "** uppercase letter are undefined." 
puts "*/" - puts "static int remove_diacritic(int c)\{" + puts "static int ${::remove_diacritic}(int c)\{" puts " unsigned short aDia\[\] = \{" puts -nonewline " 0, " set i 1 @@ -626,7 +626,7 @@ proc print_fold {zFunc} { tl_print_table_footer toggle tl_print_ioff_table $liOff - puts { + puts [subst -nocommands { int ret = c; assert( c>=0 ); @@ -659,9 +659,9 @@ proc print_fold {zFunc} { } } - if( bRemoveDiacritic ) ret = remove_diacritic(ret); - } + if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); } + }] foreach entry $lHigh { tl_print_if_entry $entry @@ -772,6 +772,7 @@ if {[llength $argv]<2} usage set unicodedata.txt [lindex $argv end] set casefolding.txt [lindex $argv end-1] +set remove_diacritic remove_diacritic set generate_test_code 0 set generate_fts5_code 0 set function_prefix "sqlite3Fts" @@ -783,6 +784,7 @@ for {set i 0} {$i < [llength $argv]-2} {incr i} { -fts5 { set function_prefix sqlite3Fts5 set generate_fts5_code 1 + set remove_diacritic fts5_remove_diacritic } default { usage diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index d31ac07210..034e6866db 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -16,7 +16,6 @@ #include "fts5.h" #include "sqliteInt.h" -#include "fts3_tokenizer.h" /* diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index bbf8c68acb..6966fc1975 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -25,7 +25,7 @@ typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. 
*/ -void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(size_t)); +void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); @@ -179,7 +179,7 @@ static int fts5ExprGetToken( return tok; } -static void *fts5ParseAlloc(size_t t){ return sqlite3_malloc((int)t); } +static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); } static void fts5ParseFree(void *p){ sqlite3_free(p); } int sqlite3Fts5ExprNew( diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index ed7692bef6..6916c4254e 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3793,7 +3793,7 @@ static void fts5IndexIntegrityCheckSegment( } fts5DlidxIterFree(pDlidx); - // fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); + fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); } } diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index b62f50bf82..12bf242055 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -561,6 +561,7 @@ struct PorterRule { int nOutput; }; +#if 0 static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ int ret = -1; int nBuf = *pnBuf; @@ -584,6 +585,7 @@ static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ return ret; } +#endif static int fts5PorterIsVowel(char c, int bYIsVowel){ return ( diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c index 5692bf2b39..972e7ed975 100644 --- a/ext/fts5/fts5_unicode2.c +++ b/ext/fts5/fts5_unicode2.c @@ -38,7 +38,7 @@ int sqlite3Fts5UnicodeIsalnum(int c){ ** C. It is not possible to represent a range larger than 1023 codepoints ** using this format. 
*/ - const static unsigned int aEntry[] = { + static const unsigned int aEntry[] = { 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, @@ -130,7 +130,7 @@ int sqlite3Fts5UnicodeIsalnum(int c){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; - int iRes; + int iRes = 0; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ @@ -158,7 +158,7 @@ int sqlite3Fts5UnicodeIsalnum(int c){ ** E"). The resuls of passing a codepoint that corresponds to an ** uppercase letter are undefined. */ -static int remove_diacritic(int c){ +static int fts5_remove_diacritic(int c){ unsigned short aDia[] = { 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, @@ -201,7 +201,7 @@ static int remove_diacritic(int c){ } assert( key>=aDia[iRes] ); return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? 
c : (int)aChar[iRes]); -}; +} /* @@ -351,7 +351,7 @@ int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ } } - if( bRemoveDiacritic ) ret = remove_diacritic(ret); + if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret); } else if( c>=66560 && c<66600 ){ diff --git a/main.mk b/main.mk index 3f0a124cba..04c3436938 100644 --- a/main.mk +++ b/main.mk @@ -243,7 +243,8 @@ SRC += \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ $(TOP)/ext/fts5/fts5_storage.c \ - $(TOP)/ext/fts5/fts5_tokenize.c + $(TOP)/ext/fts5/fts5_tokenize.c \ + $(TOP)/ext/fts5/fts5_unicode2.c # Generated source code files @@ -634,7 +635,8 @@ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y mv fts5parse.c fts5parse.c.orig - cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' > fts5parse.c + cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \ + | sed 's/TOKEN/FTS5TOKEN/g' > fts5parse.c userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) diff --git a/manifest b/manifest index 00214ef287..e6f897392a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. -D 2015-02-02T09:40:36.284 +C Fix\ssome\sproblems\swith\sbuilding\sfts5\sand\sfts3\stogether\susing\sthe\samalgamation. 
+D 2015-02-02T11:32:20.159 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -102,21 +102,21 @@ F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 3986531f2fc0ceca0c89c31ec7d0589b6adb19d6 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e +F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c f2e899fba27ca33c8897635752c4c83a40dcb18d F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h f7cf9331f34c5a5a83a88f43148161daa4cc0233 +F ext/fts5/fts5Int.h 34040674eb25f3de8a0e57423a3155aef6312541 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 -F ext/fts5/fts5_expr.c 473e3428a9a637fa6e61d64d8ca3796ec57a58e9 +F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b F ext/fts5/fts5_hash.c 57febfb06e59ae419ee9ba31667635f70d7c4dd0 -F ext/fts5/fts5_index.c 3239fa41b002230dd47f92937b72e7fa2caa0f90 +F ext/fts5/fts5_index.c beced7a9f360c2bf44a9f987c0a8735b6868ffbf F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 -F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 -F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 +F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 +F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F 
ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 @@ -197,7 +197,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 13e0f64976aa3df394b06b43006d8b3625cd7719 +F main.mk e3b4499dc6c442e5bba2474ef9f7fd83f859a2cf F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1251,7 +1251,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 07a5124cf2dbafa1b375eefcf8ac4227028b0f8b F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 9ef48e1748dce7b844f67e2450ff9dfeb0fb4ab5 -F tool/mksqlite3c.tcl c63fdd4a6a264e1b03c3cc67ea48d496c51fbc16 +F tool/mksqlite3c.tcl d04425b480c822c637524f5f058672182fbed2b0 F tool/mksqlite3h.tcl ba24038056f51fde07c0079c41885ab85e2cff12 F tool/mksqlite3internalh.tcl eb994013e833359137eb53a55acdad0b5ae1049b F tool/mkvsix.tcl 52a4c613707ac34ae9c226e5ccc69cb948556105 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1fffe51fa92f1784365140d5b163ab6c690981ae 42d5601739c90434e5adfda8fa99ef7b903877db -R 668d2feec869c09e5368ca6aaf5507f3 +P 76212f2c9a3c3ff0e238d6dad776938c6af674e6 +R 6b8af163b7b0fb43dcbdad20d71a2bdf U dan -Z dfa214b77fdf4b594624abdda6f53f9b +Z 3017212b5b79cafa2f215942f5826316 diff --git a/manifest.uuid b/manifest.uuid index c32532fc48..516ce8ea8a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -76212f2c9a3c3ff0e238d6dad776938c6af674e6 \ No newline 
at end of file +fb10bbb9f9c4481e6043d323a3018a4ec68eb0ff \ No newline at end of file diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index d17e26a63e..db330679bd 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -342,6 +342,7 @@ foreach file { fts5parse.c fts5_storage.c fts5_tokenize.c + fts5_unicode2.c rtree.c icu.c From 601415e2bc2116aaf40ce7484cdb739e80730ab4 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 2 Feb 2015 11:58:21 +0000 Subject: [PATCH 087/206] Ensure generated header file fts5parse.h is included in sqlite3.c. FossilOrigin-Name: bc7be2fcfd29d6f1b567b69b3b20896eceb99798 --- main.mk | 3 ++- manifest | 14 +++++++------- manifest.uuid | 2 +- tool/mksqlite3c.tcl | 1 + 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/main.mk b/main.mk index 04c3436938..a3e80df531 100644 --- a/main.mk +++ b/main.mk @@ -241,7 +241,7 @@ SRC += \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ - fts5parse.c \ + fts5parse.c fts5parse.h \ $(TOP)/ext/fts5/fts5_storage.c \ $(TOP)/ext/fts5/fts5_tokenize.c \ $(TOP)/ext/fts5/fts5_unicode2.c @@ -414,6 +414,7 @@ EXTHDR += \ $(TOP)/ext/icu/sqliteicu.h EXTHDR += \ $(TOP)/ext/fts5/fts5Int.h \ + fts5parse.h \ $(TOP)/ext/fts5/fts5.h EXTHDR += \ $(TOP)/ext/userauth/sqlite3userauth.h diff --git a/manifest b/manifest index e6f897392a..9aae583c5c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssome\sproblems\swith\sbuilding\sfts5\sand\sfts3\stogether\susing\sthe\samalgamation. -D 2015-02-02T11:32:20.159 +C Ensure\sgenerated\sheader\sfile\sfts5parse.h\sis\sincluded\sin\ssqlite3.c. 
+D 2015-02-02T11:58:21.261 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -197,7 +197,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk e3b4499dc6c442e5bba2474ef9f7fd83f859a2cf +F main.mk f53de1926f080336a0eb0f4807f122c5ee63aae3 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1251,7 +1251,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 07a5124cf2dbafa1b375eefcf8ac4227028b0f8b F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 9ef48e1748dce7b844f67e2450ff9dfeb0fb4ab5 -F tool/mksqlite3c.tcl d04425b480c822c637524f5f058672182fbed2b0 +F tool/mksqlite3c.tcl ce1e8a27ecf2d20194325237a3bba6b40e7eed3b F tool/mksqlite3h.tcl ba24038056f51fde07c0079c41885ab85e2cff12 F tool/mksqlite3internalh.tcl eb994013e833359137eb53a55acdad0b5ae1049b F tool/mkvsix.tcl 52a4c613707ac34ae9c226e5ccc69cb948556105 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 76212f2c9a3c3ff0e238d6dad776938c6af674e6 -R 6b8af163b7b0fb43dcbdad20d71a2bdf +P fb10bbb9f9c4481e6043d323a3018a4ec68eb0ff +R 5aa72f287ea58e03f6201c3554f79f11 U dan -Z 3017212b5b79cafa2f215942f5826316 +Z faf1a2c366b2fc1b07a53967e5d081da diff --git a/manifest.uuid b/manifest.uuid index 516ce8ea8a..419358cbac 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-fb10bbb9f9c4481e6043d323a3018a4ec68eb0ff \ No newline at end of file +bc7be2fcfd29d6f1b567b69b3b20896eceb99798 \ No newline at end of file diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index db330679bd..bd2f5e90a7 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -99,6 +99,7 @@ foreach hdr { fts3_tokenizer.h fts5.h fts5Int.h + fts5parse.h hash.h hwtime.h keywordhash.h From 6d8f42ec8139305b5cc809529579d02c6ae52275 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 25 Feb 2015 19:24:37 +0000 Subject: [PATCH 088/206] Instead of the 4-byte fields, use regular varints for the poslist-size field in fts5_hash.c. FossilOrigin-Name: 7eb022d7e5fdb180af823c82c47c938e4a7a355f --- ext/fts5/fts5Int.h | 2 ++ ext/fts5/fts5_hash.c | 42 ++++++++++++++++++++++++-------------- ext/fts5/fts5_index.c | 10 ++++++++- ext/fts5/tool/loadfts5.tcl | 10 ++++++++- manifest | 18 ++++++++-------- manifest.uuid | 2 +- 6 files changed, 57 insertions(+), 27 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 034e6866db..90df81a016 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -352,6 +352,8 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) +int sqlite3Fts5GetVarintLen(u32 iVal); + /* ** End of interface to code in fts5_index.c. 
**************************************************************************/ diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 841c4d3709..4b85b2af23 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -184,6 +184,23 @@ static int fts5HashResize(Fts5Hash *pHash){ return SQLITE_OK; } +static void fts5HashAddPoslistSize(Fts5HashEntry *p){ + if( p->iSzPoslist ){ + u8 *pPtr = (u8*)p; + int nSz = p->nData - p->iSzPoslist - 1; + + if( nSz<=127 ){ + pPtr[p->iSzPoslist] = nSz; + }else{ + int nByte = sqlite3Fts5GetVarintLen((u32)nSz); + memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); + sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz); + p->nData += (nByte-1); + } + p->iSzPoslist = 0; + } +} + int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ @@ -221,7 +238,7 @@ int sqlite3Fts5HashWrite( p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; - p->nData += 4; + p->nData += 1; p->iRowid = iRowid; p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; @@ -233,7 +250,7 @@ int sqlite3Fts5HashWrite( ** is: ** ** + 9 bytes for a new rowid, - ** + 4 bytes reserved for the "poslist size" varint. + ** + 4 byte reserved for the "poslist size" varint. ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max). @@ -255,11 +272,10 @@ int sqlite3Fts5HashWrite( /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. 
*/ if( iRowid!=p->iRowid ){ - assert( p->iSzPoslist>0 ); - fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + fts5HashAddPoslistSize(p); p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; - p->nData += 4; + p->nData += 1; p->iCol = 0; p->iPos = 0; p->iRowid = iRowid; @@ -393,14 +409,13 @@ int sqlite3Fts5HashIterate( while( pList ){ Fts5HashEntry *pNext = pList->pScanNext; if( rc==SQLITE_OK ){ - const int nSz = pList->nData - pList->iSzPoslist - 4; const int nKey = strlen(pList->zKey); i64 iRowid = 0; u8 *pPtr = (u8*)pList; int iOff = sizeof(Fts5HashEntry) + nKey + 1; /* Fill in the final poslist size field */ - fts5Put4ByteVarint(&pPtr[pList->iSzPoslist], nSz); + fts5HashAddPoslistSize(pList); /* Issue the new-term callback */ rc = xTerm(pCtx, pList->zKey, nKey); @@ -412,10 +427,9 @@ int sqlite3Fts5HashIterate( int nVarint; iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); iRowid += iDelta; - nPoslist = fts5Get4ByteVarint(&pPtr[iOff], &nVarint); - iOff += 4; - rc = xEntry(pCtx, iRowid, &pPtr[iOff-nVarint], nPoslist+nVarint); - iOff += nPoslist; + nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist); + rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint); + iOff += nVarint+nPoslist; } /* Issue the term-done callback */ @@ -445,8 +459,7 @@ int sqlite3Fts5HashQuery( } if( p ){ - u8 *pPtr = (u8*)p; - fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + fts5HashAddPoslistSize(p); *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ @@ -482,9 +495,8 @@ void sqlite3Fts5HashScanEntry( ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ - u8 *pPtr = (u8*)p; int nTerm = strlen(p->zKey); - fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + fts5HashAddPoslistSize(p); *pzTerm = p->zKey; *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 
6916c4254e..b90e5308d6 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -371,7 +371,7 @@ struct Fts5Structure { struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ - Fts5Buffer term; /* Buffer containing previous term on page */ + Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5SegWriter { int iIdx; /* Index to write to */ @@ -667,6 +667,14 @@ int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ } } +int sqlite3Fts5GetVarintLen(u32 iVal){ + if( iVal<(1 << 7 ) ) return 1; + if( iVal<(1 << 14) ) return 2; + if( iVal<(1 << 21) ) return 3; + if( iVal<(1 << 28) ) return 4; + return 5; +} + /* ** Allocate and return a buffer at least nByte bytes in size. ** diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index 97353ffa3b..feb92ec162 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -36,6 +36,7 @@ proc usage {} { puts stderr " -fts4 (use fts4 instead of fts5)" puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" + puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" @@ -45,6 +46,7 @@ proc usage {} { set O(vtab) fts5 set O(tok) "" set O(limit) 0 +set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 @@ -65,6 +67,10 @@ for {set i 0} {$i < $nOpt} {incr i} { set O(tok) ", tokenize=porter" } + -delete { + set O(delete) 1 + } + -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] @@ -86,7 +92,9 @@ for {set i 0} {$i < $nOpt} {incr i} { } } -sqlite3 db [lindex $argv end-1] +set dbfile [lindex $argv end-1] +if {$O(delete)} { file delete -force $dbfile } +sqlite3 db $dbfile db func loadfile loadfile db transaction { diff --git a/manifest b/manifest index 9aae583c5c..e43204b803 
100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Ensure\sgenerated\sheader\sfile\sfts5parse.h\sis\sincluded\sin\ssqlite3.c. -D 2015-02-02T11:58:21.261 +C Instead\sof\sthe\s4-byte\sfields,\suse\sregular\svarints\sfor\sthe\sposlist-size\sfield\sin\sfts5_hash.c. +D 2015-02-25T19:24:37.378 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c f2e899fba27ca33c8897635752c4c83a40dcb18d F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h 34040674eb25f3de8a0e57423a3155aef6312541 +F ext/fts5/fts5Int.h 7c2af493177b0e4e0290b869f19cd6d1d671d5ac F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b -F ext/fts5/fts5_hash.c 57febfb06e59ae419ee9ba31667635f70d7c4dd0 -F ext/fts5/fts5_index.c beced7a9f360c2bf44a9f987c0a8735b6868ffbf +F ext/fts5/fts5_hash.c 9032dd35bf8da6f9d4fc8c955c348dd6d229d8e4 +F ext/fts5/fts5_index.c 220321d06ae87496e22bb92fe52b65d84549ef0e F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 @@ -147,7 +147,7 @@ F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee -F ext/fts5/tool/loadfts5.tcl 
310cb6556b74eda5ce7829a539298c3f35003523 +F ext/fts5/tool/loadfts5.tcl 1e126891d14ab85dcdb0fac7755a4cd5ba52e8b8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fb10bbb9f9c4481e6043d323a3018a4ec68eb0ff -R 5aa72f287ea58e03f6201c3554f79f11 +P bc7be2fcfd29d6f1b567b69b3b20896eceb99798 +R 79d1ace6d8bc7671e8fd6ed98da16ffb U dan -Z faf1a2c366b2fc1b07a53967e5d081da +Z a35e490814ed578ad02ebd09e2509be2 diff --git a/manifest.uuid b/manifest.uuid index 419358cbac..c4c9f580d3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bc7be2fcfd29d6f1b567b69b3b20896eceb99798 \ No newline at end of file +7eb022d7e5fdb180af823c82c47c938e4a7a355f \ No newline at end of file From 4a7e11c6299c2e4816531cde21dafc086d609a8f Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 26 Feb 2015 14:54:03 +0000 Subject: [PATCH 089/206] Fix an fts5 bug in large incremental merges. 
FossilOrigin-Name: 208e3cb6b6dc8c7d824b64dec2034004c9fcbba5 --- ext/fts5/fts5_index.c | 32 +++++++++++++++++++------------- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index b90e5308d6..f24be525d9 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1183,6 +1183,7 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ } #if 0 +static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*); static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ int rc = SQLITE_OK; Fts5Buffer buf; @@ -1201,8 +1202,9 @@ static int fts5SegmentSize(Fts5StructureSegment *pSeg){ } /* -** Return a copy of index structure pStruct. Except, promote as many segments -** as possible to level iPromote. If an OOM occurs, NULL is returned. +** Return a copy of index structure pStruct. Except, promote as many +** segments as possible to level iPromote. If an OOM occurs, NULL is +** returned. 
*/ static void fts5StructurePromoteTo( Fts5Index *p, @@ -1213,17 +1215,19 @@ static void fts5StructurePromoteTo( int il, is; Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; - for(il=iPromote+1; il<pStruct->nLevel; il++){ - Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; - if( pLvl->nMerge ) return; - for(is=pLvl->nSeg-1; is>=0; is--){ - int sz = fts5SegmentSize(&pLvl->aSeg[is]); - if( sz>szPromote ) return; - fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); - if( p->rc ) return; - memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); - pOut->nSeg++; - pLvl->nSeg--; + if( pOut->nMerge==0 ){ + for(il=iPromote+1; il<pStruct->nLevel; il++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; + if( pLvl->nMerge ) return; + for(is=pLvl->nSeg-1; is>=0; is--){ + int sz = fts5SegmentSize(&pLvl->aSeg[is]); + if( sz>szPromote ) return; + fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); + if( p->rc ) return; + memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); + pOut->nSeg++; + pLvl->nSeg--; + } } } } @@ -3343,6 +3347,7 @@ static void fts5IndexWork( } *ppStruct = pStruct; } + } } @@ -3464,6 +3469,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ fts5StructurePromote(p, 0, pStruct); } + if( p->pConfig->nAutomerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); fts5IndexCrisisMerge(p, iHash, &pStruct); fts5StructureWrite(p, iHash, pStruct); diff --git a/manifest b/manifest index e43204b803..94a134ea1d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Instead\sof\sthe\s4-byte\sfields,\suse\sregular\svarints\sfor\sthe\sposlist-size\sfield\sin\sfts5_hash.c. -D 2015-02-25T19:24:37.378 +C Fix\san\sfts5\sbug\sin\slarge\sincremental\smerges.
+D 2015-02-26T14:54:03.688 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b F ext/fts5/fts5_hash.c 9032dd35bf8da6f9d4fc8c955c348dd6d229d8e4 -F ext/fts5/fts5_index.c 220321d06ae87496e22bb92fe52b65d84549ef0e +F ext/fts5/fts5_index.c 97ce5c919be5a70b623f89c66c60bda15408d577 F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bc7be2fcfd29d6f1b567b69b3b20896eceb99798 -R 79d1ace6d8bc7671e8fd6ed98da16ffb +P 7eb022d7e5fdb180af823c82c47c938e4a7a355f +R a31df37a1e652372bc458cec184c4145 U dan -Z a35e490814ed578ad02ebd09e2509be2 +Z ba5b725699c58a1a7e341156a5c76451 diff --git a/manifest.uuid b/manifest.uuid index c4c9f580d3..32c499775f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7eb022d7e5fdb180af823c82c47c938e4a7a355f \ No newline at end of file +208e3cb6b6dc8c7d824b64dec2034004c9fcbba5 \ No newline at end of file From dfdc4b46134c22550e48cf32224f6b990cbeabf5 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 26 Feb 2015 20:49:09 +0000 Subject: [PATCH 090/206] Optimize copying data from fts5 in-memory hash tables to top level segments. 
FossilOrigin-Name: 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac --- ext/fts5/fts5_hash.c | 54 +----------- ext/fts5/fts5_index.c | 192 ++++++++++++++++++++++++++++++------------ manifest | 14 +-- manifest.uuid | 2 +- 4 files changed, 150 insertions(+), 112 deletions(-) diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 4b85b2af23..eb1f3d1b1a 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -393,55 +393,6 @@ static int fts5HashEntrySort( return SQLITE_OK; } -int sqlite3Fts5HashIterate( - Fts5Hash *pHash, - void *pCtx, - int (*xTerm)(void*, const char*, int), - int (*xEntry)(void*, i64, const u8*, int), - int (*xTermDone)(void*) -){ - Fts5HashEntry *pList; - int rc; - - rc = fts5HashEntrySort(pHash, 0, 0, &pList); - if( rc==SQLITE_OK ){ - memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); - while( pList ){ - Fts5HashEntry *pNext = pList->pScanNext; - if( rc==SQLITE_OK ){ - const int nKey = strlen(pList->zKey); - i64 iRowid = 0; - u8 *pPtr = (u8*)pList; - int iOff = sizeof(Fts5HashEntry) + nKey + 1; - - /* Fill in the final poslist size field */ - fts5HashAddPoslistSize(pList); - - /* Issue the new-term callback */ - rc = xTerm(pCtx, pList->zKey, nKey); - - /* Issue the xEntry callbacks */ - while( rc==SQLITE_OK && iOff<pList->nData ){ - i64 iDelta; /* Rowid delta value */ - int nPoslist; /* Size of position list in bytes */ - int nVarint; - iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); - iRowid += iDelta; - nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist); - rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint); - iOff += nVarint+nPoslist; - } - - /* Issue the term-done callback */ - if( rc==SQLITE_OK ) rc = xTermDone(pCtx); - } - sqlite3_free(pList); - pList = pNext; - } - } - return rc; -} - /* ** Query the hash table for a doclist associated with term pTerm/nTerm.
*/ @@ -478,9 +429,8 @@ void sqlite3Fts5HashScanInit( } void sqlite3Fts5HashScanNext(Fts5Hash *p){ - if( p->pScan ){ - p->pScan = p->pScan->pScanNext; - } + Fts5HashEntry *pScan = p->pScan; + if( pScan ) p->pScan = pScan->pScanNext; } int sqlite3Fts5HashScanEof(Fts5Hash *p){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index f24be525d9..481cd75fa7 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -113,7 +113,7 @@ ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) -** poslist: first poslist +** poslist: next poslist ** } ** 0x00 byte ** @@ -2677,7 +2677,7 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){ i64 iKey = FTS5_DOCLIST_IDX_ROWID( pWriter->iIdx, pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty @@ -3004,12 +3004,15 @@ static void fts5WriteFinish( ){ int i; if( p->rc==SQLITE_OK ){ - *pnLeaf = pWriter->aWriter[0].pgno; - if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){ + Fts5PageWriter *pLeaf = &pWriter->aWriter[0]; + if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ *pnLeaf = 0; *pnHeight = 0; }else{ - fts5WriteFlushLeaf(p, pWriter); + if( pLeaf->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + *pnLeaf = pLeaf->pgno-1; if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ fts5WriteBtreeGrow(p, pWriter); } @@ -3381,44 +3384,20 @@ struct Fts5FlushCtx { Fts5SegWriter writer; }; -static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - int rc = SQLITE_OK; - fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm); - return rc; -} - -static int fts5FlushTermDone(void *pCtx){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - int rc = SQLITE_OK; - /* Write the doclist terminator */ - fts5WriteAppendZerobyte(p->pIdx, &p->writer); - 
return rc; -} - -static int fts5FlushNewEntry( - void *pCtx, - i64 iRowid, - const u8 *aPoslist, - int nPoslist -){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - Fts5Index *pIdx = p->pIdx; - -#ifdef SQLITE_DEBUG - /* The poslist-size varint should already be at the start of the - ** aPoslist/nPoslist buffer. This assert verifies that. */ - int n, i; - i = fts5GetVarint32(aPoslist, n); - assert( nPoslist==(n+i) ); -#endif - - /* Append the rowid itself */ - fts5WriteAppendRowid(pIdx, &p->writer, iRowid); - - /* And the poslist data */ - fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist); - return pIdx->rc; +/* +** Buffer aBuf[] contains a list of varints, all small enough to fit +** in a 32-bit integer. Return the size of the largest prefix of this +** list nMax bytes or less in size. +*/ +static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ + int ret = 0; + while( 1 ){ + u32 dummy; + int i = fts5GetVarint32(&aBuf[ret], dummy); + if( (ret + i) > nMax ) break; + ret += i; + } + return ret; } /* @@ -3429,6 +3408,7 @@ static int fts5FlushNewEntry( ** already occurred, this function is a no-op. 
*/ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ + Fts5Hash *pHash = p->apHash[iHash]; Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */ @@ -3439,19 +3419,127 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ + const int pgsz = p->pConfig->pgsz; + Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ - int rc; - Fts5FlushCtx ctx; + Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ - fts5WriteInit(p, &ctx.writer, iHash, iSegid); - ctx.pIdx = p; + Fts5SegWriter writer; + fts5WriteInit(p, &writer, iHash, iSegid); - rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, - fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone - ); - if( p->rc==SQLITE_OK ) p->rc = rc; - fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); + /* Pre-allocate the buffer used to assemble leaf pages to the target + ** page size. */ + assert( pgsz>0 ); + pBuf = &writer.aWriter[0].buf; + fts5BufferGrow(&p->rc, pBuf, pgsz + 20); + + /* Begin scanning through hash table entries. */ + if( p->rc==SQLITE_OK ){ + memset(pBuf->p, 0, 4); + pBuf->n = 4; + sqlite3Fts5HashScanInit(pHash, 0, 0); + } + + while( 0==sqlite3Fts5HashScanEof(pHash) ){ + const char *zTerm; + int nTerm; + const u8 *pDoclist; + int nDoclist; + + sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist); + nTerm = strlen(zTerm); + + /* Decide if the term fits on the current leaf. If not, flush it + ** to disk. */ + if( (pBuf->n + nTerm + 2) > pgsz ){ + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + if( (nTerm + 32) > pBuf->nSpace ){ + fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); + } + } + + /* Write the term to the leaf. 
And push it up into the b-tree hierarchy */ + if( writer.bFirstTermInPage==0 ){ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0); + }else{ + fts5PutU16(&pBuf->p[2], pBuf->n); + writer.bFirstTermInPage = 0; + if( writer.aWriter[0].pgno!=1 ){ + fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm); + pBuf = &writer.aWriter[0].buf; + } + } + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm); + fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm); + + if( pgsz>=(pBuf->n + nDoclist + 1) ){ + /* The entire doclist will fit on the current leaf. */ + fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist); + }else{ + i64 iRowid = 0; + i64 iDelta = 0; + int iOff = 0; + int bFirstDocid = 0; + + /* The entire doclist will not fit on this leaf. The following + ** loop iterates through the poslists that make up the current + ** doclist. */ + while( iOffp[0], pBuf->n); /* first docid on page */ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); + bFirstDocid = 0; + }else{ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); + } + assert( pBuf->n<=pBuf->nSpace ); + + if( (pBuf->n + nCopy) <= pgsz ){ + /* The entire poslist will fit on the current leaf. So copy + ** it in one go. */ + fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]); + }else{ + /* The entire poslist will not fit on this leaf. So it needs + ** to be broken into sections. The only qualification being + ** that each varint must be stored contiguously. 
*/ + const u8 *pPoslist = &pDoclist[iOff]; + int iPos = 0; + while( 1 ){ + int nSpace = pgsz - pBuf->n; + int n; + if( (nCopy - iPos)<=nSpace ){ + n = nCopy - iPos; + }else{ + n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + } + fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]); + iPos += n; + if( iPos>=nCopy ) break; + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + } + bFirstDocid = 1; + } + assert( pBuf->n<=pgsz ); + iOff += nCopy; + } + } + + pBuf->p[pBuf->n++] = '\0'; + assert( pBuf->n<=pBuf->nSpace ); + sqlite3Fts5HashScanNext(pHash); + } + sqlite3Fts5HashClear(pHash); + fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ diff --git a/manifest b/manifest index 94a134ea1d..c27cd5f2c0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sbug\sin\slarge\sincremental\smerges. -D 2015-02-26T14:54:03.688 +C Optimize\scopying\sdata\sfrom\sfts5\sin-memory\shash\stables\sto\stop\slevel\ssegments. 
+D 2015-02-26T20:49:09.566 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,8 +111,8 @@ F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b -F ext/fts5/fts5_hash.c 9032dd35bf8da6f9d4fc8c955c348dd6d229d8e4 -F ext/fts5/fts5_index.c 97ce5c919be5a70b623f89c66c60bda15408d577 +F ext/fts5/fts5_hash.c 323099a445bf8f608af069e2d8ff4bb93db9904c +F ext/fts5/fts5_index.c 7a9de0c033a8f702f8e3659a23c2ea31bbbb789b F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7eb022d7e5fdb180af823c82c47c938e4a7a355f -R a31df37a1e652372bc458cec184c4145 +P 208e3cb6b6dc8c7d824b64dec2034004c9fcbba5 +R 2ae35fa8ad1fd2b74f86acf33cc74d9f U dan -Z ba5b725699c58a1a7e341156a5c76451 +Z 84061e5477aeb4ed2552f0a9aa275eda diff --git a/manifest.uuid b/manifest.uuid index 32c499775f..3538b76cd7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -208e3cb6b6dc8c7d824b64dec2034004c9fcbba5 \ No newline at end of file +8e3ca6323a2beab5f04250e24ae15b159d2aa0ac \ No newline at end of file From 5db7e2ca83d6650bb480870cc40e87b68d8154e1 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 27 Feb 2015 07:23:26 +0000 Subject: [PATCH 091/206] Fix suffix and prefix compression of terms in top-level fts5 segments. 
And a crash that could follow an OOM condition. FossilOrigin-Name: bb104b3646c6f07ed002be7360b08433ee7980d4 --- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_hash.c | 4 ++-- ext/fts5/fts5_index.c | 25 ++++++++++++++++++------- manifest | 16 ++++++++-------- manifest.uuid | 2 +- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 90df81a016..f31548ee21 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -400,7 +400,7 @@ int sqlite3Fts5HashQuery( int *pnDoclist /* OUT: Size of doclist in bytes */ ); -void sqlite3Fts5HashScanInit( +int sqlite3Fts5HashScanInit( Fts5Hash*, /* Hash table to query */ const char *pTerm, int nTerm /* Query prefix */ ); diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index eb1f3d1b1a..7c63fad19c 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -421,11 +421,11 @@ int sqlite3Fts5HashQuery( return SQLITE_OK; } -void sqlite3Fts5HashScanInit( +int sqlite3Fts5HashScanInit( Fts5Hash *p, /* Hash table to query */ const char *pTerm, int nTerm /* Query prefix */ ){ - fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); + return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); } void sqlite3Fts5HashScanNext(Fts5Hash *p){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 481cd75fa7..79c6d007de 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2079,9 +2079,10 @@ static void fts5SegIterHashInit( int n = 0; assert( pHash ); + assert( p->rc==SQLITE_OK ); if( pTerm==0 || (iIdx==0 && (flags & FTS5INDEX_QUERY_PREFIX)) ){ - sqlite3Fts5HashScanInit(pHash, (const char*)pTerm, nTerm); + p->rc = sqlite3Fts5HashScanInit(pHash, (const char*)pTerm, nTerm); sqlite3Fts5HashScanEntry(pHash, (const char**)&z, &pList, &nList); n = (z ? 
strlen((const char*)z) : 0); }else{ @@ -3424,6 +3425,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ + const char *zPrev = 0; Fts5SegWriter writer; fts5WriteInit(p, &writer, iHash, iSegid); @@ -3438,14 +3440,15 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ if( p->rc==SQLITE_OK ){ memset(pBuf->p, 0, 4); pBuf->n = 4; - sqlite3Fts5HashScanInit(pHash, 0, 0); + p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } - while( 0==sqlite3Fts5HashScanEof(pHash) ){ + while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ const char *zTerm; int nTerm; const u8 *pDoclist; int nDoclist; + int nSuffix; /* Size of term suffix */ sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist); nTerm = strlen(zTerm); @@ -3462,17 +3465,24 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ /* Write the term to the leaf. 
And push it up into the b-tree hierarchy */ if( writer.bFirstTermInPage==0 ){ - pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0); + int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, zTerm); + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); + nSuffix = nTerm - nPre; }else{ fts5PutU16(&pBuf->p[2], pBuf->n); writer.bFirstTermInPage = 0; if( writer.aWriter[0].pgno!=1 ){ - fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm); + int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, zTerm); + fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); pBuf = &writer.aWriter[0].buf; + assert( nPren += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm); - fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm); + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); + fts5BufferAppendBlob(&p->rc, pBuf, + nSuffix, (const u8*)&zTerm[nTerm-nSuffix] + ); if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ @@ -3536,6 +3546,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ pBuf->p[pBuf->n++] = '\0'; assert( pBuf->n<=pBuf->nSpace ); + zPrev = zTerm; sqlite3Fts5HashScanNext(pHash); } sqlite3Fts5HashClear(pHash); diff --git a/manifest b/manifest index c27cd5f2c0..9484610f04 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Optimize\scopying\sdata\sfrom\sfts5\sin-memory\shash\stables\sto\stop\slevel\ssegments. -D 2015-02-26T20:49:09.566 +C Fix\ssuffix\sand\sprefix\scompression\sof\sterms\sin\stop-level\sfts5\ssegments.\sAnd\sa\scrash\sthat\scould\sfollow\san\sOOM\scondition. 
+D 2015-02-27T07:23:26.074 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c f2e899fba27ca33c8897635752c4c83a40dcb18d F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h 7c2af493177b0e4e0290b869f19cd6d1d671d5ac +F ext/fts5/fts5Int.h 5c8efea3d0a1ccc70194225f8c402a1732ed5ad5 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b -F ext/fts5/fts5_hash.c 323099a445bf8f608af069e2d8ff4bb93db9904c -F ext/fts5/fts5_index.c 7a9de0c033a8f702f8e3659a23c2ea31bbbb789b +F ext/fts5/fts5_hash.c 63ad0066ec83525f0dad5b416d9db6e06f7d39ac +F ext/fts5/fts5_index.c 14549572551b60d99413f9bd2043ed2be004a328 F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 208e3cb6b6dc8c7d824b64dec2034004c9fcbba5 -R 2ae35fa8ad1fd2b74f86acf33cc74d9f +P 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac +R f04df10b48e4ca5c813b34f1122b4215 U dan -Z 84061e5477aeb4ed2552f0a9aa275eda +Z 4695fccf9ba60f7f53ddc85dbb7a72b7 diff --git a/manifest.uuid b/manifest.uuid index 3538b76cd7..b080cab9ec 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-8e3ca6323a2beab5f04250e24ae15b159d2aa0ac \ No newline at end of file +bb104b3646c6f07ed002be7360b08433ee7980d4 \ No newline at end of file From f400282bdf8e184b89437f6ae8d4c5e9297a81ab Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 27 Feb 2015 09:41:10 +0000 Subject: [PATCH 092/206] Further minor optimizations to flushing fts5 data to disk. FossilOrigin-Name: a07dcca9ef3821a6719ef9dbbc8ed861fa005035 --- ext/fts5/fts5_index.c | 17 +++++++++++------ manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 79c6d007de..3fe408a86e 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3401,6 +3401,12 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ return ret; } +#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ + assert( pBuf->nSpace>=(pBuf->n+nBlob) ); \ + memcpy(&pBuf->p[pBuf->n], pBlob, nBlob); \ + pBuf->n += nBlob; \ +} + /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. @@ -3460,6 +3466,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ pBuf = &writer.aWriter[0].buf; if( (nTerm + 32) > pBuf->nSpace ){ fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); + if( p->rc ) break; } } @@ -3480,13 +3487,11 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ nSuffix = nTerm; } pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); - fts5BufferAppendBlob(&p->rc, pBuf, - nSuffix, (const u8*)&zTerm[nTerm-nSuffix] - ); + fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. 
*/ - fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist); + fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ i64 iRowid = 0; i64 iDelta = 0; @@ -3516,7 +3521,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ if( (pBuf->n + nCopy) <= pgsz ){ /* The entire poslist will fit on the current leaf. So copy ** it in one go. */ - fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]); + fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); }else{ /* The entire poslist will not fit on this leaf. So it needs ** to be broken into sections. The only qualification being @@ -3531,7 +3536,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); } - fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]); + fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; if( iPos>=nCopy ) break; fts5WriteFlushLeaf(p, &writer); diff --git a/manifest b/manifest index 9484610f04..5e4cb7afe0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssuffix\sand\sprefix\scompression\sof\sterms\sin\stop-level\sfts5\ssegments.\sAnd\sa\scrash\sthat\scould\sfollow\san\sOOM\scondition. -D 2015-02-27T07:23:26.074 +C Further\sminor\soptimizations\sto\sflushing\sfts5\sdata\sto\sdisk. 
+D 2015-02-27T09:41:10.812 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b F ext/fts5/fts5_hash.c 63ad0066ec83525f0dad5b416d9db6e06f7d39ac -F ext/fts5/fts5_index.c 14549572551b60d99413f9bd2043ed2be004a328 +F ext/fts5/fts5_index.c deb7a5b73ca79b297cb32ce604015d13ad4a129e F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac -R f04df10b48e4ca5c813b34f1122b4215 +P bb104b3646c6f07ed002be7360b08433ee7980d4 +R 7d2aff01dcb4baccf8d2147c84edd80c U dan -Z 4695fccf9ba60f7f53ddc85dbb7a72b7 +Z 8659a7b4b1b02ada86d180379ab48889 diff --git a/manifest.uuid b/manifest.uuid index b080cab9ec..6faea6421b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bb104b3646c6f07ed002be7360b08433ee7980d4 \ No newline at end of file +a07dcca9ef3821a6719ef9dbbc8ed861fa005035 \ No newline at end of file From 47c467c80e8719d0284adb6d95b999b51b9b080c Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 4 Mar 2015 08:29:24 +0000 Subject: [PATCH 093/206] Fix a couple of build problems. 
FossilOrigin-Name: a5d5468c0509d129e198bf9432190ee07cedb7af --- ext/fts5/fts5.c | 2 ++ ext/fts5/fts5_aux.c | 3 +++ ext/fts5/fts5_buffer.c | 4 ++-- ext/fts5/fts5_config.c | 4 ++++ ext/fts5/fts5_expr.c | 4 ++++ ext/fts5/fts5_hash.c | 4 ++++ ext/fts5/fts5_index.c | 7 +++++-- ext/fts5/fts5_storage.c | 4 ++++ ext/fts5/fts5_tcl.c | 13 ++++++++++--- ext/fts5/fts5_tokenize.c | 3 +++ main.mk | 4 +++- manifest | 32 ++++++++++++++++---------------- manifest.uuid | 2 +- 13 files changed, 61 insertions(+), 25 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index d748a8b40f..f8450aab15 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -13,6 +13,7 @@ ** This is an SQLite module implementing full-text search. */ +#if defined(SQLITE_ENABLE_FTS5) #include "fts5Int.h" @@ -1962,5 +1963,6 @@ int sqlite3Fts5Init(sqlite3 *db){ } return rc; } +#endif /* defined(SQLITE_ENABLE_FTS5) */ diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 22bdbb2344..fbd786640e 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -11,6 +11,8 @@ ****************************************************************************** */ +#ifdef SQLITE_ENABLE_FTS5 + #include "fts5Int.h" #include @@ -549,5 +551,6 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){ return rc; } +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 53cb02f521..73a5157ef9 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -12,6 +12,7 @@ */ +#ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" @@ -294,6 +295,5 @@ void *sqlite3Fts5MallocZero(int *pRc, int nByte){ } return pRet; } - - +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 438cdf370a..0450db6913 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -13,6 +13,9 @@ ** This is an SQLite module implementing full-text search. 
*/ +#ifdef SQLITE_ENABLE_FTS5 + + #include "fts5Int.h" #define FTS5_DEFAULT_PAGE_SIZE 1000 @@ -789,3 +792,4 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ return rc; } +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 6966fc1975..878b54f53e 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -12,6 +12,9 @@ ** */ +#ifdef SQLITE_ENABLE_FTS5 + + #include "fts5Int.h" #include "fts5parse.h" @@ -1695,3 +1698,4 @@ int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ return 0; } +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 7c63fad19c..bd17205f1c 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -12,6 +12,9 @@ ** */ +#ifdef SQLITE_ENABLE_FTS5 + + #include "fts5Int.h" typedef struct Fts5HashEntry Fts5HashEntry; @@ -457,3 +460,4 @@ void sqlite3Fts5HashScanEntry( } } +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 3fe408a86e..5eb5cbfbff 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -16,6 +16,8 @@ ** the interface defined in fts5Int.h. */ +#ifdef SQLITE_ENABLE_FTS5 + #include "fts5Int.h" /* @@ -448,8 +450,8 @@ struct Fts5MultiSegIter { ** ** FTS5_SEGITER_REVERSE: ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If -** it is set, iterate through docids in ascending order instead of the -** default descending order. +** it is set, iterate through docids in descending order instead of the +** default ascending order. ** ** iRowidOffset/nRowidOffset/aRowidOffset: ** These are used if the FTS5_SEGITER_REVERSE flag is set. 
@@ -5060,3 +5062,4 @@ int sqlite3Fts5IndexInit(sqlite3 *db){ return rc; } +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 4bbbed138a..075b2eb66a 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -12,6 +12,9 @@ ** */ +#ifdef SQLITE_ENABLE_FTS5 + + #include "fts5Int.h" struct Fts5Storage { @@ -986,3 +989,4 @@ int sqlite3Fts5StorageConfigValue( } +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index f560590720..f1c2284276 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -12,10 +12,13 @@ ** */ + #ifdef SQLITE_TEST +#include + +#ifdef SQLITE_ENABLE_FTS5 #include "fts5.h" -#include #include #include @@ -856,5 +859,9 @@ int Fts5tcl_Init(Tcl_Interp *interp){ return TCL_OK; } - -#endif +#else /* SQLITE_ENABLE_FTS5 */ +int Fts5tcl_Init(Tcl_Interp *interp){ + return TCL_OK; +} +#endif /* SQLITE_ENABLE_FTS5 */ +#endif /* SQLITE_TEST */ diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 12bf242055..67ee446418 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -11,6 +11,8 @@ ****************************************************************************** */ +#if defined(SQLITE_ENABLE_FTS5) + #include "fts5.h" #include #include @@ -1221,5 +1223,6 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ return SQLITE_OK; } +#endif /* defined(SQLITE_ENABLE_FTS5) */ diff --git a/main.mk b/main.mk index a3e80df531..ec939c7a37 100644 --- a/main.mk +++ b/main.mk @@ -636,8 +636,10 @@ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y mv fts5parse.c fts5parse.c.orig + echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \ - | sed 's/TOKEN/FTS5TOKEN/g' > fts5parse.c + | sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c + echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) diff --git 
a/manifest b/manifest index 5e4cb7afe0..1f0b00ef27 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Further\sminor\soptimizations\sto\sflushing\sfts5\sdata\sto\sdisk. -D 2015-02-27T09:41:10.812 +C Fix\sa\scouple\sof\sbuild\sproblems. +D 2015-03-04T08:29:24.833 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,18 +104,18 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c f2e899fba27ca33c8897635752c4c83a40dcb18d +F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a F ext/fts5/fts5Int.h 5c8efea3d0a1ccc70194225f8c402a1732ed5ad5 -F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f -F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 -F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 -F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b -F ext/fts5/fts5_hash.c 63ad0066ec83525f0dad5b416d9db6e06f7d39ac -F ext/fts5/fts5_index.c deb7a5b73ca79b297cb32ce604015d13ad4a129e -F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 -F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 -F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 +F ext/fts5/fts5_aux.c c64e56b08c5be52fa688c078259cf903b164937a +F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06 +F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 +F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 +F ext/fts5/fts5_hash.c 6bc0f78cb3630c5ff27dbfb58847758e82c3d0ac +F ext/fts5/fts5_index.c 
db8dc4cf906245dfd8a8d724695b60d6f22b7654 +F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d +F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 +F ext/fts5/fts5_tokenize.c c3fe30914f7722941ea9e0092c07ab5ae87112e4 F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba @@ -197,7 +197,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk f53de1926f080336a0eb0f4807f122c5ee63aae3 +F main.mk 784b787dbf04af12581b53ceca57116a2bd355e6 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bb104b3646c6f07ed002be7360b08433ee7980d4 -R 7d2aff01dcb4baccf8d2147c84edd80c +P a07dcca9ef3821a6719ef9dbbc8ed861fa005035 +R c94fdf40731aa277dc09f612ab9d1498 U dan -Z 8659a7b4b1b02ada86d180379ab48889 +Z 4c98d611f9675f463f695743f454384f diff --git a/manifest.uuid b/manifest.uuid index 6faea6421b..fe120d997a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a07dcca9ef3821a6719ef9dbbc8ed861fa005035 \ No newline at end of file +a5d5468c0509d129e198bf9432190ee07cedb7af \ No newline at end of file From 6f876c339dfc8c39aebbcf43adef3ff215402a68 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 7 Mar 2015 11:50:31 +0000 Subject: [PATCH 094/206] Fix the bm25() function so that it multiplies scores by -1 before returning them. 
This means better matches have a lower numerical score, so "ORDER BY rank" (not "ORDER BY rank DESC") does what you want. FossilOrigin-Name: 3ee7b5a9f987c269251620ae7cc0fc7876b58ee5 --- ext/fts5/fts5.h | 2 +- ext/fts5/fts5_aux.c | 2 +- ext/fts5/fts5_hash.c | 22 ---------------------- ext/fts5/test/fts5ae.test | 6 +++--- manifest | 18 +++++++++--------- manifest.uuid | 2 +- 6 files changed, 15 insertions(+), 37 deletions(-) diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 8e244f3992..28be0de676 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -100,7 +100,7 @@ typedef void (*fts5_extension_function)( ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** -** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid DESC +** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid ** ** with $p set to a phrase equivalent to the phrase iPhrase of the ** current query is executed. For each row visited, the callback function diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index fbd786640e..8e4beffe67 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -520,7 +520,7 @@ static void fts5Bm25Function( /* If no error has occurred, return the calculated score. Otherwise, ** throw an SQL exception. */ if( rc==SQLITE_OK ){ - sqlite3_result_double(pCtx, score); + sqlite3_result_double(pCtx, -1.0 * score); }else{ sqlite3_result_error_code(pCtx, rc); } diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index bd17205f1c..69425a3310 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -69,28 +69,6 @@ struct Fts5HashEntry { char zKey[0]; /* Nul-terminated entry key */ }; -/* -** Format value iVal as a 4-byte varint and write it to buffer a[]. 4 bytes -** are used even if the value could fit in a smaller amount of space. 
-*/ -static void fts5Put4ByteVarint(u8 *a, int iVal){ - a[0] = (0x80 | (u8)(iVal >> 21)); - a[1] = (0x80 | (u8)(iVal >> 14)); - a[2] = (0x80 | (u8)(iVal >> 7)); - a[3] = (0x7F & (u8)(iVal)); -} - -static int fts5Get4ByteVarint(u8 *a, int *pnVarint){ - int iRet = ((int)(a[0] & 0x7F) << 21) + ((int)(a[1] & 0x7F) << 14) - + ((int)(a[2] & 0x7F) << 7) + ((int)(a[3])); - *pnVarint = ( - (iRet & 0xFFFFFF80)==0 ? 1 : - (iRet & 0xFFFFC000)==0 ? 2 : - (iRet & 0xFFE00000)==0 ? 3 : 4 - ); - return iRet; -} - /* ** Allocate a new hash table. */ diff --git a/ext/fts5/test/fts5ae.test b/ext/fts5/test/fts5ae.test index c9c3fcce30..d310e723be 100644 --- a/ext/fts5/test/fts5ae.test +++ b/ext/fts5/test/fts5ae.test @@ -265,15 +265,15 @@ foreach {tn q res} { 6 {j AND (h OR i)} {5 6} } { do_execsql_test 8.2.$tn.1 { - SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8) DESC; + SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8); } $res do_execsql_test 8.2.$tn.2 { - SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank DESC; + SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank; } $res do_execsql_test 8.2.$tn.3 { - SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank DESC; + SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank; } $res } diff --git a/manifest b/manifest index 1f0b00ef27..7124437f18 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\scouple\sof\sbuild\sproblems. -D 2015-03-04T08:29:24.833 +C Fix\sthe\sbm25()\sfunction\sso\sthat\sit\smultiplies\sscores\sby\s-1\sbefore\sreturning\sthem.\sThis\smeans\sbetter\smatches\shave\sa\slower\snumerical\sscore,\sso\s"ORDER\sBY\srank"\s(not\s"ORDER\sBY\srank\sDESC")\sdoes\swhat\syou\swant. 
+D 2015-03-07T11:50:31.532 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,13 +105,13 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b -F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a +F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a F ext/fts5/fts5Int.h 5c8efea3d0a1ccc70194225f8c402a1732ed5ad5 -F ext/fts5/fts5_aux.c c64e56b08c5be52fa688c078259cf903b164937a +F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 -F ext/fts5/fts5_hash.c 6bc0f78cb3630c5ff27dbfb58847758e82c3d0ac +F ext/fts5/fts5_hash.c 13fcefb50a178c0f5086b88cdd781e26c413a3cb F ext/fts5/fts5_index.c db8dc4cf906245dfd8a8d724695b60d6f22b7654 F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 @@ -124,7 +124,7 @@ F ext/fts5/test/fts5aa.test 065767c60ad301f77ad95f24369305e13347aa00 F ext/fts5/test/fts5ab.test 5da2e92a8047860b9e22b6fd3990549639d631b1 F ext/fts5/test/fts5ac.test 8b3c2938840da8f3f6a53b1324fb03e0bac12d1e F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d -F ext/fts5/test/fts5ae.test 347c96db06aab23ff00cf6a6b4064a8dbb182e42 +F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 F ext/fts5/test/fts5ah.test 56b5a2599707621bf2fd1b5a00ddc0c0c1ffbf06 
@@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a07dcca9ef3821a6719ef9dbbc8ed861fa005035 -R c94fdf40731aa277dc09f612ab9d1498 +P a5d5468c0509d129e198bf9432190ee07cedb7af +R aa671856811e9ea4e67d518a4616a999 U dan -Z 4c98d611f9675f463f695743f454384f +Z 4764624a6d652eca0ea7813e6c8b51f7 diff --git a/manifest.uuid b/manifest.uuid index fe120d997a..11c1b204ae 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a5d5468c0509d129e198bf9432190ee07cedb7af \ No newline at end of file +3ee7b5a9f987c269251620ae7cc0fc7876b58ee5 \ No newline at end of file From 92faed663a220c93a5399beed111882e05cfce9e Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 7 Mar 2015 15:46:41 +0000 Subject: [PATCH 095/206] Fix some compiler warnings caused by signed/unsigned pointer conversions. 
FossilOrigin-Name: cccee7b5b1e84523f1c549d3052fd170e32bde80 --- ext/fts5/fts5Int.h | 4 ++-- ext/fts5/fts5_hash.c | 8 ++++---- ext/fts5/fts5_index.c | 16 ++++++++-------- manifest | 16 ++++++++-------- manifest.uuid | 2 +- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index f31548ee21..3e2045d4c1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -396,7 +396,7 @@ int sqlite3Fts5HashIterate( int sqlite3Fts5HashQuery( Fts5Hash*, /* Hash table to query */ const char *pTerm, int nTerm, /* Query term */ - const char **ppDoclist, /* OUT: Pointer to doclist for pTerm */ + const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ int *pnDoclist /* OUT: Size of doclist in bytes */ ); @@ -408,7 +408,7 @@ void sqlite3Fts5HashScanNext(Fts5Hash*); int sqlite3Fts5HashScanEof(Fts5Hash*); void sqlite3Fts5HashScanEntry(Fts5Hash *, const char **pzTerm, /* OUT: term (nul-terminated) */ - const char **ppDoclist, /* OUT: pointer to doclist */ + const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ); diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 69425a3310..c5fd858fc0 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -380,7 +380,7 @@ static int fts5HashEntrySort( int sqlite3Fts5HashQuery( Fts5Hash *pHash, /* Hash table to query */ const char *pTerm, int nTerm, /* Query term */ - const char **ppDoclist, /* OUT: Pointer to doclist for pTerm */ + const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ int *pnDoclist /* OUT: Size of doclist in bytes */ ){ unsigned int iHash = fts5HashKey(pHash->nSlot, pTerm, nTerm); @@ -392,7 +392,7 @@ int sqlite3Fts5HashQuery( if( p ){ fts5HashAddPoslistSize(p); - *ppDoclist = &p->zKey[nTerm+1]; + *ppDoclist = (const u8*)&p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *ppDoclist = 0; @@ -421,7 +421,7 @@ int sqlite3Fts5HashScanEof(Fts5Hash *p){ void sqlite3Fts5HashScanEntry( 
Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ - const char **ppDoclist, /* OUT: pointer to doclist */ + const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; @@ -429,7 +429,7 @@ void sqlite3Fts5HashScanEntry( int nTerm = strlen(p->zKey); fts5HashAddPoslistSize(p); *pzTerm = p->zKey; - *ppDoclist = &p->zKey[nTerm+1]; + *ppDoclist = (const u8*)&p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *pzTerm = 0; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 5eb5cbfbff..15b345da4f 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1797,7 +1797,7 @@ static void fts5SegIterNext( pIter->iRowid += iDelta; } }else if( pIter->pSeg==0 ){ - const char *pList = 0; + const u8 *pList = 0; const char *zTerm; int nList; if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ @@ -1811,7 +1811,7 @@ static void fts5SegIterNext( pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->n = nList; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); - pIter->iLeafOffset = getVarint((u8*)pList, (u64*)&pIter->iRowid); + pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid); if( pIter->flags & FTS5_SEGITER_REVERSE ){ fts5SegIterReverseInitPage(p, pIter); } @@ -2075,7 +2075,7 @@ static void fts5SegIterHashInit( Fts5SegIter *pIter /* Object to populate */ ){ Fts5Hash *pHash = p->apHash[iIdx]; - const char *pList = 0; + const u8 *pList = 0; int nList = 0; const u8 *z = 0; int n = 0; @@ -3433,7 +3433,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ - const char *zPrev = 0; + const u8 *zPrev = 0; Fts5SegWriter writer; fts5WriteInit(p, &writer, iHash, iSegid); @@ -3458,7 +3458,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ int 
nDoclist; int nSuffix; /* Size of term suffix */ - sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist); + sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); nTerm = strlen(zTerm); /* Decide if the term fits on the current leaf. If not, flush it @@ -3474,14 +3474,14 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ /* Write the term to the leaf. And push it up into the b-tree hierarchy */ if( writer.bFirstTermInPage==0 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, zTerm); + int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); nSuffix = nTerm - nPre; }else{ fts5PutU16(&pBuf->p[2], pBuf->n); writer.bFirstTermInPage = 0; if( writer.aWriter[0].pgno!=1 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, zTerm); + int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); pBuf = &writer.aWriter[0].buf; assert( nPrep[pBuf->n++] = '\0'; assert( pBuf->n<=pBuf->nSpace ); - zPrev = zTerm; + zPrev = (const u8*)zTerm; sqlite3Fts5HashScanNext(pHash); } sqlite3Fts5HashClear(pHash); diff --git a/manifest b/manifest index 7124437f18..dbf36921fd 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\sbm25()\sfunction\sso\sthat\sit\smultiplies\sscores\sby\s-1\sbefore\sreturning\sthem.\sThis\smeans\sbetter\smatches\shave\sa\slower\snumerical\sscore,\sso\s"ORDER\sBY\srank"\s(not\s"ORDER\sBY\srank\sDESC")\sdoes\swhat\syou\swant. -D 2015-03-07T11:50:31.532 +C Fix\ssome\scompiler\swarnings\scaused\sby\ssigned/unsigned\spointer\sconversions. 
+D 2015-03-07T15:46:41.341 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 5c8efea3d0a1ccc70194225f8c402a1732ed5ad5 +F ext/fts5/fts5Int.h 1dcb02943f3a55d275d5473911a7e991d638c73c F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 -F ext/fts5/fts5_hash.c 13fcefb50a178c0f5086b88cdd781e26c413a3cb -F ext/fts5/fts5_index.c db8dc4cf906245dfd8a8d724695b60d6f22b7654 +F ext/fts5/fts5_hash.c 9959b5408f649487d4b0ee081416f37dc3cd8cdd +F ext/fts5/fts5_index.c 3eb8db82d08386d6777faeb4ff45ee998b3d9a81 F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c3fe30914f7722941ea9e0092c07ab5ae87112e4 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a5d5468c0509d129e198bf9432190ee07cedb7af -R aa671856811e9ea4e67d518a4616a999 +P 3ee7b5a9f987c269251620ae7cc0fc7876b58ee5 +R 2a14dede3733f8ccd7d343d89b036f09 U dan -Z 4764624a6d652eca0ea7813e6c8b51f7 +Z b868027a203766c0d15e29e2b4da678d diff --git a/manifest.uuid b/manifest.uuid index 11c1b204ae..51cb90e53a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-3ee7b5a9f987c269251620ae7cc0fc7876b58ee5 \ No newline at end of file +cccee7b5b1e84523f1c549d3052fd170e32bde80 \ No newline at end of file From c9ca6fb85daf4ba5eb6adbe5b50c9cc634392f39 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 10 Mar 2015 19:24:30 +0000 Subject: [PATCH 096/206] Avoid redundant string comparisons while merging fts5 segment b-trees. FossilOrigin-Name: 5c46820d9b4aae791a8704b69145bd81f1e6780d --- ext/fts5/fts5_index.c | 174 ++++++++++++++++++++++++++++++++++++------ manifest | 12 +-- manifest.uuid | 2 +- 3 files changed, 157 insertions(+), 31 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 15b345da4f..9fc9d2b799 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -410,12 +410,19 @@ struct Fts5SegWriter { ** aFirst[1] contains the index in aSeg[] of the iterator that points to ** the smallest key overall. aFirst[0] is unused. */ + +typedef struct Fts5CResult Fts5CResult; +struct Fts5CResult { + u16 iFirst; /* aSeg[] index of firstest iterator */ + u8 bTermEq; /* True if the terms are equal */ +}; + struct Fts5MultiSegIter { int nSeg; /* Size of aSeg[] array */ int bRev; /* True to iterate in reverse order */ int bSkipEmpty; /* True to skip deleted entries */ Fts5SegIter *aSeg; /* Array of segment iterators */ - u16 *aFirst; /* Current merge state (see above) */ + Fts5CResult *aFirst; /* Current merge state (see above) */ }; /* @@ -1744,8 +1751,10 @@ static int fts5SegIterIsDelete( */ static void fts5SegIterNext( Fts5Index *p, /* FTS5 backend object */ - Fts5SegIter *pIter /* Iterator to advance */ + Fts5SegIter *pIter, /* Iterator to advance */ + int *pbNewTerm /* OUT: Set for new term */ ){ + assert( pbNewTerm==0 || *pbNewTerm==0 ); if( p->rc==SQLITE_OK ){ if( pIter->flags & FTS5_SEGITER_REVERSE ){ if( pIter->iRowidOffset>0 ){ @@ -1841,6 +1850,7 @@ static void fts5SegIterNext( pIter->pLeaf = 0; }else{ fts5SegIterLoadTerm(p, pIter, nKeep); + if( pbNewTerm ) *pbNewTerm = 1; } } } @@ -2033,7 +2043,7 @@ 
static void fts5SegIterSeekInit( do { res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); if( res>=0 ) break; - fts5SegIterNext(p, pIter); + fts5SegIterNext(p, pIter, 0); }while( pIter->pLeaf && p->rc==SQLITE_OK ); if( bGe==0 && res ){ @@ -2123,6 +2133,79 @@ static void fts5SegIterClear(Fts5SegIter *pIter){ memset(pIter, 0, sizeof(Fts5SegIter)); } +#ifdef SQLITE_DEBUG + +/* +** This function is used as part of the big assert() procedure implemented by +** fts5AssertMultiIterSetup(). It ensures that the result currently stored +** in *pRes is the correct result of comparing the current positions of the +** two iterators. +*/ +static void fts5AssertComparisonResult( + Fts5MultiSegIter *pIter, + Fts5SegIter *p1, + Fts5SegIter *p2, + Fts5CResult *pRes +){ + int i1 = p1 - pIter->aSeg; + int i2 = p2 - pIter->aSeg; + + if( p1->pLeaf || p2->pLeaf ){ + if( p1->pLeaf==0 ){ + assert( pRes->iFirst==i2 ); + }else if( p2->pLeaf==0 ){ + assert( pRes->iFirst==i1 ); + }else{ + int nMin = MIN(p1->term.n, p2->term.n); + int res = memcmp(p1->term.p, p2->term.p, nMin); + if( res==0 ) res = p1->term.n - p2->term.n; + + if( res==0 ){ + assert( pRes->bTermEq==1 ); + assert( p1->iRowid!=p2->iRowid ); + res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; + }else{ + assert( pRes->bTermEq==0 ); + } + + if( res<0 ){ + assert( pRes->iFirst==i1 ); + }else{ + assert( pRes->iFirst==i2 ); + } + } + } +} + +/* +** This function is a no-op unless SQLITE_DEBUG is defined when this module +** is compiled. In that case, this function is essentially an assert() +** statement used to verify that the contents of the pIter->aFirst[] array +** are correct. 
+*/ +static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5MultiSegIter *pIter){ + if( p->rc==SQLITE_OK ){ + int i; + for(i=0; i<pIter->nSeg; i+=2){ + Fts5SegIter *p1 = &pIter->aSeg[i]; + Fts5SegIter *p2 = &pIter->aSeg[i+1]; + Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; + fts5AssertComparisonResult(pIter, p1, p2, pRes); + } + + for(i=1; i<(pIter->nSeg / 2); i+=2){ + Fts5CResult *pRes = &pIter->aFirst[i]; + Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; + Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; + + fts5AssertComparisonResult(pIter, p1, p2, pRes); + } + } +} +#else +# define fts5AssertMultiIterSetup(x,y) +#endif + /* ** Do the comparison necessary to populate pIter->aFirst[iOut]. ** @@ -2137,6 +2220,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ int iRes; Fts5SegIter *p1; /* Left-hand Fts5SegIter */ Fts5SegIter *p2; /* Right-hand Fts5SegIter */ + Fts5CResult *pRes = &pIter->aFirst[iOut]; assert( iOut<pIter->nSeg && iOut>0 ); assert( pIter->bRev==0 || pIter->bRev==1 ); @@ -2145,12 +2229,13 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ i1 = (iOut - pIter->nSeg/2) * 2; i2 = i1 + 1; }else{ - i1 = pIter->aFirst[iOut*2]; - i2 = pIter->aFirst[iOut*2+1]; + i1 = pIter->aFirst[iOut*2].iFirst; + i2 = pIter->aFirst[iOut*2+1].iFirst; } p1 = &pIter->aSeg[i1]; p2 = &pIter->aSeg[i2]; + pRes->bTermEq = 0; if( p1->pLeaf==0 ){ /* If p1 is at EOF */ iRes = i2; }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ @@ -2160,6 +2245,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ if( res==0 ){ assert( i2>i1 ); assert( i2!=0 ); + pRes->bTermEq = 1; if( p1->iRowid==p2->iRowid ) return i2; res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ?
-1 : +1; } @@ -2171,7 +2257,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ } } - pIter->aFirst[iOut] = iRes; + pRes->iFirst = iRes; return 0; } @@ -2252,7 +2338,7 @@ static void fts5SegIterNextFrom( } while( 1 ){ - if( bMove ) fts5SegIterNext(p, pIter); + if( bMove ) fts5SegIterNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; @@ -2284,12 +2370,43 @@ static void fts5MultiIterAdvanced( for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ - fts5SegIterNext(p, &pIter->aSeg[iEq]); + fts5SegIterNext(p, &pIter->aSeg[iEq], 0); i = pIter->nSeg + iEq; } } } +static int fts5MultiIterAdvanceRowid( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ + int iChanged /* Index of sub-iterator just advanced */ +){ + int i; + Fts5SegIter *pNew = &pIter->aSeg[iChanged]; + Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; + + for(i=(pIter->nSeg+iChanged)/2; p->rc==SQLITE_OK; i=i/2){ + Fts5CResult *pRes = &pIter->aFirst[i]; + + assert( pNew->pLeaf ); + assert( pRes->bTermEq==0 || pOther->pLeaf ); + + if( pRes->bTermEq ){ + if( pNew->iRowid==pOther->iRowid ){ + return 1; + }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ + pNew = pOther; + } + } + pRes->iFirst = (pNew - pIter->aSeg); + if( i==1 ) break; + + pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; + } + + return 0; +} + /* ** Move the iterator to the next entry. 
** @@ -2306,17 +2423,25 @@ static void fts5MultiIterNext( if( p->rc==SQLITE_OK ){ int bUseFrom = bFrom; do { - int iFirst = pIter->aFirst[1]; + int iFirst = pIter->aFirst[1].iFirst; + int bNewTerm = 0; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; if( bUseFrom && pSeg->pDlidx ){ fts5SegIterNextFrom(p, pSeg, iFrom); }else{ - fts5SegIterNext(p, pSeg); + fts5SegIterNext(p, pSeg, &bNewTerm); } - fts5MultiIterAdvanced(p, pIter, iFirst, 1); + + if( pSeg->pLeaf==0 || bNewTerm + || fts5MultiIterAdvanceRowid(p, pIter, iFirst) + ){ + fts5MultiIterAdvanced(p, pIter, iFirst, 1); + } + fts5AssertMultiIterSetup(p, pIter); + bUseFrom = 0; }while( pIter->bSkipEmpty - && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1]]) + && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1].iFirst]) ); } } @@ -2336,7 +2461,7 @@ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ - int bSkipEmpty, + int bSkipEmpty, /* True to ignore delete-keys */ int flags, /* True for >= */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ @@ -2363,12 +2488,12 @@ static void fts5MultiIterNew( *ppOut = pNew = fts5IdxMalloc(p, sizeof(Fts5MultiSegIter) + /* pNew */ sizeof(Fts5SegIter) * nSlot + /* pNew->aSeg[] */ - sizeof(u16) * nSlot /* pNew->aFirst[] */ + sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ ); if( pNew==0 ) return; pNew->nSeg = nSlot; pNew->aSeg = (Fts5SegIter*)&pNew[1]; - pNew->aFirst = (u16*)&pNew->aSeg[nSlot]; + pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); pNew->bSkipEmpty = bSkipEmpty; @@ -2407,13 +2532,14 @@ static void fts5MultiIterNew( for(iIter=nSlot-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ - fts5SegIterNext(p, &pNew->aSeg[iEq]); + fts5SegIterNext(p, &pNew->aSeg[iEq], 0); fts5MultiIterAdvanced(p, pNew, 
iEq, iIter); } } + fts5AssertMultiIterSetup(p, pNew); if( pNew->bSkipEmpty - && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1]]) + && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1].iFirst]) ){ fts5MultiIterNext(p, pNew, 0, 0); } @@ -2428,7 +2554,7 @@ static void fts5MultiIterNew( ** False otherwise. */ static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ - return (p->rc || pIter->aSeg[ pIter->aFirst[1] ].pLeaf==0); + return (p->rc || pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0); } /* @@ -2437,8 +2563,8 @@ static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ ** results are undefined. */ static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ - assert( pIter->aSeg[ pIter->aFirst[1] ].pLeaf ); - return pIter->aSeg[ pIter->aFirst[1] ].iRowid; + assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); + return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; } /* @@ -2464,7 +2590,7 @@ static void fts5MultiIterNextFrom( ** entry that the iterator currently points to. 
*/ static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ - Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1] ]; + Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } @@ -2593,7 +2719,7 @@ static void fts5PosIterInit( Fts5PosIter *pIter /* Initialize this object */ ){ if( p->rc==SQLITE_OK ){ - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; memset(pIter, 0, sizeof(*pIter)); fts5ChunkIterInit(p, pSeg, &pIter->chunk); if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ @@ -3214,7 +3340,7 @@ fflush(stdout); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ - Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; + Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ /* If the segment being written is the oldest in the entire index and @@ -3940,7 +4066,7 @@ static void fts5MultiIterPoslist( ){ if( p->rc==SQLITE_OK ){ Fts5ChunkIter iter; - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ]; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); fts5ChunkIterInit(p, pSeg, &iter); if( fts5ChunkIterEof(p, &iter)==0 ){ diff --git a/manifest b/manifest index dbf36921fd..3b3d160cd1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssome\scompiler\swarnings\scaused\sby\ssigned/unsigned\spointer\sconversions. -D 2015-03-07T15:46:41.341 +C Avoid\sredundant\sstring\scomparisons\swhile\smerging\sfts5\ssegment\sb-trees. 
+D 2015-03-10T19:24:30.225 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 9959b5408f649487d4b0ee081416f37dc3cd8cdd -F ext/fts5/fts5_index.c 3eb8db82d08386d6777faeb4ff45ee998b3d9a81 +F ext/fts5/fts5_index.c b00f7147f9660e66d9d1a8149d4faea3a06cd48e F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c3fe30914f7722941ea9e0092c07ab5ae87112e4 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 3ee7b5a9f987c269251620ae7cc0fc7876b58ee5 -R 2a14dede3733f8ccd7d343d89b036f09 +P cccee7b5b1e84523f1c549d3052fd170e32bde80 +R 3da0db5a8a88c2deea54e98a4c62e146 U dan -Z b868027a203766c0d15e29e2b4da678d +Z f8fc1108495d7cd86a621708029f2995 diff --git a/manifest.uuid b/manifest.uuid index 51cb90e53a..6ab7ead8ed 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -cccee7b5b1e84523f1c549d3052fd170e32bde80 \ No newline at end of file +5c46820d9b4aae791a8704b69145bd81f1e6780d \ No newline at end of file From f5fab92d82260636bc0e702785983a3a46cdb509 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 11 Mar 2015 14:51:39 +0000 Subject: [PATCH 097/206] Add an optimization to the fts5 unicode tokenizer code. 
FossilOrigin-Name: f5db489250029678fce845dfb2b1109fde46bea5 --- ext/fts5/fts5_tokenize.c | 4 +++- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 67ee446418..3f4261c698 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -404,6 +404,7 @@ static int fts5UnicodeTokenize( /* Output buffer */ char *aFold = p->aFold; int nFold = p->nFold; + const char *pEnd = &aFold[nFold-6]; /* Each iteration of this loop gobbles up a contiguous run of separators, ** then the next token. */ @@ -439,7 +440,7 @@ static int fts5UnicodeTokenize( /* Grow the output buffer so that there is sufficient space to fit the ** largest possible utf-8 character. */ - if( (zOut-aFold)+6>nFold ){ + if( zOut>pEnd ){ aFold = sqlite3_malloc(nFold*2); if( aFold==0 ){ rc = SQLITE_NOMEM; @@ -450,6 +451,7 @@ static int fts5UnicodeTokenize( sqlite3_free(p->aFold); p->aFold = aFold; p->nFold = nFold = nFold*2; + pEnd = &aFold[nFold-6]; } if( *zCsr & 0x80 ){ diff --git a/manifest b/manifest index 3b3d160cd1..aa18705b89 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\sredundant\sstring\scomparisons\swhile\smerging\sfts5\ssegment\sb-trees. -D 2015-03-10T19:24:30.225 +C Add\san\soptimization\sto\sthe\sfts5\sunicode\stokenizer\scode. 
+D 2015-03-11T14:51:39.375 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -115,7 +115,7 @@ F ext/fts5/fts5_hash.c 9959b5408f649487d4b0ee081416f37dc3cd8cdd F ext/fts5/fts5_index.c b00f7147f9660e66d9d1a8149d4faea3a06cd48e F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 -F ext/fts5/fts5_tokenize.c c3fe30914f7722941ea9e0092c07ab5ae87112e4 +F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P cccee7b5b1e84523f1c549d3052fd170e32bde80 -R 3da0db5a8a88c2deea54e98a4c62e146 +P 5c46820d9b4aae791a8704b69145bd81f1e6780d +R 3ab21807b5c69b091448271467250abb U dan -Z f8fc1108495d7cd86a621708029f2995 +Z 588a6ddf00bd9c069b244e50951b58e0 diff --git a/manifest.uuid b/manifest.uuid index 6ab7ead8ed..9a4fa3320d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5c46820d9b4aae791a8704b69145bd81f1e6780d \ No newline at end of file +f5db489250029678fce845dfb2b1109fde46bea5 \ No newline at end of file From 8885708c3ceebe9f7b7f7851faf1ca3c998b0f2e Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 21 Mar 2015 15:37:19 +0000 Subject: [PATCH 098/206] When scanning the full-text index as part of the fts5 integrity-check, also run a point query for every term and verify that these results are consistent with those found by the linear scan. 
FossilOrigin-Name: ce972f6aab90f6929d018696f1ab3c2649eca802 --- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_index.c | 55 ++++++++++++++++++++++++++++++++++++------- manifest | 14 +++++------ manifest.uuid | 2 +- 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 3e2045d4c1..2065d9d3cf 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -242,7 +242,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy); ** 0==sqlite3Fts5IterEof(pIter); ** sqlite3Fts5IterNext(pIter) ** ){ -** i64 iDocid = sqlite3Fts5IndexDocid(pIter); +** i64 iRowid = sqlite3Fts5IterRowid(pIter); ** } */ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 9fc9d2b799..7ce2e2fbc4 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4312,6 +4312,8 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ Fts5Config *pConfig = p->pConfig; int iIdx; /* Used to iterate through indexes */ u64 cksum2 = 0; /* Checksum based on contents of indexes */ + u64 cksum3 = 0; /* Checksum based on contents of indexes */ + Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ /* Check that the internal nodes of each segment match the leaves */ for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ @@ -4328,7 +4330,19 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ fts5StructureRelease(pStruct); } - /* Check that the checksum of the index matches the argument checksum */ + /* The cksum argument passed to this function is a checksum calculated + ** based on all expected entries in the FTS index (including prefix index + ** entries). This block checks that a checksum calculated based on the + ** actual contents of FTS index is identical. + ** + ** Two versions of the same checksum are calculated. The first (stack + ** variable cksum2) based on entries extracted from the full-text index + ** while doing a linear scan of each individual index in turn. 
+ ** + ** As each term visited by the linear scans, a separate query for the + ** same term is performed. cksum3 is calculated based on the entries + ** extracted by these queries. + */ for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ Fts5MultiSegIter *pIter; Fts5Structure *pStruct = fts5StructureRead(p, iIdx); @@ -4341,25 +4355,50 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ i64 iRowid = fts5MultiIterRowid(pIter); char *z = (char*)fts5MultiIterTerm(pIter, &n); + /* Update cksum2 with the entries associated with the current term + ** and rowid. */ for(fts5PosIterInit(p, pIter, &sPos); fts5PosIterEof(p, &sPos)==0; fts5PosIterNext(p, &sPos) ){ cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n); -#if 0 - fprintf(stdout, "rowid=%d ", (int)iRowid); - fprintf(stdout, "term=%.*s ", n, z); - fprintf(stdout, "col=%d ", sPos.iCol); - fprintf(stdout, "off=%d\n", sPos.iPos); - fflush(stdout); -#endif + } + + /* If this is a new term, query for it. Update cksum3 with the results. */ + if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ + Fts5IndexIter *pIdxIter = 0; + int flags = (iIdx==0 ? 
0 : FTS5INDEX_QUERY_PREFIX); + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); + while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + const u8 *pPos; + int nPos; + i64 rowid = sqlite3Fts5IterRowid(pIdxIter); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + if( rc==SQLITE_OK ){ + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = FTS5_POS2OFFSET(sReader.iPos); + cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n); + } + rc = sqlite3Fts5IterNext(pIdxIter); + } + } + sqlite3Fts5IterClose(pIdxIter); + fts5BufferSet(&rc, &term, n, (const u8*)z); + p->rc = rc; } } fts5MultiIterFree(p, pIter); fts5StructureRelease(pStruct); } if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; + if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT; + fts5BufferFree(&term); return fts5IndexReturn(p); } diff --git a/manifest b/manifest index aa18705b89..d7abe65895 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\san\soptimization\sto\sthe\sfts5\sunicode\stokenizer\scode. -D 2015-03-11T14:51:39.375 +C When\sscanning\sthe\sfull-text\sindex\sas\spart\sof\sthe\sfts5\sintegrity-check,\salso\srun\sa\spoint\squery\sfor\severy\sterm\sand\sverify\sthat\sthese\sresults\sare\sconsistent\swith\sthose\sfound\sby\sthe\slinear\sscan. 
+D 2015-03-21T15:37:19.761 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 1dcb02943f3a55d275d5473911a7e991d638c73c +F ext/fts5/fts5Int.h 8d09f7894e83b00a18a7e2149354a153904002df F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 9959b5408f649487d4b0ee081416f37dc3cd8cdd -F ext/fts5/fts5_index.c b00f7147f9660e66d9d1a8149d4faea3a06cd48e +F ext/fts5/fts5_index.c 4adc5e18ab6d0648faeb18f18a89d6aec57d77be F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 5c46820d9b4aae791a8704b69145bd81f1e6780d -R 3ab21807b5c69b091448271467250abb +P f5db489250029678fce845dfb2b1109fde46bea5 +R 258f390b03b29f1c61f33d36002f03e9 U dan -Z 588a6ddf00bd9c069b244e50951b58e0 +Z 6cf104c99eec6cd34913e25fa048b8ab diff --git a/manifest.uuid b/manifest.uuid index 9a4fa3320d..f5fccf09fa 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f5db489250029678fce845dfb2b1109fde46bea5 \ No newline at end of 
file +ce972f6aab90f6929d018696f1ab3c2649eca802 \ No newline at end of file From 56e2497aaa4e5a56f33cc1942e29d6a59ce3eb36 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 11 Apr 2015 16:23:31 +0000 Subject: [PATCH 099/206] Improve fts5 integrity-check so that it checks that DESC queries return the same as ASC. Change the poslist format slightly to make room for a delete-flag. FossilOrigin-Name: 49c1e74522a26e5dbe6f8305bc96487279b80dfb --- ext/fts5/fts5Int.h | 11 ---- ext/fts5/fts5_hash.c | 6 ++- ext/fts5/fts5_index.c | 122 +++++++++++++++++++++++++++++------------- manifest | 16 +++--- manifest.uuid | 2 +- 5 files changed, 97 insertions(+), 60 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 2065d9d3cf..83f0bf8252 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -382,17 +382,6 @@ int sqlite3Fts5HashWrite( */ void sqlite3Fts5HashClear(Fts5Hash*); -/* -** Iterate through the contents of the hash table. -*/ -int sqlite3Fts5HashIterate( - Fts5Hash*, - void *pCtx, - int (*xTerm)(void*, const char*, int), - int (*xEntry)(void*, i64, const u8*, int), - int (*xTermDone)(void*) -); - int sqlite3Fts5HashQuery( Fts5Hash*, /* Hash table to query */ const char *pTerm, int nTerm, /* Query term */ diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index c5fd858fc0..9a411802d5 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -167,14 +167,16 @@ static int fts5HashResize(Fts5Hash *pHash){ static void fts5HashAddPoslistSize(Fts5HashEntry *p){ if( p->iSzPoslist ){ + /* WRITEPOSLISTSIZE */ u8 *pPtr = (u8*)p; - int nSz = p->nData - p->iSzPoslist - 1; + int nSz = (p->nData - p->iSzPoslist - 1) * 2; if( nSz<=127 ){ pPtr[p->iSzPoslist] = nSz; }else{ int nByte = sqlite3Fts5GetVarintLen((u32)nSz); - memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); + /* WRITEPOSLISTSIZE */ + memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz/2); sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz); p->nData += (nByte-1); } diff 
--git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 7ce2e2fbc4..926a495bdb 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -43,6 +43,7 @@ ** */ + #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ @@ -121,7 +122,8 @@ ** ** poslist format: ** -** varint: size of poslist in bytes. not including this field. +** varint: size of poslist in bytes multiplied by 2, not including +** this field. Plus 1 if this entry carries the "delete" flag. ** collist: collist for column 0 ** zero-or-more { ** 0x01 byte @@ -1629,7 +1631,7 @@ static void fts5SegIterInit( /* ** This function is only ever called on iterators created by calls to -** Fts5IndexQuery() with the FTS5INDEX_QUERY_ASC flag set. +** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. ** ** When this function is called, iterator pIter points to the first rowid ** on the current leaf associated with the term being queried. 
This function @@ -1646,8 +1648,9 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ i64 iDelta = 0; int nPos; + /* READPOSLISTSIZE */ i += fts5GetVarint32(&a[i], nPos); - i += nPos; + i += nPos / 2; if( i>=n ) break; i += getVarint(&a[i], (u64*)&iDelta); if( iDelta==0 ) break; @@ -1765,8 +1768,9 @@ static void fts5SegIterNext( pIter->iRowidOffset--; pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&a[iOff], nPos); - iOff += nPos; + iOff += (nPos / 2); getVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid -= iDelta; }else{ @@ -1785,8 +1789,9 @@ static void fts5SegIterNext( iOff = pIter->iLeafOffset; if( iOffrc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid); if( pIter->flags & FTS5_SEGITER_REVERSE ){ + assert( 0 ); fts5SegIterReverseInitPage(p, pIter); } } @@ -1881,8 +1887,9 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ i64 iDelta; /* Position list size in bytes */ + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos); - iOff += nPos; + iOff += (nPos / 2); if( iOff>=pLeaf->n ) break; /* Rowid delta. Or, if 0x00, the end of doclist marker. */ @@ -1964,8 +1971,9 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ int nPoslist; /* iOff is currently the offset of the size field of a position list. 
*/ + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist); - iOff += nPoslist; + iOff += nPoslist / 2; if( iOff<pLeaf->n ){ iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); @@ -2656,7 +2664,9 @@ static void fts5ChunkIterInit( pLeaf = pIter->pLeaf; } + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem); + pIter->nRem = pIter->nRem / 2; pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); pIter->p = pLeaf->p + iOff; @@ -3369,7 +3379,8 @@ fflush(stdout); fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); /* Copy the position list from input to output */ - fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); + /* WRITEPOSLISTSIZE */ + fts5WriteAppendPoslistInt(p, &writer, sPos.nRem * 2); for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); } @@ -3587,8 +3598,8 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); nTerm = strlen(zTerm); - /* Decide if the term fits on the current leaf. If not, flush it - ** to disk. */ + /* Decide if the term will fit on the current leaf. If it will not, + ** flush the leaf to disk here.
*/ if( (pBuf->n + nTerm + 2) > pgsz ){ fts5WriteFlushLeaf(p, &writer); pBuf = &writer.aWriter[0].buf; @@ -3633,8 +3644,9 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ u32 nPos; int nCopy; iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta); + /* READPOSLISTSIZE */ nCopy = fts5GetVarint32(&pDoclist[iOff], nPos); - nCopy += nPos; + nCopy += (nPos / 2); iRowid += iDelta; if( bFirstDocid ){ @@ -4071,7 +4083,8 @@ static void fts5MultiIterPoslist( fts5ChunkIterInit(p, pSeg, &iter); if( fts5ChunkIterEof(p, &iter)==0 ){ if( bSz ){ - fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2); } while( fts5ChunkIterEof(p, &iter)==0 ){ fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); @@ -4095,7 +4108,9 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ }else{ pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } + /* READPOSLISTSIZE */ pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist); + pIter->nPoslist = pIter->nPoslist / 2; pIter->aPoslist = &pIter->a[pIter->i]; pIter->i += pIter->nPoslist; }else{ @@ -4166,14 +4181,16 @@ static void fts5MergePrefixLists( )){ /* Copy entry from i1 */ fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i1.iRowid); - fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); fts5DoclistIterNext(&i1); } else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ /* Copy entry from i2 */ fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); - fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); fts5DoclistIterNext(&i2); } @@ -4202,7 +4219,8 @@ static void fts5MergePrefixLists( p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, 
iNew); } - fts5BufferAppendVarint(&p->rc, &out, tmp.n); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2); fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); fts5DoclistIterNext(&i1); fts5DoclistIterNext(&i2); @@ -4298,6 +4316,41 @@ static void fts5SetupPrefixIter( sqlite3_free(aBuf); } +static int fts5QueryCksum( + Fts5Index *p, + const char *z, + int n, + int flags, + u64 *pCksum +){ + u64 cksum = *pCksum; + Fts5IndexIter *pIdxIter = 0; + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); + + while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + const u8 *pPos; + int nPos; + i64 rowid = sqlite3Fts5IterRowid(pIdxIter); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + if( rc==SQLITE_OK ){ + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = FTS5_POS2OFFSET(sReader.iPos); + cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n); + } + rc = sqlite3Fts5IterNext(pIdxIter); + } + } + sqlite3Fts5IterClose(pIdxIter); + + *pCksum = cksum; + return rc; +} + /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums @@ -4366,28 +4419,20 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ /* If this is a new term, query for it. Update cksum3 with the results. */ if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ - Fts5IndexIter *pIdxIter = 0; + int rc; int flags = (iIdx==0 ? 
0 : FTS5INDEX_QUERY_PREFIX); - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); - while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ - const u8 *pPos; - int nPos; - i64 rowid = sqlite3Fts5IterRowid(pIdxIter); - rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); - if( rc==SQLITE_OK ){ - Fts5PoslistReader sReader; - for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); - sReader.bEof==0; - sqlite3Fts5PoslistReaderNext(&sReader) - ){ - int iCol = FTS5_POS2COLUMN(sReader.iPos); - int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n); - } - rc = sqlite3Fts5IterNext(pIdxIter); - } + u64 ck1 = 0; + u64 ck2 = 0; + + /* Check that the results returned for ASC and DESC queries are + ** the same. If not, call this corruption. */ + rc = fts5QueryCksum(p, z, n, flags, &ck1); + if( rc==SQLITE_OK ){ + rc = fts5QueryCksum(p, z, n, flags | FTS5INDEX_QUERY_DESC, &ck2); } - sqlite3Fts5IterClose(pIdxIter); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + + cksum3 ^= ck1; fts5BufferSet(&rc, &term, n, (const u8*)z); p->rc = rc; } @@ -4773,8 +4818,8 @@ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ ** the current entry. Output variable *pn is set to the size of the buffer ** in bytes before returning. ** -** The returned buffer does not include the 0x00 terminator byte stored on -** disk. +** The returned position list does not include the "number of bytes" varint +** field that starts the position list on disk. */ int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ assert( pIter->pIndex->rc==SQLITE_OK ); @@ -5011,8 +5056,9 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ } while( iOff Date: Sat, 11 Apr 2015 18:25:04 +0000 Subject: [PATCH 100/206] Have fts5 integrity check verify that prefix indexes contain the same values as returned by prefix queries on the main terms index. 
FossilOrigin-Name: bdb8e82ab683f2438cde9f0b63e497dbf0141dcf --- ext/fts5/fts5Int.h | 5 +++-- ext/fts5/fts5_index.c | 34 +++++++++++++++++++++++++--------- manifest | 14 +++++++------- manifest.uuid | 2 +- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 83f0bf8252..59d3271212 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -227,8 +227,9 @@ typedef struct Fts5IndexIter Fts5IndexIter; /* ** Values used as part of the flags argument passed to IndexQuery(). */ -#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ -#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ +#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ +#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ +#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ /* ** Create/destroy an Fts5Index object. diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 926a495bdb..d805d256ff 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4428,10 +4428,25 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ ** the same. If not, call this corruption. */ rc = fts5QueryCksum(p, z, n, flags, &ck1); if( rc==SQLITE_OK ){ - rc = fts5QueryCksum(p, z, n, flags | FTS5INDEX_QUERY_DESC, &ck2); + rc = fts5QueryCksum(p, z, n, flags|FTS5INDEX_QUERY_DESC, &ck2); } if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + /* If this is a prefix query, check that the results returned if the + ** the index is disabled are the same. In both ASC and DESC order. 
*/ + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; + ck2 = 0; + rc = fts5QueryCksum(p, z, n, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; + ck2 = 0; + rc = fts5QueryCksum(p, z, n, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + cksum3 ^= ck1; fts5BufferSet(&rc, &term, n, (const u8*)z); p->rc = rc; @@ -4706,17 +4721,18 @@ int sqlite3Fts5IndexQuery( int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ + Fts5Config *pConfig = p->pConfig; Fts5IndexIter *pRet; int iIdx = 0; if( flags & FTS5INDEX_QUERY_PREFIX ){ - Fts5Config *pConfig = p->pConfig; - int nChar = fts5IndexCharlen(pToken, nToken); - for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ - if( pConfig->aPrefix[iIdx-1]==nChar ) break; - } - if( iIdx>pConfig->nPrefix ){ - iIdx = -1; + if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + iIdx = 1+pConfig->nPrefix; + }else{ + int nChar = fts5IndexCharlen(pToken, nToken); + for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ + if( pConfig->aPrefix[iIdx-1]==nChar ) break; + } } } @@ -4725,7 +4741,7 @@ int sqlite3Fts5IndexQuery( memset(pRet, 0, sizeof(Fts5IndexIter)); pRet->pIndex = p; - if( iIdx>=0 ){ + if( iIdx<=pConfig->nPrefix ){ pRet->pStruct = fts5StructureRead(p, iIdx); if( pRet->pStruct ){ fts5MultiIterNew(p, pRet->pStruct, diff --git a/manifest b/manifest index 9c85f5c4cf..a1dc46526c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\sfts5\sintegrity-check\sso\sthat\sit\schecks\sthat\sDESC\squeries\sreturn\sthe\ssame\sas\sASC.\sChange\sthe\sposlist\sformat\sslightly\sto\smake\sroom\sfor\sa\sdelete-flag. -D 2015-04-11T16:23:31.390 +C Have\sfts5\sintegrity\scheck\sverify\sthat\sprefix\sindexes\scontain\sthe\ssame\svalues\sas\sreturned\sby\sprefix\squeries\son\sthe\smain\sterms\sindex. 
+D 2015-04-11T18:25:04.731 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h d6fd1eb0b243b64d971126dd820821221810efe3 +F ext/fts5/fts5Int.h 1b537736f8838df7fca10245c0f70a23cfddc7f5 F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 7c134ed05d25e2a19418356d78aa4e7059bd319c -F ext/fts5/fts5_index.c 5eea88e4641ee66a86d012b33ddec622b83a9365 +F ext/fts5/fts5_index.c 670a428c51abb4f5f3f6135cc9fd0a19c192f96d F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -1292,7 +1292,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 142743918fd5a6d79fa10c44398360c3684a255a -R e78dd4221a93060347417c129987958e +P 49c1e74522a26e5dbe6f8305bc96487279b80dfb +R 008cbeef4b71700e108cdf2aad4130b5 U dan -Z 3431ace9d6cdd5acdd6fb6814cf8ac9f +Z b886f5faefd1f02721c5ad30d158df62 diff --git a/manifest.uuid b/manifest.uuid index 5c56b45150..5a159d131f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -49c1e74522a26e5dbe6f8305bc96487279b80dfb \ No newline at end of 
file +bdb8e82ab683f2438cde9f0b63e497dbf0141dcf \ No newline at end of file From f1e58b7e8d8d39989f787c7fa20d8bfa1b6389b1 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 14 Apr 2015 20:15:41 +0000 Subject: [PATCH 101/206] Begin changing fts5 to use a delete flag so that delete markers may be annihilated more quickly. FossilOrigin-Name: 9341c070bb6140dbf559680952909674aa83fa55 --- ext/fts5/fts5_buffer.c | 2 +- ext/fts5/fts5_hash.c | 21 +-- ext/fts5/fts5_index.c | 296 +++++++++++++++++++++++--------------- ext/fts5/test/fts5aa.test | 3 - ext/fts5/test/fts5ah.test | 2 +- manifest | 20 +-- manifest.uuid | 2 +- 7 files changed, 203 insertions(+), 143 deletions(-) diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 73a5157ef9..94fb4216d1 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -75,7 +75,7 @@ void sqlite3Fts5BufferAppendBlob( int nData, const u8 *pData ){ - assert( nData>=0 ); + assert( *pRc || nData>=0 ); if( sqlite3Fts5BufferGrow(pRc, pBuf, nData) ) return; memcpy(&pBuf->p[pBuf->n], pData, nData); pBuf->n += nData; diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 9a411802d5..fa7701a6d0 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -62,6 +62,7 @@ struct Fts5HashEntry { int nAlloc; /* Total size of allocation */ int iSzPoslist; /* Offset of space for 4-byte poslist size */ int nData; /* Total bytes of data (incl. 
structure) */ + u8 bDel; /* Set delete-flag @ iSzPoslist */ int iCol; /* Column of last value written */ int iPos; /* Position of last value written */ @@ -167,19 +168,20 @@ static int fts5HashResize(Fts5Hash *pHash){ static void fts5HashAddPoslistSize(Fts5HashEntry *p){ if( p->iSzPoslist ){ - /* WRITEPOSLISTSIZE */ u8 *pPtr = (u8*)p; - int nSz = (p->nData - p->iSzPoslist - 1) * 2; + int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */ + int nPos = nSz*2 + p->bDel; /* Value of nPos field */ - if( nSz<=127 ){ - pPtr[p->iSzPoslist] = nSz; + assert( p->bDel==0 || p->bDel==1 ); + if( nPos<=127 ){ + pPtr[p->iSzPoslist] = nPos; }else{ - int nByte = sqlite3Fts5GetVarintLen((u32)nSz); - /* WRITEPOSLISTSIZE */ - memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz/2); - sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz); + int nByte = sqlite3Fts5GetVarintLen((u32)nPos); + memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); + sqlite3PutVarint(&pPtr[p->iSzPoslist], nPos); p->nData += (nByte-1); } + p->bDel = 0; p->iSzPoslist = 0; } } @@ -277,6 +279,9 @@ int sqlite3Fts5HashWrite( /* Append the new position offset */ p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); p->iPos = iPos; + }else{ + /* This is a delete. Set the delete flag. */ + p->bDel = 1; } nIncr += p->nData; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index d805d256ff..2616d32e60 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -438,7 +438,8 @@ struct Fts5MultiSegIter { ** Current leaf page number within segment. ** ** iLeafOffset: -** Byte offset within the current leaf that is one byte past the end of the +** Byte offset within the current leaf that is the first byte of the +** position list data (one byte passed the position-list size field). ** rowid field of the current entry. Usually this is the size field of the ** position list data. The exception is if the rowid for the current entry ** is the last thing on the leaf page. 
@@ -465,9 +466,9 @@ struct Fts5MultiSegIter { ** iRowidOffset/nRowidOffset/aRowidOffset: ** These are used if the FTS5_SEGITER_REVERSE flag is set. ** -** Each time a new page is loaded, the iterator is set to point to the -** final rowid. Additionally, the aRowidOffset[] array is populated -** with the byte offsets of all relevant rowid fields on the page. +** For each rowid on the page corresponding to the current term, the +** corresponding aRowidOffset[] entry is set to the byte offset of the +** start of the "position-list-size" field within the page. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ @@ -492,6 +493,8 @@ struct Fts5SegIter { /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ + int nPos; /* Number of bytes in current position list */ + int bDel; /* True if the delete flag is set */ }; #define FTS5_SEGITER_ONETERM 0x01 @@ -722,17 +725,6 @@ static int fts5BufferCompareBlob( return (res==0 ? (pLeft->n - nRight) : res); } -#if 0 -static int fts5CompareBlob( - const u8 *pLeft, int nLeft, - const u8 *pRight, int nRight -){ - int nCmp = MIN(nLeft, nRight); - int res = memcmp(pLeft, pRight, nCmp); - return (res==0 ? (nLeft - nRight) : res); -} -#endif - /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. @@ -1555,16 +1547,64 @@ static void fts5SegIterNextPage( } } +/* +** Argument p points to a buffer containing a varint to be interpreted as a +** position list size field. Read the varint and return the number of bytes +** read. Before returning, set *pnSz to the number of bytes in the position +** list, and *pbDel to true if the delete flag is set, or false otherwise. 
+*/ +static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ + int nSz; + int n = fts5GetVarint32(p, nSz); + *pnSz = nSz/2; + *pbDel = nSz & 0x0001; + return n; +} + +/* +** Fts5SegIter.iLeafOffset currently points to the first byte of a +** position-list size field. Read the value of the field and store it +** in the following variables: +** +** Fts5SegIter.nPos +** Fts5SegIter.bDel +** +** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the +** position list content (if any). +*/ +static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ + if( p->rc==SQLITE_OK ){ + int iOff = pIter->iLeafOffset; /* Offset to read at */ + if( iOff>=pIter->pLeaf->n ){ + assert( 0 ); + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ){ + if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; + return; + } + iOff = 4; + } + iOff += fts5GetPoslistSize(pIter->pLeaf->p+iOff, &pIter->nPos,&pIter->bDel); + pIter->iLeafOffset = iOff; + } +} + /* ** Fts5SegIter.iLeafOffset currently points to the first byte of the ** "nSuffix" field of a term. Function parameter nKeep contains the value ** of the "nPrefix" field (if there was one - it is passed 0 if this is ** the first term in the segment). ** -** This function populates (Fts5SegIter.term) and (Fts5SegIter.iRowid) -** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the offset to -** the size field of the first position list. The position list belonging -** to document (Fts5SegIter.iRowid). +** This function populates: +** +** Fts5SegIter.term +** Fts5SegIter.rowid +** Fts5SegIter.nPos +** Fts5SegIter.bDel +** +** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of +** the first position list. The position list belonging to document +** (Fts5SegIter.iRowid). 
*/ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ @@ -1626,6 +1666,7 @@ static void fts5SegIterInit( u8 *a = pIter->pLeaf->p; pIter->iLeafOffset = fts5GetU16(&a[2]); fts5SegIterLoadTerm(p, pIter, 0); + fts5SegIterLoadNPos(p, pIter); } } @@ -1633,10 +1674,16 @@ static void fts5SegIterInit( ** This function is only ever called on iterators created by calls to ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. ** -** When this function is called, iterator pIter points to the first rowid -** on the current leaf associated with the term being queried. This function -** advances it to point to the last such rowid and, if necessary, initializes -** the aRowidOffset[] and iRowidOffset variables. +** The iterator is in an unusual state when this function is called: the +** Fts5SegIter.iLeafOffset variable is set to the offset of the start of +** the position-list size field for the first relevant rowid on the page. +** Fts5SegIter.rowid is set, but nPos and bDel are not. +** +** This function advances the iterator so that it points to the last +** relevant rowid on the page and, if necessary, initializes the +** aRowidOffset[] and iRowidOffset variables. At this point the iterator +** is in its regular state - Fts5SegIter.iLeafOffset points to the first +** byte of the position list content associated with said rowid. 
*/ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ int n = pIter->pLeaf->n; @@ -1647,10 +1694,10 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ while( p->rc==SQLITE_OK && i=n ) break; i += getVarint(&a[i], (u64*)&iDelta); if( iDelta==0 ) break; @@ -1671,6 +1718,7 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ pIter->iLeafOffset = i; } pIter->iRowidOffset = iRowidOffset; + fts5SegIterLoadNPos(p, pIter); } /* @@ -1730,17 +1778,21 @@ static int fts5SegIterIsDelete( int bRet = 0; Fts5Data *pLeaf = pIter->pLeaf; if( p->rc==SQLITE_OK && pLeaf ){ + bRet = pIter->nPos==0; + /* bRet = pIter->bDel; */ +#if 0 if( pIter->iLeafOffsetn ){ - bRet = (pLeaf->p[pIter->iLeafOffset]==0x00); + bRet = ((pLeaf->p[pIter->iLeafOffset] & 0xFE)==0x00); }else{ Fts5Data *pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno+1 )); if( pNew ){ - bRet = (pNew->p[4]==0x00); + bRet = ((pNew->p[4] & 0xFE)==0x00); fts5DataRelease(pNew); } } +#endif } return bRet; } @@ -1760,19 +1812,23 @@ static void fts5SegIterNext( assert( pbNewTerm==0 || *pbNewTerm==0 ); if( p->rc==SQLITE_OK ){ if( pIter->flags & FTS5_SEGITER_REVERSE ){ + if( pIter->iRowidOffset>0 ){ u8 *a = pIter->pLeaf->p; int iOff; int nPos; + int bDummy; i64 iDelta; - pIter->iRowidOffset--; - pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; - /* READPOSLISTSIZE */ - iOff += fts5GetVarint32(&a[iOff], nPos); - iOff += (nPos / 2); - getVarint(&a[iOff], (u64*)&iDelta); - pIter->iRowid -= iDelta; + if( p->rc==SQLITE_OK ){ + pIter->iRowidOffset--; + pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; + iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); + iOff += nPos; + getVarint(&a[iOff], (u64*)&iDelta); + pIter->iRowid -= iDelta; + fts5SegIterLoadNPos(p, pIter); + } }else{ fts5SegIterReverseNewPage(p, pIter); } @@ -1786,13 +1842,7 @@ static void fts5SegIterNext( u8 *a = pLeaf->p; 
int n = pLeaf->n; - iOff = pIter->iLeafOffset; - if( iOffiLeafOffset + pIter->nPos; if( iOffpLeaf->n = nList; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid); - if( pIter->flags & FTS5_SEGITER_REVERSE ){ - assert( 0 ); - fts5SegIterReverseInitPage(p, pIter); - } } }else{ iOff = 0; @@ -1850,46 +1896,60 @@ static void fts5SegIterNext( } /* Check if the iterator is now at EOF. If so, return early. */ - if( pIter->pLeaf && bNewTerm ){ - if( pIter->flags & FTS5_SEGITER_ONETERM ){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; + if( pIter->pLeaf ){ + if( bNewTerm ){ + if( pIter->flags & FTS5_SEGITER_ONETERM ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + fts5SegIterLoadTerm(p, pIter, nKeep); + fts5SegIterLoadNPos(p, pIter); + if( pbNewTerm ) *pbNewTerm = 1; + } }else{ - fts5SegIterLoadTerm(p, pIter, nKeep); - if( pbNewTerm ) *pbNewTerm = 1; + fts5SegIterLoadNPos(p, pIter); } } } } } +#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } + /* ** Iterator pIter currently points to the first rowid in a doclist. This ** function sets the iterator up so that iterates in reverse order through ** the doclist. */ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ - Fts5Data *pLeaf; /* Current leaf data */ - int iOff = pIter->iLeafOffset; /* Byte offset within current leaf */ Fts5Data *pLast = 0; int pgnoLast = 0; - /* Move to the page that contains the last rowid in this doclist. */ - pLeaf = pIter->pLeaf; - if( pIter->pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = pIter->pDlidx->iLeafPgno; pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast)); }else{ + int iOff; /* Byte offset within pLeaf */ + Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ + + /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first + ** byte of position-list content for the current rowid. 
Back it up + ** so that it points to the start of the position-list size field. */ + pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2 + pIter->bDel); + iOff = pIter->iLeafOffset; + assert( iOff>=4 ); + + /* Search for a new term within the current leaf. If one can be found, + ** then this page contains the largest rowid for the current term. */ while( iOffn ){ int nPos; i64 iDelta; + int bDummy; - /* Position list size in bytes */ - /* READPOSLISTSIZE */ - iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos); - iOff += (nPos / 2); + /* Read the position-list size field */ + iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); + iOff += nPos; if( iOff>=pLeaf->n ) break; /* Rowid delta. Or, if 0x00, the end of doclist marker. */ @@ -1898,28 +1958,27 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ iOff += nPos; } + /* If this condition is true then the largest rowid for the current + ** term may not be stored on the current page. So search forward to + ** see where said rowid really is. */ if( iOff>=pLeaf->n ){ + int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; - i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pIter->iLeafPgno); - i64 iLast = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pSeg->pgnoLast); /* The last rowid in the doclist may not be on the current page. Search - ** forward to find the page containing the last rowid. */ - for(iAbs++; p->rc==SQLITE_OK && iAbs<=iLast; iAbs++){ + ** forward to find the page containing the last rowid. 
*/ + for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ + i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pgno); Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ int iRowid, iTerm; fts5LeafHeader(pNew, &iRowid, &iTerm); if( iRowid ){ - Fts5Data *pTmp = pLast; - pLast = pNew; - pNew = pTmp; - pgnoLast = iAbs & (((i64)1 << FTS5_DATA_PAGE_B) - 1); - } - if( iTerm ){ - iAbs = iLast; + SWAPVAL(Fts5Data*, pNew, pLast); + pgnoLast = pgno; } fts5DataRelease(pNew); + if( iTerm ) break; } } } @@ -1927,14 +1986,16 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ /* If pLast is NULL at this point, then the last rowid for this doclist ** lies on the page currently indicated by the iterator. In this case - ** iLastOff is set to the value that pIter->iLeafOffset will take when - ** the iterator points to that rowid. + ** pIter->iLeafOffset is already set to point to the position-list size + ** field associated with the first relevant rowid on the page. ** ** Or, if pLast is non-NULL, then it is the page that contains the last - ** rowid. + ** rowid. In this case configure the iterator so that it points to the + ** first rowid on this page. */ if( pLast ){ int dummy; + int iOff; fts5DataRelease(pIter->pLeaf); pIter->pLeaf = pLast; pIter->iLeafPgno = pgnoLast; @@ -1966,18 +2027,18 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ ** early without loading the doclist-index (as it belongs to a different ** term. */ if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ + int nPos = pIter->nPos; while( iOffn ){ i64 iDelta; - int nPoslist; - /* iOff is currently the offset of the size field of a position list. 
*/ - /* READPOSLISTSIZE */ - iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist); - iOff += nPoslist / 2; + /* iOff is currently the offset of the start of position list data */ + iOff += nPos; + iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) return; if( iOffn ){ - iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) return; + int bDummy; + iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); } } } @@ -2048,6 +2109,7 @@ static void fts5SegIterSeekInit( int res; pIter->iLeafOffset = fts5GetU16(&pIter->pLeaf->p[2]); fts5SegIterLoadTerm(p, pIter, 0); + fts5SegIterLoadNPos(p, pIter); do { res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); if( res>=0 ) break; @@ -2126,6 +2188,8 @@ static void fts5SegIterHashInit( if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); + }else{ + fts5SegIterLoadNPos(p, pIter); } } } @@ -2296,6 +2360,7 @@ static void fts5SegIterGotoPage( }else{ iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; + fts5SegIterLoadNPos(p, pIter); } } } @@ -2470,7 +2535,7 @@ static void fts5MultiIterNew( Fts5Structure *pStruct, /* Structure of specific index */ int iIdx, /* Config.aHash[] index of FTS index */ int bSkipEmpty, /* True to ignore delete-keys */ - int flags, /* True for >= */ + int flags, /* FTS5INDEX_QUERY_XXX flags */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ @@ -2653,23 +2718,11 @@ static void fts5ChunkIterInit( pIter->iLeafRowid = rowid; } - if( iOffn ){ - fts5DataReference(pLeaf); - pIter->pLeaf = pLeaf; - }else{ - pIter->nRem = 1; - fts5ChunkIterNext(p, pIter); - if( p->rc ) return; - iOff = 4; - pLeaf = pIter->pLeaf; - } - - /* READPOSLISTSIZE */ - iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem); - pIter->nRem = pIter->nRem / 2; + fts5DataReference(pLeaf); + pIter->pLeaf = 
pLeaf; + pIter->nRem = pSeg->nPos; pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); pIter->p = pLeaf->p + iOff; - if( pIter->n==0 ){ fts5ChunkIterNext(p, pIter); } @@ -3047,12 +3100,13 @@ static void fts5WriteAppendTerm( } /* -** Append a docid to the writers output. +** Append a docid and position-list size field to the writers output. */ static void fts5WriteAppendRowid( Fts5Index *p, Fts5SegWriter *pWriter, - i64 iRowid + i64 iRowid, + int nPos ){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->aWriter[0]; @@ -3076,6 +3130,8 @@ static void fts5WriteAppendRowid( pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; + fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); + if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); pWriter->bFirstRowidInPage = 1; @@ -3376,11 +3432,9 @@ fflush(stdout); } /* Append the rowid to the output */ - fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); - - /* Copy the position list from input to output */ /* WRITEPOSLISTSIZE */ - fts5WriteAppendPoslistInt(p, &writer, sPos.nRem * 2); + fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), sPos.nRem*2); + for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); } @@ -3530,9 +3584,10 @@ struct Fts5FlushCtx { ** list nMax bytes or less in size. */ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ - int ret = 0; + int ret; + u32 dummy; + ret = fts5GetVarint32(aBuf, dummy); while( 1 ){ - u32 dummy; int i = fts5GetVarint32(&aBuf[ret], dummy); if( (ret + i) > nMax ) break; ret += i; @@ -3641,12 +3696,12 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ ** loop iterates through the poslists that make up the current ** doclist. 
*/ while( iOffn; - int n; + int n = 0; if( (nCopy - iPos)<=nSpace ){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); } + assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; if( iPos>=nCopy ) break; @@ -3684,7 +3740,6 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ } bFirstDocid = 1; } - assert( pBuf->n<=pgsz ); iOff += nCopy; } } @@ -4097,6 +4152,7 @@ static void fts5MultiIterPoslist( static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ if( pIter->in ){ + int bDummy; if( pIter->i ){ i64 iDelta; pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); @@ -4108,9 +4164,9 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ }else{ pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } - /* READPOSLISTSIZE */ - pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist); - pIter->nPoslist = pIter->nPoslist / 2; + pIter->i += fts5GetPoslistSize( + &pIter->a[pIter->i], &pIter->nPoslist, &bDummy + ); pIter->aPoslist = &pIter->a[pIter->i]; pIter->i += pIter->nPoslist; }else{ @@ -4175,7 +4231,7 @@ static void fts5MergePrefixLists( fts5DoclistIterInit(p1, bDesc, &i1); fts5DoclistIterInit(p2, bDesc, &i2); - while( i1.aPoslist!=0 || i2.aPoslist!=0 ){ + while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ if( i2.aPoslist==0 || (i1.aPoslist && ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowid0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; +static int nCall = 0; +nCall++; ck2 = 0; rc = fts5QueryCksum(p, z, n, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; @@ -5072,9 +5130,9 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ } while( iOff Date: Wed, 15 Apr 2015 16:01:42 +0000 Subject: [PATCH 102/206] Fix a problem preventing doclist indexes from being loaded. 
FossilOrigin-Name: b29109a083e5cd442cdd19f29d7be45b09c4f661 --- ext/fts5/fts5_index.c | 8 +++----- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 2616d32e60..3c6e436e56 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2018,7 +2018,6 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ int iSeg = pIter->pSeg->iSegid; int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ - int iOff = pIter->iLeafOffset; /* Byte offset within current leaf */ assert( pIter->flags & FTS5_SEGITER_ONETERM ); assert( pIter->pDlidx==0 ); @@ -2027,18 +2026,19 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ ** early without loading the doclist-index (as it belongs to a different ** term. */ if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ - int nPos = pIter->nPos; + int iOff = pIter->iLeafOffset + pIter->nPos; while( iOffn ){ i64 iDelta; /* iOff is currently the offset of the start of position list data */ - iOff += nPos; iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) return; if( iOffn ){ int bDummy; + int nPos; iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); + iOff += nPos; } } } @@ -4492,8 +4492,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ ** the index is disabled are the same. In both ASC and DESC order. */ if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; -static int nCall = 0; -nCall++; ck2 = 0; rc = fts5QueryCksum(p, z, n, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; diff --git a/manifest b/manifest index 5066aafec8..a2d7a01cc0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Begin\schanging\sfts5\sto\suse\sa\sdelete\sflag\sso\sthat\sdelete\smarkers\smay\sbe\sannihilated\smore\squickly. 
-D 2015-04-14T20:15:41.831 +C Fix\sa\sproblem\spreventing\sdoclist\sindexes\sfrom\sbeing\sloaded. +D 2015-04-15T16:01:42.421 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c ba7680d0c6b3f4772e1ac54676f6f65679c24a08 +F ext/fts5/fts5_index.c 9556d405a12a38b3e1a323333a2620813b9f323a F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -1292,7 +1292,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bdb8e82ab683f2438cde9f0b63e497dbf0141dcf -R 98bc774fac6611e41c6e4422cb947fd5 +P 9341c070bb6140dbf559680952909674aa83fa55 +R c6ba726746ea3bc47027dcf89fd1186f U dan -Z 5d35832a61b13dbbe8195fefbe4221a8 +Z a5a728f32ef73ddd62455c44220623a7 diff --git a/manifest.uuid b/manifest.uuid index 0a948972b3..fdf9afda57 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9341c070bb6140dbf559680952909674aa83fa55 \ No newline at end of file +b29109a083e5cd442cdd19f29d7be45b09c4f661 \ No newline at end of file From 687c5124e07351ce9a78dbb6632f043f5ee06732 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 15 Apr 2015 18:49:20 +0000 Subject: [PATCH 103/206] Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances. 
FossilOrigin-Name: 50fae1f0006c0e946b5214e73eedf2687a0016f9 --- ext/fts5/fts5_index.c | 102 ++++++++++++++------------------- ext/fts5/test/fts5corrupt.test | 2 +- manifest | 14 ++--- manifest.uuid | 2 +- 4 files changed, 51 insertions(+), 69 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 3c6e436e56..d407411a2f 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1771,30 +1771,9 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ ** points to a delete marker. A delete marker is an entry with a 0 byte ** position-list. */ -static int fts5SegIterIsDelete( - Fts5Index *p, /* FTS5 backend object */ - Fts5SegIter *pIter /* Iterator to advance */ -){ - int bRet = 0; - Fts5Data *pLeaf = pIter->pLeaf; - if( p->rc==SQLITE_OK && pLeaf ){ - bRet = pIter->nPos==0; - /* bRet = pIter->bDel; */ -#if 0 - if( pIter->iLeafOffsetn ){ - bRet = ((pLeaf->p[pIter->iLeafOffset] & 0xFE)==0x00); - }else{ - Fts5Data *pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( - pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno+1 - )); - if( pNew ){ - bRet = ((pNew->p[4] & 0xFE)==0x00); - fts5DataRelease(pNew); - } - } -#endif - } - return bRet; +static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){ + Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); } /* @@ -2318,7 +2297,10 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ assert( i2>i1 ); assert( i2!=0 ); pRes->bTermEq = 1; - if( p1->iRowid==p2->iRowid ) return i2; + if( p1->iRowid==p2->iRowid ){ + p1->bDel = p2->bDel; + return i2; + } res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? 
-1 : +1; } assert( res!=0 ); @@ -2513,9 +2495,7 @@ static void fts5MultiIterNext( fts5AssertMultiIterSetup(p, pIter); bUseFrom = 0; - }while( pIter->bSkipEmpty - && fts5SegIterIsDelete(p, &pIter->aSeg[pIter->aFirst[1].iFirst]) - ); + }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) ); } } @@ -2611,9 +2591,7 @@ static void fts5MultiIterNew( } fts5AssertMultiIterSetup(p, pNew); - if( pNew->bSkipEmpty - && fts5SegIterIsDelete(p, &pNew->aSeg[pNew->aFirst[1].iFirst]) - ){ + if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ fts5MultiIterNext(p, pNew, 0, 0); } }else{ @@ -3408,36 +3386,38 @@ fflush(stdout); ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ + int nPos; /* position-list size field value */ + int nTerm; + const u8 *pTerm; + + /* Check for key annihilation. */ + if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue; - /* If the segment being written is the oldest in the entire index and - ** the position list is empty (i.e. the entry is a delete marker), no - ** entry need be written to the output. */ fts5ChunkIterInit(p, pSeg, &sPos); - if( bOldest==0 || sPos.nRem>0 ){ - int nTerm; - const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); - if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ - if( pnRem && writer.nLeafWritten>nRem ){ - fts5ChunkIterRelease(&sPos); - break; - } - /* This is a new term. Append a term to the output segment. 
*/ - if( bRequireDoclistTerm ){ - fts5WriteAppendZerobyte(p, &writer); - } - fts5WriteAppendTerm(p, &writer, nTerm, pTerm); - fts5BufferSet(&p->rc, &term, nTerm, pTerm); - bRequireDoclistTerm = 1; + pTerm = fts5MultiIterTerm(pIter, &nTerm); + if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ + if( pnRem && writer.nLeafWritten>nRem ){ + fts5ChunkIterRelease(&sPos); + break; } - /* Append the rowid to the output */ - /* WRITEPOSLISTSIZE */ - fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), sPos.nRem*2); - - for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ - fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); + /* This is a new term. Append a term to the output segment. */ + if( bRequireDoclistTerm ){ + fts5WriteAppendZerobyte(p, &writer); } + fts5WriteAppendTerm(p, &writer, nTerm, pTerm); + fts5BufferSet(&p->rc, &term, nTerm, pTerm); + bRequireDoclistTerm = 1; + } + + /* Append the rowid to the output */ + /* WRITEPOSLISTSIZE */ + nPos = pSeg->nPos*2 + pSeg->bDel; + fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos); + + for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ + fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); } fts5ChunkIterRelease(&sPos); @@ -3882,13 +3862,15 @@ static void fts5BtreeIterInit( int i; nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1); memset(pIter, 0, sizeof(*pIter)); - pIter->nLvl = pSeg->nHeight-1; - pIter->iIdx = iIdx; - pIter->p = p; - pIter->pSeg = pSeg; - if( nByte && p->rc==SQLITE_OK ){ + if( nByte ){ pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte); } + if( p->rc==SQLITE_OK ){ + pIter->nLvl = pSeg->nHeight-1; + pIter->iIdx = iIdx; + pIter->p = p; + pIter->pSeg = pSeg; + } for(i=0; p->rc==SQLITE_OK && inLvl; i++){ i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1); Fts5Data *pData; diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index 57473afe65..a9393de43d 100644 --- 
a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -63,8 +63,8 @@ db func rnddoc fts5_rnddoc do_test 2.1 { for {set i 0} {$i < 500} {incr i} { execsql { INSERT INTO t2 VALUES(rnddoc(50)) } - execsql { INSERT INTO t2(t2) VALUES('integrity-check') } } + execsql { INSERT INTO t2(t2) VALUES('integrity-check') } } {} #-------------------------------------------------------------------- diff --git a/manifest b/manifest index a2d7a01cc0..994b105994 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\spreventing\sdoclist\sindexes\sfrom\sbeing\sloaded. -D 2015-04-15T16:01:42.421 +C Logically\sstore\supdates\sas\s(insert+delete)\swithin\sthe\sFTS\stree.\sThis\sallows\skeys\sto\sbe\sannihilated\smore\squickly\sunder\ssome\scircumstances. +D 2015-04-15T18:49:20.008 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 9556d405a12a38b3e1a323333a2620813b9f323a +F ext/fts5/fts5_index.c 28f1bfadf3eb4f860c8b978f4d8d6ea0cf7c724d F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -134,7 +134,7 @@ F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test 6a5717faaf7f1e0e866360022d284903f3a4eede F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 -F ext/fts5/test/fts5corrupt.test 78eb076867e750a013b46b3bc06065870bc93c22 +F 
ext/fts5/test/fts5corrupt.test dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test fbd8612889234849ff041f5b36f8e390feeed46e @@ -1292,7 +1292,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 9341c070bb6140dbf559680952909674aa83fa55 -R c6ba726746ea3bc47027dcf89fd1186f +P b29109a083e5cd442cdd19f29d7be45b09c4f661 +R 3f3d372bbefa9c63dd6c3e57000a1fa8 U dan -Z a5a728f32ef73ddd62455c44220623a7 +Z ff9a464604d7627b634f349ee851852e diff --git a/manifest.uuid b/manifest.uuid index fdf9afda57..ab0ef8daa2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b29109a083e5cd442cdd19f29d7be45b09c4f661 \ No newline at end of file +50fae1f0006c0e946b5214e73eedf2687a0016f9 \ No newline at end of file From 80d3ea080a151c54f83c2e11e77fc6255c5692fd Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 20 Apr 2015 18:48:57 +0000 Subject: [PATCH 104/206] Fix some fts5 problems with very large position lists. FossilOrigin-Name: 2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a --- ext/fts5/fts5_index.c | 28 +++++++++++++---- ext/fts5/test/fts5bigpl.test | 58 ++++++++++++++++++++++++++++++++++++ manifest | 13 ++++---- manifest.uuid | 2 +- 4 files changed, 88 insertions(+), 13 deletions(-) create mode 100644 ext/fts5/test/fts5bigpl.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index d407411a2f..7d48d074da 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1901,13 +1901,20 @@ static void fts5SegIterNext( ** the doclist. 
*/ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ + Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; - if( pIter->pDlidx ){ - int iSegid = pIter->pSeg->iSegid; - pgnoLast = pIter->pDlidx->iLeafPgno; - pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast)); + if( pDlidx ){ + /* If the doclist-iterator is already at EOF, then the current doclist + ** contains no entries except those on the current page. */ + if( fts5DlidxIterEof(p, pDlidx)==0 ){ + int iSegid = pIter->pSeg->iSegid; + pgnoLast = pDlidx->iLeafPgno; + pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast)); + }else{ + pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); + } }else{ int iOff; /* Byte offset within pLeaf */ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ @@ -1915,7 +1922,7 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first ** byte of position-list content for the current rowid. Back it up ** so that it points to the start of the position-list size field. */ - pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2 + pIter->bDel); + pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); iOff = pIter->iLeafOffset; assert( iOff>=4 ); @@ -3285,6 +3292,9 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ if( pSeg->pSeg==0 ){ /* no-op */ }else if( pSeg->pLeaf==0 ){ + /* All keys from this input segment have been transfered to the output. + ** Set both the first and last page-numbers to 0 to indicate that the + ** segment is now empty. 
*/ pSeg->pSeg->pgnoLast = 0; pSeg->pSeg->pgnoFirst = 0; }else{ @@ -4092,7 +4102,13 @@ static void fts5IndexIntegrityCheckSegment( } } - if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ + /* Either iter.iLeaf must be the rightmost leaf-page in the segment, or + ** else the segment has been completely emptied by an ongoing merge + ** operation. */ + if( p->rc==SQLITE_OK + && iter.iLeaf!=pSeg->pgnoLast + && (pSeg->pgnoFirst || pSeg->pgnoLast) + ){ p->rc = FTS5_CORRUPT; } diff --git a/ext/fts5/test/fts5bigpl.test b/ext/fts5/test/fts5bigpl.test new file mode 100644 index 0000000000..172c0396be --- /dev/null +++ b/ext/fts5/test/fts5bigpl.test @@ -0,0 +1,58 @@ +# 2015 April 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This test is focused on really large position lists. Those that require +# 4 or 5 byte position-list size varints. Because of the amount of memory +# required, these tests only run on 64-bit platforms. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5bigpl + +if { $tcl_platform(wordSize)<8 } { + finish_test + return +} + +do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) } + +do_test 1.1 { + foreach t {a b c d e f g h i j} { + set doc [string repeat "$t " 1200000] + execsql { INSERT INTO t1 VALUES($doc) } + } + execsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {} + +do_test 1.2 { + execsql { DELETE FROM t1 } + foreach t {"a b" "b a" "c d" "d c"} { + set doc [string repeat "$t " 600000] + execsql { INSERT INTO t1 VALUES($doc) } + } + execsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {} + + +# 5-byte varint. This test takes 30 seconds or so on a 2014 workstation. 
+# The generated database is roughly 635MiB. +# +do_test 2.1...slow { + execsql { DELETE FROM t1 } + foreach t {a} { + set doc [string repeat "$t " 150000000] + execsql { INSERT INTO t1 VALUES($doc) } + } + execsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {} + +finish_test + diff --git a/manifest b/manifest index 994b105994..cb1f4c510b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Logically\sstore\supdates\sas\s(insert+delete)\swithin\sthe\sFTS\stree.\sThis\sallows\skeys\sto\sbe\sannihilated\smore\squickly\sunder\ssome\scircumstances. -D 2015-04-15T18:49:20.008 +C Fix\ssome\sfts5\sproblems\swith\svery\slarge\sposition\slists. +D 2015-04-20T18:48:57.780 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 28f1bfadf3eb4f860c8b978f4d8d6ea0cf7c724d +F ext/fts5/fts5_index.c f840e35cceafcd0597688467010a4d12feea9c76 F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -133,6 +133,7 @@ F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test 6a5717faaf7f1e0e866360022d284903f3a4eede F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b +F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 F ext/fts5/test/fts5corrupt.test 
dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c @@ -1292,7 +1293,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b29109a083e5cd442cdd19f29d7be45b09c4f661 -R 3f3d372bbefa9c63dd6c3e57000a1fa8 +P 50fae1f0006c0e946b5214e73eedf2687a0016f9 +R e3e266d7af429931ea61b2ad868bf28e U dan -Z ff9a464604d7627b634f349ee851852e +Z 88f214a9049d68201f885f825375d535 diff --git a/manifest.uuid b/manifest.uuid index ab0ef8daa2..6b9e7e2cd3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -50fae1f0006c0e946b5214e73eedf2687a0016f9 \ No newline at end of file +2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a \ No newline at end of file From d7889b26acd8fa664677fe923e091d356374fb53 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 21 Apr 2015 19:07:39 +0000 Subject: [PATCH 105/206] Fix an fts5 problem with large deletes. FossilOrigin-Name: e50e8031d6f804ebe50e0eec9a8b6e7f0152ecc3 --- ext/fts5/fts5_index.c | 11 +++-- ext/fts5/test/fts5dlidx.test | 80 ++++++++++++++++++++++++++++++++++++ manifest | 13 +++--- manifest.uuid | 2 +- 4 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 ext/fts5/test/fts5dlidx.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 7d48d074da..87fd11a096 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1426,7 +1426,7 @@ static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ } static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ - return (p->rc!=SQLITE_OK || pIter->bEof); + return pIter->bEof; } static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ @@ -1460,6 +1460,7 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ pIter->iRowid -= iVal; pIter->iLeafPgno--; + /* Skip backwards passed any 0x00 bytes. 
*/ while( iOff>pIter->iFirstOff && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 ){ @@ -3698,6 +3699,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); bFirstDocid = 0; + fts5WriteDlidxAppend(p, &writer, iRowid); }else{ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); } @@ -3720,13 +3722,16 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + assert( n>=nSpace ); } assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; + if( pBuf->n>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + } if( iPos>=nCopy ) break; - fts5WriteFlushLeaf(p, &writer); - pBuf = &writer.aWriter[0].buf; } bFirstDocid = 1; } diff --git a/ext/fts5/test/fts5dlidx.test b/ext/fts5/test/fts5dlidx.test new file mode 100644 index 0000000000..0bfc3f331e --- /dev/null +++ b/ext/fts5/test/fts5dlidx.test @@ -0,0 +1,80 @@ +# 2015 April 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This test is focused on uses of doclist-index records. 
+# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5dlidx + +if { $tcl_platform(wordSize)<8 } { + finish_test + return +} + +proc do_fb_test {tn sql res} { + set res2 [lsort -integer -decr $res] + uplevel [list do_execsql_test $tn.1 $sql $res] + uplevel [list do_execsql_test $tn.2 "$sql ORDER BY rowid DESC" $res2] +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); +} + +foreach {tn spc1 spc2 mul} { + 1 10 100 1000 + 2 1 1 128 +} { + set xdoc [list] + set ydoc [list] + + execsql { DELETE FROM t1 } + + do_test 1.$tn.1 { + + execsql BEGIN + for {set i 0} {$i < 10000} {incr i} { + set rowid [expr $i * $mul] + set doc "a b c a b c a b c a b c a b c" + if {($i % $spc1)==0} { + lappend xdoc $rowid + append doc " x" + if {($i % $spc2)==0} { + lappend ydoc $rowid + append doc " y" + } + } + execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } + } + execsql COMMIT + execsql { INSERT INTO t1(t1) VALUES('integrity-check') } + } {} + + do_execsql_test 1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + + do_fb_test 1.$tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc + do_fb_test 1.$tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc + + do_fb_test 1.$tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc + do_fb_test 1.$tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc + + do_fb_test 1.$tn.5.1 { + SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc + do_fb_test 1.$tn.5.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc + +} + + +finish_test + diff --git a/manifest b/manifest index cb1f4c510b..5c12ac6c1c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssome\sfts5\sproblems\swith\svery\slarge\sposition\slists. -D 2015-04-20T18:48:57.780 +C Fix\san\sfts5\sproblem\swith\slarge\sdeletes. 
+D 2015-04-21T19:07:39.210 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c f840e35cceafcd0597688467010a4d12feea9c76 +F ext/fts5/fts5_index.c abf74b5e0d96b954911b6db3e13dce008c091303 F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -136,6 +136,7 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 F ext/fts5/test/fts5corrupt.test dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d +F ext/fts5/test/fts5dlidx.test 710d1eaf44e6fbb09dfa73b7fd488227d8cc751a F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test fbd8612889234849ff041f5b36f8e390feeed46e @@ -1293,7 +1294,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 50fae1f0006c0e946b5214e73eedf2687a0016f9 -R e3e266d7af429931ea61b2ad868bf28e +P 2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a +R f13e2690ef21877d2344bab07fedcde6 U dan -Z 88f214a9049d68201f885f825375d535 +Z 02eb8d79520bf4e8512cc80bc0c885cd diff --git a/manifest.uuid b/manifest.uuid index 
6b9e7e2cd3..a63e0364d4 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a \ No newline at end of file +e50e8031d6f804ebe50e0eec9a8b6e7f0152ecc3 \ No newline at end of file From 5bb97c2b801248ab08b37b99926962f7db5c0e09 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 21 Apr 2015 20:13:38 +0000 Subject: [PATCH 106/206] Fix a problem in fts5fault1.test. FossilOrigin-Name: a21d60cb2ac6463c012d82d1970d90da5da2a14a --- ext/fts5/fts5_index.c | 64 ++++++++++++++--------------------- ext/fts5/test/fts5fault1.test | 8 ++--- manifest | 14 ++++---- manifest.uuid | 2 +- 4 files changed, 35 insertions(+), 53 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 87fd11a096..05c8d68319 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1473,45 +1473,31 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ return pIter->bEof; } -static void fts5DlidxIterInitFromData( - Fts5Index *p, /* Fts5 Backend to iterate within */ - int bRev, /* True for ORDER BY ASC */ - int iLeafPgno, /* Leaf page number dlidx is for */ - Fts5Data *pDlidx, /* Leaf index data */ - Fts5DlidxIter **ppIter /* OUT: Populated iterator */ -){ - Fts5DlidxIter *pIter = *ppIter; - - if( pIter==0 ){ - *ppIter = pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); - if( pIter==0 ){ - fts5DataRelease(pDlidx); - return; - } - }else{ - memset(pIter, 0, sizeof(Fts5DlidxIter)); - } - - pIter->pData = pDlidx; - pIter->iLeafPgno = iLeafPgno; - if( bRev==0 ){ - fts5DlidxIterFirst(pIter); - }else{ - fts5DlidxIterLast(pIter); - } -} - -static void fts5DlidxIterInit( +static Fts5DlidxIter *fts5DlidxIterInit( Fts5Index *p, /* Fts5 Backend to iterate within */ int bRev, /* True for ORDER BY ASC */ int iIdx, int iSegid, /* Segment iSegid within index iIdx */ - int iLeafPgno, /* Leaf page number to load dlidx for */ - Fts5DlidxIter **ppIter /* OUT: Populated iterator */ + int iLeafPg /* Leaf page number to load dlidx for */ ){ - 
Fts5Data *pDlidx; - pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno)); - if( pDlidx==0 ) return; - fts5DlidxIterInitFromData(p, bRev, iLeafPgno, pDlidx, ppIter); + Fts5DlidxIter *pIter; + + pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); + if( pIter==0 ) return 0; + + pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPg)); + if( pIter->pData==0 ){ + sqlite3_free(pIter); + pIter = 0; + }else{ + pIter->iLeafPgno = iLeafPg; + if( bRev==0 ){ + fts5DlidxIterFirst(pIter); + }else{ + fts5DlidxIterLast(pIter); + } + } + + return pIter; } /* @@ -2030,7 +2016,7 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ } } - fts5DlidxIterInit(p, bRev, iIdx, iSeg, pIter->iTermLeafPgno, &pIter->pDlidx); + pIter->pDlidx = fts5DlidxIterInit(p, bRev, iIdx, iSeg, pIter->iTermLeafPgno); } /* @@ -3975,7 +3961,7 @@ static void fts5DlidxIterTestReverse( i64 cksum1 = 13; i64 cksum2 = 13; - for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf, &pDlidx); + for(pDlidx=fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(pDlidx) ){ @@ -3986,7 +3972,7 @@ static void fts5DlidxIterTestReverse( fts5DlidxIterFree(pDlidx); pDlidx = 0; - for(fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf, &pDlidx); + for(pDlidx=fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterPrev(pDlidx) ){ @@ -4063,7 +4049,7 @@ static void fts5IndexIntegrityCheckSegment( int iPg; i64 iKey; - for(fts5DlidxIterInit(p, 0, iIdx, iSegid, iter.iLeaf, &pDlidx); + for(pDlidx=fts5DlidxIterInit(p, 0, iIdx, iSegid, iter.iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(pDlidx) ){ diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index 838bf3cc4b..ff6e2483e9 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -31,8 +31,6 @@ ifcapable !fts5 { # # -if 1 { - faultsim_save_and_close do_faultsim_test 1 -prep { 
faultsim_restore_and_reopen @@ -317,8 +315,6 @@ do_faultsim_test 8.1 -faults oom* -prep { faultsim_test_result [list 0 $::res] } -} - #------------------------------------------------------------------------- # Segment promotion. # @@ -336,13 +332,13 @@ do_test 9.0 { } fts5_level_segs s2 } {0 1} +set insert_doc [db one {SELECT rnddoc(160)}] faultsim_save_and_close do_faultsim_test 9.1 -faults oom-* -prep { faultsim_restore_and_reopen - db func rnddoc fts5_rnddoc } -body { - execsql { INSERT INTO s2 VALUES(rnddoc(160)) } + execsql { INSERT INTO s2 VALUES($::insert_doc) } } -test { faultsim_test_result {0 {}} if {$testrc==0} { diff --git a/manifest b/manifest index 5c12ac6c1c..3007e261c8 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sproblem\swith\slarge\sdeletes. -D 2015-04-21T19:07:39.210 +C Fix\sa\sproblem\sin\sfts5fault1.test. +D 2015-04-21T20:13:38.631 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c abf74b5e0d96b954911b6db3e13dce008c091303 +F ext/fts5/fts5_index.c 6ae86ef3f266c303cbf4a04fe63e8da54d91cd09 F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -139,7 +139,7 @@ F ext/fts5/test/fts5corrupt.test dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d F ext/fts5/test/fts5dlidx.test 710d1eaf44e6fbb09dfa73b7fd488227d8cc751a F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e 
-F ext/fts5/test/fts5fault1.test fbd8612889234849ff041f5b36f8e390feeed46e +F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e @@ -1294,7 +1294,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a -R f13e2690ef21877d2344bab07fedcde6 +P e50e8031d6f804ebe50e0eec9a8b6e7f0152ecc3 +R 60eea8ec1517faa88921988ca09638c5 U dan -Z 02eb8d79520bf4e8512cc80bc0c885cd +Z b632ef0339d92005d6b22a3d7746ce59 diff --git a/manifest.uuid b/manifest.uuid index a63e0364d4..e8e16db49a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e50e8031d6f804ebe50e0eec9a8b6e7f0152ecc3 \ No newline at end of file +a21d60cb2ac6463c012d82d1970d90da5da2a14a \ No newline at end of file From c51f5b3fd983630a707b710095f6d65f7dca404d Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 22 Apr 2015 20:14:46 +0000 Subject: [PATCH 107/206] Fix problems with doclist-indexes involving very large rowids. 
FossilOrigin-Name: 1e5994097e4c740c5173ea9718c3935728fdb86f --- ext/fts5/fts5_index.c | 16 +++----- ext/fts5/test/fts5dlidx.test | 71 +++++++++++++++++++++--------------- manifest | 14 +++---- manifest.uuid | 2 +- 4 files changed, 55 insertions(+), 48 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 05c8d68319..ee8eb40630 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2993,8 +2993,9 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ /* Increase the leaves written counter */ pWriter->nLeafWritten++; - /* The new leaf holds no terms */ + /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; + pWriter->bFirstRowidInPage = 1; } /* @@ -3067,7 +3068,6 @@ static void fts5WriteAppendTerm( /* If the current leaf page is full, flush it to disk. */ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } } @@ -3106,7 +3106,6 @@ static void fts5WriteAppendRowid( if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } } } @@ -3121,7 +3120,6 @@ static void fts5WriteAppendPoslistInt( fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } } } @@ -3148,7 +3146,6 @@ static void fts5WriteAppendPoslistData( a += nCopy; n -= nCopy; fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } if( n>0 ){ fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); @@ -3667,7 +3664,8 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ i64 iRowid = 0; i64 iDelta = 0; int iOff = 0; - int bFirstDocid = 0; + + writer.bFirstRowidInPage = 0; /* The entire doclist will not fit on this leaf. 
The following ** loop iterates through the poslists that make up the current @@ -3681,10 +3679,10 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ nCopy += nPos; iRowid += iDelta; - if( bFirstDocid ){ + if( writer.bFirstRowidInPage ){ fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); - bFirstDocid = 0; + writer.bFirstRowidInPage = 0; fts5WriteDlidxAppend(p, &writer, iRowid); }else{ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); @@ -3708,7 +3706,6 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); - assert( n>=nSpace ); } assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); @@ -3719,7 +3716,6 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ } if( iPos>=nCopy ) break; } - bFirstDocid = 1; } iOff += nCopy; } diff --git a/ext/fts5/test/fts5dlidx.test b/ext/fts5/test/fts5dlidx.test index 0bfc3f331e..6a23622ce0 100644 --- a/ext/fts5/test/fts5dlidx.test +++ b/ext/fts5/test/fts5dlidx.test @@ -31,50 +31,61 @@ do_execsql_test 1.0 { INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } -foreach {tn spc1 spc2 mul} { - 1 10 100 1000 - 2 1 1 128 -} { +# This test populates the FTS5 table containing $nEntry entries. Rows are +# numbered from 0 to ($nEntry-1). The rowid for row $i is: +# +# ($iFirst + $i*$nStep) +# +# Each document is of the form "a b c a b c a b c...". If the row number ($i) +# is an integer multiple of $spc1, then an "x" token is appended to the +# document. If it is *also* a multiple of $spc2, a "y" token is also appended. 
+# +proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { + + do_execsql_test $tn.0 { DELETE FROM t1 } + set xdoc [list] set ydoc [list] - - execsql { DELETE FROM t1 } - do_test 1.$tn.1 { - - execsql BEGIN - for {set i 0} {$i < 10000} {incr i} { - set rowid [expr $i * $mul] - set doc "a b c a b c a b c a b c a b c" - if {($i % $spc1)==0} { - lappend xdoc $rowid - append doc " x" - if {($i % $spc2)==0} { - lappend ydoc $rowid - append doc " y" - } + execsql BEGIN + for {set i 0} {$i < $nEntry} {incr i} { + set rowid [expr $i * $nStep] + set doc [string trim [string repeat "a b c " 100]] + if {($i % $spc1)==0} { + lappend xdoc $rowid + append doc " x" + if {($i % $spc2)==0} { + lappend ydoc $rowid + append doc " y" } - execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } } - execsql COMMIT + execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } + } + execsql COMMIT + + do_test $tn.1 { execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} - do_execsql_test 1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') } + do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc + do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc - do_fb_test 1.$tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc - do_fb_test 1.$tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc + do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc + do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc - do_fb_test 1.$tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc - do_fb_test 1.$tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc - - do_fb_test 1.$tn.5.1 { + do_fb_test $tn.5.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc - do_fb_test 1.$tn.5.2 { + do_fb_test $tn.5.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc - } +do_dlidx_test1 1.1 10 100 10000 0 1000 +do_dlidx_test1 1.2 10 10 10000 0 128 +do_dlidx_test1 1.3 10 10 100 
0 36028797018963970 +do_dlidx_test1 1.3 10 10 50 0 150000000000000000 + + + finish_test diff --git a/manifest b/manifest index 151780783a..7b1af793d3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\sthis\sbranch\swith\slatest\strunk\schanges. -D 2015-04-22T09:40:35.867 +C Fix\sproblems\swith\sdoclist-indexes\sinvolving\svery\slarge\srowids. +D 2015-04-22T20:14:46.893 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in faaf75b89840659d74501bea269c7e33414761c1 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 6ae86ef3f266c303cbf4a04fe63e8da54d91cd09 +F ext/fts5/fts5_index.c 446f70d131e8d12a464f2fb7a4d0348e8334f45f F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -136,7 +136,7 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 F ext/fts5/test/fts5corrupt.test dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d -F ext/fts5/test/fts5dlidx.test 710d1eaf44e6fbb09dfa73b7fd488227d8cc751a +F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d @@ -1300,7 +1300,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh 
f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a21d60cb2ac6463c012d82d1970d90da5da2a14a 2cb945116e7a5b78741b19839899826b539d5868 -R ce8dc868a2df13b90cea4f2e6b631af2 +P 9797482ded7de985e3b20aedec5e4d81f55065c8 +R 0cda6877e1c22250889225fbc6083da5 U dan -Z da83630e09cb54023a2663ea2cafcf24 +Z 27e6fd1a7355db699b5f202767df6e93 diff --git a/manifest.uuid b/manifest.uuid index 98e5186972..2744268eb0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9797482ded7de985e3b20aedec5e4d81f55065c8 \ No newline at end of file +1e5994097e4c740c5173ea9718c3935728fdb86f \ No newline at end of file From 206480992dffa2053cb4818b1a4f6da8e2c1ebf6 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 22 Apr 2015 20:58:24 +0000 Subject: [PATCH 108/206] Add extra OOM tests for fts5. FossilOrigin-Name: 2dd59b5762c944b3bdd270e95c6739cd4f530bfa --- ext/fts5/fts5_expr.c | 5 ++-- ext/fts5/fts5_index.c | 15 ++-------- ext/fts5/test/fts5fault2.test | 55 +++++++++++++++++++++++++++++++++++ manifest | 15 +++++----- manifest.uuid | 2 +- 5 files changed, 70 insertions(+), 22 deletions(-) create mode 100644 ext/fts5/test/fts5fault2.test diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 878b54f53e..af44bcf760 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -623,8 +623,9 @@ static int fts5ExprAdvanceto( iRowid = sqlite3Fts5IterRowid(pIter); if( (bDesc==0 && iLast>iRowid) || (bDesc && iLastrc==SQLITE_OK ){ + const u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; int iOff = pIter->iLeafOffset; /* Offset to read at */ - if( iOff>=pIter->pLeaf->n ){ - assert( 0 ); - fts5SegIterNextPage(p, pIter); - if( pIter->pLeaf==0 ){ - if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; - return; - } - iOff = 4; - } - iOff += fts5GetPoslistSize(pIter->pLeaf->p+iOff, &pIter->nPos,&pIter->bDel); - pIter->iLeafOffset = iOff; + pIter->iLeafOffset += fts5GetPoslistSize(a, 
&pIter->nPos,&pIter->bDel); } } @@ -1678,7 +1669,7 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; - while( p->rc==SQLITE_OK && i Date: Fri, 24 Apr 2015 06:02:29 +0000 Subject: [PATCH 109/206] Fix an fts5 build problem in main.mk. FossilOrigin-Name: 60045cedef109f03317dc878fe6bb3d03867ae69 --- main.mk | 8 +++++--- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/main.mk b/main.mk index 2129a6c217..424bb638d4 100644 --- a/main.mk +++ b/main.mk @@ -621,14 +621,14 @@ fts3_unicode2.o: $(TOP)/ext/fts3/fts3_unicode2.c $(HDR) $(EXTHDR) fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c -fts5.o: $(TOP)/ext/fts5/fts5.c $(HDR) $(EXTHDR) - $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5.c - rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c # FTS5 things # +fts5.o: $(TOP)/ext/fts5/fts5.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5.c + fts5_aux.o: $(TOP)/ext/fts5/fts5_aux.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_aux.c @@ -666,6 +666,8 @@ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon | sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c +fts5parse.h: fts5parse.c + userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c diff --git a/manifest b/manifest index d45776e983..994c259715 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sextra\sOOM\stests\sfor\sfts5. -D 2015-04-22T20:58:24.232 +C Fix\san\sfts5\sbuild\sproblem\sin\smain.mk. 
+D 2015-04-24T06:02:29.587 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in faaf75b89840659d74501bea269c7e33414761c1 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -200,7 +200,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 0bdbbda2133675d0624b333f55e5d38a46ece577 +F main.mk 2f5c69b4d942b4084f61c7a9e76e6ad2c9912bcd F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk d5e22023b5238985bb54a72d33e0ac71fe4f8a32 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1301,7 +1301,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1e5994097e4c740c5173ea9718c3935728fdb86f -R 8fd8d325057a6a764b3fb8036f0655cc +P 2dd59b5762c944b3bdd270e95c6739cd4f530bfa +R 16931392bbfaadd753438e7b377b1156 U dan -Z a76fe5c70a2f59aae49b2624efc313d4 +Z b1c66eea22fb17fe989c58ccfd8fe427 diff --git a/manifest.uuid b/manifest.uuid index 4871a966e3..e4bd9e57cc 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2dd59b5762c944b3bdd270e95c6739cd4f530bfa \ No newline at end of file +60045cedef109f03317dc878fe6bb3d03867ae69 \ No newline at end of file From def90aae18f406fddb37151fb9aa8cb81eca0aac Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 24 Apr 2015 15:56:09 +0000 Subject: [PATCH 110/206] Add extra tests for corrupt database handling in fts5. 
FossilOrigin-Name: 41449f7a0b5da6332eef48386c91ef63382c4783 --- ext/fts5/fts5Int.h | 12 ++++ ext/fts5/fts5_index.c | 61 +++++++++++++---- ext/fts5/fts5_tcl.c | 38 ++++++++++- ext/fts5/test/fts5corrupt.test | 4 +- ext/fts5/test/fts5corrupt2.test | 116 ++++++++++++++++++++++++++++++++ ext/fts5/test/fts5rebuild.test | 2 +- manifest | 21 +++--- manifest.uuid | 2 +- 8 files changed, 230 insertions(+), 26 deletions(-) create mode 100644 ext/fts5/test/fts5corrupt2.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 59d3271212..0bbea3aa7d 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -39,6 +39,18 @@ int sqlite3Fts5Corrupt(void); # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif +/* +** The assert_nc() macro is similar to the assert() macro, except that it +** is used for assert() conditions that are true only if it can be +** guaranteed that the database is not corrupt. +*/ +#ifdef SQLITE_TEST +extern int sqlite3_fts5_may_be_corrupt; +# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) +#else +# define assert_nc(x) assert(x) +#endif + /************************************************************************** ** Interface to code in fts5.c. */ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 3708ba58ca..abddf5b30d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -752,6 +752,46 @@ static void fts5CloseReader(Fts5Index *p){ } } +/* +** Check if row iRowid exists in the %_data table, and that it contains +** a blob value. If so, return SQLITE_ERROR (yes - SQLITE_ERROR, not +** SQLITE_OK). If not, return SQLITE_CORRUPT_VTAB. +** +** If an error occurs (e.g. OOM or IOERR), return the relevant error code. +** +** This function does not need to be efficient. It is part of very rarely +** invoked error handling code only.
+*/ +#if 0 +static int fts5CheckMissingRowid(Fts5Index *p, i64 iRowid){ + const char *zFmt = "SELECT typeof(block)=='blob' FROM '%q'.%Q WHERE id=%lld"; + int bOk = 0; + int rc; + char *zSql; + + zSql = sqlite3_mprintf(zFmt, p->pConfig->zDb, p->zDataTbl, iRowid); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3_stmt *pStmt; + rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + bOk = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_finalize(pStmt); + } + sqlite3_free(zSql); + } + + if( rc==SQLITE_OK ){ + rc = bOk ? SQLITE_ERROR : FTS5_CORRUPT; + } + + return rc; +} +#endif + static Fts5Data *fts5DataReadOrBuffer( Fts5Index *p, Fts5Buffer *pBuf, @@ -761,13 +801,6 @@ static Fts5Data *fts5DataReadOrBuffer( if( p->rc==SQLITE_OK ){ int rc = SQLITE_OK; -#if 0 -Fts5Buffer buf = {0,0,0}; -fts5DebugRowid(&rc, &buf, iRowid); -fprintf(stdout, "read: %s\n", buf.p); -fflush(stdout); -sqlite3_free(buf.p); -#endif if( p->pReader ){ /* This call may return SQLITE_ABORT if there has been a savepoint ** rollback since it was last used. In this case a new blob handle @@ -788,6 +821,13 @@ sqlite3_free(buf.p); ); } + /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls + ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. + ** All the reasons those functions might return SQLITE_ERROR - missing + ** table, missing row, non-blob/text in block column - indicate + ** backing store corruption. 
*/ + if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; + if( rc==SQLITE_OK ){ u8 *aOut; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); @@ -1563,7 +1603,7 @@ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ const u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; int iOff = pIter->iLeafOffset; /* Offset to read at */ - pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos,&pIter->bDel); + pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); } } @@ -1577,8 +1617,6 @@ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ ** ** Fts5SegIter.term ** Fts5SegIter.rowid -** Fts5SegIter.nPos -** Fts5SegIter.bDel ** ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of ** the first position list. The position list belonging to document @@ -3912,7 +3950,6 @@ static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->bDlidx = pIter->aLvl[0].s.bDlidx; pIter->iLeaf = pIter->aLvl[0].s.iChild; - assert( p->rc==SQLITE_OK || pIter->bEof ); } static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ @@ -3985,7 +4022,7 @@ static void fts5IndexIntegrityCheckSegment( /* Iterate through the b-tree hierarchy. */ for(fts5BtreeIterInit(p, iIdx, pSeg, &iter); - iter.bEof==0; + p->rc==SQLITE_OK && iter.bEof==0; fts5BtreeIterNext(&iter) ){ i64 iRow; /* Rowid for this leaf */ diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index f1c2284276..5bbfc821a2 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -22,6 +22,14 @@ #include #include +/* +** This variable is set to true when running corruption tests. Otherwise +** false. If it is false, extra assert() conditions in the fts5 code are +** activated - conditions that are only true if it is guaranteed that the +** fts5 database is not corrupt. 
+*/ +int sqlite3_fts5_may_be_corrupt = 0; + /************************************************************************* ** This is a copy of the first part of the SqliteDb structure in ** tclsqlite.c. We need it here so that the get_sqlite_pointer routine @@ -830,6 +838,33 @@ static void xF5tFree(ClientData clientData){ ckfree(clientData); } +/* +** sqlite3_fts5_may_be_corrupt BOOLEAN +** +** Set or clear the global "may-be-corrupt" flag. Return the old value. +*/ +static int f5tMayBeCorrupt( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + int bOld = sqlite3_fts5_may_be_corrupt; + + if( objc!=2 && objc!=1 ){ + Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?"); + return TCL_ERROR; + } + if( objc==2 ){ + int bNew; + if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew) ) return TCL_ERROR; + sqlite3_fts5_may_be_corrupt = bNew; + } + + Tcl_SetObjResult(interp, Tcl_NewIntObj(bOld)); + return TCL_OK; +} + /* ** Entry point. */ @@ -842,7 +877,8 @@ int Fts5tcl_Init(Tcl_Interp *interp){ { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 }, { "sqlite3_fts5_token", f5tTokenizerReturn, 1 }, { "sqlite3_fts5_tokenize", f5tTokenize, 0 }, - { "sqlite3_fts5_create_function", f5tCreateFunction, 0 } + { "sqlite3_fts5_create_function", f5tCreateFunction, 0 }, + { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 } }; int i; F5tTokenizerContext *pContext; diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index a9393de43d..0791ab0cf9 100644 --- a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -9,6 +9,8 @@ # #*********************************************************************** # +# This file tests that the FTS5 'integrity-check' command detects +# inconsistencies (corruption) in the on-disk backing tables. 
# source [file join [file dirname [info script]] fts5_common.tcl] @@ -38,7 +40,7 @@ do_test 1.3 { DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } -} {1 {SQL logic error or missing database}} +} {1 {database disk image is malformed}} do_test 1.4 { db_restore_and_reopen diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test new file mode 100644 index 0000000000..a5f657b160 --- /dev/null +++ b/ext/fts5/test/fts5corrupt2.test @@ -0,0 +1,116 @@ +# 2015 Apr 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file tests that FTS5 handles corrupt databases (i.e. internal +# inconsistencies in the backing tables) correctly. In this case +# "correctly" means without crashing. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5corrupt2 + +# Create a simple FTS5 table containing 100 documents. Each document +# contains 10 terms, each of which starts with the character "x". +# +expr srand(0) +db func rnddoc fts5_rnddoc +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) + INSERT INTO t1 SELECT rnddoc(10) FROM ii; +} + +set mask [expr 31 << 31] + +# Test 1: +# +# For each page in the t1_data table, open a transaction and DELETE +# the t1_data entry. Then run: +# +# * an integrity-check, and +# * unless the deleted block was a b-tree node, a query for "t1 MATCH 'x*'" +# +# and check that the corruption is detected in both cases. Then
+# +# Test 2: +# +# Same thing, except instead of deleting a row from t1_data, replace its +# blob content with integer value 14. +# +foreach {tno stmt} { + 1 { DELETE FROM t1_data WHERE rowid=$rowid } + 2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid } +} { + break + set tn 0 + foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] { + incr tn + #if {$tn!=224} continue + + do_test 1.$tno.$tn.1.$rowid { + execsql { BEGIN } + execsql $stmt + catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } + } {1 {database disk image is malformed}} + + if {($rowid & $mask)==0} { + # Node is a leaf node, not a b-tree node. + do_catchsql_test 1.$tno.$tn.2.$rowid { + SELECT rowid FROM t1 WHERE t1 MATCH 'x*' + } {1 {database disk image is malformed}} + } + + do_execsql_test 1.$tno.$tn.3.$rowid { + ROLLBACK; + INSERT INTO t1(t1) VALUES('integrity-check'); + } {} + } +} + +# Run N-1 tests, where N is the number of bytes in the rightmost leaf page +# of the fts index. For test $i, truncate the rightmost leafpage to $i +# bytes. Then test both the integrity-check detects the corruption. +# +# Also tested is that "MATCH 'x*'" does not crash and sometimes reports +# corruption. It may not report the db as corrupt because truncating the +# final leaf to some sizes may create a valid leaf page. 
+# +set lrowid [db one {SELECT max(rowid) FROM t1_data WHERE (rowid & $mask)=0}] +set nbyte [db one {SELECT length(block) FROM t1_data WHERE rowid=$lrowid}] +set all [db eval {SELECT rowid FROM t1}] +for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} { + do_execsql_test 2.$i.1 { + BEGIN; + UPDATE t1_data SET block = substr(block, 1, $i) WHERE rowid=$lrowid; + } + + do_catchsql_test 2.$i.2 { + INSERT INTO t1(t1) VALUES('integrity-check'); + } {1 {database disk image is malformed}} + + do_test 2.$i.3 { + set res [catchsql {SELECT rowid FROM t1 WHERE t1 MATCH 'x*'}] + expr { + $res=="1 {database disk image is malformed}" + || $res=="0 {$all}" + } + } 1 + + do_execsql_test 2.$i.4 { + ROLLBACK; + INSERT INTO t1(t1) VALUES('integrity-check'); + } {} +} + +finish_test + diff --git a/ext/fts5/test/fts5rebuild.test b/ext/fts5/test/fts5rebuild.test index dfaf28bc6e..644a674942 100644 --- a/ext/fts5/test/fts5rebuild.test +++ b/ext/fts5/test/fts5rebuild.test @@ -39,7 +39,7 @@ do_execsql_test 1.5 { do_catchsql_test 1.6 { INSERT INTO f1(f1) VALUES('integrity-check'); -} {1 {SQL logic error or missing database}} +} {1 {database disk image is malformed}} do_execsql_test 1.7 { INSERT INTO f1(f1) VALUES('rebuild'); diff --git a/manifest b/manifest index 994c259715..9d446ac187 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sbuild\sproblem\sin\smain.mk. -D 2015-04-24T06:02:29.587 +C Add\sextra\stests\sfor\scorrupt\sdatabase\shandling\sin\sfts5. 
+D 2015-04-24T15:56:09.379 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in faaf75b89840659d74501bea269c7e33414761c1 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,15 +106,15 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 1b537736f8838df7fca10245c0f70a23cfddc7f5 +F ext/fts5/fts5Int.h 1309320cb233e1c5b38d7f1e2cab2138bbf497d8 F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 7c9615a83e0ca928817e81be65e266f639f45532 +F ext/fts5/fts5_index.c 1663ad6a9ae221f14f27442b9b1a9d5088a2c5fe F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d -F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 +F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -135,7 +135,8 @@ F ext/fts5/test/fts5al.test 6a5717faaf7f1e0e866360022d284903f3a4eede F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 -F ext/fts5/test/fts5corrupt.test dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d +F ext/fts5/test/fts5corrupt.test 9e8524281aa322c522c1d6e2b347e24e060c2727 +F ext/fts5/test/fts5corrupt2.test 
3be48d8a30d30e3ae819f04e957c45d091bfbb85 F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e @@ -145,7 +146,7 @@ F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 -F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4 +F ext/fts5/test/fts5rebuild.test ee6792715c6c528cc188e7869d67c3c655889ddb F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d @@ -1301,7 +1302,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2dd59b5762c944b3bdd270e95c6739cd4f530bfa -R 16931392bbfaadd753438e7b377b1156 +P 60045cedef109f03317dc878fe6bb3d03867ae69 +R 9c5c238cdd6f30dc8d0223c36173d961 U dan -Z b1c66eea22fb17fe989c58ccfd8fe427 +Z ec7cf237df9e7bd8116f5a496704530c diff --git a/manifest.uuid b/manifest.uuid index e4bd9e57cc..62f07c3585 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -60045cedef109f03317dc878fe6bb3d03867ae69 \ No newline at end of file +41449f7a0b5da6332eef48386c91ef63382c4783 \ No newline at end of file From df5bd1fed2ec6f1bf1f13dc28ca535280f468d44 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 24 Apr 2015 19:41:43 +0000 Subject: [PATCH 111/206] Add the "unindexed" column option to fts5. 
FossilOrigin-Name: 86309961344f4076ddcf55d730d3600ec3b6e45c --- ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_config.c | 217 +++++++++++++++++++++---------- ext/fts5/fts5_index.c | 16 +-- ext/fts5/fts5_storage.c | 31 +++-- ext/fts5/test/fts5tokenizer.test | 2 +- ext/fts5/test/fts5unindexed.test | 73 +++++++++++ manifest | 21 +-- manifest.uuid | 2 +- 8 files changed, 258 insertions(+), 105 deletions(-) create mode 100644 ext/fts5/test/fts5unindexed.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 0bbea3aa7d..1e08bd68e0 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -92,6 +92,7 @@ struct Fts5Config { char *zName; /* Name of FTS index */ int nCol; /* Number of columns */ char **azCol; /* Column names */ + u8 *abUnindexed; /* True for unindexed columns */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 0450db6913..9bdcdf9e44 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -134,31 +134,50 @@ static const char *fts5ConfigSkipLiteral(const char *pIn){ return p; } +/* +** The first character of the string pointed to by argument z is guaranteed +** to be an open-quote character (see function fts5_isopenquote()). +** +** This function searches for the corresponding close-quote character within +** the string and, if found, dequotes the string in place and adds a new +** nul-terminator byte. +** +** If the close-quote is found, the value returned is the byte offset of +** the character immediately following it. Or, if the close-quote is not +** found, -1 is returned. If -1 is returned, the buffer is left in an +** undefined state. 
+*/ static int fts5Dequote(char *z){ char q; int iIn = 1; int iOut = 0; - int bRet = 1; q = z[0]; + /* Set stack variable q to the close-quote character */ assert( q=='[' || q=='\'' || q=='"' || q=='`' ); if( q=='[' ) q = ']'; while( z[iIn] ){ if( z[iIn]==q ){ if( z[iIn+1]!=q ){ - if( z[iIn+1]=='\0' ) bRet = 0; - break; + /* Character iIn was the close quote. */ + z[iOut] = '\0'; + return iIn+1; + }else{ + /* Character iIn and iIn+1 form an escaped quote character. Skip + ** the input cursor past both and copy a single quote character + ** to the output buffer. */ + iIn += 2; + z[iOut++] = q; } - z[iOut++] = q; - iIn += 2; }else{ z[iOut++] = z[iIn++]; } } - z[iOut] = '\0'; - return bRet; + /* Did not find the close-quote character. Return -1. */ + z[iOut] = '\0'; + return -1; } /* @@ -184,18 +203,6 @@ void sqlite3Fts5Dequote(char *z){ } } -/* -** Trim any white-space from the right of nul-terminated string z. -*/ -static char *fts5TrimString(char *z){ - int n = strlen(z); - while( n>0 && fts5_iswhitespace(z[n-1]) ){ - z[--n] = '\0'; - } - while( fts5_iswhitespace(*z) ) z++; - return z; -} - /* ** Duplicate the string passed as the only argument into a buffer allocated ** by sqlite3_malloc(). @@ -251,10 +258,10 @@ static int fts5ConfigParseSpecial( Fts5Global *pGlobal, Fts5Config *pConfig, /* Configuration object to update */ const char *zCmd, /* Special command to parse */ - int nCmd, /* Size of zCmd in bytes */ const char *zArg, /* Argument to parse */ char **pzErr /* OUT: Error message */ ){ + int nCmd = strlen(zCmd); if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; int rc = SQLITE_OK; @@ -384,6 +391,84 @@ static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ ); } +/* +** Gobble up the first bareword or quoted word from the input buffer zIn. 
+** Return a pointer to the character immediately following the last in +** the gobbled word if successful, or a NULL pointer otherwise (failed +** to find close-quote character). +** +** Before returning, set pzOut to point to a new buffer containing a +** nul-terminated, dequoted copy of the gobbled word. If the word was +** quoted, *pbQuoted is also set to 1 before returning. +** +** If *pRc is other than SQLITE_OK when this function is called, it is +** a no-op (NULL is returned). Otherwise, if an OOM occurs within this +** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* +** set if a parse error (failed to find close quote) occurs. +*/ +static const char *fts5ConfigGobbleWord( + int *pRc, + const char *zIn, + char **pzOut, + int *pbQuoted +){ + const char *zRet = 0; + *pbQuoted = 0; + *pzOut = 0; + + if( *pRc==SQLITE_OK ){ + int nIn = strlen(zIn); + char *zOut = sqlite3_malloc(nIn+1); + + if( zOut==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + memcpy(zOut, zIn, nIn+1); + if( fts5_isopenquote(zOut[0]) ){ + int ii = fts5Dequote(zOut); + if( ii>0 ) zRet = &zIn[ii]; + *pbQuoted = 1; + }else{ + zRet = fts5ConfigSkipBareword(zIn); + zOut[zRet-zIn] = '\0'; + } + } + + if( zRet==0 ){ + sqlite3_free(zOut); + }else{ + *pzOut = zOut; + } + } + + return zRet; +} + +static int fts5ConfigParseColumn( + Fts5Config *p, + char *zCol, + char *zArg, + char **pzErr +){ + int rc = SQLITE_OK; + if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) + || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) + ){ + *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol); + rc = SQLITE_ERROR; + }else if( zArg ){ + if( 0==sqlite3_stricmp(zArg, "unindexed") ){ + p->abUnindexed[p->nCol] = 1; + }else{ + *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); + rc = SQLITE_ERROR; + } + } + + p->azCol[p->nCol++] = zCol; + return rc; +} + /* ** Arguments nArg/azArg contain the string arguments passed to the xCreate ** or xConnect method of the virtual table. 
This function attempts to @@ -407,6 +492,7 @@ int sqlite3Fts5ConfigParse( int rc = SQLITE_OK; /* Return code */ Fts5Config *pRet; /* New object to return */ int i; + int nByte; *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); if( pRet==0 ) return SQLITE_NOMEM; @@ -414,7 +500,9 @@ int sqlite3Fts5ConfigParse( pRet->db = db; pRet->iCookie = -1; - pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); + nByte = nArg * (sizeof(char*) + sizeof(u8)); + pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); + pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = fts5Strdup(&rc, azArg[1]); pRet->zName = fts5Strdup(&rc, azArg[2]); if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ @@ -423,63 +511,48 @@ int sqlite3Fts5ConfigParse( } for(i=3; rc==SQLITE_OK && iazCol[pRet->nCol++] = zCol; - zDup = 0; - } - } - - sqlite3_free(zDup); + z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); + z = fts5ConfigSkipWhitespace(z); + if( z && *z=='=' ){ + bOption = 1; + z++; + if( bMustBeCol ) z = 0; } + z = fts5ConfigSkipWhitespace(z); + if( z && z[0] ){ + int bDummy; + z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); + if( z && z[0] ) z = 0; + } + + if( rc==SQLITE_OK ){ + if( z==0 ){ + *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig); + rc = SQLITE_ERROR; + }else{ + if( bOption ){ + rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo, pzErr); + }else{ + rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); + zOne = 0; + } + } + } + + sqlite3_free(zOne); + sqlite3_free(zTwo); } /* If a tokenizer= option was successfully parsed, the tokenizer has ** already been allocated. Otherwise, allocate an instance of the default - ** tokenizer (simple) now. */ + ** tokenizer (unicode61) now. 
*/ if( rc==SQLITE_OK && pRet->pTok==0 ){ rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index abddf5b30d..c109cff57d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1815,15 +1815,13 @@ static void fts5SegIterNext( int bDummy; i64 iDelta; - if( p->rc==SQLITE_OK ){ - pIter->iRowidOffset--; - pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; - iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); - iOff += nPos; - getVarint(&a[iOff], (u64*)&iDelta); - pIter->iRowid -= iDelta; - fts5SegIterLoadNPos(p, pIter); - } + pIter->iRowidOffset--; + pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; + iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); + iOff += nPos; + getVarint(&a[iOff], (u64*)&iDelta); + pIter->iRowid -= iDelta; + fts5SegIterLoadNPos(p, pIter); }else{ fts5SegIterReverseNewPage(p, pIter); } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 075b2eb66a..33eda7c3e7 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -323,6 +323,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ + if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), @@ -486,6 +487,7 @@ int sqlite3Fts5StorageSpecialDelete( rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iColnCol; iCol++){ + if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), @@ -564,12 +566,14 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; - rc = sqlite3Fts5Tokenize(pConfig, - (const 
char*)sqlite3_column_text(pScan, ctx.iCol+1), - sqlite3_column_bytes(pScan, ctx.iCol+1), - (void*)&ctx, - fts5StorageInsertCallback - ); + if( pConfig->abUnindexed[ctx.iCol]==0 ){ + rc = sqlite3Fts5Tokenize(pConfig, + (const char*)sqlite3_column_text(pScan, ctx.iCol+1), + sqlite3_column_bytes(pScan, ctx.iCol+1), + (void*)&ctx, + fts5StorageInsertCallback + ); + } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; } @@ -671,12 +675,14 @@ int sqlite3Fts5StorageInsert( } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; - rc = sqlite3Fts5Tokenize(pConfig, - (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), - sqlite3_value_bytes(apVal[ctx.iCol+2]), - (void*)&ctx, - fts5StorageInsertCallback - ); + if( pConfig->abUnindexed[ctx.iCol]==0 ){ + rc = sqlite3Fts5Tokenize(pConfig, + (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), + sqlite3_value_bytes(apVal[ctx.iCol+2]), + (void*)&ctx, + fts5StorageInsertCallback + ); + } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; } @@ -783,6 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ ctx.szCol = 0; rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ + if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; rc = sqlite3Fts5Tokenize( diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index d8c4f20f0e..44de1690fe 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -70,7 +70,7 @@ do_catchsql_test 4.1 { } {1 {parse error in "tokenize = tcl abc"}} do_catchsql_test 4.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x y) -} {1 {parse error in "x y"}} +} {1 {unrecognized column option: y}} #------------------------------------------------------------------------- # Test the "separators" and "tokenchars" options a bit.
diff --git a/ext/fts5/test/fts5unindexed.test b/ext/fts5/test/fts5unindexed.test new file mode 100644 index 0000000000..e808064f05 --- /dev/null +++ b/ext/fts5/test/fts5unindexed.test @@ -0,0 +1,73 @@ +# 2015 Apr 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# The tests in this file focus on "unindexed" columns. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5unindexed + + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED); + INSERT INTO t1 VALUES('a b c', 'd e f'); + INSERT INTO t1 VALUES('g h i', 'j k l'); +} {} + +do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1} +do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {} + +do_execsql_test 1.4 { INSERT INTO t1(t1) VALUES('integrity-check') } {} +do_execsql_test 1.5 { INSERT INTO t1(t1) VALUES('rebuild') } {} +do_execsql_test 1.6 { INSERT INTO t1(t1) VALUES('integrity-check') } {} + +do_execsql_test 1.7 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1} +do_execsql_test 1.8 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {} + +do_execsql_test 1.9 { DELETE FROM t1 WHERE t1 MATCH 'b' } {} + +do_execsql_test 1.10 { INSERT INTO t1(t1) VALUES('integrity-check') } {} +do_execsql_test 1.11 { INSERT INTO t1(t1) VALUES('rebuild') } {} +do_execsql_test 1.12 { INSERT INTO t1(t1) VALUES('integrity-check') } {} + +do_execsql_test 1.13 { SELECT rowid FROM t1 WHERE t1 MATCH 'i' } {2} +do_execsql_test 1.14 { SELECT rowid FROM t1 WHERE t1 MATCH 'l' } {} + +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t2 USING fts5(a UNINDEXED, b UNINDEXED); + INSERT INTO t1 VALUES('a b c', 'd e f'); + INSERT INTO t1 VALUES('g h i', 'j k l'); + SELECT rowid FROM 
t2_data; +} {1 10} +do_execsql_test 2.2 { + INSERT INTO t2(t2) VALUES('rebuild'); + INSERT INTO t2(t2) VALUES('integrity-check'); + SELECT rowid FROM t2_data; +} {1 10} + +do_execsql_test 3.1 { + CREATE TABLE x4(i INTEGER PRIMARY KEY, a, b, c); + CREATE VIRTUAL TABLE t4 USING fts5(a, b UNINDEXED, c, content=x4); + INSERT INTO x4 VALUES(10, 'a b c', 'd e f', 'g h i'); + INSERT INTO x4 VALUES(20, 'j k l', 'm n o', 'p q r'); + INSERT INTO t4(t4) VALUES('rebuild'); + INSERT INTO t4(t4) VALUES('integrity-check'); +} {} + +do_execsql_test 3.2 { + INSERT INTO t4(t4, rowid, a, b, c) VALUES('delete', 20, 'j k l', '', 'p q r'); + DELETE FROM x4 WHERE rowid=20; + INSERT INTO t4(t4) VALUES('integrity-check'); +} {} + + +finish_test + diff --git a/manifest b/manifest index 9d446ac187..fe8819f3b1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sextra\stests\sfor\scorrupt\sdatabase\shandling\sin\sfts5. -D 2015-04-24T15:56:09.379 +C Add\sthe\s"unindexed"\scolumn\soption\sto\sfts5. +D 2015-04-24T19:41:43.259 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in faaf75b89840659d74501bea269c7e33414761c1 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 1309320cb233e1c5b38d7f1e2cab2138bbf497d8 +F ext/fts5/fts5Int.h 803fd2fc03e3799a38ebb404f2f1309ded5d3e8b F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 -F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 +F ext/fts5/fts5_config.c adf7110b0e8a9bdd64cb61c6f9da0bf6b80d9a1d F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd F ext/fts5/fts5_hash.c 
3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 1663ad6a9ae221f14f27442b9b1a9d5088a2c5fe -F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d +F ext/fts5/fts5_index.c 39810b25a017f2626ac72b3e44afe9b534e5d5db +F ext/fts5/fts5_storage.c b3a4cbbcd197fe587789398e51a631f92fc9196c F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d @@ -148,9 +148,10 @@ F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 F ext/fts5/test/fts5rebuild.test ee6792715c6c528cc188e7869d67c3c655889ddb F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 -F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c +F ext/fts5/test/fts5tokenizer.test 7a6ee24db908c09a0dc1eba634ffa17afcc05d86 F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee +F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/tool/loadfts5.tcl 1e126891d14ab85dcdb0fac7755a4cd5ba52e8b8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1302,7 +1303,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 60045cedef109f03317dc878fe6bb3d03867ae69 -R 9c5c238cdd6f30dc8d0223c36173d961 +P 41449f7a0b5da6332eef48386c91ef63382c4783 +R 6f8967f9e2552e4661a21d901cb3fab7 U dan -Z ec7cf237df9e7bd8116f5a496704530c +Z c90c5f944e45872447f2bae768eb92e5 diff --git a/manifest.uuid b/manifest.uuid index 62f07c3585..71decbb105 100644 --- 
a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -41449f7a0b5da6332eef48386c91ef63382c4783 \ No newline at end of file +86309961344f4076ddcf55d730d3600ec3b6e45c \ No newline at end of file From b33e64c83e8788f958f5786dfcd83179bfbe4274 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 25 Apr 2015 18:56:48 +0000 Subject: [PATCH 112/206] Add tests for fts5. FossilOrigin-Name: e748651c940eae2389fe826cf5c25f1166a5e611 --- ext/fts5/fts5_index.c | 227 +++++++++++++++----------------- ext/fts5/test/fts5aa.test | 8 +- ext/fts5/test/fts5ab.test | 22 ++++ ext/fts5/test/fts5corrupt2.test | 86 +++++++++++- ext/fts5/test/fts5fault2.test | 23 ++++ ext/fts5/test/fts5full.test | 37 ++++++ manifest | 21 +-- manifest.uuid | 2 +- 8 files changed, 285 insertions(+), 141 deletions(-) create mode 100644 ext/fts5/test/fts5full.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index c109cff57d..746e44bdbb 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -256,6 +256,11 @@ */ #define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1) +/* +** Maximum segments permitted in a single index +*/ +#define FTS5_MAX_SEGMENT 2000 + /* ** The rowid for the doclist index associated with leaf page pgno of segment ** segid in index idx. @@ -365,6 +370,7 @@ struct Fts5StructureLevel { }; struct Fts5Structure { u64 nWriteCounter; /* Total leaves written to level 0 */ + int nSegment; /* Total segments in this structure */ int nLevel; /* Number of levels in this index */ Fts5StructureLevel aLevel[0]; /* Array of nLevel level objects */ }; @@ -725,6 +731,7 @@ static int fts5BufferCompareBlob( return (res==0 ? (pLeft->n - nRight) : res); } + /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. @@ -740,6 +747,17 @@ static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ return (res==0 ? 
(pLeft->n - pRight->n) : res); } +#ifdef SQLITE_DEBUG +static int fts5BlobCompare( + const u8 *pLeft, int nLeft, + const u8 *pRight, int nRight +){ + int nCmp = MIN(nLeft, nRight); + int res = memcmp(pLeft, pRight, nCmp); + return (res==0 ? (nLeft - nRight) : res); +} +#endif + /* ** Close the read-only blob handle, if it is open. @@ -1042,6 +1060,7 @@ static int fts5StructureDecode( if( pRet ){ pRet->nLevel = nLevel; + pRet->nSegment = nSegment; i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter); for(iLvl=0; rc==SQLITE_OK && iLvlrc==SQLITE_OK ){ - int nSegment; /* Total number of segments */ Fts5Buffer buf; /* Buffer to serialize record into */ int iLvl; /* Used to iterate through levels */ int iCookie; /* Cookie value to store */ - nSegment = fts5StructureCountSegments(pStruct); + assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); memset(&buf, 0, sizeof(Fts5Buffer)); /* Append the current configuration cookie */ @@ -1202,7 +1223,7 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ fts5BufferAppend32(&p->rc, &buf, iCookie); fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel); - fts5BufferAppendVarint(&p->rc, &buf, nSegment); + fts5BufferAppendVarint(&p->rc, &buf, pStruct->nSegment); fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); for(iLvl=0; iLvlnLevel; iLvl++){ @@ -1583,6 +1604,7 @@ static void fts5SegIterNextPage( static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ int nSz; int n = fts5GetVarint32(p, nSz); + assert_nc( nSz>=0 ); *pnSz = nSz/2; *pbDel = nSz & 0x0001; return n; @@ -1601,9 +1623,13 @@ static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ */ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ - const u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; int iOff = pIter->iLeafOffset; /* Offset to read at */ - pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); + if( iOff>=pIter->pLeaf->n ){ + p->rc = 
FTS5_CORRUPT; + }else{ + const u8 *a = &pIter->pLeaf->p[iOff]; + pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); + } } } @@ -2028,18 +2054,16 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ int iOff = pIter->iLeafOffset + pIter->nPos; while( iOff<pLeaf->n ){ + int bDummy; + int nPos; i64 iDelta; /* iOff is currently the offset of the start of position list data */ iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) return; - - if( iOff<pLeaf->n ){ - int bDummy; - int nPos; - iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); - iOff += nPos; - } + assert_nc( iOff<pLeaf->n ); + iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); + iOff += nPos; } } @@ -2346,11 +2370,9 @@ static void fts5SegIterGotoPage( int iLeafPgno ){ assert( iLeafPgno>pIter->iLeafPgno ); - if( p->rc==SQLITE_OK ){ - pIter->iLeafPgno = iLeafPgno-1; - fts5SegIterNextPage(p, pIter); - assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); - } + pIter->iLeafPgno = iLeafPgno-1; + fts5SegIterNextPage(p, pIter); + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); if( p->rc==SQLITE_OK ){ int iOff; @@ -2413,7 +2435,7 @@ static void fts5SegIterNextFrom( } } - while( 1 ){ + while( p->rc==SQLITE_OK ){ if( bMove ) fts5SegIterNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; @@ -2502,6 +2524,7 @@ static void fts5MultiIterNext( int iFirst = pIter->aFirst[1].iFirst; int bNewTerm = 0; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + assert( p->rc==SQLITE_OK ); if( bUseFrom && pSeg->pDlidx ){ fts5SegIterNextFrom(p, pSeg, iFrom); }else{ @@ -2542,8 +2565,8 @@ static void fts5MultiIterNew( int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5MultiSegIter **ppOut /* New object */ ){ - int nSeg; /* Number of segments merged */ - int nSlot; /* Power of two >= nSeg */ + int nSeg; /* Number of segment-iters in use */ + int nSlot = 0; /* Power of two
>= nSeg */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl; @@ -2552,13 +2575,17 @@ static void fts5MultiIterNew( assert( (pTerm==0 && nTerm==0) || iLevel<0 ); /* Allocate space for the new multi-seg-iterator. */ - if( iLevel<0 ){ - nSeg = fts5StructureCountSegments(pStruct); - nSeg += (p->apHash ? 1 : 0); - }else{ - nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); + if( p->rc==SQLITE_OK ){ + if( iLevel<0 ){ + assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); + nSeg = pStruct->nSegment; + nSeg += (p->apHash ? 1 : 0); + }else{ + nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); + } + for(nSlot=2; nSlotaSeg[] */ @@ -2732,101 +2759,41 @@ static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){ pIter->pLeaf = 0; } -/* -** Read and return the next 32-bit varint from the position-list iterator -** passed as the second argument. -** -** If an error occurs, zero is returned an an error code left in -** Fts5Index.rc. If an error has already occurred when this function is -** called, it is a no-op. -*/ -static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ - int iVal = 0; - if( p->rc==SQLITE_OK ){ - if( pIter->iOff>=pIter->chunk.n ){ - fts5ChunkIterNext(p, &pIter->chunk); - if( fts5ChunkIterEof(p, &pIter->chunk) ) return 0; - pIter->iOff = 0; - } - pIter->iOff += fts5GetVarint32(&pIter->chunk.p[pIter->iOff], iVal); - } - return iVal; -} /* -** Advance the position list iterator to the next entry. 
-*/ -static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){ - int iVal; - assert( fts5ChunkIterEof(p, &pIter->chunk)==0 ); - iVal = fts5PosIterReadVarint(p, pIter); - if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ - if( iVal==1 ){ - pIter->iCol = fts5PosIterReadVarint(p, pIter); - pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2; - }else{ - pIter->iPos += (iVal - 2); - } - } -} - -/* -** Initialize the Fts5PosIter object passed as the final argument to iterate -** through the position-list associated with the index entry that iterator -** pMulti currently points to. -*/ -static void fts5PosIterInit( - Fts5Index *p, /* FTS5 backend object */ - Fts5MultiSegIter *pMulti, /* Multi-seg iterator to read pos-list from */ - Fts5PosIter *pIter /* Initialize this object */ -){ - if( p->rc==SQLITE_OK ){ - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; - memset(pIter, 0, sizeof(*pIter)); - fts5ChunkIterInit(p, pSeg, &pIter->chunk); - if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ - fts5PosIterNext(p, pIter); - } - } -} - -/* -** Return true if the position iterator passed as the second argument is -** at EOF. Or if an error has already occurred. Otherwise, return false. -*/ -static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ - return (p->rc || pIter->chunk.pLeaf==0); -} - -/* -** Allocate a new segment-id for the structure pStruct. +** Allocate a new segment-id for the structure pStruct. The new segment +** id must be between 1 and 65535 inclusive, and must not be used by +** any currently existing segment. If a free segment id cannot be found, +** SQLITE_FULL is returned. ** ** If an error has already occurred, this function is a no-op. 0 is ** returned in this case.
*/ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ int i; - if( p->rc!=SQLITE_OK ) return 0; + u32 iSegid = 0; - for(i=0; i<100; i++){ - int iSegid; - sqlite3_randomness(sizeof(int), (void*)&iSegid); - iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1); - if( iSegid ){ - int iLvl, iSeg; - for(iLvl=0; iLvlnLevel; iLvl++){ - for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ - if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ - iSegid = 0; + if( p->rc==SQLITE_OK ){ + if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ + p->rc = SQLITE_FULL; + }else{ + while( iSegid==0 ){ + int iLvl, iSeg; + sqlite3_randomness(sizeof(u32), (void*)&iSegid); + iSegid = (iSegid % ((1 << FTS5_DATA_ID_B) - 2)) + 1; + assert( iSegid>0 && iSegid<=65535 ); + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ + iSegid = 0; + } } } } } - if( iSegid ) return iSegid; } - p->rc = SQLITE_ERROR; - return 0; + return (int)iSegid; } /* @@ -2838,7 +2805,7 @@ static void fts5IndexDiscardData(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; int i; for(i=0; i<=pConfig->nPrefix; i++){ - if( p->apHash[i] ) sqlite3Fts5HashClear(p->apHash[i]); + sqlite3Fts5HashClear(p->apHash[i]); } p->nPendingData = 0; } @@ -2853,7 +2820,8 @@ static int fts5PrefixCompress( int nNew, const u8 *pNew ){ int i; - for(i=0; inEmpty ){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){ + if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ i64 iKey = FTS5_DOCLIST_IDX_ROWID( pWriter->iIdx, pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty @@ -3040,8 +3008,8 @@ static void fts5WriteAppendTerm( int nPrefix; /* Bytes of prefix compression for term */ Fts5PageWriter *pPage = &pWriter->aWriter[0]; - assert( pPage==0 || pPage->buf.n==0 || pPage->buf.n>4 ); - if( pPage && pPage->buf.n==0 ){ + assert( pPage->buf.n==0 || pPage->buf.n>4 ); + if( pPage->buf.n==0 ){ /* Zero 
the first term and first docid fields */ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); @@ -3137,6 +3105,7 @@ static void fts5WriteAppendRowid( } } +#if 0 static void fts5WriteAppendPoslistInt( Fts5Index *p, Fts5SegWriter *pWriter, @@ -3150,6 +3119,7 @@ static void fts5WriteAppendPoslistInt( } } } +#endif static void fts5WriteAppendPoslistData( Fts5Index *p, @@ -3389,6 +3359,7 @@ static void fts5IndexMergeLevel( pLvlOut->nSeg++; pSeg->pgnoFirst = 1; pSeg->iSegid = iSegid; + pStruct->nSegment++; /* Read input from all segments in the input level */ nInput = pLvl->nSeg; @@ -3461,10 +3432,12 @@ fflush(stdout); int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); } + pStruct->nSegment -= nInput; pLvl->nSeg -= nInput; pLvl->nMerge = 0; if( pSeg->pgnoLast==0 ){ pLvlOut->nSeg--; + pStruct->nSegment--; } }else{ assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); @@ -3768,6 +3741,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ pSeg->nHeight = nHeight; pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; + pStruct->nSegment++; } fts5StructurePromote(p, 0, pStruct); } @@ -3809,7 +3783,8 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p){ Fts5Structure *pNew = 0; int nSeg = 0; if( pStruct ){ - nSeg = fts5StructureCountSegments(pStruct); + assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); + nSeg = pStruct->nSegment; if( nSeg>1 ){ int nByte = sizeof(Fts5Structure); nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); @@ -3832,7 +3807,7 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p){ iSegOut++; } } - pLvl->nSeg = nSeg; + pNew->nSegment = pLvl->nSeg = nSeg; }else{ sqlite3_free(pNew); pNew = 0; @@ -4146,7 +4121,11 @@ static void fts5MultiIterPoslist( Fts5ChunkIter iter; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); + static int nCall = 0; + nCall++; + fts5ChunkIterInit(p, 
pSeg, &iter); + if( fts5ChunkIterEof(p, &iter)==0 ){ if( bSz ){ /* WRITEPOSLISTSIZE */ @@ -4434,6 +4413,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ u64 cksum2 = 0; /* Checksum based on contents of indexes */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ + Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ /* Check that the internal nodes of each segment match the leaves */ for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ @@ -4470,18 +4450,18 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ - Fts5PosIter sPos; /* Used to iterate through position list */ int n; /* Size of term in bytes */ + i64 iPos = 0; /* Position read from poslist */ + int iOff = 0; /* Offset within poslist */ i64 iRowid = fts5MultiIterRowid(pIter); char *z = (char*)fts5MultiIterTerm(pIter, &n); - /* Update cksum2 with the entries associated with the current term - ** and rowid. */ - for(fts5PosIterInit(p, pIter, &sPos); - fts5PosIterEof(p, &sPos)==0; - fts5PosIterNext(p, &sPos) - ){ - cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n); + poslist.n = 0; + fts5MultiIterPoslist(p, pIter, 0, &poslist); + while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ + int iCol = FTS5_POS2COLUMN(iPos); + int iTokOff = FTS5_POS2OFFSET(iPos); + cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, z, n); } /* If this is a new term, query for it. Update cksum3 with the results. 
*/ @@ -4526,6 +4506,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT; fts5BufferFree(&term); + fts5BufferFree(&poslist); return fts5IndexReturn(p); } diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 24a3521152..9c56790dc5 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -47,11 +47,9 @@ do_execsql_test 2.0 { do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } -do_execsql_test 2.2 { - SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 -} { - {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} -} +do_test 2.2 { + execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } +} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } diff --git a/ext/fts5/test/fts5ab.test b/ext/fts5/test/fts5ab.test index 23fdec0dfa..0746e64326 100644 --- a/ext/fts5/test/fts5ab.test +++ b/ext/fts5/test/fts5ab.test @@ -263,5 +263,27 @@ do_execsql_test 6.3 { SELECT rowid FROM s3 WHERE s3 MATCH 'a' } {1 2} +do_test 6.4 { + db close + sqlite3 db test.db + execsql { + BEGIN; + INSERT INTO s3(s3) VALUES('optimize'); + ROLLBACK; + } +} {} + +#------------------------------------------------------------------------- +# +set doc [string repeat "a b c " 500] +breakpoint +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + INSERT INTO x1(x1, rank) VALUES('pgsz', 32); + INSERT INTO x1 VALUES($doc); +} + + + finish_test diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index a5f657b160..74591cda78 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -28,7 +28,6 @@ do_execsql_test 1.0 { WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) INSERT INTO t1 SELECT rnddoc(10) FROM ii; } - set mask [expr 31 << 31] # Test 1: @@ -51,7 +50,6 @@ foreach {tno stmt} { 1 { DELETE 
FROM t1_data WHERE rowid=$rowid } 2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid } } { - break set tn 0 foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] { incr tn @@ -77,6 +75,8 @@ foreach {tno stmt} { } } +# Using the same database as the 1.* tests. +# # Run N-1 tests, where N is the number of bytes in the rightmost leaf page # of the fts index. For test $i, truncate the rightmost leafpage to $i # bytes. Then test both the integrity-check detects the corruption. @@ -112,5 +112,87 @@ for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} { } {} } +#------------------------------------------------------------------------- +# Test that corruption in leaf page headers is detected by queries that use +# doclist-indexes. +# +set doc "A B C D E F G H I J " +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE x3 USING fts5(tt); + INSERT INTO x3(x3, rank) VALUES('pgsz', 32); + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<1000) + INSERT INTO x3 + SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii; +} + +foreach {tn hdr} { + 1 "\00\00\00\00" + 2 "\FF\FF\FF\FF" +} { + set tn2 0 + set nCorrupt 0 + foreach rowid [db eval {SELECT rowid FROM x3_data WHERE rowid>10}] { + if {$rowid & $mask} continue + incr tn2 + do_test 3.$tn.$tn2 { + execsql BEGIN + + set fd [db incrblob main x3_data block $rowid] + fconfigure $fd -encoding binary -translation binary + puts -nonewline $fd $hdr + close $fd + + set res [catchsql {SELECT rowid FROM x3 WHERE x3 MATCH 'x AND a'}] + if {$res == "1 {database disk image is malformed}"} {incr nCorrupt} + set {} 1 + } {1} + + execsql ROLLBACK + } + + do_test 3.$tn.x { expr $nCorrupt>0 } 1 +} + +#-------------------------------------------------------------------- +# +set doc "A B C D E F G H I J " +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE x4 USING fts5(tt); + INSERT INTO x4(x4, rank) VALUES('pgsz', 32); + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) + INSERT INTO x4 + SELECT ($doc || CASE WHEN 
(i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii; +} + +foreach {tn nCut} { + 1 1 + 2 10 +} { + set tn2 0 + set nCorrupt 0 + foreach rowid [db eval {SELECT rowid FROM x4_data WHERE rowid>10}] { + if {$rowid & $mask} continue + incr tn2 + do_test 4.$tn.$tn2 { + execsql { + BEGIN; + UPDATE x4_data SET block = substr(block, 1, length(block)-$nCut) + WHERE id = $rowid; + } + + set res [catchsql { + SELECT rowid FROM x4 WHERE x4 MATCH 'a' ORDER BY 1 DESC + }] + if {$res == "1 {database disk image is malformed}"} {incr nCorrupt} + set {} 1 + } {1} + + execsql ROLLBACK + } + + do_test 4.$tn.x { expr $nCorrupt>0 } 1 +} + + finish_test diff --git a/ext/fts5/test/fts5fault2.test b/ext/fts5/test/fts5fault2.test index 8404e2f6ad..2624b5a8e4 100644 --- a/ext/fts5/test/fts5fault2.test +++ b/ext/fts5/test/fts5fault2.test @@ -51,5 +51,28 @@ do_faultsim_test 1.2 -faults oom-* -prep { faultsim_test_result {0 {1000 900 800 700 600 500 400 300 200 100}} } +#------------------------------------------------------------------------- +# OOM within a query that accesses the in-memory hash table. +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE "a b c" USING fts5(a, b, c); + INSERT INTO "a b c" VALUES('one two', 'x x x', 'three four'); + INSERT INTO "a b c" VALUES('nine ten', 'y y y', 'two two'); +} + +do_faultsim_test 2.1 -faults oom-trans* -prep { + execsql { + BEGIN; + INSERT INTO "a b c" VALUES('one one', 'z z z', 'nine ten'); + } +} -body { + execsql { SELECT rowid FROM "a b c" WHERE "a b c" MATCH 'one' } +} -test { + faultsim_test_result {0 {1 3}} + catchsql { ROLLBACK } +} + + finish_test diff --git a/ext/fts5/test/fts5full.test b/ext/fts5/test/fts5full.test new file mode 100644 index 0000000000..4563cedce5 --- /dev/null +++ b/ext/fts5/test/fts5full.test @@ -0,0 +1,37 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. 
+# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Test that SQLITE_FULL is returned if the FTS5 table cannot find a free +# segid to use. In practice this can only really happen when automerge and +# crisismerge are both disabled. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5full + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE x8 USING fts5(i); + INSERT INTO x8(x8, rank) VALUES('automerge', 0); + INSERT INTO x8(x8, rank) VALUES('crisismerge', 100000); +} + +db func rnddoc fts5_rnddoc +do_test 1.1 { + list [catch { + for {set i 0} {$i < 2500} {incr i} { + execsql { INSERT INTO x8 VALUES( rnddoc(5) ); } + } + } msg] $msg +} {1 {database or disk is full}} + + +finish_test + diff --git a/manifest b/manifest index 338f93c639..1be9a25a1a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. -D 2015-04-24T20:18:21.844 +C Add\stests\sfor\sfts5. 
+D 2015-04-25T18:56:48.351 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 43fcf838d3a3390d1245e3d5e651fa5cc1df575b F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 39810b25a017f2626ac72b3e44afe9b534e5d5db +F ext/fts5/fts5_index.c c87369d11271847df9f033f0df148e7f004a88a2 F ext/fts5/fts5_storage.c b3a4cbbcd197fe587789398e51a631f92fc9196c F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -120,8 +120,8 @@ F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 -F ext/fts5/test/fts5aa.test 91f22b3cc7b372a2903c828e907a1e52f1177b8a -F ext/fts5/test/fts5ab.test 5da2e92a8047860b9e22b6fd3990549639d631b1 +F ext/fts5/test/fts5aa.test 87b2e01084c3e2a960ae1500dd5f0729dac2166c +F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 8b3c2938840da8f3f6a53b1324fb03e0bac12d1e F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db @@ -136,12 +136,13 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 F ext/fts5/test/fts5corrupt.test 9e8524281aa322c522c1d6e2b347e24e060c2727 -F ext/fts5/test/fts5corrupt2.test 
3be48d8a30d30e3ae819f04e957c45d091bfbb85 +F ext/fts5/test/fts5corrupt2.test 494111fd4f2dab36499cf97718eaba1f7c11e9d0 F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d -F ext/fts5/test/fts5fault2.test 000ecebf28b8f2cd520f44c70962625ee11d65ac +F ext/fts5/test/fts5fault2.test f478fa94e39a6911189f9e052a3b93ab4cd275fa +F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e @@ -1304,7 +1305,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 86309961344f4076ddcf55d730d3600ec3b6e45c dc88fe7e641c52d27fba8c753cee590db87388c5 -R e0fc7874fca6c46f36b4c18c43a12035 +P 1c78d8920fb59da3cb97dd2eb09b3e08dfd14259 +R 6572bfe0dee7d9becde3a8bb0a8d33a2 U dan -Z 339a1949f9a5bc9b03407831c5b374c4 +Z e7bb2196ce75c57e787365bfa027743a diff --git a/manifest.uuid b/manifest.uuid index 0185da7076..14320ee5eb 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1c78d8920fb59da3cb97dd2eb09b3e08dfd14259 \ No newline at end of file +e748651c940eae2389fe826cf5c25f1166a5e611 \ No newline at end of file From 26c84373c4d2cc8a30181314cfebb2b8ef5f6d6e Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 25 Apr 2015 20:29:46 +0000 Subject: [PATCH 113/206] Improve coverage of fts5_index.c slightly. 
FossilOrigin-Name: e5aaa01306597ffd2475dcb83ae889393f68d315 --- ext/fts5/fts5_index.c | 1424 +++++++++++++++++---------------- ext/fts5/test/fts5fault2.test | 28 + manifest | 14 +- manifest.uuid | 2 +- 4 files changed, 751 insertions(+), 717 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 746e44bdbb..748dd27a7e 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3193,11 +3193,8 @@ static void fts5WriteFinish( } for(i=0; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; - assert( pPg || p->rc!=SQLITE_OK ); - if( pPg ){ - fts5BufferFree(&pPg->term); - fts5BufferFree(&pPg->buf); - } + fts5BufferFree(&pPg->term); + fts5BufferFree(&pPg->buf); } sqlite3_free(pWriter->aWriter); sqlite3Fts5BufferFree(&pWriter->cdlidx); @@ -3833,6 +3830,719 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p){ +/* +** Iterator pMulti currently points to a valid entry (not EOF). This +** function appends a copy of the position-list of the entry pMulti +** currently points to to buffer pBuf. +** +** If an error occurs, an error code is left in p->rc. It is assumed +** no error has already occurred when this function is called. 
+*/ +static void fts5MultiIterPoslist( + Fts5Index *p, + Fts5MultiSegIter *pMulti, + int bSz, + Fts5Buffer *pBuf +){ + if( p->rc==SQLITE_OK ){ + Fts5ChunkIter iter; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; + assert( fts5MultiIterEof(p, pMulti)==0 ); + static int nCall = 0; + nCall++; + + fts5ChunkIterInit(p, pSeg, &iter); + + if( fts5ChunkIterEof(p, &iter)==0 ){ + if( bSz ){ + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2); + } + while( fts5ChunkIterEof(p, &iter)==0 ){ + fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); + fts5ChunkIterNext(p, &iter); + } + } + fts5ChunkIterRelease(&iter); + } +} + +static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ + if( pIter->in ){ + int bDummy; + if( pIter->i ){ + i64 iDelta; + pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); + if( pIter->bDesc ){ + pIter->iRowid -= iDelta; + }else{ + pIter->iRowid += iDelta; + } + }else{ + pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); + } + pIter->i += fts5GetPoslistSize( + &pIter->a[pIter->i], &pIter->nPoslist, &bDummy + ); + pIter->aPoslist = &pIter->a[pIter->i]; + pIter->i += pIter->nPoslist; + }else{ + pIter->aPoslist = 0; + } +} + +static void fts5DoclistIterInit( + Fts5Buffer *pBuf, + int bDesc, + Fts5DoclistIter *pIter +){ + memset(pIter, 0, sizeof(*pIter)); + pIter->a = pBuf->p; + pIter->n = pBuf->n; + pIter->bDesc = bDesc; + fts5DoclistIterNext(pIter); +} + +/* +** Append a doclist to buffer pBuf. 
+*/ +static void fts5MergeAppendDocid( + int *pRc, /* IN/OUT: Error code */ + int bDesc, + Fts5Buffer *pBuf, /* Buffer to write to */ + i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ + i64 iRowid /* Rowid to append */ +){ + if( pBuf->n==0 ){ + fts5BufferAppendVarint(pRc, pBuf, iRowid); + }else if( bDesc ){ + fts5BufferAppendVarint(pRc, pBuf, *piLastRowid - iRowid); + }else{ + fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); + } + *piLastRowid = iRowid; +} + +/* +** Buffers p1 and p2 contain doclists. This function merges the content +** of the two doclists together and sets buffer p1 to the result before +** returning. +** +** If an error occurs, an error code is left in p->rc. If an error has +** already occurred, this function is a no-op. +*/ +static void fts5MergePrefixLists( + Fts5Index *p, /* FTS5 backend object */ + int bDesc, + Fts5Buffer *p1, /* First list to merge */ + Fts5Buffer *p2 /* Second list to merge */ +){ + if( p2->n ){ + i64 iLastRowid = 0; + Fts5DoclistIter i1; + Fts5DoclistIter i2; + Fts5Buffer out; + Fts5Buffer tmp; + memset(&out, 0, sizeof(out)); + memset(&tmp, 0, sizeof(tmp)); + + fts5DoclistIterInit(p1, bDesc, &i1); + fts5DoclistIterInit(p2, bDesc, &i2); + while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ + if( i2.aPoslist==0 || (i1.aPoslist && + ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowidrc, bDesc, &out, &iLastRowid, i1.iRowid); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); + fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); + fts5DoclistIterNext(&i1); + } + else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ + /* Copy entry from i2 */ + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); + fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); + fts5DoclistIterNext(&i2); + } + else{ + Fts5PoslistReader r1; + Fts5PoslistReader r2; + 
Fts5PoslistWriter writer; + + memset(&writer, 0, sizeof(writer)); + + /* Merge the two position lists. */ + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); + fts5BufferZero(&tmp); + sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); + sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); + while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){ + i64 iNew; + if( r2.bEof || (r1.bEof==0 && r1.iPosrc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); + } + + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2); + fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); + fts5DoclistIterNext(&i1); + fts5DoclistIterNext(&i2); + } + } + + fts5BufferSet(&p->rc, p1, out.n, out.p); + fts5BufferFree(&tmp); + fts5BufferFree(&out); + } +} + +static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ + Fts5Buffer tmp = *p1; + *p1 = *p2; + *p2 = tmp; +} + +static void fts5SetupPrefixIter( + Fts5Index *p, /* Index to read from */ + int bDesc, /* True for "ORDER BY rowid DESC" */ + const u8 *pToken, /* Buffer containing prefix to match */ + int nToken, /* Size of buffer pToken in bytes */ + Fts5IndexIter *pIter /* Populate this object */ +){ + Fts5Structure *pStruct; + Fts5Buffer *aBuf; + const int nBuf = 32; + + aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); + pStruct = fts5StructureRead(p, 0); + + if( aBuf && pStruct ){ + Fts5DoclistIter *pDoclist; + int i; + i64 iLastRowid = 0; + Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ + Fts5Buffer doclist; + + memset(&doclist, 0, sizeof(doclist)); + for(fts5MultiIterNew(p, pStruct, 0, 1, 1, pToken, nToken, -1, 0, &p1); + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext(p, p1, 0, 0) + ){ + i64 iRowid = fts5MultiIterRowid(p1); + int nTerm; + const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); + assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); + if( nTerm0 + && ((!bDesc && iRowid<=iLastRowid) || (bDesc && iRowid>=iLastRowid)) + ){ + + 
for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ + assert( irc, &doclist, iRowid); + }else if( bDesc ){ + fts5BufferAppendVarint(&p->rc, &doclist, iLastRowid - iRowid); + }else{ + fts5BufferAppendVarint(&p->rc, &doclist, iRowid - iLastRowid); + } + iLastRowid = iRowid; + fts5MultiIterPoslist(p, p1, 1, &doclist); + } + + for(i=0; ipDoclist = pDoclist; + fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist); + } + } + + fts5StructureRelease(pStruct); + sqlite3_free(aBuf); +} + + +/* +** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain +** to the document with rowid iRowid. +*/ +int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ + assert( p->rc==SQLITE_OK ); + + /* Allocate hash tables if they have not already been allocated */ + if( p->apHash==0 ){ + int i; + int rc = SQLITE_OK; + int nHash = p->pConfig->nPrefix + 1; + Fts5Hash **apNew; + + apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash); + for(i=0; rc==SQLITE_OK && inPendingData); + } + if( rc==SQLITE_OK ){ + p->apHash = apNew; + }else{ + if( apNew ){ + for(i=0; iiWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ + fts5IndexFlush(p); + } + p->iWriteRowid = iRowid; + return fts5IndexReturn(p); +} + +/* +** Commit data to disk. +*/ +int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ + assert( p->rc==SQLITE_OK ); + fts5IndexFlush(p); + if( bCommit ) fts5CloseReader(p); + return fts5IndexReturn(p); +} + +/* +** Discard any data stored in the in-memory hash tables. Do not write it +** to the database. Additionally, assume that the contents of the %_data +** table may have changed on disk. So any in-memory caches of %_data +** records must be invalidated. +*/ +int sqlite3Fts5IndexRollback(Fts5Index *p){ + fts5CloseReader(p); + fts5IndexDiscardData(p); + assert( p->rc==SQLITE_OK ); + return SQLITE_OK; +} + +/* +** The %_data table is completely empty when this function is called. 
This +** function populates it with the initial structure objects for each index, +** and the initial version of the "averages" record (a zero-byte blob). +*/ +int sqlite3Fts5IndexReinit(Fts5Index *p){ + int i; + Fts5Structure s; + + memset(&s, 0, sizeof(Fts5Structure)); + for(i=0; ipConfig->nPrefix+1; i++){ + fts5StructureWrite(p, i, &s); + } + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); + } + + return fts5IndexReturn(p); +} + +/* +** Open a new Fts5Index handle. If the bCreate argument is true, create +** and initialize the underlying %_data table. +** +** If successful, set *pp to point to the new object and return SQLITE_OK. +** Otherwise, set *pp to NULL and return an SQLite error code. +*/ +int sqlite3Fts5IndexOpen( + Fts5Config *pConfig, + int bCreate, + Fts5Index **pp, + char **pzErr +){ + int rc = SQLITE_OK; + Fts5Index *p; /* New object */ + + *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); + if( !p ) return SQLITE_NOMEM; + + memset(p, 0, sizeof(Fts5Index)); + p->pConfig = pConfig; + p->nWorkUnit = FTS5_WORK_UNIT; + p->nMaxPendingData = 1024*1024; + p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); + if( p->zDataTbl==0 ){ + rc = SQLITE_NOMEM; + }else if( bCreate ){ + rc = sqlite3Fts5CreateTable( + pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexReinit(p); + } + } + + assert( p->rc==SQLITE_OK || rc!=SQLITE_OK ); + if( rc ){ + sqlite3Fts5IndexClose(p, 0); + *pp = 0; + } + return rc; +} + +/* +** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). 
+*/ +int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ + int rc = SQLITE_OK; + if( p ){ + if( bDestroy ){ + rc = sqlite3Fts5DropTable(p->pConfig, "data"); + } + assert( p->pReader==0 ); + sqlite3_finalize(p->pWriter); + sqlite3_finalize(p->pDeleter); + if( p->apHash ){ + int i; + for(i=0; i<=p->pConfig->nPrefix; i++){ + sqlite3Fts5HashFree(p->apHash[i]); + } + sqlite3_free(p->apHash); + } + sqlite3_free(p->zDataTbl); + sqlite3_free(p); + } + return rc; +} + +/* +** Argument p points to a buffer containing utf-8 text that is n bytes in +** size. Return the number of bytes in the nChar character prefix of the +** buffer, or 0 if there are less than nChar characters in total. +*/ +static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ + int n = 0; + int i; + for(i=0; i=nByte ) return 0; /* Input contains fewer than nChar chars */ + if( (unsigned char)p[n++]>=0xc0 ){ + while( (p[n] & 0xc0)==0x80 ) n++; + } + } + return n; +} + +/* +** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of +** unicode characters in the string. +*/ +int fts5IndexCharlen(const char *pIn, int nIn){ + int nChar = 0; + int i = 0; + while( i=0xc0 ){ + while( i delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + int i; /* Used to iterate through indexes */ + int rc; /* Return code */ + Fts5Config *pConfig = p->pConfig; + + assert( p->rc==SQLITE_OK ); + + /* Add the new token to the main terms hash table. And to each of the + ** prefix hash tables that it is large enough for. 
*/ + rc = sqlite3Fts5HashWrite( + p->apHash[0], p->iWriteRowid, iCol, iPos, pToken, nToken + ); + for(i=0; inPrefix && rc==SQLITE_OK; i++){ + int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); + if( nByte ){ + rc = sqlite3Fts5HashWrite( + p->apHash[i+1], p->iWriteRowid, iCol, iPos, pToken, nByte + ); + } + } + + return rc; +} + +/* +** Open a new iterator to iterate though all docids that match the +** specified token or token prefix. +*/ +int sqlite3Fts5IndexQuery( + Fts5Index *p, /* FTS index to query */ + const char *pToken, int nToken, /* Token (or prefix) to query for */ + int flags, /* Mask of FTS5INDEX_QUERY_X flags */ + Fts5IndexIter **ppIter /* OUT: New iterator object */ +){ + Fts5Config *pConfig = p->pConfig; + Fts5IndexIter *pRet; + int iIdx = 0; + + if( flags & FTS5INDEX_QUERY_PREFIX ){ + if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + iIdx = 1+pConfig->nPrefix; + }else{ + int nChar = fts5IndexCharlen(pToken, nToken); + for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ + if( pConfig->aPrefix[iIdx-1]==nChar ) break; + } + } + } + + pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); + if( pRet ){ + memset(pRet, 0, sizeof(Fts5IndexIter)); + + pRet->pIndex = p; + if( iIdx<=pConfig->nPrefix ){ + pRet->pStruct = fts5StructureRead(p, iIdx); + if( pRet->pStruct ){ + fts5MultiIterNew(p, pRet->pStruct, + iIdx, 1, flags, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + ); + } + }else{ + int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; + fts5SetupPrefixIter(p, bDesc, (const u8*)pToken, nToken, pRet); + } + } + + if( p->rc ){ + sqlite3Fts5IterClose(pRet); + pRet = 0; + } + *ppIter = pRet; + return fts5IndexReturn(p); +} + +/* +** Return true if the iterator passed as the only argument is at EOF. 
+*/ +int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ + assert( pIter->pIndex->rc==SQLITE_OK ); + if( pIter->pDoclist ){ + return pIter->pDoclist->aPoslist==0; + }else{ + return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); + } +} + +/* +** Move to the next matching rowid. +*/ +int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ + assert( pIter->pIndex->rc==SQLITE_OK ); + if( pIter->pDoclist ){ + fts5DoclistIterNext(pIter->pDoclist); + }else{ + fts5BufferZero(&pIter->poslist); + fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); + } + return fts5IndexReturn(pIter->pIndex); +} + +/* +** Move the doclist-iter passed as the first argument to the next +** matching rowid that occurs at or after iMatch. The definition of "at +** or after" depends on whether this iterator iterates in ascending or +** descending rowid order. +*/ +static void fts5DoclistIterNextFrom(Fts5DoclistIter *p, i64 iMatch){ + do{ + i64 iRowid = p->iRowid; + if( p->bDesc==0 && iRowid>=iMatch ) break; + if( p->bDesc!=0 && iRowid<=iMatch ) break; + fts5DoclistIterNext(p); + }while( p->aPoslist ); +} + +/* +** Move to the next matching rowid that occurs at or after iMatch. The +** definition of "at or after" depends on whether this iterator iterates +** in ascending or descending rowid order. +*/ +int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ + if( pIter->pDoclist ){ + fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); + }else{ + fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); + } + return fts5IndexReturn(pIter->pIndex); +} + +/* +** Return the current rowid. +*/ +i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ + if( pIter->pDoclist ){ + return pIter->pDoclist->iRowid; + }else{ + return fts5MultiIterRowid(pIter->pMulti); + } +} + + +/* +** Return a pointer to a buffer containing a copy of the position list for +** the current entry. Output variable *pn is set to the size of the buffer +** in bytes before returning. 
+** +** The returned position list does not include the "number of bytes" varint +** field that starts the position list on disk. +*/ +int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ + assert( pIter->pIndex->rc==SQLITE_OK ); + if( pIter->pDoclist ){ + *pn = pIter->pDoclist->nPoslist; + *pp = pIter->pDoclist->aPoslist; + }else{ + Fts5Index *p = pIter->pIndex; + fts5BufferZero(&pIter->poslist); + fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist); + *pn = pIter->poslist.n; + *pp = pIter->poslist.p; + } + return fts5IndexReturn(pIter->pIndex); +} + +/* +** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). +*/ +void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ + if( pIter ){ + if( pIter->pDoclist ){ + sqlite3_free(pIter->pDoclist->a); + sqlite3_free(pIter->pDoclist); + }else{ + fts5MultiIterFree(pIter->pIndex, pIter->pMulti); + fts5StructureRelease(pIter->pStruct); + fts5BufferFree(&pIter->poslist); + } + fts5CloseReader(pIter->pIndex); + sqlite3_free(pIter); + } +} + +/* +** Read the "averages" record into the buffer supplied as the second +** argument. Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ + assert( p->rc==SQLITE_OK ); + fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); + return fts5IndexReturn(p); +} + +/* +** Replace the current "averages" record with the contents of the buffer +** supplied as the second argument. +*/ +int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ + assert( p->rc==SQLITE_OK ); + fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); + return fts5IndexReturn(p); +} + +/* +** Return the total number of blocks this module has read from the %_data +** table since it was created. 
+*/ +int sqlite3Fts5IndexReads(Fts5Index *p){ + return p->nRead; +} + +/* +** Set the 32-bit cookie value stored at the start of all structure +** records to the value passed as the second argument. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. +*/ +int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ + int rc = SQLITE_OK; + Fts5Config *pConfig = p->pConfig; + u8 aCookie[4]; + int i; + + assert( p->rc==SQLITE_OK ); + sqlite3Fts5Put32(aCookie, iNew); + for(i=0; rc==SQLITE_OK && i<=pConfig->nPrefix; i++){ + sqlite3_blob *pBlob = 0; + i64 iRowid = FTS5_STRUCTURE_ROWID(i); + rc = sqlite3_blob_open( + pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 1, &pBlob + ); + if( rc==SQLITE_OK ){ + sqlite3_blob_write(pBlob, aCookie, 4, 0); + rc = sqlite3_blob_close(pBlob); + } + } + + return rc; +} + +int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ + Fts5Structure *pStruct; + pStruct = fts5StructureRead(p, 0); + fts5StructureRelease(pStruct); + return fts5IndexReturn(p); +} + + +/************************************************************************* +************************************************************************** +** Below this point is the implementation of the integrity-check +** functionality. +*/ + /* ** Return a simple checksum value based on the arguments. */ @@ -4103,264 +4813,6 @@ static void fts5IndexIntegrityCheckSegment( fts5BtreeIterFree(&iter); } -/* -** Iterator pMulti currently points to a valid entry (not EOF). This -** function appends a copy of the position-list of the entry pMulti -** currently points to to buffer pBuf. -** -** If an error occurs, an error code is left in p->rc. It is assumed -** no error has already occurred when this function is called. 
-*/ -static void fts5MultiIterPoslist( - Fts5Index *p, - Fts5MultiSegIter *pMulti, - int bSz, - Fts5Buffer *pBuf -){ - if( p->rc==SQLITE_OK ){ - Fts5ChunkIter iter; - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; - assert( fts5MultiIterEof(p, pMulti)==0 ); - static int nCall = 0; - nCall++; - - fts5ChunkIterInit(p, pSeg, &iter); - - if( fts5ChunkIterEof(p, &iter)==0 ){ - if( bSz ){ - /* WRITEPOSLISTSIZE */ - fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2); - } - while( fts5ChunkIterEof(p, &iter)==0 ){ - fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); - fts5ChunkIterNext(p, &iter); - } - } - fts5ChunkIterRelease(&iter); - } -} - -static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ - if( pIter->in ){ - int bDummy; - if( pIter->i ){ - i64 iDelta; - pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); - if( pIter->bDesc ){ - pIter->iRowid -= iDelta; - }else{ - pIter->iRowid += iDelta; - } - }else{ - pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); - } - pIter->i += fts5GetPoslistSize( - &pIter->a[pIter->i], &pIter->nPoslist, &bDummy - ); - pIter->aPoslist = &pIter->a[pIter->i]; - pIter->i += pIter->nPoslist; - }else{ - pIter->aPoslist = 0; - } -} - -static void fts5DoclistIterInit( - Fts5Buffer *pBuf, - int bDesc, - Fts5DoclistIter *pIter -){ - memset(pIter, 0, sizeof(*pIter)); - pIter->a = pBuf->p; - pIter->n = pBuf->n; - pIter->bDesc = bDesc; - fts5DoclistIterNext(pIter); -} - -/* -** Append a doclist to buffer pBuf. 
-*/ -static void fts5MergeAppendDocid( - int *pRc, /* IN/OUT: Error code */ - int bDesc, - Fts5Buffer *pBuf, /* Buffer to write to */ - i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ - i64 iRowid /* Rowid to append */ -){ - if( pBuf->n==0 ){ - fts5BufferAppendVarint(pRc, pBuf, iRowid); - }else if( bDesc ){ - fts5BufferAppendVarint(pRc, pBuf, *piLastRowid - iRowid); - }else{ - fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); - } - *piLastRowid = iRowid; -} - -/* -** Buffers p1 and p2 contain doclists. This function merges the content -** of the two doclists together and sets buffer p1 to the result before -** returning. -** -** If an error occurs, an error code is left in p->rc. If an error has -** already occurred, this function is a no-op. -*/ -static void fts5MergePrefixLists( - Fts5Index *p, /* FTS5 backend object */ - int bDesc, - Fts5Buffer *p1, /* First list to merge */ - Fts5Buffer *p2 /* Second list to merge */ -){ - if( p2->n ){ - i64 iLastRowid = 0; - Fts5DoclistIter i1; - Fts5DoclistIter i2; - Fts5Buffer out; - Fts5Buffer tmp; - memset(&out, 0, sizeof(out)); - memset(&tmp, 0, sizeof(tmp)); - - fts5DoclistIterInit(p1, bDesc, &i1); - fts5DoclistIterInit(p2, bDesc, &i2); - while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ - if( i2.aPoslist==0 || (i1.aPoslist && - ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowidrc, bDesc, &out, &iLastRowid, i1.iRowid); - /* WRITEPOSLISTSIZE */ - fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); - fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); - fts5DoclistIterNext(&i1); - } - else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ - /* Copy entry from i2 */ - fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); - /* WRITEPOSLISTSIZE */ - fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); - fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); - fts5DoclistIterNext(&i2); - } - else{ - Fts5PoslistReader r1; - Fts5PoslistReader r2; - 
Fts5PoslistWriter writer; - - memset(&writer, 0, sizeof(writer)); - - /* Merge the two position lists. */ - fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); - fts5BufferZero(&tmp); - sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); - sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); - while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){ - i64 iNew; - if( r2.bEof || (r1.bEof==0 && r1.iPosrc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); - } - - /* WRITEPOSLISTSIZE */ - fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2); - fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); - fts5DoclistIterNext(&i1); - fts5DoclistIterNext(&i2); - } - } - - fts5BufferSet(&p->rc, p1, out.n, out.p); - fts5BufferFree(&tmp); - fts5BufferFree(&out); - } -} - -static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ - Fts5Buffer tmp = *p1; - *p1 = *p2; - *p2 = tmp; -} - -static void fts5SetupPrefixIter( - Fts5Index *p, /* Index to read from */ - int bDesc, /* True for "ORDER BY rowid DESC" */ - const u8 *pToken, /* Buffer containing prefix to match */ - int nToken, /* Size of buffer pToken in bytes */ - Fts5IndexIter *pIter /* Populate this object */ -){ - Fts5Structure *pStruct; - Fts5Buffer *aBuf; - const int nBuf = 32; - - aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); - pStruct = fts5StructureRead(p, 0); - - if( aBuf && pStruct ){ - Fts5DoclistIter *pDoclist; - int i; - i64 iLastRowid = 0; - Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ - Fts5Buffer doclist; - - memset(&doclist, 0, sizeof(doclist)); - for(fts5MultiIterNew(p, pStruct, 0, 1, 1, pToken, nToken, -1, 0, &p1); - fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext(p, p1, 0, 0) - ){ - i64 iRowid = fts5MultiIterRowid(p1); - int nTerm; - const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); - assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); - if( nTerm0 - && ((!bDesc && iRowid<=iLastRowid) || (bDesc && iRowid>=iLastRowid)) - ){ - - 
for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ - assert( irc, &doclist, iRowid); - }else if( bDesc ){ - fts5BufferAppendVarint(&p->rc, &doclist, iLastRowid - iRowid); - }else{ - fts5BufferAppendVarint(&p->rc, &doclist, iRowid - iLastRowid); - } - iLastRowid = iRowid; - fts5MultiIterPoslist(p, p1, 1, &doclist); - } - - for(i=0; ipDoclist = pDoclist; - fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist); - } - } - - fts5StructureRelease(pStruct); - sqlite3_free(aBuf); -} static int fts5QueryCksum( Fts5Index *p, @@ -4511,188 +4963,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ } -/* -** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain -** to the document with rowid iRowid. -*/ -int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ - assert( p->rc==SQLITE_OK ); - - /* Allocate hash tables if they have not already been allocated */ - if( p->apHash==0 ){ - int i; - int rc = SQLITE_OK; - int nHash = p->pConfig->nPrefix + 1; - Fts5Hash **apNew; - - apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash); - for(i=0; rc==SQLITE_OK && inPendingData); - } - if( rc==SQLITE_OK ){ - p->apHash = apNew; - }else{ - if( apNew ){ - for(i=0; iiWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ - fts5IndexFlush(p); - } - p->iWriteRowid = iRowid; - return fts5IndexReturn(p); -} - -/* -** Commit data to disk. -*/ -int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ - assert( p->rc==SQLITE_OK ); - fts5IndexFlush(p); - if( bCommit ) fts5CloseReader(p); - return fts5IndexReturn(p); -} - -/* -** Discard any data stored in the in-memory hash tables. Do not write it -** to the database. Additionally, assume that the contents of the %_data -** table may have changed on disk. So any in-memory caches of %_data -** records must be invalidated. 
-*/ -int sqlite3Fts5IndexRollback(Fts5Index *p){ - fts5CloseReader(p); - fts5IndexDiscardData(p); - assert( p->rc==SQLITE_OK ); - return SQLITE_OK; -} - -/* -** The %_data table is completely empty when this function is called. This -** function populates it with the initial structure objects for each index, -** and the initial version of the "averages" record (a zero-byte blob). -*/ -int sqlite3Fts5IndexReinit(Fts5Index *p){ - int i; - Fts5Structure s; - - memset(&s, 0, sizeof(Fts5Structure)); - for(i=0; ipConfig->nPrefix+1; i++){ - fts5StructureWrite(p, i, &s); - } - if( p->rc==SQLITE_OK ){ - p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); - } - - return fts5IndexReturn(p); -} - -/* -** Open a new Fts5Index handle. If the bCreate argument is true, create -** and initialize the underlying %_data table. -** -** If successful, set *pp to point to the new object and return SQLITE_OK. -** Otherwise, set *pp to NULL and return an SQLite error code. -*/ -int sqlite3Fts5IndexOpen( - Fts5Config *pConfig, - int bCreate, - Fts5Index **pp, - char **pzErr -){ - int rc = SQLITE_OK; - Fts5Index *p; /* New object */ - - *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); - if( !p ) return SQLITE_NOMEM; - - memset(p, 0, sizeof(Fts5Index)); - p->pConfig = pConfig; - p->nWorkUnit = FTS5_WORK_UNIT; - p->nMaxPendingData = 1024*1024; - p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); - if( p->zDataTbl==0 ){ - rc = SQLITE_NOMEM; - }else if( bCreate ){ - rc = sqlite3Fts5CreateTable( - pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr - ); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts5IndexReinit(p); - } - } - - assert( p->rc==SQLITE_OK || rc!=SQLITE_OK ); - if( rc ){ - sqlite3Fts5IndexClose(p, 0); - *pp = 0; - } - return rc; -} - -/* -** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). 
-*/ -int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ - int rc = SQLITE_OK; - if( p ){ - if( bDestroy ){ - rc = sqlite3Fts5DropTable(p->pConfig, "data"); - } - assert( p->pReader==0 ); - sqlite3_finalize(p->pWriter); - sqlite3_finalize(p->pDeleter); - if( p->apHash ){ - int i; - for(i=0; i<=p->pConfig->nPrefix; i++){ - sqlite3Fts5HashFree(p->apHash[i]); - } - sqlite3_free(p->apHash); - } - sqlite3_free(p->zDataTbl); - sqlite3_free(p); - } - return rc; -} - -/* -** Argument p points to a buffer containing utf-8 text that is n bytes in -** size. Return the number of bytes in the nChar character prefix of the -** buffer, or 0 if there are less than nChar characters in total. -*/ -static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ - int n = 0; - int i; - for(i=0; i=nByte ) return 0; /* Input contains fewer than nChar chars */ - if( (unsigned char)p[n++]>=0xc0 ){ - while( (p[n] & 0xc0)==0x80 ) n++; - } - } - return n; -} - -/* -** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of -** unicode characters in the string. -*/ -int fts5IndexCharlen(const char *pIn, int nIn){ - int nChar = 0; - int i = 0; - while( i=0xc0 ){ - while( i delete) */ - int iPos, /* Position of token within column */ - const char *pToken, int nToken /* Token to add or remove to or from index */ -){ - int i; /* Used to iterate through indexes */ - int rc; /* Return code */ - Fts5Config *pConfig = p->pConfig; - - assert( p->rc==SQLITE_OK ); - - /* Add the new token to the main terms hash table. And to each of the - ** prefix hash tables that it is large enough for. 
*/ - rc = sqlite3Fts5HashWrite( - p->apHash[0], p->iWriteRowid, iCol, iPos, pToken, nToken - ); - for(i=0; inPrefix && rc==SQLITE_OK; i++){ - int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); - if( nByte ){ - rc = sqlite3Fts5HashWrite( - p->apHash[i+1], p->iWriteRowid, iCol, iPos, pToken, nByte - ); - } - } - - return rc; -} - -/* -** Open a new iterator to iterate though all docids that match the -** specified token or token prefix. -*/ -int sqlite3Fts5IndexQuery( - Fts5Index *p, /* FTS index to query */ - const char *pToken, int nToken, /* Token (or prefix) to query for */ - int flags, /* Mask of FTS5INDEX_QUERY_X flags */ - Fts5IndexIter **ppIter /* OUT: New iterator object */ -){ - Fts5Config *pConfig = p->pConfig; - Fts5IndexIter *pRet; - int iIdx = 0; - - if( flags & FTS5INDEX_QUERY_PREFIX ){ - if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ - iIdx = 1+pConfig->nPrefix; - }else{ - int nChar = fts5IndexCharlen(pToken, nToken); - for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ - if( pConfig->aPrefix[iIdx-1]==nChar ) break; - } - } - } - - pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); - if( pRet ){ - memset(pRet, 0, sizeof(Fts5IndexIter)); - - pRet->pIndex = p; - if( iIdx<=pConfig->nPrefix ){ - pRet->pStruct = fts5StructureRead(p, iIdx); - if( pRet->pStruct ){ - fts5MultiIterNew(p, pRet->pStruct, - iIdx, 1, flags, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti - ); - } - }else{ - int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; - fts5SetupPrefixIter(p, bDesc, (const u8*)pToken, nToken, pRet); - } - } - - if( p->rc ){ - sqlite3Fts5IterClose(pRet); - pRet = 0; - } - *ppIter = pRet; - return fts5IndexReturn(p); -} - -/* -** Return true if the iterator passed as the only argument is at EOF. 
-*/ -int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ - assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pDoclist ){ - return pIter->pDoclist->aPoslist==0; - }else{ - return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); - } -} - -/* -** Move to the next matching rowid. -*/ -int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ - assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pDoclist ){ - fts5DoclistIterNext(pIter->pDoclist); - }else{ - fts5BufferZero(&pIter->poslist); - fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); - } - return fts5IndexReturn(pIter->pIndex); -} - -/* -** Move the doclist-iter passed as the first argument to the next -** matching rowid that occurs at or after iMatch. The definition of "at -** or after" depends on whether this iterator iterates in ascending or -** descending rowid order. -*/ -static void fts5DoclistIterNextFrom(Fts5DoclistIter *p, i64 iMatch){ - do{ - i64 iRowid = p->iRowid; - if( p->bDesc==0 && iRowid>=iMatch ) break; - if( p->bDesc!=0 && iRowid<=iMatch ) break; - fts5DoclistIterNext(p); - }while( p->aPoslist ); -} - -/* -** Move to the next matching rowid that occurs at or after iMatch. The -** definition of "at or after" depends on whether this iterator iterates -** in ascending or descending rowid order. -*/ -int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ - if( pIter->pDoclist ){ - fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); - }else{ - fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); - } - return fts5IndexReturn(pIter->pIndex); -} - -/* -** Return the current rowid. -*/ -i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ - if( pIter->pDoclist ){ - return pIter->pDoclist->iRowid; - }else{ - return fts5MultiIterRowid(pIter->pMulti); - } -} - - -/* -** Return a pointer to a buffer containing a copy of the position list for -** the current entry. Output variable *pn is set to the size of the buffer -** in bytes before returning. 
-** -** The returned position list does not include the "number of bytes" varint -** field that starts the position list on disk. -*/ -int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ - assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pDoclist ){ - *pn = pIter->pDoclist->nPoslist; - *pp = pIter->pDoclist->aPoslist; - }else{ - Fts5Index *p = pIter->pIndex; - fts5BufferZero(&pIter->poslist); - fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist); - *pn = pIter->poslist.n; - *pp = pIter->poslist.p; - } - return fts5IndexReturn(pIter->pIndex); -} - -/* -** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). -*/ -void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ - if( pIter ){ - if( pIter->pDoclist ){ - sqlite3_free(pIter->pDoclist->a); - sqlite3_free(pIter->pDoclist); - }else{ - fts5MultiIterFree(pIter->pIndex, pIter->pMulti); - fts5StructureRelease(pIter->pStruct); - fts5BufferFree(&pIter->poslist); - } - fts5CloseReader(pIter->pIndex); - sqlite3_free(pIter); - } -} - -/* -** Read the "averages" record into the buffer supplied as the second -** argument. Return SQLITE_OK if successful, or an SQLite error code -** if an error occurs. -*/ -int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ - assert( p->rc==SQLITE_OK ); - fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); - return fts5IndexReturn(p); -} - -/* -** Replace the current "averages" record with the contents of the buffer -** supplied as the second argument. -*/ -int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ - assert( p->rc==SQLITE_OK ); - fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); - return fts5IndexReturn(p); -} - -/* -** Return the total number of blocks this module has read from the %_data -** table since it was created. 
-*/ -int sqlite3Fts5IndexReads(Fts5Index *p){ - return p->nRead; -} - -/* -** Set the 32-bit cookie value stored at the start of all structure -** records to the value passed as the second argument. -** -** Return SQLITE_OK if successful, or an SQLite error code if an error -** occurs. -*/ -int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ - int rc = SQLITE_OK; - Fts5Config *pConfig = p->pConfig; - u8 aCookie[4]; - int i; - - assert( p->rc==SQLITE_OK ); - sqlite3Fts5Put32(aCookie, iNew); - for(i=0; rc==SQLITE_OK && i<=pConfig->nPrefix; i++){ - sqlite3_blob *pBlob = 0; - i64 iRowid = FTS5_STRUCTURE_ROWID(i); - rc = sqlite3_blob_open( - pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 1, &pBlob - ); - if( rc==SQLITE_OK ){ - sqlite3_blob_write(pBlob, aCookie, 4, 0); - rc = sqlite3_blob_close(pBlob); - } - } - - return rc; -} - -int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ - Fts5Structure *pStruct; - pStruct = fts5StructureRead(p, 0); - fts5StructureRelease(pStruct); - return fts5IndexReturn(p); -} - /************************************************************************* ************************************************************************** ** Below this point is the implementation of the fts5_decode() scalar diff --git a/ext/fts5/test/fts5fault2.test b/ext/fts5/test/fts5fault2.test index 2624b5a8e4..36d29f8a94 100644 --- a/ext/fts5/test/fts5fault2.test +++ b/ext/fts5/test/fts5fault2.test @@ -22,6 +22,8 @@ ifcapable !fts5 { return } +if 0 { + set doc [string trim [string repeat "x y z " 200]] do_execsql_test 1.0 { CREATE TABLE t1(a INTEGER PRIMARY KEY, x); @@ -73,6 +75,32 @@ do_faultsim_test 2.1 -faults oom-trans* -prep { catchsql { ROLLBACK } } +} + +#------------------------------------------------------------------------- +# OOM within an 'optimize' operation that writes multiple pages to disk. 
+# +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE zzz USING fts5(z); + INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32); + INSERT INTO zzz VALUES('a b c d'); + INSERT INTO zzz SELECT 'c d e f' FROM zzz; + INSERT INTO zzz SELECT 'e f g h' FROM zzz; + INSERT INTO zzz SELECT 'i j k l' FROM zzz; + INSERT INTO zzz SELECT 'l k m n' FROM zzz; + INSERT INTO zzz SELECT 'o p q r' FROM zzz; +} +faultsim_save_and_close + +do_faultsim_test 3.1 -faults oom-trans* -prep { + faultsim_restore_and_reopen + execsql { SELECT rowid FROM zzz } +} -body { + execsql { INSERT INTO zzz(zzz) VALUES('optimize') } +} -test { + faultsim_test_result {0 {}} +} finish_test diff --git a/manifest b/manifest index 1be9a25a1a..2c3dcdd1ca 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sfor\sfts5. -D 2015-04-25T18:56:48.351 +C Improve\scoverage\sof\sfts5_index.c\sslightly. +D 2015-04-25T20:29:46.707 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 43fcf838d3a3390d1245e3d5e651fa5cc1df575b F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c c87369d11271847df9f033f0df148e7f004a88a2 +F ext/fts5/fts5_index.c 699b716f1b84ef78da9ccee25a8b6fe020cff32a F ext/fts5/fts5_storage.c b3a4cbbcd197fe587789398e51a631f92fc9196c F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -141,7 +141,7 @@ F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test 
ed71717a479bef32d05f02d9c48691011d160d4d -F ext/fts5/test/fts5fault2.test f478fa94e39a6911189f9e052a3b93ab4cd275fa +F ext/fts5/test/fts5fault2.test 0476720b3fcbb3f30c26da4cc8bda4f020d3e408 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 @@ -1305,7 +1305,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1c78d8920fb59da3cb97dd2eb09b3e08dfd14259 -R 6572bfe0dee7d9becde3a8bb0a8d33a2 +P e748651c940eae2389fe826cf5c25f1166a5e611 +R 0f1e32978d2225a38a865fc7c17144a3 U dan -Z e7bb2196ce75c57e787365bfa027743a +Z 504cb23846c7efd7bba629c51f43c91a diff --git a/manifest.uuid b/manifest.uuid index 14320ee5eb..4145bd78fe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e748651c940eae2389fe826cf5c25f1166a5e611 \ No newline at end of file +e5aaa01306597ffd2475dcb83ae889393f68d315 \ No newline at end of file From c1cea8f731b5bf680540094a54c6d470e7678d86 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 27 Apr 2015 11:31:56 +0000 Subject: [PATCH 114/206] Further tests for fts5. 
FossilOrigin-Name: ffeb3ef3cfec3681b72bb28cfa612aa15e07887d --- ext/fts5/fts5.c | 5 +- ext/fts5/fts5Int.h | 2 + ext/fts5/fts5_index.c | 148 ++++++++++++++++++++-------------- ext/fts5/fts5_storage.c | 4 + ext/fts5/test/fts5fault2.test | 4 - ext/fts5/test/fts5fault3.test | 87 ++++++++++++++++++++ ext/fts5/test/fts5merge.test | 139 +++++++++++++++++++++++++++++++ manifest | 22 ++--- manifest.uuid | 2 +- 9 files changed, 337 insertions(+), 76 deletions(-) create mode 100644 ext/fts5/test/fts5fault3.test create mode 100644 ext/fts5/test/fts5merge.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index f8450aab15..baa51f4a33 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1055,7 +1055,7 @@ static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){ static int fts5SpecialInsert( Fts5Table *pTab, /* Fts5 table object */ sqlite3_value *pCmd, /* Value inserted into special column */ - sqlite3_value *pVal /* Value inserted into rowid column */ + sqlite3_value *pVal /* Value inserted into rank column */ ){ Fts5Config *pConfig = pTab->pConfig; const char *z = (const char*)sqlite3_value_text(pCmd); @@ -1083,6 +1083,9 @@ static int fts5SpecialInsert( } }else if( 0==sqlite3_stricmp("optimize", z) ){ rc = sqlite3Fts5StorageOptimize(pTab->pStorage); + }else if( 0==sqlite3_stricmp("merge", z) ){ + int nMerge = sqlite3_value_int(pVal); + rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); }else{ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 1e08bd68e0..c553fac5b8 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -360,6 +360,7 @@ int sqlite3Fts5IndexReads(Fts5Index *p); int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); +int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); @@ -459,6 +460,7 @@ int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, 
sqlite3_value**); int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); int sqlite3Fts5StorageRebuild(Fts5Storage *p); int sqlite3Fts5StorageOptimize(Fts5Storage *p); +int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); /* ** End of interface to code in fts5_storage.c. diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 748dd27a7e..6990225cae 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -823,16 +823,20 @@ static Fts5Data *fts5DataReadOrBuffer( /* This call may return SQLITE_ABORT if there has been a savepoint ** rollback since it was last used. In this case a new blob handle ** is required. */ - rc = sqlite3_blob_reopen(p->pReader, iRowid); - if( rc==SQLITE_ABORT ){ + sqlite3_blob *pBlob = p->pReader; + p->pReader = 0; + rc = sqlite3_blob_reopen(pBlob, iRowid); + assert( p->pReader==0 ); + p->pReader = pBlob; + if( rc!=SQLITE_OK ){ fts5CloseReader(p); - rc = SQLITE_OK; } + if( rc==SQLITE_ABORT ) rc = SQLITE_OK; } /* If the blob handle is not yet open, open and seek it. Otherwise, use ** the blob_reopen() API to reseek the existing blob handle. */ - if( p->pReader==0 ){ + if( p->pReader==0 && rc==SQLITE_OK ){ Fts5Config *pConfig = p->pConfig; rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader @@ -2770,7 +2774,6 @@ static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){ ** returned in this case. 
*/ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ - int i; u32 iSegid = 0; if( p->rc==SQLITE_OK ){ @@ -3227,11 +3230,11 @@ static void fts5WriteInitForAppend( pWriter->iIdx = iIdx; pWriter->iSegid = pSeg->iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); - pWriter->nWriter = pSeg->nHeight; if( p->rc==SQLITE_OK ){ int pgno = 1; int i; + pWriter->nWriter = pSeg->nHeight; pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; for(i=pSeg->nHeight-1; i>0; i--){ i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pgno); @@ -3250,7 +3253,7 @@ static void fts5WriteInitForAppend( if( pSeg->nHeight==1 ){ pWriter->nEmpty = pSeg->pgnoLast-1; } - assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); + assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); pWriter->bFirstTermInPage = 1; assert( pWriter->aWriter[0].term.n==0 ); } @@ -3351,7 +3354,6 @@ static void fts5IndexMergeLevel( fts5WriteInit(p, &writer, iIdx, iSegid); /* Add the new segment to the output level */ - if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++; pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; pLvlOut->nSeg++; pSeg->pgnoFirst = 1; @@ -3447,6 +3449,59 @@ fflush(stdout); if( pnRem ) *pnRem -= writer.nLeafWritten; } +/* +** Do up to nPg pages of automerge work on index iIdx. +*/ +static void fts5IndexMerge( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index to work on */ + Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ + int nPg /* Pages of work to do */ +){ + int nRem = nPg; + Fts5Structure *pStruct = *ppStruct; + while( nRem>0 && p->rc==SQLITE_OK ){ + int iLvl; /* To iterate through levels */ + int iBestLvl = 0; /* Level offering the most input segments */ + int nBest = 0; /* Number of input segments on best level */ + + /* Set iBestLvl to the level to read input segments from. 
*/ + assert( pStruct->nLevel>0 ); + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + if( pLvl->nMerge ){ + if( pLvl->nMerge>nBest ){ + iBestLvl = iLvl; + nBest = pLvl->nMerge; + } + break; + } + if( pLvl->nSeg>nBest ){ + nBest = pLvl->nSeg; + iBestLvl = iLvl; + } + } + + /* If nBest is still 0, then the index must be empty. */ +#ifdef SQLITE_DEBUG + for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ + assert( pStruct->aLevel[iLvl].nSeg==0 ); + } +#endif + + if( nBest<p->pConfig->nAutomerge + && pStruct->aLevel[iBestLvl].nMerge==0 + ){ + break; + } + fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); + if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ + fts5StructurePromote(p, iBestLvl+1, pStruct); + } + } + *ppStruct = pStruct; +} + /* ** A total of nLeaf leaf pages of data has just been flushed to a level-0 ** segments in index iIdx with structure pStruct. This function updates the @@ -3456,13 +3511,13 @@ fflush(stdout); ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ -static void fts5IndexWork( +static void fts5IndexAutomerge( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nLeaf /* Number of output leaves just written */ ){ - if( p->rc==SQLITE_OK ){ + if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){ Fts5Structure *pStruct = *ppStruct; i64 nWrite; /* Initial value of write-counter */ int nWork; /* Number of work-quanta to perform */ @@ -3474,62 +3529,21 @@ static void fts5IndexWork( pStruct->nWriteCounter += nLeaf; nRem = p->nWorkUnit * nWork * pStruct->nLevel; - while( nRem>0 ){ - int iLvl; /* To iterate through levels */ - int iBestLvl = 0; /* Level offering the most input segments */ - int nBest = 0; /* Number of input segments on best level */ - - /* Set iBestLvl to the level to read input segments from. 
*/ - assert( pStruct->nLevel>0 ); - for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ - Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; - if( pLvl->nMerge ){ - if( pLvl->nMerge>nBest ){ - iBestLvl = iLvl; - nBest = pLvl->nMerge; - } - break; - } - if( pLvl->nSeg>nBest ){ - nBest = pLvl->nSeg; - iBestLvl = iLvl; - } - } - - /* If nBest is still 0, then the index must be empty. */ -#ifdef SQLITE_DEBUG - for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ - assert( pStruct->aLevel[iLvl].nSeg==0 ); - } -#endif - - if( nBest<p->pConfig->nAutomerge - && pStruct->aLevel[iBestLvl].nMerge==0 - ){ - break; - } - fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); - assert( nRem==0 || p->rc==SQLITE_OK ); - if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ - fts5StructurePromote(p, iBestLvl+1, pStruct); - } - *ppStruct = pStruct; - } - + fts5IndexMerge(p, iIdx, ppStruct, nRem); } } -static void fts5IndexCrisisMerge( +static void fts5IndexCrisismerge( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ ){ + const int nCrisis = p->pConfig->nCrisisMerge; Fts5Structure *pStruct = *ppStruct; int iLvl = 0; - while( p->rc==SQLITE_OK - && iLvl<pStruct->nLevel - && pStruct->aLevel[iLvl].nSeg>=p->pConfig->nCrisisMerge - ){ + + assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); + while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; @@ -3744,8 +3758,8 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ } - if( p->pConfig->nAutomerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); - fts5IndexCrisisMerge(p, iHash, &pStruct); + fts5IndexAutomerge(p, iHash, &pStruct, pgnoLast); + fts5IndexCrisismerge(p, iHash, &pStruct); fts5StructureWrite(p, iHash, pStruct); fts5StructureRelease(pStruct); } @@ -3759,7 +3773,7 @@ static void fts5IndexFlush(Fts5Index *p){ int nLeaf = 0; /* Number of leaves written 
*/ /* If an error has already occured this call is a no-op. */ - if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return; + if( p->nPendingData==0 ) return; assert( p->apHash ); /* Flush the terms and each prefix index to disk */ @@ -3774,6 +3788,7 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; int i; + assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); for(i=0; i<=pConfig->nPrefix; i++){ Fts5Structure *pStruct = fts5StructureRead(p, i); @@ -3828,6 +3843,16 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p){ return fts5IndexReturn(p); } +int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ + Fts5Structure *pStruct; + + pStruct = fts5StructureRead(p, 0); + fts5IndexMerge(p, 0, &pStruct, nMerge); + fts5StructureWrite(p, 0, pStruct); + fts5StructureRelease(pStruct); + + return fts5IndexReturn(p); +} /* @@ -4122,6 +4147,7 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ } if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ + assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); } p->iWriteRowid = iRowid; @@ -4703,6 +4729,8 @@ static void fts5IndexIntegrityCheckSegment( ){ Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ + if( pSeg->pgnoFirst==0 && pSeg->pgnoLast==0 ) return; + /* Iterate through the b-tree hierarchy. */ for(fts5BtreeIterInit(p, iIdx, pSeg, &iter); p->rc==SQLITE_OK && iter.bEof==0; diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 33eda7c3e7..866e6b75c7 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -596,6 +596,10 @@ int sqlite3Fts5StorageOptimize(Fts5Storage *p){ return sqlite3Fts5IndexOptimize(p->pIndex); } +int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ + return sqlite3Fts5IndexMerge(p->pIndex, nMerge); +} + /* ** Allocate a new rowid. This is used for "external content" tables when ** a NULL value is inserted into the rowid column. 
The new rowid is allocated diff --git a/ext/fts5/test/fts5fault2.test b/ext/fts5/test/fts5fault2.test index 36d29f8a94..fe1d205ec8 100644 --- a/ext/fts5/test/fts5fault2.test +++ b/ext/fts5/test/fts5fault2.test @@ -22,8 +22,6 @@ ifcapable !fts5 { return } -if 0 { - set doc [string trim [string repeat "x y z " 200]] do_execsql_test 1.0 { CREATE TABLE t1(a INTEGER PRIMARY KEY, x); @@ -75,8 +73,6 @@ do_faultsim_test 2.1 -faults oom-trans* -prep { catchsql { ROLLBACK } } -} - #------------------------------------------------------------------------- # OOM within an 'optimize' operation that writes multiple pages to disk. # diff --git a/ext/fts5/test/fts5fault3.test b/ext/fts5/test/fts5fault3.test new file mode 100644 index 0000000000..4d438d7f6c --- /dev/null +++ b/ext/fts5/test/fts5fault3.test @@ -0,0 +1,87 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5fault3 + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +#------------------------------------------------------------------------- +# An OOM while resuming a partially completed segment merge. 
+# +db func rnddoc fts5_rnddoc +do_test 1.0 { + expr srand(0) + execsql { + CREATE VIRTUAL TABLE xx USING fts5(x); + INSERT INTO xx(xx, rank) VALUES('pgsz', 32); + INSERT INTO xx(xx, rank) VALUES('automerge', 16); + } + for {set i 0} {$i < 10} {incr i} { + execsql { + BEGIN; + INSERT INTO xx(x) VALUES(rnddoc(20)); + INSERT INTO xx(x) VALUES(rnddoc(20)); + INSERT INTO xx(x) VALUES(rnddoc(20)); + COMMIT + } + } + + execsql { + INSERT INTO xx(xx, rank) VALUES('automerge', 2); + INSERT INTO xx(xx, rank) VALUES('merge', 50); + } +} {} +faultsim_save_and_close + +do_faultsim_test 1 -faults oom-* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO xx(xx, rank) VALUES('merge', 1) } +} -test { + faultsim_test_result [list 0 {}] +} + +#------------------------------------------------------------------------- +# An OOM while flushing an unusually large term to disk. +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE xx USING fts5(x); + INSERT INTO xx(xx, rank) VALUES('pgsz', 32); +} +faultsim_save_and_close + +set doc "a long term abcdefghijklmnopqrstuvwxyz " +append doc "and then abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz " +append doc [string repeat "abcdefghijklmnopqrstuvwxyz" 10] + +do_faultsim_test 2 -faults oom-* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO xx(x) VALUES ($::doc) } +} -test { + faultsim_test_result [list 0 {}] +} + + + + +finish_test + diff --git a/ext/fts5/test/fts5merge.test b/ext/fts5/test/fts5merge.test new file mode 100644 index 0000000000..1c048be8ed --- /dev/null +++ b/ext/fts5/test/fts5merge.test @@ -0,0 +1,139 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# +# Test that focus on incremental merges of segments. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5merge + +db func repeat [list string repeat] + +#------------------------------------------------------------------------- +# Create an fts index so that: +# +# * the index consists of two top-level segments +# * each segment contains records related to $nRowPerSeg rows +# * all rows consist of tokens "x" and "y" only. +# +# Then run ('merge', 1) until everything is completely merged. +# +proc do_merge1_test {testname nRowPerSeg} { + set ::nRowPerSeg [expr $nRowPerSeg] + do_execsql_test $testname.0 { + DROP TABLE IF EXISTS x8; + CREATE VIRTUAL TABLE x8 USING fts5(i); + INSERT INTO x8(x8, rank) VALUES('pgsz', 32); + + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg) + INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii; + + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg) + INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii; + + INSERT INTO x8(x8, rank) VALUES('automerge', 2); + } + + for {set tn 1} {[lindex [fts5_level_segs x8] 0]>0} {incr tn} { + do_execsql_test $testname.$tn { + INSERT INTO x8(x8, rank) VALUES('merge', 1); + INSERT INTO x8(x8) VALUES('integrity-check'); + } + if {$tn>5} break + } + + do_test $testname.x [list expr "$tn < 5"] 1 +} + +do_merge1_test 1.1 1 +do_merge1_test 1.2 2 +do_merge1_test 1.3 3 +do_merge1_test 1.4 4 +do_merge1_test 1.5 10 +do_merge1_test 1.6 20 +do_merge1_test 1.7 100 + +#------------------------------------------------------------------------- +# +proc do_merge2_test {testname nRow} { + db func rnddoc fts5_rnddoc + + do_execsql_test $testname.0 { + DROP TABLE IF EXISTS x8; + CREATE VIRTUAL TABLE x8 USING fts5(i); + INSERT INTO x8(x8, rank) VALUES('pgsz', 32); + } + + set ::nRow $nRow + do_test $testname.1 { + for {set i 0} {$i < $::nRow} {incr i} { + 
execsql { INSERT INTO x8 VALUES( rnddoc(($i%16) + 5) ) } + while {[not_merged x8]} { + execsql { + INSERT INTO x8(x8, rank) VALUES('automerge', 2); + INSERT INTO x8(x8, rank) VALUES('merge', 1); + INSERT INTO x8(x8, rank) VALUES('automerge', 16); + INSERT INTO x8(x8) VALUES('integrity-check'); + } + } + } + } {} +} +proc not_merged {tbl} { + set segs [fts5_level_segs $tbl] + foreach s $segs { if {$s>1} { return 1 } } + return 0 +} + +do_merge2_test 2.1 5 +do_merge2_test 2.2 10 +do_merge2_test 2.3 20 + +#------------------------------------------------------------------------- +# Test that an auto-merge will complete any merge that has already been +# started, even if the number of input segments is less than the current +# value of the 'automerge' configuration parameter. +# +db func rnddoc fts5_rnddoc + +do_execsql_test 3.1 { + DROP TABLE IF EXISTS x8; + CREATE VIRTUAL TABLE x8 USING fts5(i); + INSERT INTO x8(x8, rank) VALUES('pgsz', 32); + INSERT INTO x8 VALUES(rnddoc(100)); + INSERT INTO x8 VALUES(rnddoc(100)); +} +do_test 3.2 { + execsql { + INSERT INTO x8(x8, rank) VALUES('automerge', 4); + INSERT INTO x8(x8, rank) VALUES('merge', 1); + } + fts5_level_segs x8 +} {2} + +do_test 3.3 { + execsql { + INSERT INTO x8(x8, rank) VALUES('automerge', 2); + INSERT INTO x8(x8, rank) VALUES('merge', 1); + } + fts5_level_segs x8 +} {2 1} + +do_test 3.4 { + execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 4) } + while {[not_merged x8]} { + execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1) } + } + fts5_level_segs x8 +} {0 1} + +finish_test + diff --git a/manifest b/manifest index 2c3dcdd1ca..3e37056c03 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\scoverage\sof\sfts5_index.c\sslightly. -D 2015-04-25T20:29:46.707 +C Further\stests\sfor\sfts5. 
+D 2015-04-27T11:31:56.573 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b +F ext/fts5/fts5.c 3383b8a44766c68bda812b68ce74684c6b87787f F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 803fd2fc03e3799a38ebb404f2f1309ded5d3e8b +F ext/fts5/fts5Int.h 7dc902e195e00c5820b85d71c2476500c2cf9027 F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 43fcf838d3a3390d1245e3d5e651fa5cc1df575b F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 699b716f1b84ef78da9ccee25a8b6fe020cff32a -F ext/fts5/fts5_storage.c b3a4cbbcd197fe587789398e51a631f92fc9196c +F ext/fts5/fts5_index.c 5f969f5b6f5e022c9c62973dd6e4e82e1d5f3f30 +F ext/fts5/fts5_storage.c 87f85986a6d07391f4e0ddfa9799dbecc40fa165 F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d @@ -141,8 +141,10 @@ F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d -F ext/fts5/test/fts5fault2.test 
0476720b3fcbb3f30c26da4cc8bda4f020d3e408 +F ext/fts5/test/fts5fault2.test 37c325cb3e6e27c74c122bb49673b4ae0862df33 +F ext/fts5/test/fts5fault3.test f8935b92976ae645d43205562fdbb0c8511dd049 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d +F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e @@ -1305,7 +1307,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e748651c940eae2389fe826cf5c25f1166a5e611 -R 0f1e32978d2225a38a865fc7c17144a3 +P e5aaa01306597ffd2475dcb83ae889393f68d315 +R 328723993a482062ac5cfa3f373fa969 U dan -Z 504cb23846c7efd7bba629c51f43c91a +Z dd82cb2122e4379a606aef38aa888497 diff --git a/manifest.uuid b/manifest.uuid index 4145bd78fe..02c02a5041 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e5aaa01306597ffd2475dcb83ae889393f68d315 \ No newline at end of file +ffeb3ef3cfec3681b72bb28cfa612aa15e07887d \ No newline at end of file From a3bdec7ee42f23906043d02b18e9e5438f37d43c Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 27 Apr 2015 16:21:49 +0000 Subject: [PATCH 115/206] Change the fts5 content= option so that it matches fts5 columns with the underlying table columns by name, not by their position within the CREATE TABLE statement. 
FossilOrigin-Name: e38e2bb637844dae8ae5d5f3e23d8369e1b91e45 --- ext/fts5/fts5Int.h | 11 +++++++++++ ext/fts5/fts5_config.c | 33 ++++++++++++++++++++++++++++++++- ext/fts5/fts5_index.c | 8 +++----- ext/fts5/fts5_storage.c | 10 ++++++---- ext/fts5/test/fts5content.test | 31 +++++++++++++++++++++++++++++++ ext/fts5/test/fts5fault2.test | 19 +++++++++++++++++++ ext/fts5/tool/showfts5.tcl | 31 +++++++++++++++++++++++++++++++ manifest | 23 ++++++++++++----------- manifest.uuid | 2 +- 9 files changed, 146 insertions(+), 22 deletions(-) create mode 100644 ext/fts5/tool/showfts5.tcl diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index c553fac5b8..2695cd0ff8 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -85,6 +85,16 @@ typedef struct Fts5Config Fts5Config; ** The minimum number of segments that an auto-merge operation should ** attempt to merge together. A value of 1 sets the object to use the ** compile time default. Zero disables auto-merge altogether. +** +** zContent: +** +** zContentRowid: +** The value of the content_rowid= option, if one was specified. Or +** the string "rowid" otherwise. This text is not quoted - if it is +** used as part of an SQL statement it needs to be quoted appropriately. +** +** zContentExprlist: +** */ struct Fts5Config { sqlite3 *db; /* Database handle */ @@ -98,6 +108,7 @@ struct Fts5Config { int eContent; /* An FTS5_CONTENT value */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ + char *zContentExprlist; Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index fb840932b7..0c79cdfc03 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -370,7 +370,7 @@ static int fts5ConfigParseSpecial( *pzErr = sqlite3_mprintf("multiple content_rowid=... 
directives"); rc = SQLITE_ERROR; }else{ - pConfig->zContentRowid = fts5EscapeName(&rc, zArg); + pConfig->zContentRowid = fts5Strdup(&rc, zArg); } return rc; } @@ -469,6 +469,31 @@ static int fts5ConfigParseColumn( return rc; } +/* +** Populate the Fts5Config.zContentExprlist string. +*/ +static int fts5ConfigMakeExprlist(Fts5Config *p){ + int i; + int rc = SQLITE_OK; + Fts5Buffer buf = {0, 0, 0}; + const char *zSep = ""; + + sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); + if( p->eContent!=FTS5_CONTENT_NONE ){ + for(i=0; i<p->nCol; i++){ + if( p->eContent==FTS5_CONTENT_EXTERNAL ){ + sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); + }else{ + sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); + } + } + } + + assert( p->zContentExprlist==0 ); + p->zContentExprlist = (char*)buf.p; + return rc; +} + /* ** Arguments nArg/azArg contain the string arguments passed to the xCreate ** or xConnect method of the virtual table. This function attempts to @@ -571,6 +596,11 @@ int sqlite3Fts5ConfigParse( pRet->zContentRowid = fts5Strdup(&rc, "rowid"); } + /* Formulate the zContentExprlist text */ + if( rc==SQLITE_OK ){ + rc = fts5ConfigMakeExprlist(pRet); + } + if( rc!=SQLITE_OK ){ sqlite3Fts5ConfigFree(pRet); *ppOut = 0; @@ -598,6 +628,7 @@ void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ sqlite3_free(pConfig->zRankArgs); sqlite3_free(pConfig->zContent); sqlite3_free(pConfig->zContentRowid); + sqlite3_free(pConfig->zContentExprlist); sqlite3_free(pConfig); } } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 6990225cae..a0cc72b567 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4729,7 +4729,7 @@ static void fts5IndexIntegrityCheckSegment( ){ Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ - if( pSeg->pgnoFirst==0 && pSeg->pgnoLast==0 ) return; + if( pSeg->pgnoFirst==0 ) return; /* Iterate through the b-tree hierarchy. 
*/ for(fts5BtreeIterInit(p, iIdx, pSeg, &iter); @@ -5148,10 +5148,8 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDocid; int iOff = 0; - if( iOff=0 && eStmtaStmt) ); if( p->aStmt[eStmt]==0 ){ const char *azStmt[] = { - "SELECT * FROM %s ORDER BY %s ASC", /* SCAN_ASC */ - "SELECT * FROM %s ORDER BY %s DESC", /* SCAN_DESC */ - "SELECT * FROM %s WHERE %s=?", /* LOOKUP */ + "SELECT %s FROM %s T ORDER BY T.%Q ASC", /* SCAN_ASC */ + "SELECT %s FROM %s T ORDER BY T.%Q DESC", /* SCAN_DESC */ + "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ @@ -86,7 +86,9 @@ static int fts5StorageGetStmt( case FTS5_STMT_SCAN_ASC: case FTS5_STMT_SCAN_DESC: case FTS5_STMT_LOOKUP: - zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContent, pC->zContentRowid); + zSql = sqlite3_mprintf(azStmt[eStmt], + pC->zContentExprlist, pC->zContent, pC->zContentRowid + ); break; case FTS5_STMT_INSERT_CONTENT: diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index 145fa4b6a9..be2b56a76d 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -9,6 +9,7 @@ # #*********************************************************************** # +# This file contains tests for the content= and content_rowid= options. # source [file join [file dirname [info script]] fts5_common.tcl] @@ -186,5 +187,35 @@ do_catchsql_test 3.8 { INSERT INTO t4(t4) VALUES('delete-all'); } {1 {'delete-all' may only be used with a contentless or external content fts5 table}} +#------------------------------------------------------------------------- +# Test an external content table with a more interesting schema. 
+# +do_execsql_test 4.1 { + CREATE TABLE x2(a, "key col" PRIMARY KEY, b, c) WITHOUT ROWID; + INSERT INTO x2 VALUES('a b', 1, 'c d' , 'e f'); + INSERT INTO x2 VALUES('x y', -40, 'z z' , 'y x'); + + CREATE VIRTUAL TABLE t2 USING fts5(a, c, content=x2, content_rowid='key col'); + INSERT INTO t2(t2) VALUES('rebuild'); +} + +do_execsql_test 4.2 { SELECT rowid FROM t2 } {-40 1} +do_execsql_test 4.3 { SELECT rowid FROM t2 WHERE t2 MATCH 'c'} {} +do_execsql_test 4.4 { SELECT rowid FROM t2 WHERE t2 MATCH 'a'} {1} +do_execsql_test 4.5 { SELECT rowid FROM t2 WHERE t2 MATCH 'x'} {-40} + +do_execsql_test 4.6 { INSERT INTO t2(t2) VALUES('integrity-check') } {} + +do_execsql_test 4.7 { + DELETE FROM x2 WHERE "key col" = 1; + INSERT INTO t2(t2, rowid, a, c) VALUES('delete', 1, 'a b', 'e f'); + INSERT INTO t2(t2) VALUES('integrity-check'); +} + +do_execsql_test 4.8 { SELECT rowid FROM t2 WHERE t2 MATCH 'b'} {} +do_execsql_test 4.9 { SELECT rowid FROM t2 WHERE t2 MATCH 'y'} {-40} + + finish_test + diff --git a/ext/fts5/test/fts5fault2.test b/ext/fts5/test/fts5fault2.test index fe1d205ec8..9bf9b85e2b 100644 --- a/ext/fts5/test/fts5fault2.test +++ b/ext/fts5/test/fts5fault2.test @@ -98,5 +98,24 @@ do_faultsim_test 3.1 -faults oom-trans* -prep { faultsim_test_result {0 {}} } +#------------------------------------------------------------------------- +# OOM within an 'integrity-check' operation. 
+# +reset_db +db func rnddoc fts5_rnddoc +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE zzz USING fts5(z); + INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32); + WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<10) + INSERT INTO zzz SELECT rnddoc(10) || ' xccc' FROM ii; +} + +do_faultsim_test 4.1 -faults oom-trans* -prep { +} -body { + execsql { INSERT INTO zzz(zzz) VALUES('integrity-check') } +} -test { + faultsim_test_result {0 {}} +} + finish_test diff --git a/ext/fts5/tool/showfts5.tcl b/ext/fts5/tool/showfts5.tcl new file mode 100644 index 0000000000..3ed5680182 --- /dev/null +++ b/ext/fts5/tool/showfts5.tcl @@ -0,0 +1,31 @@ + + +proc usage {} { + puts stderr "usage: $::argv0 database table" + puts stderr "" + exit 1 +} + +set o(vtab) fts5 +set o(tok) "" +set o(limit) 0 +set o(automerge) -1 +set o(crisismerge) -1 + +if {[llength $argv]!=2} usage + +set database [lindex $argv 0] +set tbl [lindex $argv 1] + +sqlite3 db $database + +db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id=10" { + foreach lvl [lrange $d 1 end] { + puts $lvl + } +} + + + + + diff --git a/manifest b/manifest index 3e37056c03..7fdbb96190 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Further\stests\sfor\sfts5. -D 2015-04-27T11:31:56.573 +C Change\sthe\sfts5\scontent=\soption\sso\sthat\sit\smatches\sfts5\scolumns\swith\sthe\sunderlying\stable\scolumns\sby\sname,\snot\sby\stheir\sposition\swithin\sthe\sCREATE\sTABLE\sstatement. 
+D 2015-04-27T16:21:49.481 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 3383b8a44766c68bda812b68ce74684c6b87787f F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 7dc902e195e00c5820b85d71c2476500c2cf9027 +F ext/fts5/fts5Int.h d148c951deae924105d77f21f25287b60c57327a F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 -F ext/fts5/fts5_config.c 43fcf838d3a3390d1245e3d5e651fa5cc1df575b +F ext/fts5/fts5_config.c f344ffa24d2add70fd5bde2b73c44846ad7a06bd F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 5f969f5b6f5e022c9c62973dd6e4e82e1d5f3f30 -F ext/fts5/fts5_storage.c 87f85986a6d07391f4e0ddfa9799dbecc40fa165 +F ext/fts5/fts5_index.c 65d5a75b1ba5f6db9f283f91e71aaa14105dcef7 +F ext/fts5/fts5_storage.c d5c3567b31a0e334ac7d4ac67a2be1c6ae9165cd F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d @@ -134,14 +134,14 @@ F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test 6a5717faaf7f1e0e866360022d284903f3a4eede F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b -F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 +F ext/fts5/test/fts5content.test c4f5b0fe1bc7523bb6706591d05d1194a0aec452 F ext/fts5/test/fts5corrupt.test 
9e8524281aa322c522c1d6e2b347e24e060c2727 F ext/fts5/test/fts5corrupt2.test 494111fd4f2dab36499cf97718eaba1f7c11e9d0 F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d -F ext/fts5/test/fts5fault2.test 37c325cb3e6e27c74c122bb49673b4ae0862df33 +F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test f8935b92976ae645d43205562fdbb0c8511dd049 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 @@ -156,6 +156,7 @@ F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/tool/loadfts5.tcl 1e126891d14ab85dcdb0fac7755a4cd5ba52e8b8 +F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1307,7 +1308,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e5aaa01306597ffd2475dcb83ae889393f68d315 -R 328723993a482062ac5cfa3f373fa969 +P ffeb3ef3cfec3681b72bb28cfa612aa15e07887d +R 16d25c5b756f9ed2c991a70239bfd1f6 U dan -Z dd82cb2122e4379a606aef38aa888497 +Z e5577dade0e18372482b97b5f2daae05 diff --git a/manifest.uuid b/manifest.uuid index 02c02a5041..c88421cb6e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-ffeb3ef3cfec3681b72bb28cfa612aa15e07887d \ No newline at end of file +e38e2bb637844dae8ae5d5f3e23d8369e1b91e45 \ No newline at end of file From cdbd150170eeb935b639f578ff6e048694b78ad2 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 28 Apr 2015 18:35:28 +0000 Subject: [PATCH 116/206] Improve coverage of fts5 tests. FossilOrigin-Name: 8e8136f2dc08082c2984462719d9cba0f212c92a --- ext/fts5/fts5.c | 24 +++++----- ext/fts5/fts5Int.h | 14 ++---- ext/fts5/fts5_buffer.c | 60 +---------------------- ext/fts5/fts5_hash.c | 4 +- ext/fts5/fts5_index.c | 51 ++++++++------------ ext/fts5/fts5_storage.c | 35 +++++++------- ext/fts5/fts5_tcl.c | 39 ++++++++++++++- ext/fts5/test/fts5fault3.test | 26 ++++++++++ ext/fts5/test/fts5fault4.test | 45 ++++++++++++++++++ ext/fts5/test/fts5hash.test | 89 +++++++++++++++++++++++++++++++++++ ext/fts5/test/fts5prefix.test | 1 + manifest | 30 ++++++------ manifest.uuid | 2 +- 13 files changed, 274 insertions(+), 146 deletions(-) create mode 100644 ext/fts5/test/fts5fault4.test create mode 100644 ext/fts5/test/fts5hash.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index baa51f4a33..14ff73c718 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -266,17 +266,13 @@ static int fts5IsContentless(Fts5Table *pTab){ } /* -** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy -** argument is non-zero, attempt delete the shadow tables from teh database +** Delete a virtual table handle allocated by fts5InitVtab(). 
*/ -static int fts5FreeVtab(Fts5Table *pTab, int bDestroy){ +static void fts5FreeVtab(Fts5Table *pTab){ int rc = SQLITE_OK; if( pTab ){ - int rc2; - rc2 = sqlite3Fts5IndexClose(pTab->pIndex, bDestroy); - if( rc==SQLITE_OK ) rc = rc2; - rc2 = sqlite3Fts5StorageClose(pTab->pStorage, bDestroy); - if( rc==SQLITE_OK ) rc = rc2; + sqlite3Fts5IndexClose(pTab->pIndex); + sqlite3Fts5StorageClose(pTab->pStorage); sqlite3Fts5ConfigFree(pTab->pConfig); sqlite3_free(pTab); } @@ -287,14 +283,20 @@ static int fts5FreeVtab(Fts5Table *pTab, int bDestroy){ ** The xDisconnect() virtual table method. */ static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ - return fts5FreeVtab((Fts5Table*)pVtab, 0); + fts5FreeVtab((Fts5Table*)pVtab); + return SQLITE_OK; } /* ** The xDestroy() virtual table method. */ static int fts5DestroyMethod(sqlite3_vtab *pVtab){ - return fts5FreeVtab((Fts5Table*)pVtab, 1); + Fts5Table *pTab = (Fts5Table*)pVtab; + int rc = sqlite3Fts5DropAll(pTab->pConfig); + if( rc==SQLITE_OK ){ + fts5FreeVtab((Fts5Table*)pVtab); + } + return rc; } /* @@ -352,7 +354,7 @@ static int fts5InitVtab( } if( rc!=SQLITE_OK ){ - fts5FreeVtab(pTab, 0); + fts5FreeVtab(pTab); pTab = 0; }else if( bCreate ){ fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 2695cd0ff8..d3bdbadc36 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -177,7 +177,6 @@ void sqlite3Fts5BufferFree(Fts5Buffer*); void sqlite3Fts5BufferZero(Fts5Buffer*); void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); -void sqlite3Fts5BufferAppendListElem(int*, Fts5Buffer*, const char*, int); void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int); #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) @@ -220,13 +219,6 @@ struct Fts5PoslistWriter { }; int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); -int sqlite3Fts5PoslistNext( - const u8 *a, int n, /* Buffer 
containing poslist */ - int *pi, /* IN/OUT: Offset within a[] */ - int *piCol, /* IN/OUT: Current column */ - int *piOff /* IN/OUT: Current token offset */ -); - int sqlite3Fts5PoslistNext64( const u8 *a, int n, /* Buffer containing poslist */ int *pi, /* IN/OUT: Offset within a[] */ @@ -259,7 +251,7 @@ typedef struct Fts5IndexIter Fts5IndexIter; ** Create/destroy an Fts5Index object. */ int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); -int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy); +int sqlite3Fts5IndexClose(Fts5Index *p); /* ** for( @@ -444,9 +436,9 @@ void sqlite3Fts5HashScanEntry(Fts5Hash *, typedef struct Fts5Storage Fts5Storage; int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); -int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy); +int sqlite3Fts5StorageClose(Fts5Storage *p); -int sqlite3Fts5DropTable(Fts5Config*, const char *zPost); +int sqlite3Fts5DropAll(Fts5Config*); int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); int sqlite3Fts5StorageDelete(Fts5Storage *p, i64); diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 94fb4216d1..26304f68aa 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -92,9 +92,8 @@ void sqlite3Fts5BufferAppendString( const char *zStr ){ int nStr = strlen(zStr); - if( sqlite3Fts5BufferGrow(pRc, pBuf, nStr+1) ) return; - sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr, (const u8*)zStr); - if( *pRc==SQLITE_OK ) pBuf->p[pBuf->n] = 0x00; + sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); + pBuf->n--; } /* @@ -228,61 +227,6 @@ int sqlite3Fts5PoslistWriterAppend( return rc; } -int sqlite3Fts5PoslistNext( - const u8 *a, int n, /* Buffer containing poslist */ - int *pi, /* IN/OUT: Offset within a[] */ - int *piCol, /* IN/OUT: Current column */ - int *piOff /* IN/OUT: Current token offset */ -){ - int i = *pi; - int iVal; - if( i>=n ){ - /* EOF */ - return 1; - } - i += getVarint32(&a[i], 
iVal); - if( iVal==1 ){ - i += getVarint32(&a[i], iVal); - *piCol = iVal; - *piOff = 0; - i += getVarint32(&a[i], iVal); - } - *piOff += (iVal-2); - *pi = i; - return 0; -} - -void sqlite3Fts5BufferAppendListElem( - int *pRc, /* IN/OUT: Error code */ - Fts5Buffer *pBuf, /* Buffer to append to */ - const char *z, int n /* Value to append to buffer */ -){ - int bParen = (n==0); - int nMax = n*2 + 2 + 1; - u8 *pOut; - int i; - - /* Ensure the buffer has space for the new list element */ - if( sqlite3Fts5BufferGrow(pRc, pBuf, nMax) ) return; - pOut = &pBuf->p[pBuf->n]; - - /* Figure out if we need the enclosing {} */ - for(i=0; in = pOut - pBuf->p; - *pOut = '\0'; -} - void *sqlite3Fts5MallocZero(int *pRc, int nByte){ void *pRet = 0; if( *pRc==SQLITE_OK ){ diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index fa7701a6d0..8bafbd6c82 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -417,8 +417,8 @@ int sqlite3Fts5HashScanInit( } void sqlite3Fts5HashScanNext(Fts5Hash *p){ - Fts5HashEntry *pScan = p->pScan; - if( pScan ) p->pScan = pScan->pScanNext; + assert( !sqlite3Fts5HashScanEof(p) ); + p->pScan = p->pScan->pScanNext; } int sqlite3Fts5HashScanEof(Fts5Hash *p){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index a0cc72b567..8701ff4059 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -702,16 +702,7 @@ int sqlite3Fts5GetVarintLen(u32 iVal){ ** the Fts5Index handle passed as the first argument. 
*/ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ - void *pRet = 0; - if( p->rc==SQLITE_OK ){ - pRet = sqlite3_malloc(nByte); - if( pRet==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - memset(pRet, 0, nByte); - } - } - return pRet; + return sqlite3Fts5MallocZero(&p->rc, nByte); } /* @@ -4213,28 +4204,27 @@ int sqlite3Fts5IndexOpen( int rc = SQLITE_OK; Fts5Index *p; /* New object */ - *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); - if( !p ) return SQLITE_NOMEM; - - memset(p, 0, sizeof(Fts5Index)); - p->pConfig = pConfig; - p->nWorkUnit = FTS5_WORK_UNIT; - p->nMaxPendingData = 1024*1024; - p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); - if( p->zDataTbl==0 ){ - rc = SQLITE_NOMEM; - }else if( bCreate ){ - rc = sqlite3Fts5CreateTable( - pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr - ); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts5IndexReinit(p); + *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); + if( rc==SQLITE_OK ){ + p->pConfig = pConfig; + p->nWorkUnit = FTS5_WORK_UNIT; + p->nMaxPendingData = 1024*1024; + p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); + if( p->zDataTbl==0 ){ + rc = SQLITE_NOMEM; + }else if( bCreate ){ + rc = sqlite3Fts5CreateTable( + pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexReinit(p); + } } } - assert( p->rc==SQLITE_OK || rc!=SQLITE_OK ); + assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); if( rc ){ - sqlite3Fts5IndexClose(p, 0); + sqlite3Fts5IndexClose(p); *pp = 0; } return rc; @@ -4243,12 +4233,9 @@ int sqlite3Fts5IndexOpen( /* ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). 
*/ -int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ +int sqlite3Fts5IndexClose(Fts5Index *p){ int rc = SQLITE_OK; if( p ){ - if( bDestroy ){ - rc = sqlite3Fts5DropTable(p->pConfig, "data"); - } assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index ff60c0dece..e7f5027d86 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -154,13 +154,25 @@ static int fts5ExecPrintf( } /* -** Drop the shadow table with the postfix zPost (e.g. "content"). Return -** SQLITE_OK if successful or an SQLite error code otherwise. +** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error +** code otherwise. */ -int sqlite3Fts5DropTable(Fts5Config *pConfig, const char *zPost){ - return fts5ExecPrintf(pConfig->db, 0, "DROP TABLE IF EXISTS %Q.'%q_%q'", - pConfig->zDb, pConfig->zName, zPost +int sqlite3Fts5DropAll(Fts5Config *pConfig){ + int rc = fts5ExecPrintf(pConfig->db, 0, + "DROP TABLE IF EXISTS %Q.'%q_data';" + "DROP TABLE IF EXISTS %Q.'%q_docsize';" + "DROP TABLE IF EXISTS %Q.'%q_config';", + pConfig->zDb, pConfig->zName, + pConfig->zDb, pConfig->zName, + pConfig->zDb, pConfig->zName ); + if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ + rc = fts5ExecPrintf(pConfig->db, 0, + "DROP TABLE IF EXISTS %Q.'%q_content';", + pConfig->zDb, pConfig->zName + ); + } + return rc; } /* @@ -248,7 +260,7 @@ int sqlite3Fts5StorageOpen( } if( rc ){ - sqlite3Fts5StorageClose(p, 0); + sqlite3Fts5StorageClose(p); *pp = 0; } return rc; @@ -257,7 +269,7 @@ int sqlite3Fts5StorageOpen( /* ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). 
*/ -int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy){ +int sqlite3Fts5StorageClose(Fts5Storage *p){ int rc = SQLITE_OK; if( p ){ int i; @@ -267,15 +279,6 @@ int sqlite3Fts5StorageClose(Fts5Storage *p, int bDestroy){ sqlite3_finalize(p->aStmt[i]); } - /* If required, remove the shadow tables from the database */ - if( bDestroy ){ - if( p->pConfig->eContent==FTS5_CONTENT_NORMAL ){ - rc = sqlite3Fts5DropTable(p->pConfig, "content"); - } - if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "docsize"); - if( rc==SQLITE_OK ) rc = sqlite3Fts5DropTable(p->pConfig, "config"); - } - sqlite3_free(p); } return rc; diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 5bbfc821a2..568d816ff1 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -865,6 +865,42 @@ static int f5tMayBeCorrupt( return TCL_OK; } + +static unsigned int f5t_fts5HashKey(int nSlot, const char *p, int n){ + int i; + unsigned int h = 13; + for(i=n-1; i>=0; i--){ + h = (h << 3) ^ h ^ p[i]; + } + return (h % nSlot); +} + +static int f5tTokenHash( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + int bOld = sqlite3_fts5_may_be_corrupt; + char *z; + int n; + unsigned int iVal; + int nSlot; + + if( objc!=3 ){ + Tcl_WrongNumArgs(interp, 1, objv, "NSLOT TOKEN"); + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[1], &nSlot) ){ + return TCL_ERROR; + } + z = Tcl_GetStringFromObj(objv[2], &n); + + iVal = f5t_fts5HashKey(nSlot, z, n); + Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal)); + return TCL_OK; +} + /* ** Entry point. 
*/ @@ -878,7 +914,8 @@ int Fts5tcl_Init(Tcl_Interp *interp){ { "sqlite3_fts5_token", f5tTokenizerReturn, 1 }, { "sqlite3_fts5_tokenize", f5tTokenize, 0 }, { "sqlite3_fts5_create_function", f5tCreateFunction, 0 }, - { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 } + { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 }, + { "sqlite3_fts5_token_hash", f5tTokenHash, 0 } }; int i; F5tTokenizerContext *pContext; diff --git a/ext/fts5/test/fts5fault3.test b/ext/fts5/test/fts5fault3.test index 4d438d7f6c..bfeead4e23 100644 --- a/ext/fts5/test/fts5fault3.test +++ b/ext/fts5/test/fts5fault3.test @@ -80,6 +80,32 @@ do_faultsim_test 2 -faults oom-* -prep { faultsim_test_result [list 0 {}] } +#------------------------------------------------------------------------- +# An OOM while flushing an unusually large term to disk. +# +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE xx USING fts5(x); +} +faultsim_save_and_close + +set doc [fts5_rnddoc 1000] +do_faultsim_test 3.1 -faults oom-* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO xx(x) VALUES ($::doc) } +} -test { + faultsim_test_result [list 0 {}] +} + +set doc [string repeat "abc " 100] +do_faultsim_test 3.2 -faults oom-* -prep { + faultsim_restore_and_reopen +} -body { + execsql { INSERT INTO xx(x) VALUES ($::doc) } +} -test { + faultsim_test_result [list 0 {}] +} diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test new file mode 100644 index 0000000000..de79900bb6 --- /dev/null +++ b/ext/fts5/test/fts5fault4.test @@ -0,0 +1,45 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. 
+# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5fault4 + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +#------------------------------------------------------------------------- +# An OOM while dropping an fts5 table. +# +db func rnddoc fts5_rnddoc +do_test 1.0 { + execsql { CREATE VIRTUAL TABLE xx USING fts5(x) } +} {} +faultsim_save_and_close + +do_faultsim_test 1 -faults oom-* -prep { + faultsim_restore_and_reopen + execsql { SELECT * FROM xx } +} -body { + execsql { DROP TABLE xx } +} -test { + faultsim_test_result [list 0 {}] +} + + +finish_test + diff --git a/ext/fts5/test/fts5hash.test b/ext/fts5/test/fts5hash.test new file mode 100644 index 0000000000..94119603c7 --- /dev/null +++ b/ext/fts5/test/fts5hash.test @@ -0,0 +1,89 @@ +# 2015 April 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# The tests in this file are focused on the code in fts5_hash.c. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5hash + +#------------------------------------------------------------------------- +# Return a list of tokens (a vocabulary) that all share the same hash +# key value. This can be used to test hash collisions. 
+# +proc build_vocab1 {args} { + + set O(-nslot) 1024 + set O(-nword) 20 + set O(-hash) 88 + set O(-prefix) "" + + if {[llength $args] % 2} { error "bad args" } + array set O2 $args + foreach {k v} $args { + if {[info exists O($k)]==0} { error "bad option: $k" } + set O($k) $v + } + + set L [list] + while {[llength $L] < $O(-nword)} { + set t "$O(-prefix)[random_token]" + set h [sqlite3_fts5_token_hash $O(-nslot) $t] + if {$O(-hash)==$h} { lappend L $t } + } + return $L +} + +proc random_token {} { + set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] + set iVal [expr int(rand() * 2000000)] + return [string map $map $iVal] +} + +proc random_doc {vocab nWord} { + set doc "" + set nVocab [llength $vocab] + for {set i 0} {$i<$nWord} {incr i} { + set j [expr {int(rand() * $nVocab)}] + lappend doc [lindex $vocab $j] + } + return $doc +} + +set vocab [build_vocab1] +db func r random_doc + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE eee USING fts5(e, ee); + BEGIN; + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) + INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii; + INSERT INTO eee(eee) VALUES('integrity-check'); + COMMIT; + INSERT INTO eee(eee) VALUES('integrity-check'); +} + +set hash [sqlite3_fts5_token_hash 1024 xyz] +set vocab [build_vocab1 -prefix xyz -hash $hash] +lappend vocab xyz + +do_execsql_test 1.1 { + BEGIN; + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) + INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii; + INSERT INTO eee(eee) VALUES('integrity-check'); + COMMIT; + INSERT INTO eee(eee) VALUES('integrity-check'); +} + + +finish_test + diff --git a/ext/fts5/test/fts5prefix.test b/ext/fts5/test/fts5prefix.test index 44c21a744c..7c5a1a39a9 100644 --- a/ext/fts5/test/fts5prefix.test +++ b/ext/fts5/test/fts5prefix.test @@ -9,6 +9,7 @@ # #*********************************************************************** # +# This file containst tests focused on prefix indexes. 
# source [file join [file dirname [info script]] fts5_common.tcl] diff --git a/manifest b/manifest index 7fdbb96190..db7571000e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\sfts5\scontent=\soption\sso\sthat\sit\smatches\sfts5\scolumns\swith\sthe\sunderlying\stable\scolumns\sby\sname,\snot\sby\stheir\sposition\swithin\sthe\sCREATE\sTABLE\sstatement. -D 2015-04-27T16:21:49.481 +C Improve\scoverage\sof\sfts5\stests. +D 2015-04-28T18:35:28.633 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,17 +104,17 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 3383b8a44766c68bda812b68ce74684c6b87787f +F ext/fts5/fts5.c d9a99a595c0e341cb24918bc67c323d2444a3036 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h d148c951deae924105d77f21f25287b60c57327a +F ext/fts5/fts5Int.h f573fe6c50471f1d66682fce282da801009c54e1 F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 -F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 +F ext/fts5/fts5_buffer.c 8c8cfe7f09ca2767ab53ea883f9a0af0edb6bbae F ext/fts5/fts5_config.c f344ffa24d2add70fd5bde2b73c44846ad7a06bd F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd -F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 65d5a75b1ba5f6db9f283f91e71aaa14105dcef7 -F ext/fts5/fts5_storage.c d5c3567b31a0e334ac7d4ac67a2be1c6ae9165cd -F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 +F ext/fts5/fts5_hash.c 29d8b0668727863cc1f1efa65efe4dd78635b016 +F ext/fts5/fts5_index.c 
de588982b0237b1605d6c37afd115b34c95c3da1 +F ext/fts5/fts5_storage.c ef60fc9dcc4e274f9589165e26833173c273ae18 +F ext/fts5/fts5_tcl.c af1d37fa93bcabc926aa4e89500adedbbe84a520 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -142,13 +142,15 @@ F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 -F ext/fts5/test/fts5fault3.test f8935b92976ae645d43205562fdbb0c8511dd049 +F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 +F ext/fts5/test/fts5fault4.test e860e0cf7e56f2f87330023be1f1ced44128d5c8 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d +F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e -F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 +F ext/fts5/test/fts5prefix.test 1287803c3df0e43f536196256fb9e0e6baccb4f1 F ext/fts5/test/fts5rebuild.test ee6792715c6c528cc188e7869d67c3c655889ddb F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 F ext/fts5/test/fts5tokenizer.test 7a6ee24db908c09a0dc1eba634ffa17afcc05d86 @@ -1308,7 +1310,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 
ffeb3ef3cfec3681b72bb28cfa612aa15e07887d -R 16d25c5b756f9ed2c991a70239bfd1f6 +P e38e2bb637844dae8ae5d5f3e23d8369e1b91e45 +R 36584b17863acf9c4ced66420a8dd86e U dan -Z e5577dade0e18372482b97b5f2daae05 +Z 4aeb27eb1cd105a6fffeab8d10b6e855 diff --git a/manifest.uuid b/manifest.uuid index c88421cb6e..ed44a022b6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e38e2bb637844dae8ae5d5f3e23d8369e1b91e45 \ No newline at end of file +8e8136f2dc08082c2984462719d9cba0f212c92a \ No newline at end of file From 90dd70226f402c4b539fe47426192584777405f8 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 28 Apr 2015 20:24:50 +0000 Subject: [PATCH 117/206] Fix an fts5 bug in handling writes while there are active cursors. FossilOrigin-Name: 07f70955392697556ca2951c9b6c3a5204cd5ec0 --- ext/fts5/fts5.c | 70 ++++++++++++++++++++++++++++++++-- ext/fts5/fts5_expr.c | 4 ++ ext/fts5/test/fts5restart.test | 48 +++++++++++++++++++++++ manifest | 15 ++++---- manifest.uuid | 2 +- 5 files changed, 128 insertions(+), 11 deletions(-) create mode 100644 ext/fts5/test/fts5restart.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 14ff73c718..928ad6d916 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -186,6 +186,7 @@ struct Fts5Cursor { #define FTS5CSR_REQUIRE_DOCSIZE 0x02 #define FTS5CSR_EOF 0x04 #define FTS5CSR_FREE_ZRANK 0x08 +#define FTS5CSR_REQUIRE_RESEEK 0x10 /* ** Macros to Set(), Clear() and Test() cursor flags. @@ -269,14 +270,12 @@ static int fts5IsContentless(Fts5Table *pTab){ ** Delete a virtual table handle allocated by fts5InitVtab(). */ static void fts5FreeVtab(Fts5Table *pTab){ - int rc = SQLITE_OK; if( pTab ){ sqlite3Fts5IndexClose(pTab->pIndex); sqlite3Fts5StorageClose(pTab->pStorage); sqlite3Fts5ConfigFree(pTab->pConfig); sqlite3_free(pTab); } - return rc; } /* @@ -608,6 +607,63 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ return rc; } + +/* +** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors +** open on table pTab. 
+*/ +static void fts5TripCursors(Fts5Table *pTab){ + Fts5Cursor *pCsr; + for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ + if( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_MATCH + && pCsr->base.pVtab==(sqlite3_vtab*)pTab + ){ + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK); + } + } +} + +/* +** If the REQUIRE_RESEEK flag is set on the cursor passed as the first +** argument, close and reopen all Fts5IndexIter iterators that the cursor +** is using. Then attempt to move the cursor to a rowid equal to or laster +** (in the cursors sort order - ASC or DESC) than the current rowid. +** +** If the new rowid is not equal to the old, set output parameter *pbSkip +** to 1 before returning. Otherwise, leave it unchanged. +** +** Return SQLITE_OK if successful or if no reseek was required, or an +** error code if an error occurred. +*/ +static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ + int rc = SQLITE_OK; + assert( *pbSkip==0 ); + if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){ + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + int bDesc = ((pCsr->idxNum & FTS5_ORDER_DESC) ? 1 : 0); + i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); + + rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bDesc); + while( rc==SQLITE_OK && sqlite3Fts5ExprEof(pCsr->pExpr)==0 ){ + i64 ii = sqlite3Fts5ExprRowid(pCsr->pExpr); + if( ii==iRowid ) break; + if( (bDesc && iiiRowid) ){ + *pbSkip = 1; + break; + } + rc = sqlite3Fts5ExprNext(pCsr->pExpr); + } + + CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK); + fts5CsrNewrow(pCsr); + if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ + CsrFlagSet(pCsr, FTS5CSR_EOF); + } + } + return rc; +} + + /* ** Advance the cursor to the next row in the table that matches the ** search criteria. 
@@ -619,7 +675,10 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = FTS5_PLAN(pCsr->idxNum); - int rc = SQLITE_OK; + int bSkip = 0; + int rc; + + if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; switch( ePlan ){ case FTS5_PLAN_MATCH: @@ -1156,6 +1215,7 @@ static int fts5UpdateMethod( assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); assert( pVtab->zErrMsg==0 ); + fts5TripCursors(pTab); if( rc==SQLITE_OK && eType0==SQLITE_INTEGER ){ if( fts5IsContentless(pTab) ){ pTab->base.zErrMsg = sqlite3_mprintf( @@ -1196,6 +1256,7 @@ static int fts5SyncMethod(sqlite3_vtab *pVtab){ int rc; Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_SYNC, 0); + fts5TripCursors(pTab); rc = sqlite3Fts5StorageSync(pTab->pStorage, 1); return rc; } @@ -1717,6 +1778,7 @@ static int fts5RenameMethod( static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); + fts5TripCursors(pTab); return sqlite3Fts5StorageSync(pTab->pStorage, 0); } @@ -1728,6 +1790,7 @@ static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); + fts5TripCursors(pTab); return sqlite3Fts5StorageSync(pTab->pStorage, 0); } @@ -1739,6 +1802,7 @@ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); + fts5TripCursors(pTab); return sqlite3Fts5StorageRollback(pTab->pStorage); } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index af44bcf760..6463c64c3d 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ 
-776,6 +776,10 @@ static int fts5ExprNearInitAll( pPhrase = pNear->apPhrase[i]; for(j=0; j<pPhrase->nTerm; j++){ pTerm = &pPhrase->aTerm[j]; + if( pTerm->pIter ){ + sqlite3Fts5IterClose(pTerm->pIter); + pTerm->pIter = 0; + } rc = sqlite3Fts5IndexQuery( pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | diff --git a/ext/fts5/test/fts5restart.test b/ext/fts5/test/fts5restart.test new file mode 100644 index 0000000000..3b1f334221 --- /dev/null +++ b/ext/fts5/test/fts5restart.test @@ -0,0 +1,48 @@ +# 2015 April 28 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file focuses on testing the planner (xBestIndex function). +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5restart + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE f1 USING fts5(ff); +} + +do_test 1.1 { + for {set i 1} {$i < 1000} {incr i} { + execsql { INSERT INTO f1 VALUES('a b c d e') } + lappend lRowid $i + } +} {} + +do_execsql_test 1.2 { + SELECT rowid FROM f1 WHERE f1 MATCH 'c'; +} $lRowid + +breakpoint +do_test 1.3 { + set res [list] + db eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } { + if {$rowid == 100} { + execsql { INSERT INTO f1(f1) VALUES('optimize') } + } + lappend res $rowid + } + set res +} $lRowid + + + +finish_test + diff --git a/manifest b/manifest index db7571000e..370597381b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\scoverage\sof\sfts5\stests. -D 2015-04-28T18:35:28.633 +C Fix\san\sfts5\sbug\sin\shandling\swrites\swhile\sthere\sare\sactive\scursors. 
+D 2015-04-28T20:24:50.023 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,13 +104,13 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c d9a99a595c0e341cb24918bc67c323d2444a3036 +F ext/fts5/fts5.c 45e82b584bb3fce22c82d8521bccaa2a6e9202fe F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a F ext/fts5/fts5Int.h f573fe6c50471f1d66682fce282da801009c54e1 F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 8c8cfe7f09ca2767ab53ea883f9a0af0edb6bbae F ext/fts5/fts5_config.c f344ffa24d2add70fd5bde2b73c44846ad7a06bd -F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd +F ext/fts5/fts5_expr.c e647a2626af5c80a6325532b23eea6d1eb252a78 F ext/fts5/fts5_hash.c 29d8b0668727863cc1f1efa65efe4dd78635b016 F ext/fts5/fts5_index.c de588982b0237b1605d6c37afd115b34c95c3da1 F ext/fts5/fts5_storage.c ef60fc9dcc4e274f9589165e26833173c273ae18 @@ -152,6 +152,7 @@ F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 1287803c3df0e43f536196256fb9e0e6baccb4f1 F ext/fts5/test/fts5rebuild.test ee6792715c6c528cc188e7869d67c3c655889ddb +F ext/fts5/test/fts5restart.test 4934233dd2633665198d563c79f1015e3a2b9518 F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 F ext/fts5/test/fts5tokenizer.test 7a6ee24db908c09a0dc1eba634ffa17afcc05d86 F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d @@ -1310,7 +1311,7 @@ F tool/vdbe_profile.tcl 
67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e38e2bb637844dae8ae5d5f3e23d8369e1b91e45 -R 36584b17863acf9c4ced66420a8dd86e +P 8e8136f2dc08082c2984462719d9cba0f212c92a +R 65017a9d42efb9cfacfe6e269e92cdbd U dan -Z 4aeb27eb1cd105a6fffeab8d10b6e855 +Z 91e05f6f4d01e29ed38305d1f49be921 diff --git a/manifest.uuid b/manifest.uuid index ed44a022b6..e0e7db8c5b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8e8136f2dc08082c2984462719d9cba0f212c92a \ No newline at end of file +07f70955392697556ca2951c9b6c3a5204cd5ec0 \ No newline at end of file From 7b2ec1ae41531e0a726dfbb014421be746f35b21 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 29 Apr 2015 20:54:08 +0000 Subject: [PATCH 118/206] Improve fts5 tests. FossilOrigin-Name: c1f07a3aa98eac87e2747527d15e5e5562221ceb --- ext/fts5/fts5.c | 144 ++++++++++++++++------------- ext/fts5/fts5Int.h | 3 +- ext/fts5/fts5_aux.c | 5 +- ext/fts5/fts5_config.c | 10 +-- ext/fts5/fts5_expr.c | 6 +- ext/fts5/fts5_tcl.c | 7 +- ext/fts5/fts5_tokenize.c | 7 +- ext/fts5/test/fts5aa.test | 2 +- ext/fts5/test/fts5al.test | 8 ++ ext/fts5/test/fts5aux.test | 57 ++++++++++++ ext/fts5/test/fts5content.test | 18 ++++ ext/fts5/test/fts5corrupt.test | 17 ++++ ext/fts5/test/fts5doclist.test | 41 +++++++++ ext/fts5/test/fts5fault4.test | 150 +++++++++++++++++++++++++++++++ ext/fts5/test/fts5plan.test | 61 +++++++++++++ ext/fts5/test/fts5rank.test | 39 ++++++++ ext/fts5/test/fts5rebuild.test | 11 +++ ext/fts5/test/fts5restart.test | 100 ++++++++++++++++++++- ext/fts5/test/fts5tokenizer.test | 8 ++ manifest | 44 ++++----- manifest.uuid | 2 +- 21 files changed, 637 insertions(+), 103 deletions(-) create mode 100644 ext/fts5/test/fts5aux.test create mode 100644 ext/fts5/test/fts5doclist.test create mode 100644 ext/fts5/test/fts5plan.test create mode 100644 
ext/fts5/test/fts5rank.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 928ad6d916..b37f72d58d 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -529,46 +529,48 @@ static void fts5CsrNewrow(Fts5Cursor *pCsr){ ** on the xClose method of the virtual table interface. */ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ - Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); - Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - Fts5Cursor **pp; - Fts5Auxdata *pData; - Fts5Auxdata *pNext; + if( pCursor ){ + Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); + Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; + Fts5Cursor **pp; + Fts5Auxdata *pData; + Fts5Auxdata *pNext; - fts5CsrNewrow(pCsr); - if( pCsr->pStmt ){ - int eStmt = fts5StmtType(pCsr->idxNum); - sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); - } - if( pCsr->pSorter ){ - Fts5Sorter *pSorter = pCsr->pSorter; - sqlite3_finalize(pSorter->pStmt); - sqlite3_free(pSorter); - } - - if( pCsr->idxNum!=FTS5_PLAN_SOURCE ){ - sqlite3Fts5ExprFree(pCsr->pExpr); - } + fts5CsrNewrow(pCsr); + if( pCsr->pStmt ){ + int eStmt = fts5StmtType(pCsr->idxNum); + sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); + } + if( pCsr->pSorter ){ + Fts5Sorter *pSorter = pCsr->pSorter; + sqlite3_finalize(pSorter->pStmt); + sqlite3_free(pSorter); + } - for(pData=pCsr->pAuxdata; pData; pData=pNext){ - pNext = pData->pNext; - if( pData->xDelete ) pData->xDelete(pData->pPtr); - sqlite3_free(pData); + if( pCsr->idxNum!=FTS5_PLAN_SOURCE ){ + sqlite3Fts5ExprFree(pCsr->pExpr); + } + + for(pData=pCsr->pAuxdata; pData; pData=pNext){ + pNext = pData->pNext; + if( pData->xDelete ) pData->xDelete(pData->pPtr); + sqlite3_free(pData); + } + + /* Remove the cursor from the Fts5Global.pCsr list */ + for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); + *pp = pCsr->pNext; + + sqlite3_finalize(pCsr->pRankArgStmt); + sqlite3_free(pCsr->apRankArg); + + sqlite3_free(pCsr->zSpecial); + if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) 
){ + sqlite3_free(pCsr->zRank); + sqlite3_free(pCsr->zRankArgs); + } + sqlite3_free(pCsr); } - - /* Remove the cursor from the Fts5Global.pCsr list */ - for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); - *pp = pCsr->pNext; - - sqlite3_finalize(pCsr->pRankArgStmt); - sqlite3_free(pCsr->apRankArg); - - sqlite3_free(pCsr->zSpecial); - if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){ - sqlite3_free(pCsr->zRank); - sqlite3_free(pCsr->zRankArgs); - } - sqlite3_free(pCsr); return SQLITE_OK; } @@ -893,7 +895,11 @@ static int fts5CursorParseRank( char *zRank = 0; char *zRankArgs = 0; - rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); + if( z==0 ){ + if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR; + }else{ + rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); + } if( rc==SQLITE_OK ){ pCsr->zRank = zRank; pCsr->zRankArgs = zRankArgs; @@ -1207,16 +1213,16 @@ static int fts5UpdateMethod( ** 3. Values for each of the nCol matchable columns. ** 4. Values for the two hidden columns (<tablename> and "rank"). 
*/ - assert( nArg==1 || nArg==(2 + pConfig->nCol + 2) ); eType0 = sqlite3_value_type(apVal[0]); eConflict = sqlite3_vtab_on_conflict(pConfig->db); assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); assert( pVtab->zErrMsg==0 ); + assert( (nArg==1 && eType0==SQLITE_INTEGER) || nArg==(2+pConfig->nCol+2) ); fts5TripCursors(pTab); - if( rc==SQLITE_OK && eType0==SQLITE_INTEGER ){ + if( eType0==SQLITE_INTEGER ){ if( fts5IsContentless(pTab) ){ pTab->base.zErrMsg = sqlite3_mprintf( "cannot %s contentless fts5 table: %s", @@ -1227,7 +1233,8 @@ static int fts5UpdateMethod( i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); } - }else if( nArg>1 ){ + }else{ + assert( nArg>1 ); sqlite3_value *pCmd = apVal[2 + pConfig->nCol]; if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ const char *z = (const char*)sqlite3_value_text(pCmd); @@ -1471,6 +1478,7 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ i64 iRowid = fts5CursorRowid(pCsr); rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); + CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); } if( iCol<0 ){ int i; @@ -1879,6 +1887,23 @@ static int fts5CreateTokenizer( return rc; } +static Fts5TokenizerModule *fts5LocateTokenizer( + Fts5Global *pGlobal, + const char *zName +){ + Fts5TokenizerModule *pMod = 0; + + if( zName==0 ){ + pMod = pGlobal->pDfltTok; + }else{ + for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ + if( sqlite3_stricmp(zName, pMod->zName)==0 ) break; + } + } + + return pMod; +} + /* ** Find a tokenizer. This is the implementation of the ** fts5_api.xFindTokenizer() method. 
@@ -1889,21 +1914,13 @@ static int fts5FindTokenizer( void **ppUserData, fts5_tokenizer *pTokenizer /* Populate this object */ ){ - Fts5Global *pGlobal = (Fts5Global*)pApi; int rc = SQLITE_OK; - Fts5TokenizerModule *pTok; + Fts5TokenizerModule *pMod; - if( zName==0 ){ - pTok = pGlobal->pDfltTok; - }else{ - for(pTok=pGlobal->pTok; pTok; pTok=pTok->pNext){ - if( sqlite3_stricmp(zName, pTok->zName)==0 ) break; - } - } - - if( pTok ){ - *pTokenizer = pTok->x; - *ppUserData = pTok->pUserData; + pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); + if( pMod ){ + *pTokenizer = pMod->x; + *ppUserData = pMod->pUserData; }else{ memset(pTokenizer, 0, sizeof(fts5_tokenizer)); rc = SQLITE_ERROR; @@ -1917,24 +1934,23 @@ int sqlite3Fts5GetTokenizer( const char **azArg, int nArg, Fts5Tokenizer **ppTok, - fts5_tokenizer **ppTokApi + fts5_tokenizer **ppTokApi, + char **pzErr ){ - Fts5TokenizerModule *pMod = 0; + Fts5TokenizerModule *pMod; int rc = SQLITE_OK; - if( nArg==0 ){ - pMod = pGlobal->pDfltTok; - }else{ - for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ - if( sqlite3_stricmp(azArg[0], pMod->zName)==0 ) break; - } - } - + pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 
0 : azArg[0]); if( pMod==0 ){ + assert( nArg>0 ); rc = SQLITE_ERROR; + *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]); }else{ rc = pMod->x.xCreate(pMod->pUserData, &azArg[1], (nArg?nArg-1:0), ppTok); *ppTokApi = &pMod->x; + if( rc!=SQLITE_OK && pzErr ){ + *pzErr = sqlite3_mprintf("error in tokenizer constructor"); + } } if( rc!=SQLITE_OK ){ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index d3bdbadc36..34190bdec1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -61,7 +61,8 @@ int sqlite3Fts5GetTokenizer( const char **azArg, int nArg, Fts5Tokenizer**, - fts5_tokenizer** + fts5_tokenizer**, + char **pzErr ); /* diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 8e4beffe67..4ef9e96070 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -230,11 +230,12 @@ static void fts5HighlightFunction( if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); - }else{ - sqlite3_result_error_code(pCtx, rc); } sqlite3_free(ctx.zOut); } + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + } } /* ** End of highlight() implementation. 
diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 0c79cdfc03..eae18dce27 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -330,11 +330,9 @@ static int fts5ConfigParseSpecial( rc = SQLITE_ERROR; }else{ rc = sqlite3Fts5GetTokenizer(pGlobal, - (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi + (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi, + pzErr ); - if( rc!=SQLITE_OK ){ - *pzErr = sqlite3_mprintf("error in tokenizer constructor"); - } } } } @@ -387,7 +385,7 @@ static int fts5ConfigParseSpecial( static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); return sqlite3Fts5GetTokenizer( - pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi + pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0 ); } @@ -563,7 +561,7 @@ int sqlite3Fts5ConfigParse( rc = SQLITE_ERROR; }else{ if( bOption ){ - rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo, pzErr); + rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr); }else{ rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); zOne = 0; diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 6463c64c3d..a638559310 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -280,15 +280,15 @@ int sqlite3Fts5ExprPhraseExpr( Fts5ExprPhrase *pCopy; pOrig = pExpr->apExprPhrase[iPhrase]; + pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm + ); pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) ); - pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc, - sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm - ); for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ pCopy->aTerm[i].zTerm = 
fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm); diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 568d816ff1..431533afcc 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -36,6 +36,9 @@ int sqlite3_fts5_may_be_corrupt = 0; ** can extract the sqlite3* pointer from an existing Tcl SQLite ** connection. */ + +extern const char *sqlite3ErrName(int); + struct SqliteDb { sqlite3 *db; }; @@ -390,7 +393,7 @@ static int xF5tApi( #undef CASE if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "error in api call", 0); + Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_ERROR; } @@ -727,8 +730,6 @@ static int f5tTokenizerTokenize( return rc; } -extern const char *sqlite3ErrName(int); - /* ** sqlite3_fts5_token TEXT START END POS */ diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 3f4261c698..428a637e38 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -529,11 +529,16 @@ static int fts5PorterCreate( int rc = SQLITE_OK; PorterTokenizer *pRet; void *pUserdata = 0; + const char *zBase = "unicode61"; + + if( nArg>0 ){ + zBase = azArg[0]; + } pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); if( pRet ){ memset(pRet, 0, sizeof(PorterTokenizer)); - rc = pApi->xFindTokenizer(pApi, "unicode61", &pUserdata, &pRet->tokenizer); + rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer); }else{ rc = SQLITE_NOMEM; } diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 9c56790dc5..ce4c90b9a1 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -297,7 +297,7 @@ do_catchsql_test 12.2 { } {1 {unknown special query: stuff}} do_test 12.3 { - set res [db one { SELECT t2 FROM t2 WHERE t2 MATCH '* reads ' }] + set res [db eval { SELECT t2 FROM t2 WHERE t2 MATCH '* reads ' }] string is integer $res } {1} diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 36402d6f6b..522f44ce23 100644 --- a/ext/fts5/test/fts5al.test +++ 
b/ext/fts5/test/fts5al.test @@ -268,6 +268,14 @@ do_execsql_test 4.3.3 { {a three} 0 {a one} 1 {a four} 1 {a two} 2 {a five} 2 } +do_catchsql_test 4.4.3 { + SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH 'xyz(3)' +} {1 {no such function: xyz}} +do_catchsql_test 4.4.4 { + SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL +} {1 {parse error in rank function: }} + + finish_test diff --git a/ext/fts5/test/fts5aux.test b/ext/fts5/test/fts5aux.test new file mode 100644 index 0000000000..b5cbc6e9a3 --- /dev/null +++ b/ext/fts5/test/fts5aux.test @@ -0,0 +1,57 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the auxiliary function APIs. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5aux + +proc inst {cmd i} { + $cmd xInst $i +} +sqlite3_fts5_create_function db inst inst + +proc colsize {cmd i} { + $cmd xColumnSize $i +} +sqlite3_fts5_create_function db colsize colsize + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE f1 USING fts5(a, b); + INSERT INTO f1 VALUES('one two', 'two one zero'); +} + +do_catchsql_test 1.1 { + SELECT inst(f1, -1) FROM f1 WHERE f1 MATCH 'two'; +} {1 SQLITE_RANGE} +do_catchsql_test 1.2 { + SELECT inst(f1, 0) FROM f1 WHERE f1 MATCH 'two'; +} {0 {{0 0 1}}} +do_catchsql_test 1.3 { + SELECT inst(f1, 1) FROM f1 WHERE f1 MATCH 'two'; +} {0 {{0 1 0}}} +do_catchsql_test 1.4 { + SELECT inst(f1, 2) FROM f1 WHERE f1 MATCH 'two'; +} {1 SQLITE_RANGE} + +do_catchsql_test 2.1 { + SELECT colsize(f1, 2) FROM f1 WHERE f1 MATCH 'two'; +} {1 SQLITE_RANGE} + +do_execsql_test 2.2 { + SELECT colsize(f1, 0), colsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero'; +} {2 3} + + + +finish_test + 
diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index be2b56a76d..105e11224b 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -215,6 +215,24 @@ do_execsql_test 4.7 { do_execsql_test 4.8 { SELECT rowid FROM t2 WHERE t2 MATCH 'b'} {} do_execsql_test 4.9 { SELECT rowid FROM t2 WHERE t2 MATCH 'y'} {-40} +#------------------------------------------------------------------------- +# Test that if the 'rowid' field of a 'delete' is not an integer, no +# changes are made to the FTS index. +# +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE t5 USING fts5(a, b, content=); + INSERT INTO t5(rowid, a, b) VALUES(-1, 'one', 'two'); + INSERT INTO t5(rowid, a, b) VALUES( 0, 'three', 'four'); + INSERT INTO t5(rowid, a, b) VALUES( 1, 'five', 'six'); +} + +set ::checksum [execsql {SELECT md5sum(id, block) FROM t5_data}] + +do_execsql_test 5.1 { + INSERT INTO t5(t5, rowid, a, b) VALUES('delete', NULL, 'three', 'four'); + SELECT md5sum(id, block) FROM t5_data; +} $::checksum + finish_test diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index 0791ab0cf9..7d0ea9d2bc 100644 --- a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -70,7 +70,24 @@ do_test 2.1 { } {} #-------------------------------------------------------------------- +# A mundane test - missing row in the %_content table. 
# +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t3 USING fts5(x); + INSERT INTO t3 VALUES('one o'); + INSERT INTO t3 VALUES('two e'); + INSERT INTO t3 VALUES('three o'); + INSERT INTO t3 VALUES('four e'); + INSERT INTO t3 VALUES('five o'); +} +do_execsql_test 3.1 { + SELECT * FROM t3 WHERE t3 MATCH 'o' +} {{one o} {three o} {five o}} + +do_catchsql_test 3.1 { + DELETE FROM t3_content WHERE rowid = 3; + SELECT * FROM t3 WHERE t3 MATCH 'o'; +} {1 {database disk image is malformed}} finish_test diff --git a/ext/fts5/test/fts5doclist.test b/ext/fts5/test/fts5doclist.test new file mode 100644 index 0000000000..eb1dc9a695 --- /dev/null +++ b/ext/fts5/test/fts5doclist.test @@ -0,0 +1,41 @@ +# 2015 April 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This test is focused on edge cases in the doclist format. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5doclist + + +#------------------------------------------------------------------------- +# Create a table with 1000 columns. Then add some large documents to it. +# All text is in the right most column of the table. 
+# +do_test 1.0 { + set cols [list] + for {set i 0} {$i < 900} {incr i} { lappend cols "x$i" } + execsql "CREATE VIRTUAL TABLE ccc USING fts5([join $cols ,])" +} {} + +db func rnddoc fts5_rnddoc +do_execsql_test 1.1 { + WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) + INSERT INTO ccc(x899) SELECT rnddoc(500) FROM ii; +} + +do_execsql_test 1.2 { + INSERT INTO ccc(ccc) VALUES('integrity-check'); +} + + +finish_test + diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index de79900bb6..d7ced10c72 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -22,6 +22,8 @@ ifcapable !fts5 { return } +if 1 { + #------------------------------------------------------------------------- # An OOM while dropping an fts5 table. # @@ -40,6 +42,154 @@ do_faultsim_test 1 -faults oom-* -prep { faultsim_test_result [list 0 {}] } +#------------------------------------------------------------------------- +# An OOM within an "ORDER BY rank" query. +# +db func rnddoc fts5_rnddoc +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE xx USING fts5(x); + INSERT INTO xx VALUES ('abc ' || rnddoc(10)); + INSERT INTO xx VALUES ('abc abc' || rnddoc(9)); + INSERT INTO xx VALUES ('abc abc abc' || rnddoc(8)); +} {} +faultsim_save_and_close + +do_faultsim_test 2 -faults oom-* -prep { + faultsim_restore_and_reopen + execsql { SELECT * FROM xx } +} -body { + execsql { SELECT rowid FROM xx WHERE xx MATCH 'abc' ORDER BY rank } +} -test { + faultsim_test_result [list 0 {3 2 1}] +} + +#------------------------------------------------------------------------- +# An OOM while "reseeking" an FTS cursor. 
+# +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE jj USING fts5(j); + INSERT INTO jj(rowid, j) VALUES(101, 'm t w t f s s'); + INSERT INTO jj(rowid, j) VALUES(202, 't w t f s'); + INSERT INTO jj(rowid, j) VALUES(303, 'w t f'); + INSERT INTO jj(rowid, j) VALUES(404, 't'); +} +faultsim_save_and_close + +do_faultsim_test 3 -faults oom-* -prep { + faultsim_restore_and_reopen + execsql { SELECT * FROM jj } +} -body { + set res [list] + db eval { SELECT rowid FROM jj WHERE jj MATCH 't' } { + lappend res $rowid + if {$rowid==303} { + execsql { DELETE FROM jj WHERE rowid=404 } + } + } + set res +} -test { + faultsim_test_result [list 0 {101 202 303}] +} + +#------------------------------------------------------------------------- +# An OOM within a special "*reads" query. +# +reset_db +db func rnddoc fts5_rnddoc +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + INSERT INTO x1(x1, rank) VALUES('pgsz', 32); + + WITH ii(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10 ) + INSERT INTO x1 SELECT rnddoc(5) FROM ii; +} + +set ::res [db eval {SELECT rowid, x1 FROM x1 WHERE x1 MATCH '*reads'}] + +do_faultsim_test 4 -faults oom-* -body { + db eval {SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'} +} -test { + faultsim_test_result {0 {0 {} 3}} +} + +#------------------------------------------------------------------------- +# An OOM within a query that uses a custom rank function. 
+# +reset_db +do_execsql_test 5.0 { + PRAGMA encoding='utf16'; + CREATE VIRTUAL TABLE x2 USING fts5(x); + INSERT INTO x2(rowid, x) VALUES(10, 'a b c'); -- 3 + INSERT INTO x2(rowid, x) VALUES(20, 'a b c'); -- 6 + INSERT INTO x2(rowid, x) VALUES(30, 'a b c'); -- 2 + INSERT INTO x2(rowid, x) VALUES(40, 'a b c'); -- 5 + INSERT INTO x2(rowid, x) VALUES(50, 'a b c'); -- 1 +} + +proc rowidmod {cmd mod} { + set row [$cmd xRowid] + expr {$row % $mod} +} +sqlite3_fts5_create_function db rowidmod rowidmod + +do_faultsim_test 5.1 -faults oom-* -body { + db eval { + SELECT rowid || '-' || rank FROM x2 WHERE x2 MATCH 'b' AND + rank MATCH "rowidmod('7')" ORDER BY rank + } +} -test { + faultsim_test_result {0 {50-1 30-2 10-3 40-5 20-6}} +} + +proc rowidprefix {cmd prefix} { + set row [$cmd xRowid] + set {} "${row}-${prefix}" +} +sqlite3_fts5_create_function db rowidprefix rowidprefix + +set str [string repeat abcdefghijklmnopqrstuvwxyz 10] +do_faultsim_test 5.2 -faults oom-* -body { + db eval " + SELECT rank, x FROM x2 WHERE x2 MATCH 'b' AND + rank MATCH 'rowidprefix(''$::str'')' + LIMIT 1 + " +} -test { + faultsim_test_result "0 {10-$::str {a b c}}" +} + +} + + +#------------------------------------------------------------------------- +# OOM errors within auxiliary functions. 
+# +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE x3 USING fts5(xxx); + INSERT INTO x3 VALUES('a b c d c b a'); +} + +do_faultsim_test 6.1 -faults oom-t* -body { + db eval { SELECT highlight(x3, 0, '*', '*') FROM x3 WHERE x3 MATCH 'c' } +} -test { + faultsim_test_result {0 {{a b *c* d *c* b a}}} +} + +proc firstinst {cmd} { + foreach {p c o} [$cmd xInst 0] {} + expr $c*100 + $o +} +sqlite3_fts5_create_function db firstinst firstinst + +do_faultsim_test 6.2 -faults oom-t* -body { + db eval { SELECT firstinst(x3) FROM x3 WHERE x3 MATCH 'c' } +} -test { + faultsim_test_result {0 2} {1 SQLITE_NOMEM} +} + + + finish_test diff --git a/ext/fts5/test/fts5plan.test b/ext/fts5/test/fts5plan.test new file mode 100644 index 0000000000..1670f89faa --- /dev/null +++ b/ext/fts5/test/fts5plan.test @@ -0,0 +1,61 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file focuses on testing the planner (xBestIndex function). +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5plan + +do_execsql_test 1.0 { + CREATE TABLE t1(x, y); + CREATE VIRTUAL TABLE f1 USING fts5(ff); +} + +do_eqp_test 1.1 { + SELECT * FROM t1, f1 WHERE f1 MATCH t1.x +} { + 0 0 0 {SCAN TABLE t1} + 0 1 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:} +} + +do_eqp_test 1.2 { + SELECT * FROM t1, f1 WHERE f1 > t1.x +} { + 0 0 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} + 0 1 0 {SCAN TABLE t1} +} + +do_eqp_test 1.3 { + SELECT * FROM f1 WHERE f1 MATCH ? 
ORDER BY ff +} { + 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:} + 0 0 0 {USE TEMP B-TREE FOR ORDER BY} +} + +do_eqp_test 1.4 { + SELECT * FROM f1 ORDER BY rank +} { + 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} + 0 0 0 {USE TEMP B-TREE FOR ORDER BY} +} + +do_eqp_test 1.5 { + SELECT * FROM f1 WHERE rank MATCH ? +} { + 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} +} + + + + +finish_test + diff --git a/ext/fts5/test/fts5rank.test b/ext/fts5/test/fts5rank.test new file mode 100644 index 0000000000..6da24adee2 --- /dev/null +++ b/ext/fts5/test/fts5rank.test @@ -0,0 +1,39 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file focuses on testing queries that use the "rank" column. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5rank + + +#------------------------------------------------------------------------- +# "ORDER BY rank" + highlight() + large poslists. 
+# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE xyz USING fts5(z); +} +do_test 1.1 { + set doc [string trim [string repeat "x y " 500]] + execsql { INSERT INTO xyz VALUES($doc) } +} {} +do_execsql_test 1.2 { + SELECT highlight(xyz, 0, '[', ']') FROM xyz WHERE xyz MATCH 'x' ORDER BY rank +} [list [string map {x [x]} $doc]] + +do_execsql_test 1.3 { + SELECT highlight(xyz, 0, '[', ']') FROM xyz + WHERE xyz MATCH 'x AND y' ORDER BY rank +} [list [string map {x [x] y [y]} $doc]] + +finish_test + diff --git a/ext/fts5/test/fts5rebuild.test b/ext/fts5/test/fts5rebuild.test index 644a674942..96961792f5 100644 --- a/ext/fts5/test/fts5rebuild.test +++ b/ext/fts5/test/fts5rebuild.test @@ -46,5 +46,16 @@ do_execsql_test 1.7 { INSERT INTO f1(f1) VALUES('integrity-check'); } {} + +#------------------------------------------------------------------------- +# Check that 'rebuild' may not be used with a contentless table. +# +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE nc USING fts5(doc, content=); +} + +do_catchsql_test 2.2 { + INSERT INTO nc(nc) VALUES('rebuild'); +} {1 {'rebuild' may not be used with a contentless fts5 table}} finish_test diff --git a/ext/fts5/test/fts5restart.test b/ext/fts5/test/fts5restart.test index 3b1f334221..8fa3600ab3 100644 --- a/ext/fts5/test/fts5restart.test +++ b/ext/fts5/test/fts5restart.test @@ -19,6 +19,10 @@ do_execsql_test 1.0 { CREATE VIRTUAL TABLE f1 USING fts5(ff); } +#------------------------------------------------------------------------- +# Run the 'optimize' command. Check that it does not disturb ongoing +# full-text queries. 
+# do_test 1.1 { for {set i 1} {$i < 1000} {incr i} { execsql { INSERT INTO f1 VALUES('a b c d e') } @@ -30,7 +34,6 @@ do_execsql_test 1.2 { SELECT rowid FROM f1 WHERE f1 MATCH 'c'; } $lRowid -breakpoint do_test 1.3 { set res [list] db eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } { @@ -42,6 +45,101 @@ do_test 1.3 { set res } $lRowid +do_test 1.4.1 { + sqlite3 db2 test.db + set res [list] + db2 eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } { + if {$rowid == 100} { + set cres [catchsql { INSERT INTO f1(f1) VALUES('optimize') }] + } + lappend res $rowid + } + set res +} $lRowid + +do_test 1.4.2 { + db2 close + set cres +} {1 {database is locked}} + +#------------------------------------------------------------------------- +# Open a couple of cursors. Then close them in the same order. +# +do_test 2.1 { + set ::s1 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'b'" -1 X] + set ::s2 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'c'" -1 X] + + sqlite3_step $::s1 +} {SQLITE_ROW} +do_test 2.2 { + sqlite3_step $::s2 +} {SQLITE_ROW} + +do_test 2.1 { + sqlite3_finalize $::s1 + sqlite3_finalize $::s2 +} {SQLITE_OK} + +#------------------------------------------------------------------------- +# Copy data between two FTS5 tables. +# +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE f2 USING fts5(gg); + INSERT INTO f2 SELECT ff FROM f1 WHERE f1 MATCH 'b+c+d'; +} +do_execsql_test 3.2 { + SELECT rowid FROM f2 WHERE f2 MATCH 'a+b+c+d+e' +} $lRowid + +#------------------------------------------------------------------------- +# Remove the row that an FTS5 cursor is currently pointing to. And +# various other similar things. Check that this does not disturb +# ongoing scans. 
+# +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE n4 USING fts5(n); + INSERT INTO n4(rowid, n) VALUES(100, '1 2 3 4 5'); + INSERT INTO n4(rowid, n) VALUES(200, '1 2 3 4'); + INSERT INTO n4(rowid, n) VALUES(300, '2 3 4'); + INSERT INTO n4(rowid, n) VALUES(400, '2 3'); + INSERT INTO n4(rowid, n) VALUES(500, '3'); +} + +do_test 4.1 { + set res [list] + db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' } { + if {$rowid==300} { + execsql { DELETE FROM n4 WHERE rowid=300 } + } + lappend res $rowid + } + set res +} {100 200 300 400 500} + +do_test 4.2 { + execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') } + set res [list] + db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} { + if {$rowid==300} { + execsql { DELETE FROM n4 WHERE rowid=300 } + } + lappend res $rowid + } + set res +} {500 400 300 200 100} + +do_test 4.3 { + execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') } + set res [list] + db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} { + if {$rowid==300} { + execsql { DELETE FROM n4 } + } + lappend res $rowid + } + set res +} {500 400 300} + finish_test diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index 44de1690fe..42f5956492 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -37,6 +37,14 @@ do_execsql_test 1.4 { DROP TABLE ft1; } +do_catchsql_test 1.5 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'nosuch'); +} {1 {no such tokenizer: nosuch}} + +do_catchsql_test 1.6 { + CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter nosuch'); +} {1 {error in tokenizer constructor}} + do_execsql_test 2.0 { CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); INSERT INTO ft1 VALUES('embedded databases'); diff --git a/manifest b/manifest index 370597381b..199e150b25 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sbug\sin\shandling\swrites\swhile\sthere\sare\sactive\scursors. 
-D 2015-04-28T20:24:50.023 +C Improve\sfts5\stests. +D 2015-04-29T20:54:08.849 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,23 +104,23 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 45e82b584bb3fce22c82d8521bccaa2a6e9202fe +F ext/fts5/fts5.c 932284a253cc9bb32caf047879669720680be2c3 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h f573fe6c50471f1d66682fce282da801009c54e1 -F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 +F ext/fts5/fts5Int.h 2e0a1a6b77e1e014b7e9b1479ca686ff79930457 +F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 8c8cfe7f09ca2767ab53ea883f9a0af0edb6bbae -F ext/fts5/fts5_config.c f344ffa24d2add70fd5bde2b73c44846ad7a06bd -F ext/fts5/fts5_expr.c e647a2626af5c80a6325532b23eea6d1eb252a78 +F ext/fts5/fts5_config.c ecbbd5163758a958106867051892e0dfecf68b5c +F ext/fts5/fts5_expr.c 663c75dfdb1bfd8809d696357d7b55f507815098 F ext/fts5/fts5_hash.c 29d8b0668727863cc1f1efa65efe4dd78635b016 F ext/fts5/fts5_index.c de588982b0237b1605d6c37afd115b34c95c3da1 F ext/fts5/fts5_storage.c ef60fc9dcc4e274f9589165e26833173c273ae18 -F ext/fts5/fts5_tcl.c af1d37fa93bcabc926aa4e89500adedbbe84a520 -F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b +F ext/fts5/fts5_tcl.c 19ab8cfa642950648968dcf25075d6d969900524 +F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 
5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 -F ext/fts5/test/fts5aa.test 87b2e01084c3e2a960ae1500dd5f0729dac2166c +F ext/fts5/test/fts5aa.test e17657bd749cb3982745ec503ce22372dee7340d F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 8b3c2938840da8f3f6a53b1324fb03e0bac12d1e F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d @@ -131,30 +131,34 @@ F ext/fts5/test/fts5ah.test d74cf8b7de5b8424f732acef69fe12122a12f2bf F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 -F ext/fts5/test/fts5al.test 6a5717faaf7f1e0e866360022d284903f3a4eede +F ext/fts5/test/fts5al.test e6bddd2c11c0d1e3ae189ee51081899d2f4ea570 +F ext/fts5/test/fts5aux.test 1e475d928a3d1decf74167394db20330d7beeb0f F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b -F ext/fts5/test/fts5content.test c4f5b0fe1bc7523bb6706591d05d1194a0aec452 -F ext/fts5/test/fts5corrupt.test 9e8524281aa322c522c1d6e2b347e24e060c2727 +F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f +F ext/fts5/test/fts5corrupt.test 138aecc75c36c3dac9259c7f57c5bc3d009255f8 F ext/fts5/test/fts5corrupt2.test 494111fd4f2dab36499cf97718eaba1f7c11e9d0 F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f +F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F 
ext/fts5/test/fts5fault4.test e860e0cf7e56f2f87330023be1f1ced44128d5c8 +F ext/fts5/test/fts5fault4.test aea710bbf5680ed41afb9d3313c297d429f9feac F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 +F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 1287803c3df0e43f536196256fb9e0e6baccb4f1 -F ext/fts5/test/fts5rebuild.test ee6792715c6c528cc188e7869d67c3c655889ddb -F ext/fts5/test/fts5restart.test 4934233dd2633665198d563c79f1015e3a2b9518 +F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 +F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 +F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 -F ext/fts5/test/fts5tokenizer.test 7a6ee24db908c09a0dc1eba634ffa17afcc05d86 +F ext/fts5/test/fts5tokenizer.test bbcde2a7473dcaa9a1fc6809aa8965acb7b846ff F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 @@ -1311,7 +1315,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8e8136f2dc08082c2984462719d9cba0f212c92a -R 65017a9d42efb9cfacfe6e269e92cdbd +P 07f70955392697556ca2951c9b6c3a5204cd5ec0 +R 07c0e8a626b67779b0890a98070e21c4 U dan -Z 
91e05f6f4d01e29ed38305d1f49be921 +Z 5236003ea42b936c8f2a3578dc6f6593 diff --git a/manifest.uuid b/manifest.uuid index e0e7db8c5b..615ae11606 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -07f70955392697556ca2951c9b6c3a5204cd5ec0 \ No newline at end of file +c1f07a3aa98eac87e2747527d15e5e5562221ceb \ No newline at end of file From 557085a5a1bb0f6a2d60215e87d78b6e8729e442 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 May 2015 12:14:23 +0000 Subject: [PATCH 119/206] Improve test coverage of fts5.c. FossilOrigin-Name: add4f4681c648dcbecaa68d08f7b2f4e6d63003c --- ext/fts5/fts5.c | 12 +++- ext/fts5/fts5_tcl.c | 74 ++++++++++++++++---- ext/fts5/test/fts5aux.test | 128 ++++++++++++++++++++++++++++++++++ ext/fts5/test/fts5fault4.test | 16 +++++ manifest | 18 ++--- manifest.uuid | 2 +- 6 files changed, 223 insertions(+), 27 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index b37f72d58d..a753d671da 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1495,6 +1495,9 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ return rc; } +/* +** Implementation of the xSetAuxdata() method. +*/ static int fts5ApiSetAuxdata( Fts5Context *pCtx, /* Fts5 context */ void *pPtr, /* Pointer to save as auxdata */ @@ -1503,6 +1506,8 @@ static int fts5ApiSetAuxdata( Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Auxdata *pData; + /* Search through the cursor's list of Fts5Auxdata objects for one that + ** corresponds to the currently executing auxiliary function.
*/ for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ if( pData->pAux==pCsr->pAux ) break; } @@ -1512,12 +1517,12 @@ static int fts5ApiSetAuxdata( pData->xDelete(pData->pPtr); } }else{ - pData = (Fts5Auxdata*)sqlite3_malloc(sizeof(Fts5Auxdata)); + int rc = SQLITE_OK; + pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); if( pData==0 ){ if( xDelete ) xDelete(pPtr); - return SQLITE_NOMEM; + return rc; } - memset(pData, 0, sizeof(Fts5Auxdata)); pData->pAux = pCsr->pAux; pData->pNext = pCsr->pAuxdata; pCsr->pAuxdata = pData; @@ -1644,6 +1649,7 @@ static void fts5ApiCallback( if( pCsr==0 ){ char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); }else{ fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); } diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 431533afcc..171b4849a5 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -61,6 +61,27 @@ static int f5tDbPointer(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **ppDb){ /* End of code that accesses the SqliteDb struct. 
**************************************************************************/ +static int f5tResultToErrorCode(const char *zRes){ + struct ErrorCode { + int rc; + const char *zError; + } aErr[] = { + { SQLITE_DONE, "SQLITE_DONE" }, + { SQLITE_ERROR, "SQLITE_ERROR" }, + { SQLITE_OK, "SQLITE_OK" }, + { SQLITE_OK, "" }, + }; + int i; + + for(i=0; iinterp, zCmd); + if( rc==TCL_OK ){ + rc = f5tResultToErrorCode(Tcl_GetStringResult(p->interp)); + } + return rc; } @@ -195,20 +220,22 @@ static int xF5tApi( int nArg; const char *zMsg; } aSub[] = { - { "xColumnCount", 0, "" }, - { "xRowCount", 0, "" }, - { "xColumnTotalSize", 1, "COL" }, - { "xTokenize", 2, "TEXT SCRIPT" }, - { "xPhraseCount", 0, "" }, - { "xPhraseSize", 1, "PHRASE" }, - { "xInstCount", 0, "" }, - { "xInst", 1, "IDX" }, - { "xRowid", 0, "" }, - { "xColumnText", 1, "COL" }, - { "xColumnSize", 1, "COL" }, - { "xQueryPhrase", 2, "PHRASE SCRIPT" }, - { "xSetAuxdata", 1, "VALUE" }, - { "xGetAuxdata", 1, "CLEAR" }, + { "xColumnCount", 0, "" }, /* 0 */ + { "xRowCount", 0, "" }, /* 1 */ + { "xColumnTotalSize", 1, "COL" }, /* 2 */ + { "xTokenize", 2, "TEXT SCRIPT" }, /* 3 */ + { "xPhraseCount", 0, "" }, /* 4 */ + { "xPhraseSize", 1, "PHRASE" }, /* 5 */ + { "xInstCount", 0, "" }, /* 6 */ + { "xInst", 1, "IDX" }, /* 7 */ + { "xRowid", 0, "" }, /* 8 */ + { "xColumnText", 1, "COL" }, /* 9 */ + { "xColumnSize", 1, "COL" }, /* 10 */ + { "xQueryPhrase", 2, "PHRASE SCRIPT" }, /* 11 */ + { "xSetAuxdata", 1, "VALUE" }, /* 12 */ + { "xGetAuxdata", 1, "CLEAR" }, /* 13 */ + { "xSetAuxdataInt", 1, "INTEGER" }, /* 14 */ + { "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */ { 0, 0, 0} }; @@ -386,6 +413,25 @@ static int xF5tApi( break; } + /* These two - xSetAuxdataInt and xGetAuxdataInt - are similar to the + ** xSetAuxdata and xGetAuxdata methods implemented above. The difference + ** is that they may only save an integer value as auxiliary data, and + ** do not specify a destructor function. 
*/ + CASE(14, "xSetAuxdataInt") { + int iVal; + if( Tcl_GetIntFromObj(interp, objv[2], &iVal) ) return TCL_ERROR; + rc = p->pApi->xSetAuxdata(p->pFts, (void*)iVal, 0); + break; + } + CASE(15, "xGetAuxdataInt") { + int iVal; + int bClear; + if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ) return TCL_ERROR; + iVal = (int)p->pApi->xGetAuxdata(p->pFts, bClear); + Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal)); + break; + } + default: assert( 0 ); break; diff --git a/ext/fts5/test/fts5aux.test b/ext/fts5/test/fts5aux.test index b5cbc6e9a3..61a28e50fb 100644 --- a/ext/fts5/test/fts5aux.test +++ b/ext/fts5/test/fts5aux.test @@ -52,6 +52,134 @@ do_execsql_test 2.2 { } {2 3} +#------------------------------------------------------------------------- +# Test the xSet and xGetAuxdata APIs with a NULL destructor. +# +proc prevrowid {add cmd} { + set res [$cmd xGetAuxdataInt 0] + set r [$cmd xRowid] + $cmd xSetAuxdataInt $r + return [expr $res + $add] +} +sqlite3_fts5_create_function db prevrowid [list prevrowid 0] +sqlite3_fts5_create_function db prevrowid1 [list prevrowid 1] + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE e5 USING fts5(x); + INSERT INTO e5 VALUES('a b c'); + INSERT INTO e5 VALUES('d e f'); + INSERT INTO e5 VALUES('a b c'); + INSERT INTO e5 VALUES('d e f'); + INSERT INTO e5 VALUES('a b c'); +} + +do_execsql_test 3.1 { + SELECT prevrowid(e5) || '+' || rowid FROM e5 WHERE e5 MATCH 'c' +} {0+1 1+3 3+5} + +do_execsql_test 3.2 { + SELECT prevrowid(e5) || '+' || prevrowid1(e5) || '+' || rowid + FROM e5 WHERE e5 MATCH 'e' +} {0+1+2 2+3+4} + +#------------------------------------------------------------------------- +# Test that if the xQueryPhrase callback returns other than SQLITE_OK, +# the query is abandoned. And that if it returns an error code other than +# SQLITE_DONE, the error is propagated back to the caller. 
+# +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE e7 USING fts5(x); + INSERT INTO e7 VALUES('a x a'); + INSERT INTO e7 VALUES('b x b'); + INSERT INTO e7 VALUES('c x c'); + INSERT INTO e7 VALUES('d x d'); + INSERT INTO e7 VALUES('e x e'); +} + +proc xCallback {rowid code cmd} { + set r [$cmd xRowid] + lappend ::cb $r + if {$r==$rowid} { return $code } + return "" +} + +proc phrasequery {cmd code} { + set ::cb [list] + $cmd xQueryPhrase 1 [list xCallback [$cmd xRowid] $code] + set ::cb +} + +sqlite3_fts5_create_function db phrasequery phrasequery + +do_execsql_test 4.1 { + SELECT phrasequery(e7, 'SQLITE_OK') FROM e7 WHERE e7 MATCH 'c x' +} {{1 2 3 4 5}} + +do_execsql_test 4.2 { + SELECT phrasequery(e7, 'SQLITE_DONE') FROM e7 WHERE e7 MATCH 'c x' +} {{1 2 3}} + +do_catchsql_test 4.3 { + SELECT phrasequery(e7, 'SQLITE_ERROR') FROM e7 WHERE e7 MATCH 'c x' +} {1 SQLITE_ERROR} + +#------------------------------------------------------------------------- +# Auxiliary function calls with many cursors in the global cursor list. 
+# +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE e9 USING fts5(y); + INSERT INTO e9(rowid, y) VALUES(1, 'i iii'); + INSERT INTO e9(rowid, y) VALUES(2, 'ii iv'); + INSERT INTO e9(rowid, y) VALUES(3, 'ii'); + INSERT INTO e9(rowid, y) VALUES(4, 'i iv'); + INSERT INTO e9(rowid, y) VALUES(5, 'iii'); +} + +proc my_rowid {cmd} { $cmd xRowid } +sqlite3_fts5_create_function db my_rowid my_rowid + +foreach {var q} { + s1 i + s2 ii + s3 iii + s4 iv +} { + set sql "SELECT my_rowid(e9) FROM e9 WHERE e9 MATCH '$q'" + set $var [sqlite3_prepare db $sql -1 dummy] +} + +do_test 5.1.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 1 +do_test 5.1.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 2 +do_test 5.1.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 1 +do_test 5.1.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 2 + +do_test 5.2.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 4 +do_test 5.2.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 3 +do_test 5.2.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 5 +do_test 5.2.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 4 + +sqlite3_finalize $s1 +sqlite3_finalize $s2 +sqlite3_finalize $s3 +sqlite3_finalize $s4 + +#------------------------------------------------------------------------- +# Passing an invalid first argument to an auxiliary function is detected. 
+# +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE e11 USING fts5(y, z); + INSERT INTO e11(rowid, y, z) VALUES(1, 'a b', 45); + INSERT INTO e11(rowid, y, z) VALUES(2, 'b c', 46); +} + +do_catchsql_test 6.1 { + SELECT my_rowid(z) FROM e11 WHERE e11 MATCH 'b' +} {1 {no such cursor: 45}} + +do_catchsql_test 6.2 { + SELECT my_rowid(y) FROM e11 WHERE e11 MATCH 'b' +} {1 {no such cursor: 0}} + finish_test diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index d7ced10c72..c0c9dd1d6c 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -168,6 +168,8 @@ reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE x3 USING fts5(xxx); INSERT INTO x3 VALUES('a b c d c b a'); + INSERT INTO x3 VALUES('a a a a a a a'); + INSERT INTO x3 VALUES('a a a a a a a'); } do_faultsim_test 6.1 -faults oom-t* -body { @@ -188,6 +190,20 @@ do_faultsim_test 6.2 -faults oom-t* -body { faultsim_test_result {0 2} {1 SQLITE_NOMEM} } +proc previc {cmd} { + set res [$cmd xGetAuxdataInt 0] + $cmd xSetAuxdataInt [$cmd xInstCount] + return $res +} +sqlite3_fts5_create_function db previc previc + +do_faultsim_test 6.2 -faults oom-t* -body { + db eval { SELECT previc(x3) FROM x3 WHERE x3 MATCH 'a' } +} -test { + faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM} +} + + diff --git a/manifest b/manifest index 199e150b25..5f785b7ee6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\sfts5\stests. -D 2015-04-29T20:54:08.849 +C Improve\stest\scoverage\sof\sfts5.c. 
+D 2015-05-01T12:14:23.640 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,7 +104,7 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 932284a253cc9bb32caf047879669720680be2c3 +F ext/fts5/fts5.c 3a0a73bcfbcb7e65ccda099cfb8fd268d2480c7e F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a F ext/fts5/fts5Int.h 2e0a1a6b77e1e014b7e9b1479ca686ff79930457 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 @@ -114,7 +114,7 @@ F ext/fts5/fts5_expr.c 663c75dfdb1bfd8809d696357d7b55f507815098 F ext/fts5/fts5_hash.c 29d8b0668727863cc1f1efa65efe4dd78635b016 F ext/fts5/fts5_index.c de588982b0237b1605d6c37afd115b34c95c3da1 F ext/fts5/fts5_storage.c ef60fc9dcc4e274f9589165e26833173c273ae18 -F ext/fts5/fts5_tcl.c 19ab8cfa642950648968dcf25075d6d969900524 +F ext/fts5/fts5_tcl.c aa3b102bb01f366174718be7ce8e9311b9abb482 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -132,7 +132,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test e6bddd2c11c0d1e3ae189ee51081899d2f4ea570 -F ext/fts5/test/fts5aux.test 1e475d928a3d1decf74167394db20330d7beeb0f +F ext/fts5/test/fts5aux.test d9c724351d8e4dc46cad1308c0b4b8ac94d07660 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test 
b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f @@ -145,7 +145,7 @@ F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test aea710bbf5680ed41afb9d3313c297d429f9feac +F ext/fts5/test/fts5fault4.test 087066bae36f41227eb85968a2436c8a9c960501 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 @@ -1315,7 +1315,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 07f70955392697556ca2951c9b6c3a5204cd5ec0 -R 07c0e8a626b67779b0890a98070e21c4 +P c1f07a3aa98eac87e2747527d15e5e5562221ceb +R 67c24377157269f998af5007c163ea4c U dan -Z 5236003ea42b936c8f2a3578dc6f6593 +Z 082bd0388383a4ef8c727daa5b60df05 diff --git a/manifest.uuid b/manifest.uuid index 615ae11606..0a40e5a487 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c1f07a3aa98eac87e2747527d15e5e5562221ceb \ No newline at end of file +add4f4681c648dcbecaa68d08f7b2f4e6d63003c \ No newline at end of file From 0501b2492a63adb781050eef55000955e061f3e1 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 May 2015 20:38:57 +0000 Subject: [PATCH 120/206] Further improvements to test coverage of fts5 code. 
FossilOrigin-Name: d4331943dff259380c4025bb740d8aba6972d351 --- ext/fts5/fts5_expr.c | 81 +++++++++++++++++------------------ ext/fts5/test/fts5ea.test | 20 ++++----- ext/fts5/test/fts5fault4.test | 60 +++++++++++++++++++++++++- manifest | 16 +++---- manifest.uuid | 2 +- 5 files changed, 116 insertions(+), 63 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index a638559310..79dfec5667 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -104,13 +104,13 @@ struct Fts5Parse { }; void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ + va_list ap; + va_start(ap, zFmt); if( pParse->rc==SQLITE_OK ){ - va_list ap; - va_start(ap, zFmt); pParse->zErr = sqlite3_vmprintf(zFmt, ap); - va_end(ap); pParse->rc = SQLITE_ERROR; } + va_end(ap); } static int fts5ExprIsspace(char t){ @@ -269,52 +269,51 @@ int sqlite3Fts5ExprPhraseExpr( Fts5Expr **ppNew ){ int rc = SQLITE_OK; /* Return code */ - Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */ - int i; /* Used to iterate through phrase terms */ - - /* Components of the new expression object */ - Fts5Expr *pNew; - Fts5ExprPhrase **apPhrase; - Fts5ExprNode *pNode; - Fts5ExprNearset *pNear; - Fts5ExprPhrase *pCopy; + Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ + Fts5ExprPhrase *pCopy; /* Copy of pOrig */ + Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ pOrig = pExpr->apExprPhrase[iPhrase]; pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm ); - pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); - apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); - pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); - pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc, - sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) - ); + if( pCopy ){ + int i; /* Used to iterate through phrase terms */ + Fts5ExprPhrase **apPhrase; + Fts5ExprNode *pNode; + Fts5ExprNearset *pNear; - 
for(i=0; rc==SQLITE_OK && inTerm; i++){ - pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm); - pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; - } + pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); + apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); + pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); + pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) + ); - if( rc==SQLITE_OK ){ - /* All the allocations succeeded. Put the expression object together. */ - pNew->pIndex = pExpr->pIndex; - pNew->pRoot = pNode; - pNew->nPhrase = 1; - pNew->apExprPhrase = apPhrase; - pNew->apExprPhrase[0] = pCopy; + for(i=0; inTerm; i++){ + pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm); + pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; + } - pNode->eType = FTS5_STRING; - pNode->pNear = pNear; + if( rc==SQLITE_OK ){ + /* All the allocations succeeded. Put the expression object together. */ + pNew->pIndex = pExpr->pIndex; + pNew->pRoot = pNode; + pNew->nPhrase = 1; + pNew->apExprPhrase = apPhrase; + pNew->apExprPhrase[0] = pCopy; - pNear->iCol = -1; - pNear->nPhrase = 1; - pNear->apPhrase[0] = pCopy; + pNode->eType = FTS5_STRING; + pNode->pNear = pNear; - pCopy->nTerm = pOrig->nTerm; - pCopy->pNode = pNode; - }else{ - /* At least one allocation failed. Free them all. */ - if( pCopy ){ + pNear->iCol = -1; + pNear->nPhrase = 1; + pNear->apPhrase[0] = pCopy; + + pCopy->nTerm = pOrig->nTerm; + pCopy->pNode = pNode; + }else{ + /* At least one allocation failed. Free them all. */ for(i=0; inTerm; i++){ sqlite3_free(pCopy->aTerm[i].zTerm); } @@ -504,7 +503,7 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. 
*/ if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ - int nByte = sizeof(Fts5LookaheadReader) * pNear->nPhrase; + int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; a = (Fts5NearTrimmer*)sqlite3_malloc(nByte); if( !a ) return SQLITE_NOMEM; memset(a, 0, nByte); @@ -719,7 +718,7 @@ static int fts5ExprNearNextMatch( /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); - if( pNode->bEof || rc!=SQLITE_OK ) break; + if( rc!=SQLITE_OK || pNode->bEof ) break; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any diff --git a/ext/fts5/test/fts5ea.test b/ext/fts5/test/fts5ea.test index f913006530..7b58d8cd8f 100644 --- a/ext/fts5/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -9,6 +9,9 @@ # #************************************************************************* # +# Test the fts5 expression parser directly using the fts5_expr() SQL +# test function. 
+# source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ea @@ -40,6 +43,10 @@ foreach {tn expr res} { 8 {one OR two AND three} {"one" OR ("two" AND "three")} 9 {NEAR(one two)} {NEAR("one" "two", 10)} 10 {NEAR("one three"* two, 5)} {NEAR("one" + "three" * "two", 5)} + 11 {a OR b NOT c} {"a" OR ("b" NOT "c")} + 12 "\x20one\x20two\x20three" {("one" AND "two") AND "three"} + 13 "\x09one\x0Atwo\x0Dthree" {("one" AND "two") AND "three"} + 14 {"abc""def"} {"abc" + "def"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } @@ -53,7 +60,6 @@ foreach {tn expr res} { do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res] } -breakpoint foreach {tn expr err} { 1 {AND} {fts5: syntax error near "AND"} 2 {abc def AND} {fts5: syntax error near ""} @@ -63,21 +69,13 @@ foreach {tn expr err} { 6 {(a OR b) NOT c)} {fts5: syntax error near ")"} 7 {nosuch: a nosuch2: b} {no such column: nosuch} 8 {addr: a nosuch2: b} {no such column: nosuch2} + 9 {NOT} {fts5: syntax error near "NOT"} + 10 {a AND "abc} {unterminated string} } { do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] } -# do_syntax_error_test 1.0 {NOT} {syntax error near "NOT"} - - - -# do_catchsql_test 1.1 { - # SELECT fts5_expr('a OR b NOT c') -#} {0 {"a" OR "b" NOT "c"}} - - -#do_execsql_test 1.0 { SELECT fts5_expr('a') } {{"a"}} finish_test diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index c0c9dd1d6c..8e6c827ad3 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -158,8 +158,6 @@ do_faultsim_test 5.2 -faults oom-* -body { faultsim_test_result "0 {10-$::str {a b c}}" } -} - #------------------------------------------------------------------------- # OOM errors within auxiliary functions. 
@@ -203,6 +201,64 @@ do_faultsim_test 6.2 -faults oom-t* -body { faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM} } +#------------------------------------------------------------------------- +# OOM error when querying for a phrase with many tokens. +# +reset_db +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE tt USING fts5(x, y); + INSERT INTO tt VALUES('f b g b c b', 'f a d c c b'); -- 1 + INSERT INTO tt VALUES('d a e f e d', 'f b b d e e'); -- 2 + INSERT INTO tt VALUES('f b g a d c', 'e f c f a d'); -- 3 + INSERT INTO tt VALUES('f f c d g f', 'f a e b g b'); -- 4 + INSERT INTO tt VALUES('a g b d a g', 'e g a e a c'); -- 5 + INSERT INTO tt VALUES('c d b d e f', 'f g e g e e'); -- 6 + INSERT INTO tt VALUES('e g f f b c', 'f c e f g f'); -- 7 + INSERT INTO tt VALUES('e g c f c e', 'f e e a f g'); -- 8 + INSERT INTO tt VALUES('e a e b e e', 'd c c f f f'); -- 9 + INSERT INTO tt VALUES('f a g g c c', 'e g d g c e'); -- 10 + INSERT INTO tt VALUES('c d b a e f', 'f g e h e e'); -- 11 +} + +do_faultsim_test 7.2 -faults oom-* -body { + db eval { SELECT rowid FROM tt WHERE tt MATCH 'f+g+e+g+e+e' } +} -test { + faultsim_test_result {0 6} {1 SQLITE_NOMEM} +} + +do_faultsim_test 7.3 -faults oom-* -body { + db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d e f)' } +} -test { + faultsim_test_result {0 11} {1 SQLITE_NOMEM} +} + +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 8.0 { + CREATE VIRTUAL TABLE tt USING fts5(x); + INSERT INTO tt(tt, rank) VALUES('pgsz', 32); + BEGIN; + INSERT INTO tt(rowid, x) VALUES(1, 'a b c d x x'); + WITH ii(i) AS (SELECT 2 UNION ALL SELECT i+1 FROM ii WHERE i<99) + INSERT INTO tt(rowid, x) SELECT i, 'a b c x x d' FROM ii; + INSERT INTO tt(rowid, x) VALUES(100, 'a b c d x x'); + COMMIT; +} + +do_faultsim_test 8.1 -faults oom-t* -body { + db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d, 2)' } +} -test { + faultsim_test_result {0 {1 100}} {1 SQLITE_NOMEM} +} + 
+do_faultsim_test 8.2 -faults oom-t* -body { + db eval { SELECT count(*) FROM tt WHERE tt MATCH 'a OR d' } +} -test { + faultsim_test_result {0 100} {1 SQLITE_NOMEM} +} diff --git a/manifest b/manifest index 5f785b7ee6..92d1a1bb51 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\stest\scoverage\sof\sfts5.c. -D 2015-05-01T12:14:23.640 +C Further\simprovements\sto\stest\scoverage\sof\sfts5\scode. +D 2015-05-01T20:38:57.153 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5Int.h 2e0a1a6b77e1e014b7e9b1479ca686ff79930457 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 8c8cfe7f09ca2767ab53ea883f9a0af0edb6bbae F ext/fts5/fts5_config.c ecbbd5163758a958106867051892e0dfecf68b5c -F ext/fts5/fts5_expr.c 663c75dfdb1bfd8809d696357d7b55f507815098 +F ext/fts5/fts5_expr.c e2005ba7823f4ac51d46a8e5aaa1ff66c701b32e F ext/fts5/fts5_hash.c 29d8b0668727863cc1f1efa65efe4dd78635b016 F ext/fts5/fts5_index.c de588982b0237b1605d6c37afd115b34c95c3da1 F ext/fts5/fts5_storage.c ef60fc9dcc4e274f9589165e26833173c273ae18 @@ -140,12 +140,12 @@ F ext/fts5/test/fts5corrupt.test 138aecc75c36c3dac9259c7f57c5bc3d009255f8 F ext/fts5/test/fts5corrupt2.test 494111fd4f2dab36499cf97718eaba1f7c11e9d0 F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 -F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c +F ext/fts5/test/fts5ea.test 7cc498993c16849bb866dbdfb008d91a29f9870b F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F 
ext/fts5/test/fts5fault4.test 087066bae36f41227eb85968a2436c8a9c960501 +F ext/fts5/test/fts5fault4.test 4090af395d8d3342c0a0b27349dd71eb7cc6262d F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 @@ -1315,7 +1315,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c1f07a3aa98eac87e2747527d15e5e5562221ceb -R 67c24377157269f998af5007c163ea4c +P add4f4681c648dcbecaa68d08f7b2f4e6d63003c +R 58de859125935eeaf7ecede3ffbb5a07 U dan -Z 082bd0388383a4ef8c727daa5b60df05 +Z fc00d731e9356a3f17b66c35cad599ad diff --git a/manifest.uuid b/manifest.uuid index 0a40e5a487..ee6ce26d11 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -add4f4681c648dcbecaa68d08f7b2f4e6d63003c \ No newline at end of file +d4331943dff259380c4025bb740d8aba6972d351 \ No newline at end of file From 7c479d51e5ad209db5b9c3c8e6c456b33f09f7c1 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 2 May 2015 20:35:24 +0000 Subject: [PATCH 121/206] Reorganize some of the fts5 expression parsing code. Improve test coverage of the same. 
FossilOrigin-Name: c4456dc5f5f8f45f04e3bbae53b6bcc209fc27d5 --- ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_buffer.c | 25 ++++ ext/fts5/fts5_config.c | 23 +--- ext/fts5/fts5_expr.c | 250 +++++++++++++++------------------- ext/fts5/test/fts5ea.test | 14 +- ext/fts5/test/fts5fault4.test | 43 ++++++ manifest | 22 +-- manifest.uuid | 2 +- 8 files changed, 203 insertions(+), 177 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 34190bdec1..07c3a767b2 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -228,6 +228,7 @@ int sqlite3Fts5PoslistNext64( /* Malloc utility */ void *sqlite3Fts5MallocZero(int *pRc, int nByte); +char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); /* ** End of interface to code in fts5_buffer.c. diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 26304f68aa..dbe51fafc4 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -239,5 +239,30 @@ void *sqlite3Fts5MallocZero(int *pRc, int nByte){ } return pRet; } + +/* +** Return a nul-terminated copy of the string indicated by pIn. If nIn +** is non-negative, then it is the length of the string in bytes. Otherwise, +** the length of the string is determined using strlen(). +** +** It is the responsibility of the caller to eventually free the returned +** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. 
+*/ +char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + if( nIn<0 ){ + nIn = strlen(pIn); + } + zRet = (char*)sqlite3_malloc(nIn+1); + if( zRet ){ + memcpy(zRet, pIn, nIn); + zRet[nIn] = '\0'; + }else{ + *pRc = SQLITE_NOMEM; + } + } + return zRet; +} #endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index eae18dce27..1b29351ec2 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -203,21 +203,6 @@ void sqlite3Fts5Dequote(char *z){ } } -/* -** Duplicate the string passed as the only argument into a buffer allocated -** by sqlite3_malloc(). -** -** Return 0 if an OOM error is encountered. -*/ -static char *fts5Strdup(int *pRc, const char *z){ - char *pRet = 0; - if( *pRc==SQLITE_OK ){ - pRet = sqlite3_mprintf("%s", z); - if( pRet==0 ) *pRc = SQLITE_NOMEM; - } - return pRet; -} - /* ** Argument z points to a nul-terminated string containing an SQL identifier. ** This function returns a copy of the identifier enclosed in backtick @@ -368,7 +353,7 @@ static int fts5ConfigParseSpecial( *pzErr = sqlite3_mprintf("multiple content_rowid=... 
directives"); rc = SQLITE_ERROR; }else{ - pConfig->zContentRowid = fts5Strdup(&rc, zArg); + pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); } return rc; } @@ -526,8 +511,8 @@ int sqlite3Fts5ConfigParse( nByte = nArg * (sizeof(char*) + sizeof(u8)); pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; - pRet->zDb = fts5Strdup(&rc, azArg[1]); - pRet->zName = fts5Strdup(&rc, azArg[2]); + pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); + pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; @@ -591,7 +576,7 @@ int sqlite3Fts5ConfigParse( } } if( rc==SQLITE_OK && pRet->zContentRowid==0 ){ - pRet->zContentRowid = fts5Strdup(&rc, "rowid"); + pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); } /* Formulate the zContentExprlist text */ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 79dfec5667..570331bfb1 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -231,33 +231,6 @@ int sqlite3Fts5ExprNew( return sParse.rc; } -static char *fts5ExprStrdup(int *pRc, const char *zIn){ - char *zRet = 0; - if( *pRc==SQLITE_OK ){ - int nByte = strlen(zIn) + 1; - zRet = sqlite3_malloc(nByte); - if( zRet ){ - memcpy(zRet, zIn, nByte); - }else{ - *pRc = SQLITE_NOMEM; - } - } - return zRet; -} - -static void *fts5ExprMalloc(int *pRc, int nByte){ - void *pRet = 0; - if( *pRc==SQLITE_OK ){ - pRet = sqlite3_malloc(nByte); - if( pRet ){ - memset(pRet, 0, nByte); - }else{ - *pRc = SQLITE_NOMEM; - } - } - return pRet; -} - /* ** Create a new FTS5 expression by cloning phrase iPhrase of the ** expression passed as the second argument. 
@@ -274,7 +247,7 @@ int sqlite3Fts5ExprPhraseExpr( Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ pOrig = pExpr->apExprPhrase[iPhrase]; - pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc, + pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm ); if( pCopy ){ @@ -283,15 +256,17 @@ int sqlite3Fts5ExprPhraseExpr( Fts5ExprNode *pNode; Fts5ExprNearset *pNear; - pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); - apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); - pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); - pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc, + pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); + apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, + sizeof(Fts5ExprPhrase*) + ); + pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode)); + pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) ); for(i=0; inTerm; i++){ - pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm); + pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1); pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; } @@ -576,29 +551,29 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ } /* -** Advance each term iterator in each phrase in pNear. If any reach EOF, -** set output variable *pbEof to true before returning. +** Advance the first term iterator in the first phrase of pNear. Set output +** variable *pbEof to true if it reaches EOF or if an error occurs. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. 
*/ -static int fts5ExprNearAdvanceAll( +static int fts5ExprNearAdvanceFirst( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprNearset *pNear, /* Near object to advance iterators of */ - int *pbEof /* OUT: Set to true if phrase at EOF */ + Fts5ExprNode *pNode, /* FTS5_STRING node */ + int bFromValid, + i64 iFrom ){ - int i, j; /* Phrase and token index, respectively */ + Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; + int rc; - for(i=0; inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - for(j=0; jnTerm; j++){ - Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - int rc = sqlite3Fts5IterNext(pIter); - if( rc || sqlite3Fts5IterEof(pIter) ){ - *pbEof = 1; - return rc; - } - } + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(pIter); } - return SQLITE_OK; + pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); + return rc; } /* @@ -649,9 +624,7 @@ static int fts5ExprAdvanceto( */ static int fts5ExprNearNextRowidMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprNode *pNode, - int bFromValid, - i64 iFrom + Fts5ExprNode *pNode ){ Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; @@ -664,10 +637,6 @@ static int fts5ExprNearNextRowidMatch( ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. 
*/ iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); - if( bFromValid && (iFrom>iLast)==(pExpr->bDesc==0) ){ - assert( pExpr->bDesc || iFrom>=iLast ); - iLast = iFrom; - } do { bMatch = 1; @@ -707,9 +676,7 @@ static int fts5ExprNearNextRowidMatch( */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ - Fts5ExprNode *pNode, /* The "NEAR" node (FTS5_STRING) */ - int bFromValid, - i64 iFrom + Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ ){ int rc = SQLITE_OK; Fts5ExprNearset *pNear = pNode->pNear; @@ -717,7 +684,7 @@ static int fts5ExprNearNextMatch( int i; /* Advance the iterators until they all point to the same rowid */ - rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); if( rc!=SQLITE_OK || pNode->bEof ) break; /* Check that each phrase in the nearset matches the current row. @@ -748,7 +715,7 @@ static int fts5ExprNearNextMatch( /* If control flows to here, then the current rowid is not a match. ** Advance all term iterators in all phrases to the next rowid. */ if( rc==SQLITE_OK ){ - rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof); + rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0); } if( pNode->bEof || rc!=SQLITE_OK ) break; } @@ -797,7 +764,23 @@ static int fts5ExprNearInitAll( } /* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. 
*/ -static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*, int, i64); +static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); + + +static int fts5RowidCmp( + Fts5Expr *pExpr, + i64 iLhs, + i64 iRhs +){ + assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); + if( pExpr->bDesc==0 ){ + if( iLhs iRhs); + }else{ + if( iLhs>iRhs ) return -1; + return (iLhs < iRhs); + } +} /* ** Compare the values currently indicated by the two nodes as follows: @@ -818,13 +801,7 @@ static int fts5NodeCompare( ){ if( p2->bEof ) return -1; if( p1->bEof ) return +1; - if( pExpr->bDesc==0 ){ - if( p1->iRowidiRowid ) return -1; - return (p1->iRowid > p2->iRowid); - }else{ - if( p1->iRowid>p2->iRowid ) return -1; - return (p1->iRowid < p2->iRowid); - } + return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); } /* @@ -845,7 +822,7 @@ static int fts5ExprNodeNext( if( pNode->bEof==0 ){ switch( pNode->eType ){ case FTS5_STRING: { - rc = fts5ExprNearAdvanceAll(pExpr, pNode->pNear, &pNode->bEof); + rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); break; }; @@ -863,13 +840,14 @@ static int fts5ExprNodeNext( Fts5ExprNode *p2 = pNode->pRight; int cmp = fts5NodeCompare(pExpr, p1, p2); - if( cmp==0 ){ + if( cmp<=0 || (bFromValid && fts5RowidCmp(pExpr,p1->iRowid,iFrom)<0) ){ rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); + } + + if( cmp>=0 || (bFromValid && fts5RowidCmp(pExpr,p2->iRowid,iFrom)<0) ){ if( rc==SQLITE_OK ){ rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom); } - }else{ - rc = fts5ExprNodeNext(pExpr, (cmp < 0) ? p1 : p2, bFromValid, iFrom); } break; @@ -882,10 +860,22 @@ static int fts5ExprNodeNext( } if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNextMatch(pExpr, pNode, bFromValid, iFrom); + rc = fts5ExprNodeNextMatch(pExpr, pNode); } } + /* Assert that if bFromValid was true, either: + ** + ** a) an error occurred, or + ** b) the node is now at EOF, or + ** c) the node is now at or past rowid iFrom. 
+ */ + assert( bFromValid==0 + || rc!=SQLITE_OK /* a */ + || pNode->bEof /* b */ + || pNode->iRowid==iFrom || pExpr->bDesc==(pNode->iRowidbEof==0 ){ switch( pNode->eType ){ case FTS5_STRING: { - rc = fts5ExprNearNextMatch(pExpr, pNode, bFromValid, iFrom); + rc = fts5ExprNearNextMatch(pExpr, pNode); break; } @@ -921,17 +911,14 @@ static int fts5ExprNodeNextMatch( while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){ Fts5ExprNode *pAdv; + i64 iFrom; assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); if( pExpr->bDesc==(p1->iRowid > p2->iRowid) ){ pAdv = p1; - if( bFromValid==0 || pExpr->bDesc==(p2->iRowid < iFrom) ){ - iFrom = p2->iRowid; - } + iFrom = p2->iRowid; }else{ pAdv = p2; - if( bFromValid==0 || pExpr->bDesc==(p1->iRowid < iFrom) ){ - iFrom = p1->iRowid; - } + iFrom = p1->iRowid; } rc = fts5ExprNodeNext(pExpr, pAdv, 1, iFrom); if( rc!=SQLITE_OK ) break; @@ -955,13 +942,16 @@ static int fts5ExprNodeNextMatch( default: assert( pNode->eType==FTS5_NOT ); { Fts5ExprNode *p1 = pNode->pLeft; Fts5ExprNode *p2 = pNode->pRight; - while( rc==SQLITE_OK ){ - int cmp; - while( rc==SQLITE_OK && (cmp = fts5NodeCompare(pExpr, p1, p2))>0 ){ - rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom); + + while( rc==SQLITE_OK && p1->bEof==0 ){ + int cmp = fts5NodeCompare(pExpr, p1, p2); + if( cmp>0 ){ + rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); + cmp = fts5NodeCompare(pExpr, p1, p2); } - if( rc || cmp ) break; - rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); + assert( rc!=SQLITE_OK || cmp<=0 ); + if( rc || cmp<0 ) break; + rc = fts5ExprNodeNext(pExpr, p1, 0, 0); } pNode->bEof = p1->bEof; pNode->iRowid = p1->iRowid; @@ -991,7 +981,7 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ /* Attempt to advance to the first match */ if( rc==SQLITE_OK && pNode->bEof==0 ){ - rc = fts5ExprNearNextMatch(pExpr, pNode, 0, 0); + rc = fts5ExprNearNextMatch(pExpr, pNode); } }else{ @@ -1000,7 +990,7 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode 
*pNode){ rc = fts5ExprNodeFirst(pExpr, pNode->pRight); } if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNextMatch(pExpr, pNode, 0, 0); + rc = fts5ExprNodeNextMatch(pExpr, pNode); } } return rc; @@ -1047,31 +1037,9 @@ i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ return p->pRoot->iRowid; } -/* -** Argument pIn points to a buffer of nIn bytes. This function allocates -** and returns a new buffer populated with a copy of (pIn/nIn) with a -** nul-terminator byte appended to it. -** -** It is the responsibility of the caller to eventually free the returned -** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. -*/ -static char *fts5Strndup(int *pRc, const char *pIn, int nIn){ - char *zRet = 0; - if( *pRc==SQLITE_OK ){ - zRet = (char*)sqlite3_malloc(nIn+1); - if( zRet ){ - memcpy(zRet, pIn, nIn); - zRet[nIn] = '\0'; - }else{ - *pRc = SQLITE_NOMEM; - } - } - return zRet; -} - static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ int rc = SQLITE_OK; - *pz = fts5Strndup(&rc, pToken->p, pToken->n); + *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); return rc; } @@ -1123,7 +1091,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( pRet->iCol = -1; } }else if( (pNear->nPhrase % SZALLOC)==0 ){ - int nNew = pRet->nPhrase + SZALLOC; + int nNew = pNear->nPhrase + SZALLOC; int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte); @@ -1181,7 +1149,7 @@ static int fts5ParseTokenize( pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = fts5Strndup(&rc, pToken, nToken); + pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); return rc; } @@ -1272,12 +1240,10 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( ** in the pParse object. 
*/ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ - if( pParse->rc==SQLITE_OK ){ - if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ - sqlite3Fts5ParseError( - pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p - ); - } + if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ + sqlite3Fts5ParseError( + pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p + ); } } @@ -1310,23 +1276,25 @@ void sqlite3Fts5ParseSetColumn( Fts5ExprNearset *pNear, Fts5Token *p ){ - char *z = 0; - int rc = fts5ParseStringFromToken(p, &z); - if( rc==SQLITE_OK ){ - Fts5Config *pConfig = pParse->pConfig; - int i; - for(i=0; inCol; i++){ - if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){ - pNear->iCol = i; - break; + if( pParse->rc==SQLITE_OK ){ + char *z = 0; + int rc = fts5ParseStringFromToken(p, &z); + if( rc==SQLITE_OK ){ + Fts5Config *pConfig = pParse->pConfig; + int i; + for(i=0; inCol; i++){ + if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){ + pNear->iCol = i; + break; + } } + if( i==pConfig->nCol ){ + sqlite3Fts5ParseError(pParse, "no such column: %s", z); + } + sqlite3_free(z); + }else{ + pParse->rc = rc; } - if( i==pConfig->nCol ){ - sqlite3Fts5ParseError(pParse, "no such column: %s", z); - } - sqlite3_free(z); - }else{ - pParse->rc = rc; } } diff --git a/ext/fts5/test/fts5ea.test b/ext/fts5/test/fts5ea.test index 7b58d8cd8f..1248edea0b 100644 --- a/ext/fts5/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -66,11 +66,15 @@ foreach {tn expr err} { 3 {abc OR AND} {fts5: syntax error near "AND"} 4 {(a OR b) abc} {fts5: syntax error near "abc"} 5 {NEaR (a b)} {fts5: syntax error near "NEaR"} - 6 {(a OR b) NOT c)} {fts5: syntax error near ")"} - 7 {nosuch: a nosuch2: b} {no such column: nosuch} - 8 {addr: a nosuch2: b} {no such column: nosuch2} - 9 {NOT} {fts5: syntax error near "NOT"} - 10 {a AND "abc} {unterminated string} + 6 {NEa (a b)} {fts5: syntax error near "NEa"} + 7 {(a OR b) NOT c)} {fts5: syntax error near ")"} + 8 {nosuch: a nosuch2: b} {no such 
column: nosuch} + 9 {addr: a nosuch2: b} {no such column: nosuch2} + 10 {NOT} {fts5: syntax error near "NOT"} + 11 {a AND "abc} {unterminated string} + + 12 {NEAR(a b, xyz)} {expected integer, got "xyz"} + 13 {NEAR(a b, // )} {expected integer, got "//"} } { do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] } diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index 8e6c827ad3..943d331db8 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -201,6 +201,8 @@ do_faultsim_test 6.2 -faults oom-t* -body { faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM} } +} + #------------------------------------------------------------------------- # OOM error when querying for a phrase with many tokens. # @@ -218,6 +220,10 @@ do_execsql_test 7.0 { INSERT INTO tt VALUES('e a e b e e', 'd c c f f f'); -- 9 INSERT INTO tt VALUES('f a g g c c', 'e g d g c e'); -- 10 INSERT INTO tt VALUES('c d b a e f', 'f g e h e e'); -- 11 + + CREATE VIRTUAL TABLE tt2 USING fts5(o); + INSERT INTO tt2(rowid, o) SELECT rowid, x||' '||y FROM tt; + INSERT INTO tt2(rowid, o) VALUES(12, 'a b c d e f g h i j k l'); } do_faultsim_test 7.2 -faults oom-* -body { @@ -232,6 +238,22 @@ do_faultsim_test 7.3 -faults oom-* -body { faultsim_test_result {0 11} {1 SQLITE_NOMEM} } +do_faultsim_test 7.4 -faults oom-t* -body { + db eval { SELECT rowid FROM tt2 WHERE tt2 MATCH '"g c f c e f e e a f"' } +} -test { + faultsim_test_result {0 8} {1 SQLITE_NOMEM} +} + +do_faultsim_test 7.5 -faults oom-* -body { + db eval {SELECT rowid FROM tt2 WHERE tt2 MATCH 'NEAR(a b c d e f g h i j k)'} +} -test { + faultsim_test_result {0 12} {1 SQLITE_NOMEM} +} + +do_faultsim_test 7.6 -faults oom-* -body { + db eval {SELECT rowid FROM tt WHERE tt MATCH 'y: "c c"'} +} -test { + faultsim_test_result {0 {1 9}} {1 SQLITE_NOMEM} } #------------------------------------------------------------------------- @@ -261,6 +283,27 @@ do_faultsim_test 8.2 -faults oom-t* -body 
{ } +#------------------------------------------------------------------------- +# Fault in NOT query. +# +reset_db +do_execsql_test 9.0 { + CREATE VIRTUAL TABLE tt USING fts5(x); + INSERT INTO tt(tt, rank) VALUES('pgsz', 32); + BEGIN; + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<200) + INSERT INTO tt(rowid, x) + SELECT i, CASE WHEN (i%50)==0 THEN 'a a a a a a' ELSE 'a x a x a x' END + FROM ii; + COMMIT; +} + +do_faultsim_test 9.1 -faults oom-* -body { + db eval { SELECT rowid FROM tt WHERE tt MATCH 'a NOT x' } +} -test { + faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM} +} + finish_test diff --git a/manifest b/manifest index 92d1a1bb51..53ddbec52d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Further\simprovements\sto\stest\scoverage\sof\sfts5\scode. -D 2015-05-01T20:38:57.153 +C Reorganize\ssome\sof\sthe\sfts5\sexpression\sparsing\scode.\sImprove\stest\scoverage\sof\sthe\ssame. +D 2015-05-02T20:35:24.467 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,11 +106,11 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 3a0a73bcfbcb7e65ccda099cfb8fd268d2480c7e F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 2e0a1a6b77e1e014b7e9b1479ca686ff79930457 +F ext/fts5/fts5Int.h 05e97ffb2911e8c8cfcb8bdb009e17347c24eb2d F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 -F ext/fts5/fts5_buffer.c 8c8cfe7f09ca2767ab53ea883f9a0af0edb6bbae -F ext/fts5/fts5_config.c ecbbd5163758a958106867051892e0dfecf68b5c -F ext/fts5/fts5_expr.c e2005ba7823f4ac51d46a8e5aaa1ff66c701b32e +F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b +F ext/fts5/fts5_config.c 4e0de8bea4746a7560740b9dcf8be4dced68ef4f +F ext/fts5/fts5_expr.c 
f49d68411dc72cb66f2b55cc109dbf3dce368eef F ext/fts5/fts5_hash.c 29d8b0668727863cc1f1efa65efe4dd78635b016 F ext/fts5/fts5_index.c de588982b0237b1605d6c37afd115b34c95c3da1 F ext/fts5/fts5_storage.c ef60fc9dcc4e274f9589165e26833173c273ae18 @@ -140,12 +140,12 @@ F ext/fts5/test/fts5corrupt.test 138aecc75c36c3dac9259c7f57c5bc3d009255f8 F ext/fts5/test/fts5corrupt2.test 494111fd4f2dab36499cf97718eaba1f7c11e9d0 F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 -F ext/fts5/test/fts5ea.test 7cc498993c16849bb866dbdfb008d91a29f9870b +F ext/fts5/test/fts5ea.test f4d35cd2776dab9358206f7d88a67ea187fdec22 F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test 4090af395d8d3342c0a0b27349dd71eb7cc6262d +F ext/fts5/test/fts5fault4.test 09728cadb4897c97cea092edb9c431d9ec25b88b F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 @@ -1315,7 +1315,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P add4f4681c648dcbecaa68d08f7b2f4e6d63003c -R 58de859125935eeaf7ecede3ffbb5a07 +P d4331943dff259380c4025bb740d8aba6972d351 +R 5651f6663bbd8efe3d630621e3f4b900 U dan -Z fc00d731e9356a3f17b66c35cad599ad +Z 822e7611ba479003f18b45bbb7ca820a diff --git a/manifest.uuid b/manifest.uuid index ee6ce26d11..ddc1ccf8f3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-d4331943dff259380c4025bb740d8aba6972d351 \ No newline at end of file +c4456dc5f5f8f45f04e3bbae53b6bcc209fc27d5 \ No newline at end of file From 4591334dd4152852332836670c222c274949713b Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 7 May 2015 19:29:46 +0000 Subject: [PATCH 122/206] Change to storing all keys in a single merge-tree structure instead of one main structure and a separate one for each prefix index. This is a file-format change. Also introduce a mechanism for managing file-format changes. FossilOrigin-Name: a684b5e2d9d52cf4700e7e5f9dd547a2ba54e8e9 --- ext/fts5/fts5.c | 2 +- ext/fts5/fts5Int.h | 9 +- ext/fts5/fts5_config.c | 41 +- ext/fts5/fts5_expr.c | 15 +- ext/fts5/fts5_hash.c | 32 +- ext/fts5/fts5_index.c | 645 ++++++++++++++----------------- ext/fts5/fts5_storage.c | 19 +- ext/fts5/test/fts5al.test | 6 +- ext/fts5/test/fts5corrupt.test | 4 +- ext/fts5/test/fts5corrupt2.test | 4 +- ext/fts5/test/fts5ea.test | 7 + ext/fts5/test/fts5fault1.test | 4 +- ext/fts5/test/fts5fault4.test | 32 +- ext/fts5/test/fts5integrity.test | 35 ++ ext/fts5/test/fts5prefix.test | 38 +- ext/fts5/test/fts5rowid.test | 14 +- ext/fts5/tool/loadfts5.tcl | 11 +- manifest | 43 ++- manifest.uuid | 2 +- 19 files changed, 496 insertions(+), 467 deletions(-) create mode 100644 ext/fts5/test/fts5integrity.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index a753d671da..df1646786a 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1164,7 +1164,7 @@ static int fts5SpecialInsert( if( bError ){ rc = SQLITE_ERROR; }else{ - rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, z, pVal); + rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, z, pVal, 0); } } } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 07c3a767b2..d09029710b 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -122,12 +122,16 @@ struct Fts5Config { char *zRankArgs; /* Arguments to rank function */ }; +/* Current expected value of %_config table 'version' field */ +#define FTS5_CURRENT_VERSION 1 + 
#define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 #define FTS5_CONTENT_EXTERNAL 2 + int sqlite3Fts5ConfigParse( Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** ); @@ -394,6 +398,7 @@ int sqlite3Fts5HashWrite( i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ + char bByte, const char *pToken, int nToken /* Token to add or remove to or from index */ ); @@ -458,7 +463,9 @@ int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit); int sqlite3Fts5StorageRollback(Fts5Storage *p); -int sqlite3Fts5StorageConfigValue(Fts5Storage *p, const char*, sqlite3_value*); +int sqlite3Fts5StorageConfigValue( + Fts5Storage *p, const char*, sqlite3_value*, int +); int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**); diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 1b29351ec2..0846eec8f6 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -203,33 +203,6 @@ void sqlite3Fts5Dequote(char *z){ } } -/* -** Argument z points to a nul-terminated string containing an SQL identifier. -** This function returns a copy of the identifier enclosed in backtick -** quotes. 
-*/ -static char *fts5EscapeName(int *pRc, const char *z){ - char *pRet = 0; - if( *pRc==SQLITE_OK ){ - int n = strlen(z); - pRet = (char*)sqlite3_malloc(2 + 2*n + 1); - if( pRet==0 ){ - *pRc = SQLITE_NOMEM; - }else{ - int i; - char *p = pRet; - *p++ = '`'; - for(i=0; izContentRowid); if( p->eContent!=FTS5_CONTENT_NONE ){ @@ -849,6 +821,7 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ char *zSql; sqlite3_stmt *p = 0; int rc; + int iVersion = 0; /* Set default values */ pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; @@ -868,9 +841,17 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ while( SQLITE_ROW==sqlite3_step(p) ){ const char *zK = (const char*)sqlite3_column_text(p, 0); sqlite3_value *pVal = sqlite3_column_value(p, 1); - sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, 0); + if( 0==sqlite3_stricmp(zK, "version") ){ + iVersion = sqlite3_value_int(pVal); + }else{ + sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, 0); + } } - rc = sqlite3_finalize(p); + if( rc==SQLITE_OK ) rc = sqlite3_finalize(p); + } + + if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ + rc = sqlite3Fts5Corrupt(); } if( rc==SQLITE_OK ){ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 570331bfb1..b7018e4768 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1371,7 +1371,7 @@ static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ va_start(ap, zFmt); zNew = sqlite3_vmprintf(zFmt, ap); va_end(ap); - if( zApp ){ + if( zApp && zNew ){ char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew); sqlite3_free(zNew); zNew = zNew2; @@ -1548,12 +1548,14 @@ static void fts5ExprFunction( const char *zNearsetCmd = "nearset"; int nConfig; /* Size of azConfig[] */ Fts5Config *pConfig = 0; + int iArg = 1; if( bTcl && nArg>1 ){ zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); + iArg = 2; } - nConfig = nArg + 2 - bTcl; + nConfig = 3 + (nArg-iArg); azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); if( azConfig==0 ){ 
sqlite3_result_error_nomem(pCtx); @@ -1562,9 +1564,10 @@ static void fts5ExprFunction( azConfig[0] = 0; azConfig[1] = "main"; azConfig[2] = "tbl"; - for(i=1+bTcl; ipRoot); } - if( rc==SQLITE_OK ){ + if( zText==0 ){ + rc = SQLITE_NOMEM; + }else{ sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); sqlite3_free(zText); } diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 8bafbd6c82..39821d04a2 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -136,6 +136,16 @@ static unsigned int fts5HashKey(int nSlot, const char *p, int n){ return (h % nSlot); } +static unsigned int fts5HashKey2(int nSlot, char b, const char *p, int n){ + int i; + unsigned int h = 13; + for(i=n-1; i>=0; i--){ + h = (h << 3) ^ h ^ p[i]; + } + h = (h << 3) ^ h ^ b; + return (h % nSlot); +} + /* ** Resize the hash table by doubling the number of slots. */ @@ -191,36 +201,44 @@ int sqlite3Fts5HashWrite( i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ + char bByte, /* First byte of token */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ - unsigned int iHash = fts5HashKey(pHash->nSlot, pToken, nToken); + unsigned int iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken); Fts5HashEntry *p; u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ /* Attempt to locate an existing hash entry */ for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ - if( memcmp(p->zKey, pToken, nToken)==0 && p->zKey[nToken]==0 ) break; + if( p->zKey[0]==bByte + && memcmp(&p->zKey[1], pToken, nToken)==0 + && p->zKey[nToken+1]==0 + ){ + break; + } } /* If an existing hash entry cannot be found, create a new one. 
*/ if( p==0 ){ - int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64; + int nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64; if( nByte<128 ) nByte = 128; if( (pHash->nEntry*2)>=pHash->nSlot ){ int rc = fts5HashResize(pHash); if( rc!=SQLITE_OK ) return rc; - iHash = fts5HashKey(pHash->nSlot, pToken, nToken); + iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken); } p = (Fts5HashEntry*)sqlite3_malloc(nByte); if( !p ) return SQLITE_NOMEM; memset(p, 0, sizeof(Fts5HashEntry)); p->nAlloc = nByte; - memcpy(p->zKey, pToken, nToken); - p->zKey[nToken] = '\0'; - p->nData = nToken + 1 + sizeof(Fts5HashEntry); + p->zKey[0] = bByte; + memcpy(&p->zKey[1], pToken, nToken); + assert( iHash==fts5HashKey(pHash->nSlot, p->zKey, nToken+1) ); + p->zKey[nToken+1] = '\0'; + p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; p->nData += 1; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 8701ff4059..a680c20c28 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -49,6 +49,12 @@ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ +#define FTS5_MAIN_PREFIX '0' + +#if FTS5_MAX_PREFIX_INDEXES > 31 +# error "FTS5_MAX_PREFIX_INDEXES is too large" +#endif + /* ** Details: ** @@ -211,7 +217,7 @@ ** Rowids for the averages and structure records in the %_data table. */ #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ -#define FTS5_STRUCTURE_ROWID(iIdx) (10 + (iIdx)) /* For structure records */ +#define FTS5_STRUCTURE_ROWID 10 /* The structure record */ /* ** Macros determining the rowids used by segment nodes. All nodes in all @@ -233,22 +239,16 @@ ** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns ** SQLITE_FULL and fails the current operation if they ever prove too small. 
*/ -#define FTS5_DATA_IDX_B 5 /* Max of 31 prefix indexes */ #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ #define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */ #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ -#define FTS5_SEGMENT_ROWID(idx, segid, height, pgno) ( \ - ((i64)(idx) << (FTS5_DATA_ID_B + FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ +#define FTS5_SEGMENT_ROWID(segid, height, pgno) ( \ ((i64)(segid) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ ((i64)(pgno)) \ ) -#if FTS5_MAX_PREFIX_INDEXES > ((1<pConfig; Fts5Structure *pRet = 0; /* Object to return */ Fts5Data *pData; /* %_data entry containing structure record */ int iCookie; /* Configuration cookie */ - assert( iIdx<=pConfig->nPrefix ); - pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID(iIdx)); + pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); if( !pData ) return 0; p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); @@ -1198,12 +1195,12 @@ static int fts5StructureCountSegments(Fts5Structure *pStruct){ #endif /* -** Serialize and store the "structure" record for index iIdx. +** Serialize and store the "structure" record. ** ** If an error occurs, leave an error code in the Fts5Index object. If an ** error has already occurred, this function is a no-op. 
*/ -static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ +static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ if( p->rc==SQLITE_OK ){ Fts5Buffer buf; /* Buffer to serialize record into */ int iLvl; /* Used to iterate through levels */ @@ -1236,7 +1233,7 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ } } - fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n); + fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); fts5BufferFree(&buf); } } @@ -1532,7 +1529,7 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ static Fts5DlidxIter *fts5DlidxIterInit( Fts5Index *p, /* Fts5 Backend to iterate within */ int bRev, /* True for ORDER BY ASC */ - int iIdx, int iSegid, /* Segment iSegid within index iIdx */ + int iSegid, /* Segment id */ int iLeafPg /* Leaf page number to load dlidx for */ ){ Fts5DlidxIter *pIter; @@ -1540,7 +1537,7 @@ static Fts5DlidxIter *fts5DlidxIterInit( pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); if( pIter==0 ) return 0; - pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPg)); + pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iSegid, iLeafPg)); if( pIter->pData==0 ){ sqlite3_free(pIter); pIter = 0; @@ -1583,7 +1580,7 @@ static void fts5SegIterNextPage( pIter->iLeafPgno++; if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, - FTS5_SEGMENT_ROWID(pIter->iIdx, pSeg->iSegid, 0, pIter->iLeafPgno) + FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno) ); }else{ pIter->pLeaf = 0; @@ -1669,15 +1666,14 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ /* ** Initialize the iterator object pIter to iterate through the entries in -** segment pSeg within index iIdx. The iterator is left pointing to the -** first entry when this function returns. +** segment pSeg. The iterator is left pointing to the first entry when +** this function returns. 
** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op. */ static void fts5SegIterInit( - Fts5Index *p, - int iIdx, /* Config.aHash[] index of FTS index */ + Fts5Index *p, /* FTS index object */ Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ @@ -1694,7 +1690,6 @@ static void fts5SegIterInit( if( p->rc==SQLITE_OK ){ memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; - pIter->iIdx = iIdx; pIter->iLeafPgno = pSeg->pgnoFirst-1; fts5SegIterNextPage(p, pIter); } @@ -1771,7 +1766,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ Fts5Data *pNew; pIter->iLeafPgno--; pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( - pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno + pIter->pSeg->iSegid, 0, pIter->iLeafPgno )); if( pNew ){ if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ @@ -1879,8 +1874,8 @@ static void fts5SegIterNext( const char *zTerm; int nList; if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ - sqlite3Fts5HashScanNext(p->apHash[0]); - sqlite3Fts5HashScanEntry(p->apHash[0], &zTerm, &pList, &nList); + sqlite3Fts5HashScanNext(p->pHash); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); } if( pList==0 ){ fts5DataRelease(pIter->pLeaf); @@ -1935,7 +1930,7 @@ static void fts5SegIterNext( ** function sets the iterator up so that iterates in reverse order through ** the doclist. 
*/ -static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ +static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; @@ -1946,7 +1941,7 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ if( fts5DlidxIterEof(p, pDlidx)==0 ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = pDlidx->iLeafPgno; - pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast)); + pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); }else{ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); } @@ -1989,7 +1984,7 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ /* The last rowid in the doclist may not be on the current page. Search ** forward to find the page containing the last rowid. */ for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ - i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pgno); + i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno); Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ int iRowid, iTerm; @@ -2029,13 +2024,12 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ } /* -** Iterator pIter currently points to the first rowid of a doclist within -** index iIdx. There is a doclist-index associated with the final term on -** the current page. If the current term is the last term on the page, -** load the doclist-index from disk and initialize an iterator at -** (pIter->pDlidx). +** Iterator pIter currently points to the first rowid of a doclist. +** There is a doclist-index associated with the final term on the current +** page. If the current term is the last term on the page, load the +** doclist-index from disk and initialize an iterator at (pIter->pDlidx). 
*/ -static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ +static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ int iSeg = pIter->pSeg->iSegid; int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ @@ -2062,20 +2056,18 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ } } - pIter->pDlidx = fts5DlidxIterInit(p, bRev, iIdx, iSeg, pIter->iTermLeafPgno); + pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } /* ** Initialize the object pIter to point to term pTerm/nTerm within segment -** pSeg, index iIdx. If there is no such term in the index, the iterator -** is set to EOF. +** pSeg. If there is no such term in the index, the iterator is set to EOF. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op. */ static void fts5SegIterSeekInit( Fts5Index *p, /* FTS5 backend */ - int iIdx, /* Config.aHash[] index of FTS index */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5StructureSegment *pSeg, /* Description of segment */ @@ -2083,20 +2075,19 @@ static void fts5SegIterSeekInit( ){ int iPg = 1; int h; - int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0); + int bGe = (flags & FTS5INDEX_QUERY_PREFIX); int bDlidx = 0; /* True if there is a doclist-index */ assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; - pIter->iIdx = iIdx; /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. 
*/ for(h=pSeg->nHeight-1; h>0; h--){ Fts5NodeIter node; /* For iterating through internal nodes */ - i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg); + i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, h, iPg); Fts5Data *pNode = fts5DataRead(p, iRowid); if( pNode==0 ) break; @@ -2149,10 +2140,10 @@ static void fts5SegIterSeekInit( pIter->flags |= FTS5_SEGITER_REVERSE; } if( bDlidx ){ - fts5SegIterLoadDlidx(p, iIdx, pIter); + fts5SegIterLoadDlidx(p, pIter); } if( flags & FTS5INDEX_QUERY_DESC ){ - fts5SegIterReverse(p, iIdx, pIter); + fts5SegIterReverse(p, pIter); } } } @@ -2160,7 +2151,7 @@ static void fts5SegIterSeekInit( /* ** Initialize the object pIter to point to term pTerm/nTerm within the -** in-memory hash table iIdx. If there is no such term in the table, the +** in-memory hash table. If there is no such term in the hash-table, the ** iterator is set to EOF. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If @@ -2168,27 +2159,25 @@ static void fts5SegIterSeekInit( */ static void fts5SegIterHashInit( Fts5Index *p, /* FTS5 backend */ - int iIdx, /* Config.aHash[] index of FTS index */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5SegIter *pIter /* Object to populate */ ){ - Fts5Hash *pHash = p->apHash[iIdx]; const u8 *pList = 0; int nList = 0; const u8 *z = 0; int n = 0; - assert( pHash ); + assert( p->pHash ); assert( p->rc==SQLITE_OK ); - if( pTerm==0 || (iIdx==0 && (flags & FTS5INDEX_QUERY_PREFIX)) ){ - p->rc = sqlite3Fts5HashScanInit(pHash, (const char*)pTerm, nTerm); - sqlite3Fts5HashScanEntry(pHash, (const char**)&z, &pList, &nList); + if( pTerm==0 || (flags & FTS5INDEX_QUERY_PREFIX) ){ + p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); + sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); n = (z ? 
strlen((const char*)z) : 0); }else{ pIter->flags |= FTS5_SEGITER_ONETERM; - sqlite3Fts5HashQuery(pHash, (const char*)pTerm, nTerm, &pList, &nList); + sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList); z = pTerm; n = nTerm; } @@ -2552,7 +2541,6 @@ static void fts5MultiIterNext( static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ - int iIdx, /* Config.aHash[] index of FTS index */ int bSkipEmpty, /* True to ignore delete-keys */ int flags, /* FTS5INDEX_QUERY_XXX flags */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ @@ -2574,7 +2562,7 @@ static void fts5MultiIterNew( if( iLevel<0 ){ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); nSeg = pStruct->nSegment; - nSeg += (p->apHash ? 1 : 0); + nSeg += (p->pHash ? 1 : 0); }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } @@ -2596,26 +2584,26 @@ static void fts5MultiIterNew( /* Initialize each of the component segment iterators. */ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; - if( p->apHash ){ + if( p->pHash ){ /* Add a segment iterator for the current contents of the hash table. 
*/ Fts5SegIter *pIter = &pNew->aSeg[iIter++]; - fts5SegIterHashInit(p, iIdx, pTerm, nTerm, flags, pIter); + fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); } for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; Fts5SegIter *pIter = &pNew->aSeg[iIter++]; if( pTerm==0 ){ - fts5SegIterInit(p, iIdx, pSeg, pIter); + fts5SegIterInit(p, pSeg, pIter); }else{ - fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, flags, pSeg, pIter); + fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); } } } }else{ pLvl = &pStruct->aLevel[iLevel]; for(iSeg=nSeg-1; iSeg>=0; iSeg--){ - fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); } } assert( iIter==nSeg ); @@ -2734,8 +2722,7 @@ static void fts5ChunkIterInit( ** currently stored in a hash table. In this case there is no leaf-rowid ** to calculate. */ if( pSeg->pSeg ){ - int iId = pSeg->pSeg->iSegid; - i64 rowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); + i64 rowid = FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pSeg->iLeafPgno); pIter->iLeafRowid = rowid; } @@ -2794,13 +2781,9 @@ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ ** Discard all data currently cached in the hash-tables. 
*/ static void fts5IndexDiscardData(Fts5Index *p){ - assert( p->apHash || p->nPendingData==0 ); - if( p->apHash ){ - Fts5Config *pConfig = p->pConfig; - int i; - for(i=0; i<=pConfig->nPrefix; i++){ - sqlite3Fts5HashClear(p->apHash[i]); - } + assert( p->pHash || p->nPendingData==0 ); + if( p->pHash ){ + sqlite3Fts5HashClear(p->pHash); p->nPendingData = 0; } } @@ -2832,8 +2815,7 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ pPg = &pWriter->aWriter[1]; if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ i64 iKey = FTS5_DOCLIST_IDX_ROWID( - pWriter->iIdx, pWriter->iSegid, - pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty + pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty ); assert( pWriter->cdlidx.n>0 ); fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n); @@ -2901,9 +2883,7 @@ static void fts5WriteBtreeTerm( if( pPage->buf.n>=p->pConfig->pgsz ){ /* pPage will be written to disk. The term will be written into the ** parent of pPage. */ - i64 iRowid = FTS5_SEGMENT_ROWID( - pWriter->iIdx, pWriter->iSegid, iHeight, pPage->pgno - ); + i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, iHeight, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); fts5BufferZero(&pPage->buf); fts5BufferZero(&pPage->term); @@ -2971,7 +2951,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ } /* Write the current page to the db. */ - iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, 0, pPage->pgno); + iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. 
*/ @@ -3179,7 +3159,7 @@ static void fts5WriteFinish( for(i=1; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; fts5DataWrite(p, - FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno), + FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pPg->pgno), pPg->buf.p, pPg->buf.n ); } @@ -3197,10 +3177,9 @@ static void fts5WriteFinish( static void fts5WriteInit( Fts5Index *p, Fts5SegWriter *pWriter, - int iIdx, int iSegid + int iSegid ){ memset(pWriter, 0, sizeof(Fts5SegWriter)); - pWriter->iIdx = iIdx; pWriter->iSegid = iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter)); @@ -3213,12 +3192,10 @@ static void fts5WriteInit( static void fts5WriteInitForAppend( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter, /* Writer to initialize */ - int iIdx, /* Index segment is a part of */ Fts5StructureSegment *pSeg /* Segment object to append to */ ){ int nByte = pSeg->nHeight * sizeof(Fts5PageWriter); memset(pWriter, 0, sizeof(Fts5SegWriter)); - pWriter->iIdx = iIdx; pWriter->iSegid = pSeg->iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); @@ -3228,7 +3205,7 @@ static void fts5WriteInitForAppend( pWriter->nWriter = pSeg->nHeight; pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; for(i=pSeg->nHeight-1; i>0; i--){ - i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pgno); + i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno); Fts5PageWriter *pPg = &pWriter->aWriter[i]; pPg->pgno = pgno; fts5DataBuffer(p, &pPg->buf, iRowid); @@ -3276,7 +3253,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ int iId = pSeg->pSeg->iSegid; u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04}; - iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iTermLeafPgno); + iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno); pData = fts5DataRead(p, iLeafRowid); if( pData ){ fts5BufferZero(&buf); @@ -3286,7 +3263,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ 
fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]); fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; - fts5DataDelete(p, FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, 1),iLeafRowid); + fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid); fts5DataWrite(p, iLeafRowid, buf.p, buf.n); } } @@ -3299,8 +3276,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ */ static void fts5IndexMergeLevel( Fts5Index *p, /* FTS5 backend object */ - int iIdx, /* Index to work on */ - Fts5Structure **ppStruct, /* IN/OUT: Stucture of index iIdx */ + Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */ ){ @@ -3321,12 +3297,11 @@ static void fts5IndexMergeLevel( memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); - writer.iIdx = iIdx; if( pLvl->nMerge ){ pLvlOut = &pStruct->aLevel[iLvl+1]; assert( pLvlOut->nSeg>0 ); nInput = pLvl->nMerge; - fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]); + fts5WriteInitForAppend(p, &writer, &pLvlOut->aSeg[pLvlOut->nSeg-1]); pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; }else{ int iSegid = fts5AllocateSegid(p, pStruct); @@ -3342,7 +3317,7 @@ static void fts5IndexMergeLevel( pLvl = &pStruct->aLevel[iLvl]; pLvlOut = &pStruct->aLevel[iLvl+1]; - fts5WriteInit(p, &writer, iIdx, iSegid); + fts5WriteInit(p, &writer, iSegid); /* Add the new segment to the output level */ pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; @@ -3362,7 +3337,7 @@ fflush(stdout); #endif assert( iLvl>=0 ); - for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter); + for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ @@ -3414,7 +3389,7 @@ fflush(stdout); /* Remove the redundant segments from the %_data table */ for(i=0; iaSeg[i].iSegid); + fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid); } /* 
Remove the redundant segments from the input level */ @@ -3441,11 +3416,10 @@ fflush(stdout); } /* -** Do up to nPg pages of automerge work on index iIdx. +** Do up to nPg pages of automerge work on the index. */ static void fts5IndexMerge( Fts5Index *p, /* FTS5 backend object */ - int iIdx, /* Index to work on */ Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nPg /* Pages of work to do */ ){ @@ -3485,7 +3459,7 @@ static void fts5IndexMerge( ){ break; } - fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); + fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ fts5StructurePromote(p, iBestLvl+1, pStruct); } @@ -3495,16 +3469,14 @@ static void fts5IndexMerge( /* ** A total of nLeaf leaf pages of data has just been flushed to a level-0 -** segments in index iIdx with structure pStruct. This function updates the -** write-counter accordingly and, if necessary, performs incremental merge -** work. +** segment. This function updates the write-counter accordingly and, if +** necessary, performs incremental merge work. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. 
*/ static void fts5IndexAutomerge( Fts5Index *p, /* FTS5 backend object */ - int iIdx, /* Index to work on */ Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nLeaf /* Number of output leaves just written */ ){ @@ -3520,13 +3492,12 @@ static void fts5IndexAutomerge( pStruct->nWriteCounter += nLeaf; nRem = p->nWorkUnit * nWork * pStruct->nLevel; - fts5IndexMerge(p, iIdx, ppStruct, nRem); + fts5IndexMerge(p, ppStruct, nRem); } } static void fts5IndexCrisismerge( Fts5Index *p, /* FTS5 backend object */ - int iIdx, /* Index to work on */ Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ ){ const int nCrisis = p->pConfig->nCrisisMerge; @@ -3535,7 +3506,7 @@ static void fts5IndexCrisismerge( assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ - fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); + fts5IndexMergeLevel(p, &pStruct, iLvl, 0); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } @@ -3584,15 +3555,15 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ -static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ - Fts5Hash *pHash = p->apHash[iHash]; +static void fts5FlushOneHash(Fts5Index *p){ + Fts5Hash *pHash = p->pHash; Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */ /* Obtain a reference to the index structure and allocate a new segment-id ** for the new level-0 segment. 
*/ - pStruct = fts5StructureRead(p, iHash); + pStruct = fts5StructureRead(p); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ @@ -3604,7 +3575,7 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ const u8 *zPrev = 0; Fts5SegWriter writer; - fts5WriteInit(p, &writer, iHash, iSegid); + fts5WriteInit(p, &writer, iSegid); /* Pre-allocate the buffer used to assemble leaf pages to the target ** page size. */ @@ -3749,9 +3720,9 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ } - fts5IndexAutomerge(p, iHash, &pStruct, pgnoLast); - fts5IndexCrisismerge(p, iHash, &pStruct); - fts5StructureWrite(p, iHash, pStruct); + fts5IndexAutomerge(p, &pStruct, pgnoLast); + fts5IndexCrisismerge(p, &pStruct); + fts5StructureWrite(p, pStruct); fts5StructureRelease(pStruct); } @@ -3759,87 +3730,77 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ ** Flush any data stored in the in-memory hash tables to the database. */ static void fts5IndexFlush(Fts5Index *p){ - Fts5Config *pConfig = p->pConfig; - int i; /* Used to iterate through indexes */ - int nLeaf = 0; /* Number of leaves written */ - - /* If an error has already occured this call is a no-op. 
*/ - if( p->nPendingData==0 ) return; - assert( p->apHash ); - - /* Flush the terms and each prefix index to disk */ - for(i=0; i<=pConfig->nPrefix; i++){ - fts5FlushOneHash(p, i, &nLeaf); + /* Unless it is empty, flush the hash table to disk */ + if( p->rc==SQLITE_OK && p->nPendingData ){ + assert( p->pHash ); + p->nPendingData = 0; + fts5FlushOneHash(p); } - p->nPendingData = 0; } int sqlite3Fts5IndexOptimize(Fts5Index *p){ - Fts5Config *pConfig = p->pConfig; - int i; + Fts5Structure *pStruct; + Fts5Structure *pNew = 0; + int nSeg = 0; assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); - for(i=0; i<=pConfig->nPrefix; i++){ - Fts5Structure *pStruct = fts5StructureRead(p, i); - Fts5Structure *pNew = 0; - int nSeg = 0; - if( pStruct ){ - assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); - nSeg = pStruct->nSegment; - if( nSeg>1 ){ - int nByte = sizeof(Fts5Structure); - nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); - pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); - } + pStruct = fts5StructureRead(p); + + if( pStruct ){ + assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); + nSeg = pStruct->nSegment; + if( nSeg>1 ){ + int nByte = sizeof(Fts5Structure); + nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); + pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); } - if( pNew ){ - Fts5StructureLevel *pLvl; - int nByte = nSeg * sizeof(Fts5StructureSegment); - pNew->nLevel = pStruct->nLevel+1; - pNew->nWriteCounter = pStruct->nWriteCounter; - pLvl = &pNew->aLevel[pStruct->nLevel]; - pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); - if( pLvl->aSeg ){ - int iLvl, iSeg; - int iSegOut = 0; - for(iLvl=0; iLvlnLevel; iLvl++){ - for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ - pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; - iSegOut++; - } + } + if( pNew ){ + Fts5StructureLevel *pLvl; + int nByte = nSeg * sizeof(Fts5StructureSegment); + pNew->nLevel = pStruct->nLevel+1; + 
pNew->nWriteCounter = pStruct->nWriteCounter; + pLvl = &pNew->aLevel[pStruct->nLevel]; + pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( pLvl->aSeg ){ + int iLvl, iSeg; + int iSegOut = 0; + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; + iSegOut++; } - pNew->nSegment = pLvl->nSeg = nSeg; - }else{ - sqlite3_free(pNew); - pNew = 0; } + pNew->nSegment = pLvl->nSeg = nSeg; + }else{ + sqlite3_free(pNew); + pNew = 0; } - - if( pNew ){ - int iLvl = pNew->nLevel-1; - while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ - int nRem = FTS5_OPT_WORK_UNIT; - fts5IndexMergeLevel(p, i, &pNew, iLvl, &nRem); - } - - fts5StructureWrite(p, i, pNew); - fts5StructureRelease(pNew); - } - - fts5StructureRelease(pStruct); } + if( pNew ){ + int iLvl = pNew->nLevel-1; + while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ + int nRem = FTS5_OPT_WORK_UNIT; + fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); + } + + fts5StructureWrite(p, pNew); + fts5StructureRelease(pNew); + } + + fts5StructureRelease(pStruct); return fts5IndexReturn(p); } int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ Fts5Structure *pStruct; - pStruct = fts5StructureRead(p, 0); - fts5IndexMerge(p, 0, &pStruct, nMerge); - fts5StructureWrite(p, 0, pStruct); + pStruct = fts5StructureRead(p); + fts5IndexMerge(p, &pStruct, nMerge); + fts5StructureWrite(p, pStruct); fts5StructureRelease(pStruct); return fts5IndexReturn(p); @@ -4040,7 +4001,7 @@ static void fts5SetupPrefixIter( const int nBuf = 32; aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); - pStruct = fts5StructureRead(p, 0); + pStruct = fts5StructureRead(p); if( aBuf && pStruct ){ Fts5DoclistIter *pDoclist; @@ -4050,7 +4011,7 @@ static void fts5SetupPrefixIter( Fts5Buffer doclist; memset(&doclist, 0, sizeof(doclist)); - for(fts5MultiIterNew(p, pStruct, 0, 1, 1, pToken, nToken, -1, 0, &p1); + for(fts5MultiIterNew(p, pStruct, 1, 1, 
pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; fts5MultiIterNext(p, p1, 0, 0) ){ @@ -4113,32 +4074,12 @@ static void fts5SetupPrefixIter( int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ assert( p->rc==SQLITE_OK ); - /* Allocate hash tables if they have not already been allocated */ - if( p->apHash==0 ){ - int i; - int rc = SQLITE_OK; - int nHash = p->pConfig->nPrefix + 1; - Fts5Hash **apNew; - - apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash); - for(i=0; rc==SQLITE_OK && inPendingData); - } - if( rc==SQLITE_OK ){ - p->apHash = apNew; - }else{ - if( apNew ){ - for(i=0; ipHash==0 ){ + p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData); } if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ - assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); } p->iWriteRowid = iRowid; @@ -4174,16 +4115,13 @@ int sqlite3Fts5IndexRollback(Fts5Index *p){ ** and the initial version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ - int i; Fts5Structure s; + assert( p->rc==SQLITE_OK ); + p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); + memset(&s, 0, sizeof(Fts5Structure)); - for(i=0; ipConfig->nPrefix+1; i++){ - fts5StructureWrite(p, i, &s); - } - if( p->rc==SQLITE_OK ){ - p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); - } + fts5StructureWrite(p, &s); return fts5IndexReturn(p); } @@ -4239,13 +4177,8 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); - if( p->apHash ){ - int i; - for(i=0; i<=p->pConfig->nPrefix; i++){ - sqlite3Fts5HashFree(p->apHash[i]); - } - sqlite3_free(p->apHash); - } + sqlite3Fts5HashFree(p->pHash); + sqlite3Fts5BufferFree(&p->scratch); sqlite3_free(p->zDataTbl); sqlite3_free(p); } @@ -4302,21 +4235,21 @@ int sqlite3Fts5IndexWrite( const char *pToken, int nToken /* Token to add or remove to or from index */ ){ int i; /* Used to iterate through indexes */ - int rc; 
/* Return code */ + int rc = SQLITE_OK; /* Return code */ Fts5Config *pConfig = p->pConfig; assert( p->rc==SQLITE_OK ); - /* Add the new token to the main terms hash table. And to each of the - ** prefix hash tables that it is large enough for. */ + /* Add the entry to the main terms index. */ rc = sqlite3Fts5HashWrite( - p->apHash[0], p->iWriteRowid, iCol, iPos, pToken, nToken + p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken ); + for(i=0; inPrefix && rc==SQLITE_OK; i++){ int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); if( nByte ){ - rc = sqlite3Fts5HashWrite( - p->apHash[i+1], p->iWriteRowid, iCol, iPos, pToken, nByte + rc = sqlite3Fts5HashWrite(p->pHash, + p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX+i+1, pToken, nByte ); } } @@ -4337,6 +4270,11 @@ int sqlite3Fts5IndexQuery( Fts5Config *pConfig = p->pConfig; Fts5IndexIter *pRet; int iIdx = 0; + Fts5Buffer buf = {0, 0, 0}; + + if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ + memcpy(&buf.p[1], pToken, nToken); + } if( flags & FTS5INDEX_QUERY_PREFIX ){ if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ @@ -4355,15 +4293,18 @@ int sqlite3Fts5IndexQuery( pRet->pIndex = p; if( iIdx<=pConfig->nPrefix ){ - pRet->pStruct = fts5StructureRead(p, iIdx); + buf.p[0] = FTS5_MAIN_PREFIX + iIdx; + pRet->pStruct = fts5StructureRead(p); if( pRet->pStruct ){ - fts5MultiIterNew(p, pRet->pStruct, - iIdx, 1, flags, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + int f = (flags & ~FTS5INDEX_QUERY_PREFIX); + fts5MultiIterNew( + p, pRet->pStruct, 1, f, buf.p, nToken+1, -1, 0, &pRet->pMulti ); } }else{ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; - fts5SetupPrefixIter(p, bDesc, (const u8*)pToken, nToken, pRet); + buf.p[0] = FTS5_MAIN_PREFIX; + fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pRet); } } @@ -4372,6 +4313,7 @@ int sqlite3Fts5IndexQuery( pRet = 0; } *ppIter = pRet; + sqlite3Fts5BufferFree(&buf); return fts5IndexReturn(p); } @@ -4520,23 +4462,20 @@ int 
sqlite3Fts5IndexReads(Fts5Index *p){ ** occurs. */ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ - int rc = SQLITE_OK; - Fts5Config *pConfig = p->pConfig; - u8 aCookie[4]; - int i; + int rc; /* Return code */ + Fts5Config *pConfig = p->pConfig; /* Configuration object */ + u8 aCookie[4]; /* Binary representation of iNew */ assert( p->rc==SQLITE_OK ); + sqlite3Fts5Put32(aCookie, iNew); - for(i=0; rc==SQLITE_OK && i<=pConfig->nPrefix; i++){ - sqlite3_blob *pBlob = 0; - i64 iRowid = FTS5_STRUCTURE_ROWID(i); - rc = sqlite3_blob_open( - pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 1, &pBlob - ); - if( rc==SQLITE_OK ){ - sqlite3_blob_write(pBlob, aCookie, 4, 0); - rc = sqlite3_blob_close(pBlob); - } + sqlite3_blob *pBlob = 0; + rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, + "block", FTS5_STRUCTURE_ROWID, 1, &pBlob + ); + if( rc==SQLITE_OK ){ + sqlite3_blob_write(pBlob, aCookie, 4, 0); + rc = sqlite3_blob_close(pBlob); } return rc; @@ -4544,7 +4483,7 @@ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ Fts5Structure *pStruct; - pStruct = fts5StructureRead(p, 0); + pStruct = fts5StructureRead(p); fts5StructureRelease(pStruct); return fts5IndexReturn(p); } @@ -4563,20 +4502,21 @@ static u64 fts5IndexEntryCksum( i64 iRowid, int iCol, int iPos, - const char *pTerm, + int iIdx, + const char *pTerm, int nTerm ){ int i; u64 ret = iRowid; ret += (ret<<3) + iCol; ret += (ret<<3) + iPos; + if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); for(i=0; irc==SQLITE_OK ){ pIter->nLvl = pSeg->nHeight-1; - pIter->iIdx = iIdx; pIter->p = p; pIter->pSeg = pSeg; } for(i=0; p->rc==SQLITE_OK && inLvl; i++){ - i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1); + i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, i+1, 1); Fts5Data *pData; pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); if( pData ){ @@ -4635,7 +4574,7 @@ static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ int iSegid = 
pIter->pSeg->iSegid; for(i--; i>=0; i--){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; - i64 iRowid = FTS5_SEGMENT_ROWID(pIter->iIdx,iSegid,i+1,pLvl[1].s.iChild); + i64 iRowid = FTS5_SEGMENT_ROWID(iSegid, i+1, pLvl[1].s.iChild); pLvl->pData = fts5DataRead(p, iRowid); if( pLvl->pData ){ fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); @@ -4668,12 +4607,11 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ ** ** Instead, it tests that the same set of pgno/rowid combinations are ** visited regardless of whether the doclist-index identified by parameters -** iIdx/iSegid/iLeaf is iterated in forwards or reverse order. +** iSegid/iLeaf is iterated in forwards or reverse order. */ #ifdef SQLITE_DEBUG static void fts5DlidxIterTestReverse( Fts5Index *p, - int iIdx, /* Index to load doclist-index from */ int iSegid, /* Segment id to load from */ int iLeaf /* Load doclist-index for this leaf */ ){ @@ -4681,7 +4619,7 @@ static void fts5DlidxIterTestReverse( i64 cksum1 = 13; i64 cksum2 = 13; - for(pDlidx=fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf); + for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(pDlidx) ){ @@ -4692,7 +4630,7 @@ static void fts5DlidxIterTestReverse( fts5DlidxIterFree(pDlidx); pDlidx = 0; - for(pDlidx=fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf); + for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterPrev(pDlidx) ){ @@ -4706,12 +4644,11 @@ static void fts5DlidxIterTestReverse( if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } #else -# define fts5DlidxIterTestReverse(w,x,y,z) +# define fts5DlidxIterTestReverse(x,y,z) #endif static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ - int iIdx, /* Index that pSeg is a part of */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ ){ Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ @@ -4719,7 +4656,7 @@ static void 
fts5IndexIntegrityCheckSegment( if( pSeg->pgnoFirst==0 ) return; /* Iterate through the b-tree hierarchy. */ - for(fts5BtreeIterInit(p, iIdx, pSeg, &iter); + for(fts5BtreeIterInit(p, pSeg, &iter); p->rc==SQLITE_OK && iter.bEof==0; fts5BtreeIterNext(&iter) ){ @@ -4731,7 +4668,7 @@ static void fts5IndexIntegrityCheckSegment( /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ if( iter.iLeafpgnoFirst ) continue; - iRow = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, iter.iLeaf); + iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iter.iLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; @@ -4771,14 +4708,14 @@ static void fts5IndexIntegrityCheckSegment( int iPg; i64 iKey; - for(pDlidx=fts5DlidxIterInit(p, 0, iIdx, iSegid, iter.iLeaf); + for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ for(iPg=iPrevLeaf+1; iPgiLeafPgno; iPg++){ - iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg); + iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; @@ -4789,7 +4726,7 @@ static void fts5IndexIntegrityCheckSegment( /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. 
*/ - iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno); + iKey = FTS5_SEGMENT_ROWID(iSegid, 0, pDlidx->iLeafPgno); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; @@ -4802,7 +4739,7 @@ static void fts5IndexIntegrityCheckSegment( } for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ - iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg); + iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; @@ -4811,7 +4748,7 @@ static void fts5IndexIntegrityCheckSegment( } fts5DlidxIterFree(pDlidx); - fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); + fts5DlidxIterTestReverse(p, iSegid, iter.iLeaf); } } @@ -4830,11 +4767,12 @@ static void fts5IndexIntegrityCheckSegment( static int fts5QueryCksum( - Fts5Index *p, - const char *z, - int n, - int flags, - u64 *pCksum + Fts5Index *p, /* Fts5 index object */ + int iIdx, + const char *z, /* Index key to query for */ + int n, /* Size of index key in bytes */ + int flags, /* Flags for Fts5IndexQuery */ + u64 *pCksum /* IN/OUT: Checksum value */ ){ u64 cksum = *pCksum; Fts5IndexIter *pIdxIter = 0; @@ -4853,7 +4791,7 @@ static int fts5QueryCksum( ){ int iCol = FTS5_POS2COLUMN(sReader.iPos); int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n); + cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); } rc = sqlite3Fts5IterNext(pIdxIter); } @@ -4875,26 +4813,25 @@ static int fts5QueryCksum( ** occurs. 
*/ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ - Fts5Config *pConfig = p->pConfig; - int iIdx; /* Used to iterate through indexes */ u64 cksum2 = 0; /* Checksum based on contents of indexes */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ + Fts5MultiSegIter *pIter; /* Used to iterate through entire index */ + Fts5Structure *pStruct; /* Index structure */ + + /* Load the FTS index structure */ + pStruct = fts5StructureRead(p); /* Check that the internal nodes of each segment match the leaves */ - for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ - Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - if( pStruct ){ - int iLvl, iSeg; - for(iLvl=0; iLvlnLevel; iLvl++){ - for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ - Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; - fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); - } + if( pStruct ){ + int iLvl, iSeg; + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + fts5IndexIntegrityCheckSegment(p, pSeg); } } - fts5StructureRelease(pStruct); } /* The cksum argument passed to this function is a checksum calculated @@ -4910,68 +4847,69 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ ** same term is performed. cksum3 is calculated based on the entries ** extracted by these queries. 
*/ - for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ - Fts5MultiSegIter *pIter; - Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter); - fts5MultiIterEof(p, pIter)==0; - fts5MultiIterNext(p, pIter, 0, 0) - ){ - int n; /* Size of term in bytes */ - i64 iPos = 0; /* Position read from poslist */ - int iOff = 0; /* Offset within poslist */ - i64 iRowid = fts5MultiIterRowid(pIter); - char *z = (char*)fts5MultiIterTerm(pIter, &n); + for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, -1, 0, &pIter); + fts5MultiIterEof(p, pIter)==0; + fts5MultiIterNext(p, pIter, 0, 0) + ){ + int n; /* Size of term in bytes */ + i64 iPos = 0; /* Position read from poslist */ + int iOff = 0; /* Offset within poslist */ + i64 iRowid = fts5MultiIterRowid(pIter); + char *z = (char*)fts5MultiIterTerm(pIter, &n); - poslist.n = 0; - fts5MultiIterPoslist(p, pIter, 0, &poslist); - while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ - int iCol = FTS5_POS2COLUMN(iPos); - int iTokOff = FTS5_POS2OFFSET(iPos); - cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, z, n); - } - - /* If this is a new term, query for it. Update cksum3 with the results. */ - if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ - int rc; - int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); - u64 ck1 = 0; - u64 ck2 = 0; - - /* Check that the results returned for ASC and DESC queries are - ** the same. If not, call this corruption. */ - rc = fts5QueryCksum(p, z, n, flags, &ck1); - if( rc==SQLITE_OK ){ - rc = fts5QueryCksum(p, z, n, flags|FTS5INDEX_QUERY_DESC, &ck2); - } - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - - /* If this is a prefix query, check that the results returned if the - ** the index is disabled are the same. In both ASC and DESC order. 
*/ - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; - ck2 = 0; - rc = fts5QueryCksum(p, z, n, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; - ck2 = 0; - rc = fts5QueryCksum(p, z, n, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - - cksum3 ^= ck1; - fts5BufferSet(&rc, &term, n, (const u8*)z); - p->rc = rc; - } + poslist.n = 0; + fts5MultiIterPoslist(p, pIter, 0, &poslist); + while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ + int iCol = FTS5_POS2COLUMN(iPos); + int iTokOff = FTS5_POS2OFFSET(iPos); + cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); + } + + /* If this is a new term, query for it. Update cksum3 with the results. */ + if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ + const char *zTerm = &z[1]; /* The term without the prefix-byte */ + int nTerm = n-1; /* Size of zTerm in bytes */ + int iIdx = (z[0] - FTS5_MAIN_PREFIX); + int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); + int rc; + u64 ck1 = 0; + u64 ck2 = 0; + + /* Check that the results returned for ASC and DESC queries are + ** the same. If not, call this corruption. */ + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); + if( rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_DESC; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + } + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + + /* If this is a prefix query, check that the results returned if the + ** the index is disabled are the same. In both ASC and DESC order. 
*/ + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + + cksum3 ^= ck1; + fts5BufferSet(&rc, &term, n, (const u8*)z); + p->rc = rc; } - fts5MultiIterFree(p, pIter); - fts5StructureRelease(pStruct); } + fts5MultiIterFree(p, pIter); + if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT; + fts5StructureRelease(pStruct); fts5BufferFree(&term); fts5BufferFree(&poslist); return fts5IndexReturn(p); @@ -4993,12 +4931,12 @@ u64 sqlite3Fts5IndexCksum( u64 ret = 0; /* Return value */ int iIdx; /* For iterating through indexes */ - ret = fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nTerm); + ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm); for(iIdx=0; iIdxnPrefix; iIdx++){ int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]); if( nByte ){ - ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nByte); + ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte); } } @@ -5017,7 +4955,6 @@ u64 sqlite3Fts5IndexCksum( */ static void fts5DecodeRowid( i64 iRowid, /* Rowid from %_data table */ - int *piIdx, /* OUT: Index */ int *piSegid, /* OUT: Segment id */ int *piHeight, /* OUT: Height */ int *piPgno /* OUT: Page number */ @@ -5029,14 +4966,11 @@ static void fts5DecodeRowid( iRowid >>= FTS5_DATA_HEIGHT_B; *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); - iRowid >>= FTS5_DATA_ID_B; - - *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); } static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ - int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ - 
fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); + int iSegid, iHeight, iPgno; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iSegid, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ @@ -5048,12 +4982,12 @@ static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ } } else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)", - iIdx, iSegid, iPgno + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx segid=%d pgno=%d)", + iSegid, iPgno ); }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)", - iIdx, iSegid, iHeight, iPgno + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(segid=%d h=%d pgno=%d)", + iSegid, iHeight, iPgno ); } } @@ -5163,7 +5097,7 @@ static void fts5DecodeFunction( sqlite3_value **apVal /* Function arguments */ ){ i64 iRowid; /* Rowid for record being decoded */ - int iIdx,iSegid,iHeight,iPgno; /* Rowid components */ + int iSegid,iHeight,iPgno; /* Rowid components */ const u8 *aBlob; int n; /* Record to decode */ u8 *a = 0; Fts5Buffer s; /* Build up text to return here */ @@ -5180,7 +5114,7 @@ static void fts5DecodeFunction( a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; memcpy(a, aBlob, n); - fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); + fts5DecodeRowid(iRowid, &iSegid, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ @@ -5301,19 +5235,19 @@ static void fts5RowidFunction( zArg = (const char*)sqlite3_value_text(apVal[0]); if( 0==sqlite3_stricmp(zArg, "segment") ){ i64 iRowid; - int idx, segid, height, pgno; - if( nArg!=5 ){ + int segid, height, pgno; + if( nArg!=4 ){ sqlite3_result_error(pCtx, - "should be: fts5_rowid('segment', idx, segid, height, pgno))", -1 + "should be: fts5_rowid('segment', segid, height, pgno))", -1 ); }else{ - idx = sqlite3_value_int(apVal[1]); - segid = sqlite3_value_int(apVal[2]); - height = sqlite3_value_int(apVal[3]); - 
pgno = sqlite3_value_int(apVal[4]); - iRowid = FTS5_SEGMENT_ROWID(idx, segid, height, pgno); + segid = sqlite3_value_int(apVal[1]); + height = sqlite3_value_int(apVal[2]); + pgno = sqlite3_value_int(apVal[3]); + iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno); sqlite3_result_int64(pCtx, iRowid); } +#if 0 }else if( 0==sqlite3_stricmp(zArg, "start-of-index") ){ i64 iRowid; int idx; @@ -5326,6 +5260,7 @@ static void fts5RowidFunction( iRowid = FTS5_SEGMENT_ROWID(idx, 1, 0, 0); sqlite3_result_int64(pCtx, iRowid); } +#endif }else { sqlite3_result_error(pCtx, "first arg to fts5_rowid() must be 'segment' " diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index e7f5027d86..0ea99c25a2 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -257,6 +257,9 @@ int sqlite3Fts5StorageOpen( pConfig, "config", "k PRIMARY KEY, v", 1, pzErr ); } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); + } } if( rc ){ @@ -543,6 +546,9 @@ int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexReinit(p->pIndex); } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); + } return rc; } @@ -983,18 +989,23 @@ int sqlite3Fts5StorageRollback(Fts5Storage *p){ int sqlite3Fts5StorageConfigValue( Fts5Storage *p, - const char *z, - sqlite3_value *pVal + const char *z, + sqlite3_value *pVal, + int iVal ){ sqlite3_stmt *pReplace = 0; int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0); if( rc==SQLITE_OK ){ sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC); - sqlite3_bind_value(pReplace, 2, pVal); + if( pVal ){ + sqlite3_bind_value(pReplace, 2, pVal); + }else{ + sqlite3_bind_int(pReplace, 2, iVal); + } sqlite3_step(pReplace); rc = sqlite3_reset(pReplace); } - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && pVal ){ int iNew = p->pConfig->iCookie + 1; rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); if( rc==SQLITE_OK ){ diff --git 
a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 522f44ce23..9f712ffc86 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -26,17 +26,17 @@ ifcapable !fts5 { do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; -} {} +} {version 1} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); SELECT * FROM ft1_config; -} {pgsz 32} +} {pgsz 32 version 1} do_execsql_test 1.3 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); SELECT * FROM ft1_config; -} {pgsz 64} +} {pgsz 64 version 1} #-------------------------------------------------------------------------- # Test the logic for parsing the rank() function definition. diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index 7d0ea9d2bc..efbe3f5d84 100644 --- a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -37,7 +37,7 @@ set segid [lindex [fts5_level_segids t1] 0] do_test 1.3 { execsql { - DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4); + DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} @@ -46,7 +46,7 @@ do_test 1.4 { db_restore_and_reopen execsql { UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE - rowid = fts5_rowid('segment', 0, $segid, 0, 4); + rowid = fts5_rowid('segment', $segid, 0, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index 74591cda78..7cbd7b00e8 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -126,8 +126,8 @@ do_execsql_test 3.0 { } foreach {tn hdr} { - 1 "\00\00\00\00" - 2 "\FF\FF\FF\FF" + 1 "\x00\x00\x00\x00" + 2 "\xFF\xFF\xFF\xFF" } { set tn2 0 set nCorrupt 0 diff --git a/ext/fts5/test/fts5ea.test b/ext/fts5/test/fts5ea.test index 
1248edea0b..b80e767b63 100644 --- a/ext/fts5/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -79,6 +79,13 @@ foreach {tn expr err} { do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] } +#------------------------------------------------------------------------- +# Experiment with a tokenizer that considers " to be a token character. +# +do_execsql_test 4.0 { + SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); +} {{"a" AND """"}} + diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index ff6e2483e9..56f73c3ab7 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -32,12 +32,12 @@ ifcapable !fts5 { # faultsim_save_and_close -do_faultsim_test 1 -prep { +do_faultsim_test 1 -faults ioerr-t* -prep { faultsim_restore_and_reopen } -body { execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3') } } -test { - faultsim_test_result {0 {}} + faultsim_test_result {0 {}} {1 {vtable constructor failed: t1}} } reset_db diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index 943d331db8..df2112c63f 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -201,8 +201,6 @@ do_faultsim_test 6.2 -faults oom-t* -body { faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM} } -} - #------------------------------------------------------------------------- # OOM error when querying for a phrase with many tokens. # @@ -304,6 +302,36 @@ do_faultsim_test 9.1 -faults oom-* -body { faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM} } +#------------------------------------------------------------------------- +# OOM in fts5_expr() SQL function. 
+# +reset_db +do_execsql_test 10.0 { + CREATE VIRTUAL TABLE tt USING fts5(x); + INSERT INTO tt(tt, rank) VALUES('pgsz', 32); + BEGIN; + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<200) + INSERT INTO tt(rowid, x) + SELECT i, CASE WHEN (i%50)==0 THEN 'a a a a a a' ELSE 'a x a x a x' END + FROM ii; + COMMIT; +} + +} + +do_faultsim_test 10.1 -faults oom-t* -body { + db one { SELECT fts5_expr('a AND b NEAR(a b)') } +} -test { + faultsim_test_result {0 {"a" AND ("b" AND NEAR("a" "b", 10))}} +} + +#do_faultsim_test 10.2 -faults oom-t* -body { +# db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') } +#} -test { +# set res {[ns -col 0 -- {a b c}] && ([ns -- {b}] && [ns -near 10 -- {a} {b}]} +# faultsim_test_result [list 0 $res] +#} + finish_test diff --git a/ext/fts5/test/fts5integrity.test b/ext/fts5/test/fts5integrity.test new file mode 100644 index 0000000000..a6dc34a90e --- /dev/null +++ b/ext/fts5/test/fts5integrity.test @@ -0,0 +1,35 @@ +# 2015 Jan 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file containst tests focused on the integrity-check procedure. 
+# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5integrity + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE xx USING fts5(x); + INSERT INTO xx VALUES('term'); +} +do_execsql_test 1.1 { + INSERT INTO xx(xx) VALUES('integrity-check'); +} + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1); + INSERT INTO yy VALUES('term'); +} +do_execsql_test 2.1 { + INSERT INTO yy(yy) VALUES('integrity-check'); +} + +finish_test + diff --git a/ext/fts5/test/fts5prefix.test b/ext/fts5/test/fts5prefix.test index 7c5a1a39a9..c555080a27 100644 --- a/ext/fts5/test/fts5prefix.test +++ b/ext/fts5/test/fts5prefix.test @@ -15,17 +15,28 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5prefix +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1); + INSERT INTO xx VALUES('one two three'); + INSERT INTO xx VALUES('four five six'); + INSERT INTO xx VALUES('seven eight nine ten'); +} + +do_execsql_test 1.1 { + SELECT rowid FROM xx WHERE xx MATCH 't*' +} {1 3} + #------------------------------------------------------------------------- # Check that prefix indexes really do index n-character prefixes, not # n-byte prefixes. Use the ascii tokenizer so as not to be confused by # diacritic removal. 
# -do_execsql_test 1.0 { +do_execsql_test 2.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) } -do_test 1.2 { +do_test 2.1 { foreach {rowid string} { 1 "\xCA\xCB\xCC\xCD" 2 "\u1234\u5678\u4321\u8765" @@ -34,26 +45,15 @@ do_test 1.2 { } } {} -do_execsql_test 1.1.2 { +do_execsql_test 2.2 { INSERT INTO t1(t1) VALUES('integrity-check'); } -#db eval { select fts5_decode(id, block) AS d FROM t1_data; } { puts $d } - -foreach o {1 2} { - if {$o==2} breakpoint - foreach {tn q res} { - 1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1 - 2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2 - } { - do_execsql_test 1.$o.$tn $q $res - } - - execsql { - DELETE FROM t1_data WHERE - rowid>=fts5_rowid('start-of-index', 0) AND - rowid=$nOpt } usage + set O(prefix) [lindex $argv $i] + } + default { usage } @@ -98,8 +105,10 @@ sqlite3 db $dbfile db func loadfile loadfile db transaction { + set pref "" + if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { - db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))" + db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { diff --git a/manifest b/manifest index 53ddbec52d..c9a1dfd035 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Reorganize\ssome\sof\sthe\sfts5\sexpression\sparsing\scode.\sImprove\stest\scoverage\sof\sthe\ssame. -D 2015-05-02T20:35:24.467 +C Change\sto\sstoring\sall\skeys\sin\sa\ssingle\smerge-tree\sstructure\sinstead\sof\sone\smain\sstructure\sand\sa\sseparate\sone\sfor\seach\sprefix\sindex.\sThis\sis\sa\sfile-format\schange.\sAlso\sintroduce\sa\smechanism\sfor\smanaging\sfile-format\schanges. 
+D 2015-05-07T19:29:46.763 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,16 +104,16 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 3a0a73bcfbcb7e65ccda099cfb8fd268d2480c7e +F ext/fts5/fts5.c 62b2657320aac309d7bcf2bfb855f8d4c216ae15 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 05e97ffb2911e8c8cfcb8bdb009e17347c24eb2d +F ext/fts5/fts5Int.h 94b1800ea50e52ce19365744174c65e6fc8b87e0 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b -F ext/fts5/fts5_config.c 4e0de8bea4746a7560740b9dcf8be4dced68ef4f -F ext/fts5/fts5_expr.c f49d68411dc72cb66f2b55cc109dbf3dce368eef -F ext/fts5/fts5_hash.c 29d8b0668727863cc1f1efa65efe4dd78635b016 -F ext/fts5/fts5_index.c de588982b0237b1605d6c37afd115b34c95c3da1 -F ext/fts5/fts5_storage.c ef60fc9dcc4e274f9589165e26833173c273ae18 +F ext/fts5/fts5_config.c 7a8b4665239a4f3001a4ecbc77573c42d2694161 +F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 +F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 +F ext/fts5/fts5_index.c aa8d73d043417740c07861beb78c86103a6a9d90 +F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d F ext/fts5/fts5_tcl.c aa3b102bb01f366174718be7ce8e9311b9abb482 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d @@ -131,38 +131,39 @@ F ext/fts5/test/fts5ah.test d74cf8b7de5b8424f732acef69fe12122a12f2bf F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F 
ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 -F ext/fts5/test/fts5al.test e6bddd2c11c0d1e3ae189ee51081899d2f4ea570 +F ext/fts5/test/fts5al.test 8cde0a064ffe452281b7c90a759d220f796bbb20 F ext/fts5/test/fts5aux.test d9c724351d8e4dc46cad1308c0b4b8ac94d07660 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f -F ext/fts5/test/fts5corrupt.test 138aecc75c36c3dac9259c7f57c5bc3d009255f8 -F ext/fts5/test/fts5corrupt2.test 494111fd4f2dab36499cf97718eaba1f7c11e9d0 +F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 +F ext/fts5/test/fts5corrupt2.test c65a6619a1f712b87be0ccb3ef1a2120bf1f6430 F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 -F ext/fts5/test/fts5ea.test f4d35cd2776dab9358206f7d88a67ea187fdec22 +F ext/fts5/test/fts5ea.test ed163ed820fd503354bd7dcf9d3b0e3801ade962 F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e -F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d +F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test 09728cadb4897c97cea092edb9c431d9ec25b88b +F ext/fts5/test/fts5fault4.test 420f2e23775b458eeb9a325bcdfe84650c2e9d39 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa +F ext/fts5/test/fts5integrity.test 39deee579b84df2786d9c8298e9196b339cfc872 F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 F ext/fts5/test/fts5near.test 
3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e -F ext/fts5/test/fts5prefix.test 1287803c3df0e43f536196256fb9e0e6baccb4f1 +F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 -F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 +F ext/fts5/test/fts5rowid.test 0dd51524739ebe5f1251a25f3d3ece9840fdc1a8 F ext/fts5/test/fts5tokenizer.test bbcde2a7473dcaa9a1fc6809aa8965acb7b846ff F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 -F ext/fts5/tool/loadfts5.tcl 1e126891d14ab85dcdb0fac7755a4cd5ba52e8b8 +F ext/fts5/tool/loadfts5.tcl 8a8f10d7d2d0d77f622e0a84cc0824c158c34a52 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1315,7 +1316,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d4331943dff259380c4025bb740d8aba6972d351 -R 5651f6663bbd8efe3d630621e3f4b900 +P c4456dc5f5f8f45f04e3bbae53b6bcc209fc27d5 +R d535b9198036ecff3827ebadb9a9b6f4 U dan -Z 822e7611ba479003f18b45bbb7ca820a +Z 02d7c47a9ec4004e437813912cd05f33 diff --git a/manifest.uuid b/manifest.uuid index ddc1ccf8f3..d0b2967a0c 100644 
--- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c4456dc5f5f8f45f04e3bbae53b6bcc209fc27d5 \ No newline at end of file +a684b5e2d9d52cf4700e7e5f9dd547a2ba54e8e9 \ No newline at end of file From 76724372ae254b8381fcac659c7a386c8ae80922 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 8 May 2015 09:21:05 +0000 Subject: [PATCH 123/206] Improve the error message returned by FTS5 if it encounters an unknown file format. FossilOrigin-Name: f369caec145f311bb136cf7af144e2695badcb9b --- ext/fts5/fts5.c | 17 ++++++++-- ext/fts5/fts5Int.h | 8 +++++ ext/fts5/fts5_config.c | 9 +++++- ext/fts5/test/fts5version.test | 59 ++++++++++++++++++++++++++++++++++ manifest | 17 +++++----- manifest.uuid | 2 +- 6 files changed, 100 insertions(+), 12 deletions(-) create mode 100644 ext/fts5/test/fts5version.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index df1646786a..cd4eff325d 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -943,6 +943,10 @@ static int fts5FilterMethod( Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int bDesc = ((idxNum & FTS5_ORDER_DESC) ? 1 : 0); int rc = SQLITE_OK; + char **pzErrmsg = pTab->pConfig->pzErrmsg; + + assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg ); + pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; assert( nVal<=2 ); assert( pCsr->pStmt==0 ); @@ -1004,6 +1008,7 @@ static int fts5FilterMethod( } } + pTab->pConfig->pzErrmsg = pzErrmsg; return rc; } @@ -1205,6 +1210,9 @@ static int fts5UpdateMethod( /* A transaction must be open when this is called. */ assert( pTab->ts.eState==1 ); + assert( pTab->pConfig->pzErrmsg==0 ); + pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; + /* A delete specifies a single argument - the rowid of the row to remove. 
** Update and insert operations pass: ** @@ -1241,10 +1249,11 @@ static int fts5UpdateMethod( if( pConfig->eContent!=FTS5_CONTENT_NORMAL && 0==sqlite3_stricmp("delete", z) ){ - return fts5SpecialDelete(pTab, apVal, pRowid); + rc = fts5SpecialDelete(pTab, apVal, pRowid); }else{ - return fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]); + rc = fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]); } + goto update_method_out; } } @@ -1253,6 +1262,8 @@ static int fts5UpdateMethod( rc = sqlite3Fts5StorageInsert(pTab->pStorage, apVal, eConflict, pRowid); } + update_method_out: + pTab->pConfig->pzErrmsg = 0; return rc; } @@ -1263,8 +1274,10 @@ static int fts5SyncMethod(sqlite3_vtab *pVtab){ int rc; Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_SYNC, 0); + pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; fts5TripCursors(pTab); rc = sqlite3Fts5StorageSync(pTab->pStorage, 1); + pTab->pConfig->pzErrmsg = 0; return rc; } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index d09029710b..696a8ea5a7 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -96,6 +96,11 @@ typedef struct Fts5Config Fts5Config; ** ** zContentExprlist: ** +** pzErrmsg: +** This exists in order to allow the fts5_index.c module to return a +** decent error message if it encounters a file-format version it does +** not understand. +** */ struct Fts5Config { sqlite3 *db; /* Database handle */ @@ -120,6 +125,9 @@ struct Fts5Config { int nCrisisMerge; /* Maximum allowed segments per level */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ + + /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. 
*/ + char **pzErrmsg; }; /* Current expected value of %_config table 'version' field */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 0846eec8f6..90d7b7318f 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -851,7 +851,14 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ } if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ - rc = sqlite3Fts5Corrupt(); + rc = SQLITE_ERROR; + if( pConfig->pzErrmsg ){ + assert( 0==*pConfig->pzErrmsg ); + *pConfig->pzErrmsg = sqlite3_mprintf( + "invalid fts5 file format (found %d, expected %d) - run 'rebuild'", + iVersion, FTS5_CURRENT_VERSION + ); + } } if( rc==SQLITE_OK ){ diff --git a/ext/fts5/test/fts5version.test b/ext/fts5/test/fts5version.test new file mode 100644 index 0000000000..4e5df579e5 --- /dev/null +++ b/ext/fts5/test/fts5version.test @@ -0,0 +1,59 @@ +# 2015 Apr 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# The tests in this file focus on testing that unrecognized file-format +# versions are detected and reported. 
+# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5version + + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts5(one); + INSERT INTO t1 VALUES('a b c d'); +} {} + +do_execsql_test 1.2 { + SELECT * FROM t1_config WHERE k='version' +} {version 1} + +do_execsql_test 1.3 { + SELECT rowid FROM t1 WHERE t1 MATCH 'a'; +} {1} + +do_execsql_test 1.4 { + UPDATE t1_config set v=2 WHERE k='version'; +} + +do_test 1.5 { + db close + sqlite3 db test.db + catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } +} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} + +breakpoint +do_test 1.6 { + db close + sqlite3 db test.db + catchsql { INSERT INTO t1 VALUES('x y z') } +} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} + +do_test 1.7 { + execsql { DELETE FROM t1_config WHERE k='version' } + db close + sqlite3 db test.db + catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } +} {1 {invalid fts5 file format (found 0, expected 1) - run 'rebuild'}} + + +finish_test + diff --git a/manifest b/manifest index c9a1dfd035..76b5dac082 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sto\sstoring\sall\skeys\sin\sa\ssingle\smerge-tree\sstructure\sinstead\sof\sone\smain\sstructure\sand\sa\sseparate\sone\sfor\seach\sprefix\sindex.\sThis\sis\sa\sfile-format\schange.\sAlso\sintroduce\sa\smechanism\sfor\smanaging\sfile-format\schanges. -D 2015-05-07T19:29:46.763 +C Improve\sthe\serror\smessage\sreturned\sby\sFTS5\sif\sit\sencounters\san\sunknown\sfile\sformat. 
+D 2015-05-08T09:21:05.416 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,12 +104,12 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 62b2657320aac309d7bcf2bfb855f8d4c216ae15 +F ext/fts5/fts5.c 7f58ea9ba1e72038137963719c5b5335f499cecd F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 94b1800ea50e52ce19365744174c65e6fc8b87e0 +F ext/fts5/fts5Int.h be8ac04ce40705aa088c3d2509cadad0f98085fa F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b -F ext/fts5/fts5_config.c 7a8b4665239a4f3001a4ecbc77573c42d2694161 +F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c aa8d73d043417740c07861beb78c86103a6a9d90 @@ -163,6 +163,7 @@ F ext/fts5/test/fts5tokenizer.test bbcde2a7473dcaa9a1fc6809aa8965acb7b846ff F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 +F ext/fts5/test/fts5version.test 1c902eaa7359336293ac45c7a34616527513e9fb F ext/fts5/tool/loadfts5.tcl 8a8f10d7d2d0d77f622e0a84cc0824c158c34a52 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1316,7 +1317,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F 
tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c4456dc5f5f8f45f04e3bbae53b6bcc209fc27d5 -R d535b9198036ecff3827ebadb9a9b6f4 +P a684b5e2d9d52cf4700e7e5f9dd547a2ba54e8e9 +R ee87a64da50ec14a005ebb86ec227c20 U dan -Z 02d7c47a9ec4004e437813912cd05f33 +Z 8aaf7ae5929104d0a1ed12733f883e2b diff --git a/manifest.uuid b/manifest.uuid index d0b2967a0c..9406775af2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a684b5e2d9d52cf4700e7e5f9dd547a2ba54e8e9 \ No newline at end of file +f369caec145f311bb136cf7af144e2695badcb9b \ No newline at end of file From 5e38f1c9bf726872fa55edadf849c9936fd1d93e Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 8 May 2015 20:21:24 +0000 Subject: [PATCH 124/206] Add the fts5vocab module, for direct access to the fts5 index. FossilOrigin-Name: 6bf93e3b56e6705b7d12bab5024fc615f373b36c --- ext/fts5/fts5.c | 41 ++-- ext/fts5/fts5Int.h | 53 +++-- ext/fts5/fts5_index.c | 48 ++++- ext/fts5/fts5_vocab.c | 370 +++++++++++++++++++++++++++++++++++ ext/fts5/test/fts5vocab.test | 55 ++++++ main.mk | 7 +- manifest | 20 +- manifest.uuid | 2 +- 8 files changed, 553 insertions(+), 43 deletions(-) create mode 100644 ext/fts5/fts5_vocab.c create mode 100644 ext/fts5/test/fts5vocab.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index cd4eff325d..73bcd88953 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -145,11 +145,9 @@ struct Fts5Sorter { /* ** Virtual-table cursor object. ** -** zSpecial: +** iSpecial: ** If this is a 'special' query (refer to function fts5SpecialMatch()), -** then this variable points to a nul-terminated buffer containing the -** result to return through the table-name column. It is nul-terminated -** and should eventually be freed using sqlite3_free(). +** then this variable contains the result of the query. 
*/ struct Fts5Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ @@ -159,7 +157,7 @@ struct Fts5Cursor { Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ int csrflags; /* Mask of cursor flags (see below) */ Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ - char *zSpecial; /* Result of special query */ + i64 iSpecial; /* Result of special query */ /* "rank" function. Populated on demand from vtab.xColumn(). */ char *zRank; /* Custom rank function */ @@ -564,7 +562,6 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ sqlite3_finalize(pCsr->pRankArgStmt); sqlite3_free(pCsr->apRankArg); - sqlite3_free(pCsr->zSpecial); if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){ sqlite3_free(pCsr->zRank); sqlite3_free(pCsr->zRankArgs); @@ -799,12 +796,13 @@ static int fts5SpecialMatch( for(n=0; z[n] && z[n]!=' '; n++); assert( pTab->base.zErrMsg==0 ); - assert( pCsr->zSpecial==0 ); + pCsr->idxNum = FTS5_PLAN_SPECIAL; if( 0==sqlite3_strnicmp("reads", z, n) ){ - pCsr->zSpecial = sqlite3_mprintf("%d", sqlite3Fts5IndexReads(pTab->pIndex)); - pCsr->idxNum = FTS5_PLAN_SPECIAL; - if( pCsr->zSpecial==0 ) rc = SQLITE_NOMEM; + pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->pIndex); + } + else if( 0==sqlite3_strnicmp("id", z, n) ){ + pCsr->iSpecial = pCsr->iCsrId; } else{ /* An unrecognized directive. Return an error message. */ @@ -1668,6 +1666,26 @@ static void fts5ApiCallback( } } + +/* +** Given cursor id iId, return a pointer to the corresponding Fts5Index +** object. Or NULL If the cursor id does not exist. 
+*/ +Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ + Fts5Cursor *pCsr; + Fts5Index *pIndex = 0; + + for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ + if( pCsr->iCsrId==iCsrId ) break; + } + if( pCsr ){ + Fts5Table *pTab = (Fts5Table*)pCsr->base.pVtab; + pIndex = pTab->pIndex; + } + + return pIndex; +} + /* ** Return a "position-list blob" corresponding to the current position of ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains @@ -1728,7 +1746,7 @@ static int fts5ColumnMethod( if( pCsr->idxNum==FTS5_PLAN_SPECIAL ){ if( iCol==pConfig->nCol ){ - sqlite3_result_text(pCtx, pCsr->zSpecial, -1, SQLITE_TRANSIENT); + sqlite3_result_int64(pCtx, pCsr->iSpecial); } }else @@ -2059,6 +2077,7 @@ int sqlite3Fts5Init(sqlite3 *db){ if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); + if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db); if( rc==SQLITE_OK ){ rc = sqlite3_create_function( db, "fts5", 0, SQLITE_UTF8, p, fts5Fts5Func, 0, 0 diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 696a8ea5a7..018f26c00a 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -51,24 +51,8 @@ extern int sqlite3_fts5_may_be_corrupt; # define assert_nc(x) assert(x) #endif -/************************************************************************** -** Interface to code in fts5.c. -*/ typedef struct Fts5Global Fts5Global; -int sqlite3Fts5GetTokenizer( - Fts5Global*, - const char **azArg, - int nArg, - Fts5Tokenizer**, - fts5_tokenizer**, - char **pzErr -); - -/* -** End of interface to code in fts5.c. -**************************************************************************/ - /************************************************************************** ** Interface to code in fts5_config.c. fts5_config.c contains contains code ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. 
@@ -260,6 +244,7 @@ typedef struct Fts5IndexIter Fts5IndexIter; #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ +#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* ** Create/destroy an Fts5Index object. @@ -303,6 +288,13 @@ int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn); */ void sqlite3Fts5IterClose(Fts5IndexIter*); +/* +** This interface is used by the fts5vocab module. +*/ +const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*); +int sqlite3Fts5IterNextScan(Fts5IndexIter*); + + /* ** Insert or remove data to or from the index. Each time a document is ** added to or removed from the index, this function is called one or more @@ -390,6 +382,25 @@ int sqlite3Fts5GetVarintLen(u32 iVal); ** End of interface to code in fts5_index.c. **************************************************************************/ +/************************************************************************** +** Interface to code in fts5.c. +*/ + +int sqlite3Fts5GetTokenizer( + Fts5Global*, + const char **azArg, + int nArg, + Fts5Tokenizer**, + fts5_tokenizer**, + char **pzErr +); + +Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64); + +/* +** End of interface to code in fts5.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_hash.c. */ @@ -607,4 +618,14 @@ int sqlite3Fts5SorterNew(Fts5Expr *pExpr, Fts5Sorter **pp); ** End of interface to code in fts5_sorter.c. **************************************************************************/ +/************************************************************************** +** Interface to code in fts5_vocab.c. +*/ + +int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); + +/* +** End of interface to code in fts5_vocab.c. 
+**************************************************************************/ + #endif diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index a680c20c28..cd15c71b56 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2075,7 +2075,7 @@ static void fts5SegIterSeekInit( ){ int iPg = 1; int h; - int bGe = (flags & FTS5INDEX_QUERY_PREFIX); + int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); @@ -2171,7 +2171,7 @@ static void fts5SegIterHashInit( assert( p->pHash ); assert( p->rc==SQLITE_OK ); - if( pTerm==0 || (flags & FTS5INDEX_QUERY_PREFIX) ){ + if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); n = (z ? strlen((const char*)z) : 0); @@ -4004,6 +4004,7 @@ static void fts5SetupPrefixIter( pStruct = fts5StructureRead(p); if( aBuf && pStruct ){ + const int flags = FTS5INDEX_QUERY_SCAN; Fts5DoclistIter *pDoclist; int i; i64 iLastRowid = 0; @@ -4011,7 +4012,7 @@ static void fts5SetupPrefixIter( Fts5Buffer doclist; memset(&doclist, 0, sizeof(doclist)); - for(fts5MultiIterNew(p, pStruct, 1, 1, pToken, nToken, -1, 0, &p1); + for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; fts5MultiIterNext(p, p1, 0, 0) ){ @@ -4272,6 +4273,11 @@ int sqlite3Fts5IndexQuery( int iIdx = 0; Fts5Buffer buf = {0, 0, 0}; + /* If the QUERY_SCAN flag is set, all other flags must be clear. 
*/ + assert( (flags & FTS5INDEX_QUERY_SCAN)==0 + || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN + ); + if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); } @@ -4296,9 +4302,8 @@ int sqlite3Fts5IndexQuery( buf.p[0] = FTS5_MAIN_PREFIX + iIdx; pRet->pStruct = fts5StructureRead(p); if( pRet->pStruct ){ - int f = (flags & ~FTS5INDEX_QUERY_PREFIX); fts5MultiIterNew( - p, pRet->pStruct, 1, f, buf.p, nToken+1, -1, 0, &pRet->pMulti + p, pRet->pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet->pMulti ); } }else{ @@ -4343,6 +4348,29 @@ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ return fts5IndexReturn(pIter->pIndex); } +/* +** Move to the next matching term/rowid. Used by the fts5vocab module. +*/ +int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){ + Fts5Index *p = pIter->pIndex; + Fts5MultiSegIter *pMulti = pIter->pMulti; + + assert( pIter->pIndex->rc==SQLITE_OK ); + assert( pMulti ); + + fts5BufferZero(&pIter->poslist); + fts5MultiIterNext(p, pMulti, 0, 0); + if( p->rc==SQLITE_OK ){ + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; + if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ + fts5DataRelease(pSeg->pLeaf); + pSeg->pLeaf = 0; + } + } + + return fts5IndexReturn(pIter->pIndex); +} + /* ** Move the doclist-iter passed as the first argument to the next ** matching rowid that occurs at or after iMatch. The definition of "at @@ -4383,6 +4411,16 @@ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ } } +/* +** Return the current term. 
+*/ +const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ + int n; + const char *z = fts5MultiIterTerm(pIter->pMulti, &n); + *pn = n-1; + return &z[1]; +} + /* ** Return a pointer to a buffer containing a copy of the position list for diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c new file mode 100644 index 0000000000..8d4d6c1762 --- /dev/null +++ b/ext/fts5/fts5_vocab.c @@ -0,0 +1,370 @@ +/* +** 2015 May 08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This is an SQLite module implementing full-text search. +*/ + +#if defined(SQLITE_ENABLE_FTS5) + +#include "fts5Int.h" + + +typedef struct Fts5VocabTable Fts5VocabTable; +typedef struct Fts5VocabCursor Fts5VocabCursor; + +struct Fts5VocabTable { + sqlite3_vtab base; + char *zFts5Tbl; /* Name of fts5 table */ + char *zFts5Db; /* Db containing fts5 table */ + sqlite3 *db; /* Database handle */ + Fts5Global *pGlobal; /* FTS5 global object for this database */ +}; + +struct Fts5VocabCursor { + sqlite3_vtab_cursor base; + sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */ + Fts5Index *pIndex; /* Associated FTS5 index */ + + Fts5IndexIter *pIter; /* Iterator object */ + int bEof; /* True if this cursor is at EOF */ + Fts5Buffer term; /* Current value of 'term' column */ + i64 nRow; /* Current value of 'row' column */ + i64 nInst; /* Current value of 'inst' column */ + i64 rowid; /* Current value of rowid column */ +}; + + +/* +** The xDisconnect() virtual table method. +*/ +static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ + Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* +** The xDestroy() virtual table method. 
+*/ +static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ + Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* +** This function is the implementation of both the xConnect and xCreate +** methods of the FTS3 virtual table. +** +** The argv[] array contains the following: +** +** argv[0] -> module name ("fts5vocab") +** argv[1] -> database name +** argv[2] -> table name +** argv[3] -> name of fts5 table +*/ +static int fts5VocabInitVtab( + sqlite3 *db, /* The SQLite database connection */ + void *pAux, /* Pointer to Fts5Global object */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ + char **pzErr /* Write any error message here */ +){ + const char *zSchema = "CREATE TABLE vvv(term, row, inst)"; + Fts5VocabTable *pRet = 0; + int rc = SQLITE_OK; /* Return code */ + + if( argc!=4 ){ + *pzErr = sqlite3_mprintf("wrong number of vtable arguments"); + rc = SQLITE_ERROR; + }else{ + int nByte; /* Bytes of space to allocate */ + const char *zDb = argv[1]; + const char *zTab = argv[3]; + int nDb = strlen(zDb) + 1; + int nTab = strlen(zTab) + 1; + + rc = sqlite3_declare_vtab(db, zSchema); + + nByte = sizeof(Fts5VocabTable) + nDb + nTab; + pRet = sqlite3Fts5MallocZero(&rc, nByte); + if( pRet ){ + pRet->pGlobal = (Fts5Global*)pAux; + pRet->db = db; + pRet->zFts5Tbl = (char*)&pRet[1]; + pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; + memcpy(pRet->zFts5Tbl, zTab, nTab); + memcpy(pRet->zFts5Db, zDb, nDb); + } + } + + *ppVTab = (sqlite3_vtab*)pRet; + return rc; +} + + +/* +** The xConnect() and xCreate() methods for the virtual table. All the +** work is done in function fts5VocabInitVtab(). 
+*/ +static int fts5VocabConnectMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); +} +static int fts5VocabCreateMethod( + sqlite3 *db, /* Database connection */ + void *pAux, /* Pointer to tokenizer hash table */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); +} + +/* +** Implementation of the xBestIndex method. +*/ +static int fts5VocabBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + return SQLITE_OK; +} + +/* +** Implementation of xOpen method. 
+*/ +static int fts5VocabOpenMethod( + sqlite3_vtab *pVTab, + sqlite3_vtab_cursor **ppCsr +){ + Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; + Fts5VocabCursor *pCsr; + int rc = SQLITE_OK; + + pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5VocabCursor)); + if( pCsr ){ + char *zSql = sqlite3_mprintf( + "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", + pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pStmt, 0); + } + sqlite3_free(zSql); + if( rc==SQLITE_OK && sqlite3_step(pCsr->pStmt)==SQLITE_ROW ){ + i64 iId = sqlite3_column_int64(pCsr->pStmt, 0); + pCsr->pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId); + } + + if( rc==SQLITE_OK && pCsr->pIndex==0 ){ + rc = sqlite3_finalize(pCsr->pStmt); + pCsr->pStmt = 0; + if( rc==SQLITE_OK ){ + pVTab->zErrMsg = sqlite3_mprintf( + "no such fts5 table: %Q.%Q", pTab->zFts5Db, pTab->zFts5Tbl + ); + rc = SQLITE_ERROR; + } + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(pCsr); + pCsr = 0; + } + } + + + *ppCsr = (sqlite3_vtab_cursor*)pCsr; + return rc; +} + +static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ + pCsr->rowid = 0; + sqlite3Fts5IterClose(pCsr->pIter); + pCsr->pIter = 0; +} + +/* +** Close the cursor. For additional information see the documentation +** on the xClose method of the virtual table interface. +*/ +static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ + if( pCursor ){ + Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + fts5VocabResetCursor(pCsr); + sqlite3Fts5BufferFree(&pCsr->term); + sqlite3_finalize(pCsr->pStmt); + sqlite3_free(pCsr); + } + return SQLITE_OK; +} + + +/* +** Advance the cursor to the next row in the table. 
+*/ +static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + int rc = SQLITE_OK; + + if( sqlite3Fts5IterEof(pCsr->pIter) ){ + pCsr->bEof = 1; + }else{ + const char *zTerm; + int nTerm; + + zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); + sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); + pCsr->nInst = 0; + pCsr->nRow = 0; + pCsr->rowid++; + + while( 1 ){ + const u8 *pPos; int nPos; /* Position list */ + i64 dummy = 0; + int iOff = 0; + + rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos); + if( rc!=SQLITE_OK ) break; + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &dummy) ){ + pCsr->nInst++; + } + pCsr->nRow++; + + rc = sqlite3Fts5IterNextScan(pCsr->pIter); + if( rc!=SQLITE_OK ) break; + zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); + if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break; + if( sqlite3Fts5IterEof(pCsr->pIter) ) break; + } + } + return rc; +} + +/* +** This is the xFilter implementation for the virtual table. +*/ +static int fts5VocabFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + int rc; + const int flags = FTS5INDEX_QUERY_SCAN; + + fts5VocabResetCursor(pCsr); + rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, &pCsr->pIter); + if( rc==SQLITE_OK ){ + rc = fts5VocabNextMethod(pCursor); + } + + return rc; +} + +/* +** This is the xEof method of the virtual table. SQLite calls this +** routine to find out if it has reached the end of a result set. 
+*/ +static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + return pCsr->bEof; +} + +static int fts5VocabColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + switch( iCol ){ + case 0: /* term */ + sqlite3_result_text( + pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT + ); + break; + + case 1: /* row */ + sqlite3_result_int64(pCtx, pCsr->nRow); + break; + + case 2: /* inst */ + sqlite3_result_int64(pCtx, pCsr->nInst); + break; + + default: + assert( 0 ); + } + return SQLITE_OK; +} + +/* +** This is the xRowid method. The SQLite core calls this routine to +** retrieve the rowid for the current row of the result set. fts5 +** exposes %_content.docid as the rowid for the virtual table. The +** rowid should be written to *pRowid. 
+*/
+static int fts5VocabRowidMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor positioned on the current row */
+  sqlite_int64 *pRowid            /* OUT: rowid of the current row */
+){
+  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
+  *pRowid = pCsr->rowid;
+  return SQLITE_OK;
+}
+/*
+** Register the fts5vocab virtual table module with database handle db,
+** under the module name "fts5vocab". pGlobal is passed as the module's
+** client-data pointer and no destructor is supplied for it. The module
+** table below leaves xUpdate and the transaction, rename and savepoint
+** methods set to 0. Returns the value returned by
+** sqlite3_create_module_v2().
+*/
+int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
+  static const sqlite3_module fts5Vocab = {
+    /* iVersion      */ 2,
+    /* xCreate       */ fts5VocabCreateMethod,
+    /* xConnect      */ fts5VocabConnectMethod,
+    /* xBestIndex    */ fts5VocabBestIndexMethod,
+    /* xDisconnect   */ fts5VocabDisconnectMethod,
+    /* xDestroy      */ fts5VocabDestroyMethod,
+    /* xOpen         */ fts5VocabOpenMethod,
+    /* xClose        */ fts5VocabCloseMethod,
+    /* xFilter       */ fts5VocabFilterMethod,
+    /* xNext         */ fts5VocabNextMethod,
+    /* xEof          */ fts5VocabEofMethod,
+    /* xColumn       */ fts5VocabColumnMethod,
+    /* xRowid        */ fts5VocabRowidMethod,
+    /* xUpdate       */ 0,
+    /* xBegin        */ 0,
+    /* xSync         */ 0,
+    /* xCommit       */ 0,
+    /* xRollback     */ 0,
+    /* xFindFunction */ 0,
+    /* xRename       */ 0,
+    /* xSavepoint    */ 0,
+    /* xRelease      */ 0,
+    /* xRollbackTo   */ 0,
+  };
+  void *p = (void*)pGlobal;       /* Client data handed to the module */
+
+  return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
+}
+#endif /* defined(SQLITE_ENABLE_FTS5) */
+
+
diff --git a/ext/fts5/test/fts5vocab.test b/ext/fts5/test/fts5vocab.test
new file mode 100644
index 0000000000..fb7c24e1ff
--- /dev/null
+++ b/ext/fts5/test/fts5vocab.test
@@ -0,0 +1,55 @@
+# 2015 Apr 24
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# The tests in this file focus on testing the fts5vocab module.
+# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5vocab + + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1); + CREATE VIRTUAL TABLE v1 USING fts5vocab(t1); + PRAGMA table_info = v1; +} { + 0 term {} 0 {} 0 + 1 row {} 0 {} 0 + 2 inst {} 0 {} 0 +} + +do_execsql_test 1.2 { SELECT * FROM v1 } { } + +do_execsql_test 1.3 { + INSERT INTO t1 VALUES('x y z'); + INSERT INTO t1 VALUES('x x x'); +} + +do_execsql_test 1.4 { + SELECT * FROM v1; +} {x 2 4 y 1 1 z 1 1} + +do_execsql_test 1.5 { + BEGIN; + INSERT INTO t1 VALUES('a b c'); + SELECT * FROM v1 WHERE term<'d'; + COMMIT; +} {a 1 1 b 1 1 c 1 1} + +do_execsql_test 1.6 { + DELETE FROM t1 WHERE one = 'a b c'; + SELECT * FROM v1; +} {x 2 4 y 1 1 z 1 1} + + + +finish_test + diff --git a/main.mk b/main.mk index 11dc94ea59..bc63a3b569 100644 --- a/main.mk +++ b/main.mk @@ -82,6 +82,7 @@ LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5_tokenize.o LIBOBJ += fts5_unicode2.o +LIBOBJ += fts5_vocab.o LIBOBJ += fts5parse.o @@ -246,7 +247,8 @@ SRC += \ fts5parse.c fts5parse.h \ $(TOP)/ext/fts5/fts5_storage.c \ $(TOP)/ext/fts5/fts5_tokenize.c \ - $(TOP)/ext/fts5/fts5_unicode2.c + $(TOP)/ext/fts5/fts5_unicode2.c \ + $(TOP)/ext/fts5/fts5_vocab.c # Generated source code files @@ -656,6 +658,9 @@ fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR) fts5_unicode2.o: $(TOP)/ext/fts5/fts5_unicode2.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c +fts5_vocab.o: $(TOP)/ext/fts5/fts5_vocab.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_vocab.c + fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h diff --git a/manifest b/manifest index 76b5dac082..ddaea8872b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\sthe\serror\smessage\sreturned\sby\sFTS5\sif\sit\sencounters\san\sunknown\sfile\sformat. 
-D 2015-05-08T09:21:05.416 +C Add\sthe\sfts5vocab\smodule,\sfor\sdirect\saccess\sto\sthe\sfts5\sindex. +D 2015-05-08T20:21:24.206 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,19 +104,20 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 7f58ea9ba1e72038137963719c5b5335f499cecd +F ext/fts5/fts5.c 9e521f3556b9929996909402ddf337f2e771e87c F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h be8ac04ce40705aa088c3d2509cadad0f98085fa +F ext/fts5/fts5Int.h fc3edf2538551c5bdb02885c517483d604394d3c F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c aa8d73d043417740c07861beb78c86103a6a9d90 +F ext/fts5/fts5_index.c 6a4fed2d64d7dbb0416c4278b23201f77daf94ea F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d F ext/fts5/fts5_tcl.c aa3b102bb01f366174718be7ce8e9311b9abb482 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d +F ext/fts5/fts5_vocab.c 9e021b7f95890f1403e84dc4be4c94559c07ee54 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 @@ -164,6 +165,7 @@ F ext/fts5/test/fts5unicode.test 
79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test 1c902eaa7359336293ac45c7a34616527513e9fb +F ext/fts5/test/fts5vocab.test d0cb4286a0d900f46498587366efacfc75741f0f F ext/fts5/tool/loadfts5.tcl 8a8f10d7d2d0d77f622e0a84cc0824c158c34a52 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -215,7 +217,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 76306018b967871262c6c8290e3914685f279ded +F main.mk 063cdc009247a9b543875ea12f4e27b8f3bcca54 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk d5e22023b5238985bb54a72d33e0ac71fe4f8a32 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1317,7 +1319,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a684b5e2d9d52cf4700e7e5f9dd547a2ba54e8e9 -R ee87a64da50ec14a005ebb86ec227c20 +P f369caec145f311bb136cf7af144e2695badcb9b +R c4f9cd11ccfbc0fa9f03125deb45e448 U dan -Z 8aaf7ae5929104d0a1ed12733f883e2b +Z 31686ee8f2f28db91dc188b739012da2 diff --git a/manifest.uuid b/manifest.uuid index 9406775af2..1a592dd121 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f369caec145f311bb136cf7af144e2695badcb9b \ No newline at end of file +6bf93e3b56e6705b7d12bab5024fc615f373b36c \ No newline at end of file From 71ab324066f7de3ad6080ed5a78e3684db70d9b2 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 9 May 2015 18:28:27 +0000 Subject: [PATCH 125/206] Allow the 
fts5vocab table to optionally provide data on a per-column basis. FossilOrigin-Name: 3922276135a7825d0ede8d9c757e9cfe492f803a --- ext/fts5/fts5.c | 10 +- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_vocab.c | 280 +++++++++++++++++++++++++---------- ext/fts5/test/fts5vocab.test | 79 +++++++++- manifest | 18 +-- manifest.uuid | 2 +- 6 files changed, 291 insertions(+), 100 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 73bcd88953..77274eda16 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1670,8 +1670,15 @@ static void fts5ApiCallback( /* ** Given cursor id iId, return a pointer to the corresponding Fts5Index ** object. Or NULL If the cursor id does not exist. +** +** If successful, set *pnCol to the number of indexed columns in the +** table before returning. */ -Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ +Fts5Index *sqlite3Fts5IndexFromCsrid( + Fts5Global *pGlobal, + i64 iCsrId, + int *pnCol +){ Fts5Cursor *pCsr; Fts5Index *pIndex = 0; @@ -1681,6 +1688,7 @@ Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ if( pCsr ){ Fts5Table *pTab = (Fts5Table*)pCsr->base.pVtab; pIndex = pTab->pIndex; + *pnCol = pTab->pConfig->nCol; } return pIndex; diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 018f26c00a..afef22679f 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -395,7 +395,7 @@ int sqlite3Fts5GetTokenizer( char **pzErr ); -Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64); +Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, int*); /* ** End of interface to code in fts5.c. diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index 8d4d6c1762..23a063296a 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -10,7 +10,25 @@ ** ****************************************************************************** ** -** This is an SQLite module implementing full-text search. 
+** This is an SQLite virtual table module implementing direct access to an +** existing FTS5 index. The module may create several different types of +** tables: +** +** col: +** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); +** +** One row for each term/column combination. The value of $doc is set to +** the number of fts5 rows that contain at least one instance of term +** $term within column $col. Field $cnt is set to the total number of +** instances of term $term in column $col (in any row of the fts5 table). +** +** row: +** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); +** +** One row for each term in the database. The value of $doc is set to +** the number of fts5 rows that contain at least one instance of term +** $term. Field $cnt is set to the total number of instances of term +** $term in the database. */ #if defined(SQLITE_ENABLE_FTS5) @@ -27,6 +45,7 @@ struct Fts5VocabTable { char *zFts5Db; /* Db containing fts5 table */ sqlite3 *db; /* Database handle */ Fts5Global *pGlobal; /* FTS5 global object for this database */ + int eType; /* FTS5_VOCAB_COL or ROW */ }; struct Fts5VocabCursor { @@ -34,14 +53,55 @@ struct Fts5VocabCursor { sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */ Fts5Index *pIndex; /* Associated FTS5 index */ - Fts5IndexIter *pIter; /* Iterator object */ int bEof; /* True if this cursor is at EOF */ + Fts5IndexIter *pIter; /* Term/rowid iterator object */ + + /* These are used by 'col' tables only */ + int nCol; + int iCol; + i64 *aCnt; + i64 *aDoc; + + /* Output values */ + i64 rowid; /* This table's current rowid value */ Fts5Buffer term; /* Current value of 'term' column */ - i64 nRow; /* Current value of 'row' column */ - i64 nInst; /* Current value of 'inst' column */ - i64 rowid; /* Current value of rowid column */ + i64 aVal[3]; /* Up to three columns left of 'term' */ }; +#define FTS5_VOCAB_COL 0 +#define FTS5_VOCAB_ROW 1 + +#define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt" +#define 
FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt" + +/* +** Translate a string containing an fts5vocab table type to an +** FTS5_VOCAB_XXX constant. If successful, set *peType to the output +** value and return SQLITE_OK. Otherwise, set *pzErr to an error message +** and return SQLITE_ERROR. +*/ +static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ + int rc = SQLITE_OK; + char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); + if( rc==SQLITE_OK ){ + sqlite3Fts5Dequote(zCopy); + if( sqlite3_stricmp(zCopy, "col")==0 ){ + *peType = FTS5_VOCAB_COL; + }else + + if( sqlite3_stricmp(zCopy, "row")==0 ){ + *peType = FTS5_VOCAB_ROW; + }else + { + *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy); + rc = SQLITE_ERROR; + } + sqlite3_free(zCopy); + } + + return rc; +} + /* ** The xDisconnect() virtual table method. @@ -70,7 +130,17 @@ static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ ** argv[0] -> module name ("fts5vocab") ** argv[1] -> database name ** argv[2] -> table name +** +** then: +** ** argv[3] -> name of fts5 table +** argv[4] -> type of fts5vocab table +** +** or, for tables in the TEMP schema only. 
+** +** argv[3] -> name of fts5 tables database +** argv[4] -> name of fts5 table +** argv[5] -> type of fts5vocab table */ static int fts5VocabInitVtab( sqlite3 *db, /* The SQLite database connection */ @@ -80,26 +150,40 @@ static int fts5VocabInitVtab( sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ char **pzErr /* Write any error message here */ ){ - const char *zSchema = "CREATE TABLE vvv(term, row, inst)"; + const char *azSchema[] = { + "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")", + "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")" + }; + Fts5VocabTable *pRet = 0; int rc = SQLITE_OK; /* Return code */ + int bDb; - if( argc!=4 ){ + bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0); + + if( argc!=5 && bDb==0 ){ *pzErr = sqlite3_mprintf("wrong number of vtable arguments"); rc = SQLITE_ERROR; }else{ int nByte; /* Bytes of space to allocate */ - const char *zDb = argv[1]; - const char *zTab = argv[3]; - int nDb = strlen(zDb) + 1; - int nTab = strlen(zTab) + 1; - - rc = sqlite3_declare_vtab(db, zSchema); + const char *zDb = bDb ? argv[3] : argv[1]; + const char *zTab = bDb ? argv[4] : argv[3]; + const char *zType = bDb ? 
argv[5] : argv[4]; + int nDb = strlen(zDb)+1; + int nTab = strlen(zTab)+1; + int eType; + + rc = fts5VocabTableType(zType, pzErr, &eType); + if( rc==SQLITE_OK ){ + assert( eType>=0 && eTypepGlobal = (Fts5Global*)pAux; + pRet->eType = eType; pRet->db = db; pRet->zFts5Tbl = (char*)&pRet[1]; pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; @@ -156,43 +240,52 @@ static int fts5VocabOpenMethod( sqlite3_vtab_cursor **ppCsr ){ Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; - Fts5VocabCursor *pCsr; + Fts5Index *pIndex = 0; + int nCol = 0; + Fts5VocabCursor *pCsr = 0; int rc = SQLITE_OK; + sqlite3_stmt *pStmt = 0; + char *zSql = 0; + int nByte; - pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5VocabCursor)); - if( pCsr ){ - char *zSql = sqlite3_mprintf( - "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", - pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl - ); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pStmt, 0); - } - sqlite3_free(zSql); - if( rc==SQLITE_OK && sqlite3_step(pCsr->pStmt)==SQLITE_ROW ){ - i64 iId = sqlite3_column_int64(pCsr->pStmt, 0); - pCsr->pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId); - } + zSql = sqlite3_mprintf( + "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", + pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0); + } + sqlite3_free(zSql); - if( rc==SQLITE_OK && pCsr->pIndex==0 ){ - rc = sqlite3_finalize(pCsr->pStmt); - pCsr->pStmt = 0; - if( rc==SQLITE_OK ){ - pVTab->zErrMsg = sqlite3_mprintf( - "no such fts5 table: %Q.%Q", pTab->zFts5Db, pTab->zFts5Tbl - ); - rc = SQLITE_ERROR; - } - } + if( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){ + i64 iId = sqlite3_column_int64(pStmt, 0); + pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &nCol); + } - if( rc!=SQLITE_OK ){ - sqlite3_free(pCsr); - pCsr = 0; + if( rc==SQLITE_OK && pIndex==0 
){ + rc = sqlite3_finalize(pStmt); + pStmt = 0; + if( rc==SQLITE_OK ){ + pVTab->zErrMsg = sqlite3_mprintf( + "no such fts5 table: %Q.%Q", pTab->zFts5Db, pTab->zFts5Tbl + ); + rc = SQLITE_ERROR; } } + nByte = nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor); + pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); + if( pCsr ){ + pCsr->pIndex = pIndex; + pCsr->pStmt = pStmt; + pCsr->nCol = nCol; + pCsr->aCnt = (i64*)&pCsr[1]; + pCsr->aDoc = &pCsr->aCnt[nCol]; + }else{ + sqlite3_finalize(pStmt); + } *ppCsr = (sqlite3_vtab_cursor*)pCsr; return rc; @@ -225,39 +318,72 @@ static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ */ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; int rc = SQLITE_OK; - if( sqlite3Fts5IterEof(pCsr->pIter) ){ - pCsr->bEof = 1; - }else{ - const char *zTerm; - int nTerm; + pCsr->rowid++; - zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); - sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); - pCsr->nInst = 0; - pCsr->nRow = 0; - pCsr->rowid++; - - while( 1 ){ - const u8 *pPos; int nPos; /* Position list */ - i64 dummy = 0; - int iOff = 0; - - rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos); - if( rc!=SQLITE_OK ) break; - while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &dummy) ){ - pCsr->nInst++; - } - pCsr->nRow++; - - rc = sqlite3Fts5IterNextScan(pCsr->pIter); - if( rc!=SQLITE_OK ) break; - zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); - if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break; - if( sqlite3Fts5IterEof(pCsr->pIter) ) break; + if( pTab->eType==FTS5_VOCAB_COL ){ + for(pCsr->iCol++; pCsr->iCol<pCsr->nCol; pCsr->iCol++){ + if( pCsr->aCnt[pCsr->iCol] ) break; } } + + if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=pCsr->nCol ){ + if( sqlite3Fts5IterEof(pCsr->pIter) ){ + pCsr->bEof = 1; + }else{ + const char *zTerm; + int nTerm; + + zTerm =
sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); + sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); + memset(pCsr->aVal, 0, sizeof(pCsr->aVal)); + memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64)); + memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64)); + pCsr->iCol = 0; + + assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); + while( 1 ){ + const u8 *pPos; int nPos; /* Position list */ + i64 iPos = 0; /* 64-bit position read from poslist */ + int iOff = 0; /* Current offset within position list */ + + rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos); + if( rc!=SQLITE_OK ) break; + + if( pTab->eType==FTS5_VOCAB_ROW ){ + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + pCsr->aVal[1]++; + } + pCsr->aVal[0]++; + }else{ + int iCol = -1; + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + int ii = FTS5_POS2COLUMN(iPos); + pCsr->aCnt[ii]++; + if( iCol!=ii ){ + pCsr->aDoc[ii]++; + iCol = ii; + } + } + } + + rc = sqlite3Fts5IterNextScan(pCsr->pIter); + if( rc!=SQLITE_OK ) break; + zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); + if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break; + if( sqlite3Fts5IterEof(pCsr->pIter) ) break; + } + } + } + + if( pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){ + while( pCsr->aCnt[pCsr->iCol]==0 ) pCsr->iCol++; + pCsr->aVal[0] = pCsr->iCol; + pCsr->aVal[1] = pCsr->aDoc[pCsr->iCol]; + pCsr->aVal[2] = pCsr->aCnt[pCsr->iCol]; + } return rc; } @@ -306,16 +432,10 @@ static int fts5VocabColumnMethod( ); break; - case 1: /* row */ - sqlite3_result_int64(pCtx, pCsr->nRow); - break; - - case 2: /* inst */ - sqlite3_result_int64(pCtx, pCsr->nInst); - break; - default: - assert( 0 ); + assert( iCol<4 && iCol>0 ); + sqlite3_result_int64(pCtx, pCsr->aVal[iCol-1]); + break; } return SQLITE_OK; } diff --git a/ext/fts5/test/fts5vocab.test b/ext/fts5/test/fts5vocab.test index fb7c24e1ff..b61db67d7b 100644 --- a/ext/fts5/test/fts5vocab.test +++ b/ext/fts5/test/fts5vocab.test @@ 
-16,40 +16,103 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5vocab -do_execsql_test 1.1 { +do_execsql_test 1.1.1 { CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1); - CREATE VIRTUAL TABLE v1 USING fts5vocab(t1); + CREATE VIRTUAL TABLE v1 USING fts5vocab(t1, 'row'); PRAGMA table_info = v1; } { 0 term {} 0 {} 0 - 1 row {} 0 {} 0 - 2 inst {} 0 {} 0 + 1 doc {} 0 {} 0 + 2 cnt {} 0 {} 0 } -do_execsql_test 1.2 { SELECT * FROM v1 } { } +do_execsql_test 1.1.2 { + CREATE VIRTUAL TABLE v2 USING fts5vocab(t1, 'col'); + PRAGMA table_info = v2; +} { + 0 term {} 0 {} 0 + 1 col {} 0 {} 0 + 2 doc {} 0 {} 0 + 3 cnt {} 0 {} 0 +} + +do_execsql_test 1.2.1 { SELECT * FROM v1 } { } +do_execsql_test 1.2.2 { SELECT * FROM v2 } { } do_execsql_test 1.3 { INSERT INTO t1 VALUES('x y z'); INSERT INTO t1 VALUES('x x x'); } -do_execsql_test 1.4 { +do_execsql_test 1.4.1 { SELECT * FROM v1; } {x 2 4 y 1 1 z 1 1} -do_execsql_test 1.5 { +do_execsql_test 1.4.2 { + SELECT * FROM v2; +} {x 0 2 4 y 0 1 1 z 0 1 1} + +do_execsql_test 1.5.1 { BEGIN; INSERT INTO t1 VALUES('a b c'); SELECT * FROM v1 WHERE term<'d'; - COMMIT; } {a 1 1 b 1 1 c 1 1} +do_execsql_test 1.5.2 { + SELECT * FROM v2 WHERE term<'d'; + COMMIT; +} {a 0 1 1 b 0 1 1 c 0 1 1} + do_execsql_test 1.6 { DELETE FROM t1 WHERE one = 'a b c'; SELECT * FROM v1; } {x 2 4 y 1 1 z 1 1} +#------------------------------------------------------------------------- +# +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE tt USING fts5(a, b); + INSERT INTO tt VALUES('d g b f d f', 'f c e c d a'); + INSERT INTO tt VALUES('f a e a a b', 'e d c f d d'); + INSERT INTO tt VALUES('b c a a a b', 'f f c c b c'); + INSERT INTO tt VALUES('f d c a c e', 'd g d e g d'); + INSERT INTO tt VALUES('g d e f a g x', 'f f d a a b'); + INSERT INTO tt VALUES('g c f b c g', 'a g f d c b'); + INSERT INTO tt VALUES('c e c f g b', 'f e d b g a'); + INSERT INTO tt VALUES('g d e f d e', 'a c d b a g'); + INSERT INTO tt VALUES('e f a c c b', 'b f e a f d 
y'); + INSERT INTO tt VALUES('c c a a c f', 'd g a e b g'); + CREATE VIRTUAL TABLE tv USING fts5vocab(tt, 'col'); + SELECT * FROM tv; +} { + a 0 6 11 a 1 7 9 + b 0 6 7 b 1 7 7 + c 0 6 12 c 1 5 8 + d 0 4 6 d 1 9 13 + e 0 6 7 e 1 6 6 + f 0 9 10 f 1 7 10 + g 0 5 7 g 1 5 7 + x 0 1 1 y 1 1 1 +} +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE temp.tv2 USING fts5vocab(main, tt, 'row'); + SELECT * FROM tv2; +} { + a 10 20 b 9 14 c 9 20 d 9 19 + e 8 13 f 10 20 g 7 14 x 1 1 + y 1 1 +} +#------------------------------------------------------------------------- +# +foreach {tn sql} { + 1 { CREATE VIRTUAL TABLE aa USING fts5vocab() } + 2 { CREATE VIRTUAL TABLE aa USING fts5vocab(x) } + 3 { CREATE VIRTUAL TABLE aa USING fts5vocab(x,y,z) } + 4 { CREATE VIRTUAL TABLE temp.aa USING fts5vocab(x,y,z,y) } +} { + do_catchsql_test 3.$tn $sql {1 {wrong number of vtable arguments}} +} finish_test diff --git a/manifest b/manifest index ddaea8872b..12d81ba731 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sfts5vocab\smodule,\sfor\sdirect\saccess\sto\sthe\sfts5\sindex. -D 2015-05-08T20:21:24.206 +C Allow\sthe\sfts5vocab\stable\sto\soptionally\sprovide\sdata\son\sa\sper-column\sbasis. 
+D 2015-05-09T18:28:27.134 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,9 +104,9 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 9e521f3556b9929996909402ddf337f2e771e87c +F ext/fts5/fts5.c a5a908a68c79c352a0dfa77d16712de43896bd07 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h fc3edf2538551c5bdb02885c517483d604394d3c +F ext/fts5/fts5Int.h 5b9e4afe80d18648bc236b9b5bc2f873634326f6 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 @@ -117,7 +117,7 @@ F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d F ext/fts5/fts5_tcl.c aa3b102bb01f366174718be7ce8e9311b9abb482 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d -F ext/fts5/fts5_vocab.c 9e021b7f95890f1403e84dc4be4c94559c07ee54 +F ext/fts5/fts5_vocab.c 2e37ea9b4d4d5460bc778f2adb872c6a869601e7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 @@ -165,7 +165,7 @@ F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test 1c902eaa7359336293ac45c7a34616527513e9fb -F 
ext/fts5/test/fts5vocab.test d0cb4286a0d900f46498587366efacfc75741f0f +F ext/fts5/test/fts5vocab.test 2d1bddfb6e1effd9e1d2f5d1d25bf05e9ab33e64 F ext/fts5/tool/loadfts5.tcl 8a8f10d7d2d0d77f622e0a84cc0824c158c34a52 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1319,7 +1319,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f369caec145f311bb136cf7af144e2695badcb9b -R c4f9cd11ccfbc0fa9f03125deb45e448 +P 6bf93e3b56e6705b7d12bab5024fc615f373b36c +R f31ac5d295b9e0df865bd081bc32aa0b U dan -Z 31686ee8f2f28db91dc188b739012da2 +Z 9e8a79e0ffff336d7475aff60e841c57 diff --git a/manifest.uuid b/manifest.uuid index 1a592dd121..b81db19d90 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6bf93e3b56e6705b7d12bab5024fc615f373b36c \ No newline at end of file +3922276135a7825d0ede8d9c757e9cfe492f803a \ No newline at end of file From 23d538885eb5b4db3056f0cce47004248bf1949f Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 13 May 2015 17:15:32 +0000 Subject: [PATCH 126/206] Change fts5 doclist-index structures to be trees instead of flat lists. This only makes a difference for databases that contain millions of instances of the same token. 
FossilOrigin-Name: aa34bf666c384cf32a8d8166ab6d9afbca26a256 --- ext/fts5/fts5.c | 8 + ext/fts5/fts5Int.h | 4 +- ext/fts5/fts5_index.c | 681 +++++++++++++++++++++---------- ext/fts5/fts5_tcl.c | 8 +- ext/fts5/test/fts5aa.test | 4 +- ext/fts5/test/fts5al.test | 6 +- ext/fts5/test/fts5corrupt2.test | 3 + ext/fts5/test/fts5dlidx.test | 5 +- ext/fts5/test/fts5integrity.test | 22 + ext/fts5/test/fts5rowid.test | 2 +- ext/fts5/test/fts5version.test | 10 +- ext/fts5/tool/loadfts5.tcl | 1 + manifest | 38 +- manifest.uuid | 2 +- test/permutations.test | 8 + tool/mksqlite3c.tcl | 1 + 16 files changed, 549 insertions(+), 254 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 77274eda16..e519635149 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -17,6 +17,14 @@ #include "fts5Int.h" +/* +** This variable is set to true when running corruption tests. Otherwise +** false. If it is false, extra assert() conditions in the fts5 code are +** activated - conditions that are only true if it is guaranteed that the +** fts5 database is not corrupt. +*/ +int sqlite3_fts5_may_be_corrupt = 0; + typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index afef22679f..3c84c9ed2a 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -44,7 +44,7 @@ int sqlite3Fts5Corrupt(void); ** is used for assert() conditions that are true only if it can be ** guranteed that the database is not corrupt. 
*/ -#ifdef SQLITE_TEST +#ifdef SQLITE_DEBUG extern int sqlite3_fts5_may_be_corrupt; # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) #else @@ -115,7 +115,7 @@ struct Fts5Config { }; /* Current expected value of %_config table 'version' field */ -#define FTS5_CURRENT_VERSION 1 +#define FTS5_CURRENT_VERSION 2 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index cd15c71b56..6df3774ebb 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -202,15 +202,33 @@ ** ** 5. Segment doclist indexes: ** -** A list of varints. If the first termless page contains at least one -** docid, the list begins with that docid as a varint followed by the -** value 1 (0x01). Or, if the first termless page contains no docids, -** a varint containing the last docid stored on the term page followed -** by a 0 (0x00) value. +** Doclist indexes are themselves b-trees, however they usually consist of +** a single leaf record only. The format of each doclist index leaf page +** is: +** +** * Flags byte. Bits are: +** 0x01: Clear if leaf is also the root page, otherwise set. +** +** * Page number of fts index leaf page. As a varint. +** +** * First docid on page indicated by previous field. As a varint. +** +** * A list of varints, one for each subsequent termless page. A +** positive delta if the termless page contains at least one docid, +** or an 0x00 byte otherwise. +** +** Internal doclist index nodes are: +** +** * Flags byte. Bits are: +** 0x01: Clear for root page, otherwise set. +** +** * Page number of first child page. As a varint. +** +** * Copy of first docid on page indicated by previous field. As a varint. +** +** * A list of delta-encoded varints - the first docid on each subsequent +** child page. 
** -** For each subsequent page in the doclist, either a 0x00 byte if the -** page contains no terms, or a delta-encoded docid (always +ve) -** representing the first docid on the page otherwise. */ /* @@ -240,33 +258,42 @@ ** SQLITE_FULL and fails the current operation if they ever prove too small. */ #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ +#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ #define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */ #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ -#define FTS5_SEGMENT_ROWID(segid, height, pgno) ( \ - ((i64)(segid) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ +#define fts5_dri(segid, dlidx, height, pgno) ( \ + ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ + ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ ((i64)(pgno)) \ ) +#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno) +#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) + +#if 0 /* ** The height of segment b-trees is actually limited to one less than ** (1<iLeafPgno is the page number the -** doclist is associated with (the one featuring the term). +** Advance the iterator passed as the only argument. If the end of the +** doclist-index page is reached, return non-zero. */ -static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ - Fts5Data *pData = pIter->pData; - int i; - int bPresent; +static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ + Fts5Data *pData = pLvl->pData; - assert( pIter->pData ); - assert( pIter->iLeafPgno>0 ); - - /* Read the first rowid value. And the "present" flag that follows it. 
*/ - pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid); - bPresent = pData->p[pIter->iOff++]; - if( bPresent ){ - i = 0; + if( pLvl->iOff==0 ){ + assert( pLvl->bEof==0 ); + pLvl->iOff = 1; + pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); + pLvl->iOff += getVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); + pLvl->iFirstOff = pLvl->iOff; }else{ - /* Count the number of leading 0x00 bytes. */ - for(i=1; pIter->iOff<pData->n; i++){ - if( pData->p[pIter->iOff] ) break; - pIter->iOff++; + int iOff; + for(iOff=pLvl->iOff; iOff<pData->n; iOff++){ + if( pData->p[iOff] ) break; } - /* Unless we are already at the end of the doclist-index, load the first - ** rowid value. */ - if( pIter->iOff<pData->n ){ + if( iOff<pData->n ){ i64 iVal; - pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal); - pIter->iRowid += iVal; + pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; + iOff += getVarint(&pData->p[iOff], (u64*)&iVal); + pLvl->iRowid += iVal; + pLvl->iOff = iOff; }else{ - pIter->bEof = 1; + pLvl->bEof = 1; } } - pIter->iLeafPgno += (i+1); - pIter->iFirstOff = pIter->iOff; - return pIter->bEof; + return pLvl->bEof; } /* ** Advance the iterator passed as the only argument.
*/ -static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ - Fts5Data *pData = pIter->pData; - int iOff; +static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; - for(iOff=pIter->iOff; iOffn; iOff++){ - if( pData->p[iOff] ) break; + assert( iLvlnLvl ); + if( fts5DlidxLvlNext(pLvl) ){ + if( (iLvl+1) < pIter->nLvl ){ + fts5DlidxIterNextR(p, pIter, iLvl+1); + if( pLvl[1].bEof==0 ){ + fts5DataRelease(pLvl->pData); + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) + ); + if( pLvl->pData ) fts5DlidxLvlNext(pLvl); + } + } } - if( iOffn ){ - i64 iVal; - pIter->iLeafPgno += (iOff - pIter->iOff) + 1; - iOff += getVarint(&pData->p[iOff], (u64*)&iVal); - pIter->iRowid += iVal; - pIter->iOff = iOff; - }else{ - pIter->bEof = 1; - } - - return pIter->bEof; + return pIter->aLvl[0].bEof; } +static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ + return fts5DlidxIterNextR(p, pIter, 0); +} + +/* +** The iterator passed as the first argument has the following fields set +** as follows. This function sets up the rest of the iterator so that it +** points to the first rowid in the doclist-index. +** +** pData: +** pointer to doclist-index record, +** +** When this function is called pIter->iLeafPgno is the page number the +** doclist is associated with (the one featuring the term). 
+*/ +static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ + int i; + for(i=0; inLvl; i++){ + fts5DlidxLvlNext(&pIter->aLvl[i]); + } + return pIter->aLvl[0].bEof; +} + static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ - return pIter->bEof; + return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; } -static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ - if( fts5DlidxIterFirst(pIter)==0 ){ - while( 0==fts5DlidxIterNext(pIter) ); - pIter->bEof = 0; +static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ + int i; + + /* Advance each level to the last entry on the last page */ + for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; + while( fts5DlidxLvlNext(pLvl)==0 ); + pLvl->bEof = 0; + + if( i>0 ){ + Fts5DlidxLvl *pChild = &pLvl[-1]; + fts5DataRelease(pChild->pData); + memset(pChild, 0, sizeof(Fts5DlidxLvl)); + pChild->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) + ); + } } } -static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ - int iOff = pIter->iOff; +/* +** Move the iterator passed as the only argument to the previous entry. +*/ +static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ + int iOff = pLvl->iOff; - assert( pIter->bEof==0 ); - if( iOff<=pIter->iFirstOff ){ - pIter->bEof = 1; + assert( pLvl->bEof==0 ); + if( iOff<=pLvl->iFirstOff ){ + pLvl->bEof = 1; }else{ - u8 *a = pIter->pData->p; + u8 *a = pLvl->pData->p; i64 iVal; int iLimit; + int ii; + int nZero = 0; /* Currently iOff points to the first byte of a varint. This block ** decrements iOff until it points to the first byte of the previous @@ -1510,20 +1578,70 @@ static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ } getVarint(&a[iOff], (u64*)&iVal); - pIter->iRowid -= iVal; - pIter->iLeafPgno--; + pLvl->iRowid -= iVal; + pLvl->iLeafPgno--; - /* Skip backwards passed any 0x00 bytes. 
*/ - while( iOff>pIter->iFirstOff - && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 - ){ - iOff--; - pIter->iLeafPgno--; + /* Skip backwards past any 0x00 varints. */ + for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ + nZero++; } - pIter->iOff = iOff; + if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ + /* The byte immediately before the last 0x00 byte has the 0x80 bit + ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 + ** bytes before a[ii]. */ + int bZero = 0; /* True if last 0x00 counts */ + if( (ii-8)>=pLvl->iFirstOff ){ + int j; + for(j=1; j<=8 && (a[ii-j] & 0x80); j++); + bZero = (j>8); + } + if( bZero==0 ) nZero--; + } + pLvl->iLeafPgno -= nZero; + pLvl->iOff = iOff - nZero; } - return pIter->bEof; + return pLvl->bEof; +} + +static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ + Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; + + assert( iLvlnLvl ); + if( fts5DlidxLvlPrev(pLvl) ){ + if( (iLvl+1) < pIter->nLvl ){ + fts5DlidxIterPrevR(p, pIter, iLvl+1); + if( pLvl[1].bEof==0 ){ + fts5DataRelease(pLvl->pData); + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, + FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) + ); + if( pLvl->pData ){ + while( fts5DlidxLvlNext(pLvl)==0 ); + pLvl->bEof = 0; + } + } + } + } + + return pIter->aLvl[0].bEof; +} +static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ + return fts5DlidxIterPrevR(p, pIter, 0); +} + +/* +** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). 
+*/ +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ + if( pIter ){ + int i; + for(i=0; inLvl; i++){ + fts5DataRelease(pIter->aLvl[i].pData); + } + sqlite3_free(pIter); + } } static Fts5DlidxIter *fts5DlidxIterInit( @@ -1532,35 +1650,52 @@ static Fts5DlidxIter *fts5DlidxIterInit( int iSegid, /* Segment id */ int iLeafPg /* Leaf page number to load dlidx for */ ){ - Fts5DlidxIter *pIter; + Fts5DlidxIter *pIter = 0; + int i; + int bDone = 0; - pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); - if( pIter==0 ) return 0; + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ + int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); + Fts5DlidxIter *pNew; - pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iSegid, iLeafPg)); - if( pIter->pData==0 ){ - sqlite3_free(pIter); - pIter = 0; - }else{ - pIter->iLeafPgno = iLeafPg; + pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); + if( pNew==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); + Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; + pIter = pNew; + memset(pLvl, 0, sizeof(Fts5DlidxLvl)); + pLvl->pData = fts5DataRead(p, iRowid); + if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ + bDone = 1; + } + pIter->nLvl = i+1; + } + } + + if( p->rc==SQLITE_OK ){ + pIter->iSegid = iSegid; if( bRev==0 ){ fts5DlidxIterFirst(pIter); }else{ - fts5DlidxIterLast(pIter); + fts5DlidxIterLast(p, pIter); } } + if( p->rc!=SQLITE_OK ){ + fts5DlidxIterFree(pIter); + pIter = 0; + } + return pIter; } -/* -** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). 
-*/ -static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ - if( pIter ){ - fts5DataRelease(pIter->pData); - sqlite3_free(pIter); - } +static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ + return pIter->aLvl[0].iRowid; +} +static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ + return pIter->aLvl[0].iLeafPgno; } static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ @@ -1940,7 +2075,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ ** contains no entries except those on the current page. */ if( fts5DlidxIterEof(p, pDlidx)==0 ){ int iSegid = pIter->pSeg->iSegid; - pgnoLast = pDlidx->iLeafPgno; + pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); }else{ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); @@ -2346,7 +2481,7 @@ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. -** It is an error if leaf iLeafPgno contains no rowid. +** It is an error if leaf iLeafPgno does not exist or contains no rowids. 
*/ static void fts5SegIterGotoPage( Fts5Index *p, /* FTS5 backend object */ @@ -2354,22 +2489,26 @@ static void fts5SegIterGotoPage( int iLeafPgno ){ assert( iLeafPgno>pIter->iLeafPgno ); - pIter->iLeafPgno = iLeafPgno-1; - fts5SegIterNextPage(p, pIter); - assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); + if( iLeafPgno>pIter->pSeg->pgnoLast ){ + p->rc = FTS5_CORRUPT; + }else{ + pIter->iLeafPgno = iLeafPgno-1; + fts5SegIterNextPage(p, pIter); + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); - if( p->rc==SQLITE_OK ){ - int iOff; - u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->n; + if( p->rc==SQLITE_OK ){ + int iOff; + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; - iOff = fts5GetU16(&a[0]); - if( iOff<4 || iOff>=n ){ - p->rc = FTS5_CORRUPT; - }else{ - iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - fts5SegIterLoadNPos(p, pIter); + iOff = fts5GetU16(&a[0]); + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + fts5SegIterLoadNPos(p, pIter); + } } } } @@ -2394,21 +2533,21 @@ static void fts5SegIterNextFrom( assert( pIter->pLeaf ); if( bRev==0 ){ - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ - iLeafPgno = pDlidx->iLeafPgno; - fts5DlidxIterNext(pDlidx); + while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ + iLeafPgno = fts5DlidxIterPgno(pDlidx); + fts5DlidxIterNext(p, pDlidx); } - assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); + assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); if( iLeafPgno>pIter->iLeafPgno ){ fts5SegIterGotoPage(p, pIter, iLeafPgno); bMove = 0; } }else{ assert( iMatchiRowid ); - while( fts5DlidxIterEof(p, pDlidx)==0 && iMatchiRowid ){ - fts5DlidxIterPrev(pDlidx); + while( !fts5DlidxIterEof(p, pDlidx) && iMatchiLeafPgno; + iLeafPgno = fts5DlidxIterPgno(pDlidx); assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); @@ -2804,6 +2943,53 @@ 
static int fts5PrefixCompress( return i; } +static void fts5WriteDlidxClear( + Fts5Index *p, + Fts5SegWriter *pWriter, + int bFlush /* If true, write dlidx to disk */ +){ + int i; + assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); + for(i=0; inDlidx; i++){ + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; + if( pDlidx->buf.n==0 ) break; + if( bFlush ){ + assert( pDlidx->pgno!=0 ); + fts5DataWrite(p, + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), + pDlidx->buf.p, pDlidx->buf.n + ); + } + sqlite3Fts5BufferZero(&pDlidx->buf); + pDlidx->bPrevValid = 0; + } +} + +/* +** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. +** Any new array elements are zeroed before returning. +*/ +static int fts5WriteDlidxGrow( + Fts5Index *p, + Fts5SegWriter *pWriter, + int nLvl +){ + if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ + Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( + pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl + ); + if( aDlidx==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); + memset(&aDlidx[pWriter->nDlidx], 0, nByte); + pWriter->aDlidx = aDlidx; + pWriter->nDlidx = nLvl; + } + } + return p->rc; +} + /* ** If an "nEmpty" record must be written to the b-tree before the next ** term, write it now. @@ -2813,23 +2999,22 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ - i64 iKey = FTS5_DOCLIST_IDX_ROWID( - pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty - ); - assert( pWriter->cdlidx.n>0 ); - fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n); + + /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written + ** to the database, also write the doclist-index to disk. 
*/ + if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ bFlag = 1; } + fts5WriteDlidxClear(p, pWriter, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); pWriter->nEmpty = 0; + }else{ + fts5WriteDlidxClear(p, pWriter, 0); } - /* Whether or not it was written to disk, zero the doclist index at this - ** point */ - sqlite3Fts5BufferZero(&pWriter->cdlidx); - pWriter->bDlidxPrevValid = 0; + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 ); + assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 ); } static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ @@ -2900,43 +3085,98 @@ static void fts5WriteBtreeTerm( } } +/* +** This function is called when flushing a leaf page that contains no +** terms at all to disk. +*/ static void fts5WriteBtreeNoTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter /* Writer object */ ){ - if( pWriter->bFirstRowidInPage ){ - /* No rowids on this page. Append an 0x00 byte to the current - ** doclist-index */ - if( pWriter->bDlidxPrevValid==0 ){ - i64 iRowid = pWriter->iPrevRowid; - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); - pWriter->bDlidxPrevValid = 1; - pWriter->iDlidxPrev = iRowid; - } - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0); + /* If there were no rowids on the leaf page either and the doclist-index + ** has already been started, append an 0x00 byte to it. */ + if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; + assert( pDlidx->bPrevValid ); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); } + + /* Increment the "number of sequential leaves without a term" counter. 
*/ pWriter->nEmpty++; } +static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ + i64 iRowid; + int iOff; + + iOff = 1 + getVarint(&pBuf->p[1], (u64*)&iRowid); + getVarint(&pBuf->p[iOff], (u64*)&iRowid); + return iRowid; +} + /* -** Rowid iRowid has just been appended to the current leaf page. As it is -** the first on its page, append an entry to the current doclist-index. +** Rowid iRowid has just been appended to the current leaf page. It is the +** first on the page. This function appends an appropriate entry to the current +** doclist-index. */ static void fts5WriteDlidxAppend( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid ){ - i64 iVal; - if( pWriter->bDlidxPrevValid ){ - iVal = iRowid - pWriter->iDlidxPrev; - }else{ - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); - iVal = 1; + int i; + int bDone = 0; + + for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ + i64 iVal; + Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; + + if( pDlidx->buf.n>=p->pConfig->pgsz ){ + /* The current doclist-index page is full. Write it to disk and push + ** a copy of iRowid (which will become the first rowid on the next + ** doclist-index leaf page) up into the next level of the b-tree + ** hierarchy. If the node being flushed is currently the root node, + ** also push its first rowid upwards. */ + pDlidx->buf.p[0] = 0x01; /* Not the root node */ + fts5DataWrite(p, + FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), + pDlidx->buf.p, pDlidx->buf.n + ); + fts5WriteDlidxGrow(p, pWriter, i+2); + pDlidx = &pWriter->aDlidx[i]; + if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ + i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); + + /* This was the root node. Push its first rowid up to the new root. 
*/ + pDlidx[1].pgno = pDlidx->pgno; + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); + pDlidx[1].bPrevValid = 1; + pDlidx[1].iPrev = iFirst; + } + + sqlite3Fts5BufferZero(&pDlidx->buf); + pDlidx->bPrevValid = 0; + pDlidx->pgno++; + }else{ + bDone = 1; + } + + if( pDlidx->bPrevValid ){ + iVal = iRowid - pDlidx->iPrev; + }else{ + i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno); + assert( pDlidx->buf.n==0 ); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); + iVal = iRowid; + } + + sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); + pDlidx->bPrevValid = 1; + pDlidx->iPrev = iRowid; } - sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal); - pWriter->bDlidxPrevValid = 1; - pWriter->iDlidxPrev = iRowid; } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ @@ -3034,6 +3274,9 @@ static void fts5WriteAppendTerm( pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; + assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); + pWriter->aDlidx[0].pgno = pPage->pgno; + /* If the current leaf page is full, flush it to disk. 
*/ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); @@ -3171,7 +3414,11 @@ static void fts5WriteFinish( fts5BufferFree(&pPg->buf); } sqlite3_free(pWriter->aWriter); - sqlite3Fts5BufferFree(&pWriter->cdlidx); + + for(i=0; inDlidx; i++){ + sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); + } + sqlite3_free(pWriter->aDlidx); } static void fts5WriteInit( @@ -3182,9 +3429,11 @@ static void fts5WriteInit( memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; - pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter)); - if( pWriter->aWriter==0 ) return; + pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter)); + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); + if( pWriter->aDlidx==0 ) return; pWriter->nWriter = 1; + pWriter->nDlidx = 1; pWriter->aWriter[0].pgno = 1; pWriter->bFirstTermInPage = 1; } @@ -3198,10 +3447,12 @@ static void fts5WriteInitForAppend( memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = pSeg->iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); + pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); if( p->rc==SQLITE_OK ){ int pgno = 1; int i; + pWriter->nDlidx = 1; pWriter->nWriter = pSeg->nHeight; pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; for(i=pSeg->nHeight-1; i>0; i--){ @@ -3583,18 +3834,18 @@ static void fts5FlushOneHash(Fts5Index *p){ pBuf = &writer.aWriter[0].buf; fts5BufferGrow(&p->rc, pBuf, pgsz + 20); - /* Begin scanning through hash table entries. */ + /* Begin scanning through hash table entries. This loop runs once for each + ** term/doclist currently stored within the hash table. 
*/ if( p->rc==SQLITE_OK ){ memset(pBuf->p, 0, 4); pBuf->n = 4; p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } - while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ - const char *zTerm; - int nTerm; - const u8 *pDoclist; - int nDoclist; + const char *zTerm; /* Buffer containing term */ + int nTerm; /* Size of zTerm in bytes */ + const u8 *pDoclist; /* Pointer to doclist for this term */ + int nDoclist; /* Size of doclist in bytes */ int nSuffix; /* Size of term suffix */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); @@ -3611,7 +3862,9 @@ static void fts5FlushOneHash(Fts5Index *p){ } } - /* Write the term to the leaf. And push it up into the b-tree hierarchy */ + /* Write the term to the leaf. And if it is the first on the leaf, and + ** the leaf is not page number 1, push it up into the b-tree hierarchy + ** as well. */ if( writer.bFirstTermInPage==0 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); @@ -3630,6 +3883,12 @@ static void fts5FlushOneHash(Fts5Index *p){ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); + /* We just wrote a term into page writer.aWriter[0].pgno. If a + ** doclist-index is to be generated for this doclist, it will be + ** associated with this page. */ + assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); + writer.aDlidx[0].pgno = writer.aWriter[0].pgno; + if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. 
*/ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); @@ -3825,8 +4084,6 @@ static void fts5MultiIterPoslist( Fts5ChunkIter iter; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); - static int nCall = 0; - nCall++; fts5ChunkIterInit(p, pSeg, &iter); @@ -4416,7 +4673,7 @@ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ */ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ int n; - const char *z = fts5MultiIterTerm(pIter->pMulti, &n); + const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n); *pn = n-1; return &z[1]; } @@ -4654,32 +4911,35 @@ static void fts5DlidxIterTestReverse( int iLeaf /* Load doclist-index for this leaf */ ){ Fts5DlidxIter *pDlidx = 0; - i64 cksum1 = 13; - i64 cksum2 = 13; + u64 cksum1 = 13; + u64 cksum2 = 13; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterNext(pDlidx) + fts5DlidxIterNext(p, pDlidx) ){ - assert( pDlidx->iLeafPgno>iLeaf ); - cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); - cksum1 = (cksum1 ^ pDlidx->iRowid); + i64 iRowid = fts5DlidxIterRowid(pDlidx); + int pgno = fts5DlidxIterPgno(pDlidx); + assert( pgno>iLeaf ); + cksum1 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterPrev(pDlidx) + fts5DlidxIterPrev(p, pDlidx) ){ - assert( pDlidx->iLeafPgno>iLeaf ); - cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); - cksum2 = (cksum2 ^ pDlidx->iRowid); + i64 iRowid = fts5DlidxIterRowid(pDlidx); + int pgno = fts5DlidxIterPgno(pDlidx); + + assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); + cksum2 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; - if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; + if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } #else # define fts5DlidxIterTestReverse(x,y,z) @@ -4748,11 +5008,11 @@ static void 
fts5IndexIntegrityCheckSegment( for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); fts5DlidxIterEof(p, pDlidx)==0; - fts5DlidxIterNext(pDlidx) + fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ - for(iPg=iPrevLeaf+1; iPgiLeafPgno; iPg++){ + for(iPg=iPrevLeaf+1; iPgiLeafPgno; + iPrevLeaf = fts5DlidxIterPgno(pDlidx); /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. */ - iKey = FTS5_SEGMENT_ROWID(iSegid, 0, pDlidx->iLeafPgno); + iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; + if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } - } for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ @@ -4994,6 +5253,7 @@ u64 sqlite3Fts5IndexCksum( static void fts5DecodeRowid( i64 iRowid, /* Rowid from %_data table */ int *piSegid, /* OUT: Segment id */ + int *pbDlidx, /* OUT: Dlidx flag */ int *piHeight, /* OUT: Height */ int *piPgno /* OUT: Page number */ ){ @@ -5003,29 +5263,26 @@ static void fts5DecodeRowid( *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); iRowid >>= FTS5_DATA_HEIGHT_B; + *pbDlidx = (int)(iRowid & 0x0001); + iRowid >>= FTS5_DATA_DLI_B; + *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); } static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ - int iSegid, iHeight, iPgno; /* Rowid compenents */ - fts5DecodeRowid(iKey, &iSegid, &iHeight, &iPgno); + int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - "{structure idx=%d}", (int)(iKey-10) - ); + 
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)"); } } - else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx segid=%d pgno=%d)", - iSegid, iPgno - ); - }else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(segid=%d h=%d pgno=%d)", - iSegid, iHeight, iPgno + else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)", + bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno ); } } @@ -5135,7 +5392,7 @@ static void fts5DecodeFunction( sqlite3_value **apVal /* Function arguments */ ){ i64 iRowid; /* Rowid for record being decoded */ - int iSegid,iHeight,iPgno; /* Rowid components */ + int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ const u8 *aBlob; int n; /* Record to decode */ u8 *a = 0; Fts5Buffer s; /* Build up text to return here */ @@ -5152,24 +5409,24 @@ static void fts5DecodeFunction( a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; memcpy(a, aBlob, n); - fts5DecodeRowid(iRowid, &iSegid, &iHeight, &iPgno); + fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); - if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + if( bDlidx ){ Fts5Data dlidx; - Fts5DlidxIter iter; + Fts5DlidxLvl lvl; dlidx.p = a; dlidx.n = n; dlidx.nRef = 2; - memset(&iter, 0, sizeof(Fts5DlidxIter)); - iter.pData = &dlidx; - iter.iLeafPgno = iPgno; + memset(&lvl, 0, sizeof(Fts5DlidxLvl)); + lvl.pData = &dlidx; + lvl.iLeafPgno = iPgno; - for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){ + for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ sqlite3Fts5BufferAppendPrintf(&rc, &s, - " %d(%lld)", iter.iLeafPgno, iter.iRowid + " %d(%lld)", lvl.iLeafPgno, lvl.iRowid ); } }else if( iSegid==0 ){ diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 171b4849a5..46e2f121b5 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -22,13 +22,7 @@ #include #include -/* -** This variable is set to true when running corruption tests. 
Otherwise -** false. If it is false, extra assert() conditions in the fts5 code are -** activated - conditions that are only true if it is guaranteed that the -** fts5 database is not corrupt. -*/ -int sqlite3_fts5_may_be_corrupt = 0; +extern int sqlite3_fts5_may_be_corrupt; /************************************************************************* ** This is a copy of the first part of the SqliteDb structure in diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index ce4c90b9a1..559a5a1256 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -49,7 +49,7 @@ do_execsql_test 2.1 { } do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } -} {/{{structure idx=0} {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} +} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } @@ -181,7 +181,6 @@ for {set i 1} {$i <= 10} {incr i} { } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} -# if {$i==1} break } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} #exit @@ -243,6 +242,7 @@ for {set i 1} {$i <= 10} {incr i} { if {[set_test_counter errors]} break } + #------------------------------------------------------------------------- # reset_db diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 9f712ffc86..cc6435bb0e 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -26,17 +26,17 @@ ifcapable !fts5 { do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; -} {version 1} +} {version 2} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); SELECT * FROM ft1_config; -} {pgsz 32 version 1} +} {pgsz 32 version 2} do_execsql_test 1.3 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); SELECT * FROM ft1_config; -} {pgsz 64 version 1} +} {pgsz 64 version 2} 
#-------------------------------------------------------------------------- # Test the logic for parsing the rank() function definition. diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index 7cbd7b00e8..15868d1de1 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -16,6 +16,7 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt2 +sqlite3_fts5_may_be_corrupt 1 # Create a simple FTS5 table containing 100 documents. Each document # contains 10 terms, each of which start with the character "x". @@ -30,6 +31,7 @@ do_execsql_test 1.0 { } set mask [expr 31 << 31] + # Test 1: # # For each page in the t1_data table, open a transaction and DELETE @@ -194,5 +196,6 @@ foreach {tn nCut} { } +sqlite3_fts5_may_be_corrupt 0 finish_test diff --git a/ext/fts5/test/fts5dlidx.test b/ext/fts5/test/fts5dlidx.test index 6a23622ce0..76c05e8fd0 100644 --- a/ext/fts5/test/fts5dlidx.test +++ b/ext/fts5/test/fts5dlidx.test @@ -61,6 +61,7 @@ proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { } execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } } + breakpoint execsql COMMIT do_test $tn.1 { @@ -82,8 +83,8 @@ proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { do_dlidx_test1 1.1 10 100 10000 0 1000 do_dlidx_test1 1.2 10 10 10000 0 128 -do_dlidx_test1 1.3 10 10 100 0 36028797018963970 -do_dlidx_test1 1.3 10 10 50 0 150000000000000000 +do_dlidx_test1 1.3 10 10 66 0 36028797018963970 +do_dlidx_test1 1.4 10 10 50 0 150000000000000000 diff --git a/ext/fts5/test/fts5integrity.test b/ext/fts5/test/fts5integrity.test index a6dc34a90e..9e244c26fa 100644 --- a/ext/fts5/test/fts5integrity.test +++ b/ext/fts5/test/fts5integrity.test @@ -31,5 +31,27 @@ do_execsql_test 2.1 { INSERT INTO yy(yy) VALUES('integrity-check'); } +#-------------------------------------------------------------------- +# +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE zz USING fts5(z); + INSERT INTO zz(zz, rank) 
VALUES('pgsz', 32); + INSERT INTO zz VALUES('b b b b b b b b b b b b b b'); + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz SELECT z FROM zz; + INSERT INTO zz(zz) VALUES('optimize'); +} + +do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); } + + +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r} +#exit + + finish_test diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test index 57bb0bb2bd..5d0253472d 100644 --- a/ext/fts5/test/fts5rowid.test +++ b/ext/fts5/test/fts5rowid.test @@ -25,7 +25,7 @@ do_catchsql_test 1.2 { do_execsql_test 1.3 { SELECT fts5_rowid('segment', 1, 1, 1) -} {70866960385} +} {139586437121} do_catchsql_test 1.4 { SELECT fts5_rowid('nosucharg'); diff --git a/ext/fts5/test/fts5version.test b/ext/fts5/test/fts5version.test index 4e5df579e5..ccd035ae4b 100644 --- a/ext/fts5/test/fts5version.test +++ b/ext/fts5/test/fts5version.test @@ -24,35 +24,35 @@ do_execsql_test 1.1 { do_execsql_test 1.2 { SELECT * FROM t1_config WHERE k='version' -} {version 1} +} {version 2} do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'a'; } {1} do_execsql_test 1.4 { - UPDATE t1_config set v=2 WHERE k='version'; + UPDATE t1_config set v=3 WHERE k='version'; } do_test 1.5 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} breakpoint do_test 1.6 { db close sqlite3 db test.db catchsql { INSERT INTO t1 VALUES('x y z') } -} {1 {invalid fts5 file format (found 2, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}} do_test 1.7 { execsql { DELETE FROM t1_config WHERE k='version' } db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 
MATCH 'a' } -} {1 {invalid fts5 file format (found 0, expected 1) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}} finish_test diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index ef543552dc..3110954a8c 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -109,6 +109,7 @@ db transaction { if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" + # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { diff --git a/manifest b/manifest index 12d81ba731..78e665497d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Allow\sthe\sfts5vocab\stable\sto\soptionally\sprovide\sdata\son\sa\sper-column\sbasis. -D 2015-05-09T18:28:27.134 +C Change\sfts5\sdoclist-index\sstructures\sto\sbe\strees\sinstead\sof\sflat\slists.\sThis\sonly\smakes\sa\sdifference\sfor\sdatabases\sthat\scontain\smillions\sof\sinstances\sof\sthe\ssame\stoken. 
+D 2015-05-13T17:15:32.981 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 31b38b9da2e4b36f54a013bd71a5c3f6e45ca78f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,24 +104,24 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c a5a908a68c79c352a0dfa77d16712de43896bd07 +F ext/fts5/fts5.c 2899b3c60a382613889500571fd5158b2c1aa159 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 5b9e4afe80d18648bc236b9b5bc2f873634326f6 +F ext/fts5/fts5Int.h e01aec94c0d927924134c30afd9803425cd88812 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 6a4fed2d64d7dbb0416c4278b23201f77daf94ea +F ext/fts5/fts5_index.c b9a3382af3027f5c9717d90613fda5f29f7d57fa F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d -F ext/fts5/fts5_tcl.c aa3b102bb01f366174718be7ce8e9311b9abb482 +F ext/fts5/fts5_tcl.c f18eeb125d733f4e815a11679b971fa61cd7ec77 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5_vocab.c 2e37ea9b4d4d5460bc778f2adb872c6a869601e7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 -F ext/fts5/test/fts5aa.test e17657bd749cb3982745ec503ce22372dee7340d +F 
ext/fts5/test/fts5aa.test 5f221b82487abfb915e1b040eb4e305cf79a2ef5 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 8b3c2938840da8f3f6a53b1324fb03e0bac12d1e F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d @@ -132,14 +132,14 @@ F ext/fts5/test/fts5ah.test d74cf8b7de5b8424f732acef69fe12122a12f2bf F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 -F ext/fts5/test/fts5al.test 8cde0a064ffe452281b7c90a759d220f796bbb20 +F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 F ext/fts5/test/fts5aux.test d9c724351d8e4dc46cad1308c0b4b8ac94d07660 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 -F ext/fts5/test/fts5corrupt2.test c65a6619a1f712b87be0ccb3ef1a2120bf1f6430 -F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f +F ext/fts5/test/fts5corrupt2.test bdad9241f73076917512f5298501d50d9e9d7dc7 +F ext/fts5/test/fts5dlidx.test 74c3c8c33dfde594c0d8a22b9358d82fe56c8c7a F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 F ext/fts5/test/fts5ea.test ed163ed820fd503354bd7dcf9d3b0e3801ade962 F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e @@ -149,7 +149,7 @@ F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test 420f2e23775b458eeb9a325bcdfe84650c2e9d39 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa -F ext/fts5/test/fts5integrity.test 39deee579b84df2786d9c8298e9196b339cfc872 +F 
ext/fts5/test/fts5integrity.test 98801bd0fb7c53a40bc770280134865d61724f3a F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 @@ -159,14 +159,14 @@ F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 -F ext/fts5/test/fts5rowid.test 0dd51524739ebe5f1251a25f3d3ece9840fdc1a8 +F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 F ext/fts5/test/fts5tokenizer.test bbcde2a7473dcaa9a1fc6809aa8965acb7b846ff F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 -F ext/fts5/test/fts5version.test 1c902eaa7359336293ac45c7a34616527513e9fb +F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 F ext/fts5/test/fts5vocab.test 2d1bddfb6e1effd9e1d2f5d1d25bf05e9ab33e64 -F ext/fts5/tool/loadfts5.tcl 8a8f10d7d2d0d77f622e0a84cc0824c158c34a52 +F ext/fts5/tool/loadfts5.tcl add4d349ae5463c5f60b26e821c24e60ed8054d3 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -860,7 +860,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 F test/percentile.test 4243af26b8f3f4555abe166f723715a1f74c77ff -F test/permutations.test 62ff8c49738c72a70b034ecc31957bee437f76ff +F test/permutations.test 
fd77d410331d76399cdd31175c00c54eabe11fca F test/pragma.test be7195f0aa72bdb8a512133e9640ac40f15b57a2 F test/pragma2.test f624a496a95ee878e81e59961eade66d5c00c028 F test/pragma3.test 6f849ccffeee7e496d2f2b5e74152306c0b8757c @@ -1284,7 +1284,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 94f196c9961e0ca3513e29f57125a3197808be2d F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 69bae8ce4aa52d2ff82d4a8a856bf283ec035b2e -F tool/mksqlite3c.tcl e3136f007fcdaac00c207306ef4b352ca87bf9af +F tool/mksqlite3c.tcl eea6aa21d76f47c7932af2baa9291517ec72f5ce F tool/mksqlite3h.tcl 44730d586c9031638cdd2eb443b801c0d2dbd9f8 F tool/mksqlite3internalh.tcl eb994013e833359137eb53a55acdad0b5ae1049b F tool/mkvsix.tcl 3b58b9398f91c7dbf18d49eb87cefeee9efdbce1 @@ -1319,7 +1319,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 6bf93e3b56e6705b7d12bab5024fc615f373b36c -R f31ac5d295b9e0df865bd081bc32aa0b +P 3922276135a7825d0ede8d9c757e9cfe492f803a +R 80fc221c857ceeffac4c1ca1e8e3c4c0 U dan -Z 9e8a79e0ffff336d7475aff60e841c57 +Z ea2587087e805cefd8e74a1d1357bdd2 diff --git a/manifest.uuid b/manifest.uuid index b81db19d90..3338b019bb 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3922276135a7825d0ede8d9c757e9cfe492f803a \ No newline at end of file +aa34bf666c384cf32a8d8166ab6d9afbca26a256 \ No newline at end of file diff --git a/test/permutations.test b/test/permutations.test index 2ee3953d58..1bb5985b30 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -242,6 +242,14 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] +test_suite "fts5-light" -prefix "" -description { + All FTS5 tests. 
+} -files [ + test_set \ + [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \ + -exclude *corrupt* *fault* *big* *fts5aj* +] + test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index ca0eb02598..97b31f1a02 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -377,6 +377,7 @@ foreach file { fts5_storage.c fts5_tokenize.c fts5_unicode2.c + fts5_vocab.c rtree.c icu.c From 415ae725cb652b24d9630cf4003dbe99322ff154 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 15 May 2015 12:18:39 +0000 Subject: [PATCH 127/206] Add test cases. And some fixes. FossilOrigin-Name: adee788586197445672013d434e7ba47ce510b59 --- ext/fts5/fts5_index.c | 41 +++++++------- ext/fts5/fts5_vocab.c | 44 +++++++-------- ext/fts5/test/fts5corrupt2.test | 15 +++++- ext/fts5/test/fts5dlidx.test | 54 +++++++++++++++---- ext/fts5/test/fts5fault4.test | 6 +-- ext/fts5/test/fts5fault5.test | 96 +++++++++++++++++++++++++++++++++ manifest | 23 ++++---- manifest.uuid | 2 +- 8 files changed, 213 insertions(+), 68 deletions(-) create mode 100644 ext/fts5/test/fts5fault5.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 6df3774ebb..e2796d9c78 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2071,15 +2071,9 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ int pgnoLast = 0; if( pDlidx ){ - /* If the doclist-iterator is already at EOF, then the current doclist - ** contains no entries except those on the current page. 
*/ - if( fts5DlidxIterEof(p, pDlidx)==0 ){ - int iSegid = pIter->pSeg->iSegid; - pgnoLast = fts5DlidxIterPgno(pDlidx); - pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); - }else{ - pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); - } + int iSegid = pIter->pSeg->iSegid; + pgnoLast = fts5DlidxIterPgno(pDlidx); + pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); }else{ int iOff; /* Byte offset within pLeaf */ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ @@ -3430,8 +3424,7 @@ static void fts5WriteInit( pWriter->iSegid = iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter)); - pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); - if( pWriter->aDlidx==0 ) return; + if( fts5WriteDlidxGrow(p, pWriter, 1) ) return; pWriter->nWriter = 1; pWriter->nDlidx = 1; pWriter->aWriter[0].pgno = 1; @@ -3902,7 +3895,7 @@ static void fts5FlushOneHash(Fts5Index *p){ /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current ** doclist. 
*/ - while( iOffrc==SQLITE_OK && iOffrc==SQLITE_OK ){ int nSpace = pgsz - pBuf->n; int n = 0; if( (nCopy - iPos)<=nSpace ){ @@ -3990,7 +3983,7 @@ static void fts5FlushOneHash(Fts5Index *p){ */ static void fts5IndexFlush(Fts5Index *p){ /* Unless it is empty, flush the hash table to disk */ - if( p->rc==SQLITE_OK && p->nPendingData ){ + if( p->nPendingData ){ assert( p->pHash ); p->nPendingData = 0; fts5FlushOneHash(p); @@ -4337,6 +4330,7 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData); } + /* Flush the hash table to disk if required */ if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ fts5IndexFlush(p); } @@ -4932,7 +4926,6 @@ static void fts5DlidxIterTestReverse( ){ i64 iRowid = fts5DlidxIterRowid(pDlidx); int pgno = fts5DlidxIterPgno(pDlidx); - assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); cksum2 += iRowid + ((i64)pgno<<32); } @@ -4971,23 +4964,31 @@ static void fts5IndexIntegrityCheckSegment( if( pLeaf==0 ) break; /* Check that the leaf contains at least one term, and that it is equal - ** to or larger than the split-key in iter.term. */ + ** to or larger than the split-key in iter.term. Also check that if there + ** is also a rowid pointer within the leaf page header, it points to a + ** location before the term. 
*/ iOff = fts5GetU16(&pLeaf->p[2]); if( iOff==0 ){ p->rc = FTS5_CORRUPT; }else{ + int iRowidOff; int nTerm; /* Size of term on leaf in bytes */ int res; /* Comparison of term and split-key */ - iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); - res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); - if( res==0 ) res = nTerm - iter.term.n; - if( res<0 ){ + + iRowidOff = fts5GetU16(&pLeaf->p[0]); + if( iRowidOff>=iOff ){ p->rc = FTS5_CORRUPT; + }else{ + iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); + res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); + if( res==0 ) res = nTerm - iter.term.n; + if( res<0 ) p->rc = FTS5_CORRUPT; } } fts5DataRelease(pLeaf); if( p->rc ) break; + /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index 23a063296a..ee2a316291 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -344,36 +344,36 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ pCsr->iCol = 0; assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); - while( 1 ){ + while( rc==SQLITE_OK ){ const u8 *pPos; int nPos; /* Position list */ i64 iPos = 0; /* 64-bit position read from poslist */ int iOff = 0; /* Current offset within position list */ rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos); - if( rc!=SQLITE_OK ) break; - - if( pTab->eType==FTS5_VOCAB_ROW ){ - while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ - pCsr->aVal[1]++; - } - pCsr->aVal[0]++; - }else{ - int iCol = -1; - while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ - int ii = FTS5_POS2COLUMN(iPos); - pCsr->aCnt[ii]++; - if( iCol!=ii ){ - pCsr->aDoc[ii]++; - iCol = ii; + if( rc==SQLITE_OK ){ + if( pTab->eType==FTS5_VOCAB_ROW ){ + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + pCsr->aVal[1]++; + } + pCsr->aVal[0]++; + 
}else{ + int iCol = -1; + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + int ii = FTS5_POS2COLUMN(iPos); + pCsr->aCnt[ii]++; + if( iCol!=ii ){ + pCsr->aDoc[ii]++; + iCol = ii; + } } } + rc = sqlite3Fts5IterNextScan(pCsr->pIter); + } + if( rc==SQLITE_OK ){ + zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); + if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break; + if( sqlite3Fts5IterEof(pCsr->pIter) ) break; } - - rc = sqlite3Fts5IterNextScan(pCsr->pIter); - if( rc!=SQLITE_OK ) break; - zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); - if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break; - if( sqlite3Fts5IterEof(pCsr->pIter) ) break; } } } diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index 15868d1de1..df22f61b93 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -31,6 +31,7 @@ do_execsql_test 1.0 { } set mask [expr 31 << 31] +if 1 { # Test 1: # @@ -114,6 +115,8 @@ for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} { } {} } +} + #------------------------------------------------------------------------- # Test that corruption in leaf page headers is detected by queries that use # doclist-indexes. 
@@ -130,17 +133,21 @@ do_execsql_test 3.0 { foreach {tn hdr} { 1 "\x00\x00\x00\x00" 2 "\xFF\xFF\xFF\xFF" + 3 "\x44\x45" } { set tn2 0 set nCorrupt 0 + set nCorrupt2 0 foreach rowid [db eval {SELECT rowid FROM x3_data WHERE rowid>10}] { if {$rowid & $mask} continue incr tn2 - do_test 3.$tn.$tn2 { + do_test 3.$tn.$tn2.1 { execsql BEGIN set fd [db incrblob main x3_data block $rowid] fconfigure $fd -encoding binary -translation binary + set existing [read $fd [string length $hdr]] + seek $fd 0 puts -nonewline $fd $hdr close $fd @@ -149,6 +156,12 @@ foreach {tn hdr} { set {} 1 } {1} + if {($tn2 % 10)==0 && $existing != $hdr} { + do_test 3.$tn.$tn2.2 { + catchsql { INSERT INTO x3(x3) VALUES('integrity-check') } + } {1 {database disk image is malformed}} + } + execsql ROLLBACK } diff --git a/ext/fts5/test/fts5dlidx.test b/ext/fts5/test/fts5dlidx.test index 76c05e8fd0..013de1b4a0 100644 --- a/ext/fts5/test/fts5dlidx.test +++ b/ext/fts5/test/fts5dlidx.test @@ -26,11 +26,6 @@ proc do_fb_test {tn sql res} { uplevel [list do_execsql_test $tn.2 "$sql ORDER BY rowid DESC" $res2] } -do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x); - INSERT INTO t1(t1, rank) VALUES('pgsz', 32); -} - # This test populates the FTS5 table containing $nEntry entries. Rows are # numbered from 0 to ($nEntry-1). 
The rowid for row $i is: # @@ -61,9 +56,9 @@ proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { } execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } } - breakpoint execsql COMMIT + breakpoint do_test $tn.1 { execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} @@ -81,12 +76,51 @@ proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { } -do_dlidx_test1 1.1 10 100 10000 0 1000 -do_dlidx_test1 1.2 10 10 10000 0 128 -do_dlidx_test1 1.3 10 10 66 0 36028797018963970 -do_dlidx_test1 1.4 10 10 50 0 150000000000000000 +foreach {tn pgsz} { + 1 32 + 2 200 +} { + do_execsql_test $tn.0 { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz); + } + do_dlidx_test1 1.$tn.1 10 100 10000 0 1000 + do_dlidx_test1 1.$tn.2 10 10 10000 0 128 + do_dlidx_test1 1.$tn.3 10 10 66 0 36028797018963970 + do_dlidx_test1 1.$tn.4 10 10 50 0 150000000000000000 + do_dlidx_test1 1.$tn.5 10 10 200 0 [expr 1<<55] + do_dlidx_test1 1.$tn.6 10 10 30 0 [expr 1<<58] +} +proc do_dlidx_test2 {tn nEntry iFirst nStep} { + set str [string repeat "a " 500] + execsql { + BEGIN; + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); + INSERT INTO t1 VALUES('b a'); + + WITH iii(ii, i) AS ( + SELECT 1, $iFirst UNION ALL + SELECT ii+1, i+$nStep FROM iii WHERE ii<$nEntry + ) + INSERT INTO t1(rowid,x) SELECT i, $str FROM iii; + COMMIT; + } + + do_execsql_test $tn.1 { + SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a' + } {1} + breakpoint + do_execsql_test $tn.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a' ORDER BY rowid DESC + } {1} +} + +do_dlidx_test2 2.1 [expr 20] [expr 1<<57] [expr (1<<57) + 128] finish_test diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index df2112c63f..6bfc825320 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -317,14 +317,15 @@ do_execsql_test 10.0 { COMMIT; } -} - do_faultsim_test 10.1 
-faults oom-t* -body { db one { SELECT fts5_expr('a AND b NEAR(a b)') } } -test { faultsim_test_result {0 {"a" AND ("b" AND NEAR("a" "b", 10))}} } +} + + #do_faultsim_test 10.2 -faults oom-t* -body { # db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') } #} -test { @@ -333,6 +334,5 @@ do_faultsim_test 10.1 -faults oom-t* -body { #} - finish_test diff --git a/ext/fts5/test/fts5fault5.test b/ext/fts5/test/fts5fault5.test new file mode 100644 index 0000000000..21a4e5d6b4 --- /dev/null +++ b/ext/fts5/test/fts5fault5.test @@ -0,0 +1,96 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5fault5 + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +#------------------------------------------------------------------------- +# OOM while creating an FTS5 table. +# +do_faultsim_test 1.1 -faults oom-t* -prep { + db eval { DROP TABLE IF EXISTS abc } +} -body { + db eval { CREATE VIRTUAL TABLE abc USING fts5(x,y) } +} -test { + faultsim_test_result {0 {}} +} + + +#------------------------------------------------------------------------- +# OOM while writing a multi-tier doclist-index. And while running +# integrity-check on the same. 
+# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE tt USING fts5(x); + INSERT INTO tt(tt, rank) VALUES('pgsz', 32); +} +faultsim_save_and_close + +do_faultsim_test 2.1 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM tt } +} -body { + set str [string repeat "abc " 50] + db eval { + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) + INSERT INTO tt(rowid, x) SELECT i, $str FROM ii; + } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 2.2 -faults oom-t* -body { + db eval { INSERT INTO tt(tt) VALUES('integrity-check') } +} -test { + faultsim_test_result {0 {}} +} + +#------------------------------------------------------------------------- +# OOM while scanning an fts5vocab table. +# +reset_db +do_test 3.0 { + execsql { + CREATE VIRTUAL TABLE tt USING fts5(x); + CREATE VIRTUAL TABLE tv USING fts5vocab(tt, 'row'); + INSERT INTO tt(tt, rank) VALUES('pgsz', 32); + BEGIN; + } + for {set i 0} {$i < 20} {incr i} { + set str [string repeat "$i " 20] + execsql { INSERT INTO tt VALUES($str) } + } + execsql COMMIT +} {} + +do_faultsim_test 2.1 -faults oom-t* -body { + db eval { + SELECT term FROM tv; + } +} -test { + faultsim_test_result {0 {0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9}} +} + + + +finish_test + diff --git a/manifest b/manifest index 2af27f7119..46ffb625b4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. -D 2015-05-13T18:12:58.028 +C Add\stest\scases.\sAnd\ssome\sfixes. 
+D 2015-05-15T12:18:39.221 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,12 +112,12 @@ F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c b9a3382af3027f5c9717d90613fda5f29f7d57fa +F ext/fts5/fts5_index.c 71d5ce47464f176e8708c7ec02d18613eb5eebda F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d F ext/fts5/fts5_tcl.c f18eeb125d733f4e815a11679b971fa61cd7ec77 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d -F ext/fts5/fts5_vocab.c 2e37ea9b4d4d5460bc778f2adb872c6a869601e7 +F ext/fts5/fts5_vocab.c e532f38a62d27fa662138a6cf33890b314225506 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 @@ -138,15 +138,16 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 -F ext/fts5/test/fts5corrupt2.test bdad9241f73076917512f5298501d50d9e9d7dc7 -F ext/fts5/test/fts5dlidx.test 74c3c8c33dfde594c0d8a22b9358d82fe56c8c7a +F ext/fts5/test/fts5corrupt2.test 88942d27ed581314f2867ef37352c72372c543df +F ext/fts5/test/fts5dlidx.test 070531bd45685e545e3e6021deb543f730a4011b F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 F ext/fts5/test/fts5ea.test ed163ed820fd503354bd7dcf9d3b0e3801ade962 F 
ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test 420f2e23775b458eeb9a325bcdfe84650c2e9d39 +F ext/fts5/test/fts5fault4.test a40e676e3e3b40901e2142f6fa5dff9e7313f5f7 +F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa F ext/fts5/test/fts5integrity.test 98801bd0fb7c53a40bc770280134865d61724f3a @@ -246,7 +247,7 @@ F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 F src/complete.c a5cf5b4b56390cfb7b8636e8f7ddef90258dd575 F src/ctime.c 5a0b735dc95604766f5dac73973658eef782ee8b F src/date.c e4d50b3283696836ec1036b695ead9a19e37a5ac -F src/dbstat.c f402e77e25089c6003d0c60b3233b9b3947d599a w src/test_stat.c +F src/dbstat.c f402e77e25089c6003d0c60b3233b9b3947d599a F src/delete.c 37964e6c1d73ff49cbea9ff690c9605fb15f600e F src/expr.c 3fb2ab3ab69d15b4b75ae53fceb4e317f64cb306 F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb @@ -1324,7 +1325,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P aa34bf666c384cf32a8d8166ab6d9afbca26a256 59e3e9e764440b7feaafadff74f422535d21bca2 -R 7fb50d1391be18153fa78235920902bf +P b5f0e8c5b4bc018d672617ffd342d12b228548b9 +R bdc9509f45810e2b38f3396d130ff7ee U dan -Z 5ed330406879741f67800dc0f7082030 +Z 2d21c26b247bac296f85906c81a77b9c diff --git a/manifest.uuid b/manifest.uuid index a27ca1b112..6c7bb30f6e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b5f0e8c5b4bc018d672617ffd342d12b228548b9 \ 
No newline at end of file +adee788586197445672013d434e7ba47ce510b59 \ No newline at end of file From 369e4129b90fe256241cf05464a7a9212e667804 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 15 May 2015 18:13:14 +0000 Subject: [PATCH 128/206] Improve test coverage of fts5_index.c. FossilOrigin-Name: 7aea8c6d99737c6c72078e0b4b9c5f8186021aa0 --- ext/fts5/fts5_index.c | 227 ++++++++++++++++++-------------- ext/fts5/test/fts5corrupt2.test | 36 ++++- ext/fts5/test/fts5corrupt3.test | 57 ++++++++ ext/fts5/test/fts5merge.test | 49 +++++++ manifest | 17 +-- manifest.uuid | 2 +- 6 files changed, 275 insertions(+), 113 deletions(-) create mode 100644 ext/fts5/test/fts5corrupt3.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e2796d9c78..f9317ddd66 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3462,9 +3462,6 @@ static void fts5WriteInitForAppend( fts5NodeIterFree(&ss); } } - if( pSeg->nHeight==1 ){ - pWriter->nEmpty = pSeg->pgnoLast-1; - } assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); pWriter->bFirstTermInPage = 1; assert( pWriter->aWriter[0].term.n==0 ); @@ -4051,8 +4048,10 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ Fts5Structure *pStruct; pStruct = fts5StructureRead(p); - fts5IndexMerge(p, &pStruct, nMerge); - fts5StructureWrite(p, pStruct); + if( pStruct && pStruct->nLevel ){ + fts5IndexMerge(p, &pStruct, nMerge); + fts5StructureWrite(p, pStruct); + } fts5StructureRelease(pStruct); return fts5IndexReturn(p); @@ -4533,21 +4532,21 @@ int sqlite3Fts5IndexQuery( memcpy(&buf.p[1], pToken, nToken); } +#ifdef SQLITE_DEBUG + if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + assert( flags & FTS5INDEX_QUERY_PREFIX ); + iIdx = 1+pConfig->nPrefix; + }else +#endif if( flags & FTS5INDEX_QUERY_PREFIX ){ - if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ - iIdx = 1+pConfig->nPrefix; - }else{ - int nChar = fts5IndexCharlen(pToken, nToken); - for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ - if( pConfig->aPrefix[iIdx-1]==nChar ) 
break; - } + int nChar = fts5IndexCharlen(pToken, nToken); + for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ + if( pConfig->aPrefix[iIdx-1]==nChar ) break; } } pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); if( pRet ){ - memset(pRet, 0, sizeof(Fts5IndexIter)); - pRet->pIndex = p; if( iIdx<=pConfig->nPrefix ){ buf.p[0] = FTS5_MAIN_PREFIX + iIdx; @@ -4890,6 +4889,7 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ fts5BufferFree(&pIter->term); } +#ifdef SQLITE_DEBUG /* ** This function is purely an internal test. It does not contribute to ** FTS functionality, or even the integrity-check, in any way. @@ -4898,8 +4898,7 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ ** visited regardless of whether the doclist-index identified by parameters ** iSegid/iLeaf is iterated in forwards or reverse order. */ -#ifdef SQLITE_DEBUG -static void fts5DlidxIterTestReverse( +static void fts5TestDlidxReverse( Fts5Index *p, int iSegid, /* Segment id to load from */ int iLeaf /* Load doclist-index for this leaf */ @@ -4934,8 +4933,107 @@ static void fts5DlidxIterTestReverse( if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } + +static int fts5QueryCksum( + Fts5Index *p, /* Fts5 index object */ + int iIdx, + const char *z, /* Index key to query for */ + int n, /* Size of index key in bytes */ + int flags, /* Flags for Fts5IndexQuery */ + u64 *pCksum /* IN/OUT: Checksum value */ +){ + u64 cksum = *pCksum; + Fts5IndexIter *pIdxIter = 0; + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); + + while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + const u8 *pPos; + int nPos; + i64 rowid = sqlite3Fts5IterRowid(pIdxIter); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + if( rc==SQLITE_OK ){ + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = 
FTS5_POS2OFFSET(sReader.iPos); + cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); + } + rc = sqlite3Fts5IterNext(pIdxIter); + } + } + sqlite3Fts5IterClose(pIdxIter); + + *pCksum = cksum; + return rc; +} + + +/* +** This function is also purely an internal test. It does not contribute to +** FTS functionality, or even the integrity-check, in any way. +*/ +static void fts5TestTerm( + Fts5Index *p, + Fts5Buffer *pPrev, /* Previous term */ + const char *z, int n, /* Possibly new term to test */ + u64 expected, + u64 *pCksum +){ + int rc = p->rc; + if( pPrev->n==0 ){ + fts5BufferSet(&rc, pPrev, n, (const u8*)z); + }else + if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ + u32 cksum3 = *pCksum; + const char *zTerm = &pPrev->p[1]; /* The term without the prefix-byte */ + int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ + int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); + int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); + int rc; + u64 ck1 = 0; + u64 ck2 = 0; + + /* Check that the results returned for ASC and DESC queries are + ** the same. If not, call this corruption. */ + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); + if( rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_DESC; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + } + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + + /* If this is a prefix query, check that the results returned if the + ** the index is disabled are the same. In both ASC and DESC order. 
*/ + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + + cksum3 ^= ck1; + fts5BufferSet(&rc, pPrev, n, (const u8*)z); + + if( rc==SQLITE_OK && cksum3!=expected ){ + rc = FTS5_CORRUPT; + } + *pCksum = cksum3; + } + p->rc = rc; +} + #else -# define fts5DlidxIterTestReverse(x,y,z) +# define fts5TestDlidxReverse(x,y,z) +# define fts5TestTerm(u,v,w,x,y,z) #endif static void fts5IndexIntegrityCheckSegment( @@ -5046,17 +5144,12 @@ static void fts5IndexIntegrityCheckSegment( } fts5DlidxIterFree(pDlidx); - fts5DlidxIterTestReverse(p, iSegid, iter.iLeaf); + fts5TestDlidxReverse(p, iSegid, iter.iLeaf); } } - /* Either iter.iLeaf must be the rightmost leaf-page in the segment, or - ** else the segment has been completely emptied by an ongoing merge - ** operation. 
*/ - if( p->rc==SQLITE_OK - && iter.iLeaf!=pSeg->pgnoLast - && (pSeg->pgnoFirst || pSeg->pgnoLast) - ){ + /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ + if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; } @@ -5064,42 +5157,6 @@ static void fts5IndexIntegrityCheckSegment( } -static int fts5QueryCksum( - Fts5Index *p, /* Fts5 index object */ - int iIdx, - const char *z, /* Index key to query for */ - int n, /* Size of index key in bytes */ - int flags, /* Flags for Fts5IndexQuery */ - u64 *pCksum /* IN/OUT: Checksum value */ -){ - u64 cksum = *pCksum; - Fts5IndexIter *pIdxIter = 0; - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); - - while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ - const u8 *pPos; - int nPos; - i64 rowid = sqlite3Fts5IterRowid(pIdxIter); - rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); - if( rc==SQLITE_OK ){ - Fts5PoslistReader sReader; - for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); - sReader.bEof==0; - sqlite3Fts5PoslistReaderNext(&sReader) - ){ - int iCol = FTS5_POS2COLUMN(sReader.iPos); - int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); - } - rc = sqlite3Fts5IterNext(pIdxIter); - } - } - sqlite3Fts5IterClose(pIdxIter); - - *pCksum = cksum; - return rc; -} - /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums @@ -5112,11 +5169,13 @@ static int fts5QueryCksum( */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ u64 cksum2 = 0; /* Checksum based on contents of indexes */ - u64 cksum3 = 0; /* Checksum based on contents of indexes */ - Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ Fts5MultiSegIter *pIter; /* Used to iterate through entire index */ Fts5Structure *pStruct; /* Index structure */ + + /* Used by 
extra internal tests only run if NDEBUG is not defined */ + u64 cksum3 = 0; /* Checksum based on contents of indexes */ + Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ /* Load the FTS index structure */ pStruct = fts5StructureRead(p); @@ -5164,48 +5223,12 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ } /* If this is a new term, query for it. Update cksum3 with the results. */ - if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ - const char *zTerm = &z[1]; /* The term without the prefix-byte */ - int nTerm = n-1; /* Size of zTerm in bytes */ - int iIdx = (z[0] - FTS5_MAIN_PREFIX); - int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); - int rc; - u64 ck1 = 0; - u64 ck2 = 0; - - /* Check that the results returned for ASC and DESC queries are - ** the same. If not, call this corruption. */ - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); - if( rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_DESC; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - } - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - - /* If this is a prefix query, check that the results returned if the - ** the index is disabled are the same. In both ASC and DESC order. 
*/ - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - - cksum3 ^= ck1; - fts5BufferSet(&rc, &term, n, (const u8*)z); - p->rc = rc; - } + fts5TestTerm(p, &term, z, n, cksum2, &cksum3); } - fts5MultiIterFree(p, pIter); + fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); + fts5MultiIterFree(p, pIter); if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; - if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT; fts5StructureRelease(pStruct); fts5BufferFree(&term); diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index df22f61b93..099b966945 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -115,8 +115,6 @@ for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} { } {} } -} - #------------------------------------------------------------------------- # Test that corruption in leaf page headers is detected by queries that use # doclist-indexes. 
@@ -208,6 +206,40 @@ foreach {tn nCut} { do_test 4.$tn.x { expr $nCorrupt>0 } 1 } +} + +set doc [string repeat "A B C " 1000] +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE x5 USING fts5(tt); + INSERT INTO x5(x5, rank) VALUES('pgsz', 32); + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) + INSERT INTO x5 SELECT $doc FROM ii; +} + +foreach {tn hdr} { + 1 "\x00\x01" +} { + set tn2 0 + set nCorrupt 0 + foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] { + if {$rowid & $mask} continue + incr tn2 + do_test 4.$tn.$tn2 { + execsql BEGIN + + set fd [db incrblob main x5_data block $rowid] + fconfigure $fd -encoding binary -translation binary + puts -nonewline $fd $hdr + close $fd + + catchsql { INSERT INTO x5(x5) VALUES('integrity-check') } + set {} {} + } {} + + execsql ROLLBACK + } +} + sqlite3_fts5_may_be_corrupt 0 finish_test diff --git a/ext/fts5/test/fts5corrupt3.test b/ext/fts5/test/fts5corrupt3.test new file mode 100644 index 0000000000..1df8b3e8de --- /dev/null +++ b/ext/fts5/test/fts5corrupt3.test @@ -0,0 +1,57 @@ +# 2015 Apr 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file tests that FTS5 handles corrupt databases (i.e. internal +# inconsistencies in the backing tables) correctly. In this case +# "correctly" means without crashing. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5corrupt3 +sqlite3_fts5_may_be_corrupt 1 + +# Create a simple FTS5 table containing 100 documents. Each document +# contains 10 terms, each of which start with the character "x". 
+# +expr srand(0) +db func rnddoc fts5_rnddoc +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); + WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) + INSERT INTO t1 SELECT rnddoc(10) FROM ii; +} +set mask [expr 31 << 31] + +do_test 1.1 { + # Pick out the rowid of the right-most b-tree leaf in the new segment. + set rowid [db one { + SELECT max(rowid) FROM t1_data WHERE ((rowid>>31) & 0x0F)==1 + }] + set L [db one {SELECT length(block) FROM t1_data WHERE rowid = $rowid}] + set {} {} +} {} + +for {set i 0} {$i < $L} {incr i} { + do_test 1.2.$i { + catchsql { + BEGIN; + UPDATE t1_data SET block = substr(block, 1, $i) WHERE id = $rowid; + INSERT INTO t1(t1) VALUES('integrity-check'); + } + } {1 {database disk image is malformed}} + catchsql ROLLBACK +} + + +sqlite3_fts5_may_be_corrupt 0 +finish_test + diff --git a/ext/fts5/test/fts5merge.test b/ext/fts5/test/fts5merge.test index 1c048be8ed..d869c6cedb 100644 --- a/ext/fts5/test/fts5merge.test +++ b/ext/fts5/test/fts5merge.test @@ -135,5 +135,54 @@ do_test 3.4 { fts5_level_segs x8 } {0 1} +#------------------------------------------------------------------------- +# +proc mydoc {} { + set x [lindex {a b c d e f g h i j} [expr int(rand()*10)]] + return [string repeat "$x " 30] +} +db func mydoc mydoc + +proc mycount {} { + set res [list] + foreach x {a b c d e f g h i j} { + lappend res [db one {SELECT count(*) FROM x8 WHERE x8 MATCH $x}] + } + set res +} + + #1 32 +foreach {tn pgsz} { + 2 1000 +} { + do_execsql_test 4.$tn.1 { + DROP TABLE IF EXISTS x8; + CREATE VIRTUAL TABLE x8 USING fts5(i); + INSERT INTO x8(x8, rank) VALUES('pgsz', $pgsz); + } + + do_execsql_test 4.$tn.2 { + INSERT INTO x8(x8, rank) VALUES('merge', 1); + } + + do_execsql_test 4.$tn.3 { + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) + INSERT INTO x8 SELECT mydoc() FROM ii; + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) + INSERT INTO x8 
SELECT mydoc() FROM ii; + INSERT INTO x8(x8, rank) VALUES('automerge', 2); + } + + set expect [mycount] + for {set i 0} {$i < 20} {incr i} { + do_test 4.$tn.4.$i { + execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1); } + mycount + } $expect + break + } + db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r } +} + finish_test diff --git a/manifest b/manifest index 46ffb625b4..aaac042c6a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stest\scases.\sAnd\ssome\sfixes. -D 2015-05-15T12:18:39.221 +C Improve\stest\scoverage\sof\sfts5_index.c. +D 2015-05-15T18:13:14.380 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 71d5ce47464f176e8708c7ec02d18613eb5eebda +F ext/fts5/fts5_index.c 6e0ac5835ab33a2cf97efd591acd4fc130490e0f F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d F ext/fts5/fts5_tcl.c f18eeb125d733f4e815a11679b971fa61cd7ec77 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc @@ -138,7 +138,8 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 -F ext/fts5/test/fts5corrupt2.test 88942d27ed581314f2867ef37352c72372c543df +F ext/fts5/test/fts5corrupt2.test 7000030df189f1f3ca58b555b459bcbf9b8f8f77 +F ext/fts5/test/fts5corrupt3.test fe42c0ce0b58b7ad487a469049f91419d22c7459 F ext/fts5/test/fts5dlidx.test 
070531bd45685e545e3e6021deb543f730a4011b F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 F ext/fts5/test/fts5ea.test ed163ed820fd503354bd7dcf9d3b0e3801ade962 @@ -151,7 +152,7 @@ F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa F ext/fts5/test/fts5integrity.test 98801bd0fb7c53a40bc770280134865d61724f3a -F ext/fts5/test/fts5merge.test 453a0717881aa7784885217b2040f3f275caff03 +F ext/fts5/test/fts5merge.test b985b6891e093a4b4c3c9683fe3cba7498fed690 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47 @@ -1325,7 +1326,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b5f0e8c5b4bc018d672617ffd342d12b228548b9 -R bdc9509f45810e2b38f3396d130ff7ee +P adee788586197445672013d434e7ba47ce510b59 +R 5c5f538830728743e22dc39869e34f52 U dan -Z 2d21c26b247bac296f85906c81a77b9c +Z ae5179a6b1088d2fd3b13e20090e807a diff --git a/manifest.uuid b/manifest.uuid index 6c7bb30f6e..90a0fbd3cb 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -adee788586197445672013d434e7ba47ce510b59 \ No newline at end of file +7aea8c6d99737c6c72078e0b4b9c5f8186021aa0 \ No newline at end of file From e0569a4aae133438113e163454ba8d0613c612de Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 16 May 2015 20:04:43 +0000 Subject: [PATCH 129/206] Further test coverage improvements for fts5. 
FossilOrigin-Name: 927d9a64e13c6b768f0a15475713192fcfaaf9e7 --- ext/fts5/fts5.c | 27 ++++++++++++++------------- ext/fts5/fts5_expr.c | 34 ++++++++++++++++++++-------------- ext/fts5/test/fts5ac.test | 4 ++++ ext/fts5/test/fts5aux.test | 19 +++++++++++++++++++ ext/fts5/test/fts5fault4.test | 31 ++++++++++++------------------- ext/fts5/test/fts5hash.test | 15 ++++++++++++++- manifest | 24 ++++++++++++------------ manifest.uuid | 2 +- test/malloc_common.tcl | 2 +- 9 files changed, 97 insertions(+), 61 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index e519635149..aa23937f43 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1648,6 +1648,14 @@ static void fts5ApiInvoke( pCsr->pAux = 0; } +static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ + Fts5Cursor *pCsr; + for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ + if( pCsr->iCsrId==iCsrId ) break; + } + return pCsr; +} + static void fts5ApiCallback( sqlite3_context *context, int argc, @@ -1662,9 +1670,7 @@ static void fts5ApiCallback( pAux = (Fts5Auxiliary*)sqlite3_user_data(context); iCsrId = sqlite3_value_int64(argv[0]); - for(pCsr=pAux->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ - if( pCsr->iCsrId==iCsrId ) break; - } + pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); if( pCsr==0 ){ char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); sqlite3_result_error(context, zErr, -1); @@ -1688,18 +1694,13 @@ Fts5Index *sqlite3Fts5IndexFromCsrid( int *pnCol ){ Fts5Cursor *pCsr; - Fts5Index *pIndex = 0; + Fts5Table *pTab; - for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ - if( pCsr->iCsrId==iCsrId ) break; - } - if( pCsr ){ - Fts5Table *pTab = (Fts5Table*)pCsr->base.pVtab; - pIndex = pTab->pIndex; - *pnCol = pTab->pConfig->nCol; - } + pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); + pTab = (Fts5Table*)pCsr->base.pVtab; + *pnCol = pTab->pConfig->nCol; - return pIndex; + return pTab->pIndex; } /* diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index b7018e4768..3446c3e543 
100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1398,6 +1398,7 @@ static char *fts5ExprPrintTcl( int iTerm; zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); + if( zRet==0 ) return 0; if( pNear->iCol>=0 ){ zRet = fts5PrintfAppend(zRet, "-col %d ", pNear->iCol); if( zRet==0 ) return 0; @@ -1424,7 +1425,7 @@ static char *fts5ExprPrintTcl( if( zRet==0 ) return 0; } - if( zRet ) zRet = fts5PrintfAppend(zRet, "]"); + zRet = fts5PrintfAppend(zRet, "]"); if( zRet==0 ) return 0; }else{ @@ -1434,8 +1435,10 @@ static char *fts5ExprPrintTcl( switch( pExpr->eType ){ case FTS5_AND: zOp = "&&"; break; case FTS5_NOT: zOp = "&& !"; break; - case FTS5_OR: zOp = "||"; break; - default: assert( 0 ); + default: + assert( pExpr->eType==FTS5_OR ); + zOp = "||"; + break; } z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft); @@ -1504,8 +1507,10 @@ static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ switch( pExpr->eType ){ case FTS5_AND: zOp = "AND"; break; case FTS5_NOT: zOp = "NOT"; break; - case FTS5_OR: zOp = "OR"; break; - default: assert( 0 ); + default: + assert( pExpr->eType==FTS5_OR ); + zOp = "OR"; + break; } z1 = fts5ExprPrint(pConfig, pExpr->pLeft); @@ -1663,16 +1668,17 @@ int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ ** iPhrase. 
*/ int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ - if( iPhrase>=0 && iPhrasenPhrase ){ - Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; - Fts5ExprNode *pNode = pPhrase->pNode; - if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ - *pa = pPhrase->poslist.p; - return pPhrase->poslist.n; - } + int nRet; + Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; + Fts5ExprNode *pNode = pPhrase->pNode; + if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ + *pa = pPhrase->poslist.p; + nRet = pPhrase->poslist.n; + }else{ + *pa = 0; + nRet = 0; } - *pa = 0; - return 0; + return nRet; } #endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index b061d0bf6f..3370063bea 100644 --- a/ext/fts5/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -443,5 +443,9 @@ foreach {tn2 sql} { } } +do_execsql_test 3.1 { + SELECT fts5_expr_tcl('a AND b'); +} {{[nearset -- {a}] && [nearset -- {b}]}} + finish_test diff --git a/ext/fts5/test/fts5aux.test b/ext/fts5/test/fts5aux.test index 61a28e50fb..894c8d6f1b 100644 --- a/ext/fts5/test/fts5aux.test +++ b/ext/fts5/test/fts5aux.test @@ -180,6 +180,25 @@ do_catchsql_test 6.2 { SELECT my_rowid(y) FROM e11 WHERE e11 MATCH 'b' } {1 {no such cursor: 0}} +#------------------------------------------------------------------------- +# Test passing an out-of-range phrase number to xPhraseSize (should +# return 0). 
+# +proc my_phrasesize {cmd iPhrase} { $cmd xPhraseSize $iPhrase } +sqlite3_fts5_create_function db my_phrasesize my_phrasesize + +do_execsql_test 7.1 { + CREATE VIRTUAL TABLE t1 USING fts5(a); + INSERT INTO t1 VALUES('a b c'); +} +do_execsql_test 7.2 { + SELECT + my_phrasesize(t1, -1), + my_phrasesize(t1, 0), + my_phrasesize(t1, 1), + my_phrasesize(t1, 2) + FROM t1 WHERE t1 MATCH 'a OR b+c' +} {0 1 2 0} finish_test diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index 6bfc825320..b22407c5cb 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -302,36 +302,29 @@ do_faultsim_test 9.1 -faults oom-* -body { faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM} } +} + #------------------------------------------------------------------------- # OOM in fts5_expr() SQL function. # -reset_db -do_execsql_test 10.0 { - CREATE VIRTUAL TABLE tt USING fts5(x); - INSERT INTO tt(tt, rank) VALUES('pgsz', 32); - BEGIN; - WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<200) - INSERT INTO tt(rowid, x) - SELECT i, CASE WHEN (i%50)==0 THEN 'a a a a a a' ELSE 'a x a x a x' END - FROM ii; - COMMIT; -} - do_faultsim_test 10.1 -faults oom-t* -body { db one { SELECT fts5_expr('a AND b NEAR(a b)') } } -test { faultsim_test_result {0 {"a" AND ("b" AND NEAR("a" "b", 10))}} } +do_faultsim_test 10.2 -faults oom-t* -body { + db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') } +} -test { + set res {[ns -col 0 -- {a b c}] && ([ns -- {b}] && [ns -near 10 -- {a} {b}])} + faultsim_test_result [list 0 $res] } - -#do_faultsim_test 10.2 -faults oom-t* -body { -# db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') } -#} -test { -# set res {[ns -col 0 -- {a b c}] && ([ns -- {b}] && [ns -near 10 -- {a} {b}]} -# faultsim_test_result [list 0 $res] -#} +do_faultsim_test 10.3 -faults oom-t* -body { + db one { SELECT fts5_expr('x:a', 'x') } +} -test { + faultsim_test_result {0 {x : "a"}} +} finish_test 
diff --git a/ext/fts5/test/fts5hash.test b/ext/fts5/test/fts5hash.test index 94119603c7..1189ef5391 100644 --- a/ext/fts5/test/fts5hash.test +++ b/ext/fts5/test/fts5hash.test @@ -76,14 +76,27 @@ set vocab [build_vocab1 -prefix xyz -hash $hash] lappend vocab xyz do_execsql_test 1.1 { + CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row'); BEGIN; WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii; INSERT INTO eee(eee) VALUES('integrity-check'); +} + +do_test 1.2 { + db eval { SELECT term, doc FROM vocab } { + set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}] + if {$nRow != $doc} { + error "term=$term fts5vocab=$doc cnt=$nRow" + } + } + set {} {} +} {} + +do_execsql_test 1.3 { COMMIT; INSERT INTO eee(eee) VALUES('integrity-check'); } - finish_test diff --git a/manifest b/manifest index aaac042c6a..a0e7897b83 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\stest\scoverage\sof\sfts5_index.c. -D 2015-05-15T18:13:14.380 +C Further\stest\scoverage\simprovements\sfor\sfts5. 
+D 2015-05-16T20:04:43.177 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,13 +104,13 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 2899b3c60a382613889500571fd5158b2c1aa159 +F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a F ext/fts5/fts5Int.h e01aec94c0d927924134c30afd9803425cd88812 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 -F ext/fts5/fts5_expr.c 3fe1170453d6a322d2de8a3fd0aed3edff7b8b09 +F ext/fts5/fts5_expr.c a88af159ce5dcc44d7262ea80865ffc8e4ab2143 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c 6e0ac5835ab33a2cf97efd591acd4fc130490e0f F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d @@ -123,7 +123,7 @@ F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 F ext/fts5/test/fts5aa.test 5f221b82487abfb915e1b040eb4e305cf79a2ef5 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad -F ext/fts5/test/fts5ac.test 8b3c2938840da8f3f6a53b1324fb03e0bac12d1e +F ext/fts5/test/fts5ac.test 05008e00bd2761cc45df838a0988ecf318cbe1fd F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a @@ -133,7 +133,7 @@ F ext/fts5/test/fts5ai.test 
f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5aux.test d9c724351d8e4dc46cad1308c0b4b8ac94d07660 +F ext/fts5/test/fts5aux.test d9cd26ee45ad5f628b4899f1ac5b757ce7a77bdf F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f @@ -147,10 +147,10 @@ F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test a40e676e3e3b40901e2142f6fa5dff9e7313f5f7 +F ext/fts5/test/fts5fault4.test 6e5b1609a0e7d51e7598fa675db581b3b1bfbf7e F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d -F ext/fts5/test/fts5hash.test adb7b0442cc1c77c507f07e16d11490486e75dfa +F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 F ext/fts5/test/fts5integrity.test 98801bd0fb7c53a40bc770280134865d61724f3a F ext/fts5/test/fts5merge.test b985b6891e093a4b4c3c9683fe3cba7498fed690 F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 @@ -803,7 +803,7 @@ F test/mallocI.test a88c2b9627c8506bf4703d8397420043a786cdb6 F test/mallocJ.test b5d1839da331d96223e5f458856f8ffe1366f62e F test/mallocK.test da01dcdd316767b8356741f8d33a23a06a23def5 F test/mallocL.test 252ddc7eb4fbf75364eab17b938816085ff1fc17 -F test/malloc_common.tcl a644f12e2da20ddfabb8bd077ec610a44113450e +F test/malloc_common.tcl aac62499b76be719fac31e7a3e54a7fd53272e7f F test/manydb.test 
28385ae2087967aa05c38624cec7d96ec74feb3e F test/mem5.test c6460fba403c5703141348cd90de1c294188c68f F test/memdb.test fcb5297b321b562084fc79d64d5a12a1cd2b639b @@ -1326,7 +1326,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P adee788586197445672013d434e7ba47ce510b59 -R 5c5f538830728743e22dc39869e34f52 +P 7aea8c6d99737c6c72078e0b4b9c5f8186021aa0 +R 5014cf90f97a250385093a02a58cfb68 U dan -Z ae5179a6b1088d2fd3b13e20090e807a +Z e8178e8c15c67776f9bff619db326cbd diff --git a/manifest.uuid b/manifest.uuid index 90a0fbd3cb..5f3b901b63 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7aea8c6d99737c6c72078e0b4b9c5f8186021aa0 \ No newline at end of file +927d9a64e13c6b768f0a15475713192fcfaaf9e7 \ No newline at end of file diff --git a/test/malloc_common.tcl b/test/malloc_common.tcl index 625dd43229..2d0e57e4fc 100644 --- a/test/malloc_common.tcl +++ b/test/malloc_common.tcl @@ -292,7 +292,7 @@ proc faultsim_test_result_int {args} { upvar testrc testrc testresult testresult testnfail testnfail set t [list $testrc $testresult] set r $args - if { ($testnfail==0 && $t != [lindex $r 0]) || [lsearch $r $t]<0 } { + if { ($testnfail==0 && $t != [lindex $r 0]) || [lsearch -exact $r $t]<0 } { error "nfail=$testnfail rc=$testrc result=$testresult list=$r" } } From 27aac274b981a65f60410080e1d7d2218a8bec15 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 18 May 2015 17:50:17 +0000 Subject: [PATCH 130/206] Improve test coverage of fts5_config.c. 
FossilOrigin-Name: 47dbfadb994814c9349d4c9c113b862c2e97c01a --- ext/fts5/fts5.h | 5 +- ext/fts5/fts5Int.h | 3 + ext/fts5/fts5_buffer.c | 26 +++++ ext/fts5/fts5_config.c | 176 ++++++++++++++--------------- ext/fts5/fts5_expr.c | 8 +- ext/fts5/fts5_index.c | 14 --- ext/fts5/fts5_vocab.c | 2 + ext/fts5/test/fts5aa.test | 23 ++++ ext/fts5/test/fts5config.test | 202 ++++++++++++++++++++++++++++++++++ ext/fts5/test/fts5ea.test | 2 +- ext/fts5/test/fts5fault2.test | 19 ++++ ext/fts5/test/fts5fault4.test | 14 +++ ext/fts5/test/fts5merge.test | 2 +- ext/fts5/test/fts5near.test | 2 +- ext/fts5/test/fts5vocab.test | 31 ++++-- manifest | 39 +++---- manifest.uuid | 2 +- 17 files changed, 423 insertions(+), 147 deletions(-) create mode 100644 ext/fts5/test/fts5config.test diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 28be0de676..7782bf697c 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -203,8 +203,9 @@ struct Fts5ExtensionApi { ** ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the -** following structure. The structure methods are expected to function -** as follows: +** following structure. All structure methods must be defined, setting +** any member of the fts5_tokenizer struct to NULL leads to undefined +** behaviour. The structure methods are expected to function as follows: ** ** xCreate: ** This function is used to allocate and inititalize a tokenizer instance. diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 3c84c9ed2a..2a66fd8b18 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -226,6 +226,9 @@ int sqlite3Fts5PoslistNext64( void *sqlite3Fts5MallocZero(int *pRc, int nByte); char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); +/* Character set tests (like isspace(), isalpha() etc.) */ +int sqlite3Fts5IsBareword(char t); + /* ** End of interface to code in fts5_buffer.c. 
**************************************************************************/ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index dbe51fafc4..24e7d8f65f 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -264,5 +264,31 @@ char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ } return zRet; } + + +/* +** Return true if character 't' may be part of an FTS5 bareword, or false +** otherwise. Characters that may be part of barewords: +** +** * All non-ASCII characters, +** * The 52 upper and lower case ASCII characters, and +** * The 10 integer ASCII characters. +** * The underscore character "_" (0x5F). +*/ +int sqlite3Fts5IsBareword(char t){ + u8 aBareword[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */ + }; + + return (t & 0x80) || aBareword[(int)t]; +} + #endif /* SQLITE_ENABLE_FTS5 */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 90d7b7318f..4b35ac8b2c 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -53,12 +53,7 @@ static const char *fts5ConfigSkipWhitespace(const char *pIn){ */ static const char *fts5ConfigSkipBareword(const char *pIn){ const char *p = pIn; - while( *p && *p!=' ' && *p!=':' && *p!='!' 
&& *p!='@' - && *p!='#' && *p!='$' && *p!='%' && *p!='^' && *p!='&' - && *p!='*' && *p!='(' && *p!=')' && *p!='=' - ){ - p++; - } + while ( sqlite3Fts5IsBareword(*p) ) p++; if( p==pIn ) p = 0; return p; } @@ -71,64 +66,62 @@ static int fts5_isdigit(char a){ static const char *fts5ConfigSkipLiteral(const char *pIn){ const char *p = pIn; - if( p ){ - switch( *p ){ - case 'n': case 'N': - if( sqlite3_strnicmp("null", p, 4)==0 ){ - p = &p[4]; + switch( *p ){ + case 'n': case 'N': + if( sqlite3_strnicmp("null", p, 4)==0 ){ + p = &p[4]; + }else{ + p = 0; + } + break; + + case 'x': case 'X': + p++; + if( *p=='\'' ){ + p++; + while( (*p>='a' && *p<='f') + || (*p>='A' && *p<='F') + || (*p>='0' && *p<='9') + ){ + p++; + } + if( *p=='\'' && 0==((p-pIn)%2) ){ + p++; }else{ p = 0; } - break; - - case 'x': case 'X': - p++; + }else{ + p = 0; + } + break; + + case '\'': + p++; + while( p ){ if( *p=='\'' ){ p++; - while( (*p>='a' && *p<='f') - || (*p>='A' && *p<='F') - || (*p>='0' && *p<='9') - ){ - p++; - } - if( *p=='\'' && 0==((p-pIn)%2) ){ - p++; - }else{ - p = 0; - } - }else{ - p = 0; + if( *p!='\'' ) break; } - break; - - case '\'': p++; - while( p ){ - if( *p=='\'' ){ - p++; - if( *p!='\'' ) break; - } - p++; - if( *p==0 ) p = 0; - } - break; + if( *p==0 ) p = 0; + } + break; - default: - /* maybe a number */ - if( *p=='+' || *p=='-' ) p++; + default: + /* maybe a number */ + if( *p=='+' || *p=='-' ) p++; + while( fts5_isdigit(*p) ) p++; + + /* At this point, if the literal was an integer, the parse is + ** finished. Or, if it is a floating point value, it may continue + ** with either a decimal point or an 'E' character. */ + if( *p=='.' && fts5_isdigit(p[1]) ){ + p += 2; while( fts5_isdigit(*p) ) p++; + } + if( p==pIn ) p = 0; - /* At this point, if the literal was an integer, the parse is - ** finished. Or, if it is a floating point value, it may continue - ** with either a decimal point or an 'E' character. */ - if( *p=='.' 
&& fts5_isdigit(p[1]) ){ - p += 2; - while( fts5_isdigit(*p) ) p++; - } - if( p==pIn ) p = 0; - - break; - } + break; } return p; @@ -157,12 +150,12 @@ static int fts5Dequote(char *z){ assert( q=='[' || q=='\'' || q=='"' || q=='`' ); if( q=='[' ) q = ']'; - while( z[iIn] ){ + while( ALWAYS(z[iIn]) ){ if( z[iIn]==q ){ if( z[iIn+1]!=q ){ /* Character iIn was the close quote. */ - z[iOut] = '\0'; - return iIn+1; + iIn++; + break; }else{ /* Character iIn and iIn+1 form an escaped quote character. Skip ** the input cursor past both and copy a single quote character @@ -175,9 +168,8 @@ static int fts5Dequote(char *z){ } } - /* Did not find the close-quote character. Return -1. */ z[iOut] = '\0'; - return -1; + return iIn; } /* @@ -270,7 +262,7 @@ static int fts5ConfigParseSpecial( }else{ for(nArg=0; p && *p; nArg++){ const char *p2 = fts5ConfigSkipWhitespace(p); - if( p2 && *p2=='\'' ){ + if( *p2=='\'' ){ p = fts5ConfigSkipLiteral(p2); }else{ p = fts5ConfigSkipBareword(p2); @@ -369,32 +361,32 @@ static const char *fts5ConfigGobbleWord( int *pbQuoted /* OUT: Set to true if dequoting required */ ){ const char *zRet = 0; + + int nIn = strlen(zIn); + char *zOut = sqlite3_malloc(nIn+1); + + assert( *pRc==SQLITE_OK ); *pbQuoted = 0; *pzOut = 0; - if( *pRc==SQLITE_OK ){ - int nIn = strlen(zIn); - char *zOut = sqlite3_malloc(nIn+1); - - if( zOut==0 ){ - *pRc = SQLITE_NOMEM; + if( zOut==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + memcpy(zOut, zIn, nIn+1); + if( fts5_isopenquote(zOut[0]) ){ + int ii = fts5Dequote(zOut); + zRet = &zIn[ii]; + *pbQuoted = 1; }else{ - memcpy(zOut, zIn, nIn+1); - if( fts5_isopenquote(zOut[0]) ){ - int ii = fts5Dequote(zOut); - if( ii>0 ) zRet = &zIn[ii]; - *pbQuoted = 1; - }else{ - zRet = fts5ConfigSkipBareword(zIn); - zOut[zRet-zIn] = '\0'; - } + zRet = fts5ConfigSkipBareword(zIn); + zOut[zRet-zIn] = '\0'; } + } - if( zRet==0 ){ - sqlite3_free(zOut); - }else{ - *pzOut = zOut; - } + if( zRet==0 ){ + sqlite3_free(zOut); + }else{ + *pzOut = zOut; } return 
zRet; @@ -569,7 +561,7 @@ int sqlite3Fts5ConfigParse( void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ if( pConfig ){ int i; - if( pConfig->pTok && pConfig->pTokApi->xDelete ){ + if( pConfig->pTok ){ pConfig->pTokApi->xDelete(pConfig->pTok); } sqlite3_free(pConfig->zDb); @@ -727,7 +719,7 @@ int sqlite3Fts5ConfigParseRank( p = fts5ConfigSkipArgs(p); if( p==0 ){ rc = SQLITE_ERROR; - }else if( p!=pArgs ){ + }else{ zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); } @@ -751,17 +743,14 @@ int sqlite3Fts5ConfigSetValue( int *pbBadkey ){ int rc = SQLITE_OK; - if( 0==sqlite3_stricmp(zKey, "cookie") ){ - pConfig->iCookie = sqlite3_value_int(pVal); - } - else if( 0==sqlite3_stricmp(zKey, "pgsz") ){ + if( 0==sqlite3_stricmp(zKey, "pgsz") ){ int pgsz = 0; if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ pgsz = sqlite3_value_int(pVal); } if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){ - if( pbBadkey ) *pbBadkey = 1; + *pbBadkey = 1; }else{ pConfig->pgsz = pgsz; } @@ -773,7 +762,7 @@ int sqlite3Fts5ConfigSetValue( nAutomerge = sqlite3_value_int(pVal); } if( nAutomerge<0 || nAutomerge>64 ){ - if( pbBadkey ) *pbBadkey = 1; + *pbBadkey = 1; }else{ if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; pConfig->nAutomerge = nAutomerge; @@ -786,7 +775,7 @@ int sqlite3Fts5ConfigSetValue( nCrisisMerge = sqlite3_value_int(pVal); } if( nCrisisMerge<0 ){ - if( pbBadkey ) *pbBadkey = 1; + *pbBadkey = 1; }else{ if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; pConfig->nCrisisMerge = nCrisisMerge; @@ -805,10 +794,10 @@ int sqlite3Fts5ConfigSetValue( pConfig->zRankArgs = zRankArgs; }else if( rc==SQLITE_ERROR ){ rc = SQLITE_OK; - if( pbBadkey ) *pbBadkey = 1; + *pbBadkey = 1; } }else{ - if( pbBadkey ) *pbBadkey = 1; + *pbBadkey = 1; } return rc; } @@ -844,10 +833,11 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ if( 0==sqlite3_stricmp(zK, "version") ){ iVersion = sqlite3_value_int(pVal); }else{ - 
sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, 0); + int bDummy = 0; + sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); } } - if( rc==SQLITE_OK ) rc = sqlite3_finalize(p); + rc = sqlite3_finalize(p); } if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 3446c3e543..e0496fb8d2 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -117,12 +117,6 @@ static int fts5ExprIsspace(char t){ return t==' ' || t=='\t' || t=='\n' || t=='\r'; } -static int fts5ExprIstoken(char t){ - return fts5ExprIsspace(t)==0 && t!='\0' - && t!=':' && t!='(' && t!=')' - && t!=',' && t!='+' && t!='*'; -} - /* ** Read the first token from the nul-terminated string at *pz. */ @@ -169,7 +163,7 @@ static int fts5ExprGetToken( default: { const char *z2; tok = FTS5_STRING; - for(z2=&z[1]; fts5ExprIstoken(*z2); z2++); + for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index f9317ddd66..dd0f42fa30 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -5566,20 +5566,6 @@ static void fts5RowidFunction( iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno); sqlite3_result_int64(pCtx, iRowid); } -#if 0 - }else if( 0==sqlite3_stricmp(zArg, "start-of-index") ){ - i64 iRowid; - int idx; - if( nArg!=2 ){ - sqlite3_result_error(pCtx, - "should be: fts5_rowid('start-of-index', idx)", -1 - ); - }else{ - idx = sqlite3_value_int(apVal[1]); - iRowid = FTS5_SEGMENT_ROWID(idx, 1, 0, 0); - sqlite3_result_int64(pCtx, iRowid); - } -#endif }else { sqlite3_result_error(pCtx, "first arg to fts5_rowid() must be 'segment' " diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index ee2a316291..6d03877ad0 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -189,6 +189,8 @@ static int fts5VocabInitVtab( 
pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; memcpy(pRet->zFts5Tbl, zTab, nTab); memcpy(pRet->zFts5Db, zDb, nDb); + sqlite3Fts5Dequote(pRet->zFts5Tbl); + sqlite3Fts5Dequote(pRet->zFts5Db); } } diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 559a5a1256..028eece68a 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -285,6 +285,9 @@ do_catchsql_test 11.1 { do_catchsql_test 11.2 { CREATE VIRTUAL TABLE rank USING fts5(a, b, c); } {1 {reserved fts5 table name: rank}} +do_catchsql_test 11.3 { + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid); +} {1 {reserved fts5 column name: rowid}} #------------------------------------------------------------------------- # @@ -377,6 +380,26 @@ do_catchsql_test 15.2 { INSERT INTO t1(t1) VALUES('integrity-check'); } {1 {database disk image is malformed}} +#------------------------------------------------------------------------- +# +do_execsql_test 16.1 { + CREATE VIRTUAL TABLE n1 USING fts5(a); + INSERT INTO n1 VALUES('a b c d'); +} + +proc funk {} { + set fd [db incrblob main n1_data block 10] + fconfigure $fd -encoding binary -translation binary + puts -nonewline $fd "\x44\x45" + close $fd + db eval { UPDATE n1_config SET v=50 WHERE k='version' } +} +db func funk funk + +do_catchsql_test 16.2 { + SELECT funk(), bm25(n1), funk() FROM n1 WHERE n1 MATCH 'a+b+c+d' +} {1 {SQL logic error or missing database}} + finish_test diff --git a/ext/fts5/test/fts5config.test b/ext/fts5/test/fts5config.test new file mode 100644 index 0000000000..22e7631bc0 --- /dev/null +++ b/ext/fts5/test/fts5config.test @@ -0,0 +1,202 @@ +# 2015 Jan 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# +# This file focuses on the code in fts5_config.c, which is largely concerned +# with parsing the various configuration and CREATE TABLE options. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5config + +#------------------------------------------------------------------------- +# Try different types of quote characters. +# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5('a', "b", [c], `d`); + PRAGMA table_info = t1; +} { + 0 a {} 0 {} 0 + 1 b {} 0 {} 0 + 2 c {} 0 {} 0 + 3 d {} 0 {} 0 +} + +#------------------------------------------------------------------------- +# Syntax errors in the prefix= option. +# +foreach {tn opt} { + 1 {prefix=x} + 2 {prefix='x'} + 3 {prefix='$'} +} { + set res [list 1 {malformed prefix=... directive}] + do_catchsql_test 2.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res +} + +#------------------------------------------------------------------------- +# Syntax errors in the 'rank' option. +# +foreach {tn val} { + 1 "f1(xyz)" + 2 "f1(zyx)" + 3 "f1(nzz)" + 4 "f1(x'!!')" + 5 "f1(x':;')" + 6 "f1(x'[]')" + 7 "f1(x'{}')" + 8 "f1('abc)" +} { + do_catchsql_test 3.$tn { + INSERT INTO t1(t1, rank) VALUES('rank', $val); + } {1 {SQL logic error or missing database}} +} + +#------------------------------------------------------------------------- +# The parsing of SQL literals specified as part of 'rank' options. 
+# +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE zzz USING fts5(one); + INSERT INTO zzz VALUES('a b c'); +} +proc first {cmd A} { return $A } +sqlite3_fts5_create_function db first first + +foreach {tn arg} { + 1 "123" + 2 "'01234567890ABCDEF'" + 3 "x'0123'" + 4 "x'ABCD'" + 5 "x'0123456789ABCDEF'" + 6 "x'0123456789abcdef'" + 7 "22.5" + 8 "-91.5" + 9 "-.5" + 10 "''''" + 11 "+.5" +} { + set func [string map {' ''} "first($arg)"] + do_execsql_test 4.1.$tn " + INSERT INTO zzz(zzz, rank) VALUES('rank', '$func'); + SELECT rank IS $arg FROM zzz WHERE zzz MATCH 'a + b + c' + " 1 +} + +do_execsql_test 4.2 { + INSERT INTO zzz(zzz, rank) VALUES('rank', 'f1()'); +} {} + +#------------------------------------------------------------------------- +# Misquoting in tokenize= and other options. +# +do_catchsql_test 5.1 { + CREATE VIRTUAL TABLE xx USING fts5(x, tokenize="porter 'ascii"); +} {1 {parse error in tokenize directive}} + +breakpoint +do_catchsql_test 5.2 { + CREATE VIRTUAL TABLE xx USING fts5(x, [y[]); +} {0 {}} + +do_catchsql_test 5.3 { + CREATE VIRTUAL TABLE yy USING fts5(x, [y]]); +} {1 {unrecognized token: "]"}} + +#------------------------------------------------------------------------- +# Errors in prefix= directives. +# +do_catchsql_test 6.1 { + CREATE VIRTUAL TABLE abc USING fts5(a, prefix=1, prefix=2); +} {1 {multiple prefix=... directives}} +do_catchsql_test 6.2 { + CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1, 2, 1001'); +} {1 {prefix length out of range: 1001}} +do_catchsql_test 6.3 { + CREATE VIRTUAL TAbLE abc USING fts5(a, prefix='1, 2, 0000'); +} {1 {prefix length out of range: 0}} +do_catchsql_test 6.4 { + CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1 , 1000000'); +} {1 {malformed prefix=... directive}} + +#------------------------------------------------------------------------- +# Duplicate tokenize= and other options. 
+# +do_catchsql_test 7.1 { + CREATE VIRTUAL TABLE abc USING fts5(a, tokenize=porter, tokenize=ascii); +} {1 {multiple tokenize=... directives}} +do_catchsql_test 7.2 { + CREATE VIRTUAL TABLE abc USING fts5(a, content=porter, content=ascii); +} {1 {multiple content=... directives}} +do_catchsql_test 7.3 { + CREATE VIRTUAL TABLE abc USING fts5(a, content_rowid=porter, content_rowid=a); +} {1 {multiple content_rowid=... directives}} + +#------------------------------------------------------------------------- +# Unrecognized option. +# +do_catchsql_test 8.0 { + CREATE VIRTUAL TABLE abc USING fts5(a, nosuchoption=123); +} {1 {unrecognized option: "nosuchoption"}} +do_catchsql_test 8.1 { + CREATE VIRTUAL TABLE abc USING fts5(a, "nosuchoption"=123); +} {1 {parse error in ""nosuchoption"=123"}} + +#------------------------------------------------------------------------- +# Errors in: +# +# 9.1.* 'pgsz' options. +# 9.2.* 'automerge' options. +# 9.3.* 'crisismerge' options. +# +do_execsql_test 9.0 { + CREATE VIRTUAL TABLE abc USING fts5(a, b); +} {} +do_catchsql_test 9.1.1 { + INSERT INTO abc(abc, rank) VALUES('pgsz', -5); +} {1 {SQL logic error or missing database}} +do_catchsql_test 9.1.2 { + INSERT INTO abc(abc, rank) VALUES('pgsz', 50000000); +} {1 {SQL logic error or missing database}} +do_catchsql_test 9.1.3 { + INSERT INTO abc(abc, rank) VALUES('pgsz', 66.67); +} {1 {SQL logic error or missing database}} + +do_catchsql_test 9.2.1 { + INSERT INTO abc(abc, rank) VALUES('automerge', -5); +} {1 {SQL logic error or missing database}} +do_catchsql_test 9.2.2 { + INSERT INTO abc(abc, rank) VALUES('automerge', 50000000); +} {1 {SQL logic error or missing database}} +do_catchsql_test 9.2.3 { + INSERT INTO abc(abc, rank) VALUES('automerge', 66.67); +} {1 {SQL logic error or missing database}} +do_execsql_test 9.2.4 { + INSERT INTO abc(abc, rank) VALUES('automerge', 1); +} {} + +do_catchsql_test 9.3.1 { + INSERT INTO abc(abc, rank) VALUES('crisismerge', -5); +} {1 {SQL logic 
error or missing database}} +do_catchsql_test 9.3.2 { + INSERT INTO abc(abc, rank) VALUES('crisismerge', 66.67); +} {1 {SQL logic error or missing database}} +do_execsql_test 9.3.3 { + INSERT INTO abc(abc, rank) VALUES('crisismerge', 1); +} {} +do_execsql_test 9.3.4 { + INSERT INTO abc(abc, rank) VALUES('crisismerge', 50000000); +} {} + +do_catchsql_test 9.4.1 { + INSERT INTO abc(abc, rank) VALUES('nosuchoption', 1); +} {1 {SQL logic error or missing database}} + +finish_test + diff --git a/ext/fts5/test/fts5ea.test b/ext/fts5/test/fts5ea.test index b80e767b63..3929ed9f59 100644 --- a/ext/fts5/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -74,7 +74,7 @@ foreach {tn expr err} { 11 {a AND "abc} {unterminated string} 12 {NEAR(a b, xyz)} {expected integer, got "xyz"} - 13 {NEAR(a b, // )} {expected integer, got "//"} + 13 {NEAR(a b, // )} {fts5: syntax error near "/"} } { do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] } diff --git a/ext/fts5/test/fts5fault2.test b/ext/fts5/test/fts5fault2.test index 9bf9b85e2b..ef1df8826a 100644 --- a/ext/fts5/test/fts5fault2.test +++ b/ext/fts5/test/fts5fault2.test @@ -117,5 +117,24 @@ do_faultsim_test 4.1 -faults oom-trans* -prep { faultsim_test_result {0 {}} } +#------------------------------------------------------------------------- +# OOM while parsing a tokenize=option +# +reset_db +faultsim_save_and_close +do_faultsim_test 5.0 -faults oom-* -prep { + faultsim_restore_and_reopen +} -body { + execsql { + CREATE VIRTUAL TABLE uio USING fts5(a, b, + tokenize="porter 'ascii'", + content="another table", + content_rowid="somecolumn" + ); + } +} -test { + faultsim_test_result {0 {}} +} + finish_test diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index b22407c5cb..f160a84ba3 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -326,6 +326,20 @@ do_faultsim_test 10.3 -faults oom-t* -body { faultsim_test_result {0 {x : "a"}} } 
+#------------------------------------------------------------------------- +# OOM while configuring 'rank' option. +# +reset_db +do_execsql_test 11.0 { + CREATE VIRTUAL TABLE ft USING fts5(x); +} +do_faultsim_test 11.1 -faults oom-* -body { + db eval { + INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)'); + } +} -test { + faultsim_test_result {0 {}} +} finish_test diff --git a/ext/fts5/test/fts5merge.test b/ext/fts5/test/fts5merge.test index d869c6cedb..023a2f7fe8 100644 --- a/ext/fts5/test/fts5merge.test +++ b/ext/fts5/test/fts5merge.test @@ -181,7 +181,7 @@ foreach {tn pgsz} { } $expect break } - db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r } +# db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r } } finish_test diff --git a/ext/fts5/test/fts5near.test b/ext/fts5/test/fts5near.test index f545447e6f..dffce29431 100644 --- a/ext/fts5/test/fts5near.test +++ b/ext/fts5/test/fts5near.test @@ -24,7 +24,7 @@ proc do_near_test {tn doc near res} { } execsql { - CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'ascii tokenchars .') + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = "ascii tokenchars '.'") } do_near_test 1.1 ". . a . . . b . ." 
{ NEAR(a b, 5) } 1 diff --git a/ext/fts5/test/fts5vocab.test b/ext/fts5/test/fts5vocab.test index b61db67d7b..fcef2b793d 100644 --- a/ext/fts5/test/fts5vocab.test +++ b/ext/fts5/test/fts5vocab.test @@ -82,9 +82,9 @@ do_execsql_test 2.0 { INSERT INTO tt VALUES('g d e f d e', 'a c d b a g'); INSERT INTO tt VALUES('e f a c c b', 'b f e a f d y'); INSERT INTO tt VALUES('c c a a c f', 'd g a e b g'); - CREATE VIRTUAL TABLE tv USING fts5vocab(tt, 'col'); - SELECT * FROM tv; -} { +} + +set res_col { a 0 6 11 a 1 7 9 b 0 6 7 b 1 7 7 c 0 6 12 c 1 5 8 @@ -94,16 +94,31 @@ do_execsql_test 2.0 { g 0 5 7 g 1 5 7 x 0 1 1 y 1 1 1 } - -do_execsql_test 2.1 { - CREATE VIRTUAL TABLE temp.tv2 USING fts5vocab(main, tt, 'row'); - SELECT * FROM tv2; -} { +set res_row { a 10 20 b 9 14 c 9 20 d 9 19 e 8 13 f 10 20 g 7 14 x 1 1 y 1 1 } +foreach {tn tbl resname} { + 1 "fts5vocab(tt, 'col')" res_col + 2 "fts5vocab(tt, 'row')" res_row + 3 "fts5vocab(tt, \"row\")" res_row + 4 "fts5vocab(tt, [row])" res_row + 5 "fts5vocab(tt, `row`)" res_row + + 6 "fts5vocab('tt', 'row')" res_row + 7 "fts5vocab(\"tt\", \"row\")" res_row + 8 "fts5vocab([tt], [row])" res_row + 9 "fts5vocab(`tt`, `row`)" res_row +} { + do_execsql_test 2.$tn " + DROP TABLE IF EXISTS tv; + CREATE VIRTUAL TABLE tv USING $tbl; + SELECT * FROM tv; + " [set $resname] +} + #------------------------------------------------------------------------- # foreach {tn sql} { diff --git a/manifest b/manifest index a0e7897b83..4488ddf4f5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Further\stest\scoverage\simprovements\sfor\sfts5. -D 2015-05-16T20:04:43.177 +C Improve\stest\scoverage\sof\sfts5_config.c. 
+D 2015-05-18T17:50:17.248 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,23 +105,23 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 -F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h e01aec94c0d927924134c30afd9803425cd88812 +F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 +F ext/fts5/fts5Int.h 9e581dc077d4c6758eaeb0d6a85dc875f53918dc F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 -F ext/fts5/fts5_buffer.c 70b971e13503566f1e257941c60817ba0920a16b -F ext/fts5/fts5_config.c 05811f0bd80c396afcf3ceea68da16149a9a3258 -F ext/fts5/fts5_expr.c a88af159ce5dcc44d7262ea80865ffc8e4ab2143 +F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 +F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 +F ext/fts5/fts5_expr.c 0c4b50bb48740c76b8e8b89d5d40a55f8dbffd07 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 6e0ac5835ab33a2cf97efd591acd4fc130490e0f +F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43 F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d F ext/fts5/fts5_tcl.c f18eeb125d733f4e815a11679b971fa61cd7ec77 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d -F ext/fts5/fts5_vocab.c e532f38a62d27fa662138a6cf33890b314225506 +F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 
d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 -F ext/fts5/test/fts5aa.test 5f221b82487abfb915e1b040eb4e305cf79a2ef5 +F ext/fts5/test/fts5aa.test 29409f14dec2ee67bd82ebf15111d6167b1003df F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 05008e00bd2761cc45df838a0988ecf318cbe1fd F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d @@ -136,24 +136,25 @@ F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 F ext/fts5/test/fts5aux.test d9cd26ee45ad5f628b4899f1ac5b757ce7a77bdf F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b +F ext/fts5/test/fts5config.test c9cc535f3b36cde1e5a32bf579f3f5962a9e82b2 F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 F ext/fts5/test/fts5corrupt2.test 7000030df189f1f3ca58b555b459bcbf9b8f8f77 F ext/fts5/test/fts5corrupt3.test fe42c0ce0b58b7ad487a469049f91419d22c7459 F ext/fts5/test/fts5dlidx.test 070531bd45685e545e3e6021deb543f730a4011b F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 -F ext/fts5/test/fts5ea.test ed163ed820fd503354bd7dcf9d3b0e3801ade962 +F ext/fts5/test/fts5ea.test c2f33a33d307b2d38e29f66323a67b7af0b082ea F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b -F ext/fts5/test/fts5fault2.test 26c3d70648f691e2cc9391e14bbc11a973656383 +F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test 6e5b1609a0e7d51e7598fa675db581b3b1bfbf7e +F ext/fts5/test/fts5fault4.test 7301602e916261f0b277ed93f447fd871d5d7064 F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 F ext/fts5/test/fts5full.test 
0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 F ext/fts5/test/fts5integrity.test 98801bd0fb7c53a40bc770280134865d61724f3a -F ext/fts5/test/fts5merge.test b985b6891e093a4b4c3c9683fe3cba7498fed690 -F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947 +F ext/fts5/test/fts5merge.test 8077454f2975a63f35761f4b8a718b3a808b7c9c +F ext/fts5/test/fts5near.test d2e3343e62d438f2efd96ebcd83a0d30a16ea6dc F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e @@ -167,7 +168,7 @@ F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 -F ext/fts5/test/fts5vocab.test 2d1bddfb6e1effd9e1d2f5d1d25bf05e9ab33e64 +F ext/fts5/test/fts5vocab.test 80fb22850dd3b2c92a3896e6021605e08c0872aa F ext/fts5/tool/loadfts5.tcl add4d349ae5463c5f60b26e821c24e60ed8054d3 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1326,7 +1327,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7aea8c6d99737c6c72078e0b4b9c5f8186021aa0 -R 5014cf90f97a250385093a02a58cfb68 +P 927d9a64e13c6b768f0a15475713192fcfaaf9e7 +R 820e0577a96c7901c9b911f2b08f8e07 U dan -Z e8178e8c15c67776f9bff619db326cbd +Z b644c576eadef53c3df6a681b5fdaf76 diff --git a/manifest.uuid b/manifest.uuid index 5f3b901b63..c5cb986583 100644 --- a/manifest.uuid +++ b/manifest.uuid 
@@ -1 +1 @@ -927d9a64e13c6b768f0a15475713192fcfaaf9e7 \ No newline at end of file +47dbfadb994814c9349d4c9c113b862c2e97c01a \ No newline at end of file From 5211e54f9558016ba101c382ef35bc34134acf1f Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 18 May 2015 18:03:06 +0000 Subject: [PATCH 131/206] Add a test for an untested branch in fts5_expr.c. FossilOrigin-Name: ce08206b5c5bf6a81decf20c99a524dd92c3715b --- ext/fts5/test/fts5ea.test | 1 + manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ext/fts5/test/fts5ea.test b/ext/fts5/test/fts5ea.test index 3929ed9f59..4b5b14a441 100644 --- a/ext/fts5/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -75,6 +75,7 @@ foreach {tn expr err} { 12 {NEAR(a b, xyz)} {expected integer, got "xyz"} 13 {NEAR(a b, // )} {fts5: syntax error near "/"} + 14 {NEAR(a b, "xyz" )} {expected integer, got ""xyz""} } { do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] } diff --git a/manifest b/manifest index 4488ddf4f5..733ba5ec49 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\stest\scoverage\sof\sfts5_config.c. -D 2015-05-18T17:50:17.248 +C Add\sa\stest\sfor\san\suntested\sbranch\sin\sfts5_expr.c. 
+D 2015-05-18T18:03:06.791 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -143,7 +143,7 @@ F ext/fts5/test/fts5corrupt2.test 7000030df189f1f3ca58b555b459bcbf9b8f8f77 F ext/fts5/test/fts5corrupt3.test fe42c0ce0b58b7ad487a469049f91419d22c7459 F ext/fts5/test/fts5dlidx.test 070531bd45685e545e3e6021deb543f730a4011b F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 -F ext/fts5/test/fts5ea.test c2f33a33d307b2d38e29f66323a67b7af0b082ea +F ext/fts5/test/fts5ea.test 6159e66c4fe9466c37cd6a0ed4197354588b4bcb F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 @@ -1327,7 +1327,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 927d9a64e13c6b768f0a15475713192fcfaaf9e7 -R 820e0577a96c7901c9b911f2b08f8e07 +P 47dbfadb994814c9349d4c9c113b862c2e97c01a +R 8b343fcd5b0f5f6f5d1de871460e7a2e U dan -Z b644c576eadef53c3df6a681b5fdaf76 +Z fa2c7b5394616fc24195f932cca28cd9 diff --git a/manifest.uuid b/manifest.uuid index c5cb986583..f33df5097a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -47dbfadb994814c9349d4c9c113b862c2e97c01a \ No newline at end of file +ce08206b5c5bf6a81decf20c99a524dd92c3715b \ No newline at end of file From 23390b1b2b606199e9747114007dbe025d663a88 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 18 May 2015 18:24:41 +0000 Subject: [PATCH 132/206] Fix a bug in test script fts5fault4.test. 
FossilOrigin-Name: a508059305641c2cb53197b0f6e93d7b723697e2 --- ext/fts5/test/fts5fault4.test | 12 +++--------- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index f160a84ba3..ad1315bec0 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -22,8 +22,6 @@ ifcapable !fts5 { return } -if 1 { - #------------------------------------------------------------------------- # An OOM while dropping an fts5 table. # @@ -302,8 +300,6 @@ do_faultsim_test 9.1 -faults oom-* -body { faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM} } -} - #------------------------------------------------------------------------- # OOM in fts5_expr() SQL function. # @@ -333,12 +329,10 @@ reset_db do_execsql_test 11.0 { CREATE VIRTUAL TABLE ft USING fts5(x); } -do_faultsim_test 11.1 -faults oom-* -body { - db eval { - INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)'); - } +do_faultsim_test 11.1 -faults oom-t* -body { + db eval { INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)') } } -test { - faultsim_test_result {0 {}} + faultsim_test_result {0 {}} {1 {disk I/O error}} } finish_test diff --git a/manifest b/manifest index 733ba5ec49..7a404472a1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\stest\sfor\san\suntested\sbranch\sin\sfts5_expr.c. -D 2015-05-18T18:03:06.791 +C Fix\sa\sbug\sin\stest\sscript\sfts5fault4.test. 
+D 2015-05-18T18:24:41.367 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -148,7 +148,7 @@ F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test 7301602e916261f0b277ed93f447fd871d5d7064 +F ext/fts5/test/fts5fault4.test 7c7c46559368f06b98daa940d376c4740bcdad25 F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 @@ -1327,7 +1327,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 47dbfadb994814c9349d4c9c113b862c2e97c01a -R 8b343fcd5b0f5f6f5d1de871460e7a2e +P ce08206b5c5bf6a81decf20c99a524dd92c3715b +R 2291f8df21e231273c06942c0f18b8cc U dan -Z fa2c7b5394616fc24195f932cca28cd9 +Z 4cefad1c255457ef6a42a53ca33a35a5 diff --git a/manifest.uuid b/manifest.uuid index f33df5097a..2399b85853 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ce08206b5c5bf6a81decf20c99a524dd92c3715b \ No newline at end of file +a508059305641c2cb53197b0f6e93d7b723697e2 \ No newline at end of file From 09963b27b8a0344556fae6151778c84454bd0478 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 18 May 2015 20:34:59 +0000 Subject: [PATCH 133/206] Improve test coverage of fts5_storage.c. 
FossilOrigin-Name: 4dfe2e5871704125338620705e6638f36baaacd7 --- ext/fts5/fts5_storage.c | 4 +- ext/fts5/test/fts5aa.test | 27 ++++++++ ext/fts5/test/fts5aux.test | 22 ++++++- ext/fts5/test/fts5content.test | 15 ++++- ext/fts5/test/fts5corrupt2.test | 20 ++++++ ext/fts5/test/fts5corrupt3.test | 17 +++++ ext/fts5/test/fts5fault6.test | 104 +++++++++++++++++++++++++++++++ ext/fts5/test/fts5integrity.test | 44 +++++++++++++ manifest | 25 ++++---- manifest.uuid | 2 +- 10 files changed, 262 insertions(+), 18 deletions(-) create mode 100644 ext/fts5/test/fts5fault6.test diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 0ea99c25a2..5e2e1413c9 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -663,9 +663,7 @@ int sqlite3Fts5StorageInsert( }else{ if( eConflict==SQLITE_REPLACE ){ eStmt = FTS5_STMT_REPLACE_CONTENT; - if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ - rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1])); - } + rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1])); }else{ eStmt = FTS5_STMT_INSERT_CONTENT; } diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 028eece68a..800215d2bb 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -143,6 +143,14 @@ do_execsql_test 6.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +do_execsql_test 6.3 { + REPLACE INTO t1(rowid, x, y) VALUES('22', 'l l l', 'l l l'); +} + +do_execsql_test 6.4 { + INSERT INTO t1(t1) VALUES('integrity-check') +} + #------------------------------------------------------------------------- # reset_db @@ -400,6 +408,25 @@ do_catchsql_test 16.2 { SELECT funk(), bm25(n1), funk() FROM n1 WHERE n1 MATCH 'a+b+c+d' } {1 {SQL logic error or missing database}} +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 17.1 { + CREATE VIRTUAL TABLE b2 USING fts5(x); + INSERT INTO b2 VALUES('a'); + INSERT INTO b2 VALUES('b'); + INSERT INTO b2 VALUES('c'); 
+} + +do_test 17.2 { + set res [list] + db eval { SELECT * FROM b2 ORDER BY rowid ASC } { + lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }] + } + set res +} {{a b c} {a b c} {a b c}} + + finish_test diff --git a/ext/fts5/test/fts5aux.test b/ext/fts5/test/fts5aux.test index 894c8d6f1b..9237241474 100644 --- a/ext/fts5/test/fts5aux.test +++ b/ext/fts5/test/fts5aux.test @@ -25,9 +25,15 @@ proc colsize {cmd i} { } sqlite3_fts5_create_function db colsize colsize +proc totalsize {cmd i} { + $cmd xColumnTotalSize $i +} +sqlite3_fts5_create_function db totalsize totalsize + do_execsql_test 1.0 { CREATE VIRTUAL TABLE f1 USING fts5(a, b); INSERT INTO f1 VALUES('one two', 'two one zero'); + INSERT INTO f1 VALUES('one one', 'one one one'); } do_catchsql_test 1.1 { @@ -46,11 +52,25 @@ do_catchsql_test 1.4 { do_catchsql_test 2.1 { SELECT colsize(f1, 2) FROM f1 WHERE f1 MATCH 'two'; } {1 SQLITE_RANGE} - do_execsql_test 2.2 { SELECT colsize(f1, 0), colsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero'; } {2 3} +do_execsql_test 2.3 { + SELECT colsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero'; +} {5} +do_execsql_test 2.4.1 { + SELECT totalsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero'; +} {10} +do_execsql_test 2.4.2 { + SELECT totalsize(f1, 0) FROM f1 WHERE f1 MATCH 'zero'; +} {4} +do_execsql_test 2.4.3 { + SELECT totalsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero'; +} {6} +do_catchsql_test 2.4.4 { + SELECT totalsize(f1, 2) FROM f1 WHERE f1 MATCH 'zero'; +} {1 SQLITE_RANGE} #------------------------------------------------------------------------- # Test the xSet and xGetAuxdata APIs with a NULL destructor. diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index 105e11224b..1b5b6b3072 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -234,6 +234,19 @@ do_execsql_test 5.1 { } $::checksum +#------------------------------------------------------------------------- +# Check that a contentless table can be dropped. 
+# +reset_db +do_execsql_test 6.1 { + CREATE VIRTUAL TABLE xx USING fts5(x, y, content=""); + SELECT name FROM sqlite_master; +} {xx xx_data xx_docsize xx_config} +do_execsql_test 6.2 { + DROP TABLE xx; + SELECT name FROM sqlite_master; +} {} + + finish_test - diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index 099b966945..28486b34ac 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -240,6 +240,26 @@ foreach {tn hdr} { } } +#-------------------------------------------------------------------- +reset_db +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE x5 USING fts5(tt); + INSERT INTO x5 VALUES('a'); + INSERT INTO x5 VALUES('a a'); + INSERT INTO x5 VALUES('a a a'); + INSERT INTO x5 VALUES('a a a a'); + + UPDATE x5_docsize SET sz = X'' WHERE id=3; +} +proc colsize {cmd i} { + $cmd xColumnSize $i +} +sqlite3_fts5_create_function db colsize colsize + +do_catchsql_test 5.2 { + SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a' +} {1 SQLITE_CORRUPT_VTAB} + sqlite3_fts5_may_be_corrupt 0 finish_test diff --git a/ext/fts5/test/fts5corrupt3.test b/ext/fts5/test/fts5corrupt3.test index 1df8b3e8de..831eca5454 100644 --- a/ext/fts5/test/fts5corrupt3.test +++ b/ext/fts5/test/fts5corrupt3.test @@ -51,6 +51,23 @@ for {set i 0} {$i < $L} {incr i} { catchsql ROLLBACK } +#------------------------------------------------------------------------- +# Test that trailing bytes appended to the averages record are ignored. 
+# +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t2 USING fts5(x); + INSERT INTO t2 VALUES(rnddoc(10)); + INSERT INTO t2 VALUES(rnddoc(10)); + SELECT length(block) FROM t2_data WHERE id=1; +} {2} +do_execsql_test 2.2 { + UPDATE t2_data SET block = block || 'abcd' WHERE id=1; + SELECT length(block) FROM t2_data WHERE id=1; +} {6} +do_execsql_test 2.2 { + INSERT INTO t2 VALUES(rnddoc(10)); + SELECT length(block) FROM t2_data WHERE id=1; +} {2} sqlite3_fts5_may_be_corrupt 0 finish_test diff --git a/ext/fts5/test/fts5fault6.test b/ext/fts5/test/fts5fault6.test new file mode 100644 index 0000000000..73e488ba63 --- /dev/null +++ b/ext/fts5/test/fts5fault6.test @@ -0,0 +1,104 @@ +# 2014 June 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5fault6 + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +#------------------------------------------------------------------------- +# OOM while rebuilding an FTS5 table. 
+# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE tt USING fts5(a, b); + INSERT INTO tt VALUES('c d c g g f', 'a a a d g a'); + INSERT INTO tt VALUES('c d g b f d', 'b g e c g c'); + INSERT INTO tt VALUES('c c f d e d', 'c e g d b c'); + INSERT INTO tt VALUES('e a f c e f', 'g b a c d g'); + INSERT INTO tt VALUES('c g f b b d', 'g c d c f g'); + INSERT INTO tt VALUES('d a g a b b', 'g c g g c e'); + INSERT INTO tt VALUES('e f a b c e', 'f d c d c c'); + INSERT INTO tt VALUES('e c a g c d', 'b b g f f b'); + INSERT INTO tt VALUES('g b d d e b', 'f f b d a c'); + INSERT INTO tt VALUES('e a d a e d', 'c e a e f g'); +} +faultsim_save_and_close + +do_faultsim_test 1.1 -faults oom-t* -prep { + faultsim_restore_and_reopen +} -body { + db eval { INSERT INTO tt(tt) VALUES('rebuild') } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 1.2 -faults oom-t* -prep { + faultsim_restore_and_reopen +} -body { + db eval { REPLACE INTO tt(rowid, a, b) VALUES(6, 'x y z', 'l l l'); } +} -test { + faultsim_test_result {0 {}} +} + + +#------------------------------------------------------------------------- +# OOM within a special delete. 
+# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE tt USING fts5(a, content=""); + INSERT INTO tt VALUES('c d c g g f'); + INSERT INTO tt VALUES('c d g b f d'); + INSERT INTO tt VALUES('c c f d e d'); + INSERT INTO tt VALUES('e a f c e f'); + INSERT INTO tt VALUES('c g f b b d'); + INSERT INTO tt VALUES('d a g a b b'); + INSERT INTO tt VALUES('e f a b c e'); + INSERT INTO tt VALUES('e c a g c d'); + INSERT INTO tt VALUES('g b d d e b'); + INSERT INTO tt VALUES('e a d a e d'); +} +faultsim_save_and_close + +do_faultsim_test 2.1 -faults oom-t* -prep { + faultsim_restore_and_reopen +} -body { + db eval { INSERT INTO tt(tt, rowid, a) VALUES('delete', 3, 'c d g b f d'); } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 2.2 -faults oom-t* -prep { + faultsim_restore_and_reopen +} -body { + db eval { INSERT INTO tt(tt) VALUES('delete-all') } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 2.3 -faults oom-t* -prep { + faultsim_restore_and_reopen +} -body { + db eval { INSERT INTO tt VALUES('x y z') } +} -test { + faultsim_test_result {0 {}} +} + +finish_test + diff --git a/ext/fts5/test/fts5integrity.test b/ext/fts5/test/fts5integrity.test index 9e244c26fa..8f56e9273e 100644 --- a/ext/fts5/test/fts5integrity.test +++ b/ext/fts5/test/fts5integrity.test @@ -48,6 +48,50 @@ do_execsql_test 3.0 { do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); } +#-------------------------------------------------------------------- +# Mess around with a docsize record. And the averages record. Then +# check that integrity-check picks it up. 
+# +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE aa USING fts5(zz); + INSERT INTO aa(zz) VALUES('a b c d e'); + INSERT INTO aa(zz) VALUES('a b c d'); + INSERT INTO aa(zz) VALUES('a b c'); + INSERT INTO aa(zz) VALUES('a b'); + INSERT INTO aa(zz) VALUES('a'); + SELECT length(sz) FROM aa_docsize; +} {1 1 1 1 1} +do_execsql_test 4.1 { + INSERT INTO aa(aa) VALUES('integrity-check'); +} + +do_catchsql_test 4.2 { + BEGIN; + UPDATE aa_docsize SET sz = X'44' WHERE rowid = 3; + INSERT INTO aa(aa) VALUES('integrity-check'); +} {1 {database disk image is malformed}} + +do_catchsql_test 4.3 { + ROLLBACK; + BEGIN; + UPDATE aa_data SET block = X'44' WHERE rowid = 1; + INSERT INTO aa(aa) VALUES('integrity-check'); +} {1 {database disk image is malformed}} + +do_catchsql_test 4.4 { + ROLLBACK; + BEGIN; + INSERT INTO aa_docsize VALUES(23, X'04'); + INSERT INTO aa(aa) VALUES('integrity-check'); +} {1 {database disk image is malformed}} + +do_catchsql_test 4.5 { + ROLLBACK; + BEGIN; + INSERT INTO aa_docsize VALUES(23, X'00'); + INSERT INTO aa_content VALUES(23, ''); + INSERT INTO aa(aa) VALUES('integrity-check'); +} {1 {database disk image is malformed}} #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r} #exit diff --git a/manifest b/manifest index 7a404472a1..e116913237 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sbug\sin\stest\sscript\sfts5fault4.test. -D 2015-05-18T18:24:41.367 +C Improve\stest\scoverage\sof\sfts5_storage.c. 
+D 2015-05-18T20:34:59.915 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -113,7 +113,7 @@ F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 F ext/fts5/fts5_expr.c 0c4b50bb48740c76b8e8b89d5d40a55f8dbffd07 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43 -F ext/fts5/fts5_storage.c cb8b585bfb7870a36101f1a8fa0b0777f4d1b68d +F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c f18eeb125d733f4e815a11679b971fa61cd7ec77 F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d @@ -121,7 +121,7 @@ F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 -F ext/fts5/test/fts5aa.test 29409f14dec2ee67bd82ebf15111d6167b1003df +F ext/fts5/test/fts5aa.test 26f1a462213f3aa067c208bd508d6218c54a620f F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 05008e00bd2761cc45df838a0988ecf318cbe1fd F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d @@ -133,14 +133,14 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5aux.test d9cd26ee45ad5f628b4899f1ac5b757ce7a77bdf +F ext/fts5/test/fts5aux.test db9035ef292f3ae57ac392f974b1e6b1dd48c6c7 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F 
ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5config.test c9cc535f3b36cde1e5a32bf579f3f5962a9e82b2 -F ext/fts5/test/fts5content.test 532e15b541254410adc7bfb51f94631cfe82de8f +F ext/fts5/test/fts5content.test e46904decd896e38c848ad4f38fa4e80251a028b F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 -F ext/fts5/test/fts5corrupt2.test 7000030df189f1f3ca58b555b459bcbf9b8f8f77 -F ext/fts5/test/fts5corrupt3.test fe42c0ce0b58b7ad487a469049f91419d22c7459 +F ext/fts5/test/fts5corrupt2.test c231f532162de381fa83ec477b51cd8633fd9da7 +F ext/fts5/test/fts5corrupt3.test da4e2adb2308d8587c2eff31b5aa47447b8a2edb F ext/fts5/test/fts5dlidx.test 070531bd45685e545e3e6021deb543f730a4011b F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 F ext/fts5/test/fts5ea.test 6159e66c4fe9466c37cd6a0ed4197354588b4bcb @@ -150,9 +150,10 @@ F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test 7c7c46559368f06b98daa940d376c4740bcdad25 F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 +F ext/fts5/test/fts5fault6.test 7cdfdceef362cb9d72f66388846d62bff44c6d01 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 -F ext/fts5/test/fts5integrity.test 98801bd0fb7c53a40bc770280134865d61724f3a +F ext/fts5/test/fts5integrity.test b45f633381a85dc000e41d68c96ab510985ca35e F ext/fts5/test/fts5merge.test 8077454f2975a63f35761f4b8a718b3a808b7c9c F ext/fts5/test/fts5near.test d2e3343e62d438f2efd96ebcd83a0d30a16ea6dc F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 @@ -1327,7 +1328,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ce08206b5c5bf6a81decf20c99a524dd92c3715b -R 2291f8df21e231273c06942c0f18b8cc +P a508059305641c2cb53197b0f6e93d7b723697e2 +R b66f4775f516a5c6e7555c8518e4e7c2 U dan -Z 4cefad1c255457ef6a42a53ca33a35a5 +Z 5fb8f7409531231cca8a8852556e2f56 diff --git a/manifest.uuid b/manifest.uuid index 2399b85853..c57946abf1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a508059305641c2cb53197b0f6e93d7b723697e2 \ No newline at end of file +4dfe2e5871704125338620705e6638f36baaacd7 \ No newline at end of file From b10210ea1bc3565e62b0f316f01f6ed6e3c5b4ce Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 19 May 2015 11:32:01 +0000 Subject: [PATCH 134/206] Fix a memory leak that could follow an OOM condition in fts5. FossilOrigin-Name: de9f8ef6ebf036df5a558cd78fb4927da2d83ce8 --- ext/fts5/fts5_tcl.c | 3 +++ ext/fts5/fts5_tokenize.c | 20 ++++++++++-------- ext/fts5/test/fts5_common.tcl | 1 + ext/fts5/test/fts5tokenizer.test | 35 +++++++++++++++++++++++++++++++- manifest | 18 ++++++++-------- manifest.uuid | 2 +- 6 files changed, 60 insertions(+), 19 deletions(-) diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 46e2f121b5..387e8a9920 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -154,6 +154,9 @@ static int xTokenizeCb( rc = Tcl_EvalObjEx(p->interp, pEval, 0); Tcl_DecrRefCount(pEval); + if( rc==TCL_OK ){ + rc = f5tResultToErrorCode(Tcl_GetStringResult(p->interp)); + } return rc; } diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 428a637e38..25316dd3c3 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -55,7 +55,14 @@ static void fts5AsciiAddExceptions( } /* -** Create a "ascii" tokenizer. +** Delete a "ascii" tokenizer. +*/ +static void fts5AsciiDelete(Fts5Tokenizer *p){ + sqlite3_free(p); +} + +/* +** Create an "ascii" tokenizer. 
*/ static int fts5AsciiCreate( void *pCtx, @@ -85,6 +92,10 @@ static int fts5AsciiCreate( rc = SQLITE_ERROR; } } + if( rc!=SQLITE_OK ){ + fts5AsciiDelete((Fts5Tokenizer*)p); + p = 0; + } } } @@ -92,13 +103,6 @@ static int fts5AsciiCreate( return rc; } -/* -** Delete a "ascii" tokenizer. -*/ -static void fts5AsciiDelete(Fts5Tokenizer *p){ - sqlite3_free(p); -} - static void asciiFold(char *aOut, const char *aIn, int nByte){ int i; diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 9c612d202d..70f5063b79 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -51,6 +51,7 @@ proc fts5_test_columntotalsize {cmd} { proc test_append_token {varname token iStart iEnd} { upvar $varname var lappend var $token + return "SQLITE_OK" } proc fts5_test_tokenize {cmd} { set res [list] diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index 42f5956492..a365854295 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -9,7 +9,7 @@ # #*********************************************************************** # -# Tests focusing on the fts5 tokenizers +# Tests focusing on the built-in fts5 tokenizers. # source [file join [file dirname [info script]] fts5_common.tcl] @@ -101,5 +101,38 @@ foreach {tn tokenizer} {1 ascii 2 unicode61} { } } +#------------------------------------------------------------------------- +# Miscellaneous tests for the ascii tokenizer. +# +# 5.1.*: Test that the ascii tokenizer ignores non-ASCII characters in the +# 'separators' option. But unicode61 does not. +# +# 5.2.*: An option without an argument is an error. 
+# + +do_test 5.1.1 { + execsql " + CREATE VIRTUAL TABLE a1 USING fts5(x, tokenize=`ascii separators '\u1234'`); + INSERT INTO a1 VALUES('abc\u1234def'); + " + execsql { SELECT rowid FROM a1 WHERE a1 MATCH 'def' } +} {} + +do_test 5.1.2 { + execsql " + CREATE VIRTUAL TABLE a2 USING fts5( + x, tokenize=`unicode61 separators '\u1234'`); + INSERT INTO a2 VALUES('abc\u1234def'); + " + execsql { SELECT rowid FROM a2 WHERE a2 MATCH 'def' } +} {1} + +do_catchsql_test 5.2 { + CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars'); +} {1 {error in tokenizer constructor}} +do_catchsql_test 5.3 { + CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg'); +} {1 {error in tokenizer constructor}} + finish_test diff --git a/manifest b/manifest index e116913237..3125dc22c9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\stest\scoverage\sof\sfts5_storage.c. -D 2015-05-18T20:34:59.915 +C Fix\sa\smemory\sleak\sthat\scould\sfollow\san\sOOM\scondition\sin\sfts5. 
+D 2015-05-19T11:32:01.082 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -114,13 +114,13 @@ F ext/fts5/fts5_expr.c 0c4b50bb48740c76b8e8b89d5d40a55f8dbffd07 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43 F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 -F ext/fts5/fts5_tcl.c f18eeb125d733f4e815a11679b971fa61cd7ec77 -F ext/fts5/fts5_tokenize.c 830eae0d35a5a5a90af34df65da3427f46d942fc +F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c +F ext/fts5/fts5_tokenize.c 4d9d50478169a8446686ab255cc723a6b4f4c20b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 +F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 F ext/fts5/test/fts5aa.test 26f1a462213f3aa067c208bd508d6218c54a620f F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 05008e00bd2761cc45df838a0988ecf318cbe1fd @@ -164,7 +164,7 @@ F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 -F ext/fts5/test/fts5tokenizer.test bbcde2a7473dcaa9a1fc6809aa8965acb7b846ff +F ext/fts5/test/fts5tokenizer.test 45cc65223a686b303b409b19154ac310ba70678d F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 
64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 @@ -1328,7 +1328,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a508059305641c2cb53197b0f6e93d7b723697e2 -R b66f4775f516a5c6e7555c8518e4e7c2 +P 4dfe2e5871704125338620705e6638f36baaacd7 +R ae51969adfb13f0909e21c548fe61d33 U dan -Z 5fb8f7409531231cca8a8852556e2f56 +Z 9066bd5c4ea85d49ccee057135986e82 diff --git a/manifest.uuid b/manifest.uuid index c57946abf1..693354493e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4dfe2e5871704125338620705e6638f36baaacd7 \ No newline at end of file +de9f8ef6ebf036df5a558cd78fb4927da2d83ce8 \ No newline at end of file From 08e7199feb7b7951985ecf99f5978c54b017aa10 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 19 May 2015 11:38:32 +0000 Subject: [PATCH 135/206] Update Makefile.in so that the amalgamation files built by "make sqlite3.c" include fts5. 
FossilOrigin-Name: 2870a80593302e7835c5f5d167f42710d8439e7d --- Makefile.in | 28 ++++++++++++++++++++++++++++ manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/Makefile.in b/Makefile.in index a6984b2fd8..45cd692552 100644 --- a/Makefile.in +++ b/Makefile.in @@ -340,6 +340,22 @@ SRC += \ $(TOP)/ext/rtree/rtree.h \ $(TOP)/ext/rtree/rtree.c +SRC += \ + $(TOP)/ext/fts5/fts5.h \ + $(TOP)/ext/fts5/fts5Int.h \ + $(TOP)/ext/fts5/fts5_aux.c \ + $(TOP)/ext/fts5/fts5_buffer.c \ + $(TOP)/ext/fts5/fts5.c \ + $(TOP)/ext/fts5/fts5_config.c \ + $(TOP)/ext/fts5/fts5_expr.c \ + $(TOP)/ext/fts5/fts5_hash.c \ + $(TOP)/ext/fts5/fts5_index.c \ + fts5parse.c fts5parse.h \ + $(TOP)/ext/fts5/fts5_storage.c \ + $(TOP)/ext/fts5/fts5_tokenize.c \ + $(TOP)/ext/fts5/fts5_unicode2.c \ + $(TOP)/ext/fts5/fts5_vocab.c + # Generated source code files # @@ -942,6 +958,18 @@ fts3_write.lo: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) rtree.lo: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c +fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon + cp $(TOP)/ext/fts5/fts5parse.y . + rm -f fts5parse.h + ./lemon $(OPTS) fts5parse.y + mv fts5parse.c fts5parse.c.orig + echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c + cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \ + | sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c + echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c + +fts5parse.h: fts5parse.c + # Rules to build the 'testfixture' application. # diff --git a/manifest b/manifest index 3125dc22c9..50a6c9ec79 100644 --- a/manifest +++ b/manifest @@ -1,7 +1,7 @@ -C Fix\sa\smemory\sleak\sthat\scould\sfollow\san\sOOM\scondition\sin\sfts5. -D 2015-05-19T11:32:01.082 +C Update\sMakefile.in\sso\sthat\sthe\samalgamation\sfiles\sbuilt\sby\s"make\ssqlite3.c"\sinclude\sfts5. 
+D 2015-05-19T11:38:32.860 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f -F Makefile.in edfc69769e613a6359c42c06ea1d42c3bece1736 +F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 F Makefile.msc a9fd7fd02265aa5b3b2522f5e39d975972ff906d F Makefile.vxworks e1b65dea203f054e71653415bd8f96dcaed47858 @@ -1328,7 +1328,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 4dfe2e5871704125338620705e6638f36baaacd7 -R ae51969adfb13f0909e21c548fe61d33 +P de9f8ef6ebf036df5a558cd78fb4927da2d83ce8 +R 48462bff9aab6209ce6b76328087ee6c U dan -Z 9066bd5c4ea85d49ccee057135986e82 +Z 49662f04b4638e7ccdb8bbbc0c7d2948 diff --git a/manifest.uuid b/manifest.uuid index 693354493e..12095dd529 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -de9f8ef6ebf036df5a558cd78fb4927da2d83ce8 \ No newline at end of file +2870a80593302e7835c5f5d167f42710d8439e7d \ No newline at end of file From 116eaee4a0a9ed7db81e7acc9a7d30c7b9f7f3ef Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 19 May 2015 19:37:09 +0000 Subject: [PATCH 136/206] Add tests for fts5 tokenizers. 
FossilOrigin-Name: 4f90ba20e2be6ec5755fe894938ac97342d6fbf6 --- ext/fts5/test/fts5fault6.test | 48 ++++++++++++++++++++ ext/fts5/test/fts5tokenizer.test | 76 ++++++++++++++++++++++++++++++++ manifest | 14 +++--- manifest.uuid | 2 +- 4 files changed, 132 insertions(+), 8 deletions(-) diff --git a/ext/fts5/test/fts5fault6.test b/ext/fts5/test/fts5fault6.test index 73e488ba63..b9657be1cc 100644 --- a/ext/fts5/test/fts5fault6.test +++ b/ext/fts5/test/fts5fault6.test @@ -100,5 +100,53 @@ do_faultsim_test 2.3 -faults oom-t* -prep { faultsim_test_result {0 {}} } +#------------------------------------------------------------------------- +# OOM in the ASCII tokenizer with very large tokens. +# +# Also the unicode tokenizer. +# +set t1 [string repeat wxyz 20] +set t2 [string repeat wxyz 200] +set t3 [string repeat wxyz 2000] +set doc "$t1 $t2 $t3" +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE xyz USING fts5(c, tokenize=ascii, content=""); + CREATE VIRTUAL TABLE xyz2 USING fts5(c, content=""); +} +faultsim_save_and_close + +do_faultsim_test 3.1 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM xyz } +} -body { + db eval { INSERT INTO xyz VALUES($::doc) } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 3.2 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM xyz2 } +} -body { + db eval { INSERT INTO xyz2 VALUES($::doc) } +} -test { + faultsim_test_result {0 {}} +} + +#------------------------------------------------------------------------- +# OOM while initializing a unicode61 tokenizer. 
+# +reset_db +faultsim_save_and_close +do_faultsim_test 4.1 -faults oom-t* -prep { + faultsim_restore_and_reopen +} -body { + db eval { + CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc"); + } +} -test { + faultsim_test_result {0 {}} +} + finish_test diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index a365854295..83ad169188 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -134,5 +134,81 @@ do_catchsql_test 5.3 { CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg'); } {1 {error in tokenizer constructor}} +#------------------------------------------------------------------------- +# Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE +# correctly. +# + +proc test_token_cb {varname token iStart iEnd} { + upvar $varname var + lappend var $token + if {[llength $var]==3} { return "SQLITE_DONE" } + return "SQLITE_OK" +} + +proc tokenize {cmd} { + set res [list] + $cmd xTokenize [$cmd xColumnText 0] [list test_token_cb res] + set res +} +sqlite3_fts5_create_function db tokenize tokenize + +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii); + INSERT INTO x1 VALUES('q w e r t y'); + INSERT INTO x1 VALUES('y t r e w q'); + SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r'; +} { + {q w e} {y t r} +} + +do_execsql_test 6.1 { + CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61); + INSERT INTO x2 VALUES('q w e r t y'); + INSERT INTO x2 VALUES('y t r e w q'); + SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r'; +} { + {q w e} {y t r} +} + + +#------------------------------------------------------------------------- +# Miscellaneous tests for the unicode tokenizer. 
+# +do_catchsql_test 6.1 { + CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars'); +} {1 {error in tokenizer constructor}} +do_catchsql_test 6.2 { + CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b'); +} {1 {error in tokenizer constructor}} +do_catchsql_test 6.3 { + CREATE VIRTUAL TABLE a3 USING fts5( + x, y, tokenize = 'unicode61 remove_diacritics 2' + ); +} {1 {error in tokenizer constructor}} +do_catchsql_test 6.4 { + CREATE VIRTUAL TABLE a3 USING fts5( + x, y, tokenize = 'unicode61 remove_diacritics 10' + ); +} {1 {error in tokenizer constructor}} + +#------------------------------------------------------------------------- +# Porter tokenizer with very large tokens. +# +set a [string repeat a 100] +set b [string repeat b 500] +set c [string repeat c 1000] +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter); + INSERT INTO e5 VALUES($a || ' ' || $b); + INSERT INTO e5 VALUES($b || ' ' || $c); + INSERT INTO e5 VALUES($c || ' ' || $a); +} + +do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 } +do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 } +do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 } + + finish_test diff --git a/manifest b/manifest index 50a6c9ec79..90de227169 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\sMakefile.in\sso\sthat\sthe\samalgamation\sfiles\sbuilt\sby\s"make\ssqlite3.c"\sinclude\sfts5. -D 2015-05-19T11:38:32.860 +C Add\stests\sfor\sfts5\stokenizers. 
+D 2015-05-19T19:37:09.304 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -150,7 +150,7 @@ F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test 7c7c46559368f06b98daa940d376c4740bcdad25 F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 -F ext/fts5/test/fts5fault6.test 7cdfdceef362cb9d72f66388846d62bff44c6d01 +F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 F ext/fts5/test/fts5integrity.test b45f633381a85dc000e41d68c96ab510985ca35e @@ -164,7 +164,7 @@ F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 -F ext/fts5/test/fts5tokenizer.test 45cc65223a686b303b409b19154ac310ba70678d +F ext/fts5/test/fts5tokenizer.test f54bbbff67ff03ce49c153c0f6a5e3f8369f986a F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 @@ -1328,7 +1328,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P de9f8ef6ebf036df5a558cd78fb4927da2d83ce8 -R 48462bff9aab6209ce6b76328087ee6c +P 2870a80593302e7835c5f5d167f42710d8439e7d +R 
63f128b09262f76dbe78be4c38aa78c8 U dan -Z 49662f04b4638e7ccdb8bbbc0c7d2948 +Z e801c590b1575eb988d36c609d9907aa diff --git a/manifest.uuid b/manifest.uuid index 12095dd529..1348da3add 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2870a80593302e7835c5f5d167f42710d8439e7d \ No newline at end of file +4f90ba20e2be6ec5755fe894938ac97342d6fbf6 \ No newline at end of file From 8c1f46de502ed8a91185ee1f30f0103b8e5afaab Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 20 May 2015 09:27:51 +0000 Subject: [PATCH 137/206] Improve test coverage of fts5_tokenize.c. FossilOrigin-Name: 0e91a6a520f040b8902da6a1a4d9107dc66c0ea3 --- ext/fts5/fts5_tokenize.c | 7 ++-- ext/fts5/test/fts5porter2.test | 64 ++++++++++++++++++++++++++++++++ ext/fts5/test/fts5tokenizer.test | 31 ++++++++++++++++ ext/fts5/test/fts5unicode2.test | 19 ++++++++++ manifest | 17 +++++---- manifest.uuid | 2 +- 6 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 ext/fts5/test/fts5porter2.test diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 25316dd3c3..b340d45d36 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -666,8 +666,8 @@ static int fts5Porter_Ostar(char *zStem, int nStem){ /* porter rule condition: (m > 1 and (*S or *T)) */ static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){ - return nStem>0 - && (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') + assert( nStem>0 ); + return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') && fts5Porter_MGt1(zStem, nStem); } @@ -1167,7 +1167,8 @@ static int fts5PorterCb( fts5PorterStep4(aBuf, &nBuf); /* Step 5a. 
*/ - if( nBuf>0 && aBuf[nBuf-1]=='e' ){ + assert( nBuf>0 ); + if( aBuf[nBuf-1]=='e' ){ if( fts5Porter_MGt1(aBuf, nBuf-1) || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) ){ diff --git a/ext/fts5/test/fts5porter2.test b/ext/fts5/test/fts5porter2.test new file mode 100644 index 0000000000..7ea2e6994d --- /dev/null +++ b/ext/fts5/test/fts5porter2.test @@ -0,0 +1,64 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 porter stemmer implementation. +# +# These are extra tests added to those in fts5porter.test in order to +# improve test coverage of the porter stemmer implementation. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5porter2 + +set test_vocab { + tion tion + ation ation + vation vation + avation avat + vion vion + ion ion + relational relat + relation relat + relate relat + zzz zzz + ii ii + iiing ii + xtional xtional + xenci xenci + xlogi xlogi + realization realiz + realize realiz + xization xizat + capitalism capit + talism talism + xiveness xive + xfulness xful + xousness xous + xical xical + xicate xicat + xicity xiciti + ies ie + eed e + eing e + s s +} + +set i 0 +foreach {in out} $test_vocab { + do_test "1.$i.($in -> $out)" { + lindex [sqlite3_fts5_tokenize db porter $in] 0 + } $out + incr i +} + + +finish_test + diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index 83ad169188..1a3d253be8 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -209,6 +209,37 @@ do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 } do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 
1 2 } do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 } +#------------------------------------------------------------------------- +# Test the 'separators' option with the unicode61 tokenizer. +# +do_execsql_test 8.1 { + BEGIN; + CREATE VIRTUAL TABLE e6 USING fts5(x, + tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ" + ); + INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog'); + CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); + SELECT term FROM e7; + ROLLBACK; +} { + brown dog fox jumped lazy over quick the +} + +do_execsql_test 8.2 [subst { + BEGIN; + CREATE VIRTUAL TABLE e6 USING fts5(x, + tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'" + ); + INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01' + || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog' + ); + INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09'); + CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); + SELECT term FROM e7; + ROLLBACK; +}] [subst { + brown dog fox jumped lazy over quick the \u0E08 \u0E09 +}] finish_test diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test index 056106e18e..280d045db1 100644 --- a/ext/fts5/test/fts5unicode2.test +++ b/ext/fts5/test/fts5unicode2.test @@ -70,6 +70,12 @@ do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx" # Title-case mappings work do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5" +do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \ + "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3" + +do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \ + "abc abc def def" + #------------------------------------------------------------------------- # set docs [list { @@ -225,6 +231,10 @@ do_test 4.1 { INSERT INTO t1 VALUES($c); INSERT INTO t1 VALUES($d); } + + execsql "CREATE VIRTUAL TABLE t8 USING fts5( + a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\" + )" } {} do_test 4.2 { @@ -253,6 +263,15 @@ do_test 4.3 
{ } } {} +do_test 4.4 { + sqlite3_exec_hex db { + CREATE VIRTUAL TABLE t9 USING fts5(a, b, + tokenize="unicode61 separators '%C09004'" + ); + INSERT INTO t9(a) VALUES('abc%88def %89ghi%90'); + } +} {0 {}} + #------------------------------------------------------------------------- diff --git a/manifest b/manifest index 90de227169..d897c6433b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sfor\sfts5\stokenizers. -D 2015-05-19T19:37:09.304 +C Improve\stest\scoverage\sof\sfts5_tokenize.c. +D 2015-05-20T09:27:51.629 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -115,7 +115,7 @@ F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43 F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c -F ext/fts5/fts5_tokenize.c 4d9d50478169a8446686ab255cc723a6b4f4c20b +F ext/fts5/fts5_tokenize.c 6f4d2cbe7ed892821d1a233c7db613dafdb3877a F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 @@ -159,14 +159,15 @@ F ext/fts5/test/fts5near.test d2e3343e62d438f2efd96ebcd83a0d30a16ea6dc F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e +F ext/fts5/test/fts5porter2.test c534385e88e685b354c2b2020acc0c4920042c8e F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 F ext/fts5/test/fts5restart.test 
cd58a5fb552ac10db549482698e503f82693bcd0 F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 -F ext/fts5/test/fts5tokenizer.test f54bbbff67ff03ce49c153c0f6a5e3f8369f986a +F ext/fts5/test/fts5tokenizer.test 668747fcb41de6fc7daebc478920b705164fccc1 F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d -F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee +F ext/fts5/test/fts5unicode2.test ad38982b03dc9213445facb16e99f668a74cc4ba F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 F ext/fts5/test/fts5vocab.test 80fb22850dd3b2c92a3896e6021605e08c0872aa @@ -1328,7 +1329,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2870a80593302e7835c5f5d167f42710d8439e7d -R 63f128b09262f76dbe78be4c38aa78c8 +P 4f90ba20e2be6ec5755fe894938ac97342d6fbf6 +R 43528c0613d372060fbd8256efc47909 U dan -Z e801c590b1575eb988d36c609d9907aa +Z e3c696b644b37e5798613b4f15c87656 diff --git a/manifest.uuid b/manifest.uuid index 1348da3add..e111b8a77c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4f90ba20e2be6ec5755fe894938ac97342d6fbf6 \ No newline at end of file +0e91a6a520f040b8902da6a1a4d9107dc66c0ea3 \ No newline at end of file From 21b7d2a9b818533d02eb408bcfc28d4c0a95eac7 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 22 May 2015 06:08:25 +0000 Subject: [PATCH 138/206] Improve test coverage of fts5_unicode2.c. 
FossilOrigin-Name: fea8a4db9d8c7b9a946017a0dc984cbca6ce240e --- ext/fts3/unicode/mkunicode.tcl | 158 ++---------------------------- ext/fts3/unicode/parseunicode.tcl | 146 +++++++++++++++++++++++++++ ext/fts5/fts5Int.h | 11 +++ ext/fts5/fts5_expr.c | 43 +++++++- ext/fts5/fts5_tokenize.c | 7 -- ext/fts5/fts5_unicode2.c | 14 +-- ext/fts5/test/fts5unicode3.test | 122 +++++++++++++++++++++++ manifest | 22 +++-- manifest.uuid | 2 +- 9 files changed, 349 insertions(+), 176 deletions(-) create mode 100644 ext/fts3/unicode/parseunicode.tcl create mode 100644 ext/fts5/test/fts5unicode3.test diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 692ba72bf8..32807a5cc5 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -1,77 +1,5 @@ -# -# Parameter $zName must be a path to the file UnicodeData.txt. This command -# reads the file and returns a list of mappings required to remove all -# diacritical marks from a unicode string. Each mapping is itself a list -# consisting of two elements - the unicode codepoint and the single ASCII -# character that it should be replaced with, or an empty string if the -# codepoint should simply be removed from the input. Examples: -# -# { 224 a } (replace codepoint 224 to "a") -# { 769 "" } (remove codepoint 769 from input) -# -# Mappings are only returned for non-upper case codepoints. It is assumed -# that the input has already been folded to lower case. 
-# -proc rd_load_unicodedata_text {zName} { - global tl_lookup_table - - set fd [open $zName] - set lField { - code - character_name - general_category - canonical_combining_classes - bidirectional_category - character_decomposition_mapping - decimal_digit_value - digit_value - numeric_value - mirrored - unicode_1_name - iso10646_comment_field - uppercase_mapping - lowercase_mapping - titlecase_mapping - } - set lRet [list] - - while { ![eof $fd] } { - set line [gets $fd] - if {$line == ""} continue - - set fields [split $line ";"] - if {[llength $fields] != [llength $lField]} { error "parse error: $line" } - foreach $lField $fields {} - if { [llength $character_decomposition_mapping]!=2 - || [string is xdigit [lindex $character_decomposition_mapping 0]]==0 - } { - continue - } - - set iCode [expr "0x$code"] - set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"] - set iDia [expr "0x[lindex $character_decomposition_mapping 1]"] - - if {[info exists tl_lookup_table($iCode)]} continue - - if { ($iAscii >= 97 && $iAscii <= 122) - || ($iAscii >= 65 && $iAscii <= 90) - } { - lappend lRet [list $iCode [string tolower [format %c $iAscii]]] - set dia($iDia) 1 - } - } - - foreach d [array names dia] { - lappend lRet [list $d ""] - } - set lRet [lsort -integer -index 0 $lRet] - - close $fd - set lRet -} - +source [file join [file dirname [info script]] parseunicode.tcl] proc print_rd {map} { global tl_lookup_table @@ -204,53 +132,6 @@ proc print_isdiacritic {zFunc map} { #------------------------------------------------------------------------- -# Parameter $zName must be a path to the file UnicodeData.txt. This command -# reads the file and returns a list of codepoints (integers). The list -# contains all codepoints in the UnicodeData.txt assigned to any "General -# Category" that is not a "Letter" or "Number". 
-# -proc an_load_unicodedata_text {zName} { - set fd [open $zName] - set lField { - code - character_name - general_category - canonical_combining_classes - bidirectional_category - character_decomposition_mapping - decimal_digit_value - digit_value - numeric_value - mirrored - unicode_1_name - iso10646_comment_field - uppercase_mapping - lowercase_mapping - titlecase_mapping - } - set lRet [list] - - while { ![eof $fd] } { - set line [gets $fd] - if {$line == ""} continue - - set fields [split $line ";"] - if {[llength $fields] != [llength $lField]} { error "parse error: $line" } - foreach $lField $fields {} - - set iCode [expr "0x$code"] - set bAlnum [expr { - [lsearch {L N} [string range $general_category 0 0]] >= 0 - || $general_category=="Co" - }] - - if { !$bAlnum } { lappend lRet $iCode } - } - - close $fd - set lRet -} - proc an_load_separator_ranges {} { global unicodedata.txt set lSep [an_load_unicodedata_text ${unicodedata.txt}] @@ -440,29 +321,6 @@ proc print_test_isalnum {zFunc lRange} { #------------------------------------------------------------------------- -proc tl_load_casefolding_txt {zName} { - global tl_lookup_table - - set fd [open $zName] - while { ![eof $fd] } { - set line [gets $fd] - if {[string range $line 0 0] == "#"} continue - if {$line == ""} continue - - foreach x {a b c d} {unset -nocomplain $x} - foreach {a b c d} [split $line ";"] {} - - set a2 [list] - set c2 [list] - foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] } - foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] } - set b [string trim $b] - set d [string trim $d] - - if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 } - } -} - proc tl_create_records {} { global tl_lookup_table @@ -635,10 +493,12 @@ proc print_fold {zFunc} { if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); }else if( c<65536 ){ + const struct TableEntry *p; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; + assert( c>aEntry[0].iCode ); while( 
iHi>=iLo ){ int iTest = (iHi + iLo) / 2; int cmp = (c - aEntry[iTest].iCode); @@ -649,14 +509,12 @@ proc print_fold {zFunc} { iHi = iTest-1; } } - assert( iRes<0 || c>=aEntry[iRes].iCode ); - if( iRes>=0 ){ - const struct TableEntry *p = &aEntry[iRes]; - if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ - ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; - assert( ret>0 ); - } + assert( iRes>=0 && c>=aEntry[iRes].iCode ); + p = &aEntry[iRes]; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; + assert( ret>0 ); } if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); diff --git a/ext/fts3/unicode/parseunicode.tcl b/ext/fts3/unicode/parseunicode.tcl new file mode 100644 index 0000000000..0cb2c83a18 --- /dev/null +++ b/ext/fts3/unicode/parseunicode.tcl @@ -0,0 +1,146 @@ + +#-------------------------------------------------------------------------- +# Parameter $zName must be a path to the file UnicodeData.txt. This command +# reads the file and returns a list of mappings required to remove all +# diacritical marks from a unicode string. Each mapping is itself a list +# consisting of two elements - the unicode codepoint and the single ASCII +# character that it should be replaced with, or an empty string if the +# codepoint should simply be removed from the input. Examples: +# +# { 224 a } (replace codepoint 224 to "a") +# { 769 "" } (remove codepoint 769 from input) +# +# Mappings are only returned for non-upper case codepoints. It is assumed +# that the input has already been folded to lower case. 
+# +proc rd_load_unicodedata_text {zName} { + global tl_lookup_table + + set fd [open $zName] + set lField { + code + character_name + general_category + canonical_combining_classes + bidirectional_category + character_decomposition_mapping + decimal_digit_value + digit_value + numeric_value + mirrored + unicode_1_name + iso10646_comment_field + uppercase_mapping + lowercase_mapping + titlecase_mapping + } + set lRet [list] + + while { ![eof $fd] } { + set line [gets $fd] + if {$line == ""} continue + + set fields [split $line ";"] + if {[llength $fields] != [llength $lField]} { error "parse error: $line" } + foreach $lField $fields {} + if { [llength $character_decomposition_mapping]!=2 + || [string is xdigit [lindex $character_decomposition_mapping 0]]==0 + } { + continue + } + + set iCode [expr "0x$code"] + set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"] + set iDia [expr "0x[lindex $character_decomposition_mapping 1]"] + + if {[info exists tl_lookup_table($iCode)]} continue + + if { ($iAscii >= 97 && $iAscii <= 122) + || ($iAscii >= 65 && $iAscii <= 90) + } { + lappend lRet [list $iCode [string tolower [format %c $iAscii]]] + set dia($iDia) 1 + } + } + + foreach d [array names dia] { + lappend lRet [list $d ""] + } + set lRet [lsort -integer -index 0 $lRet] + + close $fd + set lRet +} + +#------------------------------------------------------------------------- +# Parameter $zName must be a path to the file UnicodeData.txt. This command +# reads the file and returns a list of codepoints (integers). The list +# contains all codepoints in the UnicodeData.txt assigned to any "General +# Category" that is not a "Letter" or "Number". 
+# +proc an_load_unicodedata_text {zName} { + set fd [open $zName] + set lField { + code + character_name + general_category + canonical_combining_classes + bidirectional_category + character_decomposition_mapping + decimal_digit_value + digit_value + numeric_value + mirrored + unicode_1_name + iso10646_comment_field + uppercase_mapping + lowercase_mapping + titlecase_mapping + } + set lRet [list] + + while { ![eof $fd] } { + set line [gets $fd] + if {$line == ""} continue + + set fields [split $line ";"] + if {[llength $fields] != [llength $lField]} { error "parse error: $line" } + foreach $lField $fields {} + + set iCode [expr "0x$code"] + set bAlnum [expr { + [lsearch {L N} [string range $general_category 0 0]] >= 0 + || $general_category=="Co" + }] + + if { !$bAlnum } { lappend lRet $iCode } + } + + close $fd + set lRet +} + +proc tl_load_casefolding_txt {zName} { + global tl_lookup_table + + set fd [open $zName] + while { ![eof $fd] } { + set line [gets $fd] + if {[string range $line 0 0] == "#"} continue + if {$line == ""} continue + + foreach x {a b c d} {unset -nocomplain $x} + foreach {a b c d} [split $line ";"] {} + + set a2 [list] + set c2 [list] + foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] } + foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] } + set b [string trim $b] + set d [string trim $d] + + if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 } + } +} + + diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 2a66fd8b18..f914c983b1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -631,4 +631,15 @@ int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); ** End of interface to code in fts5_vocab.c. **************************************************************************/ + +/************************************************************************** +** Interface to automatically generated code in fts5_unicode2.c. 
+*/ +int sqlite3Fts5UnicodeIsalnum(int c); +int sqlite3Fts5UnicodeIsdiacritic(int c); +int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); +/* +** End of interface to code in fts5_unicode2.c. +**************************************************************************/ + #endif diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index e0496fb8d2..3b8601bcbd 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1618,6 +1618,45 @@ static void fts5ExprFunctionTcl( fts5ExprFunction(pCtx, nArg, apVal, 1); } +/* +** The implementation of an SQLite user-defined-function that accepts a +** single integer as an argument. If the integer is an alpha-numeric +** unicode code point, 1 is returned. Otherwise 0. +*/ +static void fts5ExprIsAlnum( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + int iCode; + if( nArg!=1 ){ + sqlite3_result_error(pCtx, + "wrong number of arguments to function fts5_isalnum", -1 + ); + return; + } + iCode = sqlite3_value_int(apVal[0]); + sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode)); +} + +static void fts5ExprFold( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + if( nArg!=1 && nArg!=2 ){ + sqlite3_result_error(pCtx, + "wrong number of arguments to function fts5_fold", -1 + ); + }else{ + int iCode; + int bRemoveDiacritics = 0; + iCode = sqlite3_value_int(apVal[0]); + if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]); + sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); + } +} + /* ** This is called during initialization to register the fts5_expr() scalar ** UDF with the SQLite handle passed as the only argument. 
@@ -1627,8 +1666,10 @@ int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ const char *z; void (*x)(sqlite3_context*,int,sqlite3_value**); } aFunc[] = { - { "fts5_expr", fts5ExprFunctionHr }, + { "fts5_expr", fts5ExprFunctionHr }, { "fts5_expr_tcl", fts5ExprFunctionTcl }, + { "fts5_isalnum", fts5ExprIsAlnum }, + { "fts5_fold", fts5ExprFold }, }; int i; int rc = SQLITE_OK; diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index b340d45d36..7dacc2912f 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -174,13 +174,6 @@ static int fts5AsciiTokenize( ** Start of unicode61 tokenizer implementation. */ -/* -** Functions in fts5_unicode2.c. -*/ -int sqlite3Fts5UnicodeIsalnum(int c); -int sqlite3Fts5UnicodeIsdiacritic(int c); -int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); - /* ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c index 972e7ed975..11e211c0de 100644 --- a/ext/fts5/fts5_unicode2.c +++ b/ext/fts5/fts5_unicode2.c @@ -327,10 +327,12 @@ int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); }else if( c<65536 ){ + const struct TableEntry *p; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; + assert( c>aEntry[0].iCode ); while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; int cmp = (c - aEntry[iTest].iCode); @@ -341,14 +343,12 @@ int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ iHi = iTest-1; } } - assert( iRes<0 || c>=aEntry[iRes].iCode ); - if( iRes>=0 ){ - const struct TableEntry *p = &aEntry[iRes]; - if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ - ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; - assert( ret>0 ); - } + assert( iRes>=0 && c>=aEntry[iRes].iCode ); + p = &aEntry[iRes]; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; + 
assert( ret>0 ); } if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret); diff --git a/ext/fts5/test/fts5unicode3.test b/ext/fts5/test/fts5unicode3.test new file mode 100644 index 0000000000..a9efa4474c --- /dev/null +++ b/ext/fts5/test/fts5unicode3.test @@ -0,0 +1,122 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 tokenizers +# + +proc fts3_unicode_path {file} { + file join [file dirname [info script]] .. .. fts3 unicode $file +} + +source [file join [file dirname [info script]] fts5_common.tcl] +source [fts3_unicode_path parseunicode.tcl] +set testprefix fts5unicode3 + +set CF [fts3_unicode_path CaseFolding.txt] +set UD [fts3_unicode_path UnicodeData.txt] + +tl_load_casefolding_txt $CF +foreach x [an_load_unicodedata_text $UD] { + set aNotAlnum($x) 1 +} + +foreach {y} [rd_load_unicodedata_text $UD] { + foreach {code ascii} $y {} + if {$ascii==""} { + set int 0 + } else { + binary scan $ascii c int + } + set aDiacritic($code) $int +} + +proc tcl_fold {i {bRemoveDiacritic 0}} { + global tl_lookup_table + global aDiacritic + + if {[info exists tl_lookup_table($i)]} { + set i $tl_lookup_table($i) + } + if {$bRemoveDiacritic && [info exists aDiacritic($i)]} { + set i $aDiacritic($i) + } + expr $i +} +db func tcl_fold tcl_fold + +proc tcl_isalnum {i} { + global aNotAlnum + expr {![info exists aNotAlnum($i)]} +} +db func tcl_isalnum tcl_isalnum + + +do_catchsql_test 1.0.1 { + SELECT fts5_isalnum(1, 2, 3); +} {1 {wrong number of arguments to function fts5_isalnum}} +do_catchsql_test 1.0.2 { + SELECT fts5_fold(); +} {1 {wrong number of arguments to function fts5_fold}} +do_catchsql_test 1.0.3 { + SELECT fts5_fold(1,2,3); +} {1 
{wrong number of arguments to function fts5_fold}} + +do_execsql_test 1.1 { + WITH ii(i) AS ( + SELECT -1 + UNION ALL + SELECT i+1 FROM ii WHERE i<100000 + ) + SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int); +} {0 {}} + +do_execsql_test 1.2 { + WITH ii(i) AS ( + SELECT -1 + UNION ALL + SELECT i+1 FROM ii WHERE i<100000 + ) + SELECT count(*), min(i) FROM ii + WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int); +} {0 {}} + +do_execsql_test 1.3 { + WITH ii(i) AS ( + SELECT -1 + UNION ALL + SELECT i+1 FROM ii WHERE i<100000 + ) + SELECT count(*), min(i) FROM ii + WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int); +} {0 {}} + +do_test 1.4 { + set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=} + append str {"unicode61 separators '} + for {set i 700} {$i<900} {incr i} { + append str [format %c $i] + } + append str {'");} + execsql $str +} {} +do_test 1.5 { + set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=} + append str {"unicode61 tokenchars '} + for {set i 700} {$i<900} {incr i} { + append str [format %c $i] + } + append str {'");} + execsql $str +} {} + + +finish_test + diff --git a/manifest b/manifest index d897c6433b..3f410ed24c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\stest\scoverage\sof\sfts5_tokenize.c. -D 2015-05-20T09:27:51.629 +C Improve\stest\scoverage\sof\sfts5_unicode2.c. 
+D 2015-05-22T06:08:25.338 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -102,21 +102,22 @@ F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 8e53d0190a7b3443764bbd32ad47be2bd852026d F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad +F ext/fts3/unicode/mkunicode.tcl b321eea0c1604954a098775ce0b7860bc449f686 +F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 -F ext/fts5/fts5Int.h 9e581dc077d4c6758eaeb0d6a85dc875f53918dc +F ext/fts5/fts5Int.h ba0fd64be01cf7bf47ad20fcd23b629fdde6c4dc F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c 0c4b50bb48740c76b8e8b89d5d40a55f8dbffd07 +F ext/fts5/fts5_expr.c f9a2ef4efbc4b133e0173e4bf7d7ebff33eddcf1 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43 F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c -F ext/fts5/fts5_tokenize.c 6f4d2cbe7ed892821d1a233c7db613dafdb3877a -F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d +F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec +F ext/fts5/fts5_unicode2.c c75022368f940a38afa1d2f0164c78b11ab2f383 F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6 
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba @@ -168,6 +169,7 @@ F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 F ext/fts5/test/fts5tokenizer.test 668747fcb41de6fc7daebc478920b705164fccc1 F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test ad38982b03dc9213445facb16e99f668a74cc4ba +F ext/fts5/test/fts5unicode3.test 273f9086ad33935566bbc0d0c94d0d9687ef686b F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 F ext/fts5/test/fts5vocab.test 80fb22850dd3b2c92a3896e6021605e08c0872aa @@ -1329,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 4f90ba20e2be6ec5755fe894938ac97342d6fbf6 -R 43528c0613d372060fbd8256efc47909 +P 0e91a6a520f040b8902da6a1a4d9107dc66c0ea3 +R dac2002cb3b723a15f8d8c03f8a4c974 U dan -Z e3c696b644b37e5798613b4f15c87656 +Z b9f569713ab52c4f747377183dfd6e18 diff --git a/manifest.uuid b/manifest.uuid index e111b8a77c..04571c82b5 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0e91a6a520f040b8902da6a1a4d9107dc66c0ea3 \ No newline at end of file +fea8a4db9d8c7b9a946017a0dc984cbca6ce240e \ No newline at end of file From 0d45967b7fa8972427b253855fb0f71ea9dd1e45 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 22 May 2015 07:44:44 +0000 Subject: [PATCH 139/206] Increase test coverage of fts5_vocab.c. 
FossilOrigin-Name: 065ab83a6ce36e16d3b95a61505aa3cff0bfea84 --- ext/fts5/fts5_vocab.c | 20 ++++----- ext/fts5/test/fts5fault4.test | 36 ++++++++++++++++ ext/fts5/test/fts5vocab.test | 78 +++++++++++++++++++++++++++++++++++ manifest | 16 +++---- manifest.uuid | 2 +- 5 files changed, 133 insertions(+), 19 deletions(-) diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index 6d03877ad0..715811b1ca 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -260,8 +260,10 @@ static int fts5VocabOpenMethod( rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0); } sqlite3_free(zSql); + assert( rc==SQLITE_OK || pStmt==0 ); + if( rc==SQLITE_ERROR ) rc = SQLITE_OK; - if( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){ + if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){ i64 iId = sqlite3_column_int64(pStmt, 0); pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &nCol); } @@ -271,8 +273,8 @@ static int fts5VocabOpenMethod( pStmt = 0; if( rc==SQLITE_OK ){ pVTab->zErrMsg = sqlite3_mprintf( - "no such fts5 table: %Q.%Q", pTab->zFts5Db, pTab->zFts5Tbl - ); + "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl + ); rc = SQLITE_ERROR; } } @@ -304,13 +306,11 @@ static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ ** on the xClose method of the virtual table interface. 
*/ static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ - if( pCursor ){ - Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; - fts5VocabResetCursor(pCsr); - sqlite3Fts5BufferFree(&pCsr->term); - sqlite3_finalize(pCsr->pStmt); - sqlite3_free(pCsr); - } + Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + fts5VocabResetCursor(pCsr); + sqlite3Fts5BufferFree(&pCsr->term); + sqlite3_finalize(pCsr->pStmt); + sqlite3_free(pCsr); return SQLITE_OK; } diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index ad1315bec0..fe3fb0796a 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -335,5 +335,41 @@ do_faultsim_test 11.1 -faults oom-t* -body { faultsim_test_result {0 {}} {1 {disk I/O error}} } +#------------------------------------------------------------------------- +# OOM while creating an fts5vocab table. +# +reset_db +do_execsql_test 12.0 { + CREATE VIRTUAL TABLE ft USING fts5(x); +} +faultsim_save_and_close +do_faultsim_test 12.1 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM sqlite_master } +} -body { + db eval { CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row') } +} -test { + faultsim_test_result {0 {}} +} + +#------------------------------------------------------------------------- +# OOM while querying an fts5vocab table. 
+# +reset_db +do_execsql_test 13.0 { + CREATE VIRTUAL TABLE ft USING fts5(x); + INSERT INTO ft VALUES('a b'); + CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row'); +} +faultsim_save_and_close +do_faultsim_test 13.1 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM vv } +} -body { + db eval { SELECT * FROM vv } +} -test { + faultsim_test_result {0 {a 1 1 b 1 1}} +} + finish_test diff --git a/ext/fts5/test/fts5vocab.test b/ext/fts5/test/fts5vocab.test index fcef2b793d..2d2faac3e0 100644 --- a/ext/fts5/test/fts5vocab.test +++ b/ext/fts5/test/fts5vocab.test @@ -120,6 +120,7 @@ foreach {tn tbl resname} { } #------------------------------------------------------------------------- +# Test errors in the CREATE VIRTUAL TABLE statement. # foreach {tn sql} { 1 { CREATE VIRTUAL TABLE aa USING fts5vocab() } @@ -129,5 +130,82 @@ foreach {tn sql} { } { do_catchsql_test 3.$tn $sql {1 {wrong number of vtable arguments}} } + +do_catchsql_test 4.0 { + CREATE VIRTUAL TABLE cc USING fts5vocab(tbl, unknown); +} {1 {fts5vocab: unknown table type: 'unknown'}} + +do_catchsql_test 4.1 { + ATTACH 'test.db' AS aux; + CREATE VIRTUAL TABLE aux.cc USING fts5vocab(main, tbl, row); +} {1 {wrong number of vtable arguments}} + +#------------------------------------------------------------------------- +# Test fts5vocab tables created in the temp schema. 
+# +reset_db +forcedelete test.db2 +do_execsql_test 5.0 { + ATTACH 'test.db2' AS aux; + CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE temp.t1 USING fts5(x); + CREATE VIRTUAL TABLE aux.t1 USING fts5(x); + + INSERT INTO main.t1 VALUES('a b c'); + INSERT INTO main.t1 VALUES('d e f'); + INSERT INTO main.t1 VALUES('a e c'); + + INSERT INTO temp.t1 VALUES('1 2 3'); + INSERT INTO temp.t1 VALUES('4 5 6'); + INSERT INTO temp.t1 VALUES('1 5 3'); + + INSERT INTO aux.t1 VALUES('x y z'); + INSERT INTO aux.t1 VALUES('m n o'); + INSERT INTO aux.t1 VALUES('x n z'); +} + +breakpoint +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE temp.vm USING fts5vocab(main, t1, row); + CREATE VIRTUAL TABLE temp.vt1 USING fts5vocab(t1, row); + CREATE VIRTUAL TABLE temp.vt2 USING fts5vocab(temp, t1, row); + CREATE VIRTUAL TABLE temp.va USING fts5vocab(aux, t1, row); +} + +do_execsql_test 5.2 { SELECT * FROM vm } { + a 2 2 b 1 1 c 2 2 d 1 1 e 2 2 f 1 1 +} +do_execsql_test 5.3 { SELECT * FROM vt1 } { + 1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1 +} +do_execsql_test 5.4 { SELECT * FROM vt2 } { + 1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1 +} +do_execsql_test 5.5 { SELECT * FROM va } { + m 1 1 n 2 2 o 1 1 x 2 2 y 1 1 z 2 2 +} + +#------------------------------------------------------------------------- +# +do_execsql_test 6.0 { + CREATE TABLE iii(iii); + CREATE TABLE jjj(x); +} + +do_catchsql_test 6.1 { + CREATE VIRTUAL TABLE vocab1 USING fts5vocab(iii, row); + SELECT * FROM vocab1; +} {1 {no such fts5 table: main.iii}} + +do_catchsql_test 6.2 { + CREATE VIRTUAL TABLE vocab2 USING fts5vocab(jjj, row); + SELECT * FROM vocab2; +} {1 {no such fts5 table: main.jjj}} + +do_catchsql_test 6.2 { + CREATE VIRTUAL TABLE vocab3 USING fts5vocab(lll, row); + SELECT * FROM vocab3; +} {1 {no such fts5 table: main.lll}} + finish_test diff --git a/manifest b/manifest index 3f410ed24c..4511ac2102 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\stest\scoverage\sof\sfts5_unicode2.c. 
-D 2015-05-22T06:08:25.338 +C Increase\stest\scoverage\sof\sfts5_vocab.c. +D 2015-05-22T07:44:44.808 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -118,7 +118,7 @@ F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec F ext/fts5/fts5_unicode2.c c75022368f940a38afa1d2f0164c78b11ab2f383 -F ext/fts5/fts5_vocab.c b54301e376f59f08f662b5dde1cfaf26e86e4db6 +F ext/fts5/fts5_vocab.c 3d06e4306660fcd92a596c1e57c8be58dcc779dd F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 @@ -149,7 +149,7 @@ F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test 7c7c46559368f06b98daa940d376c4740bcdad25 +F ext/fts5/test/fts5fault4.test e7170486d71de72fe88018b5b920c0a9f6c19801 F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d @@ -172,7 +172,7 @@ F ext/fts5/test/fts5unicode2.test ad38982b03dc9213445facb16e99f668a74cc4ba F ext/fts5/test/fts5unicode3.test 273f9086ad33935566bbc0d0c94d0d9687ef686b F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 -F ext/fts5/test/fts5vocab.test 80fb22850dd3b2c92a3896e6021605e08c0872aa +F 
ext/fts5/test/fts5vocab.test 389e5fe4928eae5fddcf26bcc5a6890b0791aa75 F ext/fts5/tool/loadfts5.tcl add4d349ae5463c5f60b26e821c24e60ed8054d3 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 0e91a6a520f040b8902da6a1a4d9107dc66c0ea3 -R dac2002cb3b723a15f8d8c03f8a4c974 +P fea8a4db9d8c7b9a946017a0dc984cbca6ce240e +R 0a9b7fd2943b1fd62b28165e907889dd U dan -Z b9f569713ab52c4f747377183dfd6e18 +Z c38b4b46643fdff88e3ee48e8e4a1b8f diff --git a/manifest.uuid b/manifest.uuid index 04571c82b5..54bfcc0d8c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fea8a4db9d8c7b9a946017a0dc984cbca6ce240e \ No newline at end of file +065ab83a6ce36e16d3b95a61505aa3cff0bfea84 \ No newline at end of file From 2e7d35e2fee95943de75f38bda2d5a8f730b78c2 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 23 May 2015 15:43:05 +0000 Subject: [PATCH 140/206] Avoid making redundant copies of position-lists within the fts5 code. 
FossilOrigin-Name: 5165de548b84825cb000d33e5d3de12b0ef112c0 --- ext/fts3/unicode/mkunicode.tcl | 1 - ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_expr.c | 88 ++++++++++++++++++++-------------- ext/fts5/fts5_index.c | 74 ++++++++++++++++++++-------- ext/fts5/fts5_unicode2.c | 1 - ext/fts5/tool/loadfts5.tcl | 2 +- manifest | 22 ++++----- manifest.uuid | 2 +- 8 files changed, 122 insertions(+), 69 deletions(-) diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 32807a5cc5..6eea9bbc64 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -487,7 +487,6 @@ proc print_fold {zFunc} { puts [subst -nocommands { int ret = c; - assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index f914c983b1..7221a979dd 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -285,6 +285,7 @@ int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn); +int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). 
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 3b8601bcbd..1d1e359b63 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -672,49 +672,67 @@ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ ){ - int rc = SQLITE_OK; Fts5ExprNearset *pNear = pNode->pNear; - while( 1 ){ - int i; - /* Advance the iterators until they all point to the same rowid */ - rc = fts5ExprNearNextRowidMatch(pExpr, pNode); - if( rc!=SQLITE_OK || pNode->bEof ) break; + if( pNear->nPhrase==1 + && pNear->apPhrase[0]->nTerm==1 + && pNear->iCol<0 + ){ + /* If this "NEAR" object is actually a single phrase that consists of + ** a single term only, then the row that it currently points to must + ** be a match. All that is required is to populate pPhrase->poslist + ** with the position-list data for the only term. */ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + assert( pPhrase->poslist.nSpace==0 ); + pNode->iRowid = sqlite3Fts5IterRowid(pIter); + return sqlite3Fts5IterPoslist(pIter, + (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n + ); + }else{ + int rc = SQLITE_OK; - /* Check that each phrase in the nearset matches the current row. - ** Populate the pPhrase->poslist buffers at the same time. If any - ** phrase is not a match, break out of the loop early. 
*/ - for(i=0; rc==SQLITE_OK && inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ - int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); - if( bMatch==0 ) break; - }else{ - int n; - const u8 *a; - rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &a, &n); - fts5BufferSet(&rc, &pPhrase->poslist, n, a); + while( 1 ){ + int i; + + /* Advance the iterators until they all point to the same rowid */ + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); + if( rc!=SQLITE_OK || pNode->bEof ) break; + + /* Check that each phrase in the nearset matches the current row. + ** Populate the pPhrase->poslist buffers at the same time. If any + ** phrase is not a match, break out of the loop early. */ + for(i=0; rc==SQLITE_OK && inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); + if( bMatch==0 ) break; + }else{ + rc = sqlite3Fts5IterPoslistBuffer( + pPhrase->aTerm[0].pIter, &pPhrase->poslist + ); + } } - } - if( rc==SQLITE_OK && i==pNear->nPhrase ){ - int bMatch = 1; - if( pNear->nPhrase>1 ){ - rc = fts5ExprNearIsMatch(pNear, &bMatch); + if( rc==SQLITE_OK && i==pNear->nPhrase ){ + int bMatch = 1; + if( pNear->nPhrase>1 ){ + rc = fts5ExprNearIsMatch(pNear, &bMatch); + } + if( rc!=SQLITE_OK || bMatch ) break; } - if( rc!=SQLITE_OK || bMatch ) break; + + /* If control flows to here, then the current rowid is not a match. + ** Advance all term iterators in all phrases to the next rowid. */ + if( rc==SQLITE_OK ){ + rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0); + } + if( pNode->bEof || rc!=SQLITE_OK ) break; } - /* If control flows to here, then the current rowid is not a match. - ** Advance all term iterators in all phrases to the next rowid. 
*/ - if( rc==SQLITE_OK ){ - rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0); - } - if( pNode->bEof || rc!=SQLITE_OK ) break; + return rc; } - - return rc; } /* @@ -1050,7 +1068,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ sqlite3Fts5IterClose(pTerm->pIter); } } - fts5BufferFree(&pPhrase->poslist); + if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index dd0f42fa30..8759cf5901 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4057,6 +4057,27 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ return fts5IndexReturn(p); } +/* +** Iterator pIter currently points to a valid entry (not EOF). This +** function appends the position list data for the current entry to +** buffer pBuf. It does not make a copy of the position-list size +** field. +*/ +static void fts5SegiterPoslist( + Fts5Index *p, + Fts5SegIter *pSeg, + Fts5Buffer *pBuf +){ + if( p->rc==SQLITE_OK ){ + Fts5ChunkIter iter; + fts5ChunkIterInit(p, pSeg, &iter); + while( fts5ChunkIterEof(p, &iter)==0 ){ + fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); + fts5ChunkIterNext(p, &iter); + } + fts5ChunkIterRelease(&iter); + } +} /* ** Iterator pMulti currently points to a valid entry (not EOF). 
This @@ -4069,27 +4090,18 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ static void fts5MultiIterPoslist( Fts5Index *p, Fts5MultiSegIter *pMulti, - int bSz, + int bSz, /* Append a size field before the data */ Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ - Fts5ChunkIter iter; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); - fts5ChunkIterInit(p, pSeg, &iter); - - if( fts5ChunkIterEof(p, &iter)==0 ){ - if( bSz ){ - /* WRITEPOSLISTSIZE */ - fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2); - } - while( fts5ChunkIterEof(p, &iter)==0 ){ - fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); - fts5ChunkIterNext(p, &iter); - } + if( bSz ){ + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2); } - fts5ChunkIterRelease(&iter); + fts5SegiterPoslist(p, pSeg, pBuf); } } @@ -4686,15 +4698,39 @@ int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ *pn = pIter->pDoclist->nPoslist; *pp = pIter->pDoclist->aPoslist; }else{ - Fts5Index *p = pIter->pIndex; - fts5BufferZero(&pIter->poslist); - fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist); - *pn = pIter->poslist.n; - *pp = pIter->poslist.p; + Fts5MultiSegIter *pMulti = pIter->pMulti; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; + *pn = pSeg->nPos; + if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ + *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; + }else{ + fts5BufferZero(&pIter->poslist); + fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); + *pp = pIter->poslist.p; + } } return fts5IndexReturn(pIter->pIndex); } +/* +** This function is similar to sqlite3Fts5IterPoslist(), except that it +** copies the position list into the buffer supplied as the second +** argument. 
+*/ +int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){ + Fts5Index *p = pIter->pIndex; + Fts5DoclistIter *pDoclist = pIter->pDoclist; + assert( p->rc==SQLITE_OK ); + if( pDoclist ){ + fts5BufferSet(&p->rc, pBuf, pDoclist->nPoslist, pDoclist->aPoslist); + }else{ + Fts5MultiSegIter *pMulti = pIter->pMulti; + fts5BufferZero(pBuf); + fts5MultiIterPoslist(p, pMulti, 0, pBuf); + } + return fts5IndexReturn(p); +} + /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c index 11e211c0de..493cabf431 100644 --- a/ext/fts5/fts5_unicode2.c +++ b/ext/fts5/fts5_unicode2.c @@ -321,7 +321,6 @@ int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ int ret = c; - assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index 3110954a8c..b61b567c60 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -109,7 +109,7 @@ db transaction { if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)" - # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" + db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);" } if {$O(automerge)>=0} { if {$O(vtab) == "fts5"} { diff --git a/manifest b/manifest index 4511ac2102..3367ab84de 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Increase\stest\scoverage\sof\sfts5_vocab.c. -D 2015-05-22T07:44:44.808 +C Avoid\smaking\sredundant\scopies\sof\sposition-lists\swithin\sthe\sfts5\scode. 
+D 2015-05-23T15:43:05.567 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -102,22 +102,22 @@ F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 8e53d0190a7b3443764bbd32ad47be2bd852026d F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl b321eea0c1604954a098775ce0b7860bc449f686 +F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 -F ext/fts5/fts5Int.h ba0fd64be01cf7bf47ad20fcd23b629fdde6c4dc +F ext/fts5/fts5Int.h 271d2197ac32049adf3c947d671b6e682d8432b6 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c f9a2ef4efbc4b133e0173e4bf7d7ebff33eddcf1 +F ext/fts5/fts5_expr.c 638df4962683986e8c6e627d06934ee87ed68da2 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 2c4500c35072b049d1391bbb4e64e4c0e3d3dd43 +F ext/fts5/fts5_index.c 985bfa5ab258918b34b4c44866ce9f9a0f2a6b0e F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec -F ext/fts5/fts5_unicode2.c c75022368f940a38afa1d2f0164c78b11ab2f383 +F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 F ext/fts5/fts5_vocab.c 3d06e4306660fcd92a596c1e57c8be58dcc779dd F 
ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba @@ -173,7 +173,7 @@ F ext/fts5/test/fts5unicode3.test 273f9086ad33935566bbc0d0c94d0d9687ef686b F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 F ext/fts5/test/fts5vocab.test 389e5fe4928eae5fddcf26bcc5a6890b0791aa75 -F ext/fts5/tool/loadfts5.tcl add4d349ae5463c5f60b26e821c24e60ed8054d3 +F ext/fts5/tool/loadfts5.tcl 7ef3e62131f0434a78e4f5c5b056b09d221710a8 F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fea8a4db9d8c7b9a946017a0dc984cbca6ce240e -R 0a9b7fd2943b1fd62b28165e907889dd +P 065ab83a6ce36e16d3b95a61505aa3cff0bfea84 +R c8769c201431bb53a20a3f0848ead683 U dan -Z c38b4b46643fdff88e3ee48e8e4a1b8f +Z a11fb9d59a1c2f9d5ef19052d7f0a43f diff --git a/manifest.uuid b/manifest.uuid index 54bfcc0d8c..9d1be15fc6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -065ab83a6ce36e16d3b95a61505aa3cff0bfea84 \ No newline at end of file +5165de548b84825cb000d33e5d3de12b0ef112c0 \ No newline at end of file From 494016a424ebf8c2b1457f0bcd4bf0cd60838a2f Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 25 May 2015 11:46:33 +0000 Subject: [PATCH 141/206] Avoid redundant loads from the %_data table in the fts5 code. 
FossilOrigin-Name: 02069782f8b7896a582582c79185b50418622736 --- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_expr.c | 6 +- ext/fts5/fts5_index.c | 188 ++++++++++++++++---------------------- ext/fts5/fts5_vocab.c | 3 +- ext/fts5/test/fts5ah.test | 6 +- manifest | 20 ++-- manifest.uuid | 2 +- 7 files changed, 100 insertions(+), 127 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 7221a979dd..b0e9484c79 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -284,7 +284,7 @@ int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); -int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn); +int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi); int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf); /* diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 1d1e359b63..945bb637f8 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -357,9 +357,10 @@ static int fts5ExprPhraseIsMatch( /* Initialize a term iterator for each term in the phrase */ for(i=0; inTerm; i++){ + i64 dummy; int n; const u8 *a; - rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n); + rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy); if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){ goto ismatch_out; } @@ -685,9 +686,8 @@ static int fts5ExprNearNextMatch( Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; assert( pPhrase->poslist.nSpace==0 ); - pNode->iRowid = sqlite3Fts5IterRowid(pIter); return sqlite3Fts5IterPoslist(pIter, - (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n + (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid ); }else{ int rc = SQLITE_OK; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 8759cf5901..8ed53190ed 100644 --- a/ext/fts5/fts5_index.c +++ 
b/ext/fts5/fts5_index.c @@ -309,7 +309,6 @@ int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; -typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; @@ -317,7 +316,6 @@ typedef struct Fts5DlidxWriter Fts5DlidxWriter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; -typedef struct Fts5PosIter Fts5PosIter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; @@ -516,6 +514,7 @@ struct Fts5SegIter { int flags; /* Mask of configuration flags */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ + Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ int iLeafOffset; /* Byte offset within current leaf */ /* The page and offset from which the current term was read. The offset @@ -541,30 +540,6 @@ struct Fts5SegIter { #define FTS5_SEGITER_REVERSE 0x02 -/* -** Object for iterating through paginated data. -*/ -struct Fts5ChunkIter { - Fts5Data *pLeaf; /* Current leaf data. NULL -> EOF. */ - i64 iLeafRowid; /* Absolute rowid of current leaf */ - int nRem; /* Remaining bytes of data to read */ - - /* Output parameters */ - u8 *p; /* Pointer to chunk of data */ - int n; /* Size of buffer p in bytes */ -}; - -/* -** Object for iterating through a single position list on disk. -*/ -struct Fts5PosIter { - Fts5ChunkIter chunk; /* Current chunk of data */ - int iOff; /* Offset within chunk data */ - - int iCol; - int iPos; -}; - /* ** Object for iterating through the conents of a single internal node in ** memory. 
@@ -1713,7 +1688,11 @@ static void fts5SegIterNextPage( Fts5StructureSegment *pSeg = pIter->pSeg; fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; - if( pIter->iLeafPgno<=pSeg->pgnoLast ){ + if( pIter->pNextLeaf ){ + assert( pIter->iLeafPgno<=pSeg->pgnoLast ); + pIter->pLeaf = pIter->pNextLeaf; + pIter->pNextLeaf = 0; + }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno) ); @@ -1958,7 +1937,7 @@ static void fts5SegIterNext( assert( pbNewTerm==0 || *pbNewTerm==0 ); if( p->rc==SQLITE_OK ){ if( pIter->flags & FTS5_SEGITER_REVERSE ){ - + assert( pIter->pNextLeaf==0 ); if( pIter->iRowidOffset>0 ){ u8 *a = pIter->pLeaf->p; int iOff; @@ -2337,6 +2316,7 @@ static void fts5SegIterHashInit( static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); + fts5DataRelease(pIter->pNextLeaf); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); @@ -2483,9 +2463,12 @@ static void fts5SegIterGotoPage( int iLeafPgno ){ assert( iLeafPgno>pIter->iLeafPgno ); + if( iLeafPgno>pIter->pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; }else{ + fts5DataRelease(pIter->pNextLeaf); + pIter->pNextLeaf = 0; pIter->iLeafPgno = iLeafPgno-1; fts5SegIterNextPage(p, pIter); assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); @@ -2537,6 +2520,7 @@ static void fts5SegIterNextFrom( bMove = 0; } }else{ + assert( pIter->pNextLeaf==0 ); assert( iMatchiRowid ); while( !fts5DlidxIterEof(p, pDlidx) && iMatchterm.p; } -/* -** Return true if the chunk iterator passed as the second argument is -** at EOF. Or if an error has already occurred. Otherwise, return false. 
-*/ -static int fts5ChunkIterEof(Fts5Index *p, Fts5ChunkIter *pIter){ - return (p->rc || pIter->pLeaf==0); -} +static void fts5ChunkIterate( + Fts5Index *p, /* Index object */ + Fts5SegIter *pSeg, /* Poslist of this iterator */ + void *pCtx, /* Context pointer for xChunk callback */ + void (*xChunk)(Fts5Index*, void*, const u8*, int) +){ + int nRem = pSeg->nPos; /* Number of bytes still to come */ + Fts5Data *pData = 0; + u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; + int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset); + int pgno = pSeg->iLeafPgno; + int pgnoSave = 0; -/* -** Advance the chunk-iterator to the next chunk of data to read. -*/ -static void fts5ChunkIterNext(Fts5Index *p, Fts5ChunkIter *pIter){ - assert( pIter->nRem>=pIter->n ); - pIter->nRem -= pIter->n; - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - pIter->p = 0; - if( pIter->nRem>0 ){ - Fts5Data *pLeaf; - pIter->iLeafRowid++; - pLeaf = pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); - if( pLeaf ){ - pIter->n = MIN(pIter->nRem, pLeaf->n-4); - pIter->p = pLeaf->p+4; + if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ + pgnoSave = pgno+1; + } + + while( 1 ){ + xChunk(p, pCtx, pChunk, nChunk); + nRem -= nChunk; + fts5DataRelease(pData); + if( nRem<=0 ){ + break; + }else{ + pgno++; + pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno)); + if( pData==0 ) break; + pChunk = &pData->p[4]; + nChunk = MIN(nRem, pData->n - 4); + if( pgno==pgnoSave ){ + assert( pSeg->pNextLeaf==0 ); + pSeg->pNextLeaf = pData; + pData = 0; + } } } } -/* -** Intialize the chunk iterator to read the position list data for which -** the size field is at offset iOff of leaf pLeaf. 
-*/ -static void fts5ChunkIterInit( - Fts5Index *p, /* FTS5 backend object */ - Fts5SegIter *pSeg, /* Segment iterator to read poslist from */ - Fts5ChunkIter *pIter /* Initialize this object */ -){ - Fts5Data *pLeaf = pSeg->pLeaf; - int iOff = pSeg->iLeafOffset; - - memset(pIter, 0, sizeof(*pIter)); - /* If Fts5SegIter.pSeg is NULL, then this iterator iterates through data - ** currently stored in a hash table. In this case there is no leaf-rowid - ** to calculate. */ - if( pSeg->pSeg ){ - i64 rowid = FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pSeg->iLeafPgno); - pIter->iLeafRowid = rowid; - } - - fts5DataReference(pLeaf); - pIter->pLeaf = pLeaf; - pIter->nRem = pSeg->nPos; - pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); - pIter->p = pLeaf->p + iOff; - if( pIter->n==0 ){ - fts5ChunkIterNext(p, pIter); - } -} - -static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; -} /* @@ -3512,6 +3471,15 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ fts5BufferFree(&buf); } +static void fts5MergeChunkCallback( + Fts5Index *p, + void *pCtx, + const u8 *pChunk, int nChunk +){ + Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; + fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); +} + /* ** */ @@ -3583,7 +3551,6 @@ fflush(stdout); fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; - Fts5ChunkIter sPos; /* Used to iterate through position list */ int nPos; /* position-list size field value */ int nTerm; const u8 *pTerm; @@ -3591,12 +3558,9 @@ fflush(stdout); /* Check for key annihilation. 
*/ if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue; - fts5ChunkIterInit(p, pSeg, &sPos); - pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ if( pnRem && writer.nLeafWritten>nRem ){ - fts5ChunkIterRelease(&sPos); break; } @@ -3614,11 +3578,8 @@ fflush(stdout); nPos = pSeg->nPos*2 + pSeg->bDel; fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos); - for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ - fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); - } - - fts5ChunkIterRelease(&sPos); + /* Append the position-list data to the output */ + fts5ChunkIterate(p, pSeg, (void*)&writer, fts5MergeChunkCallback); } /* Flush the last leaf page to disk. Set the output segment b-tree height @@ -4057,6 +4018,14 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ return fts5IndexReturn(p); } +static void fts5PoslistCallback( + Fts5Index *p, + void *pCtx, + const u8 *pChunk, int nChunk +){ + fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); +} + /* ** Iterator pIter currently points to a valid entry (not EOF). This ** function appends the position list data for the current entry to @@ -4068,15 +4037,7 @@ static void fts5SegiterPoslist( Fts5SegIter *pSeg, Fts5Buffer *pBuf ){ - if( p->rc==SQLITE_OK ){ - Fts5ChunkIter iter; - fts5ChunkIterInit(p, pSeg, &iter); - while( fts5ChunkIterEof(p, &iter)==0 ){ - fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); - fts5ChunkIterNext(p, &iter); - } - fts5ChunkIterRelease(&iter); - } + fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); } /* @@ -4692,14 +4653,22 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ ** The returned position list does not include the "number of bytes" varint ** field that starts the position list on disk. 
*/ -int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ +int sqlite3Fts5IterPoslist( + Fts5IndexIter *pIter, + const u8 **pp, /* OUT: Pointer to position-list data */ + int *pn, /* OUT: Size of position-list in bytes */ + i64 *piRowid /* OUT: Current rowid */ +){ + Fts5DoclistIter *pDoclist = pIter->pDoclist; assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pDoclist ){ - *pn = pIter->pDoclist->nPoslist; - *pp = pIter->pDoclist->aPoslist; + if( pDoclist ){ + *pn = pDoclist->nPoslist; + *pp = pDoclist->aPoslist; + *piRowid = pDoclist->iRowid; }else{ Fts5MultiSegIter *pMulti = pIter->pMulti; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; + *piRowid = pSeg->iRowid; *pn = pSeg->nPos; if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; @@ -4983,10 +4952,11 @@ static int fts5QueryCksum( int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + i64 dummy; const u8 *pPos; int nPos; i64 rowid = sqlite3Fts5IterRowid(pIdxIter); - rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy); if( rc==SQLITE_OK ){ Fts5PoslistReader sReader; for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index 715811b1ca..c21ec2effe 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -347,11 +347,12 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); while( rc==SQLITE_OK ){ + i64 dummy; const u8 *pPos; int nPos; /* Position list */ i64 iPos = 0; /* 64-bit position read from poslist */ int iOff = 0; /* Current offset within position list */ - rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos); + rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy); if( rc==SQLITE_OK ){ if( pTab->eType==FTS5_VOCAB_ROW ){ while( 
0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ diff --git a/ext/fts5/test/fts5ah.test b/ext/fts5/test/fts5ah.test index 1ee4ab123e..ed2940763d 100644 --- a/ext/fts5/test/fts5ah.test +++ b/ext/fts5/test/fts5ah.test @@ -90,12 +90,14 @@ foreach {tn q res} " do_test 1.6.$tn.1 { set n [execsql_reads $q] - expr {$n < ($nReadX / 10)} + puts -nonewline "(n=$n nReadX=$nReadX)" + expr {$n < ($nReadX / 8)} } {1} do_test 1.6.$tn.2 { set n [execsql_reads "$q ORDER BY rowid DESC"] - expr {$n < ($nReadX / 10)} + puts -nonewline "(n=$n nReadX=$nReadX)" + expr {$n < ($nReadX / 8)} } {1} do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res] diff --git a/manifest b/manifest index 3367ab84de..8ad943d33d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\smaking\sredundant\scopies\sof\sposition-lists\swithin\sthe\sfts5\scode. -D 2015-05-23T15:43:05.567 +C Avoid\sredundant\sloads\sfrom\sthe\s%_data\stable\sin\sthe\sfts5\scode. +D 2015-05-25T11:46:33.325 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,18 +107,18 @@ F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 -F ext/fts5/fts5Int.h 271d2197ac32049adf3c947d671b6e682d8432b6 +F ext/fts5/fts5Int.h 2ce5c5e68852dd16de404b7a9a2a78f4f4588eb4 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c 638df4962683986e8c6e627d06934ee87ed68da2 +F ext/fts5/fts5_expr.c a8b31d363c02108dae01e13948661859f449ebb9 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 
985bfa5ab258918b34b4c44866ce9f9a0f2a6b0e +F ext/fts5/fts5_index.c 79b8fcf40bee484dc62a7a0ba1f3d8de0a662812 F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 -F ext/fts5/fts5_vocab.c 3d06e4306660fcd92a596c1e57c8be58dcc779dd +F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 @@ -129,7 +129,7 @@ F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 -F ext/fts5/test/fts5ah.test d74cf8b7de5b8424f732acef69fe12122a12f2bf +F ext/fts5/test/fts5ah.test dbc37d736886e1e38cfa5cd523812db1ad8d0a31 F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 065ab83a6ce36e16d3b95a61505aa3cff0bfea84 -R c8769c201431bb53a20a3f0848ead683 +P 5165de548b84825cb000d33e5d3de12b0ef112c0 +R 0a35d34e585361d7ce3301bdbbc300de U dan -Z a11fb9d59a1c2f9d5ef19052d7f0a43f +Z 478e4cabed12f1b6c85b518dcaf965c8 diff --git a/manifest.uuid b/manifest.uuid index 9d1be15fc6..139c411caa 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 
@@ -5165de548b84825cb000d33e5d3de12b0ef112c0 \ No newline at end of file +02069782f8b7896a582582c79185b50418622736 \ No newline at end of file From 7f64db13436e8cdd893f0e8cb805fb3bce6b3613 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 26 May 2015 18:22:01 +0000 Subject: [PATCH 142/206] Simplifications and minor optimizations to fts5 prefix queries that cannot use a prefix index. FossilOrigin-Name: aef89d9f6aa536efee347367558cf5d4ea81b238 --- ext/fts5/fts5_index.c | 236 ++++++++++++++++---------------------- ext/fts5/test/fts5ad.test | 2 +- manifest | 14 +-- manifest.uuid | 2 +- 4 files changed, 110 insertions(+), 144 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 8ed53190ed..4b32b8e5c9 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -326,7 +326,6 @@ typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ int n; /* Size of record in bytes */ - int nRef; /* Ref count */ }; /* @@ -358,7 +357,6 @@ struct Fts5Index { }; struct Fts5DoclistIter { - int bDesc; /* True for DESC order, false for ASC */ u8 *a; int n; int i; @@ -376,7 +374,6 @@ struct Fts5IndexIter { Fts5Index *pIndex; Fts5Structure *pStruct; Fts5MultiSegIter *pMulti; - Fts5DoclistIter *pDoclist; Fts5Buffer poslist; /* Buffer containing current poslist */ }; @@ -869,7 +866,6 @@ static Fts5Data *fts5DataReadOrBuffer( if( pRet ){ pRet->n = nByte; aOut = pRet->p = (u8*)&pRet[1]; - pRet->nRef = 1; } } @@ -917,15 +913,7 @@ static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ ** fts5DataRead(). 
*/ static void fts5DataRelease(Fts5Data *pData){ - if( pData ){ - assert( pData->nRef>0 ); - pData->nRef--; - if( pData->nRef==0 ) sqlite3_free(pData); - } -} - -static void fts5DataReference(Fts5Data *pData){ - pData->nRef++; + sqlite3_free(pData); } /* @@ -2295,7 +2283,6 @@ static void fts5SegIterHashInit( sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf==0 ) return; - pLeaf->nRef = 1; pLeaf->p = (u8*)pList; pLeaf->n = nList; pIter->pLeaf = pLeaf; @@ -2644,6 +2631,27 @@ static void fts5MultiIterNext( } } +static Fts5MultiSegIter *fts5MultiIterAlloc( + Fts5Index *p, /* FTS5 backend to iterate within */ + int nSeg +){ + Fts5MultiSegIter *pNew; + int nSlot; /* Power of two >= nSeg */ + + for(nSlot=2; nSlotaSeg[] */ + sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ + ); + if( pNew ){ + pNew->nSeg = nSlot; + pNew->aSeg = (Fts5SegIter*)&pNew[1]; + pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; + } + return pNew; +} + /* ** Allocate a new Fts5MultiSegIter object. ** @@ -2666,7 +2674,6 @@ static void fts5MultiIterNew( Fts5MultiSegIter **ppOut /* New object */ ){ int nSeg; /* Number of segment-iters in use */ - int nSlot = 0; /* Power of two >= nSeg */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl; @@ -2683,18 +2690,9 @@ static void fts5MultiIterNew( }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } - for(nSlot=2; nSlotaSeg[] */ - sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ - ); + *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); if( pNew==0 ) return; - pNew->nSeg = nSlot; - pNew->aSeg = (Fts5SegIter*)&pNew[1]; - pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); pNew->bSkipEmpty = bSkipEmpty; @@ -2730,7 +2728,7 @@ static void fts5MultiIterNew( ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. 
*/ if( p->rc==SQLITE_OK ){ - for(iIter=nSlot-1; iIter>0; iIter--){ + for(iIter=pNew->nSeg-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ fts5SegIterNext(p, &pNew->aSeg[iEq], 0); @@ -2748,6 +2746,42 @@ static void fts5MultiIterNew( } } +/* +** Create an Fts5MultiSegIter that iterates through the doclist provided +** as the second argument. +*/ +static void fts5MultiIterNew2( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5Data *pData, /* Doclist to iterate through */ + int bDesc, /* True for descending rowid order */ + Fts5MultiSegIter **ppOut /* New object */ +){ + Fts5MultiSegIter *pNew; + pNew = fts5MultiIterAlloc(p, 2); + if( pNew ){ + Fts5SegIter *pIter = &pNew->aSeg[1]; + + pIter->flags = FTS5_SEGITER_ONETERM; + if( pData->n>0 ){ + pIter->pLeaf = pData; + pIter->iLeafOffset = getVarint(pData->p, (u64*)&pIter->iRowid); + pNew->aFirst[1].iFirst = 1; + if( bDesc ){ + pNew->bRev = 1; + pIter->flags |= FTS5_SEGITER_REVERSE; + fts5SegIterReverseInitPage(p, pIter); + }else{ + fts5SegIterLoadNPos(p, pIter); + } + pData = 0; + } + + *ppOut = pNew; + } + + fts5DataRelease(pData); +} + /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. 
@@ -4072,11 +4106,7 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ if( pIter->i ){ i64 iDelta; pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); - if( pIter->bDesc ){ - pIter->iRowid -= iDelta; - }else{ - pIter->iRowid += iDelta; - } + pIter->iRowid += iDelta; }else{ pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } @@ -4092,13 +4122,11 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ static void fts5DoclistIterInit( Fts5Buffer *pBuf, - int bDesc, Fts5DoclistIter *pIter ){ memset(pIter, 0, sizeof(*pIter)); pIter->a = pBuf->p; pIter->n = pBuf->n; - pIter->bDesc = bDesc; fts5DoclistIterNext(pIter); } @@ -4107,15 +4135,12 @@ static void fts5DoclistIterInit( */ static void fts5MergeAppendDocid( int *pRc, /* IN/OUT: Error code */ - int bDesc, Fts5Buffer *pBuf, /* Buffer to write to */ i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ i64 iRowid /* Rowid to append */ ){ if( pBuf->n==0 ){ fts5BufferAppendVarint(pRc, pBuf, iRowid); - }else if( bDesc ){ - fts5BufferAppendVarint(pRc, pBuf, *piLastRowid - iRowid); }else{ fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); } @@ -4132,7 +4157,6 @@ static void fts5MergeAppendDocid( */ static void fts5MergePrefixLists( Fts5Index *p, /* FTS5 backend object */ - int bDesc, Fts5Buffer *p1, /* First list to merge */ Fts5Buffer *p2 /* Second list to merge */ ){ @@ -4145,14 +4169,12 @@ static void fts5MergePrefixLists( memset(&out, 0, sizeof(out)); memset(&tmp, 0, sizeof(tmp)); - fts5DoclistIterInit(p1, bDesc, &i1); - fts5DoclistIterInit(p2, bDesc, &i2); + fts5DoclistIterInit(p1, &i1); + fts5DoclistIterInit(p2, &i2); while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ - if( i2.aPoslist==0 || (i1.aPoslist && - ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowidrc, bDesc, &out, &iLastRowid, i1.iRowid); + fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i1.iRowid); /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); 
fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); @@ -4160,7 +4182,7 @@ static void fts5MergePrefixLists( } else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ /* Copy entry from i2 */ - fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); + fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); @@ -4174,7 +4196,7 @@ static void fts5MergePrefixLists( memset(&writer, 0, sizeof(writer)); /* Merge the two position lists. */ - fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); + fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); fts5BufferZero(&tmp); sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); @@ -4227,10 +4249,10 @@ static void fts5SetupPrefixIter( if( aBuf && pStruct ){ const int flags = FTS5INDEX_QUERY_SCAN; - Fts5DoclistIter *pDoclist; int i; i64 iLastRowid = 0; Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ + Fts5Data *pData; Fts5Buffer doclist; memset(&doclist, 0, sizeof(doclist)); @@ -4244,45 +4266,37 @@ static void fts5SetupPrefixIter( assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( nTerm0 - && ((!bDesc && iRowid<=iLastRowid) || (bDesc && iRowid>=iLastRowid)) - ){ - + if( doclist.n>0 && iRowid<=iLastRowid ){ for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ assert( irc, &doclist, iRowid); - }else if( bDesc ){ - fts5BufferAppendVarint(&p->rc, &doclist, iLastRowid - iRowid); - }else{ - fts5BufferAppendVarint(&p->rc, &doclist, iRowid - iLastRowid); - } - iLastRowid = iRowid; + + fts5MergeAppendDocid(&p->rc, &doclist, &iLastRowid, iRowid); fts5MultiIterPoslist(p, p1, 1, &doclist); } for(i=0; ipDoclist = pDoclist; - fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist); + pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); + if( pData ){ + pData->p = 
(u8*)&pData[1]; + pData->n = doclist.n; + memcpy(pData->p, doclist.p, doclist.n); + fts5MultiIterNew2(p, pData, bDesc, &pIter->pMulti); } + fts5BufferFree(&doclist); } fts5StructureRelease(pStruct); @@ -4550,11 +4564,7 @@ int sqlite3Fts5IndexQuery( */ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pDoclist ){ - return pIter->pDoclist->aPoslist==0; - }else{ - return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); - } + return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); } /* @@ -4562,12 +4572,8 @@ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ */ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); - if( pIter->pDoclist ){ - fts5DoclistIterNext(pIter->pDoclist); - }else{ - fts5BufferZero(&pIter->poslist); - fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); - } + fts5BufferZero(&pIter->poslist); + fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); return fts5IndexReturn(pIter->pIndex); } @@ -4594,32 +4600,13 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){ return fts5IndexReturn(pIter->pIndex); } -/* -** Move the doclist-iter passed as the first argument to the next -** matching rowid that occurs at or after iMatch. The definition of "at -** or after" depends on whether this iterator iterates in ascending or -** descending rowid order. -*/ -static void fts5DoclistIterNextFrom(Fts5DoclistIter *p, i64 iMatch){ - do{ - i64 iRowid = p->iRowid; - if( p->bDesc==0 && iRowid>=iMatch ) break; - if( p->bDesc!=0 && iRowid<=iMatch ) break; - fts5DoclistIterNext(p); - }while( p->aPoslist ); -} - /* ** Move to the next matching rowid that occurs at or after iMatch. The ** definition of "at or after" depends on whether this iterator iterates ** in ascending or descending rowid order. 
*/ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ - if( pIter->pDoclist ){ - fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); - }else{ - fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); - } + fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); return fts5IndexReturn(pIter->pIndex); } @@ -4627,11 +4614,7 @@ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ ** Return the current rowid. */ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ - if( pIter->pDoclist ){ - return pIter->pDoclist->iRowid; - }else{ - return fts5MultiIterRowid(pIter->pMulti); - } + return fts5MultiIterRowid(pIter->pMulti); } /* @@ -4659,24 +4642,17 @@ int sqlite3Fts5IterPoslist( int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ - Fts5DoclistIter *pDoclist = pIter->pDoclist; + Fts5MultiSegIter *pMulti = pIter->pMulti; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); - if( pDoclist ){ - *pn = pDoclist->nPoslist; - *pp = pDoclist->aPoslist; - *piRowid = pDoclist->iRowid; + *piRowid = pSeg->iRowid; + *pn = pSeg->nPos; + if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ + *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ - Fts5MultiSegIter *pMulti = pIter->pMulti; - Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; - *piRowid = pSeg->iRowid; - *pn = pSeg->nPos; - if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ - *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; - }else{ - fts5BufferZero(&pIter->poslist); - fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); - *pp = pIter->poslist.p; - } + fts5BufferZero(&pIter->poslist); + fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); + *pp = pIter->poslist.p; } return fts5IndexReturn(pIter->pIndex); } @@ -4688,15 +4664,11 @@ int sqlite3Fts5IterPoslist( */ int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){ Fts5Index *p = pIter->pIndex; - Fts5DoclistIter *pDoclist = 
pIter->pDoclist; + Fts5MultiSegIter *pMulti = pIter->pMulti; + assert( p->rc==SQLITE_OK ); - if( pDoclist ){ - fts5BufferSet(&p->rc, pBuf, pDoclist->nPoslist, pDoclist->aPoslist); - }else{ - Fts5MultiSegIter *pMulti = pIter->pMulti; - fts5BufferZero(pBuf); - fts5MultiIterPoslist(p, pMulti, 0, pBuf); - } + fts5BufferZero(pBuf); + fts5MultiIterPoslist(p, pMulti, 0, pBuf); return fts5IndexReturn(p); } @@ -4705,14 +4677,9 @@ int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){ */ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ if( pIter ){ - if( pIter->pDoclist ){ - sqlite3_free(pIter->pDoclist->a); - sqlite3_free(pIter->pDoclist); - }else{ - fts5MultiIterFree(pIter->pIndex, pIter->pMulti); - fts5StructureRelease(pIter->pStruct); - fts5BufferFree(&pIter->poslist); - } + fts5MultiIterFree(pIter->pIndex, pIter->pMulti); + fts5StructureRelease(pIter->pStruct); + fts5BufferFree(&pIter->poslist); fts5CloseReader(pIter->pIndex); sqlite3_free(pIter); } @@ -5448,7 +5415,6 @@ static void fts5DecodeFunction( dlidx.p = a; dlidx.n = n; - dlidx.nRef = 2; memset(&lvl, 0, sizeof(Fts5DlidxLvl)); lvl.pData = &dlidx; diff --git a/ext/fts5/test/fts5ad.test b/ext/fts5/test/fts5ad.test index 461fe41e50..66ca1f1640 100644 --- a/ext/fts5/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -215,7 +215,7 @@ foreach {T create} { 11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*} 16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*} 21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*} - 27 {x*} + 27 {x*} 28 {a f*} 29 {a* f*} 30 {a* fghij*} } { set res [prefix_query $prefix] diff --git a/manifest b/manifest index 8ad943d33d..eb1730fed3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\sredundant\sloads\sfrom\sthe\s%_data\stable\sin\sthe\sfts5\scode. -D 2015-05-25T11:46:33.325 +C Simplifications\sand\sminor\soptimizations\sto\sfts5\sprefix\squeries\sthat\scannot\suse\sa\sprefix\sindex. 
+D 2015-05-26T18:22:01.843 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -113,7 +113,7 @@ F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 F ext/fts5/fts5_expr.c a8b31d363c02108dae01e13948661859f449ebb9 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 79b8fcf40bee484dc62a7a0ba1f3d8de0a662812 +F ext/fts5/fts5_index.c c41e4f90876ad7d1feeaddb14d29d4e4c7dd17db F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec @@ -125,7 +125,7 @@ F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 F ext/fts5/test/fts5aa.test 26f1a462213f3aa067c208bd508d6218c54a620f F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 05008e00bd2761cc45df838a0988ecf318cbe1fd -F ext/fts5/test/fts5ad.test 2141b0360dc4397bfed30f0b0d700fa64b44835d +F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 5165de548b84825cb000d33e5d3de12b0ef112c0 -R 0a35d34e585361d7ce3301bdbbc300de +P 02069782f8b7896a582582c79185b50418622736 +R 01e4b9608560855e31f532d223b68ea6 U dan -Z 478e4cabed12f1b6c85b518dcaf965c8 +Z 2a2ddee3129d7371ffa2412643605052 diff 
--git a/manifest.uuid b/manifest.uuid index 139c411caa..0edfe481c8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -02069782f8b7896a582582c79185b50418622736 \ No newline at end of file +aef89d9f6aa536efee347367558cf5d4ea81b238 \ No newline at end of file From af5cd564f01a4bc71a906d2c83877362d6436377 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 28 May 2015 14:37:26 +0000 Subject: [PATCH 143/206] Remove some dead code from fts5. Add auxiliary function api tests to the same. FossilOrigin-Name: 0f9df202cc58097afddb8dad662b7c7fdc2c7d0c --- ext/fts5/fts5_index.c | 93 ----------------------------------- ext/fts5/test/fts5aux.test | 20 ++++++++ ext/fts5/test/fts5fault5.test | 4 +- manifest | 16 +++--- manifest.uuid | 2 +- 5 files changed, 31 insertions(+), 104 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 4b32b8e5c9..2e94516227 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -272,29 +272,11 @@ #define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno) #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) -#if 0 -/* -** The height of segment b-trees is actually limited to one less than -** (1<pConfig->zDb, p->zDataTbl, iRowid); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ - sqlite3_stmt *pStmt; - rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, &pStmt, 0); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - bOk = sqlite3_column_int(pStmt, 0); - } - rc = sqlite3_finalize(pStmt); - } - sqlite3_free(zSql); - } - - if( rc==SQLITE_OK ){ - rc = bOk ? SQLITE_ERROR : FTS5_CORRUPT; - } - - return rc; -} -#endif - static Fts5Data *fts5DataReadOrBuffer( Fts5Index *p, Fts5Buffer *pBuf, @@ -978,20 +920,6 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ p->rc = sqlite3_reset(p->pDeleter); } -/* -** Close the sqlite3_blob handle used to read records from the %_data table. -** And discard any cached reads. 
This function is called at the end of -** a read transaction or when any sub-transaction is rolled back. -*/ -#if 0 -static void fts5DataReset(Fts5Index *p){ - if( p->pReader ){ - sqlite3_blob_close(p->pReader); - p->pReader = 0; - } -} -#endif - /* ** Remove all records associated with segment iSegid. */ @@ -3309,22 +3237,6 @@ static void fts5WriteAppendRowid( } } -#if 0 -static void fts5WriteAppendPoslistInt( - Fts5Index *p, - Fts5SegWriter *pWriter, - int iVal -){ - if( p->rc==SQLITE_OK ){ - Fts5PageWriter *pPage = &pWriter->aWriter[0]; - fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); - if( pPage->buf.n>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - } - } -} -#endif - static void fts5WriteAppendPoslistData( Fts5Index *p, Fts5SegWriter *pWriter, @@ -3574,11 +3486,6 @@ static void fts5IndexMergeLevel( } bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); -#if 0 -fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); -fflush(stdout); -#endif - assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; diff --git a/ext/fts5/test/fts5aux.test b/ext/fts5/test/fts5aux.test index 9237241474..bbb6cf81f1 100644 --- a/ext/fts5/test/fts5aux.test +++ b/ext/fts5/test/fts5aux.test @@ -220,5 +220,25 @@ do_execsql_test 7.2 { FROM t1 WHERE t1 MATCH 'a OR b+c' } {0 1 2 0} +#------------------------------------------------------------------------- +# +do_execsql_test 8.0 { + CREATE VIRTUAL TABLE x1 USING fts5(a); +} + +foreach {tn lRow res} { + 4 {"a a a" "b" "a d"} {"[a] [a] [a]" "[a] d"} + 1 {"b d" "a b"} {"[b] [d]" "[a] b"} + 2 {"d b" "a d"} {"[d] [b]" "[a] d"} + 3 {"a a d"} {"[a] [a] d"} +} { + execsql { DELETE FROM x1 } + foreach row $lRow { execsql { INSERT INTO x1 VALUES($row) } } + breakpoint + do_execsql_test 8.$tn { + SELECT highlight(x1, 0, '[', ']') FROM x1 WHERE x1 MATCH 'a OR (b AND d)'; + } $res +} + finish_test diff --git a/ext/fts5/test/fts5fault5.test 
b/ext/fts5/test/fts5fault5.test index 21a4e5d6b4..c14f394eb6 100644 --- a/ext/fts5/test/fts5fault5.test +++ b/ext/fts5/test/fts5fault5.test @@ -76,13 +76,13 @@ do_test 3.0 { BEGIN; } for {set i 0} {$i < 20} {incr i} { - set str [string repeat "$i " 20] + set str [string repeat "$i " 50] execsql { INSERT INTO tt VALUES($str) } } execsql COMMIT } {} -do_faultsim_test 2.1 -faults oom-t* -body { +do_faultsim_test 3.1 -faults oom-t* -body { db eval { SELECT term FROM tv; } diff --git a/manifest b/manifest index eb1730fed3..328ed4e922 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Simplifications\sand\sminor\soptimizations\sto\sfts5\sprefix\squeries\sthat\scannot\suse\sa\sprefix\sindex. -D 2015-05-26T18:22:01.843 +C Remove\ssome\sdead\scode\sfrom\sfts5.\sAdd\sauxiliary\sfunction\sapi\stests\sto\sthe\ssame. +D 2015-05-28T14:37:26.732 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -113,7 +113,7 @@ F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 F ext/fts5/fts5_expr.c a8b31d363c02108dae01e13948661859f449ebb9 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c c41e4f90876ad7d1feeaddb14d29d4e4c7dd17db +F ext/fts5/fts5_index.c a693ba741b82539da5779329214e5d2609e82e5f F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec @@ -134,7 +134,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5aux.test 
db9035ef292f3ae57ac392f974b1e6b1dd48c6c7 +F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5config.test c9cc535f3b36cde1e5a32bf579f3f5962a9e82b2 @@ -150,7 +150,7 @@ F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test e7170486d71de72fe88018b5b920c0a9f6c19801 -F ext/fts5/test/fts5fault5.test 98e7e77bc1d8bb47c955e7d6dc870ab5736536e3 +F ext/fts5/test/fts5fault5.test 54da9fd4c3434a1d4f6abdcb6469299d91cf5875 F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 02069782f8b7896a582582c79185b50418622736 -R 01e4b9608560855e31f532d223b68ea6 +P aef89d9f6aa536efee347367558cf5d4ea81b238 +R bb8e2d4390fd848d148991824b1dfd3a U dan -Z 2a2ddee3129d7371ffa2412643605052 +Z cb932577f097e506aa4f837dda1f2a1b diff --git a/manifest.uuid b/manifest.uuid index 0edfe481c8..d0242f4b60 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -aef89d9f6aa536efee347367558cf5d4ea81b238 \ No newline at end of file +0f9df202cc58097afddb8dad662b7c7fdc2c7d0c \ No newline at end of file From e96efb642633606c5c87ef3649e7fe183158b601 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 28 May 2015 19:57:12 +0000 Subject: [PATCH 144/206] Optimizations for fts5 queries that match against a specific 
column. FossilOrigin-Name: b29ac50af0491a780a5a4c0985d88d0e5e014ba3 --- ext/fts5/fts5_expr.c | 106 ++++++++++++++++++++++++++++---------- ext/fts5/test/fts5aa.test | 6 +++ manifest | 14 ++--- manifest.uuid | 2 +- 4 files changed, 93 insertions(+), 35 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 945bb637f8..891bb30382 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -652,6 +652,45 @@ static int fts5ExprNearNextRowidMatch( return rc; } +/* +** IN/OUT parameter (*pa) points to a position list n bytes in size. If +** the position list contains entries for column iCol, then (*pa) is set +** to point to the sub-position-list for that column and the number of +** bytes in it returned. Or, if the argument position list does not +** contain any entries for column iCol, return 0. +*/ +static int fts5ExprExtractCol( + const u8 **pa, /* IN/OUT: Pointer to poslist */ + int n, /* IN: Size of poslist in bytes */ + int iCol /* Column to extract from poslist */ +){ + int ii; + int iCurrent = 0; + const u8 *p = *pa; + const u8 *pEnd = &p[n]; /* One byte past end of position list */ + u8 prev = 0; + + while( iCol!=iCurrent ){ + /* Advance pointer p until it points to pEnd or an 0x01 byte that is + ** not part of a varint */ + while( !(prev & 0x80) && *p!=0x01 ){ + prev = *p++; + if( p==pEnd ) return 0; + } + *pa = p++; + p += getVarint32(p, iCurrent); + } + + /* Advance pointer p until it points to pEnd or an 0x01 byte that is + ** not part of a varint */ + while( ppNear; + int rc = SQLITE_OK; - if( pNear->nPhrase==1 - && pNear->apPhrase[0]->nTerm==1 - && pNear->iCol<0 - ){ - /* If this "NEAR" object is actually a single phrase that consists of - ** a single term only, then the row that it currently points to must - ** be a match. All that is required is to populate pPhrase->poslist - ** with the position-list data for the only term. 
*/ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; - Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - assert( pPhrase->poslist.nSpace==0 ); - return sqlite3Fts5IterPoslist(pIter, - (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid - ); - }else{ - int rc = SQLITE_OK; + while( 1 ){ + int i; - while( 1 ){ - int i; + if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ + /* If this "NEAR" object is actually a single phrase that consists + ** of a single term only, then grab pointers into the poslist + ** managed by the fts5_index.c iterator object. This is much faster + ** than synthesizing a new poslist the way we have to for more + ** complicated phrase or NEAR expressions. */ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + assert( pPhrase->poslist.nSpace==0 ); + rc = sqlite3Fts5IterPoslist(pIter, + (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid + ); + + /* If the term may match any column, then this must be a match. + ** Return immediately in this case. Otherwise, try to find the + ** part of the poslist that corresponds to the required column. + ** If it can be found, return. If it cannot, the next iteration + ** of the loop will test the next rowid in the database for this + ** term. */ + if( pNear->iCol<0 ) return rc; + + pPhrase->poslist.n = fts5ExprExtractCol( + (const u8**)&pPhrase->poslist.p, + pPhrase->poslist.n, + pNear->iCol + ); + if( pPhrase->poslist.n ) return rc; + }else{ /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode); @@ -711,7 +763,7 @@ static int fts5ExprNearNextMatch( }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist - ); + ); } } @@ -722,17 +774,17 @@ static int fts5ExprNearNextMatch( } if( rc!=SQLITE_OK || bMatch ) break; } - - /* If control flows to here, then the current rowid is not a match. 
- ** Advance all term iterators in all phrases to the next rowid. */ - if( rc==SQLITE_OK ){ - rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0); - } - if( pNode->bEof || rc!=SQLITE_OK ) break; } - return rc; + /* If control flows to here, then the current rowid is not a match. + ** Advance all term iterators in all phrases to the next rowid. */ + if( rc==SQLITE_OK ){ + rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0); + } + if( pNode->bEof || rc!=SQLITE_OK ) break; } + + return rc; } /* diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 800215d2bb..e7c37ccbf8 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -426,6 +426,12 @@ do_test 17.2 { set res } {{a b c} {a b c} {a b c}} +reset_db +do_execsql_test 18.1 { + CREATE VIRTUAL TABLE c2 USING fts5(x, y); + INSERT INTO c2 VALUES('x x x', 'x x x'); + SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; +} {1} finish_test diff --git a/manifest b/manifest index 328ed4e922..887ea5a8a5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\ssome\sdead\scode\sfrom\sfts5.\sAdd\sauxiliary\sfunction\sapi\stests\sto\sthe\ssame. -D 2015-05-28T14:37:26.732 +C Optimizations\sfor\sfts5\squeries\sthat\smatch\sagainst\sa\sspecific\scolumn. 
+D 2015-05-28T19:57:12.367 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5Int.h 2ce5c5e68852dd16de404b7a9a2a78f4f4588eb4 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c a8b31d363c02108dae01e13948661859f449ebb9 +F ext/fts5/fts5_expr.c c94983eaff58391d7c0d62e99de917cecd0f1dbc F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c a693ba741b82539da5779329214e5d2609e82e5f F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 @@ -122,7 +122,7 @@ F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 -F ext/fts5/test/fts5aa.test 26f1a462213f3aa067c208bd508d6218c54a620f +F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 05008e00bd2761cc45df838a0988ecf318cbe1fd F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P aef89d9f6aa536efee347367558cf5d4ea81b238 -R bb8e2d4390fd848d148991824b1dfd3a +P 0f9df202cc58097afddb8dad662b7c7fdc2c7d0c +R f25569d3bfd1393da78f5986e0e8acff U dan -Z cb932577f097e506aa4f837dda1f2a1b +Z d652fde1b36e85f62688dc3a9737ccda diff --git 
a/manifest.uuid b/manifest.uuid index d0242f4b60..4025ca1b11 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0f9df202cc58097afddb8dad662b7c7fdc2c7d0c \ No newline at end of file +b29ac50af0491a780a5a4c0985d88d0e5e014ba3 \ No newline at end of file From 6d21f42db22647650477aa46e2b5a25a193e411a Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 29 May 2015 15:55:30 +0000 Subject: [PATCH 145/206] Add syntax to fts5 used to specify that a phrase or NEAR group should match a subset of columns. For example "[col1 col2 ...] : ". FossilOrigin-Name: 0fc0ea20920615f3e48ea2dbe2b7dcd979b0993e --- ext/fts5/fts5Int.h | 9 +- ext/fts5/fts5_expr.c | 172 +++++++++++++++++++++++++++++++------- ext/fts5/fts5_index.c | 2 - ext/fts5/fts5parse.y | 21 ++++- ext/fts5/test/fts5ac.test | 46 ++++++++-- manifest | 20 ++--- manifest.uuid | 2 +- 7 files changed, 217 insertions(+), 55 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index b0e9484c79..e52e3bc19e 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -511,6 +511,7 @@ typedef struct Fts5Parse Fts5Parse; typedef struct Fts5Token Fts5Token; typedef struct Fts5ExprPhrase Fts5ExprPhrase; typedef struct Fts5ExprNearset Fts5ExprNearset; +typedef struct Fts5ExprColset Fts5ExprColset; struct Fts5Token { const char *p; /* Token text (not NULL terminated) */ @@ -578,12 +579,18 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5ExprPhrase* ); +Fts5ExprColset *sqlite3Fts5ParseColset( + Fts5Parse*, + Fts5ExprColset*, + Fts5Token * +); + void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); -void sqlite3Fts5ParseSetColumn(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); +void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5ExprColset*); void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); void sqlite3Fts5ParseNear(Fts5Parse *pParse, 
Fts5Token*); diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 891bb30382..23827293fe 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -79,13 +79,23 @@ struct Fts5ExprPhrase { Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; +/* +** If a NEAR() clump may only match a specific set of columns, then +** Fts5ExprNearset.pColset points to an object of the following type. +** Each entry in the aiCol[] array +*/ +struct Fts5ExprColset { + int nCol; + int aiCol[1]; +}; + /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. */ struct Fts5ExprNearset { int nNear; /* NEAR parameter */ - int iCol; /* Column to search (-1 -> all columns) */ + Fts5ExprColset *pColset; /* Columns to search (NULL -> all columns) */ int nPhrase; /* Number of entries in aPhrase[] array */ Fts5ExprPhrase *apPhrase[0]; /* Array of phrase pointers */ }; @@ -136,6 +146,8 @@ static int fts5ExprGetToken( switch( *z ){ case '(': tok = FTS5_LP; break; case ')': tok = FTS5_RP; break; + case '[': tok = FTS5_LSP; break; + case ']': tok = FTS5_RSP; break; case ':': tok = FTS5_COLON; break; case ',': tok = FTS5_COMMA; break; case '+': tok = FTS5_PLUS; break; @@ -275,7 +287,6 @@ int sqlite3Fts5ExprPhraseExpr( pNode->eType = FTS5_STRING; pNode->pNear = pNear; - pNear->iCol = -1; pNear->nPhrase = 1; pNear->apPhrase[0] = pCopy; @@ -335,7 +346,7 @@ void sqlite3Fts5ExprFree(Fts5Expr *p){ */ static int fts5ExprPhraseIsMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - int iCol, /* If >=0, search for matches in iCol only */ + Fts5ExprColset *pColset, /* Restrict matches to these columns */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ @@ -344,6 +355,7 @@ static int fts5ExprPhraseIsMatch( Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; + int iCol = pColset ? 
pColset->aiCol[0] : -1; fts5BufferZero(&pPhrase->poslist); @@ -664,7 +676,6 @@ static int fts5ExprExtractCol( int n, /* IN: Size of poslist in bytes */ int iCol /* Column to extract from poslist */ ){ - int ii; int iCurrent = 0; const u8 *p = *pa; const u8 *pEnd = &p[n]; /* One byte past end of position list */ @@ -716,7 +727,6 @@ static int fts5ExprNearNextMatch( int rc = SQLITE_OK; while( 1 ){ - int i; if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ /* If this "NEAR" object is actually a single phrase that consists @@ -726,10 +736,11 @@ static int fts5ExprNearNextMatch( ** complicated phrase or NEAR expressions. */ Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - assert( pPhrase->poslist.nSpace==0 ); - rc = sqlite3Fts5IterPoslist(pIter, - (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid - ); + Fts5ExprColset *pColset = pNear->pColset; + const u8 *pPos; + int nPos; + + rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); /* If the term may match any column, then this must be a match. ** Return immediately in this case. Otherwise, try to find the @@ -737,15 +748,31 @@ static int fts5ExprNearNextMatch( ** If it can be found, return. If it cannot, the next iteration ** of the loop will test the next rowid in the database for this ** term. 
*/ - if( pNear->iCol<0 ) return rc; + if( pColset==0 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.p = (u8*)pPos; + pPhrase->poslist.n = nPos; + }else if( pColset->nCol==1 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); + pPhrase->poslist.p = (u8*)pPos; + }else{ + int i; + fts5BufferZero(&pPhrase->poslist); + for(i=0; inCol; i++){ + const u8 *pSub = pPos; + int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]); + if( nSub ){ + fts5BufferAppendBlob(&rc, &pPhrase->poslist, nSub, pSub); + } + } + } - pPhrase->poslist.n = fts5ExprExtractCol( - (const u8**)&pPhrase->poslist.p, - pPhrase->poslist.n, - pNear->iCol - ); if( pPhrase->poslist.n ) return rc; }else{ + int i; + + assert( pNear->pColset==0 || pNear->pColset->nCol==1 ); /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode); @@ -756,14 +783,14 @@ static int fts5ExprNearNextMatch( ** phrase is not a match, break out of the loop early. 
*/ for(i=0; rc==SQLITE_OK && inPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ + if( pPhrase->nTerm>1 || pNear->pColset ){ int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); + rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ rc = sqlite3Fts5IterPoslistBuffer( pPhrase->aTerm[0].pIter, &pPhrase->poslist - ); + ); } } @@ -1152,7 +1179,6 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( pParse->rc = SQLITE_NOMEM; }else{ memset(pRet, 0, nByte); - pRet->iCol = -1; } }else if( (pNear->nPhrase % SZALLOC)==0 ){ int nNew = pNear->nPhrase + SZALLOC; @@ -1235,6 +1261,7 @@ void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ for(i=0; inPhrase; i++){ fts5ExprPhraseFree(pNear->apPhrase[i]); } + sqlite3_free(pNear->pColset); sqlite3_free(pNear); } } @@ -1313,7 +1340,7 @@ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ void sqlite3Fts5ParseSetDistance( Fts5Parse *pParse, - Fts5ExprNearset *pNear, + Fts5ExprNearset *pNear, Fts5Token *p ){ int nNear = 0; @@ -1335,30 +1362,100 @@ void sqlite3Fts5ParseSetDistance( pNear->nNear = nNear; } -void sqlite3Fts5ParseSetColumn( - Fts5Parse *pParse, - Fts5ExprNearset *pNear, +/* +** The second argument passed to this function may be NULL, or it may be +** an existing Fts5ExprColset object. This function returns a pointer to +** a new colset object containing the contents of (p) with new value column +** number iCol appended. +** +** If an OOM error occurs, store an error code in pParse and return NULL. +** The old colset object (if any) is not freed in this case. +*/ +static Fts5ExprColset *fts5ParseColset( + Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ + Fts5ExprColset *p, /* Existing colset object */ + int iCol /* New column to add to colset object */ +){ + int nCol = p ? p->nCol : 0; /* Num. 
columns already in colset object */ + Fts5ExprColset *pNew; /* New colset object to return */ + + assert( pParse->rc==SQLITE_OK ); + assert( iCol>=0 && iColpConfig->nCol ); + + pNew = sqlite3_realloc(p, sizeof(Fts5ExprColset) + sizeof(int)*nCol); + if( pNew==0 ){ + pParse->rc = SQLITE_NOMEM; + }else{ + int *aiCol = pNew->aiCol; + int i, j; + for(i=0; iiCol ) break; + } + for(j=nCol; j>i; j--){ + aiCol[j] = aiCol[j-1]; + } + aiCol[i] = iCol; + pNew->nCol = nCol+1; + +#ifndef NDEBUG + /* Check that the array is in order and contains no duplicate entries. */ + for(i=1; inCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); +#endif + } + + return pNew; +} + +Fts5ExprColset *sqlite3Fts5ParseColset( + Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ + Fts5ExprColset *pColset, /* Existing colset object */ Fts5Token *p ){ + Fts5ExprColset *pRet = 0; + if( pParse->rc==SQLITE_OK ){ + int iCol; char *z = 0; int rc = fts5ParseStringFromToken(p, &z); if( rc==SQLITE_OK ){ Fts5Config *pConfig = pParse->pConfig; - int i; - for(i=0; inCol; i++){ - if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){ - pNear->iCol = i; + sqlite3Fts5Dequote(z); + for(iCol=0; iColnCol; iCol++){ + if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ){ break; } } - if( i==pConfig->nCol ){ + if( iCol==pConfig->nCol ){ sqlite3Fts5ParseError(pParse, "no such column: %s", z); } sqlite3_free(z); }else{ pParse->rc = rc; } + + if( pParse->rc==SQLITE_OK ){ + pRet = fts5ParseColset(pParse, pColset, iCol); + } + } + + if( pParse->rc!=SQLITE_OK ){ + assert( pRet==0 ); + sqlite3_free(pColset); + } + + return pRet; +} + +void sqlite3Fts5ParseSetColset( + Fts5Parse *pParse, + Fts5ExprNearset *pNear, + Fts5ExprColset *pColset +){ + if( pNear ){ + pNear->pColset = pColset; + }else{ + sqlite3_free(pColset); } } @@ -1463,8 +1560,18 @@ static char *fts5ExprPrintTcl( zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); if( zRet==0 ) return 0; - if( pNear->iCol>=0 ){ - zRet = fts5PrintfAppend(zRet, "-col %d ", 
pNear->iCol); + if( pNear->pColset ){ + int *aiCol = pNear->pColset->aiCol; + int nCol = pNear->pColset->nCol; + if( nCol==1 ){ + zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); + }else{ + zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); + for(i=1; ipColset->nCol; i++){ + zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); + } + zRet = fts5PrintfAppend(zRet, "} "); + } if( zRet==0 ) return 0; } @@ -1530,8 +1637,9 @@ static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ int i; int iTerm; - if( pNear->iCol>=0 ){ - zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[pNear->iCol]); + if( pNear->pColset ){ + int iCol = pNear->pColset->aiCol[0]; + zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]); if( zRet==0 ) return 0; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 2e94516227..85ea0eb897 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4479,7 +4479,6 @@ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ */ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); - fts5BufferZero(&pIter->poslist); fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); return fts5IndexReturn(pIter->pIndex); } @@ -4494,7 +4493,6 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); assert( pMulti ); - fts5BufferZero(&pIter->poslist); fts5MultiIterNext(p, pMulti, 0, 0); if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; diff --git a/ext/fts5/fts5parse.y b/ext/fts5/fts5parse.y index ec52bdbeeb..43ed42e5a9 100644 --- a/ext/fts5/fts5parse.y +++ b/ext/fts5/fts5parse.y @@ -95,11 +95,28 @@ exprlist(A) ::= exprlist(X) cnearset(Y). { cnearset(A) ::= nearset(X). { A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); } -cnearset(A) ::= STRING(X) COLON nearset(Y). { - sqlite3Fts5ParseSetColumn(pParse, Y, &X); +cnearset(A) ::= colset(X) COLON nearset(Y). 
{ + sqlite3Fts5ParseSetColset(pParse, Y, X); A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y); } +%type colset {Fts5ExprColset*} +%destructor colset { sqlite3_free($$); } +%type colsetlist {Fts5ExprColset*} +%destructor colsetlist { sqlite3_free($$); } + +colset(A) ::= LSP colsetlist(X) RSP. { A = X; } +colset(A) ::= STRING(X). { + A = sqlite3Fts5ParseColset(pParse, 0, &X); +} + +colsetlist(A) ::= colsetlist(Y) STRING(X). { + A = sqlite3Fts5ParseColset(pParse, Y, &X); } +colsetlist(A) ::= STRING(X). { + A = sqlite3Fts5ParseColset(pParse, 0, &X); +} + + %type nearset {Fts5ExprNearset*} %type nearphrases {Fts5ExprNearset*} %destructor nearset { sqlite3Fts5ParseNearsetFree($$); } diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index 3370063bea..eb58ca9791 100644 --- a/ext/fts5/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -125,13 +125,32 @@ set data { 99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o} } +#------------------------------------------------------------------------- # Usage: # # poslist aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... # +# This command is used to test if a document (set of column values) matches +# the logical equivalent of a single FTS5 NEAR() clump and, if so, return +# the equivalent of an FTS5 position list. +# +# Parameter $aCol is passed a list of the column values for the document +# to test. Parameters $phrase1 and so on are the phrases. +# +# The result is a list of phrase hits. Each phrase hit is formatted as +# three integers separated by "." characters, in the following format: +# +# . . +# +# Options: +# +# -near N (NEAR distance. 
Default 10) +# -col C (List of column indexes to match against) +# -pc VARNAME (variable in caller frame to use for phrase numbering) +# proc poslist {aCol args} { set O(-near) 10 - set O(-col) -1 + set O(-col) {} set O(-pc) "" set nOpt [lsearch -exact $args --] @@ -161,8 +180,7 @@ proc poslist {aCol args} { set iCol -1 foreach col $aCol { incr iCol - if {$O(-col)>=0 && $O(-col)!=$iCol} continue - + if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue set nToken [llength $col] set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] @@ -361,10 +379,24 @@ foreach {tn2 sql} { # Queries on a specific column. # foreach {tn expr} { - 1 "x:a" - 2 "y:a" - 3 "x:b" - 4 "y:b" + 1.1 "x:a" + 1.2 "y:a" + 1.3 "x:b" + 1.4 "y:b" + 2.1 "[x]:a" + 2.2 "[y]:a" + 2.3 "[x]:b" + 2.4 "[y]:b" + + 3.1 "[x y]:a" + 3.2 "[y x]:a" + 3.3 "[x x]:b" + 3.4 "[y y]:b" + + 4.1 {["x" "y"]:a} + 4.2 {["y" x]:a} + 4.3 {[x "x"]:b} + 4.4 {["y" y]:b} } { set res [matchdata 1 $expr] do_execsql_test $tn2.3.$tn.[llength $res] { diff --git a/manifest b/manifest index 887ea5a8a5..47d2f6be5c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Optimizations\sfor\sfts5\squeries\sthat\smatch\sagainst\sa\sspecific\scolumn. -D 2015-05-28T19:57:12.367 +C Add\ssyntax\sto\sfts5\sused\sto\sspecify\sthat\sa\sphrase\sor\sNEAR\sgroup\sshould\smatch\sa\ssubset\sof\scolumns.\sFor\sexample\s"[col1\scol2\s...]\s:\s". 
+D 2015-05-29T15:55:30.046 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -107,24 +107,24 @@ F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 -F ext/fts5/fts5Int.h 2ce5c5e68852dd16de404b7a9a2a78f4f4588eb4 +F ext/fts5/fts5Int.h 3bcecc469fe570ab188d123e1d33d6e5e11a5129 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c c94983eaff58391d7c0d62e99de917cecd0f1dbc +F ext/fts5/fts5_expr.c c607282529c7b5747fc2bcf80770d6abc22638bb F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c a693ba741b82539da5779329214e5d2609e82e5f +F ext/fts5/fts5_index.c 59b8a3dfde24ddb80c31088148a3dfc779db22ab F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 -F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 +F ext/fts5/fts5parse.y 4ee667932d561a150d96483cf563281b95a9e523 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad -F ext/fts5/test/fts5ac.test 05008e00bd2761cc45df838a0988ecf318cbe1fd +F ext/fts5/test/fts5ac.test 
d35bbe22dd23b3dbac3e1d3f07eed0206213a480 F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a @@ -1331,7 +1331,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 0f9df202cc58097afddb8dad662b7c7fdc2c7d0c -R f25569d3bfd1393da78f5986e0e8acff +P b29ac50af0491a780a5a4c0985d88d0e5e014ba3 +R 34ff180b006ad5f21871399f838e5dbb U dan -Z d652fde1b36e85f62688dc3a9737ccda +Z f15f3e2fe41d81aa9045dd94b23ec6a4 diff --git a/manifest.uuid b/manifest.uuid index 4025ca1b11..d1b95a249f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b29ac50af0491a780a5a4c0985d88d0e5e014ba3 \ No newline at end of file +0fc0ea20920615f3e48ea2dbe2b7dcd979b0993e \ No newline at end of file From a8c024905cb690b548c0536bf67656bba8e90753 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 29 May 2015 19:00:22 +0000 Subject: [PATCH 146/206] Add extra tests and fixes for multi-column matches. 
FossilOrigin-Name: ae6794ffa23ef6191bd8834422abf322d978c11b --- ext/fts5/fts5_expr.c | 123 ++++++++------ ext/fts5/test/fts5_common.tcl | 136 +++++++++++++++ ext/fts5/test/fts5ac.test | 149 ++--------------- ext/fts5/test/fts5auto.test | 307 ++++++++++++++++++++++++++++++++++ ext/fts5/test/fts5fault4.test | 2 +- manifest | 19 ++- manifest.uuid | 2 +- 7 files changed, 537 insertions(+), 201 deletions(-) create mode 100644 ext/fts5/test/fts5auto.test diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 23827293fe..18bcd0d374 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -333,6 +333,14 @@ void sqlite3Fts5ExprFree(Fts5Expr *p){ } } +static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){ + int i; + for(i=0; inCol; i++){ + if( pColset->aiCol[i]==iCol ) return 1; + } + return 0; +} + /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function @@ -355,7 +363,12 @@ static int fts5ExprPhraseIsMatch( Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; - int iCol = pColset ? pColset->aiCol[0] : -1; + int iCol = -1; + + if( pColset && pColset->nCol==1 ){ + iCol = pColset->aiCol[0]; + pColset = 0; + } fts5BufferZero(&pPhrase->poslist); @@ -396,9 +409,11 @@ static int fts5ExprPhraseIsMatch( } }while( bMatch==0 ); - /* Append position iPos to the output */ - rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); - if( rc!=SQLITE_OK ) goto ismatch_out; + if( pColset==0 || fts5ExprColsetTest(pColset, FTS5_POS2COLUMN(iPos)) ){ + /* Append position iPos to the output */ + rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); + if( rc!=SQLITE_OK ) goto ismatch_out; + } for(i=0; inTerm; i++){ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; @@ -459,25 +474,25 @@ struct Fts5NearTrimmer { ** one phrase. All phrases currently point to the same row. 
The ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function ** tests if the current row contains instances of each phrase sufficiently -** close together to meet the NEAR constraint. Output variable *pbMatch -** is set to true if it does, or false otherwise. +** close together to meet the NEAR constraint. Non-zero is returned if it +** does, or zero otherwise. ** -** If no error occurs, SQLITE_OK is returned. Or, if an error does occur, -** an SQLite error code. If a value other than SQLITE_OK is returned, the -** final value of *pbMatch is undefined. -** -** TODO: This function should also edit the position lists associated -** with each phrase to remove any phrase instances that are not part of -** a set of intances that collectively matches the NEAR constraint. +** If in/out parameter (*pRc) is set to other than SQLITE_OK when this +** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) +** occurs within this function (*pRc) is set accordingly before returning. +** The return value is undefined in both these cases. +** +** If no error occurs and non-zero (a match) is returned, the position-list +** of each phrase object is edited to contain only those entries that +** meet the constraint before returning. */ -static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ +static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ Fts5NearTrimmer aStatic[4]; Fts5NearTrimmer *a = aStatic; - Fts5ExprPhrase **apPhrase = pNear->apPhrase; int i; - int rc = SQLITE_OK; + int rc = *pRc; int bMatch; assert( pNear->nPhrase>1 ); @@ -486,12 +501,14 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ ** using sqlite3_malloc(). This approach could be improved upon. 
*/ if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; - a = (Fts5NearTrimmer*)sqlite3_malloc(nByte); - if( !a ) return SQLITE_NOMEM; - memset(a, 0, nByte); + a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); }else{ memset(aStatic, 0, sizeof(aStatic)); } + if( rc!=SQLITE_OK ){ + *pRc = rc; + return 0; + } /* Initialize a lookahead iterator for each phrase. After passing the ** buffer and buffer size to the lookaside-reader init function, zero @@ -551,10 +568,12 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; } - ismatch_out: - *pbMatch = (a[0].pOut->n>0); - if( a!=aStatic ) sqlite3_free(a); - return rc; + ismatch_out: { + int bRet = a[0].pOut->n>0; + *pRc = rc; + if( a!=aStatic ) sqlite3_free(a); + return bRet; + } } /* @@ -700,6 +719,24 @@ static int fts5ExprExtractCol( return p - (*pa); } +static int fts5ExprExtractColset ( + Fts5ExprColset *pColset, /* Colset to filter on */ + const u8 *pPos, int nPos, /* Position list */ + Fts5Buffer *pBuf /* Output buffer */ +){ + int rc = SQLITE_OK; + int i; + + fts5BufferZero(pBuf); + for(i=0; inCol; i++){ + const u8 *pSub = pPos; + int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]); + if( nSub ){ + fts5BufferAppendBlob(&rc, pBuf, nSub, pSub); + } + } + return rc; +} /* @@ -756,24 +793,14 @@ static int fts5ExprNearNextMatch( assert( pPhrase->poslist.nSpace==0 ); pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); pPhrase->poslist.p = (u8*)pPos; - }else{ - int i; - fts5BufferZero(&pPhrase->poslist); - for(i=0; inCol; i++){ - const u8 *pSub = pPos; - int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]); - if( nSub ){ - fts5BufferAppendBlob(&rc, &pPhrase->poslist, nSub, pSub); - } - } + }else if( rc==SQLITE_OK ){ + rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist); } if( pPhrase->poslist.n ) return rc; }else{ int 
i; - assert( pNear->pColset==0 || pNear->pColset->nCol==1 ); - /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextRowidMatch(pExpr, pNode); if( rc!=SQLITE_OK || pNode->bEof ) break; @@ -794,12 +821,9 @@ static int fts5ExprNearNextMatch( } } - if( rc==SQLITE_OK && i==pNear->nPhrase ){ - int bMatch = 1; - if( pNear->nPhrase>1 ){ - rc = fts5ExprNearIsMatch(pNear, &bMatch); - } - if( rc!=SQLITE_OK || bMatch ) break; + if( i==pNear->nPhrase ){ + if( i==1 ) break; + if( fts5ExprNearIsMatch(&rc, pNear) ) break; } } @@ -1558,7 +1582,7 @@ static char *fts5ExprPrintTcl( int i; int iTerm; - zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); + zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); if( zRet==0 ) return 0; if( pNear->pColset ){ int *aiCol = pNear->pColset->aiCol; @@ -1596,7 +1620,6 @@ static char *fts5ExprPrintTcl( if( zRet==0 ) return 0; } - zRet = fts5PrintfAppend(zRet, "]"); if( zRet==0 ) return 0; }else{ @@ -1604,24 +1627,18 @@ static char *fts5ExprPrintTcl( char *z1 = 0; char *z2 = 0; switch( pExpr->eType ){ - case FTS5_AND: zOp = "&&"; break; - case FTS5_NOT: zOp = "&& !"; break; + case FTS5_AND: zOp = "AND"; break; + case FTS5_NOT: zOp = "NOT"; break; default: assert( pExpr->eType==FTS5_OR ); - zOp = "||"; + zOp = "OR"; break; } z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft); z2 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRight); if( z1 && z2 ){ - int b1 = pExpr->pLeft->eType!=FTS5_STRING; - int b2 = pExpr->pRight->eType!=FTS5_STRING; - zRet = sqlite3_mprintf("%s%s%s %s %s%s%s", - b1 ? "(" : "", z1, b1 ? ")" : "", - zOp, - b2 ? "(" : "", z2, b2 ? 
")" : "" - ); + zRet = sqlite3_mprintf("%s [%s] [%s]", zOp, z1, z2); } sqlite3_free(z1); sqlite3_free(z2); diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 70f5063b79..e4a689bb72 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -147,3 +147,139 @@ proc fts5_rnddoc {n} { set doc } +#------------------------------------------------------------------------- +# Usage: +# +# nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... +# +# This command is used to test if a document (set of column values) matches +# the logical equivalent of a single FTS5 NEAR() clump and, if so, return +# the equivalent of an FTS5 position list. +# +# Parameter $aCol is passed a list of the column values for the document +# to test. Parameters $phrase1 and so on are the phrases. +# +# The result is a list of phrase hits. Each phrase hit is formatted as +# three integers separated by "." characters, in the following format: +# +# . . +# +# Options: +# +# -near N (NEAR distance. Default 10) +# -col C (List of column indexes to match against) +# -pc VARNAME (variable in caller frame to use for phrase numbering) +# +proc nearset {aCol args} { + set O(-near) 10 + set O(-col) {} + set O(-pc) "" + + set nOpt [lsearch -exact $args --] + if {$nOpt<0} { error "no -- option" } + + foreach {k v} [lrange $args 0 [expr $nOpt-1]] { + if {[info exists O($k)]==0} { error "unrecognized option $k" } + set O($k) $v + } + + if {$O(-pc) == ""} { + set counter 0 + } else { + upvar $O(-pc) counter + } + + # Set $phraselist to be a list of phrases. $nPhrase its length. 
+ set phraselist [lrange $args [expr $nOpt+1] end] + set nPhrase [llength $phraselist] + + for {set j 0} {$j < [llength $aCol]} {incr j} { + for {set i 0} {$i < $nPhrase} {incr i} { + set A($j,$i) [list] + } + } + + set iCol -1 + foreach col $aCol { + incr iCol + if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue + set nToken [llength $col] + + set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] + for { } {$iFL < $nToken} {incr iFL} { + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set B($iPhrase) [list] + } + + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set p [lindex $phraselist $iPhrase] + set nPm1 [expr {[llength $p] - 1}] + set iFirst [expr $iFL - $O(-near) - [llength $p]] + + for {set i $iFirst} {$i <= $iFL} {incr i} { + if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i } + } + if {[llength $B($iPhrase)] == 0} break + } + + if {$iPhrase==$nPhrase} { + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] + set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)] + } + } + } + } + + set res [list] + #puts [array names A] + + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { + foreach a $A($iCol,$iPhrase) { + lappend res "$counter.$iCol.$a" + } + } + incr counter + } + + #puts $res + sort_poslist $res +} + +#------------------------------------------------------------------------- +# Usage: +# +# sort_poslist LIST +# +# Sort a position list of the type returned by command [nearset] +# +proc sort_poslist {L} { + lsort -command instcompare $L +} +proc instcompare {lhs rhs} { + foreach {p1 c1 o1} [split $lhs .] {} + foreach {p2 c2 o2} [split $rhs .] 
{} + + set res [expr $c1 - $c2] + if {$res==0} { set res [expr $o1 - $o2] } + if {$res==0} { set res [expr $p1 - $p2] } + + return $res +} + +#------------------------------------------------------------------------- +# Logical operators used by the commands returned by fts5_tcl_expr(). +# +proc AND {a b} { + if {[llength $a]==0 || [llength $b]==0} { return [list] } + sort_poslist [concat $a $b] +} +proc OR {a b} { + sort_poslist [concat $a $b] +} +proc NOT {a b} { + if {[llength $b]} { return [list] } + return $a +} + diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index eb58ca9791..96c2e9f290 100644 --- a/ext/fts5/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -125,126 +125,6 @@ set data { 99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o} } -#------------------------------------------------------------------------- -# Usage: -# -# poslist aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... -# -# This command is used to test if a document (set of column values) matches -# the logical equivalent of a single FTS5 NEAR() clump and, if so, return -# the equivalent of an FTS5 position list. -# -# Parameter $aCol is passed a list of the column values for the document -# to test. Parameters $phrase1 and so on are the phrases. -# -# The result is a list of phrase hits. Each phrase hit is formatted as -# three integers separated by "." characters, in the following format: -# -# . . -# -# Options: -# -# -near N (NEAR distance. 
Default 10) -# -col C (List of column indexes to match against) -# -pc VARNAME (variable in caller frame to use for phrase numbering) -# -proc poslist {aCol args} { - set O(-near) 10 - set O(-col) {} - set O(-pc) "" - - set nOpt [lsearch -exact $args --] - if {$nOpt<0} { error "no -- option" } - - foreach {k v} [lrange $args 0 [expr $nOpt-1]] { - if {[info exists O($k)]==0} { error "unrecognized option $k" } - set O($k) $v - } - - if {$O(-pc) == ""} { - set counter 0 - } else { - upvar $O(-pc) counter - } - - # Set $phraselist to be a list of phrases. $nPhrase its length. - set phraselist [lrange $args [expr $nOpt+1] end] - set nPhrase [llength $phraselist] - - for {set j 0} {$j < [llength $aCol]} {incr j} { - for {set i 0} {$i < $nPhrase} {incr i} { - set A($j,$i) [list] - } - } - - set iCol -1 - foreach col $aCol { - incr iCol - if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue - set nToken [llength $col] - - set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] - for { } {$iFL < $nToken} {incr iFL} { - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set B($iPhrase) [list] - } - - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set p [lindex $phraselist $iPhrase] - set nPm1 [expr {[llength $p] - 1}] - set iFirst [expr $iFL - $O(-near) - [llength $p]] - - for {set i $iFirst} {$i <= $iFL} {incr i} { - if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i } - } - if {[llength $B($iPhrase)] == 0} break - } - - if {$iPhrase==$nPhrase} { - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] - set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)] - } - } - } - } - - set res [list] -#puts [array names A] - - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { - foreach a $A($iCol,$iPhrase) { - lappend res "$counter.$iCol.$a" - } - } - incr counter - } - - #puts $res - return 
$res -} - -# Usage: -# -# nearset aCol ?-near N? ?-col C? -- phrase1 phrase2... -# -proc nearset {args} { - set plist [poslist {*}$args] - return [expr [llength [lindex $plist 0]]>0] -} - -proc instcompare {lhs rhs} { - foreach {p1 c1 o1} [split $lhs .] {} - foreach {p2 c2 o2} [split $rhs .] {} - - set res [expr $c1 - $c2] - if {$res==0} { set res [expr $o1 - $o2] } - if {$res==0} { set res [expr $p1 - $p2] } - - return $res -} - # Argument $expr is an FTS5 match expression designed to be executed against # an FTS5 table with the following schema: # @@ -262,25 +142,20 @@ proc instcompare {lhs rhs} { # proc matchdata {bPos expr {bAsc 1}} { - set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}] + set tclexpr [db one { + SELECT fts5_expr_tcl($expr, 'nearset $cols -pc ::pc', 'x', 'y') + }] set res [list] #puts $tclexpr foreach {id x y} $::data { set cols [list $x $y] - if $tclexpr { + set ::pc 0 + #set hits [lsort -command instcompare [eval $tclexpr]] + set hits [eval $tclexpr] + if {[llength $hits]>0} { if {$bPos} { - set N [regexp -all -inline {\[nearset [^\]]*\]} $tclexpr] - set rowres [list] - set cnt 0 - foreach phrase $N { - set arglist [string range $phrase 9 end-1] - set cmd "poslist [lindex $arglist 0] -pc cnt [lrange $arglist 1 end]" - set pos [eval $cmd] - set rowres [concat $rowres $pos] - } - set rowres [lsort -command instcompare $rowres] - lappend res [list $id $rowres] + lappend res [list $id $hits] } else { lappend res $id } @@ -425,11 +300,11 @@ foreach {tn2 sql} { } $res } - do_test $tn2.4.1 { poslist {{a b c}} -- a } {0.0.0} - do_test $tn2.4.2 { poslist {{a b c}} -- c } {0.0.2} + do_test $tn2.4.1 { nearset {{a b c}} -- a } {0.0.0} + do_test $tn2.4.2 { nearset {{a b c}} -- c } {0.0.2} foreach {tn expr tclexpr} { - 1 {a b} {[N $x -- {a}] && [N $x -- {b}]} + 1 {a b} {AND [N $x -- {a}] [N $x -- {b}]} } { do_execsql_test $tn2.5.$tn { SELECT fts5_expr_tcl($expr, 'N $x') @@ -477,7 +352,7 @@ foreach {tn2 sql} { do_execsql_test 3.1 { 
SELECT fts5_expr_tcl('a AND b'); -} {{[nearset -- {a}] && [nearset -- {b}]}} +} {{AND [nearset -- {a}] [nearset -- {b}]}} finish_test diff --git a/ext/fts5/test/fts5auto.test b/ext/fts5/test/fts5auto.test new file mode 100644 index 0000000000..48d9148882 --- /dev/null +++ b/ext/fts5/test/fts5auto.test @@ -0,0 +1,307 @@ +# 2015 May 30 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file contains automatically generated tests for various types +# of MATCH expressions. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5auto + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + + +set data { + -4026076 + {n x w k b p x b n t t d s} {f j j s p j o} + {w v i y r} {i p y s} + {a o q v e n q r} {q v g u c y a z y} + 3995120 + {c} {e e w d t} + {x c p f w r s m l r b f d} {g g u e} + {s n u t d v p d} {b k v p m f} + -2913881 + {k m} {a} + {w r j z n s l} {m j i w d t w e l} + {z n c} {v f b m} + 174082 + {j} {q l w u k e q v r i} + {j l} {u v w r s p e l} + {p i k j k q c t g u s} {g u y s m h q k g t e s o r} + 3207399 + {e t} {} + {p} {y v r b e k h d e v} + {t m w z b g q t s d d h} {o n v u i t o y k j} + 182399 + {} {m o s o x d y f a x j z} + {x n z r c d} {n r x i r} + {s v s} {a u} + 768994 + {e u t q v z q k j p u f j p} {y c b} + {p s d} {k n w p m p p} + {u o x s d} {f s g r d b d r m m m z y} + 3931037 + {c j p x e} {c n k t h z o i} + {} {r r p j k x w q} + {o r d z d} {x} + 3105748 + {p x r u} {x i s w o t o g x m z i w} + {q x m z} {h c j w b l y w x c o} + {m b k v} {t v q i s a d x} + -2501642 + {o u d n w o m o o s n t r h} {k p e u y p e z d j r y g} + {v b b h d d q y
j q j} {a m w d t} + {y e f n} {a k x i x} + -1745680 + {z u w j f d b f} {j w i c g u d w e} + {m f p v m a s p v c o s} {s c r z o t w l b e a q} + {m k q} {k b a v o} + -932328 + {r v i u m q d r} {f z u v h c m r f g} + {r x r} {k p i d h h w h z u a x} + {k m j p} {h l j a e u c i q x x f x g} + -3923818 + {t t p b n u i h e c k} {m z} + {v u d c} {v y y j s g} + {o a f k k q p h g x e n z x} {h d w c o l} + -2145922 + {z z l f a l g e d c d h} {j b j p k o o u b q} + {d i g q t f d r h k} {n w g j c x r p t y f l c t} + {d o c u k f o} {r y s x z s p p h g t p y c} + 4552917 + {j w j y h l k u} {n a} + {y h w c n k} {b} + {w} {z l r t s i m v c y} + 2292008 + {q v q j w y y x u t} {r q z n h a b o} + {d q y} {y v o e j} + {} {a b h c d l p d x} + 1407892 + {n j j u q d o a u c f} {r d b w o q n g} + {d e v w s} {v d v o u o x s l s j z y} + {j y w h i f g i h m} {v n z b n y} + -4412544 + {g h h r s} {h e r e} + {n q s} {o p z r m l l t} + {p} {f s u o b j} + 1209110 + {o a a z t t u h j} {z z i r k r} + {i c x q w g v o x z i z p} {q o g k i n z x e d v w v} + {p f v b g f e d n p u c y k} {q z z a i p a a s r e z} + 3448977 + {i v} {l u x t b o k} + {f h u v p} {k a o y j} + {d m k c j} {v c e r u e f i t} + -4703774 + {d h v w u z r e h x o l t} {p s f y w y r q d a m w} + {c h g c g j j f t b i c q} {s e} + {c t q j g f} {v n r w y r a g e j d} + 2414151 + {s o o s d s k q b f q v p e} {j r o b t o p d l o o x} + {d d k t v e} {} + {t v o d w} {w e q w h y c y y i j b a m} + -3342407 + {m c h n e p d o c r w n t} {j d k s p q l} + {t g s r w x j l r z r} {h} + {r q v x i r a n h s} {m y p b v w r a u o g q r} + -993951 + {l n p u o j d x t u u c o j} {k r n a r e k v i t o e} + {q f t t a a c z v f} {o n m p v f o e n} + {h z h i p s b j z h} {i t w m k c u g n i} + 1575251 + {} {z s i j d o x j a r t} + {h g j u j n v e n z} {p z j n n f} + {s q q f d w r l y i z d o m} {b a n d h t b y g h d} + 4263668 + {q g t h f s} {s g x p f q z i s o f l i} + {q k} 
{w v h a x n a r b} + {m j a h o b i x k r w z q u} {m t r g j o e q t m p u l} + 2487819 + {m w g x r n e u t s r} {b x a t u u j c r n} + {j} {w f j r e e y l p} + {o u h b} {o c a c a b v} + 167966 + {o d b s d o a u m o x y} {c} + {r w d o b v} {z e b} + {i n z a f g z o} {m u b a g} + 1948599 + {n r g q d j s} {n k} + {l b p d v t k h y y} {u m k e c} + {t b n y o t b} {j w c i r x x} + 2941631 + {l d p l b g f} {e k e} + {p j} {m c s w t b k n l d x} + {f o v y v l} {c w p s w j w c u t y} + 3561104 + {d r j j r j i g p} {u} + {g r j q} {z l p d s n f c h t d c v z} + {w r c f s x z y} {g f o k g g} + -2223281 + {y e t j j z f p o m m z} {h k o g o} + {m x a t} {l q x l} + {r w k d l s y b} {q g k b} + -4502874 + {k k b x k l f} {r} + {} {q m z b k h k u n e z} + {z q g y m y u} {} + 1757599 + {d p z j y u r} {z p l q w j t j} + {n i r x r y j} {} + {h} {w t d q c x z z x e e} + -4809589 + {} {z p x u h i i n g} + {w q s u d b f x n} {l y k b b r x t i} + {n d v j q o t o d p z e} {u r y u v u c} + 1068408 + {y e} {e g s k e w t p v o b k} + {z c m s} {r u r u h n h b p q g b} + {j k b l} {m c d t s r s q a d b o f} + -1972554 + {m s w} {d k v s a r k p a r i v} + {g j z k p} {y k c v r e u o q f i b a} + {i p i} {c z w c y b n z i v} + -2052385 + {} {x e u f f g n c i x n e i e} + {} {p s w d x p g} + {} {s j a h n} + 2805981 + {m x g c w o e} {k g u y r y i u e g g} + {f k j v t x p h x k u} {w i} + {b l f z f v t n} {i u d o d p h s m u} + 2507621 + {} {u b n l x f n j t} + {u r x l h} {h r l m r} + {d y e n b s q v t k n q q} {x l t v w h a s k} + -3138375 + {e o f j y x u w v e w z} {r d q g k n n v r c z n e w} + {l y i q z k j p u f q s k} {c i l l i m a a g a z r x f} + {a v k h m q z b y n z} {q g w c y r r o a} + -457971 + {j x a w e c s h f l f} {q} + {j f v j u m d q r v v} {x n v a w} + {i e h d h f u w t t z} {v s u l s v o v i k n e} + 2265221 + {z t c y w n y r t} {n b a x s} + {q w a v} {a b s d x i g w t e z h} + {t l} {j k r w f f y j o k 
u} + -3941280 + {r x t o z} {f j n z k} + {t x e b t d b k w i s} {j t y h i h} + {y q g n g s u v c z j z n g} {n n g t l p h} + 2084745 + {z d z d} {j} + {o e k t b k a z l w} {o p i h k c x} + {c r b t i j f} {z e n m} + 1265843 + {} {j s g j j x u y} + {u q t f} {g o g} + {w o j e d} {w q n a c t q x j} + -2941116 + {i n c u o} {f b} + {o m s q d o z a q} {f s v o b b} + {o a z c h r} {j e w h b f z} + -1265441 + {p g z q v a o a x a} {s t h} + {w i p o c} {s n d g f z w q o d v v l j} + {y f b i a s v} {u m o z k k s t s d p b l p} + -1989158 + {r i c n} {r e w w i n z} + {q u s y b w u g y g f o} {y} + {d} {j x i b x u y d c p v a h} + 2391989 + {b n w x w f q h p i} {e u b b i n a i o c d g} + {v a z o i e n l x l r} {r u f o r k w m d w} + {k s} {r f e j q p w} +} + +do_test 1.0 { + execsql { + BEGIN; + CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f); + } + foreach {rowid a b c d e f} $data { + execsql { + INSERT INTO tt(rowid, a, b, c, d, e, f) + VALUES($rowid, $a, $b, $c, $d, $e, $f) + } + } + execsql { + COMMIT; + } +} {} + +proc fts5_test_poslist {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xInstCount]} {incr i} { + lappend res [string map {{ } .} [$cmd xInst $i]] + } + set res +} +sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist + +proc matchdata {expr} { + set tclexpr [db one { + SELECT fts5_expr_tcl( + $expr, 'nearset $cols -pc ::pc', 'a','b','c','d','e','f' + ) + }] + set res [list] + + db eval {SELECT rowid, * FROM tt} { + set cols [list $a $b $c $d $e $f] + set ::pc 0 + set rowdata [eval $tclexpr] + + if {$rowdata != ""} { + lappend res $rowid $rowdata + } + } + + set res +} + +#------------------------------------------------------------------------- +# + +do_execsql_test 2.0 { + SELECT rowid, fts5_test_poslist(tt) FROM tt WHERE tt MATCH 'a AND b'; +} [matchdata "a AND b"] + +do_test 2.1 { + llength [matchdata "a AND b"] +} 62 + +foreach {tn expr} { + 1 { [a] : x } + 2 { [a b] : x } + 3 { [a b f] : x } + 4 { [f a 
b] : x } + 5 { [f a b] : x y } + 6 { [f a b] : x + y } + 7 { [c a b] : x + c } + 8 { [c d] : "l m" } + 9 { [c e] : "l m" } +} { + set res [matchdata $expr] + do_test 3.$tn.[llength $res] { + execsql { + SELECT rowid, fts5_test_poslist(tt) FROM tt WHERE tt MATCH $expr + } + } $res +} + + +finish_test + diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index fe3fb0796a..417d470ed3 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -312,7 +312,7 @@ do_faultsim_test 10.1 -faults oom-t* -body { do_faultsim_test 10.2 -faults oom-t* -body { db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') } } -test { - set res {[ns -col 0 -- {a b c}] && ([ns -- {b}] && [ns -near 10 -- {a} {b}])} + set res {AND [ns -col 0 -- {a b c}] [AND [ns -- {b}] [ns -near 10 -- {a} {b}]]} faultsim_test_result [list 0 $res] } diff --git a/manifest b/manifest index 47d2f6be5c..ecafa77346 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssyntax\sto\sfts5\sused\sto\sspecify\sthat\sa\sphrase\sor\sNEAR\sgroup\sshould\smatch\sa\ssubset\sof\scolumns.\sFor\sexample\s"[col1\scol2\s...]\s:\s". -D 2015-05-29T15:55:30.046 +C Add\sextra\stests\sand\sfixes\sfor\smulti-column\smatches. 
+D 2015-05-29T19:00:22.571 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5Int.h 3bcecc469fe570ab188d123e1d33d6e5e11a5129 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c c607282529c7b5747fc2bcf80770d6abc22638bb +F ext/fts5/fts5_expr.c 1685b331ecb880cb8807e2dc7fc4184d2933bb96 F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 F ext/fts5/fts5_index.c 59b8a3dfde24ddb80c31088148a3dfc779db22ab F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 @@ -121,10 +121,10 @@ F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 4ee667932d561a150d96483cf563281b95a9e523 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl 6d663e8c3d8409857363f66560df96b8ca813e79 +F ext/fts5/test/fts5_common.tcl 632ff0fd8bf3dd55c2ddaac2c16428548d5af7be F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad -F ext/fts5/test/fts5ac.test d35bbe22dd23b3dbac3e1d3f07eed0206213a480 +F ext/fts5/test/fts5ac.test 999fd5f44579f1eb565ed7cf3861c427537ff097 F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a @@ -134,6 +134,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test 
fc60ebeac9d8e366e71309d4c31fa72199d711d7 +F ext/fts5/test/fts5auto.test 62e62fa7d60c50d334c5f6cf6b1ed1d49fa3d8d8 F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b @@ -149,7 +150,7 @@ F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test e7170486d71de72fe88018b5b920c0a9f6c19801 +F ext/fts5/test/fts5fault4.test b854f9895cb07cec58204d5a7ca82d03ce824e73 F ext/fts5/test/fts5fault5.test 54da9fd4c3434a1d4f6abdcb6469299d91cf5875 F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d @@ -1331,7 +1332,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b29ac50af0491a780a5a4c0985d88d0e5e014ba3 -R 34ff180b006ad5f21871399f838e5dbb +P 0fc0ea20920615f3e48ea2dbe2b7dcd979b0993e +R 358d1aaaac138ca9e6a38de66a871d33 U dan -Z f15f3e2fe41d81aa9045dd94b23ec6a4 +Z c843d103a5028eb0d546bbe1e7e6abdc diff --git a/manifest.uuid b/manifest.uuid index d1b95a249f..6a7e8e7df1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0fc0ea20920615f3e48ea2dbe2b7dcd979b0993e \ No newline at end of file +ae6794ffa23ef6191bd8834422abf322d978c11b \ No newline at end of file From 3f3074e0c1871bd27daea8f65fad11d8b8a310f5 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 30 May 2015 11:49:58 +0000 Subject: [PATCH 147/206] Remove the "#include sqlite3Int.h" from fts5Int.h. 
FossilOrigin-Name: e008c3c8e29c843ec945ddad54b9688bbf2bdb44 --- ext/fts5/fts5.c | 2 +- ext/fts5/fts5Int.h | 43 ++++- ext/fts5/fts5_buffer.c | 8 +- ext/fts5/fts5_expr.c | 2 +- ext/fts5/fts5_hash.c | 10 +- ext/fts5/fts5_index.c | 138 ++++------------ ext/fts5/fts5_storage.c | 14 +- ext/fts5/fts5_tokenize.c | 4 +- ext/fts5/fts5_varint.c | 344 +++++++++++++++++++++++++++++++++++++++ main.mk | 5 + manifest | 31 ++-- manifest.uuid | 2 +- tool/mksqlite3c.tcl | 1 + 13 files changed, 457 insertions(+), 147 deletions(-) create mode 100644 ext/fts5/fts5_varint.c diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index aa23937f43..07a92c3744 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -601,7 +601,7 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ for(i=0; i<(pSorter->nIdx-1); i++){ int iVal; - a += getVarint32(a, iVal); + a += fts5GetVarint32(a, iVal); iOff += iVal; pSorter->aIdx[i] = iOff; } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index e52e3bc19e..6684c2ba8a 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -15,7 +15,27 @@ #define _FTS5INT_H #include "fts5.h" -#include "sqliteInt.h" + +#include <string.h> +#include <assert.h> + +#ifndef SQLITE_AMALGAMATION + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned short u16; +typedef sqlite3_int64 i64; +typedef sqlite3_uint64 u64; + +#define ArraySize(x) (sizeof(x) / sizeof(x[0])) + +#define testcase(x) +#define ALWAYS(x) 1 +#define NEVER(x) 0 + +#define MIN(x,y) (((x) < (y)) ? (x) : (y)) + +#endif /* @@ -377,15 +397,26 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); -int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); -#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) - -int sqlite3Fts5GetVarintLen(u32 iVal); - /* ** End of interface to code in fts5_index.c.
**************************************************************************/ +/************************************************************************** +** Interface to code in fts5_varint.c. +*/ +int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); +int sqlite3Fts5GetVarintLen(u32 iVal); +u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); +int sqlite3Fts5PutVarint(unsigned char *p, u64 v); + +#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) +#define fts5GetVarint sqlite3Fts5GetVarint + +/* +** End of interface to code in fts5_varint.c. +**************************************************************************/ + + /************************************************************************** ** Interface to code in fts5.c. */ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 24e7d8f65f..5c4e518bcc 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -44,7 +44,7 @@ int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ */ void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ if( sqlite3Fts5BufferGrow(pRc, pBuf, 9) ) return; - pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iVal); + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); } void sqlite3Fts5Put32(u8 *aBuf, int iVal){ @@ -169,11 +169,11 @@ int sqlite3Fts5PoslistNext64( }else{ i64 iOff = *piOff; int iVal; - i += getVarint32(&a[i], iVal); + i += fts5GetVarint32(&a[i], iVal); if( iVal==1 ){ - i += getVarint32(&a[i], iVal); + i += fts5GetVarint32(&a[i], iVal); iOff = ((i64)iVal) << 32; - i += getVarint32(&a[i], iVal); + i += fts5GetVarint32(&a[i], iVal); } *piOff = iOff + (iVal-2); *pi = i; diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 18bcd0d374..1a5f2887f4 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -708,7 +708,7 @@ static int fts5ExprExtractCol( if( p==pEnd ) return 0; } *pa = p++; - p += getVarint32(p, iCurrent); + p += fts5GetVarint32(p, iCurrent); } /* Advance pointer p until it 
points to pEnd or an 0x01 byte that is diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 39821d04a2..4cc507167e 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -188,7 +188,7 @@ static void fts5HashAddPoslistSize(Fts5HashEntry *p){ }else{ int nByte = sqlite3Fts5GetVarintLen((u32)nPos); memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); - sqlite3PutVarint(&pPtr[p->iSzPoslist], nPos); + sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); p->nData += (nByte-1); } p->bDel = 0; @@ -239,7 +239,7 @@ int sqlite3Fts5HashWrite( assert( iHash==fts5HashKey(pHash->nSlot, p->zKey, nToken+1) ); p->zKey[nToken+1] = '\0'; p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry); - p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); + p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iRowid = iRowid; @@ -276,7 +276,7 @@ int sqlite3Fts5HashWrite( ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ fts5HashAddPoslistSize(p); - p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid); + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; p->nData += 1; p->iCol = 0; @@ -289,13 +289,13 @@ int sqlite3Fts5HashWrite( assert( iCol>=p->iCol ); if( iCol!=p->iCol ){ pPtr[p->nData++] = 0x01; - p->nData += sqlite3PutVarint(&pPtr[p->nData], iCol); + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); p->iCol = iCol; p->iPos = 0; } /* Append the new position offset */ - p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); p->iPos = iPos; }else{ /* This is a delete. Set the delete flag. 
*/ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 85ea0eb897..6d2e042fc2 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -612,81 +612,7 @@ static void fts5PutU16(u8 *aOut, u16 iVal){ static u16 fts5GetU16(const u8 *aIn){ return ((u16)aIn[0] << 8) + aIn[1]; -} - -/* -** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. -** Except, this version does handle the single byte case that the core -** version depends on being handled before its function is called. -*/ -int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ - u32 a,b; - - /* The 1-byte case. Overwhelmingly the most common. */ - a = *p; - /* a: p0 (unmasked) */ - if (!(a&0x80)) - { - /* Values between 0 and 127 */ - *v = a; - return 1; - } - - /* The 2-byte case */ - p++; - b = *p; - /* b: p1 (unmasked) */ - if (!(b&0x80)) - { - /* Values between 128 and 16383 */ - a &= 0x7f; - a = a<<7; - *v = a | b; - return 2; - } - - /* The 3-byte case */ - p++; - a = a<<14; - a |= *p; - /* a: p0<<14 | p2 (unmasked) */ - if (!(a&0x80)) - { - /* Values between 16384 and 2097151 */ - a &= (0x7f<<14)|(0x7f); - b &= 0x7f; - b = b<<7; - *v = a | b; - return 3; - } - - /* A 32-bit varint is used to store size information in btrees. - ** Objects are rarely larger than 2MiB limit of a 3-byte varint. - ** A 3-byte varint is sufficient, for example, to record the size - ** of a 1048569-byte BLOB or string. - ** - ** We only unroll the first 1-, 2-, and 3- byte cases. The very - ** rare larger cases can be handled by the slower 64-bit varint - ** routine. - */ - { - u64 v64; - u8 n; - p -= 2; - n = sqlite3GetVarint(p, &v64); - *v = (u32)v64; - assert( n>3 && n<=9 ); - return n; - } -} - -int sqlite3Fts5GetVarintLen(u32 iVal){ - if( iVal<(1 << 7 ) ) return 1; - if( iVal<(1 << 14) ) return 2; - if( iVal<(1 << 21) ) return 3; - if( iVal<(1 << 28) ) return 4; - return 5; -} +} /* ** Allocate and return a buffer at least nByte bytes in size. 
@@ -986,7 +912,7 @@ static int fts5StructureDecode( if( pRet ){ pRet->nLevel = nLevel; pRet->nSegment = nSegment; - i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter); + i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); for(iLvl=0; rc==SQLITE_OK && iLvlaLevel[iLvl]; @@ -1350,7 +1276,7 @@ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ assert( pLvl->bEof==0 ); pLvl->iOff = 1; pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); - pLvl->iOff += getVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); + pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); pLvl->iFirstOff = pLvl->iOff; }else{ int iOff; @@ -1361,7 +1287,7 @@ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ if( iOffn ){ i64 iVal; pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; - iOff += getVarint(&pData->p[iOff], (u64*)&iVal); + iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); pLvl->iRowid += iVal; pLvl->iOff = iOff; }else{ @@ -1468,7 +1394,7 @@ static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ if( (a[iOff-1] & 0x80)==0 ) break; } - getVarint(&a[iOff], (u64*)&iVal); + fts5GetVarint(&a[iOff], (u64*)&iVal); pLvl->iRowid -= iVal; pLvl->iLeafPgno--; @@ -1690,7 +1616,7 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ iOff = 4; a = pIter->pLeaf->p; } - iOff += sqlite3GetVarint(&a[iOff], (u64*)&pIter->iRowid); + iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } @@ -1761,7 +1687,7 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); i += nPos; if( i>=n ) break; - i += getVarint(&a[i], (u64*)&iDelta); + i += fts5GetVarint(&a[i], (u64*)&iDelta); if( iDelta==0 ) break; pIter->iRowid += iDelta; @@ -1815,7 +1741,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ if( pIter->pLeaf ){ u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; - pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid); + pIter->iLeafOffset 
+= fts5GetVarint(a, (u64*)&pIter->iRowid); break; }else{ fts5DataRelease(pNew); @@ -1865,7 +1791,7 @@ static void fts5SegIterNext( pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); iOff += nPos; - getVarint(&a[iOff], (u64*)&iDelta); + fts5GetVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid -= iDelta; fts5SegIterLoadNPos(p, pIter); }else{ @@ -1886,7 +1812,7 @@ static void fts5SegIterNext( if( iOffiLeafOffset = iOff; if( iDelta==0 ){ bNewTerm = 1; @@ -1914,7 +1840,7 @@ static void fts5SegIterNext( pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->n = nList; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); - pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid); + pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } }else{ iOff = 0; @@ -1924,7 +1850,7 @@ static void fts5SegIterNext( pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; if( (iOff = fts5GetU16(&pLeaf->p[0])) ){ - iOff += sqlite3GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); + iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ @@ -1993,7 +1919,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ if( iOff>=pLeaf->n ) break; /* Rowid delta. Or, if 0x00, the end of doclist marker. 
*/ - nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) break; iOff += nPos; } @@ -2040,7 +1966,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ pIter->pLeaf = pLast; pIter->iLeafPgno = pgnoLast; fts5LeafHeader(pLast, &iOff, &dummy); - iOff += getVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); + iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } @@ -2072,7 +1998,7 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ i64 iDelta; /* iOff is currently the offset of the start of position list data */ - iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) return; assert_nc( iOffn ); iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); @@ -2214,7 +2140,7 @@ static void fts5SegIterHashInit( pLeaf->p = (u8*)pList; pLeaf->n = nList; pIter->pLeaf = pLeaf; - pIter->iLeafOffset = getVarint(pLeaf->p, (u64*)&pIter->iRowid); + pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; @@ -2397,7 +2323,7 @@ static void fts5SegIterGotoPage( if( iOff<4 || iOff>=n ){ p->rc = FTS5_CORRUPT; }else{ - iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); + iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; fts5SegIterLoadNPos(p, pIter); } @@ -2692,7 +2618,7 @@ static void fts5MultiIterNew2( pIter->flags = FTS5_SEGITER_ONETERM; if( pData->n>0 ){ pIter->pLeaf = pData; - pIter->iLeafOffset = getVarint(pData->p, (u64*)&pIter->iRowid); + pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); pNew->aFirst[1].iFirst = 1; if( bDesc ){ pNew->bRev = 1; @@ -3024,8 +2950,8 @@ static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ i64 iRowid; int iOff; - iOff = 1 + getVarint(&pBuf->p[1], (u64*)&iRowid); - getVarint(&pBuf->p[iOff], (u64*)&iRowid); + 
iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); + fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); return iRowid; } @@ -3253,7 +3179,7 @@ static void fts5WriteAppendPoslistData( int nCopy = 0; while( nCopyrc, &pPage->buf, nCopy, a); a += nCopy; @@ -3759,7 +3685,7 @@ static void fts5FlushOneHash(Fts5Index *p){ ** as well. */ if( writer.bFirstTermInPage==0 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); - pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre); nSuffix = nTerm - nPre; }else{ fts5PutU16(&pBuf->p[2], pBuf->n); @@ -3772,7 +3698,7 @@ static void fts5FlushOneHash(Fts5Index *p){ } nSuffix = nTerm; } - pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix); fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); /* We just wrote a term into page writer.aWriter[0].pgno. If a @@ -3798,18 +3724,18 @@ static void fts5FlushOneHash(Fts5Index *p){ int nPos; int nCopy; int bDummy; - iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta); + iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta); nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); nCopy += nPos; iRowid += iDelta; if( writer.bFirstRowidInPage ){ fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */ - pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); writer.bFirstRowidInPage = 0; fts5WriteDlidxAppend(p, &writer, iRowid); }else{ - pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); } assert( pBuf->n<=pBuf->nSpace ); @@ -4012,10 +3938,10 @@ static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ int bDummy; if( pIter->i ){ i64 iDelta; - pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); + pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&iDelta); pIter->iRowid += iDelta; }else{ - 
pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); + pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } pIter->i += fts5GetPoslistSize( &pIter->a[pIter->i], &pIter->nPoslist, &bDummy @@ -4866,7 +4792,7 @@ static void fts5TestTerm( }else if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ u32 cksum3 = *pCksum; - const char *zTerm = &pPrev->p[1]; /* The term without the prefix-byte */ + const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); @@ -5006,7 +4932,7 @@ static void fts5IndexIntegrityCheckSegment( if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); - getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); + fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } @@ -5266,7 +5192,7 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDocid; int iOff = 0; - iOff = sqlite3GetVarint(&a[iOff], (u64*)&iDocid); + iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid); sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); while( iOffeContent==FTS5_CONTENT_NORMAL ){ + int nDefn = 32 + pConfig->nCol*10; char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10); if( zDefn==0 ){ rc = SQLITE_NOMEM; }else{ int i; - int iOff = sprintf(zDefn, "id INTEGER PRIMARY KEY"); + int iOff; + sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); + iOff = strlen(zDefn); for(i=0; inCol; i++){ - iOff += sprintf(&zDefn[iOff], ", c%d", i); + sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); + iOff += strlen(&zDefn[iOff]); } rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); } @@ -395,9 +399,9 @@ static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ if( rc==SQLITE_OK && buf.n ){ int i = 0; int iCol; - i += 
getVarint(&buf.p[i], (u64*)&p->nTotalRow); + i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow); for(iCol=0; iaTotalSize[iCol]); + i += fts5GetVarint(&buf.p[i], (u64*)&p->aTotalSize[iCol]); } } sqlite3_free(buf.p); @@ -907,7 +911,7 @@ static int fts5StorageDecodeSizeArray( int iOff = 0; for(i=0; i=nBlob ) return 1; - iOff += getVarint32(&aBlob[iOff], aCol[i]); + iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]); } return (iOff!=nBlob); } diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 7dacc2912f..36ef26cb35 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -13,9 +13,7 @@ #if defined(SQLITE_ENABLE_FTS5) -#include "fts5.h" -#include -#include +#include "fts5Int.h" /************************************************************************** ** Start of ascii tokenizer implementation. diff --git a/ext/fts5/fts5_varint.c b/ext/fts5/fts5_varint.c new file mode 100644 index 0000000000..386c0d676e --- /dev/null +++ b/ext/fts5/fts5_varint.c @@ -0,0 +1,344 @@ +/* +** 2015 May 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Routines for varint serialization and deserialization. +*/ + +#ifdef SQLITE_ENABLE_FTS5 + +#include "fts5Int.h" + +/* +** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. +** Except, this version does handle the single byte case that the core +** version depends on being handled before its function is called. +*/ +int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ + u32 a,b; + + /* The 1-byte case. Overwhelmingly the most common. 
*/ + a = *p; + /* a: p0 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 0 and 127 */ + *v = a; + return 1; + } + + /* The 2-byte case */ + p++; + b = *p; + /* b: p1 (unmasked) */ + if (!(b&0x80)) + { + /* Values between 128 and 16383 */ + a &= 0x7f; + a = a<<7; + *v = a | b; + return 2; + } + + /* The 3-byte case */ + p++; + a = a<<14; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 16384 and 2097151 */ + a &= (0x7f<<14)|(0x7f); + b &= 0x7f; + b = b<<7; + *v = a | b; + return 3; + } + + /* A 32-bit varint is used to store size information in btrees. + ** Objects are rarely larger than 2MiB limit of a 3-byte varint. + ** A 3-byte varint is sufficient, for example, to record the size + ** of a 1048569-byte BLOB or string. + ** + ** We only unroll the first 1-, 2-, and 3- byte cases. The very + ** rare larger cases can be handled by the slower 64-bit varint + ** routine. + */ + { + u64 v64; + u8 n; + p -= 2; + n = sqlite3Fts5GetVarint(p, &v64); + *v = (u32)v64; + assert( n>3 && n<=9 ); + return n; + } +} + + +/* +** Bitmasks used by sqlite3GetVarint(). These precomputed constants +** are defined here rather than simply putting the constant expressions +** inline in order to work around bugs in the RVT compiler. +** +** SLOT_2_0 A mask for (0x7f<<14) | 0x7f +** +** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0 +*/ +#define SLOT_2_0 0x001fc07f +#define SLOT_4_2_0 0xf01fc07f + +/* +** Read a 64-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read. The value is stored in *v. 
+*/ +u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){ + u32 a,b,s; + + a = *p; + /* a: p0 (unmasked) */ + if (!(a&0x80)) + { + *v = a; + return 1; + } + + p++; + b = *p; + /* b: p1 (unmasked) */ + if (!(b&0x80)) + { + a &= 0x7f; + a = a<<7; + a |= b; + *v = a; + return 2; + } + + /* Verify that constants are precomputed correctly */ + assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) ); + assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) ); + + p++; + a = a<<14; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + a &= SLOT_2_0; + b &= 0x7f; + b = b<<7; + a |= b; + *v = a; + return 3; + } + + /* CSE1 from below */ + a &= SLOT_2_0; + p++; + b = b<<14; + b |= *p; + /* b: p1<<14 | p3 (unmasked) */ + if (!(b&0x80)) + { + b &= SLOT_2_0; + /* moved CSE1 up */ + /* a &= (0x7f<<14)|(0x7f); */ + a = a<<7; + a |= b; + *v = a; + return 4; + } + + /* a: p0<<14 | p2 (masked) */ + /* b: p1<<14 | p3 (unmasked) */ + /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + /* moved CSE1 up */ + /* a &= (0x7f<<14)|(0x7f); */ + b &= SLOT_2_0; + s = a; + /* s: p0<<14 | p2 (masked) */ + + p++; + a = a<<14; + a |= *p; + /* a: p0<<28 | p2<<14 | p4 (unmasked) */ + if (!(a&0x80)) + { + /* we can skip these cause they were (effectively) done above in calc'ing s */ + /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ + /* b &= (0x7f<<14)|(0x7f); */ + b = b<<7; + a |= b; + s = s>>18; + *v = ((u64)s)<<32 | a; + return 5; + } + + /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + s = s<<7; + s |= b; + /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + + p++; + b = b<<14; + b |= *p; + /* b: p1<<28 | p3<<14 | p5 (unmasked) */ + if (!(b&0x80)) + { + /* we can skip this cause it was (effectively) done above in calc'ing s */ + /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ + a &= SLOT_2_0; + a = a<<7; + a |= b; + s = s>>18; + *v = ((u64)s)<<32 | a; + return 6; + } + + p++; + a = a<<14; + a |= *p; + /* a: p2<<28 | p4<<14 | p6 (unmasked) */ + if (!(a&0x80)) + { + a &= SLOT_4_2_0; + b &= 
SLOT_2_0; + b = b<<7; + a |= b; + s = s>>11; + *v = ((u64)s)<<32 | a; + return 7; + } + + /* CSE2 from below */ + a &= SLOT_2_0; + p++; + b = b<<14; + b |= *p; + /* b: p3<<28 | p5<<14 | p7 (unmasked) */ + if (!(b&0x80)) + { + b &= SLOT_4_2_0; + /* moved CSE2 up */ + /* a &= (0x7f<<14)|(0x7f); */ + a = a<<7; + a |= b; + s = s>>4; + *v = ((u64)s)<<32 | a; + return 8; + } + + p++; + a = a<<15; + a |= *p; + /* a: p4<<29 | p6<<15 | p8 (unmasked) */ + + /* moved CSE2 up */ + /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */ + b &= SLOT_2_0; + b = b<<8; + a |= b; + + s = s<<4; + b = p[-4]; + b &= 0x7f; + b = b>>3; + s |= b; + + *v = ((u64)s)<<32 | a; + + return 9; +} + +/* +** The variable-length integer encoding is as follows: +** +** KEY: +** A = 0xxxxxxx 7 bits of data and one flag bit +** B = 1xxxxxxx 7 bits of data and one flag bit +** C = xxxxxxxx 8 bits of data +** +** 7 bits - A +** 14 bits - BA +** 21 bits - BBA +** 28 bits - BBBA +** 35 bits - BBBBA +** 42 bits - BBBBBA +** 49 bits - BBBBBBA +** 56 bits - BBBBBBBA +** 64 bits - BBBBBBBBC +*/ + +#ifdef SQLITE_NOINLINE +# define FTS5_NOINLINE SQLITE_NOINLINE +#else +# define FTS5_NOINLINE +#endif + +/* +** Write a 64-bit variable-length integer to memory starting at p[0]. +** The length of data write will be between 1 and 9 bytes. The number +** of bytes written is returned. +** +** A variable-length integer consists of the lower 7 bits of each byte +** for all bytes that have the 8th bit set and one byte with the 8th +** bit clear. Except, if we get to the 9th byte, it stores the full +** 8 bits and is the last byte. 
+*/ +static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ + int i, j, n; + u8 buf[10]; + if( v & (((u64)0xff000000)<<32) ){ + p[8] = (u8)v; + v >>= 8; + for(i=7; i>=0; i--){ + p[i] = (u8)((v & 0x7f) | 0x80); + v >>= 7; + } + return 9; + } + n = 0; + do{ + buf[n++] = (u8)((v & 0x7f) | 0x80); + v >>= 7; + }while( v!=0 ); + buf[0] &= 0x7f; + assert( n<=9 ); + for(i=0, j=n-1; j>=0; j--, i++){ + p[i] = buf[j]; + } + return n; +} + +int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ + if( v<=0x7f ){ + p[0] = v&0x7f; + return 1; + } + if( v<=0x3fff ){ + p[0] = ((v>>7)&0x7f)|0x80; + p[1] = v&0x7f; + return 2; + } + return fts5PutVarint64(p,v); +} + + +int sqlite3Fts5GetVarintLen(u32 iVal){ + if( iVal<(1 << 7 ) ) return 1; + if( iVal<(1 << 14) ) return 2; + if( iVal<(1 << 21) ) return 3; + if( iVal<(1 << 28) ) return 4; + return 5; +} + +#endif /* SQLITE_ENABLE_FTS5 */ diff --git a/main.mk b/main.mk index 80914dcec3..efd0b1104b 100644 --- a/main.mk +++ b/main.mk @@ -82,6 +82,7 @@ LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o LIBOBJ += fts5_tokenize.o LIBOBJ += fts5_unicode2.o +LIBOBJ += fts5_varint.o LIBOBJ += fts5_vocab.o LIBOBJ += fts5parse.o @@ -249,6 +250,7 @@ SRC += \ $(TOP)/ext/fts5/fts5_storage.c \ $(TOP)/ext/fts5/fts5_tokenize.c \ $(TOP)/ext/fts5/fts5_unicode2.c \ + $(TOP)/ext/fts5/fts5_varint.c \ $(TOP)/ext/fts5/fts5_vocab.c @@ -667,6 +669,9 @@ fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR) fts5_unicode2.o: $(TOP)/ext/fts5/fts5_unicode2.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c +fts5_varint.o: $(TOP)/ext/fts5/fts5_varint.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_varint.c + fts5_vocab.o: $(TOP)/ext/fts5/fts5_vocab.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_vocab.c diff --git a/manifest b/manifest index ecafa77346..2f6be39866 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sextra\stests\sand\sfixes\sfor\smulti-column\smatches. 
-D 2015-05-29T19:00:22.571 +C Remove\sthe\s"#include\ssqlite3Int.h"\sfrom\sfts5Int.h. +D 2015-05-30T11:49:58.614 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,19 +105,20 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 74d18b4dc7518c7cd85609f1541e83bc564619a2 +F ext/fts5/fts5.c 34e5098e85ed14cc120004c5622536b77ddf4976 F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 -F ext/fts5/fts5Int.h 3bcecc469fe570ab188d123e1d33d6e5e11a5129 +F ext/fts5/fts5Int.h 4c677f3b797acde90ba1b7730eca6a32e7def742 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 -F ext/fts5/fts5_buffer.c 861599a0abe2383f0cd0352c57001140a26b0930 +F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c 1685b331ecb880cb8807e2dc7fc4184d2933bb96 -F ext/fts5/fts5_hash.c 54dd25348a46ea62ea96322c572e08cd1fb37304 -F ext/fts5/fts5_index.c 59b8a3dfde24ddb80c31088148a3dfc779db22ab -F ext/fts5/fts5_storage.c 5d2b51adb304643d8f825ba89283d628418b20c2 +F ext/fts5/fts5_expr.c 6a683326d6ae4e58420792e84576af9c7a8a89e4 +F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc +F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b +F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c -F ext/fts5/fts5_tokenize.c 24649425adfea2c4877d8f69f2754b70374940ec +F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 +F 
ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 4ee667932d561a150d96483cf563281b95a9e523 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba @@ -225,7 +226,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk a17ff4be35788f0a007b80af5e2a4f3036f03882 +F main.mk 5ff584ca0d7bd7d7006965d6f04c95b73a444824 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk d5e22023b5238985bb54a72d33e0ac71fe4f8a32 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1297,7 +1298,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 94f196c9961e0ca3513e29f57125a3197808be2d F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 69bae8ce4aa52d2ff82d4a8a856bf283ec035b2e -F tool/mksqlite3c.tcl ddd1ab091b2b42cf5cef07db5003b4c88272754e +F tool/mksqlite3c.tcl 23c7cddd9f9ccd192e7a73758aaf46a8159441bb F tool/mksqlite3h.tcl 44730d586c9031638cdd2eb443b801c0d2dbd9f8 F tool/mksqlite3internalh.tcl eb994013e833359137eb53a55acdad0b5ae1049b F tool/mkvsix.tcl 3b58b9398f91c7dbf18d49eb87cefeee9efdbce1 @@ -1332,7 +1333,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 0fc0ea20920615f3e48ea2dbe2b7dcd979b0993e -R 358d1aaaac138ca9e6a38de66a871d33 +P ae6794ffa23ef6191bd8834422abf322d978c11b +R bc0305687d74df992086e66a1770f40c U dan -Z c843d103a5028eb0d546bbe1e7e6abdc +Z 008bbbc1e4c3598d73e809e2e8e489be diff --git a/manifest.uuid b/manifest.uuid index 6a7e8e7df1..3d91049339 100644 --- a/manifest.uuid +++ 
b/manifest.uuid @@ -1 +1 @@ -ae6794ffa23ef6191bd8834422abf322d978c11b \ No newline at end of file +e008c3c8e29c843ec945ddad54b9688bbf2bdb44 \ No newline at end of file diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index a61868a6bf..199fbebd71 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -379,6 +379,7 @@ foreach file { fts5_storage.c fts5_tokenize.c fts5_unicode2.c + fts5_varint.c fts5_vocab.c rtree.c From 34efc82eed62be840e8bf78564aa35a44f328e8e Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 1 Jun 2015 09:15:20 +0000 Subject: [PATCH 148/206] Change fts5 expression processing to avoid linear scans of long doclists caused by phrases that match specific columns only. FossilOrigin-Name: ec69e09a55b4daf1c40aeaaf9ee95091fe86f5c0 --- ext/fts5/fts5_expr.c | 265 +++++++++++++++++++++++----------- ext/fts5/test/fts5_common.tcl | 2 +- ext/fts5/test/fts5auto.test | 120 ++++++++------- manifest | 16 +- manifest.uuid | 2 +- 5 files changed, 260 insertions(+), 145 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 1a5f2887f4..9b3e04a74e 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -653,10 +653,8 @@ static int fts5ExprNearNextRowidMatch( Fts5ExprNode *pNode ){ Fts5ExprNearset *pNear = pNode->pNear; - int rc = SQLITE_OK; - int i, j; /* Phrase and token index, respectively */ i64 iLast; /* Lastest rowid any iterator points to */ - int bMatch; /* True if all terms are at the same rowid */ + int rc = SQLITE_OK; /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means @@ -664,20 +662,24 @@ static int fts5ExprNearNextRowidMatch( ** means the minimum rowid. 
*/ iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); - do { - bMatch = 1; - for(i=0; inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - for(j=0; jnTerm; j++){ - Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - i64 iRowid = sqlite3Fts5IterRowid(pIter); - if( iRowid!=iLast ) bMatch = 0; - if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast, &rc, &pNode->bEof) ){ - return rc; + if( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 ){ + int i, j; /* Phrase and token index, respectively */ + int bMatch; /* True if all terms are at the same rowid */ + do { + bMatch = 1; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid!=iLast ) bMatch = 0; + if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){ + return rc; + } } } - } - }while( bMatch==0 ); + }while( bMatch==0 ); + } pNode->iRowid = iLast; return rc; @@ -738,6 +740,76 @@ static int fts5ExprExtractColset ( return rc; } +static int fts5ExprNearTest( + int *pRc, + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ +){ + Fts5ExprNearset *pNear = pNode->pNear; + int rc = *pRc; + + if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ + /* If this "NEAR" object is actually a single phrase that consists + ** of a single term only, then grab pointers into the poslist + ** managed by the fts5_index.c iterator object. This is much faster + ** than synthesizing a new poslist the way we have to for more + ** complicated phrase or NEAR expressions. 
*/ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + Fts5ExprColset *pColset = pNear->pColset; + const u8 *pPos; + int nPos; + + if( rc!=SQLITE_OK ) return 0; + rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); + + /* If the term may match any column, then this must be a match. + ** Return immediately in this case. Otherwise, try to find the + ** part of the poslist that corresponds to the required column. + ** If it can be found, return. If it cannot, the next iteration + ** of the loop will test the next rowid in the database for this + ** term. */ + if( pColset==0 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.p = (u8*)pPos; + pPhrase->poslist.n = nPos; + }else if( pColset->nCol==1 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); + pPhrase->poslist.p = (u8*)pPos; + }else if( rc==SQLITE_OK ){ + rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist); + } + + *pRc = rc; + return (pPhrase->poslist.n>0); + }else{ + int i; + + /* Check that each phrase in the nearset matches the current row. + ** Populate the pPhrase->poslist buffers at the same time. If any + ** phrase is not a match, break out of the loop early. */ + for(i=0; rc==SQLITE_OK && inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 || pNear->pColset ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); + if( bMatch==0 ) break; + }else{ + rc = sqlite3Fts5IterPoslistBuffer( + pPhrase->aTerm[0].pIter, &pPhrase->poslist + ); + } + } + + *pRc = rc; + if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ + return 1; + } + } + + return 0; +} /* ** Argument pNode points to a NEAR node. 
All individual term iterators @@ -760,72 +832,16 @@ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ ){ - Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; + assert( pNode->pNear ); while( 1 ){ - if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ - /* If this "NEAR" object is actually a single phrase that consists - ** of a single term only, then grab pointers into the poslist - ** managed by the fts5_index.c iterator object. This is much faster - ** than synthesizing a new poslist the way we have to for more - ** complicated phrase or NEAR expressions. */ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; - Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - Fts5ExprColset *pColset = pNear->pColset; - const u8 *pPos; - int nPos; + /* Advance the iterators until they all point to the same rowid */ + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); + if( rc!=SQLITE_OK || pNode->bEof ) break; - rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); - - /* If the term may match any column, then this must be a match. - ** Return immediately in this case. Otherwise, try to find the - ** part of the poslist that corresponds to the required column. - ** If it can be found, return. If it cannot, the next iteration - ** of the loop will test the next rowid in the database for this - ** term. 
*/ - if( pColset==0 ){ - assert( pPhrase->poslist.nSpace==0 ); - pPhrase->poslist.p = (u8*)pPos; - pPhrase->poslist.n = nPos; - }else if( pColset->nCol==1 ){ - assert( pPhrase->poslist.nSpace==0 ); - pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); - pPhrase->poslist.p = (u8*)pPos; - }else if( rc==SQLITE_OK ){ - rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist); - } - - if( pPhrase->poslist.n ) return rc; - }else{ - int i; - - /* Advance the iterators until they all point to the same rowid */ - rc = fts5ExprNearNextRowidMatch(pExpr, pNode); - if( rc!=SQLITE_OK || pNode->bEof ) break; - - /* Check that each phrase in the nearset matches the current row. - ** Populate the pPhrase->poslist buffers at the same time. If any - ** phrase is not a match, break out of the loop early. */ - for(i=0; rc==SQLITE_OK && inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->pColset ){ - int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); - if( bMatch==0 ) break; - }else{ - rc = sqlite3Fts5IterPoslistBuffer( - pPhrase->aTerm[0].pIter, &pPhrase->poslist - ); - } - } - - if( i==pNear->nPhrase ){ - if( i==1 ) break; - if( fts5ExprNearIsMatch(&rc, pNear) ) break; - } - } + if( fts5ExprNearTest(&rc, pExpr, pNode) ) break; /* If control flows to here, then the current rowid is not a match. ** Advance all term iterators in all phrases to the next rowid. 
*/ @@ -942,10 +958,11 @@ static int fts5ExprNodeNext( }; case FTS5_AND: { - rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom); - if( rc==SQLITE_OK ){ - /* todo: update (iFrom/bFromValid) here */ - rc = fts5ExprNodeNext(pExpr, pNode->pRight, bFromValid, iFrom); + Fts5ExprNode *pLeft = pNode->pLeft; + rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom); + if( rc==SQLITE_OK && pLeft->bEof==0 ){ + assert( !bFromValid || fts5RowidCmp(pExpr, pLeft->iRowid, iFrom)>=0 ); + rc = fts5ExprNodeNext(pExpr, pNode->pRight, 1, pLeft->iRowid); } break; } @@ -994,6 +1011,67 @@ static int fts5ExprNodeNext( return rc; } +static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ + if( pNode->eType==FTS5_STRING ){ + Fts5ExprNearset *pNear = pNode->pNear; + int i; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + pPhrase->poslist.n = 0; + } + }else{ + fts5ExprNodeZeroPoslist(pNode->pLeft); + fts5ExprNodeZeroPoslist(pNode->pRight); + } +} + +static int fts5ExprNodeTest( + int *pRc, + Fts5Expr *pExpr, + i64 iRowid, + Fts5ExprNode *pNode +){ + int bRes = 0; + if( pNode->bEof || pNode->iRowid!=iRowid ){ + bRes = 0; + }else { + switch( pNode->eType ){ + case FTS5_STRING: + bRes = fts5ExprNearTest(pRc, pExpr, pNode); + if( *pRc ) bRes = 0; + break; + + case FTS5_AND: { + int bRes1 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); + int bRes2 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pRight); + assert( (bRes1==0 || bRes1==1) && (bRes2==0 || bRes2==1) ); + + bRes = (bRes1 && bRes2); + if( bRes1!=bRes2 ){ + fts5ExprNodeZeroPoslist(bRes1 ? 
pNode->pLeft : pNode->pRight); + } + break; + } + + case FTS5_OR: { + int bRes1 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); + int bRes2 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pRight); + + bRes = (bRes1 || bRes2); + break; + } + + default: + assert( pNode->eType==FTS5_NOT ); + bRes = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); + break; + } + } + + return bRes; +} + + static void fts5ExprSetEof(Fts5ExprNode *pNode){ if( pNode ){ pNode->bEof = 1; @@ -1016,7 +1094,10 @@ static int fts5ExprNodeNextMatch( switch( pNode->eType ){ case FTS5_STRING: { +#if 0 rc = fts5ExprNearNextMatch(pExpr, pNode); +#endif + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); break; } @@ -1065,7 +1146,7 @@ static int fts5ExprNodeNextMatch( cmp = fts5NodeCompare(pExpr, p1, p2); } assert( rc!=SQLITE_OK || cmp<=0 ); - if( rc || cmp<0 ) break; + if( 0==fts5ExprNodeTest(&rc, pExpr, p1->iRowid, p2) ) break; rc = fts5ExprNodeNext(pExpr, p1, 0, 0); } pNode->bEof = p1->bEof; @@ -1096,7 +1177,10 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ /* Attempt to advance to the first match */ if( rc==SQLITE_OK && pNode->bEof==0 ){ +#if 0 rc = fts5ExprNearNextMatch(pExpr, pNode); +#endif + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); } }else{ @@ -1112,7 +1196,6 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ } - /* ** Begin iterating through the set of documents in index pIdx matched by ** the MATCH expression passed as the first argument. If the "bDesc" parameter @@ -1123,11 +1206,18 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ ** is not considered an error if the query does not match any documents. 
*/ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ + Fts5ExprNode *pRoot = p->pRoot; int rc = SQLITE_OK; - if( p->pRoot ){ + if( pRoot ){ p->pIndex = pIdx; p->bDesc = bDesc; - rc = fts5ExprNodeFirst(p, p->pRoot); + rc = fts5ExprNodeFirst(p, pRoot); + if( pRoot->bEof==0 + && 0==fts5ExprNodeTest(&rc, p, pRoot->iRowid, pRoot) + && rc==SQLITE_OK + ){ + rc = sqlite3Fts5ExprNext(p); + } } return rc; } @@ -1140,7 +1230,12 @@ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ */ int sqlite3Fts5ExprNext(Fts5Expr *p){ int rc; - rc = fts5ExprNodeNext(p, p->pRoot, 0, 0); + do { + rc = fts5ExprNodeNext(p, p->pRoot, 0, 0); + }while( p->pRoot->bEof==0 + && fts5ExprNodeTest(&rc, p, p->pRoot->iRowid, p->pRoot)==0 + && rc==SQLITE_OK + ); return rc; } diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index e4a689bb72..deffec5c4a 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -279,7 +279,7 @@ proc OR {a b} { sort_poslist [concat $a $b] } proc NOT {a b} { - if {[llength $b]} { return [list] } + if {[llength $b]>0} { return [list] } return $a } diff --git a/ext/fts5/test/fts5auto.test b/ext/fts5/test/fts5auto.test index 48d9148882..30333de221 100644 --- a/ext/fts5/test/fts5auto.test +++ b/ext/fts5/test/fts5auto.test @@ -226,32 +226,13 @@ set data { {k s} {r f e j q p w} } -do_test 1.0 { - execsql { - BEGIN; - CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f); - } - foreach {rowid a b c d e f} $data { - execsql { - INSERT INTO tt(rowid, a, b, c, d, e, f) - VALUES($rowid, $a, $b, $c, $d, $e, $f) - } - } - execsql { - COMMIT; - } +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f); } {} -proc fts5_test_poslist {cmd} { - set res [list] - for {set i 0} {$i < [$cmd xInstCount]} {incr i} { - lappend res [string map {{ } .} [$cmd xInst $i]] - } - set res -} -sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist +fts5_aux_test_functions db -proc matchdata 
{expr} { +proc matchdata {expr {order ASC}} { set tclexpr [db one { SELECT fts5_expr_tcl( $expr, 'nearset $cols -pc ::pc', 'a','b','c','d','e','f' @@ -259,49 +240,88 @@ proc matchdata {expr} { }] set res [list] - db eval {SELECT rowid, * FROM tt} { + db eval "SELECT rowid, * FROM tt ORDER BY rowid $order" { set cols [list $a $b $c $d $e $f] set ::pc 0 set rowdata [eval $tclexpr] - - if {$rowdata != ""} { - lappend res $rowid $rowdata - } + if {$rowdata != ""} { lappend res $rowid $rowdata } } set res } +proc do_auto_test {tn expr} { + foreach order {asc desc} { + set res [matchdata $expr $order] + set testname "3.$tn.[string range $order 0 0].rows=[expr [llength $res]/2]" + + set ::autotest_expr $expr + do_execsql_test $testname [subst -novar { + SELECT rowid, fts5_test_poslist(tt) FROM tt + WHERE tt MATCH $::autotest_expr ORDER BY rowid [set order] + }] $res + } + + +} + #------------------------------------------------------------------------- # -do_execsql_test 2.0 { - SELECT rowid, fts5_test_poslist(tt) FROM tt WHERE tt MATCH 'a AND b'; -} [matchdata "a AND b"] +for {set fold 0} {$fold < 3} {incr fold} { + switch $fold { + 0 { set map {} } + 1 { set map { + a a b a c b d b e c f c g d h d + i e j e k f l f m g g g o h p h + q i r i s j t j u k v k w l x l + y m z m + }} -do_test 2.1 { - llength [matchdata "a AND b"] -} 62 + 2 { set map { + a a b a c a d a e a f a g a h a + i b j b k b l b m b g b o b p b + q c r c s c t c u c v c w c x c + }} + } -foreach {tn expr} { - 1 { [a] : x } - 2 { [a b] : x } - 3 { [a b f] : x } - 4 { [f a b] : x } - 5 { [f a b] : x y } - 6 { [f a b] : x + y } - 7 { [c a b] : x + c } - 8 { [c d] : "l m" } - 9 { [c e] : "l m" } -} { - set res [matchdata $expr] - do_test 3.$tn.[llength $res] { + execsql { + BEGIN; + DELETE FROM tt; + } + foreach {rowid a b c d e f} [string map $map $data] { execsql { - SELECT rowid, fts5_test_poslist(tt) FROM tt WHERE tt MATCH $expr + INSERT INTO tt(rowid, a, b, c, d, e, f) + VALUES($rowid, $a, $b, $c, 
$d, $e, $f) } - } $res -} + } + execsql COMMIT + foreach {tn expr} { + 3.1 { [a] : x } + 3.2 { [a b] : x } + 3.3 { [a b f] : x } + 3.4 { [f a b] : x } + 3.5 { [f a b] : x y } + 3.6 { [f a b] : x + y } + 3.7 { [c a b] : x + c } + 3.8 { [c d] : "l m" } + 3.9 { [c e] : "l m" } + + 4.1 { a NOT b } + 4.2 { a NOT a:b } + 4.3 { a OR (b AND c) } + 4.4 { a OR (b AND [a b c]:c) } + 4.5 { a OR "b c" } + 4.6 { a OR b OR c } + + 5.1 { a OR (b AND "b c") } + 5.2 { a OR (b AND "z c") } + } { + do_auto_test 3.$fold.$tn $expr + } +} + finish_test diff --git a/manifest b/manifest index 2f6be39866..874c169da6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sthe\s"#include\ssqlite3Int.h"\sfrom\sfts5Int.h. -D 2015-05-30T11:49:58.614 +C Change\sfts5\sexpression\sprocessing\sto\savoid\slinear\sscans\sof\slong\sdoclists\scaused\sby\sphrases\sthat\smatch\sspecific\scolumns\sonly. +D 2015-06-01T09:15:20.958 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5Int.h 4c677f3b797acde90ba1b7730eca6a32e7def742 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c 6a683326d6ae4e58420792e84576af9c7a8a89e4 +F ext/fts5/fts5_expr.c e58c9dec148a92e9040abc613eb3c7506d741d4f F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b @@ -122,7 +122,7 @@ F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 4ee667932d561a150d96483cf563281b95a9e523 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F 
ext/fts5/test/fts5_common.tcl 632ff0fd8bf3dd55c2ddaac2c16428548d5af7be +F ext/fts5/test/fts5_common.tcl 339115b24a57244e792db465c5bad482e0e7db72 F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 999fd5f44579f1eb565ed7cf3861c427537ff097 @@ -135,7 +135,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5auto.test 62e62fa7d60c50d334c5f6cf6b1ed1d49fa3d8d8 +F ext/fts5/test/fts5auto.test 3810c1c4928be0161b87dfc479ecf1b873f37c6c F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b @@ -1333,7 +1333,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ae6794ffa23ef6191bd8834422abf322d978c11b -R bc0305687d74df992086e66a1770f40c +P e008c3c8e29c843ec945ddad54b9688bbf2bdb44 +R 99cd144645ddb0146b8edfaf69348c90 U dan -Z 008bbbc1e4c3598d73e809e2e8e489be +Z 9df575eada62ea84610594e2c9d9937b diff --git a/manifest.uuid b/manifest.uuid index 3d91049339..64f51bd23f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e008c3c8e29c843ec945ddad54b9688bbf2bdb44 \ No newline at end of file +ec69e09a55b4daf1c40aeaaf9ee95091fe86f5c0 \ No newline at end of file From 5350d91ed3b8c54e47c7073e69c70979b147c629 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 1 Jun 2015 19:17:06 +0000 Subject: [PATCH 149/206] Improve performance of the fts5 AND operator. 
FossilOrigin-Name: b43e9a5b7a0483ccb102316a4dbc5e32b5bc69ec --- ext/fts5/fts5_expr.c | 321 ++++++++++++++++++++++------------ ext/fts5/test/fts5_common.tcl | 12 +- ext/fts5/test/fts5ea.test | 4 +- ext/fts5/test/fts5fault4.test | 4 +- manifest | 18 +- manifest.uuid | 2 +- 6 files changed, 228 insertions(+), 133 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 9b3e04a74e..ab0874bced 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -44,18 +44,21 @@ struct Fts5Expr { ** eType: ** Expression node type. Always one of: ** -** FTS5_AND (pLeft, pRight valid) -** FTS5_OR (pLeft, pRight valid) -** FTS5_NOT (pLeft, pRight valid) +** FTS5_AND (nChild, apChild valid) +** FTS5_OR (nChild, apChild valid) +** FTS5_NOT (nChild, apChild valid) ** FTS5_STRING (pNear valid) */ struct Fts5ExprNode { int eType; /* Node type */ - Fts5ExprNode *pLeft; /* Left hand child node */ - Fts5ExprNode *pRight; /* Right hand child node */ - Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ int bEof; /* True at EOF */ i64 iRowid; /* Current rowid */ + Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ + + /* Child nodes. For a NOT node, this array always contains 2 entries. For + ** AND or OR nodes, it contains 2 or more entries. */ + int nChild; /* Number of child nodes */ + Fts5ExprNode *apChild[0]; /* Array of child nodes */ }; /* @@ -315,8 +318,10 @@ int sqlite3Fts5ExprPhraseExpr( */ void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ if( p ){ - sqlite3Fts5ParseNodeFree(p->pLeft); - sqlite3Fts5ParseNodeFree(p->pRight); + int i; + for(i=0; inChild; i++){ + sqlite3Fts5ParseNodeFree(p->apChild[i]); + } sqlite3Fts5ParseNearsetFree(p->pNear); sqlite3_free(p); } @@ -685,6 +690,7 @@ static int fts5ExprNearNextRowidMatch( return rc; } + /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. 
If ** the position list contains entries for column iCol, then (*pa) is set @@ -898,6 +904,16 @@ static int fts5ExprNearInitAll( static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); +/* +** If pExpr is an ASC iterator, this function returns a value with the +** same sign as: +** +** (iLhs - iRhs) +** +** Otherwise, if this is a DESC iterator, the opposite is returned: +** +** (iRhs - iLhs) +*/ static int fts5RowidCmp( Fts5Expr *pExpr, i64 iLhs, @@ -913,6 +929,68 @@ static int fts5RowidCmp( } } +static void fts5ExprSetEof(Fts5ExprNode *pNode){ + if( pNode ){ + int i; + pNode->bEof = 1; + for(i=0; inChild; i++){ + fts5ExprSetEof(pNode->apChild[i]); + } + } +} + + +static int fts5ExprNodeNext(Fts5Expr*, Fts5ExprNode*, int, i64); + +/* +** Argument pNode is an FTS5_AND node. +*/ +static int fts5ExprAndNextRowid( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprNode *pAnd /* FTS5_AND node to advance */ +){ + int iChild; + i64 iLast = pAnd->iRowid; + int rc = SQLITE_OK; + int bMatch; + + assert( pAnd->bEof==0 ); + do { + bMatch = 1; + for(iChild=0; iChildnChild; iChild++){ + Fts5ExprNode *pChild = pAnd->apChild[iChild]; + if( 0 && pChild->eType==FTS5_STRING ){ + /* TODO */ + }else{ + int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); + if( cmp>0 ){ + /* Advance pChild until it points to iLast or laster */ + rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); + if( rc!=SQLITE_OK ) return rc; + } + } + + /* If the child node is now at EOF, so is the parent AND node. Otherwise, + ** the child node is guaranteed to have advanced at least as far as + ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the + ** new lastest rowid seen so far. 
*/ + assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 ); + if( pChild->bEof ){ + fts5ExprSetEof(pAnd); + bMatch = 1; + break; + }else if( iLast!=pChild->iRowid ){ + bMatch = 0; + iLast = pChild->iRowid; + } + } + }while( bMatch==0 ); + + pAnd->iRowid = iLast; + return SQLITE_OK; +} + + /* ** Compare the values currently indicated by the two nodes as follows: ** @@ -958,27 +1036,24 @@ static int fts5ExprNodeNext( }; case FTS5_AND: { - Fts5ExprNode *pLeft = pNode->pLeft; + Fts5ExprNode *pLeft = pNode->apChild[0]; rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom); - if( rc==SQLITE_OK && pLeft->bEof==0 ){ - assert( !bFromValid || fts5RowidCmp(pExpr, pLeft->iRowid, iFrom)>=0 ); - rc = fts5ExprNodeNext(pExpr, pNode->pRight, 1, pLeft->iRowid); - } break; } case FTS5_OR: { - Fts5ExprNode *p1 = pNode->pLeft; - Fts5ExprNode *p2 = pNode->pRight; - int cmp = fts5NodeCompare(pExpr, p1, p2); + int i; + int iLast = pNode->iRowid; - if( cmp<=0 || (bFromValid && fts5RowidCmp(pExpr,p1->iRowid,iFrom)<0) ){ - rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); - } - - if( cmp>=0 || (bFromValid && fts5RowidCmp(pExpr,p2->iRowid,iFrom)<0) ){ - if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom); + for(i=0; rc==SQLITE_OK && inChild; i++){ + Fts5ExprNode *p1 = pNode->apChild[i]; + assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 ); + if( p1->bEof==0 ){ + if( (p1->iRowid==iLast) + || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) + ){ + rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); + } } } @@ -986,7 +1061,8 @@ static int fts5ExprNodeNext( } default: assert( pNode->eType==FTS5_NOT ); { - rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom); + assert( pNode->nChild==2 ); + rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); break; } } @@ -1020,8 +1096,10 @@ static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ pPhrase->poslist.n = 0; } }else{ - fts5ExprNodeZeroPoslist(pNode->pLeft); - 
fts5ExprNodeZeroPoslist(pNode->pRight); + int i; + for(i=0; inChild; i++){ + fts5ExprNodeZeroPoslist(pNode->apChild[i]); + } } } @@ -1042,28 +1120,36 @@ static int fts5ExprNodeTest( break; case FTS5_AND: { - int bRes1 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); - int bRes2 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pRight); - assert( (bRes1==0 || bRes1==1) && (bRes2==0 || bRes2==1) ); - - bRes = (bRes1 && bRes2); - if( bRes1!=bRes2 ){ - fts5ExprNodeZeroPoslist(bRes1 ? pNode->pLeft : pNode->pRight); + int i; + for(i=0; inChild; i++){ + if( fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->apChild[i])==0 ){ + break; + } } + bRes = (i==pNode->nChild); + if( bRes==0 && i>0 ){ + for(i=0; inChild; i++){ + fts5ExprNodeZeroPoslist(pNode->apChild[i]); + } + } + break; } case FTS5_OR: { - int bRes1 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); - int bRes2 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pRight); - - bRes = (bRes1 || bRes2); + int i; + for(i=0; inChild; i++){ + if( fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->apChild[i]) ){ + bRes = 1; + } + } break; } default: assert( pNode->eType==FTS5_NOT ); - bRes = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); + assert( pNode->nChild==2 ); + bRes = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->apChild[0]); break; } } @@ -1072,14 +1158,6 @@ static int fts5ExprNodeTest( } -static void fts5ExprSetEof(Fts5ExprNode *pNode){ - if( pNode ){ - pNode->bEof = 1; - fts5ExprSetEof(pNode->pLeft); - fts5ExprSetEof(pNode->pRight); - } -} - /* ** If pNode currently points to a match, this function returns SQLITE_OK ** without modifying it. 
Otherwise, pNode is advanced until it does point @@ -1102,42 +1180,28 @@ static int fts5ExprNodeNextMatch( } case FTS5_AND: { - Fts5ExprNode *p1 = pNode->pLeft; - Fts5ExprNode *p2 = pNode->pRight; - - while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){ - Fts5ExprNode *pAdv; - i64 iFrom; - assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); - if( pExpr->bDesc==(p1->iRowid > p2->iRowid) ){ - pAdv = p1; - iFrom = p2->iRowid; - }else{ - pAdv = p2; - iFrom = p1->iRowid; - } - rc = fts5ExprNodeNext(pExpr, pAdv, 1, iFrom); - if( rc!=SQLITE_OK ) break; - } - if( p1->bEof || p2->bEof ){ - fts5ExprSetEof(pNode); - } - pNode->iRowid = p1->iRowid; + rc = fts5ExprAndNextRowid(pExpr, pNode); break; } case FTS5_OR: { - Fts5ExprNode *p1 = pNode->pLeft; - Fts5ExprNode *p2 = pNode->pRight; - Fts5ExprNode *pNext = (fts5NodeCompare(pExpr, p1, p2) > 0 ? p2 : p1); - pNode->bEof = pNext->bEof; + Fts5ExprNode *pNext = pNode->apChild[0]; + int i; + for(i=1; inChild; i++){ + Fts5ExprNode *pChild = pNode->apChild[i]; + if( fts5NodeCompare(pExpr, pNext, pChild)>0 ){ + pNext = pChild; + } + } pNode->iRowid = pNext->iRowid; + pNode->bEof = pNext->bEof; break; } default: assert( pNode->eType==FTS5_NOT ); { - Fts5ExprNode *p1 = pNode->pLeft; - Fts5ExprNode *p2 = pNode->pRight; + Fts5ExprNode *p1 = pNode->apChild[0]; + Fts5ExprNode *p2 = pNode->apChild[1]; + assert( pNode->nChild==2 ); while( rc==SQLITE_OK && p1->bEof==0 ){ int cmp = fts5NodeCompare(pExpr, p1, p2); @@ -1184,10 +1248,12 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ } }else{ - rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); - if( rc==SQLITE_OK ){ - rc = fts5ExprNodeFirst(pExpr, pNode->pRight); + int i; + for(i=0; inChild && rc==SQLITE_OK; i++){ + rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); } + + pNode->iRowid = pNode->apChild[0]->iRowid; if( rc==SQLITE_OK ){ rc = fts5ExprNodeNextMatch(pExpr, pNode); } @@ -1230,10 +1296,11 @@ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ */ int 
sqlite3Fts5ExprNext(Fts5Expr *p){ int rc; + Fts5ExprNode *pRoot = p->pRoot; do { - rc = fts5ExprNodeNext(p, p->pRoot, 0, 0); - }while( p->pRoot->bEof==0 - && fts5ExprNodeTest(&rc, p, p->pRoot->iRowid, p->pRoot)==0 + rc = fts5ExprNodeNext(p, pRoot, 0, 0); + }while( pRoot->bEof==0 + && fts5ExprNodeTest(&rc, p, pRoot->iRowid, p->pRoot)==0 && rc==SQLITE_OK ); return rc; @@ -1578,6 +1645,17 @@ void sqlite3Fts5ParseSetColset( } } +static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ + if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ + int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; + memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); + p->nChild += pSub->nChild; + sqlite3_free(pSub); + }else{ + p->apChild[p->nChild++] = pSub; + } +} + /* ** Allocate and return a new expression object. If anything goes wrong (i.e. ** OOM error), leave an error code in pParse and return NULL. @@ -1592,26 +1670,38 @@ Fts5ExprNode *sqlite3Fts5ParseNode( Fts5ExprNode *pRet = 0; if( pParse->rc==SQLITE_OK ){ + int nChild = 0; /* Number of children of returned node */ + int nByte; /* Bytes of space to allocate for this node */ + assert( (eType!=FTS5_STRING && !pNear) - || (eType==FTS5_STRING && !pLeft && !pRight) + || (eType==FTS5_STRING && !pLeft && !pRight) ); if( eType==FTS5_STRING && pNear==0 ) return 0; if( eType!=FTS5_STRING && pLeft==0 ) return pRight; if( eType!=FTS5_STRING && pRight==0 ) return pLeft; - pRet = (Fts5ExprNode*)sqlite3_malloc(sizeof(Fts5ExprNode)); - if( pRet==0 ){ - pParse->rc = SQLITE_NOMEM; - }else{ - memset(pRet, 0, sizeof(*pRet)); + + if( eType==FTS5_NOT ){ + nChild = 2; + }else if( eType==FTS5_AND || eType==FTS5_OR ){ + nChild = 2; + if( pLeft->eType==eType ) nChild += pLeft->nChild-1; + if( pRight->eType==eType ) nChild += pRight->nChild-1; + } + + nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*nChild; + pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); + + if( pRet ){ pRet->eType = eType; - pRet->pLeft = pLeft; - 
pRet->pRight = pRight; pRet->pNear = pNear; if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrasenPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } + }else{ + fts5ExprAddChildren(pRet, pLeft); + fts5ExprAddChildren(pRet, pRight); } } } @@ -1718,9 +1808,8 @@ static char *fts5ExprPrintTcl( if( zRet==0 ) return 0; }else{ - char *zOp = 0; - char *z1 = 0; - char *z2 = 0; + char const *zOp = 0; + int i; switch( pExpr->eType ){ case FTS5_AND: zOp = "AND"; break; case FTS5_NOT: zOp = "NOT"; break; @@ -1730,13 +1819,16 @@ static char *fts5ExprPrintTcl( break; } - z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft); - z2 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRight); - if( z1 && z2 ){ - zRet = sqlite3_mprintf("%s [%s] [%s]", zOp, z1, z2); + zRet = sqlite3_mprintf("%s", zOp); + for(i=0; zRet && inChild; i++){ + char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); + if( !z ){ + sqlite3_free(zRet); + zRet = 0; + }else{ + zRet = fts5PrintfAppend(zRet, " [%z]", z); + } } - sqlite3_free(z1); - sqlite3_free(z2); } return zRet; @@ -1785,31 +1877,32 @@ static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ } }else{ - char *zOp = 0; - char *z1 = 0; - char *z2 = 0; + char const *zOp = 0; + int i; + switch( pExpr->eType ){ - case FTS5_AND: zOp = "AND"; break; - case FTS5_NOT: zOp = "NOT"; break; + case FTS5_AND: zOp = " AND "; break; + case FTS5_NOT: zOp = " NOT "; break; default: assert( pExpr->eType==FTS5_OR ); - zOp = "OR"; + zOp = " OR "; break; } - z1 = fts5ExprPrint(pConfig, pExpr->pLeft); - z2 = fts5ExprPrint(pConfig, pExpr->pRight); - if( z1 && z2 ){ - int b1 = pExpr->pLeft->eType!=FTS5_STRING; - int b2 = pExpr->pRight->eType!=FTS5_STRING; - zRet = sqlite3_mprintf("%s%s%s %s %s%s%s", - b1 ? "(" : "", z1, b1 ? ")" : "", - zOp, - b2 ? "(" : "", z2, b2 ? 
")" : "" - ); + for(i=0; inChild; i++){ + char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); + if( z==0 ){ + sqlite3_free(zRet); + zRet = 0; + }else{ + int b = (pExpr->apChild[i]->eType!=FTS5_STRING); + zRet = fts5PrintfAppend(zRet, "%s%s%z%s", + (i==0 ? "" : zOp), + (b?"(":""), z, (b?")":"") + ); + } + if( zRet==0 ) break; } - sqlite3_free(z1); - sqlite3_free(z2); } return zRet; diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index deffec5c4a..78e45f8072 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -271,12 +271,14 @@ proc instcompare {lhs rhs} { #------------------------------------------------------------------------- # Logical operators used by the commands returned by fts5_tcl_expr(). # -proc AND {a b} { - if {[llength $a]==0 || [llength $b]==0} { return [list] } - sort_poslist [concat $a $b] +proc AND {args} { + foreach a $args { + if {[llength $a]==0} { return [list] } + } + sort_poslist [concat {*}$args] } -proc OR {a b} { - sort_poslist [concat $a $b] +proc OR {args} { + sort_poslist [concat {*}$args] } proc NOT {a b} { if {[llength $b]>0} { return [list] } diff --git a/ext/fts5/test/fts5ea.test b/ext/fts5/test/fts5ea.test index 4b5b14a441..ad05412ba9 100644 --- a/ext/fts5/test/fts5ea.test +++ b/ext/fts5/test/fts5ea.test @@ -44,8 +44,8 @@ foreach {tn expr res} { 9 {NEAR(one two)} {NEAR("one" "two", 10)} 10 {NEAR("one three"* two, 5)} {NEAR("one" + "three" * "two", 5)} 11 {a OR b NOT c} {"a" OR ("b" NOT "c")} - 12 "\x20one\x20two\x20three" {("one" AND "two") AND "three"} - 13 "\x09one\x0Atwo\x0Dthree" {("one" AND "two") AND "three"} + 12 "\x20one\x20two\x20three" {"one" AND "two" AND "three"} + 13 "\x09one\x0Atwo\x0Dthree" {"one" AND "two" AND "three"} 14 {"abc""def"} {"abc" + "def"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index 417d470ed3..6a37fcffc1 100644 --- a/ext/fts5/test/fts5fault4.test 
+++ b/ext/fts5/test/fts5fault4.test @@ -306,13 +306,13 @@ do_faultsim_test 9.1 -faults oom-* -body { do_faultsim_test 10.1 -faults oom-t* -body { db one { SELECT fts5_expr('a AND b NEAR(a b)') } } -test { - faultsim_test_result {0 {"a" AND ("b" AND NEAR("a" "b", 10))}} + faultsim_test_result {0 {"a" AND "b" AND NEAR("a" "b", 10)}} } do_faultsim_test 10.2 -faults oom-t* -body { db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') } } -test { - set res {AND [ns -col 0 -- {a b c}] [AND [ns -- {b}] [ns -near 10 -- {a} {b}]]} + set res {AND [ns -col 0 -- {a b c}] [ns -- {b}] [ns -near 10 -- {a} {b}]} faultsim_test_result [list 0 $res] } diff --git a/manifest b/manifest index 874c169da6..cba1461913 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sfts5\sexpression\sprocessing\sto\savoid\slinear\sscans\sof\slong\sdoclists\scaused\sby\sphrases\sthat\smatch\sspecific\scolumns\sonly. -D 2015-06-01T09:15:20.958 +C Improve\sperformance\sof\sthe\sfts5\sAND\soperator. +D 2015-06-01T19:17:06.810 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5Int.h 4c677f3b797acde90ba1b7730eca6a32e7def742 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c e58c9dec148a92e9040abc613eb3c7506d741d4f +F ext/fts5/fts5_expr.c e68f969e9276d312554195a158240f9705c374c1 F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b @@ -122,7 +122,7 @@ F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 
4ee667932d561a150d96483cf563281b95a9e523 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl 339115b24a57244e792db465c5bad482e0e7db72 +F ext/fts5/test/fts5_common.tcl 0b465b1127adcd1c8131f3454ab4264a6964674c F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 999fd5f44579f1eb565ed7cf3861c427537ff097 @@ -146,12 +146,12 @@ F ext/fts5/test/fts5corrupt2.test c231f532162de381fa83ec477b51cd8633fd9da7 F ext/fts5/test/fts5corrupt3.test da4e2adb2308d8587c2eff31b5aa47447b8a2edb F ext/fts5/test/fts5dlidx.test 070531bd45685e545e3e6021deb543f730a4011b F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 -F ext/fts5/test/fts5ea.test 6159e66c4fe9466c37cd6a0ed4197354588b4bcb +F ext/fts5/test/fts5ea.test 451bb37310ee6df8ef72e4354fda5621b3b51448 F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test b854f9895cb07cec58204d5a7ca82d03ce824e73 +F ext/fts5/test/fts5fault4.test 25306f396d239fd2ef35b2cc273a7f40fab80173 F ext/fts5/test/fts5fault5.test 54da9fd4c3434a1d4f6abdcb6469299d91cf5875 F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d @@ -1333,7 +1333,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e008c3c8e29c843ec945ddad54b9688bbf2bdb44 -R 99cd144645ddb0146b8edfaf69348c90 +P ec69e09a55b4daf1c40aeaaf9ee95091fe86f5c0 +R 
acaa89910e5f0d02d970b6387ace6327 U dan -Z 9df575eada62ea84610594e2c9d9937b +Z 115cb3cf0377892edfa5b36bfcc4447b diff --git a/manifest.uuid b/manifest.uuid index 64f51bd23f..d1c605efb9 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ec69e09a55b4daf1c40aeaaf9ee95091fe86f5c0 \ No newline at end of file +b43e9a5b7a0483ccb102316a4dbc5e32b5bc69ec \ No newline at end of file From dbef6e752e9c1d26f9f1a0a62fee3e523b490bf0 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 2 Jun 2015 17:57:01 +0000 Subject: [PATCH 150/206] Reimplement [ec69e09a] so that each call to the xNext() method does not involve two iterations of the match expression tree (only one). FossilOrigin-Name: 80fe305b3eefb17310a9d6185d1c8cd73ee38b1e --- ext/fts5/fts5_expr.c | 325 ++++++++++++++++-------------------- ext/fts5/fts5parse.y | 1 + ext/fts5/test/fts5auto.test | 36 ++-- manifest | 16 +- manifest.uuid | 2 +- 5 files changed, 175 insertions(+), 205 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index ab0874bced..11dfd42502 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -48,10 +48,13 @@ struct Fts5Expr { ** FTS5_OR (nChild, apChild valid) ** FTS5_NOT (nChild, apChild valid) ** FTS5_STRING (pNear valid) +** FTS5_TERM (pNear valid) */ struct Fts5ExprNode { int eType; /* Node type */ int bEof; /* True at EOF */ + int bNomatch; /* True if entry is not a match */ + i64 iRowid; /* Current rowid */ Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ @@ -61,6 +64,8 @@ struct Fts5ExprNode { Fts5ExprNode *apChild[0]; /* Array of child nodes */ }; +#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING) + /* ** An instance of the following structure represents a single search term ** or term prefix. @@ -287,7 +292,7 @@ int sqlite3Fts5ExprPhraseExpr( pNew->apExprPhrase = apPhrase; pNew->apExprPhrase[0] = pCopy; - pNode->eType = FTS5_STRING; + pNode->eType = (pCopy->nTerm==1 ? 
FTS5_TERM : FTS5_STRING); pNode->pNear = pNear; pNear->nPhrase = 1; @@ -590,13 +595,14 @@ static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ */ static int fts5ExprNearAdvanceFirst( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprNode *pNode, /* FTS5_STRING node */ + Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid, i64 iFrom ){ Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; int rc; + assert( Fts5NodeIsString(pNode) ); if( bFromValid ){ rc = sqlite3Fts5IterNextFrom(pIter, iFrom); }else{ @@ -642,55 +648,6 @@ static int fts5ExprAdvanceto( return 0; } -/* -** All individual term iterators in pNear are guaranteed to be valid when -** this function is called. This function checks if all term iterators -** point to the same rowid, and if not, advances them until they do. -** If an EOF is reached before this happens, *pbEof is set to true before -** returning. -** -** SQLITE_OK is returned if an error occurs, or an SQLite error code -** otherwise. It is not considered an error code if an iterator reaches -** EOF. -*/ -static int fts5ExprNearNextRowidMatch( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprNode *pNode -){ - Fts5ExprNearset *pNear = pNode->pNear; - i64 iLast; /* Lastest rowid any iterator points to */ - int rc = SQLITE_OK; - - /* Initialize iLast, the "lastest" rowid any iterator points to. If the - ** iterator skips through rowids in the default ascending order, this means - ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it - ** means the minimum rowid. 
*/ - iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); - - if( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 ){ - int i, j; /* Phrase and token index, respectively */ - int bMatch; /* True if all terms are at the same rowid */ - do { - bMatch = 1; - for(i=0; inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - for(j=0; jnTerm; j++){ - Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - i64 iRowid = sqlite3Fts5IterRowid(pIter); - if( iRowid!=iLast ) bMatch = 0; - if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){ - return rc; - } - } - } - }while( bMatch==0 ); - } - - pNode->iRowid = iLast; - return rc; -} - - /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. If ** the position list contains entries for column iCol, then (*pa) is set @@ -817,45 +774,97 @@ static int fts5ExprNearTest( return 0; } +static int fts5ExprTokenTest( + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ +){ + /* As this "NEAR" object is actually a single phrase that consists + ** of a single term only, grab pointers into the poslist managed by the + ** fts5_index.c iterator object. This is much faster than synthesizing + ** a new poslist the way we have to for more complicated phrase or NEAR + ** expressions. */ + Fts5ExprNearset *pNear = pNode->pNear; + Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + Fts5ExprColset *pColset = pNear->pColset; + const u8 *pPos; + int nPos; + int rc; + + assert( pNode->eType==FTS5_TERM ); + assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); + + rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); + + /* If the term may match any column, then this must be a match. + ** Return immediately in this case. Otherwise, try to find the + ** part of the poslist that corresponds to the required column. + ** If it can be found, return. 
If it cannot, the next iteration + ** of the loop will test the next rowid in the database for this + ** term. */ + if( pColset==0 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.p = (u8*)pPos; + pPhrase->poslist.n = nPos; + }else if( pColset->nCol==1 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); + pPhrase->poslist.p = (u8*)pPos; + }else if( rc==SQLITE_OK ){ + rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist); + } + + pNode->bNomatch = (pPhrase->poslist.n==0); + return rc; +} + /* -** Argument pNode points to a NEAR node. All individual term iterators -** point to valid entries (not EOF). -* -** This function tests if the term iterators currently all point to the -** same rowid, and if so, if the row matches the phrase and NEAR constraints. -** If so, the pPhrase->poslist buffers are populated and the pNode->iRowid -** variable set before returning. Or, if the current combination of -** iterators is not a match, they are advanced until they are. If one of -** the iterators reaches EOF before a match is found, *pbEof is set to -** true before returning. The final values of the pPhrase->poslist and -** iRowid fields are undefined in this case. +** All individual term iterators in pNear are guaranteed to be valid when +** this function is called. This function checks if all term iterators +** point to the same rowid, and if not, advances them until they do. +** If an EOF is reached before this happens, *pbEof is set to true before +** returning. ** ** SQLITE_OK is returned if an error occurs, or an SQLite error code ** otherwise. It is not considered an error code if an iterator reaches ** EOF. 
*/ static int fts5ExprNearNextMatch( - Fts5Expr *pExpr, /* Expression that pNear is a part of */ - Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprNode *pNode ){ + Fts5ExprNearset *pNear = pNode->pNear; + Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK; + i64 iLast; /* Lastest rowid any iterator points to */ + int i, j; /* Phrase and token index, respectively */ + int bMatch; /* True if all terms are at the same rowid */ - assert( pNode->pNear ); - while( 1 ){ + assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 ); - /* Advance the iterators until they all point to the same rowid */ - rc = fts5ExprNearNextRowidMatch(pExpr, pNode); - if( rc!=SQLITE_OK || pNode->bEof ) break; + /* Initialize iLast, the "lastest" rowid any iterator points to. If the + ** iterator skips through rowids in the default ascending order, this means + ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it + ** means the minimum rowid. */ + iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); - if( fts5ExprNearTest(&rc, pExpr, pNode) ) break; - - /* If control flows to here, then the current rowid is not a match. - ** Advance all term iterators in all phrases to the next rowid. 
*/ - if( rc==SQLITE_OK ){ - rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0); + do { + bMatch = 1; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid!=iLast ) bMatch = 0; + if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){ + return rc; + } + } } - if( pNode->bEof || rc!=SQLITE_OK ) break; - } + }while( bMatch==0 ); + + pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode)); + pNode->iRowid = iLast; return rc; } @@ -939,6 +948,22 @@ static void fts5ExprSetEof(Fts5ExprNode *pNode){ } } +static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ + if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ + Fts5ExprNearset *pNear = pNode->pNear; + int i; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + pPhrase->poslist.n = 0; + } + }else{ + int i; + for(i=0; inChild; i++){ + fts5ExprNodeZeroPoslist(pNode->apChild[i]); + } + } +} + static int fts5ExprNodeNext(Fts5Expr*, Fts5ExprNode*, int, i64); @@ -956,6 +981,7 @@ static int fts5ExprAndNextRowid( assert( pAnd->bEof==0 ); do { + pAnd->bNomatch = 0; bMatch = 1; for(iChild=0; iChildnChild; iChild++){ Fts5ExprNode *pChild = pAnd->apChild[iChild]; @@ -983,9 +1009,16 @@ static int fts5ExprAndNextRowid( bMatch = 0; iLast = pChild->iRowid; } + + if( pChild->bNomatch ){ + pAnd->bNomatch = 1; + } } }while( bMatch==0 ); + if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ + fts5ExprNodeZeroPoslist(pAnd); + } pAnd->iRowid = iLast; return SQLITE_OK; } @@ -1035,6 +1068,15 @@ static int fts5ExprNodeNext( break; }; + case FTS5_TERM: { + rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); + if( pNode->bEof==0 ){ + assert( rc==SQLITE_OK ); + rc = fts5ExprTokenTest(pExpr, pNode); + } + return rc; + }; + case FTS5_AND: { Fts5ExprNode *pLeft = pNode->apChild[0]; rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom); @@ 
-1087,76 +1129,6 @@ static int fts5ExprNodeNext( return rc; } -static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ - if( pNode->eType==FTS5_STRING ){ - Fts5ExprNearset *pNear = pNode->pNear; - int i; - for(i=0; inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - pPhrase->poslist.n = 0; - } - }else{ - int i; - for(i=0; inChild; i++){ - fts5ExprNodeZeroPoslist(pNode->apChild[i]); - } - } -} - -static int fts5ExprNodeTest( - int *pRc, - Fts5Expr *pExpr, - i64 iRowid, - Fts5ExprNode *pNode -){ - int bRes = 0; - if( pNode->bEof || pNode->iRowid!=iRowid ){ - bRes = 0; - }else { - switch( pNode->eType ){ - case FTS5_STRING: - bRes = fts5ExprNearTest(pRc, pExpr, pNode); - if( *pRc ) bRes = 0; - break; - - case FTS5_AND: { - int i; - for(i=0; inChild; i++){ - if( fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->apChild[i])==0 ){ - break; - } - } - bRes = (i==pNode->nChild); - if( bRes==0 && i>0 ){ - for(i=0; inChild; i++){ - fts5ExprNodeZeroPoslist(pNode->apChild[i]); - } - } - - break; - } - - case FTS5_OR: { - int i; - for(i=0; inChild; i++){ - if( fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->apChild[i]) ){ - bRes = 1; - } - } - break; - } - - default: - assert( pNode->eType==FTS5_NOT ); - assert( pNode->nChild==2 ); - bRes = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->apChild[0]); - break; - } - } - - return bRes; -} - /* ** If pNode currently points to a match, this function returns SQLITE_OK @@ -1172,10 +1144,13 @@ static int fts5ExprNodeNextMatch( switch( pNode->eType ){ case FTS5_STRING: { -#if 0 + /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextMatch(pExpr, pNode); -#endif - rc = fts5ExprNearNextRowidMatch(pExpr, pNode); + break; + } + + case FTS5_TERM: { + rc = fts5ExprTokenTest(pExpr, pNode); break; } @@ -1187,14 +1162,17 @@ static int fts5ExprNodeNextMatch( case FTS5_OR: { Fts5ExprNode *pNext = pNode->apChild[0]; int i; + for(i=1; inChild; i++){ Fts5ExprNode *pChild = pNode->apChild[i]; - if( 
fts5NodeCompare(pExpr, pNext, pChild)>0 ){ + int cmp = fts5NodeCompare(pExpr, pNext, pChild); + if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ pNext = pChild; } } pNode->iRowid = pNext->iRowid; pNode->bEof = pNext->bEof; + pNode->bNomatch = pNext->bNomatch; break; } @@ -1210,7 +1188,7 @@ static int fts5ExprNodeNextMatch( cmp = fts5NodeCompare(pExpr, p1, p2); } assert( rc!=SQLITE_OK || cmp<=0 ); - if( 0==fts5ExprNodeTest(&rc, pExpr, p1->iRowid, p2) ) break; + if( cmp || p2->bNomatch ) break; rc = fts5ExprNodeNext(pExpr, p1, 0, 0); } pNode->bEof = p1->bEof; @@ -1234,29 +1212,19 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; pNode->bEof = 0; - if( pNode->eType==FTS5_STRING ){ - + if( Fts5NodeIsString(pNode) ){ /* Initialize all term iterators in the NEAR object. */ rc = fts5ExprNearInitAll(pExpr, pNode); - - /* Attempt to advance to the first match */ - if( rc==SQLITE_OK && pNode->bEof==0 ){ -#if 0 - rc = fts5ExprNearNextMatch(pExpr, pNode); -#endif - rc = fts5ExprNearNextRowidMatch(pExpr, pNode); - } - }else{ int i; for(i=0; inChild && rc==SQLITE_OK; i++){ rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); } - pNode->iRowid = pNode->apChild[0]->iRowid; - if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNextMatch(pExpr, pNode); - } + } + + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeNextMatch(pExpr, pNode); } return rc; } @@ -1278,11 +1246,9 @@ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ p->pIndex = pIdx; p->bDesc = bDesc; rc = fts5ExprNodeFirst(p, pRoot); - if( pRoot->bEof==0 - && 0==fts5ExprNodeTest(&rc, p, pRoot->iRowid, pRoot) - && rc==SQLITE_OK - ){ - rc = sqlite3Fts5ExprNext(p); + + while( pRoot->bNomatch && rc==SQLITE_OK && pRoot->bEof==0 ){ + rc = fts5ExprNodeNext(p, pRoot, 0, 0); } } return rc; @@ -1299,10 +1265,7 @@ int sqlite3Fts5ExprNext(Fts5Expr *p){ Fts5ExprNode *pRoot = p->pRoot; do { rc = fts5ExprNodeNext(p, pRoot, 0, 0); - }while( pRoot->bEof==0 - && fts5ExprNodeTest(&rc, p, pRoot->iRowid, 
p->pRoot)==0 - && rc==SQLITE_OK - ); + }while( pRoot->bNomatch && pRoot->bEof==0 && rc==SQLITE_OK ); return rc; } @@ -1699,6 +1662,9 @@ Fts5ExprNode *sqlite3Fts5ParseNode( for(iPhrase=0; iPhrasenPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } + if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ + pRet->eType = FTS5_TERM; + } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); @@ -1762,7 +1728,7 @@ static char *fts5ExprPrintTcl( Fts5ExprNode *pExpr ){ char *zRet = 0; - if( pExpr->eType==FTS5_STRING ){ + if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; @@ -1836,7 +1802,7 @@ static char *fts5ExprPrintTcl( static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; - if( pExpr->eType==FTS5_STRING ){ + if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; @@ -1895,7 +1861,8 @@ static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ sqlite3_free(zRet); zRet = 0; }else{ - int b = (pExpr->apChild[i]->eType!=FTS5_STRING); + int e = pExpr->apChild[i]->eType; + int b = (e!=FTS5_STRING && e!=FTS5_TERM); zRet = fts5PrintfAppend(zRet, "%s%s%z%s", (i==0 ? "" : zOp), (b?"(":""), z, (b?")":"") diff --git a/ext/fts5/fts5parse.y b/ext/fts5/fts5parse.y index 43ed42e5a9..e9c6c3d6c4 100644 --- a/ext/fts5/fts5parse.y +++ b/ext/fts5/fts5parse.y @@ -63,6 +63,7 @@ %left OR. %left AND. %left NOT. +%left TERM. %left COLON. input ::= expr(X). 
{ sqlite3Fts5ParseFinished(pParse, X); } diff --git a/ext/fts5/test/fts5auto.test b/ext/fts5/test/fts5auto.test index 30333de221..11d09ce823 100644 --- a/ext/fts5/test/fts5auto.test +++ b/ext/fts5/test/fts5auto.test @@ -290,34 +290,36 @@ for {set fold 0} {$fold < 3} {incr fold} { DELETE FROM tt; } foreach {rowid a b c d e f} [string map $map $data] { + if {$rowid==-4703774} { execsql { INSERT INTO tt(rowid, a, b, c, d, e, f) VALUES($rowid, $a, $b, $c, $d, $e, $f) } + } } execsql COMMIT foreach {tn expr} { - 3.1 { [a] : x } - 3.2 { [a b] : x } - 3.3 { [a b f] : x } - 3.4 { [f a b] : x } - 3.5 { [f a b] : x y } - 3.6 { [f a b] : x + y } - 3.7 { [c a b] : x + c } - 3.8 { [c d] : "l m" } - 3.9 { [c e] : "l m" } + A.1 { [a] : x } + A.2 { [a b] : x } + A.3 { [a b f] : x } + A.4 { [f a b] : x } + A.5 { [f a b] : x y } + A.6 { [f a b] : x + y } + A.7 { [c a b] : x + c } + A.8 { [c d] : "l m" } + A.9 { [c e] : "l m" } - 4.1 { a NOT b } - 4.2 { a NOT a:b } - 4.3 { a OR (b AND c) } - 4.4 { a OR (b AND [a b c]:c) } - 4.5 { a OR "b c" } - 4.6 { a OR b OR c } + B.1 { a NOT b } + B.2 { a NOT a:b } + B.3 { a OR (b AND c) } + B.4 { a OR (b AND [a b c]:c) } + B.5 { a OR "b c" } + B.6 { a OR b OR c } - 5.1 { a OR (b AND "b c") } - 5.2 { a OR (b AND "z c") } + C.1 { a OR (b AND "b c") } + C.2 { a OR (b AND "z c") } } { do_auto_test 3.$fold.$tn $expr } diff --git a/manifest b/manifest index cba1461913..fb06a6d43d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improve\sperformance\sof\sthe\sfts5\sAND\soperator. -D 2015-06-01T19:17:06.810 +C Reimplement\s[ec69e09a]\sso\sthat\seach\scall\sto\sthe\sxNext()\smethod\sdoes\snot\sinvolve\stwo\siterations\sof\sthe\smatch\sexpression\stree\s(only\sone). 
+D 2015-06-02T17:57:01.304 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c28e557780395095c307a6e5cb539419027eb5e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5Int.h 4c677f3b797acde90ba1b7730eca6a32e7def742 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c e68f969e9276d312554195a158240f9705c374c1 +F ext/fts5/fts5_expr.c ae3cff45a4f36d0bc7561675fbd081d6a1df78b3 F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b @@ -120,7 +120,7 @@ F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 -F ext/fts5/fts5parse.y 4ee667932d561a150d96483cf563281b95a9e523 +F ext/fts5/fts5parse.y 7f256d4de575f60f06c7c42c1514537168f0c035 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 0b465b1127adcd1c8131f3454ab4264a6964674c F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 @@ -135,7 +135,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5auto.test 3810c1c4928be0161b87dfc479ecf1b873f37c6c +F ext/fts5/test/fts5auto.test 04286120430fea482ee4b3756ce1941acd3d3962 F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test 
c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b @@ -1333,7 +1333,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ec69e09a55b4daf1c40aeaaf9ee95091fe86f5c0 -R acaa89910e5f0d02d970b6387ace6327 +P b43e9a5b7a0483ccb102316a4dbc5e32b5bc69ec +R bd2db0e568f1313170e13c32f86585fb U dan -Z 115cb3cf0377892edfa5b36bfcc4447b +Z 85ab84df13bd96f34cdaff4b243d8a96 diff --git a/manifest.uuid b/manifest.uuid index d1c605efb9..b809703e82 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b43e9a5b7a0483ccb102316a4dbc5e32b5bc69ec \ No newline at end of file +80fe305b3eefb17310a9d6185d1c8cd73ee38b1e \ No newline at end of file From f67bb4a21ba6c80f5cb321c286ae14978de128d9 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 2 Jun 2015 19:38:15 +0000 Subject: [PATCH 151/206] Change the fts5 multi-column syntax to use parenthesis instead of square brackets. 
FossilOrigin-Name: ab85a6fc4f7580278fc9d1f0090fdcf0a90d065b --- ext/fts5/fts5_expr.c | 4 ++-- ext/fts5/fts5parse.y | 2 +- ext/fts5/test/fts5ac.test | 24 ++++++++++++------------ ext/fts5/test/fts5auto.test | 20 ++++++++++---------- manifest | 18 +++++++++--------- manifest.uuid | 2 +- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 82ba8a7d2c..24e2c95dc3 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -154,8 +154,8 @@ static int fts5ExprGetToken( switch( *z ){ case '(': tok = FTS5_LP; break; case ')': tok = FTS5_RP; break; - case '[': tok = FTS5_LSP; break; - case ']': tok = FTS5_RSP; break; + case '{': tok = FTS5_LCP; break; + case '}': tok = FTS5_RCP; break; case ':': tok = FTS5_COLON; break; case ',': tok = FTS5_COMMA; break; case '+': tok = FTS5_PLUS; break; diff --git a/ext/fts5/fts5parse.y b/ext/fts5/fts5parse.y index e9c6c3d6c4..c880dc92cb 100644 --- a/ext/fts5/fts5parse.y +++ b/ext/fts5/fts5parse.y @@ -106,7 +106,7 @@ cnearset(A) ::= colset(X) COLON nearset(Y). { %type colsetlist {Fts5ExprColset*} %destructor colsetlist { sqlite3_free($$); } -colset(A) ::= LSP colsetlist(X) RSP. { A = X; } +colset(A) ::= LCP colsetlist(X) RCP. { A = X; } colset(A) ::= STRING(X). 
{ A = sqlite3Fts5ParseColset(pParse, 0, &X); } diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index 96c2e9f290..8bc8accb9b 100644 --- a/ext/fts5/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -258,20 +258,20 @@ foreach {tn2 sql} { 1.2 "y:a" 1.3 "x:b" 1.4 "y:b" - 2.1 "[x]:a" - 2.2 "[y]:a" - 2.3 "[x]:b" - 2.4 "[y]:b" + 2.1 "{x}:a" + 2.2 "{y}:a" + 2.3 "{x}:b" + 2.4 "{y}:b" - 3.1 "[x y]:a" - 3.2 "[y x]:a" - 3.3 "[x x]:b" - 3.4 "[y y]:b" + 3.1 "{x y}:a" + 3.2 "{y x}:a" + 3.3 "{x x}:b" + 3.4 "{y y}:b" - 4.1 {["x" "y"]:a} - 4.2 {["y" x]:a} - 4.3 {[x "x"]:b} - 4.4 {["y" y]:b} + 4.1 {{"x" "y"}:a} + 4.2 {{"y" x}:a} + 4.3 {{x "x"}:b} + 4.4 {{"y" y}:b} } { set res [matchdata 1 $expr] do_execsql_test $tn2.3.$tn.[llength $res] { diff --git a/ext/fts5/test/fts5auto.test b/ext/fts5/test/fts5auto.test index 11d09ce823..52a54fc04e 100644 --- a/ext/fts5/test/fts5auto.test +++ b/ext/fts5/test/fts5auto.test @@ -301,20 +301,20 @@ for {set fold 0} {$fold < 3} {incr fold} { foreach {tn expr} { - A.1 { [a] : x } - A.2 { [a b] : x } - A.3 { [a b f] : x } - A.4 { [f a b] : x } - A.5 { [f a b] : x y } - A.6 { [f a b] : x + y } - A.7 { [c a b] : x + c } - A.8 { [c d] : "l m" } - A.9 { [c e] : "l m" } + A.1 { {a} : x } + A.2 { {a b} : x } + A.3 { {a b f} : x } + A.4 { {f a b} : x } + A.5 { {f a b} : x y } + A.6 { {f a b} : x + y } + A.7 { {c a b} : x + c } + A.8 { {c d} : "l m" } + A.9 { {c e} : "l m" } B.1 { a NOT b } B.2 { a NOT a:b } B.3 { a OR (b AND c) } - B.4 { a OR (b AND [a b c]:c) } + B.4 { a OR (b AND {a b c}:c) } B.5 { a OR "b c" } B.6 { a OR b OR c } diff --git a/manifest b/manifest index 1f323041f9..49ba67b417 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. -D 2015-06-02T18:07:58.512 +C Change\sthe\sfts5\smulti-column\ssyntax\sto\suse\sparenthesis\sinstead\sof\ssquare\sbrackets. 
+D 2015-06-02T19:38:15.157 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in d272f8755b464f20e02dd7799bfe16794c9574c4 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5Int.h 4c677f3b797acde90ba1b7730eca6a32e7def742 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c aa5683ce005fc8589d0336c10da153de5bebe14b +F ext/fts5/fts5_expr.c b28917bc2ec08eca4c8395f2d2b61adeae489462 F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b @@ -120,12 +120,12 @@ F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 -F ext/fts5/fts5parse.y 7f256d4de575f60f06c7c42c1514537168f0c035 +F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 0b465b1127adcd1c8131f3454ab4264a6964674c F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad -F ext/fts5/test/fts5ac.test 999fd5f44579f1eb565ed7cf3861c427537ff097 +F ext/fts5/test/fts5ac.test 0990ae7497ebaea2ab5f7fd5caedd93a71a905fc F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a @@ -135,7 +135,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 
05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5auto.test 04286120430fea482ee4b3756ce1941acd3d3962 +F ext/fts5/test/fts5auto.test 3cef6f63c306bac05b95f47a94c3e87de71e61e3 F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b @@ -1357,7 +1357,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 80fe305b3eefb17310a9d6185d1c8cd73ee38b1e 9678646d9a14ba283a83839be329599a676a537a -R 085e764566126c29cff07829afec22bb +P c9ffda4abb4390bbc5719e269196e2807b254f97 +R 29c0ea9b35d0dc8e0559ad6dbdc1731d U dan -Z caf6ceaecaeb79d76ac38c51b7f66e87 +Z 95711865cf9f19b5a5f7ef868f1de2a7 diff --git a/manifest.uuid b/manifest.uuid index b4d7b058d4..0a6bf7e949 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c9ffda4abb4390bbc5719e269196e2807b254f97 \ No newline at end of file +ab85a6fc4f7580278fc9d1f0090fdcf0a90d065b \ No newline at end of file From 6f277eb6bcaf42f939dc67774b1e108e090b6055 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 3 Jun 2015 11:23:30 +0000 Subject: [PATCH 152/206] Fix an fts5 problem in extracting columns from position lists containing large varints. 
FossilOrigin-Name: 4ea015ab983300d420ef104cca550b22a6395866 --- ext/fts5/fts5_expr.c | 125 +++++++++++----------------------- ext/fts5/test/fts5auto.test | 76 +++++++++++++++++---- ext/fts5/test/fts5fault4.test | 28 ++++++++ manifest | 16 ++--- manifest.uuid | 2 +- 5 files changed, 138 insertions(+), 109 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 24e2c95dc3..6af3b84f31 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -668,7 +668,7 @@ static int fts5ExprExtractCol( while( iCol!=iCurrent ){ /* Advance pointer p until it points to pEnd or an 0x01 byte that is ** not part of a varint */ - while( !(prev & 0x80) && *p!=0x01 ){ + while( (prev & 0x80) || *p!=0x01 ){ prev = *p++; if( p==pEnd ) return 0; } @@ -678,7 +678,8 @@ static int fts5ExprExtractCol( /* Advance pointer p until it points to pEnd or an 0x01 byte that is ** not part of a varint */ - while( ppNear; int rc = *pRc; + int i; - if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ - /* If this "NEAR" object is actually a single phrase that consists - ** of a single term only, then grab pointers into the poslist - ** managed by the fts5_index.c iterator object. This is much faster - ** than synthesizing a new poslist the way we have to for more - ** complicated phrase or NEAR expressions. */ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; - Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - Fts5ExprColset *pColset = pNear->pColset; - const u8 *pPos; - int nPos; - - if( rc!=SQLITE_OK ) return 0; - rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); - - /* If the term may match any column, then this must be a match. - ** Return immediately in this case. Otherwise, try to find the - ** part of the poslist that corresponds to the required column. - ** If it can be found, return. If it cannot, the next iteration - ** of the loop will test the next rowid in the database for this - ** term. 
*/ - if( pColset==0 ){ - assert( pPhrase->poslist.nSpace==0 ); - pPhrase->poslist.p = (u8*)pPos; - pPhrase->poslist.n = nPos; - }else if( pColset->nCol==1 ){ - assert( pPhrase->poslist.nSpace==0 ); - pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); - pPhrase->poslist.p = (u8*)pPos; - }else if( rc==SQLITE_OK ){ - rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist); + /* Check that each phrase in the nearset matches the current row. + ** Populate the pPhrase->poslist buffers at the same time. If any + ** phrase is not a match, break out of the loop early. */ + for(i=0; rc==SQLITE_OK && inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 || pNear->pColset ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); + if( bMatch==0 ) break; + }else{ + rc = sqlite3Fts5IterPoslistBuffer( + pPhrase->aTerm[0].pIter, &pPhrase->poslist + ); } + } - *pRc = rc; - return (pPhrase->poslist.n>0); - }else{ - int i; - - /* Check that each phrase in the nearset matches the current row. - ** Populate the pPhrase->poslist buffers at the same time. If any - ** phrase is not a match, break out of the loop early. 
*/ - for(i=0; rc==SQLITE_OK && inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->pColset ){ - int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); - if( bMatch==0 ) break; - }else{ - rc = sqlite3Fts5IterPoslistBuffer( - pPhrase->aTerm[0].pIter, &pPhrase->poslist - ); - } - } - - *pRc = rc; - if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ - return 1; - } + *pRc = rc; + if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ + return 1; } return 0; @@ -939,12 +902,10 @@ static int fts5RowidCmp( } static void fts5ExprSetEof(Fts5ExprNode *pNode){ - if( pNode ){ - int i; - pNode->bEof = 1; - for(i=0; inChild; i++){ - fts5ExprSetEof(pNode->apChild[i]); - } + int i; + pNode->bEof = 1; + for(i=0; inChild; i++){ + fts5ExprSetEof(pNode->apChild[i]); } } @@ -1562,34 +1523,26 @@ Fts5ExprColset *sqlite3Fts5ParseColset( Fts5Token *p ){ Fts5ExprColset *pRet = 0; + int iCol; + char *z; /* Dequoted copy of token p */ + z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); if( pParse->rc==SQLITE_OK ){ - int iCol; - char *z = 0; - int rc = fts5ParseStringFromToken(p, &z); - if( rc==SQLITE_OK ){ - Fts5Config *pConfig = pParse->pConfig; - sqlite3Fts5Dequote(z); - for(iCol=0; iColnCol; iCol++){ - if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ){ - break; - } - } - if( iCol==pConfig->nCol ){ - sqlite3Fts5ParseError(pParse, "no such column: %s", z); - } - sqlite3_free(z); - }else{ - pParse->rc = rc; + Fts5Config *pConfig = pParse->pConfig; + sqlite3Fts5Dequote(z); + for(iCol=0; iColnCol; iCol++){ + if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break; } - - if( pParse->rc==SQLITE_OK ){ + if( iCol==pConfig->nCol ){ + sqlite3Fts5ParseError(pParse, "no such column: %s", z); + }else{ pRet = fts5ParseColset(pParse, pColset, iCol); } + sqlite3_free(z); } - if( pParse->rc!=SQLITE_OK ){ - assert( pRet==0 ); + if( pRet==0 ){ + assert( pParse->rc!=SQLITE_OK ); sqlite3_free(pColset); 
} @@ -1771,8 +1724,6 @@ static char *fts5ExprPrintTcl( if( zRet==0 ) return 0; } - if( zRet==0 ) return 0; - }else{ char const *zOp = 0; int i; diff --git a/ext/fts5/test/fts5auto.test b/ext/fts5/test/fts5auto.test index 52a54fc04e..771a0b64d8 100644 --- a/ext/fts5/test/fts5auto.test +++ b/ext/fts5/test/fts5auto.test @@ -232,33 +232,43 @@ do_execsql_test 1.0 { fts5_aux_test_functions db -proc matchdata {expr {order ASC}} { - set tclexpr [db one { +proc matchdata {expr tbl collist {order ASC}} { + + set cols "" + foreach e $collist { + append cols ", '$e'" + } + + set tclexpr [db one [subst -novar { SELECT fts5_expr_tcl( - $expr, 'nearset $cols -pc ::pc', 'a','b','c','d','e','f' + $expr, 'nearset $cols -pc ::pc' [set cols] ) - }] + }]] set res [list] - db eval "SELECT rowid, * FROM tt ORDER BY rowid $order" { - set cols [list $a $b $c $d $e $f] + db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x { + set cols [list] + foreach col $x(*) { + if {$col != "rowid"} { lappend cols $x($col) } + } + # set cols [list $a $b $c $d $e $f] set ::pc 0 set rowdata [eval $tclexpr] - if {$rowdata != ""} { lappend res $rowid $rowdata } + if {$rowdata != ""} { lappend res $x(rowid) $rowdata } } set res } -proc do_auto_test {tn expr} { +proc do_auto_test {tn tbl cols expr} { foreach order {asc desc} { - set res [matchdata $expr $order] - set testname "3.$tn.[string range $order 0 0].rows=[expr [llength $res]/2]" + set res [matchdata $expr $tbl $cols $order] + set testname "$tn.[string range $order 0 0].rows=[expr [llength $res]/2]" set ::autotest_expr $expr do_execsql_test $testname [subst -novar { - SELECT rowid, fts5_test_poslist(tt) FROM tt - WHERE tt MATCH $::autotest_expr ORDER BY rowid [set order] + SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl] + WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order] }] $res } @@ -310,6 +320,7 @@ for {set fold 0} {$fold < 3} {incr fold} { A.7 { {c a b} : x + c } A.8 { {c d} : "l m" } A.9 { {c e} : "l m" } + A.10 { 
{a b c a b c a b c f f e} : "l m" } B.1 { a NOT b } B.2 { a NOT a:b } @@ -321,9 +332,48 @@ for {set fold 0} {$fold < 3} {incr fold} { C.1 { a OR (b AND "b c") } C.2 { a OR (b AND "z c") } } { - do_auto_test 3.$fold.$tn $expr + do_auto_test 3.$fold.$tn tt {a b c d e f} $expr } } +proc replace_elems {list args} { + set ret $list + foreach {idx elem} $args { + set ret [lreplace $ret $idx $idx $elem] + } + set ret +} + +#------------------------------------------------------------------------- +# +set bigdoc [string trim [string repeat "a " 1000]] +do_test 4.0 { + set a [replace_elems $bigdoc 50 x 950 x] + set b [replace_elems $bigdoc 20 y 21 x 887 x 888 y] + set c [replace_elems $bigdoc 1 z 444 z 789 z] + execsql { + CREATE VIRTUAL TABLE yy USING fts5(c1, c2, c3); + INSERT INTO yy(rowid, c1, c2, c3) VALUES(-56789, $a, $b, $c); + INSERT INTO yy(rowid, c1, c2, c3) VALUES(250, $a, $b, $c); + } +} {} + +foreach {tn expr} { + 1 x + 2 y + 3 z + + 4 {c1 : x} 5 {c2 : x} 6 {c3 : x} + 7 {c1 : y} 8 {c2 : y} 9 {c3 : y} + 10 {c1 : z} 11 {c2 : z} 12 {c3 : z} + + +} { +breakpoint + do_auto_test 4.$tn yy {c1 c2 c3} $expr +} + + + finish_test diff --git a/ext/fts5/test/fts5fault4.test b/ext/fts5/test/fts5fault4.test index 6a37fcffc1..f224df40cd 100644 --- a/ext/fts5/test/fts5fault4.test +++ b/ext/fts5/test/fts5fault4.test @@ -371,5 +371,33 @@ do_faultsim_test 13.1 -faults oom-t* -prep { faultsim_test_result {0 {a 1 1 b 1 1}} } +#------------------------------------------------------------------------- +# OOM in multi-column token query. 
+# +reset_db +do_execsql_test 13.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, y, z); + INSERT INTO ft(ft, rank) VALUES('pgsz', 32); + INSERT INTO ft VALUES( + 'x x x x x x x x x x x x x x x x', + 'y y y y y y y y y y y y y y y y', + 'z z z z z z z z x x x x x x x x' + ); + INSERT INTO ft SELECT * FROM ft; + INSERT INTO ft SELECT * FROM ft; + INSERT INTO ft SELECT * FROM ft; + INSERT INTO ft SELECT * FROM ft; +} +faultsim_save_and_close +do_faultsim_test 13.1 -faults oom-t* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM ft } +} -body { + db eval { SELECT rowid FROM ft WHERE ft MATCH '{x z}: x' } +} -test { + faultsim_test_result {0 {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}} +} + + finish_test diff --git a/manifest b/manifest index 49ba67b417..b7b3fe17f8 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\sfts5\smulti-column\ssyntax\sto\suse\sparenthesis\sinstead\sof\ssquare\sbrackets. -D 2015-06-02T19:38:15.157 +C Fix\san\sfts5\sproblem\sin\sextracting\scolumns\sfrom\sposition\slists\scontaining\slarge\svarints. 
+D 2015-06-03T11:23:30.476 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in d272f8755b464f20e02dd7799bfe16794c9574c4 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -111,7 +111,7 @@ F ext/fts5/fts5Int.h 4c677f3b797acde90ba1b7730eca6a32e7def742 F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c b28917bc2ec08eca4c8395f2d2b61adeae489462 +F ext/fts5/fts5_expr.c 78a498ba149fbcfbd95c9630054c27955253309d F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b @@ -135,7 +135,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5auto.test 3cef6f63c306bac05b95f47a94c3e87de71e61e3 +F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b @@ -151,7 +151,7 @@ F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 -F ext/fts5/test/fts5fault4.test 25306f396d239fd2ef35b2cc273a7f40fab80173 +F ext/fts5/test/fts5fault4.test 8671f534136aa1c80a102e8fd25b4921885e6667 F ext/fts5/test/fts5fault5.test 
54da9fd4c3434a1d4f6abdcb6469299d91cf5875 F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d @@ -1357,7 +1357,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c9ffda4abb4390bbc5719e269196e2807b254f97 -R 29c0ea9b35d0dc8e0559ad6dbdc1731d +P ab85a6fc4f7580278fc9d1f0090fdcf0a90d065b +R a504500377faead9f927e5b8c0ebee20 U dan -Z 95711865cf9f19b5a5f7ef868f1de2a7 +Z 68530452f05ae148227ba366408c8cfc diff --git a/manifest.uuid b/manifest.uuid index 0a6bf7e949..8efcca17c3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ab85a6fc4f7580278fc9d1f0090fdcf0a90d065b \ No newline at end of file +4ea015ab983300d420ef104cca550b22a6395866 \ No newline at end of file From e4449454c575a4a8926bf29d7ebeb013149000ac Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 5 Jun 2015 19:05:57 +0000 Subject: [PATCH 153/206] Make use of range constraints on the rowid field of an fts5 table in full-text queries. FossilOrigin-Name: 32cbc0ed3699cc21302f0b6a159493117ad4bd4f --- ext/fts5/fts5.c | 403 ++++++++++++++++++++++++------------ ext/fts5/fts5Int.h | 4 +- ext/fts5/fts5_expr.c | 26 ++- ext/fts5/test/fts5ah.test | 40 ++++ ext/fts5/test/fts5plan.test | 10 +- manifest | 20 +- manifest.uuid | 2 +- 7 files changed, 346 insertions(+), 159 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 07a92c3744..c3da8bccf6 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -156,10 +156,25 @@ struct Fts5Sorter { ** iSpecial: ** If this is a 'special' query (refer to function fts5SpecialMatch()), ** then this variable contains the result of the query. +** +** iFirstRowid, iLastRowid: +** These variables are only used for FTS5_PLAN_MATCH cursors. 
Assuming the +** cursor iterates in ascending order of rowids, iFirstRowid is the lower +** limit of rowids to return, and iLastRowid the upper. In other words, the +** WHERE clause in the user's query might have been: +** +** MATCH AND rowid BETWEEN $iFirstRowid AND $iLastRowid +** +** If the cursor iterates in descending order of rowid, iFirstRowid +** is the upper limit (i.e. the "first" rowid visited) and iLastRowid +** the lower. */ struct Fts5Cursor { sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - int idxNum; /* idxNum passed to xFilter() */ + int ePlan; /* FTS5_PLAN_XXX value */ + int bDesc; /* True for "ORDER BY rowid DESC" queries */ + i64 iFirstRowid; /* Return no rowids earlier than this */ + i64 iLastRowid; /* Return no rowids later than this */ sqlite3_stmt *pStmt; /* Statement used to read %_content */ Fts5Expr *pExpr; /* Expression for MATCH queries */ Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ @@ -181,10 +196,25 @@ struct Fts5Cursor { Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ int *aColumnSize; /* Values for xColumnSize() */ + /* Cache used by auxiliary functions xInst() and xInstCount() */ int nInstCount; /* Number of phrase instances */ int *aInst; /* 3 integers per phrase instance */ }; +/* +** Bits that make up the "idxNum" parameter passed indirectly by +** xBestIndex() to xFilter(). +*/ +#define FTS5_BI_MATCH 0x0001 /* MATCH ? */ +#define FTS5_BI_RANK 0x0002 /* rank MATCH ? */ +#define FTS5_BI_ROWID_EQ 0x0004 /* rowid == ? */ +#define FTS5_BI_ROWID_LE 0x0008 /* rowid <= ? */ +#define FTS5_BI_ROWID_GE 0x0010 /* rowid >= ? 
*/ + +#define FTS5_BI_ORDER_RANK 0x0020 +#define FTS5_BI_ORDER_ROWID 0x0040 +#define FTS5_BI_ORDER_DESC 0x0080 + /* ** Values for Fts5Cursor.csrflags */ @@ -194,6 +224,18 @@ struct Fts5Cursor { #define FTS5CSR_FREE_ZRANK 0x08 #define FTS5CSR_REQUIRE_RESEEK 0x10 +#define BitFlagAllTest(x,y) (((x) & (y))==(y)) +#define BitFlagTest(x,y) (((x) & (y))!=0) + +/* +** Constants for the largest and smallest possible 64-bit signed integers. +** These are copied from sqliteInt.h. +*/ +#ifndef SQLITE_AMALGAMATION +# define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) +# define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) +#endif + /* ** Macros to Set(), Clear() and Test() cursor flags. */ @@ -394,7 +436,7 @@ static int fts5CreateMethod( } /* -** The three query plans xBestIndex may choose between. +** The different query plans. */ #define FTS5_PLAN_SCAN 1 /* No usable constraint */ #define FTS5_PLAN_MATCH 2 /* ( MATCH ?) */ @@ -403,85 +445,131 @@ static int fts5CreateMethod( #define FTS5_PLAN_SOURCE 5 /* A source cursor for SORTED_MATCH */ #define FTS5_PLAN_SPECIAL 6 /* An internal query */ -#define FTS5_PLAN(idxNum) ((idxNum) & 0x7) - -#define FTS5_ORDER_DESC 8 /* ORDER BY rowid DESC */ -#define FTS5_ORDER_ASC 16 /* ORDER BY rowid ASC */ - /* -** Search the object passed as the first argument for a usable constraint -** on column iCol using operator eOp. If one is found, return its index in -** the pInfo->aConstraint[] array. If no such constraint is found, return -** a negative value. -*/ -static int fts5FindConstraint(sqlite3_index_info *pInfo, int eOp, int iCol){ - int i; - for(i=0; inConstraint; i++){ - struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; - if( p->usable && p->iColumn==iCol && p->op==eOp ) return i; - } - return -1; -} - -/* -** Implementation of the xBestIndex method for FTS5 tables. There -** are three possible strategies, in order of preference: +** Implementation of the xBestIndex method for FTS5 tables. 
Within the +** WHERE constraint, it searches for the following: ** -** 1. Full-text search using a MATCH operator. -** 2. A by-rowid lookup. -** 3. A full-table scan. +** 1. A MATCH constraint against the special column. +** 2. A MATCH constraint against the "rank" column. +** 3. An == constraint against the rowid column. +** 4. A < or <= constraint against the rowid column. +** 5. A > or >= constraint against the rowid column. +** +** Within the ORDER BY, either: +** +** 5. ORDER BY rank [ASC|DESC] +** 6. ORDER BY rowid [ASC|DESC] +** +** Costs are assigned as follows: +** +** a) If an unusable MATCH operator is present in the WHERE clause, the +** cost is unconditionally set to 1e50 (a really big number). +** +** a) If a MATCH operator is present, the cost depends on the other +** constraints also present. As follows: +** +** * No other constraints: cost=1000.0 +** * One rowid range constraint: cost=750.0 +** * Both rowid range constraints: cost=500.0 +** * An == rowid constraint: cost=100.0 +** +** b) Otherwise, if there is no MATCH: +** +** * No other constraints: cost=1000000.0 +** * One rowid range constraint: cost=750000.0 +** * Both rowid range constraints: cost=250000.0 +** * An == rowid constraint: cost=10.0 +** +** Costs are not modified by the ORDER BY clause. 
*/ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts5Table *pTab = (Fts5Table*)pVTab; Fts5Config *pConfig = pTab->pConfig; - int iCons; - int ePlan = FTS5_PLAN_SCAN; - int iRankMatch; + int idxFlags = 0; /* Parameter passed through to xFilter() */ + int bHasMatch; + int iNext; + int i; - iCons = fts5FindConstraint(pInfo,SQLITE_INDEX_CONSTRAINT_MATCH,pConfig->nCol); - if( iCons>=0 ){ - ePlan = FTS5_PLAN_MATCH; - pInfo->estimatedCost = 1.0; - }else{ - iCons = fts5FindConstraint(pInfo, SQLITE_INDEX_CONSTRAINT_EQ, -1); - if( iCons>=0 ){ - ePlan = FTS5_PLAN_ROWID; - pInfo->estimatedCost = 2.0; + struct Constraint { + int op; /* Mask against sqlite3_index_constraint.op */ + int fts5op; /* FTS5 mask for idxFlags */ + int iCol; /* 0==rowid, 1==tbl, 2==rank */ + int omit; /* True to omit this if found */ + int iConsIndex; /* Index in pInfo->aConstraint[] */ + } aConstraint[] = { + {SQLITE_INDEX_CONSTRAINT_MATCH, FTS5_BI_MATCH, 1, 1, -1}, + {SQLITE_INDEX_CONSTRAINT_MATCH, FTS5_BI_RANK, 2, 1, -1}, + {SQLITE_INDEX_CONSTRAINT_EQ, FTS5_BI_ROWID_EQ, 0, 0, -1}, + {SQLITE_INDEX_CONSTRAINT_LT|SQLITE_INDEX_CONSTRAINT_LE, + FTS5_BI_ROWID_LE, 0, 0, -1}, + {SQLITE_INDEX_CONSTRAINT_GT|SQLITE_INDEX_CONSTRAINT_GE, + FTS5_BI_ROWID_GE, 0, 0, -1}, + }; + + int aColMap[3]; + aColMap[0] = -1; + aColMap[1] = pConfig->nCol; + aColMap[2] = pConfig->nCol+1; + + /* Set idxFlags flags for all WHERE clause terms that will be used. */ + for(i=0; inConstraint; i++){ + struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; + int j; + for(j=0; jiColumn==aColMap[pC->iCol] && p->op & pC->op ){ + if( p->usable ){ + pC->iConsIndex = i; + idxFlags |= pC->fts5op; + }else if( j==0 ){ + /* As there exists an unusable MATCH constraint this is an + ** unusable plan. Set a prohibitively high cost. 
*/ + pInfo->estimatedCost = 1e50; + return SQLITE_OK; + } + } } } - if( iCons>=0 ){ - pInfo->aConstraintUsage[iCons].argvIndex = 1; - pInfo->aConstraintUsage[iCons].omit = 1; - }else{ - pInfo->estimatedCost = 10000000.0; - } - + /* Set idxFlags flags for the ORDER BY clause */ if( pInfo->nOrderBy==1 ){ int iSort = pInfo->aOrderBy[0].iColumn; - if( iSort<0 ){ - /* ORDER BY rowid [ASC|DESC] */ - pInfo->orderByConsumed = 1; - }else if( iSort==(pConfig->nCol+1) && ePlan==FTS5_PLAN_MATCH ){ - /* ORDER BY rank [ASC|DESC] */ - pInfo->orderByConsumed = 1; - ePlan = FTS5_PLAN_SORTED_MATCH; + if( iSort==(pConfig->nCol+1) && BitFlagTest(idxFlags, FTS5_BI_MATCH) ){ + idxFlags |= FTS5_BI_ORDER_RANK; + }else if( iSort==-1 ){ + idxFlags |= FTS5_BI_ORDER_ROWID; } - - if( pInfo->orderByConsumed ){ - ePlan |= pInfo->aOrderBy[0].desc ? FTS5_ORDER_DESC : FTS5_ORDER_ASC; + if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){ + pInfo->orderByConsumed = 1; + if( pInfo->aOrderBy[0].desc ){ + idxFlags |= FTS5_BI_ORDER_DESC; + } } } - iRankMatch = fts5FindConstraint( - pInfo, SQLITE_INDEX_CONSTRAINT_MATCH, pConfig->nCol+1 - ); - if( iRankMatch>=0 ){ - pInfo->aConstraintUsage[iRankMatch].argvIndex = 1 + (iCons>=0); - pInfo->aConstraintUsage[iRankMatch].omit = 1; + /* Calculate the estimated cost based on the flags set in idxFlags. */ + bHasMatch = BitFlagTest(idxFlags, FTS5_BI_MATCH); + if( BitFlagTest(idxFlags, FTS5_BI_ROWID_EQ) ){ + pInfo->estimatedCost = bHasMatch ? 100.0 : 10.0; + }else if( BitFlagAllTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){ + pInfo->estimatedCost = bHasMatch ? 500.0 : 250000.0; + }else if( BitFlagTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){ + pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0; + }else{ + pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0; } - - pInfo->idxNum = ePlan; + + /* Assign argvIndex values to each constraint in use. 
*/ + iNext = 1; + for(i=0; iiConsIndex>=0 ){ + pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++; + pInfo->aConstraintUsage[pC->iConsIndex].omit = pC->omit; + } + } + + pInfo->idxNum = idxFlags; return SQLITE_OK; } @@ -511,9 +599,9 @@ static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ return rc; } -static int fts5StmtType(int idxNum){ - if( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ){ - return (idxNum&FTS5_ORDER_DESC) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC; +static int fts5StmtType(Fts5Cursor *pCsr){ + if( pCsr->ePlan==FTS5_PLAN_SCAN ){ + return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC; } return FTS5_STMT_LOOKUP; } @@ -544,7 +632,7 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ fts5CsrNewrow(pCsr); if( pCsr->pStmt ){ - int eStmt = fts5StmtType(pCsr->idxNum); + int eStmt = fts5StmtType(pCsr); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); } if( pCsr->pSorter ){ @@ -553,7 +641,7 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ sqlite3_free(pSorter); } - if( pCsr->idxNum!=FTS5_PLAN_SOURCE ){ + if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){ sqlite3Fts5ExprFree(pCsr->pExpr); } @@ -622,7 +710,7 @@ static int fts5SorterNext(Fts5Cursor *pCsr){ static void fts5TripCursors(Fts5Table *pTab){ Fts5Cursor *pCsr; for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ - if( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_MATCH + if( pCsr->ePlan==FTS5_PLAN_MATCH && pCsr->base.pVtab==(sqlite3_vtab*)pTab ){ CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK); @@ -647,18 +735,12 @@ static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ assert( *pbSkip==0 ); if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); - int bDesc = ((pCsr->idxNum & FTS5_ORDER_DESC) ? 
1 : 0); + int bDesc = pCsr->bDesc; i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); - rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bDesc); - while( rc==SQLITE_OK && sqlite3Fts5ExprEof(pCsr->pExpr)==0 ){ - i64 ii = sqlite3Fts5ExprRowid(pCsr->pExpr); - if( ii==iRowid ) break; - if( (bDesc && ii<iRowid) || (bDesc==0 && ii>iRowid) ){ - *pbSkip = 1; - break; - } - rc = sqlite3Fts5ExprNext(pCsr->pExpr); + rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, iRowid, bDesc); + if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){ + *pbSkip = 1; } CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK); @@ -681,7 +763,7 @@ static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ */ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - int ePlan = FTS5_PLAN(pCsr->idxNum); + int ePlan = pCsr->ePlan; int bSkip = 0; int rc; @@ -690,7 +772,7 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ switch( ePlan ){ case FTS5_PLAN_MATCH: case FTS5_PLAN_SOURCE: - rc = sqlite3Fts5ExprNext(pCsr->pExpr); + rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ CsrFlagSet(pCsr, FTS5CSR_EOF); } @@ -777,8 +859,9 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ int rc; - rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bDesc); - if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ + Fts5Expr *pExpr = pCsr->pExpr; + rc = sqlite3Fts5ExprFirst(pExpr, pTab->pIndex, pCsr->iFirstRowid, bDesc); + if( sqlite3Fts5ExprEof(pExpr) ){ CsrFlagSet(pCsr, FTS5CSR_EOF); } fts5CsrNewrow(pCsr); @@ -804,7 +887,7 @@ static int fts5SpecialMatch( for(n=0; z[n] && z[n]!=' '; n++); assert( pTab->base.zErrMsg==0 ); - pCsr->idxNum = FTS5_PLAN_SPECIAL; + pCsr->ePlan = FTS5_PLAN_SPECIAL; if( 0==sqlite3_strnicmp("reads", z, n) ){ pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->pIndex); @@ -927,6 +1010,16 @@ static int fts5CursorParseRank( return rc; }
+static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ + if( pVal ){ + int eType = sqlite3_value_numeric_type(pVal); + if( eType==SQLITE_INTEGER ){ + return sqlite3_value_int64(pVal); + } + } + return iDefault; +} + /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional @@ -947,14 +1040,17 @@ static int fts5FilterMethod( ){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - int bDesc = ((idxNum & FTS5_ORDER_DESC) ? 1 : 0); - int rc = SQLITE_OK; + int rc = SQLITE_OK; /* Error code */ + int iVal = 0; /* Counter for apVal[] */ + int bDesc; /* True if ORDER BY [rank|rowid] DESC */ + int bOrderByRank; /* True if ORDER BY rank */ + sqlite3_value *pMatch = 0; /* MATCH ? expression (or NULL) */ + sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */ + sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ + sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ + sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ char **pzErrmsg = pTab->pConfig->pzErrmsg; - assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg ); - pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; - - assert( nVal<=2 ); assert( pCsr->pStmt==0 ); assert( pCsr->pExpr==0 ); assert( pCsr->csrflags==0 ); @@ -962,6 +1058,38 @@ static int fts5FilterMethod( assert( pCsr->zRank==0 ); assert( pCsr->zRankArgs==0 ); + assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg ); + pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; + + /* Decode the arguments passed through to this function. + ** + ** Note: The following set of if(...) statements must be in the same + ** order as the corresponding entries in the struct at the top of + ** fts5BestIndexMethod(). 
*/ + if( BitFlagTest(idxNum, FTS5_BI_MATCH) ) pMatch = apVal[iVal++]; + if( BitFlagTest(idxNum, FTS5_BI_RANK) ) pRank = apVal[iVal++]; + if( BitFlagTest(idxNum, FTS5_BI_ROWID_EQ) ) pRowidEq = apVal[iVal++]; + if( BitFlagTest(idxNum, FTS5_BI_ROWID_LE) ) pRowidLe = apVal[iVal++]; + if( BitFlagTest(idxNum, FTS5_BI_ROWID_GE) ) pRowidGe = apVal[iVal++]; + assert( iVal==nVal ); + bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0); + pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0); + + /* Set the cursor upper and lower rowid limits. Only some strategies + ** actually use them. This is ok, as the xBestIndex() method leaves the + ** sqlite3_index_constraint.omit flag clear for range constraints + ** on the rowid field. */ + if( pRowidEq ){ + pRowidLe = pRowidGe = pRowidEq; + } + if( bDesc ){ + pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); + pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); + }else{ + pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); + pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); + } + if( pTab->pSortCsr ){ /* If pSortCsr is non-NULL, then this call is being made as part of ** processing for a "... MATCH ORDER BY rank" query (ePlan is @@ -969,48 +1097,49 @@ static int fts5FilterMethod( ** return results to the user for this query. The current cursor ** (pCursor) is used to execute the query issued by function ** fts5CursorFirstSorted() above. 
*/ - assert( FTS5_PLAN(idxNum)==FTS5_PLAN_SCAN ); - pCsr->idxNum = FTS5_PLAN_SOURCE; + assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 ); + assert( nVal==0 && pMatch==0 && bOrderByRank==0 && bDesc==0 ); + assert( pCsr->iLastRowid==LARGEST_INT64 ); + assert( pCsr->iFirstRowid==SMALLEST_INT64 ); + pCsr->ePlan = FTS5_PLAN_SOURCE; pCsr->pExpr = pTab->pSortCsr->pExpr; rc = fts5CursorFirst(pTab, pCsr, bDesc); - }else{ - int ePlan = FTS5_PLAN(idxNum); - pCsr->idxNum = idxNum; - if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){ - const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); + }else if( pMatch ){ + const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); - rc = fts5CursorParseRank(pTab->pConfig, pCsr, (nVal==2 ? apVal[1] : 0)); - if( rc==SQLITE_OK ){ - if( zExpr[0]=='*' ){ - /* The user has issued a query of the form "MATCH '*...'". This - ** indicates that the MATCH expression is not a full text query, - ** but a request for an internal parameter. */ - rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); - }else{ - char **pzErr = &pTab->base.zErrMsg; - rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); - if( rc==SQLITE_OK ){ - if( ePlan==FTS5_PLAN_MATCH ){ - rc = fts5CursorFirst(pTab, pCsr, bDesc); - }else{ - rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); - } + rc = fts5CursorParseRank(pTab->pConfig, pCsr, pRank); + if( rc==SQLITE_OK ){ + if( zExpr[0]=='*' ){ + /* The user has issued a query of the form "MATCH '*...'". This + ** indicates that the MATCH expression is not a full text query, + ** but a request for an internal parameter. 
*/ + rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); + }else{ + char **pzErr = &pTab->base.zErrMsg; + rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); + if( rc==SQLITE_OK ){ + if( bOrderByRank ){ + pCsr->ePlan = FTS5_PLAN_SORTED_MATCH; + rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); + }else{ + pCsr->ePlan = FTS5_PLAN_MATCH; + rc = fts5CursorFirst(pTab, pCsr, bDesc); } } } - }else{ - /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup - ** by rowid (ePlan==FTS5_PLAN_ROWID). */ - int eStmt = fts5StmtType(idxNum); - rc = sqlite3Fts5StorageStmt( - pTab->pStorage, eStmt, &pCsr->pStmt, &pTab->base.zErrMsg - ); - if( rc==SQLITE_OK ){ - if( ePlan==FTS5_PLAN_ROWID ){ - sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); - } - rc = fts5NextMethod(pCursor); + } + }else{ + /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup + ** by rowid (ePlan==FTS5_PLAN_ROWID). */ + pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN); + rc = sqlite3Fts5StorageStmt( + pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->base.zErrMsg + ); + if( rc==SQLITE_OK ){ + if( pCsr->ePlan==FTS5_PLAN_ROWID ){ + sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); } + rc = fts5NextMethod(pCursor); } } @@ -1031,9 +1160,9 @@ static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ ** Return the rowid that the cursor currently points to. 
*/ static i64 fts5CursorRowid(Fts5Cursor *pCsr){ - assert( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_MATCH - || FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SORTED_MATCH - || FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SOURCE + assert( pCsr->ePlan==FTS5_PLAN_MATCH + || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH + || pCsr->ePlan==FTS5_PLAN_SOURCE ); if( pCsr->pSorter ){ return pCsr->pSorter->iRowid; @@ -1050,7 +1179,7 @@ static i64 fts5CursorRowid(Fts5Cursor *pCsr){ */ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; - int ePlan = FTS5_PLAN(pCsr->idxNum); + int ePlan = pCsr->ePlan; assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); switch( ePlan ){ @@ -1082,7 +1211,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ /* If the cursor does not yet have a statement handle, obtain one now. */ if( pCsr->pStmt==0 ){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); - int eStmt = fts5StmtType(pCsr->idxNum); + int eStmt = fts5StmtType(pCsr); rc = sqlite3Fts5StorageStmt( pTab->pStorage, eStmt, &pCsr->pStmt, &pTab->base.zErrMsg ); @@ -1613,7 +1742,9 @@ static int fts5ApiQueryPhrase( rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); if( rc==SQLITE_OK ){ Fts5Config *pConf = pTab->pConfig; - pNew->idxNum = FTS5_PLAN_MATCH; + pNew->ePlan = FTS5_PLAN_MATCH; + pNew->iFirstRowid = SMALLEST_INT64; + pNew->iLastRowid = LARGEST_INT64; pNew->base.pVtab = (sqlite3_vtab*)pTab; rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); } @@ -1761,7 +1892,7 @@ static int fts5ColumnMethod( assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); - if( pCsr->idxNum==FTS5_PLAN_SPECIAL ){ + if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){ if( iCol==pConfig->nCol ){ sqlite3_result_int64(pCtx, pCsr->iSpecial); } @@ -1776,11 +1907,11 @@ static int fts5ColumnMethod( }else if( iCol==pConfig->nCol+1 ){ /* The value of the "rank" column. 
*/ - if( FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SOURCE ){ + if( pCsr->ePlan==FTS5_PLAN_SOURCE ){ fts5PoslistBlob(pCtx, pCsr); }else if( - FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_MATCH - || FTS5_PLAN(pCsr->idxNum)==FTS5_PLAN_SORTED_MATCH + pCsr->ePlan==FTS5_PLAN_MATCH + || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH ){ if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){ fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 6684c2ba8a..54c23df629 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -566,8 +566,8 @@ int sqlite3Fts5ExprNew( ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); ** } */ -int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, int bDesc); -int sqlite3Fts5ExprNext(Fts5Expr*); +int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc); +int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax); int sqlite3Fts5ExprEof(Fts5Expr*); i64 sqlite3Fts5ExprRowid(Fts5Expr*); diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 6af3b84f31..9707e517aa 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1193,14 +1193,20 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ /* ** Begin iterating through the set of documents in index pIdx matched by -** the MATCH expression passed as the first argument. If the "bDesc" parameter -** is passed a non-zero value, iteration is in descending rowid order. Or, -** if it is zero, in ascending order. +** the MATCH expression passed as the first argument. If the "bDesc" +** parameter is passed a non-zero value, iteration is in descending rowid +** order. Or, if it is zero, in ascending order. +** +** If iterating in ascending rowid order (bDesc==0), the first document +** visited is that with the smallest rowid that is larger than or equal +** to parameter iFirst. Or, if iterating in descending order (bDesc==1), +** then the first document visited must have a rowid smaller than or +** equal to iFirst.
** ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It ** is not considered an error if the query does not match any documents. */ -int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ +int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ Fts5ExprNode *pRoot = p->pRoot; int rc = SQLITE_OK; if( pRoot ){ @@ -1208,6 +1214,13 @@ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ p->bDesc = bDesc; rc = fts5ExprNodeFirst(p, pRoot); + /* If not at EOF but the current rowid occurs earlier than iFirst in + ** the iteration order, move to document iFirst or later. */ + if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){ + rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); + } + + /* If the iterator is not at a real match, skip forward until it is. */ while( pRoot->bNomatch && rc==SQLITE_OK && pRoot->bEof==0 ){ rc = fts5ExprNodeNext(p, pRoot, 0, 0); } @@ -1221,12 +1234,15 @@ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It ** is not considered an error if the query does not match any documents. */ -int sqlite3Fts5ExprNext(Fts5Expr *p){ +int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ int rc; Fts5ExprNode *pRoot = p->pRoot; do { rc = fts5ExprNodeNext(p, pRoot, 0, 0); }while( pRoot->bNomatch && pRoot->bEof==0 && rc==SQLITE_OK ); + if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ + pRoot->bEof = 1; + } return rc; } diff --git a/ext/fts5/test/fts5ah.test b/ext/fts5/test/fts5ah.test index ed2940763d..3c8ad253d1 100644 --- a/ext/fts5/test/fts5ah.test +++ b/ext/fts5/test/fts5ah.test @@ -104,6 +104,46 @@ foreach {tn q res} " do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res] } +#------------------------------------------------------------------------- +# Now test that adding range constraints on the rowid field reduces the +# number of pages loaded from disk. 
+# +foreach {tn fraction tail cnt} { + 1 0.6 {rowid > 5000} 5000 + 2 0.2 {rowid > 9000} 1000 + 3 0.2 {rowid < 1000} 999 + 4 0.2 {rowid BETWEEN 4000 AND 5000} 1001 + 5 0.6 {rowid >= 5000} 5001 + 6 0.2 {rowid >= 9000} 1001 + 7 0.2 {rowid <= 1000} 1000 + 8 0.6 {rowid > '5000'} 5000 + 9 0.2 {rowid > '9000'} 1000 + 10 0.1 {rowid = 444} 1 +} { + set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail" + set n [execsql_reads $q] + set ret [llength [execsql $q]] + + do_test "1.7.$tn.asc.(n=$n ret=$ret)" { + expr {$n < ($fraction*$nReadX) && $ret==$cnt} + } {1} + + set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail ORDER BY rowid DESC" + set n [execsql_reads $q] + set ret [llength [execsql $q]] + do_test "1.7.$tn.desc.(n=$n ret=$ret)" { + expr {$n < 2*$fraction*$nReadX && $ret==$cnt} + } {1} +} + +do_execsql_test 1.8.1 { + SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND +rowid < 'text'; +} {10000} +do_execsql_test 1.8.2 { + SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text'; +} {10000} + + #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} finish_test diff --git a/ext/fts5/test/fts5plan.test b/ext/fts5/test/fts5plan.test index 1670f89faa..72fdc60de3 100644 --- a/ext/fts5/test/fts5plan.test +++ b/ext/fts5/test/fts5plan.test @@ -24,34 +24,34 @@ do_eqp_test 1.1 { SELECT * FROM t1, f1 WHERE f1 MATCH t1.x } { 0 0 0 {SCAN TABLE t1} - 0 1 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:} + 0 1 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} } do_eqp_test 1.2 { SELECT * FROM t1, f1 WHERE f1 > t1.x } { - 0 0 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} + 0 0 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:} 0 1 0 {SCAN TABLE t1} } do_eqp_test 1.3 { SELECT * FROM f1 WHERE f1 MATCH ? 
ORDER BY ff } { - 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:} + 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} 0 0 0 {USE TEMP B-TREE FOR ORDER BY} } do_eqp_test 1.4 { SELECT * FROM f1 ORDER BY rank } { - 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} + 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:} 0 0 0 {USE TEMP B-TREE FOR ORDER BY} } do_eqp_test 1.5 { SELECT * FROM f1 WHERE rank MATCH ? } { - 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} + 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:} } diff --git a/manifest b/manifest index b7b3fe17f8..f34e791c6c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sproblem\sin\sextracting\scolumns\sfrom\sposition\slists\scontaining\slarge\svarints. -D 2015-06-03T11:23:30.476 +C Make\suse\sof\srange\sconstraints\son\sthe\srowid\sfield\sof\san\sfts5\stable\sin\sfull-text\squeries. +D 2015-06-05T19:05:57.541 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in d272f8755b464f20e02dd7799bfe16794c9574c4 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,13 +105,13 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 34e5098e85ed14cc120004c5622536b77ddf4976 +F ext/fts5/fts5.c f5800895e4d24b7351d44a3858c3a1611bb68dac F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 -F ext/fts5/fts5Int.h 4c677f3b797acde90ba1b7730eca6a32e7def742 +F ext/fts5/fts5Int.h 3de83c9639bd8332eb84a13c1eb2387e83e128bf F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 -F ext/fts5/fts5_expr.c 78a498ba149fbcfbd95c9630054c27955253309d +F ext/fts5/fts5_expr.c 
549bda1f7edcf10365fbfbc002bdea1be3c287bb F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b @@ -130,7 +130,7 @@ F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 -F ext/fts5/test/fts5ah.test dbc37d736886e1e38cfa5cd523812db1ad8d0a31 +F ext/fts5/test/fts5ah.test b9e78fa986a7bd564ebadfb244de02c84d7ac3ae F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 @@ -160,7 +160,7 @@ F ext/fts5/test/fts5integrity.test b45f633381a85dc000e41d68c96ab510985ca35e F ext/fts5/test/fts5merge.test 8077454f2975a63f35761f4b8a718b3a808b7c9c F ext/fts5/test/fts5near.test d2e3343e62d438f2efd96ebcd83a0d30a16ea6dc F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 -F ext/fts5/test/fts5plan.test 89783f70dab89ff936ed6f21d88959b49c853a47 +F ext/fts5/test/fts5plan.test 7f38179220c9385f88e1470aae6cba134a308b40 F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5porter2.test c534385e88e685b354c2b2020acc0c4920042c8e F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e @@ -1357,7 +1357,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ab85a6fc4f7580278fc9d1f0090fdcf0a90d065b -R a504500377faead9f927e5b8c0ebee20 +P 4ea015ab983300d420ef104cca550b22a6395866 +R 375eda4e39d4e39f9d43e9373e13d973 U 
dan -Z 68530452f05ae148227ba366408c8cfc +Z 9b2a7bdd6b256eb0f1a2b017a5425890 diff --git a/manifest.uuid b/manifest.uuid index 8efcca17c3..d997303f04 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4ea015ab983300d420ef104cca550b22a6395866 \ No newline at end of file +32cbc0ed3699cc21302f0b6a159493117ad4bd4f \ No newline at end of file From 7193b69f520c44a899ad1d2441b53f5ca0357a04 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 6 Jun 2015 16:28:29 +0000 Subject: [PATCH 154/206] Fix handling of fts5 rowid constraints in the absence of a MATCH clause. Add tests to cover recently added branches. FossilOrigin-Name: 3a9cb648138a059862fb438c0787fdd888f5e88e --- ext/fts5/fts5.c | 3 ++ ext/fts5/fts5_storage.c | 27 +++++++++++++---- ext/fts5/test/fts5aa.test | 63 +++++++++++++++++++++++++++++++++++++++ manifest | 16 +++++----- manifest.uuid | 2 +- 5 files changed, 96 insertions(+), 15 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index c3da8bccf6..f5d6a64acb 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1138,6 +1138,9 @@ static int fts5FilterMethod( if( rc==SQLITE_OK ){ if( pCsr->ePlan==FTS5_PLAN_ROWID ){ sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + }else{ + sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid); + sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid); } rc = fts5NextMethod(pCursor); } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index c154fb578c..cd677c2449 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -23,7 +23,7 @@ struct Fts5Storage { int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ i64 nTotalRow; /* Total number of rows in FTS table */ i64 *aTotalSize; /* Total sizes of each column */ - sqlite3_stmt *aStmt[10]; + sqlite3_stmt *aStmt[11]; }; @@ -48,6 +48,8 @@ struct Fts5Storage { #define FTS5_STMT_REPLACE_CONFIG 9 +#define FTS5_STMT_SCAN 10 + /* ** Prepare the two insert statements - Fts5Storage.pInsertContent and ** Fts5Storage.pInsertDocsize - if they have not 
already been prepared. @@ -65,9 +67,9 @@ static int fts5StorageGetStmt( assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) ); if( p->aStmt[eStmt]==0 ){ const char *azStmt[] = { - "SELECT %s FROM %s T ORDER BY T.%Q ASC", /* SCAN_ASC */ - "SELECT %s FROM %s T ORDER BY T.%Q DESC", /* SCAN_DESC */ - "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ + "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC", + "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC", + "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ @@ -78,13 +80,26 @@ static int fts5StorageGetStmt( "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ + "SELECT %s FROM %s AS T", /* SCAN */ }; Fts5Config *pC = p->pConfig; char *zSql = 0; switch( eStmt ){ + case FTS5_STMT_SCAN: + zSql = sqlite3_mprintf(azStmt[eStmt], + pC->zContentExprlist, pC->zContent + ); + break; + case FTS5_STMT_SCAN_ASC: case FTS5_STMT_SCAN_DESC: + zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist, + pC->zContent, pC->zContentRowid, pC->zContentRowid, + pC->zContentRowid + ); + break; + case FTS5_STMT_LOOKUP: zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist, pC->zContent, pC->zContentRowid @@ -571,7 +586,7 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){ } if( rc==SQLITE_OK ){ - rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_ASC, &pScan, 0); + rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); } while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){ @@ -797,7 +812,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ /* Generate the expected index checksum based on the contents of the ** %_content table. This block stores the checksum in ctx.cksum.
*/ - rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN_ASC, &pScan, 0); + rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); if( rc==SQLITE_OK ){ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index e7c37ccbf8..77ef19dd67 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -433,6 +433,69 @@ do_execsql_test 18.1 { SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; } {1} +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 17.1 { + CREATE VIRTUAL TABLE uio USING fts5(ttt); + INSERT INTO uio VALUES(NULL); + INSERT INTO uio SELECT NULL FROM uio; + INSERT INTO uio SELECT NULL FROM uio; + INSERT INTO uio SELECT NULL FROM uio; + INSERT INTO uio SELECT NULL FROM uio; + INSERT INTO uio SELECT NULL FROM uio; + INSERT INTO uio SELECT NULL FROM uio; + INSERT INTO uio SELECT NULL FROM uio; + INSERT INTO uio SELECT NULL FROM uio; + SELECT count(*) FROM uio; +} {256} + +do_execsql_test 17.2 { + SELECT count(*) FROM uio WHERE rowid BETWEEN 8 AND 17 +} {10} +do_execsql_test 17.3 { + SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17 +} {8 9 10 11 12 13 14 15 16 17} +do_execsql_test 17.4 { + SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17 ORDER BY rowid DESC +} {17 16 15 14 13 12 11 10 9 8} +do_execsql_test 17.5 { + SELECT count(*) FROM uio +} {256} + +do_execsql_test 17.6 { + INSERT INTO uio(rowid) VALUES(9223372036854775807); + INSERT INTO uio(rowid) VALUES(-9223372036854775808); + SELECT count(*) FROM uio; +} {258} +do_execsql_test 17.7 { + SELECT min(rowid), max(rowid) FROM uio; +} {-9223372036854775808 9223372036854775807} + +do_execsql_test 17.8 { + INSERT INTO uio DEFAULT VALUES; + SELECT min(rowid), max(rowid), count(*) FROM uio; +} {-9223372036854775808 9223372036854775807 259} + +do_execsql_test 17.9 { + SELECT min(rowid), max(rowid), count(*) FROM uio WHERE rowid < 10; +} {-9223372036854775808 9 10} + 
+#-------------------------------------------------------------------- +# +do_execsql_test 18.1 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b); + CREATE VIRTUAL TABLE t2 USING fts5(c, d); + INSERT INTO t1 VALUES('abc*', NULL); + INSERT INTO t2 VALUES(1, 'abcdefg'); +} +do_execsql_test 18.2 { + SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c +} {1 1} +do_execsql_test 18.3 { + SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c +} {1 1} + finish_test diff --git a/manifest b/manifest index f34e791c6c..03cf31358f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Make\suse\sof\srange\sconstraints\son\sthe\srowid\sfield\sof\san\sfts5\stable\sin\sfull-text\squeries. -D 2015-06-05T19:05:57.541 +C Fix\shandling\sof\sfts5\srowid\sconstraints\sin\sthe\sabsence\sof\sa\sMATCH\sclause.\sAdd\stests\sto\scover\srecently\sadded\sbranches. +D 2015-06-06T16:28:29.334 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in d272f8755b464f20e02dd7799bfe16794c9574c4 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,7 +105,7 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c f5800895e4d24b7351d44a3858c3a1611bb68dac +F ext/fts5/fts5.c 1c7424b9ba39f1e244f776556a4fface71abe772 F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 F ext/fts5/fts5Int.h 3de83c9639bd8332eb84a13c1eb2387e83e128bf F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 @@ -114,7 +114,7 @@ F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 F ext/fts5/fts5_expr.c 549bda1f7edcf10365fbfbc002bdea1be3c287bb F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 
7cea402924cd3d8cd5943a7f9514c9153696571b -F ext/fts5/fts5_storage.c 04e6717656b78eb230a1c730cac3b935eb94889b +F ext/fts5/fts5_storage.c 770ab52377c18a9aa4dc843ee79388febdb184d4 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 @@ -123,7 +123,7 @@ F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 0b465b1127adcd1c8131f3454ab4264a6964674c -F ext/fts5/test/fts5aa.test 5f73afe6a1394fdba9bc18302876ded81021bee6 +F ext/fts5/test/fts5aa.test 0be21c89fd66b588db355a6398911fd875bdcc6c F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 0990ae7497ebaea2ab5f7fd5caedd93a71a905fc F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 @@ -1357,7 +1357,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 4ea015ab983300d420ef104cca550b22a6395866 -R 375eda4e39d4e39f9d43e9373e13d973 +P 32cbc0ed3699cc21302f0b6a159493117ad4bd4f +R 6f9b9f6d43c04c7ec55a64f8c057a03e U dan -Z 9b2a7bdd6b256eb0f1a2b017a5425890 +Z 85736f383e03d41cf70fac675094eadf diff --git a/manifest.uuid b/manifest.uuid index d997303f04..a933537909 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -32cbc0ed3699cc21302f0b6a159493117ad4bd4f \ No newline at end of file +3a9cb648138a059862fb438c0787fdd888f5e88e \ No newline at end of file From 90502c3dc7b7d6f449dfc25f735f544923fbe7b0 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 6 Jun 2015 19:23:32 +0000 Subject: [PATCH 155/206] Fix a comment in fts5.h. 
FossilOrigin-Name: e964b5877497b16cf985d3d847e82529bb3fa4a3 --- ext/fts5/fts5.h | 7 ++++++- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 7782bf697c..d9b30c620d 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -67,7 +67,12 @@ typedef void (*fts5_extension_function)( ** Reports the size in tokens of a column value from the current row. ** ** xColumnText: -** Reports the size in tokens of a column value from the current row. +** This function attempts to retrieve the text of column iCol of the +** current document. If successful, (*pz) is set to point to a buffer +** containing the text in utf-8 encoding, (*pn) is set to the size in bytes +** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, +** if an error occurs, an SQLite error code is returned and the final values +** of (*pz) and (*pn) are undefined. ** ** xPhraseCount: ** Returns the number of phrases in the current query expression. diff --git a/manifest b/manifest index 03cf31358f..8040ab30e4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\shandling\sof\sfts5\srowid\sconstraints\sin\sthe\sabsence\sof\sa\sMATCH\sclause.\sAdd\stests\sto\scover\srecently\sadded\sbranches. -D 2015-06-06T16:28:29.334 +C Fix\sa\scomment\sin\sfts5.h. 
+D 2015-06-06T19:23:32.945 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in d272f8755b464f20e02dd7799bfe16794c9574c4 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,7 +106,7 @@ F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1c7424b9ba39f1e244f776556a4fface71abe772 -F ext/fts5/fts5.h 4266c6231094005b051dbfc8dd85d2bc57243d34 +F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba F ext/fts5/fts5Int.h 3de83c9639bd8332eb84a13c1eb2387e83e128bf F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf @@ -1357,7 +1357,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 32cbc0ed3699cc21302f0b6a159493117ad4bd4f -R 6f9b9f6d43c04c7ec55a64f8c057a03e +P 3a9cb648138a059862fb438c0787fdd888f5e88e +R 01f96e31f05863ae01feddea5f82854e U dan -Z 85736f383e03d41cf70fac675094eadf +Z 479453162469a9d47c2a3d0d93cd23e2 diff --git a/manifest.uuid b/manifest.uuid index a933537909..4112b379fe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3a9cb648138a059862fb438c0787fdd888f5e88e \ No newline at end of file +e964b5877497b16cf985d3d847e82529bb3fa4a3 \ No newline at end of file From bcc2f04c6818d30c85b8c8d240d2aaa8815b1056 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 9 Jun 2015 20:58:39 +0000 Subject: [PATCH 156/206] Add the "columnsize=" option to fts5, similar to fts4's "matchinfo=fts3". 
FossilOrigin-Name: aa12f9d9b79c2f523fd6b00e47bcb66dba09ce0c --- ext/fts5/fts5.c | 61 +++++++++++++--- ext/fts5/fts5Int.h | 6 ++ ext/fts5/fts5_buffer.c | 15 ++++ ext/fts5/fts5_config.c | 46 +++++++----- ext/fts5/fts5_storage.c | 98 ++++++++++++++++---------- ext/fts5/test/fts5ae.test | 6 +- ext/fts5/test/fts5columnsize.test | 112 ++++++++++++++++++++++++++++++ manifest | 23 +++--- manifest.uuid | 2 +- 9 files changed, 293 insertions(+), 76 deletions(-) create mode 100644 ext/fts5/test/fts5columnsize.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index f5d6a64acb..2c397c92b8 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1039,6 +1039,7 @@ static int fts5FilterMethod( sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); + Fts5Config *pConfig = pTab->pConfig; Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int rc = SQLITE_OK; /* Error code */ int iVal = 0; /* Counter for apVal[] */ @@ -1049,7 +1050,7 @@ static int fts5FilterMethod( sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ - char **pzErrmsg = pTab->pConfig->pzErrmsg; + char **pzErrmsg = pConfig->pzErrmsg; assert( pCsr->pStmt==0 ); assert( pCsr->pExpr==0 ); @@ -1059,7 +1060,7 @@ static int fts5FilterMethod( assert( pCsr->zRankArgs==0 ); assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg ); - pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; + pConfig->pzErrmsg = &pTab->base.zErrMsg; /* Decode the arguments passed through to this function. ** @@ -1107,7 +1108,7 @@ static int fts5FilterMethod( }else if( pMatch ){ const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); - rc = fts5CursorParseRank(pTab->pConfig, pCsr, pRank); + rc = fts5CursorParseRank(pConfig, pCsr, pRank); if( rc==SQLITE_OK ){ if( zExpr[0]=='*' ){ /* The user has issued a query of the form "MATCH '*...'". 
This @@ -1116,7 +1117,7 @@ static int fts5FilterMethod( rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); }else{ char **pzErr = &pTab->base.zErrMsg; - rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); + rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ if( bOrderByRank ){ pCsr->ePlan = FTS5_PLAN_SORTED_MATCH; @@ -1128,6 +1129,11 @@ static int fts5FilterMethod( } } } + }else if( pConfig->zContent==0 ){ + *pConfig->pzErrmsg = sqlite3_mprintf( + "%s: table does not support scanning", pConfig->zName + ); + rc = SQLITE_ERROR; }else{ /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup ** by rowid (ePlan==FTS5_PLAN_ROWID). */ @@ -1146,7 +1152,7 @@ static int fts5FilterMethod( } } - pTab->pConfig->pzErrmsg = pzErrmsg; + pConfig->pzErrmsg = pzErrmsg; return rc; } @@ -1621,23 +1627,58 @@ static int fts5ApiColumnText( return rc; } +static int fts5ColumnSizeCb( + void *pContext, /* Pointer to int */ + const char *pToken, /* Buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Start offset of token */ + int iEnd /* End offset of token */ +){ + int *pCnt = (int*)pContext; + *pCnt = *pCnt + 1; + return SQLITE_OK; +} + static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + Fts5Config *pConfig = pTab->pConfig; int rc = SQLITE_OK; if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ - i64 iRowid = fts5CursorRowid(pCsr); - rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); + if( pConfig->bColumnsize ){ + i64 iRowid = fts5CursorRowid(pCsr); + rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); + }else if( pConfig->zContent==0 ){ + int i; + for(i=0; i<pConfig->nCol; i++){ + if( pConfig->abUnindexed[i]==0 ){ + pCsr->aColumnSize[i] = -1; + } + } + }else{ + int i; + for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ + if( pConfig->abUnindexed[i]==0 ){ + const
char *z; int n; + void *p = (void*)(&pCsr->aColumnSize[i]); + pCsr->aColumnSize[i] = 0; + rc = fts5ApiColumnText(pCtx, i, &z, &n); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb); + } + } + } + } CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); } if( iCol<0 ){ int i; *pnToken = 0; - for(i=0; i<pTab->pConfig->nCol; i++){ + for(i=0; i<pConfig->nCol; i++){ *pnToken += pCsr->aColumnSize[i]; } - }else if( iCol<pTab->pConfig->nCol ){ + }else if( iCol<pConfig->nCol ){ *pnToken = pCsr->aColumnSize[iCol]; }else{ *pnToken = 0; @@ -1956,7 +1997,7 @@ static int fts5FindFunctionMethod( } /* -** Implementation of FTS3 xRename method. Rename an fts5 table. +** Implementation of FTS5 xRename method. Rename an fts5 table. */ static int fts5RenameMethod( sqlite3_vtab *pVtab, /* Virtual table handle */ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 54c23df629..b3fb4611fc 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -105,6 +105,9 @@ typedef struct Fts5Config Fts5Config; ** decent error message if it encounters a file-format version it does ** not understand. ** +** bColumnsize: +** True if the %_docsize table is created.
+** */ struct Fts5Config { sqlite3 *db; /* Database handle */ @@ -118,6 +121,7 @@ struct Fts5Config { int eContent; /* An FTS5_CONTENT value */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ + int bColumnsize; /* "columnsize=" option value (dflt==1) */ char *zContentExprlist; Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; @@ -196,6 +200,8 @@ void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int); +char *sqlite3Fts5Mprintf(int *pRc, char *zFmt, ...); + #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) #define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c) #define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c) diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 5c4e518bcc..b900ada744 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -125,6 +125,21 @@ void sqlite3Fts5BufferAppendPrintf( } } +char *sqlite3Fts5Mprintf(int *pRc, char *zFmt, ...){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + va_list ap; + va_start(ap, zFmt); + zRet = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + if( zRet==0 ){ + *pRc = SQLITE_NOMEM; + } + } + return zRet; +} + + /* ** Free any buffer allocated by pBuf. Zero the structure before returning. */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 4b35ac8b2c..04366fcea7 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -196,7 +196,7 @@ void sqlite3Fts5Dequote(char *z){ } /* -** Parse the "special" CREATE VIRTUAL TABLE directive and update +** Parse a "special" CREATE VIRTUAL TABLE directive and update ** configuration object pConfig as appropriate. ** ** If successful, object pConfig is updated and SQLITE_OK returned. 
If @@ -211,10 +211,10 @@ static int fts5ConfigParseSpecial( const char *zArg, /* Argument to parse */ char **pzErr /* OUT: Error message */ ){ + int rc = SQLITE_OK; int nCmd = strlen(zCmd); if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; - int rc = SQLITE_OK; const char *p; if( pConfig->aPrefix ){ *pzErr = sqlite3_mprintf("multiple prefix=... directives"); @@ -248,7 +248,6 @@ static int fts5ConfigParseSpecial( } if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){ - int rc = SQLITE_OK; const char *p = (const char*)zArg; int nArg = strlen(zArg) + 1; char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); @@ -293,7 +292,6 @@ static int fts5ConfigParseSpecial( } if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){ - int rc = SQLITE_OK; if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ *pzErr = sqlite3_mprintf("multiple content=... directives"); rc = SQLITE_ERROR; @@ -301,19 +299,15 @@ static int fts5ConfigParseSpecial( if( zArg[0] ){ pConfig->eContent = FTS5_CONTENT_EXTERNAL; pConfig->zContent = sqlite3_mprintf("%Q.%Q", pConfig->zDb, zArg); + if( pConfig->zContent==0 ) rc = SQLITE_NOMEM; }else{ pConfig->eContent = FTS5_CONTENT_NONE; - pConfig->zContent = sqlite3_mprintf( - "%Q.'%q_docsize'", pConfig->zDb, pConfig->zName - ); } - if( pConfig->zContent==0 ) rc = SQLITE_NOMEM; } return rc; } if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ - int rc = SQLITE_OK; if( pConfig->zContentRowid ){ *pzErr = sqlite3_mprintf("multiple content_rowid=... directives"); rc = SQLITE_ERROR; @@ -323,6 +317,16 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ + *pzErr = sqlite3_mprintf("malformed columnsize=... 
directive"); + rc = SQLITE_ERROR; + }else{ + pConfig->bColumnsize = (zArg[0]=='1'); + } + return rc; + } + *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } @@ -477,6 +481,7 @@ int sqlite3Fts5ConfigParse( pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); + pRet->bColumnsize = 1; if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; @@ -530,15 +535,24 @@ int sqlite3Fts5ConfigParse( } /* If no zContent option was specified, fill in the default values. */ - if( rc==SQLITE_OK && pRet->eContent==FTS5_CONTENT_NORMAL ){ - pRet->zContent = sqlite3_mprintf("%Q.'%q_content'", pRet->zDb, pRet->zName); - if( pRet->zContent==0 ){ - rc = SQLITE_NOMEM; - }else{ - sqlite3_free(pRet->zContentRowid); - pRet->zContentRowid = 0; + if( rc==SQLITE_OK && pRet->zContent==0 ){ + const char *zTail = 0; + assert( pRet->eContent==FTS5_CONTENT_NORMAL + || pRet->eContent==FTS5_CONTENT_NONE + ); + if( pRet->eContent==FTS5_CONTENT_NORMAL ){ + zTail = "content"; + }else if( pRet->bColumnsize ){ + zTail = "docsize"; + } + + if( zTail ){ + pRet->zContent = sqlite3Fts5Mprintf( + &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail + ); } } + if( rc==SQLITE_OK && pRet->zContentRowid==0 ){ pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index cd677c2449..4dd72c2d41 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -39,15 +39,11 @@ struct Fts5Storage { #define FTS5_STMT_INSERT_CONTENT 3 #define FTS5_STMT_REPLACE_CONTENT 4 - #define FTS5_STMT_DELETE_CONTENT 5 #define FTS5_STMT_REPLACE_DOCSIZE 6 #define FTS5_STMT_DELETE_DOCSIZE 7 - #define FTS5_STMT_LOOKUP_DOCSIZE 8 - #define FTS5_STMT_REPLACE_CONFIG 9 - #define FTS5_STMT_SCAN 10 /* @@ -64,6 +60,14 @@ static int 
fts5StorageGetStmt( ){ int rc = SQLITE_OK; + /* If there is no %_docsize table, there should be no requests for + ** statements to operate on it. */ + assert( p->pConfig->bColumnsize || ( + eStmt!=FTS5_STMT_REPLACE_DOCSIZE + && eStmt!=FTS5_STMT_DELETE_DOCSIZE + && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE + )); + assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) ); if( p->aStmt[eStmt]==0 ){ const char *azStmt[] = { @@ -175,12 +179,16 @@ static int fts5ExecPrintf( int sqlite3Fts5DropAll(Fts5Config *pConfig){ int rc = fts5ExecPrintf(pConfig->db, 0, "DROP TABLE IF EXISTS %Q.'%q_data';" - "DROP TABLE IF EXISTS %Q.'%q_docsize';" "DROP TABLE IF EXISTS %Q.'%q_config';", pConfig->zDb, pConfig->zName, - pConfig->zDb, pConfig->zName, pConfig->zDb, pConfig->zName ); + if( rc==SQLITE_OK && pConfig->bColumnsize ){ + rc = fts5ExecPrintf(pConfig->db, 0, + "DROP TABLE IF EXISTS %Q.'%q_docsize';", + pConfig->zDb, pConfig->zName + ); + } if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ rc = fts5ExecPrintf(pConfig->db, 0, "DROP TABLE IF EXISTS %Q.'%q_content';", @@ -266,7 +274,7 @@ int sqlite3Fts5StorageOpen( sqlite3_free(zDefn); } - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && pConfig->bColumnsize ){ rc = sqlite3Fts5CreateTable( pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr ); @@ -374,19 +382,25 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ ** Insert a record into the %_docsize table. Specifically, do: ** ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); +** +** If there is no %_docsize table (as happens if the columnsize=0 option +** is specified when the FTS5 table is created), this function is a no-op.
*/ static int fts5StorageInsertDocsize( Fts5Storage *p, /* Storage module to write to */ i64 iRowid, /* id value */ Fts5Buffer *pBuf /* sz value */ ){ - sqlite3_stmt *pReplace = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pReplace, 1, iRowid); - sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); - sqlite3_step(pReplace); - rc = sqlite3_reset(pReplace); + int rc = SQLITE_OK; + if( p->pConfig->bColumnsize ){ + sqlite3_stmt *pReplace = 0; + rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pReplace, 1, iRowid); + sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + } } return rc; } @@ -455,6 +469,7 @@ static int fts5StorageSaveTotals(Fts5Storage *p){ ** Remove a row from the FTS table. */ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ + Fts5Config *pConfig = p->pConfig; int rc; sqlite3_stmt *pDel; @@ -466,7 +481,7 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ } /* Delete the %_docsize record */ - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && pConfig->bColumnsize ){ rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); } if( rc==SQLITE_OK ){ @@ -528,13 +543,15 @@ int sqlite3Fts5StorageSpecialDelete( } /* Delete the %_docsize record */ - if( rc==SQLITE_OK ){ - rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); - } - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDel, 1, iDel); - sqlite3_step(pDel); - rc = sqlite3_reset(pDel); + if( pConfig->bColumnsize ){ + if( rc==SQLITE_OK ){ + rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); + } + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDel, 1, iDel); + sqlite3_step(pDel); + rc = sqlite3_reset(pDel); + } } /* Write the averages record */ @@ -554,11 +571,15 @@ int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ /* Delete the contents of the %_data and 
%_docsize tables. */ rc = fts5ExecPrintf(pConfig->db, 0, - "DELETE FROM %Q.'%q_data';" - "DELETE FROM %Q.'%q_docsize';", - pConfig->zDb, pConfig->zName, + "DELETE FROM %Q.'%q_data';", pConfig->zDb, pConfig->zName ); + if( rc==SQLITE_OK && pConfig->bColumnsize ){ + rc = fts5ExecPrintf(pConfig->db, 0, + "DELETE FROM %Q.'%q_docsize';", + pConfig->zDb, pConfig->zName + ); + } /* Reinitialize the %_data table. This call creates the initial structure ** and averages records. */ @@ -635,18 +656,24 @@ int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ ** a NULL value is inserted into the rowid column. The new rowid is allocated ** by inserting a dummy row into the %_docsize table. The dummy will be ** overwritten later. +** +** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In +** this case the user is required to provide a rowid explicitly. */ static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ - sqlite3_stmt *pReplace = 0; - int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_null(pReplace, 1); - sqlite3_bind_null(pReplace, 2); - sqlite3_step(pReplace); - rc = sqlite3_reset(pReplace); - } - if( rc==SQLITE_OK ){ - *piRowid = sqlite3_last_insert_rowid(p->pConfig->db); + int rc = SQLITE_MISMATCH; + if( p->pConfig->bColumnsize ){ + sqlite3_stmt *pReplace = 0; + rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_null(pReplace, 1); + sqlite3_bind_null(pReplace, 2); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + } + if( rc==SQLITE_OK ){ + *piRowid = sqlite3_last_insert_rowid(p->pConfig->db); + } } return rc; } @@ -958,6 +985,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ rc = FTS5_CORRUPT; } } + return rc; } diff --git a/ext/fts5/test/fts5ae.test b/ext/fts5/test/fts5ae.test index d310e723be..e32e1a3b6e 100644 --- a/ext/fts5/test/fts5ae.test +++ b/ext/fts5/test/fts5ae.test @@ -148,7 +148,7 
@@ do_execsql_test 5.2 { 1 {4 6} } -do_execsql_test 5.2 { +do_execsql_test 5.3 { SELECT rowid, fts5_test_columntext(t5) FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { @@ -157,7 +157,7 @@ do_execsql_test 5.2 { 1 {{a b c d} {e f g h i j}} } -do_execsql_test 5.3 { +do_execsql_test 5.4 { SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { @@ -166,7 +166,7 @@ do_execsql_test 5.3 { 1 {5 7} } -do_execsql_test 5.4 { +do_execsql_test 5.5 { INSERT INTO t5 VALUES('x y z', 'v w x y z'); SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; diff --git a/ext/fts5/test/fts5columnsize.test b/ext/fts5/test/fts5columnsize.test new file mode 100644 index 0000000000..4dead06f62 --- /dev/null +++ b/ext/fts5/test/fts5columnsize.test @@ -0,0 +1,112 @@ +# 2015 Jun 10 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on fts5 tables with the columnsize=0 option. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5columnsize + +#------------------------------------------------------------------------- +# Check that the option can be parsed and that the %_docsize table is +# only created if it is set to true. 
+# +foreach {tn outcome stmt} { + 1 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0) } + 2 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=1) } + 3 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='0') } + 4 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='1') } + 5 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='') } + 6 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=2) } + 7 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0, columnsize=1) } + 8 1 { CREATE VIRTUAL TABLE t1 USING fts5(x) } +} { + execsql { + DROP TABLE IF EXISTS t1; + } + if {$outcome==2} { + do_catchsql_test 1.$tn.1 $stmt {1 {malformed columnsize=... directive}} + } else { + do_execsql_test 1.$tn.2 $stmt + do_execsql_test 1.$tn.3 { + SELECT count(*) FROM sqlite_master WHERE name = 't1_docsize' + } $outcome + } +} + +#------------------------------------------------------------------------- +# Run tests on a table with no %_content or %_docsize backing store. +# +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t2 USING fts5(x, columnsize=0, content=''); +} +do_catchsql_test 2.1 { + INSERT INTO t2 VALUES('a b c d e f'); +} {1 {datatype mismatch}} +do_execsql_test 2.2 { + INSERT INTO t2(rowid, x) VALUES(1, 'c d e f'); + INSERT INTO t2(rowid, x) VALUES(2, 'c d e f g h'); + INSERT INTO t2(rowid, x) VALUES(3, 'a b c d e f g h'); +} {} +do_execsql_test 2.3 { + SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::'; + SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::'; + SELECT rowid FROM t2 WHERE t2 MATCH 'h'; +} {3 :: 1 2 3 :: 2 3} +do_execsql_test 2.4 { + INSERT INTO t2(t2, rowid, x) VALUES('delete', 2, 'c d e f g h'); + SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::'; + SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::'; + SELECT rowid FROM t2 WHERE t2 MATCH 'h'; +} {3 :: 1 3 :: 3} +do_execsql_test 2.5 { + INSERT INTO t2(t2) VALUES('delete-all'); + SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::'; + SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::'; + 
SELECT rowid FROM t2 WHERE t2 MATCH 'h'; +} {:: ::} +do_execsql_test 2.6 { + INSERT INTO t2(rowid, x) VALUES(1, 'o t t f'); + INSERT INTO t2(rowid, x) VALUES(2, 'f s s e'); + INSERT INTO t2(rowid, x) VALUES(3, 'n t e t'); +} + +do_catchsql_test 2.7.1 { + SELECT rowid FROM t2 +} {1 {t2: table does not support scanning}} +do_catchsql_test 2.7.2 { + SELECT rowid FROM t2 WHERE rowid=2 +} {1 {t2: table does not support scanning}} +do_catchsql_test 2.7.3 { + SELECT rowid FROM t2 WHERE rowid BETWEEN 1 AND 3 +} {1 {t2: table does not support scanning}} + +do_execsql_test 2.X { + DROP TABLE t2 +} + +#------------------------------------------------------------------------- +# Test the xColumnSize() API +# +fts5_aux_test_functions db + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t3 USING fts5(x, y UNINDEXED, z, columnsize=0); + INSERT INTO t3 VALUES('a a', 'b b b', 'c'); + INSERT INTO t3 VALUES('x a x', 'b b b y', ''); +} +do_execsql_test 3.1 { + SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a' +} { + 1 {2 0 1} 2 {3 0 0} +} + +finish_test diff --git a/manifest b/manifest index 8040ab30e4..3b31e16d5b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\scomment\sin\sfts5.h. -D 2015-06-06T19:23:32.945 +C Add\sthe\s"columnsize="\soption\sto\sfts5,\ssimilar\sto\sfts4's\s"matchinfo=fts3". 
+D 2015-06-09T20:58:39.182 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in d272f8755b464f20e02dd7799bfe16794c9574c4 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,16 +105,16 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 1c7424b9ba39f1e244f776556a4fface71abe772 +F ext/fts5/fts5.c 8af8014b40c382a987998a27f72490b339ce3726 F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba -F ext/fts5/fts5Int.h 3de83c9639bd8332eb84a13c1eb2387e83e128bf +F ext/fts5/fts5Int.h a6d1c30e1655bd91484cb98661581e35a130b87b F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 -F ext/fts5/fts5_buffer.c 9ec57c75c81e81dca118568876b1caead0aadadf -F ext/fts5/fts5_config.c 11f969ed711a0a8b611d47431d74c372ad78c713 +F ext/fts5/fts5_buffer.c be0dc80a9406151b350be27c7ec2956722578771 +F ext/fts5/fts5_config.c 6ae691e36f90185896f4db0a819ae2394f880ca1 F ext/fts5/fts5_expr.c 549bda1f7edcf10365fbfbc002bdea1be3c287bb F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b -F ext/fts5/fts5_storage.c 770ab52377c18a9aa4dc843ee79388febdb184d4 +F ext/fts5/fts5_storage.c 684ef9575dd1709c3faacbfd1765e623fb1d0505 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 @@ -127,7 +127,7 @@ F ext/fts5/test/fts5aa.test 0be21c89fd66b588db355a6398911fd875bdcc6c F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 0990ae7497ebaea2ab5f7fd5caedd93a71a905fc F ext/fts5/test/fts5ad.test 
312f3c8ed9592533499c5b94d2059ae6382913a0 -F ext/fts5/test/fts5ae.test 9175201baf8c885fc1cbb2da11a0c61fd11224db +F ext/fts5/test/fts5ae.test ddc558e3e3b52db0101f7541b2e3849b77052c92 F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 F ext/fts5/test/fts5ah.test b9e78fa986a7bd564ebadfb244de02c84d7ac3ae @@ -139,6 +139,7 @@ F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b +F ext/fts5/test/fts5columnsize.test c7333cf079022c1ad25d04538b8f279fad4c2f8d F ext/fts5/test/fts5config.test c9cc535f3b36cde1e5a32bf579f3f5962a9e82b2 F ext/fts5/test/fts5content.test e46904decd896e38c848ad4f38fa4e80251a028b F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 @@ -1357,7 +1358,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 3a9cb648138a059862fb438c0787fdd888f5e88e -R 01f96e31f05863ae01feddea5f82854e +P e964b5877497b16cf985d3d847e82529bb3fa4a3 +R 26fe69b53869c7d4cebecf3b1c47f607 U dan -Z 479453162469a9d47c2a3d0d93cd23e2 +Z 7aaa26406a2384f1f8538b962eb28bef diff --git a/manifest.uuid b/manifest.uuid index 4112b379fe..da2e7a4579 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e964b5877497b16cf985d3d847e82529bb3fa4a3 \ No newline at end of file +aa12f9d9b79c2f523fd6b00e47bcb66dba09ce0c \ No newline at end of file From 1153e60cf072db23b470ed3565766dba35e40512 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 10 Jun 2015 10:45:34 +0000 Subject: [PATCH 157/206] Fix the fts5 xRename() method. 
FossilOrigin-Name: 0f7fd51325875fbf0f1eaca3bbbd170ef99c4208 --- ext/fts5/fts5.c | 4 +- ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_storage.c | 29 +++++++++++++ ext/fts5/test/fts5alter.test | 81 ++++++++++++++++++++++++++++++++++++ manifest | 17 ++++---- manifest.uuid | 2 +- 6 files changed, 123 insertions(+), 11 deletions(-) create mode 100644 ext/fts5/test/fts5alter.test diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 2c397c92b8..2a1fba8fd6 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -2003,8 +2003,8 @@ static int fts5RenameMethod( sqlite3_vtab *pVtab, /* Virtual table handle */ const char *zName /* New name of table */ ){ - int rc = SQLITE_OK; - return rc; + Fts5Table *pTab = (Fts5Table*)pVtab; + return sqlite3Fts5StorageRename(pTab->pStorage, zName); } /* diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index b3fb4611fc..eef36f811e 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -504,6 +504,7 @@ typedef struct Fts5Storage Fts5Storage; int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); int sqlite3Fts5StorageClose(Fts5Storage *p); +int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); int sqlite3Fts5DropAll(Fts5Config*); int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 4dd72c2d41..588f98147d 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -198,6 +198,35 @@ int sqlite3Fts5DropAll(Fts5Config *pConfig){ return rc; } +static void fts5StorageRenameOne( + Fts5Config *pConfig, /* Current FTS5 configuration */ + int *pRc, /* IN/OUT: Error code */ + const char *zTail, /* Tail of table name e.g. 
"data", "config" */ + const char *zName /* New name of FTS5 table */ +){ + if( *pRc==SQLITE_OK ){ + *pRc = fts5ExecPrintf(pConfig->db, 0, + "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';", + pConfig->zDb, pConfig->zName, zTail, zName, zTail + ); + } +} + +int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ + Fts5Config *pConfig = pStorage->pConfig; + int rc = sqlite3Fts5StorageSync(pStorage, 1); + + fts5StorageRenameOne(pConfig, &rc, "data", zName); + fts5StorageRenameOne(pConfig, &rc, "config", zName); + if( pConfig->bColumnsize ){ + fts5StorageRenameOne(pConfig, &rc, "docsize", zName); + } + if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ + fts5StorageRenameOne(pConfig, &rc, "content", zName); + } + return rc; +} + /* ** Create the shadow table named zPost, with definition zDefn. Return ** SQLITE_OK if successful, or an SQLite error code otherwise. diff --git a/ext/fts5/test/fts5alter.test b/ext/fts5/test/fts5alter.test new file mode 100644 index 0000000000..f2000ff1dc --- /dev/null +++ b/ext/fts5/test/fts5alter.test @@ -0,0 +1,81 @@ +# 2015 Jun 10 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# The tests in this file focus on renaming FTS5 tables using the +# "ALTER TABLE ... RENAME TO ..." command +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5alter + + +#------------------------------------------------------------------------- +# Test renaming regular, contentless and columnsize=0 FTS5 tables. 
+# +do_execsql_test 1.1.0 { + CREATE VIRTUAL TABLE "a x" USING fts5(a, x); + INSERT INTO "a x" VALUES('a a a', 'x x x'); + ALTER TABLE "a x" RENAME TO "x y"; +} +do_execsql_test 1.1.1 { + SELECT * FROM "x y"; + SELECT rowid FROM "x y" WHERE "x y" MATCH 'a' +} {{a a a} {x x x} 1} + +do_execsql_test 1.2.0 { + CREATE VIRTUAL TABLE "one/two" USING fts5(one, columnsize=0); + INSERT INTO "one/two"(rowid, one) VALUES(456, 'd d d'); + ALTER TABLE "one/two" RENAME TO "three/four"; +} +do_execsql_test 1.2.1 { + SELECT * FROM "three/four"; + SELECT rowid FROM "three/four" WHERE "three/four" MATCH 'd' +} {{d d d} 456} + +do_execsql_test 1.3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(val, content=''); + INSERT INTO t1(rowid, val) VALUES(-1, 'drop table'); + INSERT INTO t1(rowid, val) VALUES(-2, 'drop view'); + ALTER TABLE t1 RENAME TO t2; +} +do_execsql_test 1.3.1 { + SELECT rowid, * FROM t2; + SELECT rowid FROM t2 WHERE t2 MATCH 'table' +} {-2 {} -1 {} -1} + +#------------------------------------------------------------------------- +# Test renaming an FTS5 table within a transaction. +# +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE zz USING fts5(a); + INSERT INTO zz(rowid, a) VALUES(-56, 'a b c'); + BEGIN; + INSERT INTO zz(rowid, a) VALUES(-22, 'a b c'); + ALTER TABLE zz RENAME TO yy; + SELECT rowid FROM yy WHERE yy MATCH 'a + b + c'; + COMMIT; +} {-56 -22} + +do_execsql_test 2.2 { + BEGIN; + ALTER TABLE yy RENAME TO ww; + INSERT INTO ww(rowid, a) VALUES(-11, 'a b c'); + SELECT rowid FROM ww WHERE ww MATCH 'a + b + c'; +} {-56 -22 -11} + +do_execsql_test 2.3 { + ROLLBACK; + SELECT rowid FROM yy WHERE yy MATCH 'a + b + c'; +} {-56 -22} + + +finish_test + diff --git a/manifest b/manifest index 3b31e16d5b..517ec49ea9 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\s"columnsize="\soption\sto\sfts5,\ssimilar\sto\sfts4's\s"matchinfo=fts3". -D 2015-06-09T20:58:39.182 +C Fix\sthe\sfts5\sxRename()\smethod. 
+D 2015-06-10T10:45:34.820 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in d272f8755b464f20e02dd7799bfe16794c9574c4 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,16 +105,16 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 8af8014b40c382a987998a27f72490b339ce3726 +F ext/fts5/fts5.c 4ce5d0990c61a41155fc014b0066ae6d25a388d3 F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba -F ext/fts5/fts5Int.h a6d1c30e1655bd91484cb98661581e35a130b87b +F ext/fts5/fts5Int.h 21eb91e02ad119e1d92ff100f366a976e12190de F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c be0dc80a9406151b350be27c7ec2956722578771 F ext/fts5/fts5_config.c 6ae691e36f90185896f4db0a819ae2394f880ca1 F ext/fts5/fts5_expr.c 549bda1f7edcf10365fbfbc002bdea1be3c287bb F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b -F ext/fts5/fts5_storage.c 684ef9575dd1709c3faacbfd1765e623fb1d0505 +F ext/fts5/fts5_storage.c 7e77d1b2da424283d1d58a77e9a98067dc96f2c7 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 @@ -135,6 +135,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 +F ext/fts5/test/fts5alter.test 3342e7fd58556d2a7e5299a7d9dec62e358028ed F ext/fts5/test/fts5auto.test 
caa5bcf917db11944655a2a9bd38c67c520376ca F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b @@ -1358,7 +1359,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P e964b5877497b16cf985d3d847e82529bb3fa4a3 -R 26fe69b53869c7d4cebecf3b1c47f607 +P aa12f9d9b79c2f523fd6b00e47bcb66dba09ce0c +R 92d332093f5da6f2c10bbd12aba6b7b5 U dan -Z 7aaa26406a2384f1f8538b962eb28bef +Z 4c307463292fc937d885d935d5d6ac74 diff --git a/manifest.uuid b/manifest.uuid index da2e7a4579..052ab5492f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -aa12f9d9b79c2f523fd6b00e47bcb66dba09ce0c \ No newline at end of file +0f7fd51325875fbf0f1eaca3bbbd170ef99c4208 \ No newline at end of file From 51ef0f57c788bca09a88a397896fd6165ec7d45d Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 23 Jun 2015 18:47:55 +0000 Subject: [PATCH 158/206] Improve test coverage of fts5. FossilOrigin-Name: df5ccea80e8f0da83af5e595b539687006085120 --- ext/fts5/fts5.c | 25 ++++++++++++----------- ext/fts5/fts5_config.c | 34 +++++++++++-------------------- ext/fts5/fts5_index.c | 14 +++++-------- ext/fts5/fts5_vocab.c | 6 ++---- ext/fts5/test/fts5columnsize.test | 19 ++++++++++++++--- manifest | 20 +++++++++--------- manifest.uuid | 2 +- 7 files changed, 59 insertions(+), 61 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 2a1fba8fd6..a851909094 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -827,15 +827,14 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ ** table, saving it creates a circular reference. ** ** If SQLite a built-in statement cache, this wouldn't be a problem. 
*/ - zSql = sqlite3_mprintf("SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s", + zSql = sqlite3Fts5Mprintf(&rc, + "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s", pConfig->zDb, pConfig->zName, zRank, pConfig->zName, (zRankArgs ? ", " : ""), (zRankArgs ? zRankArgs : ""), bDesc ? "DESC" : "ASC" ); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ + if( zSql ){ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pSorter->pStmt, 0); sqlite3_free(zSql); } @@ -930,10 +929,8 @@ static int fts5FindRankFunction(Fts5Cursor *pCsr){ const char *zRankArgs = pCsr->zRankArgs; if( zRankArgs ){ - char *zSql = sqlite3_mprintf("SELECT %s", zRankArgs); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ + char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs); + if( zSql ){ sqlite3_stmt *pStmt = 0; rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0); sqlite3_free(zSql); @@ -1213,8 +1210,11 @@ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ /* ** If the cursor requires seeking (bSeekRequired flag is set), seek it. ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. +** +** If argument bErrormsg is true and an error occurs, an error message may +** be left in sqlite3_vtab.zErrMsg. */ -static int fts5SeekCursor(Fts5Cursor *pCsr){ +static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ int rc = SQLITE_OK; /* If the cursor does not yet have a statement handle, obtain one now. 
*/ @@ -1222,8 +1222,9 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); int eStmt = fts5StmtType(pCsr); rc = sqlite3Fts5StorageStmt( - pTab->pStorage, eStmt, &pCsr->pStmt, &pTab->base.zErrMsg + pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->base.zErrMsg:0) ); + assert( rc!=SQLITE_OK || pTab->base.zErrMsg==0 ); assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ); } @@ -1618,7 +1619,7 @@ static int fts5ApiColumnText( *pz = 0; *pn = 0; }else{ - rc = fts5SeekCursor(pCsr); + rc = fts5SeekCursor(pCsr, 0); if( rc==SQLITE_OK ){ *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); @@ -1962,7 +1963,7 @@ static int fts5ColumnMethod( } } }else if( !fts5IsContentless(pTab) ){ - rc = fts5SeekCursor(pCsr); + rc = fts5SeekCursor(pCsr, 1); if( rc==SQLITE_OK ){ sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); } diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 04366fcea7..4c478d79c9 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -298,8 +298,7 @@ static int fts5ConfigParseSpecial( }else{ if( zArg[0] ){ pConfig->eContent = FTS5_CONTENT_EXTERNAL; - pConfig->zContent = sqlite3_mprintf("%Q.%Q", pConfig->zDb, zArg); - if( pConfig->zContent==0 ) rc = SQLITE_NOMEM; + pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); }else{ pConfig->eContent = FTS5_CONTENT_NONE; } @@ -601,28 +600,21 @@ void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ */ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ int i; - int rc; + int rc = SQLITE_OK; char *zSql; char *zOld; - zSql = (char*)sqlite3_mprintf("CREATE TABLE x("); + zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); for(i=0; zSql && inCol; i++){ - zOld = zSql; - zSql = sqlite3_mprintf("%s%s%Q", zOld, (i==0?"":", "), pConfig->azCol[i]); - sqlite3_free(zOld); + const char *zSep = (i==0?"":", "); + zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, 
pConfig->azCol[i]); } + zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", + zSql, pConfig->zName, FTS5_RANK_NAME + ); + assert( zSql || rc==SQLITE_NOMEM ); if( zSql ){ - zOld = zSql; - zSql = sqlite3_mprintf("%s, %Q HIDDEN, %s HIDDEN)", - zOld, pConfig->zName, FTS5_RANK_NAME - ); - sqlite3_free(zOld); - } - - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ rc = sqlite3_declare_vtab(pConfig->db, zSql); sqlite3_free(zSql); } @@ -823,7 +815,7 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; char *zSql; sqlite3_stmt *p = 0; - int rc; + int rc = SQLITE_OK; int iVersion = 0; /* Set default values */ @@ -831,10 +823,8 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; - zSql = sqlite3_mprintf(zSelect, pConfig->zDb, pConfig->zName); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ + zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); + if( zSql ){ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0); sqlite3_free(zSql); } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 6d2e042fc2..dee9cd7ef3 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -791,14 +791,12 @@ static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ if( p->rc!=SQLITE_OK ) return; if( p->pWriter==0 ){ - int rc; + int rc = SQLITE_OK; Fts5Config *pConfig = p->pConfig; - char *zSql = sqlite3_mprintf( + char *zSql = sqlite3Fts5Mprintf(&rc, "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl ); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ + if( zSql ){ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0); sqlite3_free(zSql); } @@ -4218,10 +4216,8 @@ int sqlite3Fts5IndexOpen( p->pConfig = pConfig; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; - p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); - if( 
p->zDataTbl==0 ){ - rc = SQLITE_NOMEM; - }else if( bCreate ){ + p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); + if( p->zDataTbl && bCreate ){ rc = sqlite3Fts5CreateTable( pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr ); diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index c21ec2effe..ebbaf362e3 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -250,13 +250,11 @@ static int fts5VocabOpenMethod( char *zSql = 0; int nByte; - zSql = sqlite3_mprintf( + zSql = sqlite3Fts5Mprintf(&rc, "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl ); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ + if( zSql ){ rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0); } sqlite3_free(zSql); diff --git a/ext/fts5/test/fts5columnsize.test b/ext/fts5/test/fts5columnsize.test index 3e725366da..2e54770736 100644 --- a/ext/fts5/test/fts5columnsize.test +++ b/ext/fts5/test/fts5columnsize.test @@ -28,6 +28,7 @@ foreach {tn outcome stmt} { 6 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=2) } 7 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0, columnsize=1) } 8 1 { CREATE VIRTUAL TABLE t1 USING fts5(x) } + 9 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=11) } } { execsql { DROP TABLE IF EXISTS t1; @@ -98,17 +99,17 @@ do_execsql_test 2.X { # fts5_aux_test_functions db -do_execsql_test 3.0 { +do_execsql_test 3.1.0 { CREATE VIRTUAL TABLE t3 USING fts5(x, y UNINDEXED, z, columnsize=0); INSERT INTO t3 VALUES('a a', 'b b b', 'c'); INSERT INTO t3 VALUES('x a x', 'b b b y', ''); } -do_execsql_test 3.1 { +do_execsql_test 3.1.1 { SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a' } { 1 {2 0 1} 2 {3 0 0} } -do_execsql_test 3.1 { +do_execsql_test 3.1.2 { INSERT INTO t3 VALUES(NULL, NULL, 'a a a a'); DELETE FROM t3 WHERE rowid = 1; SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a' @@ -116,4 +117,16 @@ do_execsql_test 3.1 { 2 {3 0 0} 
3 {0 0 4} } +do_execsql_test 3.2.0 { + CREATE VIRTUAL TABLE t4 USING fts5(x, y UNINDEXED, z, columnsize=0, content=''); + INSERT INTO t4(rowid, x, y, z) VALUES(1, 'a a', 'b b b', 'c'); + INSERT INTO t4(rowid, x, y, z) VALUES(2, 'x a x', 'b b b y', ''); +} +do_execsql_test 3.2.1 { + SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a' +} { + 1 {-1 0 -1} 2 {-1 0 -1} +} + + finish_test diff --git a/manifest b/manifest index 61510aa714..adfbb0e1f5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch.\sAdd\stests\sfor\scolumnsize=0. -D 2015-06-23T15:06:13.029 +C Improve\stest\scoverage\sof\sfts5. +D 2015-06-23T18:47:55.733 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 6fa5a3c6f1f558bb443429e33806e2e494823e44 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -105,21 +105,21 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 4ce5d0990c61a41155fc014b0066ae6d25a388d3 +F ext/fts5/fts5.c d7c58bd8c025a2202365194f2340930533f6ac24 F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba F ext/fts5/fts5Int.h 21eb91e02ad119e1d92ff100f366a976e12190de F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c be0dc80a9406151b350be27c7ec2956722578771 -F ext/fts5/fts5_config.c 6ae691e36f90185896f4db0a819ae2394f880ca1 +F ext/fts5/fts5_config.c c232d181d6324f0ae3a2a31319924473999e5816 F ext/fts5/fts5_expr.c 549bda1f7edcf10365fbfbc002bdea1be3c287bb F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc -F ext/fts5/fts5_index.c 7cea402924cd3d8cd5943a7f9514c9153696571b +F ext/fts5/fts5_index.c 707bbc0c30e37748af69a5d1871ad2f59e5c5179 F ext/fts5/fts5_storage.c 
b2fa301fce865d582d367a5e1bb438fe60c03cb5 F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 -F ext/fts5/fts5_vocab.c 1f8543b2c1ae4427f127a911bc8e60873fcd7bf9 +F ext/fts5/fts5_vocab.c e454fa58c6d591024659a9b61eece0d708e8b575 F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 0b465b1127adcd1c8131f3454ab4264a6964674c @@ -140,7 +140,7 @@ F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b -F ext/fts5/test/fts5columnsize.test bd07a42a80a6805e84afa7daf54ecd4563f752d0 +F ext/fts5/test/fts5columnsize.test 771401dd7fc628ecb38316613ed2f3cbe8f2b78a F ext/fts5/test/fts5config.test c9cc535f3b36cde1e5a32bf579f3f5962a9e82b2 F ext/fts5/test/fts5content.test e46904decd896e38c848ad4f38fa4e80251a028b F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 @@ -1363,7 +1363,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 0f7fd51325875fbf0f1eaca3bbbd170ef99c4208 4df852ce26c95d5d23c83dbe9c59d2c3435acddf -R 3ac475f744e56c1ad80818d65ed40944 +P ef44c71a22518727030dd90c0139af8973b05841 +R 41ab20c81b8eca2b8b1f4493d7071c96 U dan -Z b74a37a10556c5365b257dc3f6c234f4 +Z 674a570e3df1810e4f64caae14985366 diff --git a/manifest.uuid b/manifest.uuid index 92aa404f55..a040de9765 100644 --- a/manifest.uuid +++ 
b/manifest.uuid @@ -1 +1 @@ -ef44c71a22518727030dd90c0139af8973b05841 \ No newline at end of file +df5ccea80e8f0da83af5e595b539687006085120 \ No newline at end of file From e674bf1648eb65bb6852122409da39ff8776f8b5 Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 25 Jun 2015 16:01:44 +0000 Subject: [PATCH 159/206] Reorder the terms of a conditional for a small performance gain. FossilOrigin-Name: d67b0ed1054cbb7ea2cdd74720d4d6e0227cec14 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/manifest b/manifest index 1771c78149..fbf418370d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sa\sNEVER()\sthat\sis\sin\sfact\sreachable. -D 2015-06-25T15:21:52.642 +C Reorder\sthe\sterms\sof\sa\sconditional\sfor\sa\ssmall\sperformance\sgain. +D 2015-06-25T16:01:44.112 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -192,7 +192,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c cb50b3f00fec44481c4b0a7f9ab1e4bed2ffce18 +F src/btree.c 85679f63ebc2c67593479cf22f88539f71f2af70 F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h fdd1aff02fb2a63812bd95716e7f579fc3759107 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d007bc339b9ea6d59a3dd2b0c5e77e9c801b16f6 -R b3d0d7d3ac81c0743783e0e414777712 +P f824e66b0dc120bed227c7446e2663fcad7cc4f6 +R 
8aa067d659e72dd17b19d63682b0eebd U drh -Z ec9d3f29e33fb82221154b9d11e7c8ba +Z c674ba4fda0e43aa32d59f882342a9d7 diff --git a/manifest.uuid b/manifest.uuid index 3239fd0d3d..67e0ce0f60 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f824e66b0dc120bed227c7446e2663fcad7cc4f6 \ No newline at end of file +d67b0ed1054cbb7ea2cdd74720d4d6e0227cec14 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 0cd871923f..646367c2e0 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1457,7 +1457,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ testcase( gap+2==top ); testcase( gap+1==top ); testcase( gap==top ); - if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){ + if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ int bDefrag = 0; u8 *pSpace = pageFindSlot(pPage, nByte, &rc, &bDefrag); if( rc ) return rc; From b7580e84a84fb5a0424979e3c99834d666b4a3c5 Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 25 Jun 2015 18:36:13 +0000 Subject: [PATCH 160/206] More simplifications and performance improvements to cell allocation logic associated with allocateSpace(). FossilOrigin-Name: 78da0f69cb3289e332018864004f319f2764a5c8 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 40 +++++++++++++++++++--------------------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/manifest b/manifest index fbf418370d..848f3d66c6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Reorder\sthe\sterms\sof\sa\sconditional\sfor\sa\ssmall\sperformance\sgain. -D 2015-06-25T16:01:44.112 +C More\ssimplifications\sand\sperformance\simprovements\sto\scell\sallocation\nlogic\sassociated\swith\sallocateSpace(). 
+D 2015-06-25T18:36:13.826 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -192,7 +192,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 85679f63ebc2c67593479cf22f88539f71f2af70 +F src/btree.c 960a641306010ed25690af8e05d599fe4b9a005d F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h fdd1aff02fb2a63812bd95716e7f579fc3759107 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P f824e66b0dc120bed227c7446e2663fcad7cc4f6 -R 8aa067d659e72dd17b19d63682b0eebd +P d67b0ed1054cbb7ea2cdd74720d4d6e0227cec14 +R 4a51cc2794802bb6f018a7f1798d3a79 U drh -Z c674ba4fda0e43aa32d59f882342a9d7 +Z 2ec930a14eaee4b6a3bd2ab311de6bca diff --git a/manifest.uuid b/manifest.uuid index 67e0ce0f60..6e77614794 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d67b0ed1054cbb7ea2cdd74720d4d6e0227cec14 \ No newline at end of file +78da0f69cb3289e332018864004f319f2764a5c8 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 646367c2e0..9dbdab9b74 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1351,18 +1351,20 @@ static int defragmentPage(MemPage *pPage){ ** This function may detect corruption within pPg. If corruption is ** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned. ** -** If a slot of at least nByte bytes is found but cannot be used because -** there are already at least 60 fragmented bytes on the page, return NULL. 
-** In this case, if pbDefrag parameter is not NULL, set *pbDefrag to true. +** Slots on the free list that are between 1 and 3 bytes larger than nByte +** will be ignored if adding the extra space to the fragmentation count +** causes the fragmentation count to exceed 60. */ -static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ +static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ const int hdr = pPg->hdrOffset; u8 * const aData = pPg->aData; - int iAddr; - int pc; + int iAddr = hdr + 1; + int pc = get2byte(&aData[iAddr]); + int x; int usableSize = pPg->pBt->usableSize; - for(iAddr=hdr+1; (pc = get2byte(&aData[iAddr]))>0; iAddr=pc){ + assert( pc>0 ); + do{ int size; /* Size of the free slot */ /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of ** increasing offset. */ @@ -1374,8 +1376,7 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ ** freeblock form a big-endian integer which is the size of the freeblock ** in bytes, including the 4-byte header. */ size = get2byte(&aData[pc+2]); - if( size>=nByte ){ - int x = size - nByte; + if( (x = size - nByte)>=0 ){ testcase( x==4 ); testcase( x==3 ); if( pc < pPg->cellOffset+2*pPg->nCell || size+pc > usableSize ){ @@ -1384,10 +1385,8 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ }else if( x<4 ){ /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total ** number of bytes in fragments may not exceed 60. */ - if( aData[hdr+7]>=60 ){ - if( pbDefrag ) *pbDefrag = 1; - return 0; - } + if( aData[hdr+7]>57 ) return 0; + /* Remove the slot from the free-list. Update the number of ** fragmented bytes within the page. 
*/ memcpy(&aData[iAddr], &aData[pc], 2); @@ -1399,7 +1398,9 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ } return &aData[pc + x]; } - } + iAddr = pc; + pc = get2byte(&aData[pc]); + }while( pc ); return 0; } @@ -1458,14 +1459,13 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ testcase( gap+1==top ); testcase( gap==top ); if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ - int bDefrag = 0; - u8 *pSpace = pageFindSlot(pPage, nByte, &rc, &bDefrag); - if( rc ) return rc; - if( bDefrag ) goto defragment_page; + u8 *pSpace = pageFindSlot(pPage, nByte, &rc); if( pSpace ){ assert( pSpace>=data && (pSpace - data)<65536 ); *pIdx = (int)(pSpace - data); return SQLITE_OK; + }else if( rc ){ + return rc; } } @@ -1474,7 +1474,6 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ */ testcase( gap+2+nByte==top ); if( gap+2+nByte>top ){ - defragment_page: assert( pPage->nCell>0 || CORRUPT_DB ); rc = defragmentPage(pPage); if( rc ) return rc; @@ -6414,14 +6413,13 @@ static int pageInsertArray( int i; u8 *aData = pPg->aData; u8 *pData = *ppData; - const int bFreelist = aData[1] || aData[2]; int iEnd = iFirst + nCell; assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ for(i=iFirst; i Date: Thu, 25 Jun 2015 19:53:48 +0000 Subject: [PATCH 161/206] Simplifications and performance improvements in insertCell(). FossilOrigin-Name: 7d02e6c992ef92e1f77ebc13889e17c028454b06 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 22 +++++++++++----------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/manifest b/manifest index 848f3d66c6..f42ebf288a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C More\ssimplifications\sand\sperformance\simprovements\sto\scell\sallocation\nlogic\sassociated\swith\sallocateSpace(). -D 2015-06-25T18:36:13.826 +C Simplifications\sand\sperformance\simprovements\sin\sinsertCell(). 
+D 2015-06-25T19:53:48.178 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -192,7 +192,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 960a641306010ed25690af8e05d599fe4b9a005d +F src/btree.c 40e98c10725c2cec5429068e21c17924f4bf06cc F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h fdd1aff02fb2a63812bd95716e7f579fc3759107 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d67b0ed1054cbb7ea2cdd74720d4d6e0227cec14 -R 4a51cc2794802bb6f018a7f1798d3a79 +P 78da0f69cb3289e332018864004f319f2764a5c8 +R 0d165d2d8c01ecd2475fdbe233dfe5e0 U drh -Z 2ec930a14eaee4b6a3bd2ab311de6bca +Z 8a4a8bb2d9c667bdbc4293aaa6065234 diff --git a/manifest.uuid b/manifest.uuid index 6e77614794..e18ea8d4d0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -78da0f69cb3289e332018864004f319f2764a5c8 \ No newline at end of file +7d02e6c992ef92e1f77ebc13889e17c028454b06 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 9dbdab9b74..5769a236c8 100644 --- a/src/btree.c +++ b/src/btree.c @@ -6197,10 +6197,8 @@ static void insertCell( ){ int idx = 0; /* Where to write new cell content in data[] */ int j; /* Loop counter */ - int end; /* First byte past the last cell pointer in data[] */ - int ins; /* Index in data[] where new cell pointer is inserted */ - int cellOffset; /* Address of first cell pointer in data[] */ u8 *data; /* The content 
of the whole page */ + u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ if( *pRC ) return; @@ -6244,24 +6242,26 @@ static void insertCell( } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); data = pPage->aData; - cellOffset = pPage->cellOffset; - end = cellOffset + 2*pPage->nCell; - ins = cellOffset + 2*i; + assert( &data[pPage->cellOffset]==pPage->aCellIdx ); rc = allocateSpace(pPage, sz, &idx); if( rc ){ *pRC = rc; return; } /* The allocateSpace() routine guarantees the following properties ** if it returns successfully */ - assert( idx >= 0 && (idx >= end+2 || CORRUPT_DB) ); + assert( idx >= 0 ); + assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); assert( idx+sz <= (int)pPage->pBt->usableSize ); - pPage->nCell++; pPage->nFree -= (u16)(2 + sz); memcpy(&data[idx], pCell, sz); if( iChild ){ put4byte(&data[idx], iChild); } - memmove(&data[ins+2], &data[ins], end-ins); - put2byte(&data[ins], idx); - put2byte(&data[pPage->hdrOffset+3], pPage->nCell); + pIns = pPage->aCellIdx + i*2; + memmove(pIns+2, pIns, 2*(pPage->nCell - i)); + put2byte(pIns, idx); + pPage->nCell++; + /* increment the cell count */ + if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; + assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell ); #ifndef SQLITE_OMIT_AUTOVACUUM if( pPage->pBt->autoVacuum ){ /* The cell may contain a pointer to an overflow page. If so, write From ed2589f5556abcf379bbd0570460b182257b6e34 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 25 Jun 2015 20:10:24 +0000 Subject: [PATCH 162/206] Add a script to combine all fts5 code into a single file - fts5.c - that can be used to build an SQLite loadable extension. 
FossilOrigin-Name: 46e86b0637248fb4d623c97778cc041eabe3636c --- Makefile.in | 2 +- ext/fts5/fts5Int.h | 5 ++ ext/fts5/fts5_index.c | 2 +- ext/fts5/{fts5.c => fts5_main.c} | 23 +++++--- ext/fts5/test/fts5_common.tcl | 1 + ext/fts5/tool/mkfts5c.tcl | 78 ++++++++++++++++++++++++++ main.mk | 96 ++++++++++---------------------- manifest | 27 ++++----- manifest.uuid | 2 +- src/main.c | 4 +- tool/mksqlite3c.tcl | 17 ------ 11 files changed, 148 insertions(+), 109 deletions(-) rename ext/fts5/{fts5.c => fts5_main.c} (99%) create mode 100644 ext/fts5/tool/mkfts5c.tcl diff --git a/Makefile.in b/Makefile.in index 91bed06261..a4c585c7cd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -352,7 +352,7 @@ SRC += \ $(TOP)/ext/fts5/fts5Int.h \ $(TOP)/ext/fts5/fts5_aux.c \ $(TOP)/ext/fts5/fts5_buffer.c \ - $(TOP)/ext/fts5/fts5.c \ + $(TOP)/ext/fts5/fts5_main.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index eef36f811e..07d1e4fecb 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -14,7 +14,11 @@ #ifndef _FTS5INT_H #define _FTS5INT_H +#ifdef SQLITE_ENABLE_FTS5 + #include "fts5.h" +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 #include #include @@ -689,3 +693,4 @@ int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); **************************************************************************/ #endif +#endif diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index dee9cd7ef3..ec930f1a3b 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4273,7 +4273,7 @@ static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of ** unicode characters in the string. 
*/ -int fts5IndexCharlen(const char *pIn, int nIn){ +static int fts5IndexCharlen(const char *pIn, int nIn){ int nChar = 0; int i = 0; while( imallocFailed && rc==SQLITE_OK ){ - rc = sqlite3Fts5Init(db); + rc = sqlite3_fts5_init(db, 0, 0); } #endif diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index 9f6699b9a6..5b00368427 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -96,9 +96,6 @@ foreach hdr { fts3Int.h fts3_hash.h fts3_tokenizer.h - fts5.h - fts5Int.h - fts5parse.h hash.h hwtime.h keywordhash.h @@ -372,20 +369,6 @@ foreach file { fts3_unicode.c fts3_unicode2.c - fts5_aux.c - fts5_buffer.c - fts5.c - fts5_config.c - fts5_expr.c - fts5_hash.c - fts5_index.c - fts5parse.c - fts5_storage.c - fts5_tokenize.c - fts5_unicode2.c - fts5_varint.c - fts5_vocab.c - rtree.c icu.c fts3_icu.c From 3e65f89ef6a1b6c6d08e415706c93f9703c252b6 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 25 Jun 2015 20:36:36 +0000 Subject: [PATCH 163/206] Fix Makefile.in to account for recent changes. Update fts5 test scripts so that they do not run if SQLITE_ENABLE_FTS5 is not defined. 
FossilOrigin-Name: 3175220747f5d4dadd17e09fb1899dc782c90926 --- Makefile.in | 43 ++++++++++------- ext/fts5/test/fts5_common.tcl | 1 + ext/fts5/test/fts5alter.test | 5 ++ ext/fts5/test/fts5aux.test | 6 +++ ext/fts5/test/fts5auxdata.test | 6 +++ ext/fts5/test/fts5bigpl.test | 6 +++ ext/fts5/test/fts5columnsize.test | 6 +++ ext/fts5/test/fts5config.test | 6 +++ ext/fts5/test/fts5content.test | 6 +++ ext/fts5/test/fts5corrupt.test | 6 +++ ext/fts5/test/fts5corrupt2.test | 6 +++ ext/fts5/test/fts5corrupt3.test | 6 +++ ext/fts5/test/fts5dlidx.test | 6 +++ ext/fts5/test/fts5doclist.test | 6 +++ ext/fts5/test/fts5full.test | 6 +++ ext/fts5/test/fts5hash.test | 6 +++ ext/fts5/test/fts5integrity.test | 6 +++ ext/fts5/test/fts5merge.test | 6 +++ ext/fts5/test/fts5near.test | 6 +++ ext/fts5/test/fts5optimize.test | 6 +++ ext/fts5/test/fts5plan.test | 6 +++ ext/fts5/test/fts5porter.test | 6 +++ ext/fts5/test/fts5porter2.test | 6 +++ ext/fts5/test/fts5prefix.test | 6 +++ ext/fts5/test/fts5rank.test | 6 +++ ext/fts5/test/fts5rebuild.test | 6 +++ ext/fts5/test/fts5restart.test | 6 +++ ext/fts5/test/fts5rowid.test | 6 +++ ext/fts5/test/fts5tokenizer.test | 6 +++ ext/fts5/test/fts5unicode.test | 6 +++ ext/fts5/test/fts5unicode2.test | 6 +++ ext/fts5/test/fts5unicode3.test | 9 +++- ext/fts5/test/fts5unindexed.test | 6 +++ ext/fts5/test/fts5version.test | 6 +++ ext/fts5/test/fts5vocab.test | 6 +++ manifest | 80 +++++++++++++++---------------- manifest.uuid | 2 +- 37 files changed, 268 insertions(+), 58 deletions(-) diff --git a/Makefile.in b/Makefile.in index a4c585c7cd..0a2389225b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -173,6 +173,7 @@ LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \ fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \ fts3_tokenize_vtab.lo \ fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \ + fts5.lo \ func.lo global.lo hash.lo \ icu.lo insert.lo journal.lo legacy.lo loadext.lo \ main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \ 
@@ -347,22 +348,6 @@ SRC += \ $(TOP)/ext/ota/sqlite3ota.h \ $(TOP)/ext/ota/sqlite3ota.c -SRC += \ - $(TOP)/ext/fts5/fts5.h \ - $(TOP)/ext/fts5/fts5Int.h \ - $(TOP)/ext/fts5/fts5_aux.c \ - $(TOP)/ext/fts5/fts5_buffer.c \ - $(TOP)/ext/fts5/fts5_main.c \ - $(TOP)/ext/fts5/fts5_config.c \ - $(TOP)/ext/fts5/fts5_expr.c \ - $(TOP)/ext/fts5/fts5_hash.c \ - $(TOP)/ext/fts5/fts5_index.c \ - fts5parse.c fts5parse.h \ - $(TOP)/ext/fts5/fts5_storage.c \ - $(TOP)/ext/fts5/fts5_tokenize.c \ - $(TOP)/ext/fts5/fts5_unicode2.c \ - $(TOP)/ext/fts5/fts5_vocab.c - # Generated source code files # @@ -429,6 +414,7 @@ TESTSRC += \ $(TOP)/ext/misc/eval.c \ $(TOP)/ext/misc/fileio.c \ $(TOP)/ext/misc/fuzzer.c \ + $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/percentile.c \ @@ -990,6 +976,25 @@ fts3_write.lo: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) rtree.lo: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c +# FTS5 things +# +FTS5_SRC = \ + $(TOP)/ext/fts5/fts5.h \ + $(TOP)/ext/fts5/fts5Int.h \ + $(TOP)/ext/fts5/fts5_aux.c \ + $(TOP)/ext/fts5/fts5_buffer.c \ + $(TOP)/ext/fts5/fts5_main.c \ + $(TOP)/ext/fts5/fts5_config.c \ + $(TOP)/ext/fts5/fts5_expr.c \ + $(TOP)/ext/fts5/fts5_hash.c \ + $(TOP)/ext/fts5/fts5_index.c \ + fts5parse.c fts5parse.h \ + $(TOP)/ext/fts5/fts5_storage.c \ + $(TOP)/ext/fts5/fts5_tokenize.c \ + $(TOP)/ext/fts5/fts5_unicode2.c \ + $(TOP)/ext/fts5/fts5_varint.c \ + $(TOP)/ext/fts5/fts5_vocab.c \ + fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h @@ -1002,6 +1007,12 @@ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon fts5parse.h: fts5parse.c +fts5.c: $(FTS5_SRC) + $(TCLSH_CMD) $(TOP)/ext/fts5/tool/mkfts5c.tcl + +fts5.lo: fts5.c $(HDR) $(EXTHDR) + $(LTCOMPILE) -DSQLITE_CORE -c fts5.c + # Rules to build the 'testfixture' application. 
# diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 06309e2163..ddc8060ecc 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -15,6 +15,7 @@ if {![info exists testdir]} { } source $testdir/tester.tcl + catch { sqlite3_fts5_may_be_corrupt 0 } proc fts5_test_poslist {cmd} { diff --git a/ext/fts5/test/fts5alter.test b/ext/fts5/test/fts5alter.test index f2000ff1dc..0ed788b8a9 100644 --- a/ext/fts5/test/fts5alter.test +++ b/ext/fts5/test/fts5alter.test @@ -16,6 +16,11 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5alter +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} #------------------------------------------------------------------------- # Test renaming regular, contentless and columnsize=0 FTS5 tables. diff --git a/ext/fts5/test/fts5aux.test b/ext/fts5/test/fts5aux.test index bbb6cf81f1..995fe85784 100644 --- a/ext/fts5/test/fts5aux.test +++ b/ext/fts5/test/fts5aux.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5aux +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + proc inst {cmd i} { $cmd xInst $i } diff --git a/ext/fts5/test/fts5auxdata.test b/ext/fts5/test/fts5auxdata.test index ee408a0645..dbbb1dba78 100644 --- a/ext/fts5/test/fts5auxdata.test +++ b/ext/fts5/test/fts5auxdata.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5auxdata +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE f1 USING fts5(a, b); INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1'); diff --git a/ext/fts5/test/fts5bigpl.test b/ext/fts5/test/fts5bigpl.test index 172c0396be..85f74606c6 100644 --- a/ext/fts5/test/fts5bigpl.test +++ b/ext/fts5/test/fts5bigpl.test @@ -17,6 +17,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5bigpl +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + if { $tcl_platform(wordSize)<8 } { finish_test return diff --git a/ext/fts5/test/fts5columnsize.test b/ext/fts5/test/fts5columnsize.test index 2e54770736..ed0edd677e 100644 --- a/ext/fts5/test/fts5columnsize.test +++ b/ext/fts5/test/fts5columnsize.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5columnsize +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + #------------------------------------------------------------------------- # Check that the option can be parsed and that the %_docsize table is # only created if it is set to true. diff --git a/ext/fts5/test/fts5config.test b/ext/fts5/test/fts5config.test index 22e7631bc0..7c88e03d38 100644 --- a/ext/fts5/test/fts5config.test +++ b/ext/fts5/test/fts5config.test @@ -16,6 +16,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5config +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + #------------------------------------------------------------------------- # Try different types of quote characters. 
# diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index 1b5b6b3072..f87aa3d947 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5content +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + #------------------------------------------------------------------------- # Contentless tables # diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index efbe3f5d84..3f57eb515a 100644 --- a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -16,6 +16,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index 28486b34ac..3e8323b984 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -16,6 +16,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt2 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} sqlite3_fts5_may_be_corrupt 1 # Create a simple FTS5 table containing 100 documents. Each document diff --git a/ext/fts5/test/fts5corrupt3.test b/ext/fts5/test/fts5corrupt3.test index 831eca5454..cf08a5b107 100644 --- a/ext/fts5/test/fts5corrupt3.test +++ b/ext/fts5/test/fts5corrupt3.test @@ -16,6 +16,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt3 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} sqlite3_fts5_may_be_corrupt 1 # Create a simple FTS5 table containing 100 documents. 
Each document diff --git a/ext/fts5/test/fts5dlidx.test b/ext/fts5/test/fts5dlidx.test index 013de1b4a0..07d7e2baeb 100644 --- a/ext/fts5/test/fts5dlidx.test +++ b/ext/fts5/test/fts5dlidx.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5dlidx +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + if { $tcl_platform(wordSize)<8 } { finish_test return diff --git a/ext/fts5/test/fts5doclist.test b/ext/fts5/test/fts5doclist.test index eb1dc9a695..411289a523 100644 --- a/ext/fts5/test/fts5doclist.test +++ b/ext/fts5/test/fts5doclist.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5doclist +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + #------------------------------------------------------------------------- # Create a table with 1000 columns. Then add some large documents to it. diff --git a/ext/fts5/test/fts5full.test b/ext/fts5/test/fts5full.test index 4563cedce5..c640f56e06 100644 --- a/ext/fts5/test/fts5full.test +++ b/ext/fts5/test/fts5full.test @@ -17,6 +17,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5full +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE x8 USING fts5(i); INSERT INTO x8(x8, rank) VALUES('automerge', 0); diff --git a/ext/fts5/test/fts5hash.test b/ext/fts5/test/fts5hash.test index 1189ef5391..a49fa2697d 100644 --- a/ext/fts5/test/fts5hash.test +++ b/ext/fts5/test/fts5hash.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5hash +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + #------------------------------------------------------------------------- # Return a list of tokens (a vocabulary) that all share the same hash # key value. This can be used to test hash collisions. diff --git a/ext/fts5/test/fts5integrity.test b/ext/fts5/test/fts5integrity.test index 8f56e9273e..478e790d6b 100644 --- a/ext/fts5/test/fts5integrity.test +++ b/ext/fts5/test/fts5integrity.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5integrity +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE xx USING fts5(x); INSERT INTO xx VALUES('term'); diff --git a/ext/fts5/test/fts5merge.test b/ext/fts5/test/fts5merge.test index 023a2f7fe8..9dd1ecd026 100644 --- a/ext/fts5/test/fts5merge.test +++ b/ext/fts5/test/fts5merge.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5merge +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + db func repeat [list string repeat] #------------------------------------------------------------------------- diff --git a/ext/fts5/test/fts5near.test b/ext/fts5/test/fts5near.test index dffce29431..b4ae205dee 100644 --- a/ext/fts5/test/fts5near.test +++ b/ext/fts5/test/fts5near.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5near +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + proc do_near_test {tn doc near res} { uplevel [list do_execsql_test $tn " DELETE FROM t1; diff --git a/ext/fts5/test/fts5optimize.test b/ext/fts5/test/fts5optimize.test index 068cf4c225..984af8c532 100644 --- a/ext/fts5/test/fts5optimize.test +++ b/ext/fts5/test/fts5optimize.test @@ -14,6 +14,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5optimize +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + proc rnddoc {nWord} { set vocab {a b c d e f g h i j k l m n o p q r s t u v w x y z} set nVocab [llength $vocab] diff --git a/ext/fts5/test/fts5plan.test b/ext/fts5/test/fts5plan.test index 72fdc60de3..d7f5fd65a0 100644 --- a/ext/fts5/test/fts5plan.test +++ b/ext/fts5/test/fts5plan.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5plan +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE TABLE t1(x, y); CREATE VIRTUAL TABLE f1 USING fts5(ff); diff --git a/ext/fts5/test/fts5porter.test b/ext/fts5/test/fts5porter.test index 83ca852305..2535eb75b1 100644 --- a/ext/fts5/test/fts5porter.test +++ b/ext/fts5/test/fts5porter.test @@ -17,6 +17,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5porter +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + set test_vocab { a a aaron aaron abaissiez abaissiez abandon abandon diff --git a/ext/fts5/test/fts5porter2.test b/ext/fts5/test/fts5porter2.test index 7ea2e6994d..5e0aeb029f 100644 --- a/ext/fts5/test/fts5porter2.test +++ b/ext/fts5/test/fts5porter2.test @@ -18,6 +18,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5porter2 +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + set test_vocab { tion tion ation ation diff --git a/ext/fts5/test/fts5prefix.test b/ext/fts5/test/fts5prefix.test index c555080a27..076ecaa09b 100644 --- a/ext/fts5/test/fts5prefix.test +++ b/ext/fts5/test/fts5prefix.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5prefix +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1); INSERT INTO xx VALUES('one two three'); diff --git a/ext/fts5/test/fts5rank.test b/ext/fts5/test/fts5rank.test index 6da24adee2..2182ab3097 100644 --- a/ext/fts5/test/fts5rank.test +++ b/ext/fts5/test/fts5rank.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5rank +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + #------------------------------------------------------------------------- # "ORDER BY rank" + highlight() + large poslists. diff --git a/ext/fts5/test/fts5rebuild.test b/ext/fts5/test/fts5rebuild.test index 96961792f5..1044421d5e 100644 --- a/ext/fts5/test/fts5rebuild.test +++ b/ext/fts5/test/fts5rebuild.test @@ -14,6 +14,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5rebuild +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.1 { CREATE VIRTUAL TABLE f1 USING fts5(a, b); INSERT INTO f1(a, b) VALUES('one', 'o n e'); diff --git a/ext/fts5/test/fts5restart.test b/ext/fts5/test/fts5restart.test index 8fa3600ab3..0dd7d69454 100644 --- a/ext/fts5/test/fts5restart.test +++ b/ext/fts5/test/fts5restart.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5restart +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE f1 USING fts5(ff); } diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test index 5d0253472d..9ea5272d5b 100644 --- a/ext/fts5/test/fts5rowid.test +++ b/ext/fts5/test/fts5rowid.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5rowid +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_catchsql_test 1.1 { SELECT fts5_rowid() } {1 {should be: fts5_rowid(subject, ....)}} diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index 1a3d253be8..0246419572 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5tokenizer +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); diff --git a/ext/fts5/test/fts5unicode.test b/ext/fts5/test/fts5unicode.test index 0018a49030..46f4c4f1aa 100644 --- a/ext/fts5/test/fts5unicode.test +++ b/ext/fts5/test/fts5unicode.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unicode +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + proc tokenize_test {tn tokenizer input output} { uplevel [list do_test $tn [subst -nocommands { set ret {} diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test index 280d045db1..e34bc840a5 100644 --- a/ext/fts5/test/fts5unicode2.test +++ b/ext/fts5/test/fts5unicode2.test @@ -17,6 +17,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unicode2 +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + proc do_unicode_token_test {tn input res} { uplevel [list do_test $tn [list \ sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input diff --git a/ext/fts5/test/fts5unicode3.test b/ext/fts5/test/fts5unicode3.test index a9efa4474c..876ad27461 100644 --- a/ext/fts5/test/fts5unicode3.test +++ b/ext/fts5/test/fts5unicode3.test @@ -12,11 +12,18 @@ # Tests focusing on the fts5 tokenizers # +source [file join [file dirname [info script]] fts5_common.tcl] + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + proc fts3_unicode_path {file} { file join [file dirname [info script]] .. .. fts3 unicode $file } -source [file join [file dirname [info script]] fts5_common.tcl] source [fts3_unicode_path parseunicode.tcl] set testprefix fts5unicode3 diff --git a/ext/fts5/test/fts5unindexed.test b/ext/fts5/test/fts5unindexed.test index e808064f05..16d43f84c2 100644 --- a/ext/fts5/test/fts5unindexed.test +++ b/ext/fts5/test/fts5unindexed.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unindexed +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.1 { CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED); diff --git a/ext/fts5/test/fts5version.test b/ext/fts5/test/fts5version.test index ccd035ae4b..2176fee7d3 100644 --- a/ext/fts5/test/fts5version.test +++ b/ext/fts5/test/fts5version.test @@ -16,6 +16,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5version +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.1 { CREATE VIRTUAL TABLE t1 USING fts5(one); diff --git a/ext/fts5/test/fts5vocab.test b/ext/fts5/test/fts5vocab.test index 2d2faac3e0..c95f50b835 100644 --- a/ext/fts5/test/fts5vocab.test +++ b/ext/fts5/test/fts5vocab.test @@ -15,6 +15,12 @@ source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5vocab +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + do_execsql_test 1.1.1 { CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1); diff --git a/manifest b/manifest index 42920706b3..81ca8a12d1 100644 --- a/manifest +++ b/manifest @@ -1,7 +1,7 @@ -C Merge\slatest\strunk\schanges\sinto\sthis\sbranch. -D 2015-06-25T20:16:23.149 +C Fix\sMakefile.in\sto\saccount\sfor\srecent\schanges.\sUpdate\sfts5\stest\sscripts\sso\sthat\sthey\sdo\snot\srun\sif\sSQLITE_ENABLE_FTS5\sis\snot\sdefined. +D 2015-06-25T20:36:36.724 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f -F Makefile.in 4757ec5c89c420d90f18b0afa6e63d7b884c881d +F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 F Makefile.msc b7db9ccbbad1c495b98e5326a06cac03aa206127 F Makefile.vxworks e1b65dea203f054e71653415bd8f96dcaed47858 @@ -122,7 +122,7 @@ F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 F ext/fts5/fts5_vocab.c e454fa58c6d591024659a9b61eece0d708e8b575 F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl 26bab0c0ed684e95cd93c419ed995a3f83f4c258 +F ext/fts5/test/fts5_common.tcl 407ab0852318cda22544834a09db7af265085ecb F ext/fts5/test/fts5aa.test 0be21c89fd66b588db355a6398911fd875bdcc6c F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 0990ae7497ebaea2ab5f7fd5caedd93a71a905fc @@ -135,19 +135,19 @@ F 
ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5alter.test 3342e7fd58556d2a7e5299a7d9dec62e358028ed +F ext/fts5/test/fts5alter.test 78b63e088646dd623cacbdc1899a54d638dcf3d8 F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca -F ext/fts5/test/fts5aux.test e5631607bbc05ac1c38cf7d691000509aca71ef3 -F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b -F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b -F ext/fts5/test/fts5columnsize.test 771401dd7fc628ecb38316613ed2f3cbe8f2b78a -F ext/fts5/test/fts5config.test c9cc535f3b36cde1e5a32bf579f3f5962a9e82b2 -F ext/fts5/test/fts5content.test e46904decd896e38c848ad4f38fa4e80251a028b -F ext/fts5/test/fts5corrupt.test 35bfdbbb3cdcea46ae7385f6432e9b5c574e70a1 -F ext/fts5/test/fts5corrupt2.test c231f532162de381fa83ec477b51cd8633fd9da7 -F ext/fts5/test/fts5corrupt3.test da4e2adb2308d8587c2eff31b5aa47447b8a2edb -F ext/fts5/test/fts5dlidx.test 070531bd45685e545e3e6021deb543f730a4011b -F ext/fts5/test/fts5doclist.test 635b80ac785627841a59c583bac702b55d49fdc5 +F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f +F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e +F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb +F ext/fts5/test/fts5columnsize.test 97dc6bd66c91009d00407aa078dd5e9e8eb22f99 +F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b +F ext/fts5/test/fts5content.test d0d90a45f0bcf07d75d474500d81f941b45e2021 +F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6 +F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b +F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3 +F 
ext/fts5/test/fts5dlidx.test 59b80bbe34169a082c575d9c26f0a7019a7b79c1 +F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b F ext/fts5/test/fts5ea.test 451bb37310ee6df8ef72e4354fda5621b3b51448 F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b @@ -156,27 +156,27 @@ F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test 762991d526ee67c2b374351a17248097ea38bee7 F ext/fts5/test/fts5fault5.test 54da9fd4c3434a1d4f6abdcb6469299d91cf5875 F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215 -F ext/fts5/test/fts5full.test 0924bdca5416a242103239ace79c6f5aa34bab8d -F ext/fts5/test/fts5hash.test bdba7b591d503005d5a81871ba00a359daa1e969 -F ext/fts5/test/fts5integrity.test b45f633381a85dc000e41d68c96ab510985ca35e -F ext/fts5/test/fts5merge.test 8077454f2975a63f35761f4b8a718b3a808b7c9c -F ext/fts5/test/fts5near.test d2e3343e62d438f2efd96ebcd83a0d30a16ea6dc -F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54 -F ext/fts5/test/fts5plan.test 7f38179220c9385f88e1470aae6cba134a308b40 -F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e -F ext/fts5/test/fts5porter2.test c534385e88e685b354c2b2020acc0c4920042c8e -F ext/fts5/test/fts5prefix.test 7eba86fc270b110ba2b83ba286a1fd4b3b17955e -F ext/fts5/test/fts5rank.test f59a6b20ec8e08cb130d833dcece59cf9cd92890 -F ext/fts5/test/fts5rebuild.test 77c6613aa048f38b4a12ddfacb2e6e1342e1b066 -F ext/fts5/test/fts5restart.test cd58a5fb552ac10db549482698e503f82693bcd0 -F ext/fts5/test/fts5rowid.test ca9d91ccb3a4590fc561b2d7a884361bb21e8df5 -F ext/fts5/test/fts5tokenizer.test 668747fcb41de6fc7daebc478920b705164fccc1 -F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d -F ext/fts5/test/fts5unicode2.test ad38982b03dc9213445facb16e99f668a74cc4ba -F ext/fts5/test/fts5unicode3.test 
273f9086ad33935566bbc0d0c94d0d9687ef686b -F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 -F ext/fts5/test/fts5version.test dc34a735af6625a1a7a4a916a38d122071343887 -F ext/fts5/test/fts5vocab.test 389e5fe4928eae5fddcf26bcc5a6890b0791aa75 +F ext/fts5/test/fts5full.test 6f6143af0c6700501d9fd597189dfab1555bb741 +F ext/fts5/test/fts5hash.test 42eb066f667e9a389a63437cb7038c51974d4fc6 +F ext/fts5/test/fts5integrity.test 29f41d2c7126c6122fbb5d54e556506456876145 +F ext/fts5/test/fts5merge.test 8f3cdba2ec9c5e7e568246e81b700ad37f764367 +F ext/fts5/test/fts5near.test b214cddb1c1f1bddf45c75af768f20145f7e71cc +F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5 +F ext/fts5/test/fts5plan.test 6a55ecbac9890765b0e16f8c421c7e0888cfe436 +F ext/fts5/test/fts5porter.test 7cdc07bef301d70eebbfa75dcaf45c3680e1d0e1 +F ext/fts5/test/fts5porter2.test 2e65633d58a1c525d5af0f6c01e5a59155bb3487 +F ext/fts5/test/fts5prefix.test 552a462f0e8595676611f41643de217fb4ac2808 +F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1 +F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b +F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17 +F ext/fts5/test/fts5rowid.test f7674e19a40987bf59624d8db9827114cb7f7a3e +F ext/fts5/test/fts5tokenizer.test 83e7e01a21ec7fdf814d51f6184cc26bb77d7695 +F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841 +F ext/fts5/test/fts5unicode2.test 84282d4a6dd34370dc19a3486dd6fecc89c7ed0b +F ext/fts5/test/fts5unicode3.test 35c3d02aa7acf7d43d8de3bfe32c15ba96e8928e +F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680 +F ext/fts5/test/fts5version.test bed59038e937c40d3c0056d08076db7874c6cd4a +F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c F ext/fts5/tool/loadfts5.tcl 7ef3e62131f0434a78e4f5c5b056b09d221710a8 F ext/fts5/tool/mkfts5c.tcl fdb449263837a18d9131bc2f61b256fd77e64361 F ext/fts5/tool/showfts5.tcl 
921f33b30c3189deefd2b2cc81f951638544aaf1 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 46e86b0637248fb4d623c97778cc041eabe3636c 7d02e6c992ef92e1f77ebc13889e17c028454b06 -R 86f1f29b38c83a25da1125d5a5eacdb5 +P fcd8f7ce601729dc51d880d16b97040c1be16aa2 +R d44c4485fa78076f45f58b5b04b05b9b U dan -Z c3d1693711def65e5756dddc3dd0ec59 +Z cdc9c7cdbdc63c34e596c23a4d9ea399 diff --git a/manifest.uuid b/manifest.uuid index f4c59d2e72..70e4e52c4d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fcd8f7ce601729dc51d880d16b97040c1be16aa2 \ No newline at end of file +3175220747f5d4dadd17e09fb1899dc782c90926 \ No newline at end of file From 591909c344eecf314505b31af0fb32ca068248f7 Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 25 Jun 2015 23:52:48 +0000 Subject: [PATCH 164/206] Optimization to sqlite3_value_bytes() for the common case that does not require a data transformation. FossilOrigin-Name: 8d79f3a1443391bee204bb8c49240f44477168db --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/vdbemem.c | 17 +++++++++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/manifest b/manifest index f42ebf288a..785ef7153f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Simplifications\sand\sperformance\simprovements\sin\sinsertCell(). -D 2015-06-25T19:53:48.178 +C Optimization\sto\ssqlite3_value_bytes()\sfor\sthe\scommon\scase\sthat\sdoes\snot\nrequire\sa\sdata\stransformation. 
+D 2015-06-25T23:52:48.104 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -319,7 +319,7 @@ F src/vdbeInt.h 20295e482121d13437f69985f77db211cdc8bac1 F src/vdbeapi.c 6a0d7757987018ff6b1b81bc5293219cd26bb299 F src/vdbeaux.c 4c82d6f686f72ea7d266d26d528a171b728626f7 F src/vdbeblob.c 4f2e8e075d238392df98c5e03a64342465b03f90 -F src/vdbemem.c 4e947cd322bb531e3f7f6f58f0f536d182b38ef8 +F src/vdbemem.c ae38a0d35ae71cf604381a887c170466ba518090 F src/vdbesort.c f5009e7a35e3065635d8918b9a31f498a499976b F src/vdbetrace.c 8befe829faff6d9e6f6e4dee5a7d3f85cc85f1a0 F src/vtab.c c535e80259ebe616467181a83a4263555b97c694 @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 78da0f69cb3289e332018864004f319f2764a5c8 -R 0d165d2d8c01ecd2475fdbe233dfe5e0 +P 7d02e6c992ef92e1f77ebc13889e17c028454b06 +R 055499679190be686aed6dd5c50e96a1 U drh -Z 8a4a8bb2d9c667bdbc4293aaa6065234 +Z a43ea71012a447eaba4e577595223851 diff --git a/manifest.uuid b/manifest.uuid index e18ea8d4d0..d16841f228 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7d02e6c992ef92e1f77ebc13889e17c028454b06 \ No newline at end of file +8d79f3a1443391bee204bb8c49240f44477168db \ No newline at end of file diff --git a/src/vdbemem.c b/src/vdbemem.c index 1076812609..648a53d2a3 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -1694,17 +1694,26 @@ void sqlite3ValueFree(sqlite3_value *v){ } /* -** Return the number of bytes in the sqlite3_value object assuming -** that it uses the encoding "enc" +** The sqlite3ValueBytes() routine returns the number of bytes in the +** sqlite3_value object assuming that it uses the encoding "enc". 
+** The valueBytes() routine is a helper function. */ +static SQLITE_NOINLINE int valueBytes(sqlite3_value *pVal, u8 enc){ + return valueToText(pVal, enc)!=0 ? pVal->n : 0; +} int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ Mem *p = (Mem*)pVal; - if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){ + assert( (p->flags & MEM_Null)==0 || (p->flags & (MEM_Str|MEM_Blob))==0 ); + if( (p->flags & MEM_Str)!=0 && pVal->enc==enc ){ + return p->n; + } + if( (p->flags & MEM_Blob)!=0 ){ if( p->flags & MEM_Zero ){ return p->n + p->u.nZero; }else{ return p->n; } } - return 0; + if( p->flags & MEM_Null ) return 0; + return valueBytes(pVal, enc); } From 39065c60b34af8eaf96d70113896b570d0a5fe3b Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 26 Jun 2015 02:41:31 +0000 Subject: [PATCH 165/206] Simplify the pcache by not keeping continuous track of page 1 but instead just loading page 1 on the rare occasions when it is actually needed. FossilOrigin-Name: 015302f15e46a087ec92f3644c6741600dbf4306 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/pcache.c | 22 ++++++++-------------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/manifest b/manifest index 785ef7153f..a04cf6695b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Optimization\sto\ssqlite3_value_bytes()\sfor\sthe\scommon\scase\sthat\sdoes\snot\nrequire\sa\sdata\stransformation. -D 2015-06-25T23:52:48.104 +C Simplify\sthe\spcache\sby\snot\skeeping\scontinuous\strack\sof\spage\s1\sbut\sinstead\njust\sloading\spage\s1\son\sthe\srare\soccasions\swhen\sit\sis\sactually\sneeded. 
+D 2015-06-26T02:41:31.913 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -240,7 +240,7 @@ F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca F src/pager.c 9bc918a009285f96ec6dac62dd764c7063552455 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 -F src/pcache.c 10539fb959849ad6efff80050541cab3d25089d4 +F src/pcache.c d8b19632706dd6b81b03d0c5fd1e6bab8c13d0b9 F src/pcache.h b44658c9c932d203510279439d891a2a83e12ba8 F src/pcache1.c 8e3799b33c41d517d86444d4abefc80d4f02adca F src/pragma.c c1f4d012ea9f6b1ce52d341b2cd0ad72d560afd7 @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7d02e6c992ef92e1f77ebc13889e17c028454b06 -R 055499679190be686aed6dd5c50e96a1 +P 8d79f3a1443391bee204bb8c49240f44477168db +R c56e8779f6e8f4f9de88fc3a0bf56b01 U drh -Z a43ea71012a447eaba4e577595223851 +Z 87d94359f0bb80945f07f56dd11de5b2 diff --git a/manifest.uuid b/manifest.uuid index d16841f228..fa2d20be1d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8d79f3a1443391bee204bb8c49240f44477168db \ No newline at end of file +015302f15e46a087ec92f3644c6741600dbf4306 \ No newline at end of file diff --git a/src/pcache.c b/src/pcache.c index d768fe00c8..d1b3f22a11 100644 --- a/src/pcache.c +++ b/src/pcache.c @@ -28,7 +28,6 @@ struct PCache { int (*xStress)(void*,PgHdr*); /* Call to try make a page clean */ void *pStress; /* Argument to xStress */ sqlite3_pcache *pCache; /* Pluggable cache module */ - PgHdr *pPage1; /* Reference to page 1 */ }; /********************************** Linked List Management ********************/ @@ -106,9 +105,6 @@ static void 
pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){ */ static void pcacheUnpin(PgHdr *p){ if( p->pCache->bPurgeable ){ - if( p->pgno==1 ){ - p->pCache->pPage1 = 0; - } sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0); } } @@ -201,7 +197,6 @@ int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){ sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); } pCache->pCache = pNew; - pCache->pPage1 = 0; pCache->szPage = szPage; } return SQLITE_OK; @@ -359,9 +354,6 @@ PgHdr *sqlite3PcacheFetchFinish( pCache->nRef++; } pPgHdr->nRef++; - if( pgno==1 ){ - pCache->pPage1 = pPgHdr; - } return pPgHdr; } @@ -402,9 +394,6 @@ void sqlite3PcacheDrop(PgHdr *p){ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); } p->pCache->nRef--; - if( p->pgno==1 ){ - p->pCache->pPage1 = 0; - } sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); } @@ -495,9 +484,14 @@ void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){ sqlite3PcacheMakeClean(p); } } - if( pgno==0 && pCache->pPage1 ){ - memset(pCache->pPage1->pData, 0, pCache->szPage); - pgno = 1; + if( pgno==0 && pCache->nRef ){ + sqlite3_pcache_page *pPage1; + pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0); + if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because + ** pCache->nRef>0 */ + memset(pPage1->pBuf, 0, pCache->szPage); + pgno = 1; + } } sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1); } From 68cdd0eda2c4957eb0b6afcc02d37b910da3fdfc Mon Sep 17 00:00:00 2001 From: mistachkin Date: Fri, 26 Jun 2015 03:12:27 +0000 Subject: [PATCH 166/206] Fix harmless compiler warning in assert statement. 
FossilOrigin-Name: 7097716caed9d4aef49c7e766e41ea74abf5967f --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/btree.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/manifest b/manifest index a04cf6695b..bb80efcf48 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Simplify\sthe\spcache\sby\snot\skeeping\scontinuous\strack\sof\spage\s1\sbut\sinstead\njust\sloading\spage\s1\son\sthe\srare\soccasions\swhen\sit\sis\sactually\sneeded. -D 2015-06-26T02:41:31.913 +C Fix\sharmless\scompiler\swarning\sin\sassert\sstatement. +D 2015-06-26T03:12:27.469 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -192,7 +192,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 40e98c10725c2cec5429068e21c17924f4bf06cc +F src/btree.c c113f73fc4048038529cc30ed7147b24c34c2c5d F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h fdd1aff02fb2a63812bd95716e7f579fc3759107 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8d79f3a1443391bee204bb8c49240f44477168db -R c56e8779f6e8f4f9de88fc3a0bf56b01 -U drh -Z 87d94359f0bb80945f07f56dd11de5b2 +P 015302f15e46a087ec92f3644c6741600dbf4306 +R 868fbc5146ce4734a4f8cc498ff855f9 +U mistachkin +Z 40a758e35083225387fb9eeb28b88d0c diff --git a/manifest.uuid b/manifest.uuid index fa2d20be1d..0666abfec8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-015302f15e46a087ec92f3644c6741600dbf4306 \ No newline at end of file +7097716caed9d4aef49c7e766e41ea74abf5967f \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 5769a236c8..7e6f750b02 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1442,7 +1442,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ ** However, that integer is too large to be stored in a 2-byte unsigned ** integer, so a value of 0 is used in its place. */ top = get2byte(&data[hdr+5]); - assert( top<=pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */ + assert( top<=(int)pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */ if( gap>top ){ if( top==0 && pPage->pBt->usableSize==65536 ){ top = 65536; From ed52f9ff488acfc66fdd3155fbe8b395f1330d49 Mon Sep 17 00:00:00 2001 From: mistachkin Date: Fri, 26 Jun 2015 04:34:36 +0000 Subject: [PATCH 167/206] Initial changes to get FTS5 working with MSVC. FossilOrigin-Name: ef2052f81e33ca98e85a60f8a78cdd19a7c1c35c --- Makefile.msc | 41 +++++++++++++++++++++++++++++++++++++++++ ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_buffer.c | 2 +- ext/fts5/fts5_config.c | 1 - ext/fts5/fts5_expr.c | 4 ++-- ext/fts5/fts5_index.c | 10 +++++----- ext/fts5/fts5_main.c | 4 ++-- ext/fts5/fts5_storage.c | 4 +++- manifest | 31 +++++++++++++++++-------------- manifest.uuid | 2 +- 10 files changed, 73 insertions(+), 28 deletions(-) diff --git a/Makefile.msc b/Makefile.msc index 70c16b11c3..0582459b06 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -827,6 +827,7 @@ LIBOBJS0 = vdbe.lo parse.lo alter.lo analyze.lo attach.lo auth.lo \ fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \ fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \ fts3_tokenize_vtab.lo fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \ + fts5.lo \ func.lo global.lo hash.lo \ icu.lo insert.lo journal.lo legacy.lo loadext.lo \ main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \ @@ -1078,6 +1079,7 @@ TESTEXT = \ $(TOP)\ext\misc\eval.c \ 
$(TOP)\ext\misc\fileio.c \ $(TOP)\ext\misc\fuzzer.c \ + $(TOP)\ext\fts5\fts5_tcl.c \ $(TOP)\ext\misc\ieee754.c \ $(TOP)\ext\misc\nextchar.c \ $(TOP)\ext\misc\percentile.c \ @@ -1653,6 +1655,44 @@ fts3_write.lo: $(TOP)\ext\fts3\fts3_write.c $(HDR) $(EXTHDR) rtree.lo: $(TOP)\ext\rtree\rtree.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\rtree\rtree.c +# FTS5 things +# +FTS5_SRC = \ + $(TOP)\ext\fts5\fts5.h \ + $(TOP)\ext\fts5\fts5Int.h \ + $(TOP)\ext\fts5\fts5_aux.c \ + $(TOP)\ext\fts5\fts5_buffer.c \ + $(TOP)\ext\fts5\fts5_main.c \ + $(TOP)\ext\fts5\fts5_config.c \ + $(TOP)\ext\fts5\fts5_expr.c \ + $(TOP)\ext\fts5\fts5_hash.c \ + $(TOP)\ext\fts5\fts5_index.c \ + fts5parse.c fts5parse.h \ + $(TOP)\ext\fts5\fts5_storage.c \ + $(TOP)\ext\fts5\fts5_tokenize.c \ + $(TOP)\ext\fts5\fts5_unicode2.c \ + $(TOP)\ext\fts5\fts5_varint.c \ + $(TOP)\ext\fts5\fts5_vocab.c + +fts5parse.c: $(TOP)\ext\fts5\fts5parse.y lemon.exe + copy $(TOP)\ext\fts5\fts5parse.y . + del /Q fts5parse.h 2>NUL + .\lemon.exe $(REQ_FEATURE_FLAGS) $(OPT_FEATURE_FLAGS) $(OPTS) fts5parse.y + move fts5parse.c fts5parse.c.orig + echo #ifdef SQLITE_ENABLE_FTS5 > $@ + type fts5parse.c.orig \ + | $(NAWK) "/.*/ { gsub(/yy/,\"fts5yy\");print }" \ + | $(NAWK) "/.*/ { gsub(/YY/,\"fts5YY\");print }" \ + | $(NAWK) "/.*/ { gsub(/TOKEN/,\"FTS5TOKEN\");print }" >> $@ + echo #endif /* SQLITE_ENABLE_FTS5 */ >> $@ + +fts5parse.h: fts5parse.c + +fts5.c: $(FTS5_SRC) + $(TCLSH_CMD) $(TOP)\ext\fts5\tool\mkfts5c.tcl + +fts5.lo: fts5.c $(HDR) $(EXTHDR) + $(LTCOMPILE) $(NO_WARN) -DSQLITE_ENABLE_FTS5 -c fts5.c # Rules to build the 'testfixture' application. # @@ -1792,6 +1832,7 @@ clean: del /Q sqlite3_analyzer.exe sqlite3_analyzer.c 2>NUL del /Q sqlite-*-output.vsix 2>NUL del /Q fuzzershell.exe fuzzcheck.exe sqldiff.exe 2>NUL + del /Q fts5.c fts5parse.* 2>NUL # Dynamic link library section. 
# diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 07d1e4fecb..7b8066ef34 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -204,7 +204,7 @@ void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int); -char *sqlite3Fts5Mprintf(int *pRc, char *zFmt, ...); +char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) #define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c) diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index b900ada744..33e648a931 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -125,7 +125,7 @@ void sqlite3Fts5BufferAppendPrintf( } } -char *sqlite3Fts5Mprintf(int *pRc, char *zFmt, ...){ +char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ char *zRet = 0; if( *pRc==SQLITE_OK ){ va_list ap; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 4c478d79c9..6342b92cb0 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -602,7 +602,6 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ int i; int rc = SQLITE_OK; char *zSql; - char *zOld; zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); for(i=0; zSql && inCol; i++){ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 9707e517aa..b29684561c 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1046,7 +1046,7 @@ static int fts5ExprNodeNext( case FTS5_OR: { int i; - int iLast = pNode->iRowid; + i64 iLast = pNode->iRowid; for(i=0; rc==SQLITE_OK && inChild; i++){ Fts5ExprNode *p1 = pNode->apChild[i]; @@ -1915,7 +1915,7 @@ static void fts5ExprFunction( sqlite3_result_error_code(pCtx, rc); } } - sqlite3_free(azConfig); + sqlite3_free((void *)azConfig); sqlite3Fts5ConfigFree(pConfig); sqlite3Fts5ExprFree(pExpr); } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index ec930f1a3b..c5516db07c 100644 --- 
a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3549,15 +3549,15 @@ static void fts5IndexAutomerge( ){ if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){ Fts5Structure *pStruct = *ppStruct; - i64 nWrite; /* Initial value of write-counter */ + u64 nWrite; /* Initial value of write-counter */ int nWork; /* Number of work-quanta to perform */ int nRem; /* Number of leaf pages left to write */ /* Update the write-counter. While doing so, set nWork. */ nWrite = pStruct->nWriteCounter; - nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit); + nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); pStruct->nWriteCounter += nLeaf; - nRem = p->nWorkUnit * nWork * pStruct->nLevel; + nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); fts5IndexMerge(p, ppStruct, nRem); } @@ -4552,11 +4552,11 @@ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ int rc; /* Return code */ Fts5Config *pConfig = p->pConfig; /* Configuration object */ u8 aCookie[4]; /* Binary representation of iNew */ + sqlite3_blob *pBlob = 0; assert( p->rc==SQLITE_OK ); - sqlite3Fts5Put32(aCookie, iNew); - sqlite3_blob *pBlob = 0; + rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", FTS5_STRUCTURE_ROWID, 1, &pBlob ); diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 3b8241f132..a598f1cfce 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -1387,8 +1387,8 @@ static int fts5UpdateMethod( rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); } }else{ - assert( nArg>1 ); sqlite3_value *pCmd = apVal[2 + pConfig->nCol]; + assert( nArg>1 ); if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ const char *z = (const char*)sqlite3_value_text(pCmd); if( pConfig->eContent!=FTS5_CONTENT_NORMAL @@ -2224,7 +2224,7 @@ static void fts5Fts5Func( sqlite3_result_blob(pCtx, buf, sizeof(pGlobal), SQLITE_TRANSIENT); } -#ifdef _WIN32_ +#ifdef _WIN32 __declspec(dllexport) #endif int sqlite3_fts5_init( diff --git a/ext/fts5/fts5_storage.c 
b/ext/fts5/fts5_storage.c index 60c6401716..f57c2e2620 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -158,8 +158,10 @@ static int fts5ExecPrintf( ){ int rc; va_list ap; /* ... printf arguments */ + char *zSql; + va_start(ap, zFormat); - char *zSql = sqlite3_vmprintf(zFormat, ap); + zSql = sqlite3_vmprintf(zFormat, ap); if( zSql==0 ){ rc = SQLITE_NOMEM; diff --git a/manifest b/manifest index 81ca8a12d1..27528f47eb 100644 --- a/manifest +++ b/manifest @@ -1,9 +1,9 @@ -C Fix\sMakefile.in\sto\saccount\sfor\srecent\schanges.\sUpdate\sfts5\stest\sscripts\sso\sthat\sthey\sdo\snot\srun\sif\sSQLITE_ENABLE_FTS5\sis\snot\sdefined. -D 2015-06-25T20:36:36.724 +C Initial\schanges\sto\sget\sFTS5\sworking\swith\sMSVC. +D 2015-06-26T04:34:36.916 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 -F Makefile.msc b7db9ccbbad1c495b98e5326a06cac03aa206127 +F Makefile.msc 3ba6a2d04472e98b2aa882f47d959ec12af7c34c F Makefile.vxworks e1b65dea203f054e71653415bd8f96dcaed47858 F README.md 8ecc12493ff9f820cdea6520a9016001cb2e59b7 F VERSION ce0ae95abd7121c534f6917c1c8f2b70d9acd4db @@ -106,15 +106,15 @@ F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba -F ext/fts5/fts5Int.h 41e9ed1f073770f660b1b59116357f504a8c5ff1 +F ext/fts5/fts5Int.h 918e947c0c20122ed5eb9ea695d83c6c8cf7239a F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 -F ext/fts5/fts5_buffer.c be0dc80a9406151b350be27c7ec2956722578771 -F ext/fts5/fts5_config.c c232d181d6324f0ae3a2a31319924473999e5816 -F ext/fts5/fts5_expr.c 549bda1f7edcf10365fbfbc002bdea1be3c287bb +F ext/fts5/fts5_buffer.c 7428b0bcb257641cbecc3bacce7f40686cf99f36 +F 
ext/fts5/fts5_config.c d40da53f5e94214c553b2af3d6fd9aebac5f3f05 +F ext/fts5/fts5_expr.c 3386ab0a71dbab7e1259c3b16d6113c97d14123e F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc -F ext/fts5/fts5_index.c 438b245e9e44be3cfa848c71351a4e769b75876a -F ext/fts5/fts5_main.c 35e90d3d8fafe4c936f232108e891d5f9a6294ca -F ext/fts5/fts5_storage.c b2fa301fce865d582d367a5e1bb438fe60c03cb5 +F ext/fts5/fts5_index.c 11dbb41cd9f1acbf6beb5c959b913e1be91dabc4 +F ext/fts5/fts5_main.c c5b2a219d65967c07fd1bc8fd45206863a2fe360 +F ext/fts5/fts5_storage.c 3e672a0d35f63979556903861b324e7b8932cecc F ext/fts5/fts5_tcl.c 7ea165878e4ae3598e89acd470a0ee1b5a00e33c F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989 F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9 @@ -1364,7 +1364,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fcd8f7ce601729dc51d880d16b97040c1be16aa2 -R d44c4485fa78076f45f58b5b04b05b9b -U dan -Z cdc9c7cdbdc63c34e596c23a4d9ea399 +P 3175220747f5d4dadd17e09fb1899dc782c90926 +R d72f4db2847a4a23ecfc8905fb081d41 +T *branch * fts5Msvc +T *sym-fts5Msvc * +T -sym-fts5 * +U mistachkin +Z cca0d5b27b94cebaae71758fdb2c5cae diff --git a/manifest.uuid b/manifest.uuid index 70e4e52c4d..f3b9d6f26c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3175220747f5d4dadd17e09fb1899dc782c90926 \ No newline at end of file +ef2052f81e33ca98e85a60f8a78cdd19a7c1c35c \ No newline at end of file From b39e8cf2395267c30ed26158052700cf9ee3e166 Mon Sep 17 00:00:00 2001 From: mistachkin Date: Fri, 26 Jun 2015 16:17:36 +0000 Subject: [PATCH 168/206] Add FTS5 DLL target for MSVC. 
FossilOrigin-Name: be85b82499f63dce4378f29adf8366651e965325 --- Makefile.msc | 3 +++ manifest | 15 ++++++--------- manifest.uuid | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Makefile.msc b/Makefile.msc index 0582459b06..4abfb0e46f 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -1694,6 +1694,9 @@ fts5.c: $(FTS5_SRC) fts5.lo: fts5.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(NO_WARN) -DSQLITE_ENABLE_FTS5 -c fts5.c +fts5.dll: fts5.lo + $(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL /OUT:$@ fts5.lo + # Rules to build the 'testfixture' application. # # If using the amalgamation, use sqlite3.c directly to build the test diff --git a/manifest b/manifest index 27528f47eb..0a98278363 100644 --- a/manifest +++ b/manifest @@ -1,9 +1,9 @@ -C Initial\schanges\sto\sget\sFTS5\sworking\swith\sMSVC. -D 2015-06-26T04:34:36.916 +C Add\sFTS5\sDLL\starget\sfor\sMSVC. +D 2015-06-26T16:17:36.812 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 -F Makefile.msc 3ba6a2d04472e98b2aa882f47d959ec12af7c34c +F Makefile.msc 5dff3f360037c7c924512973d13aa1051ba1fea6 F Makefile.vxworks e1b65dea203f054e71653415bd8f96dcaed47858 F README.md 8ecc12493ff9f820cdea6520a9016001cb2e59b7 F VERSION ce0ae95abd7121c534f6917c1c8f2b70d9acd4db @@ -1364,10 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 3175220747f5d4dadd17e09fb1899dc782c90926 -R d72f4db2847a4a23ecfc8905fb081d41 -T *branch * fts5Msvc -T *sym-fts5Msvc * -T -sym-fts5 * +P ef2052f81e33ca98e85a60f8a78cdd19a7c1c35c +R 16ac75ae29a449789b1f8e90f4493086 U mistachkin -Z cca0d5b27b94cebaae71758fdb2c5cae +Z 20765b1a5df4a09bb1c9c2fbe7adccb1 diff --git a/manifest.uuid b/manifest.uuid 
index f3b9d6f26c..d7852f670a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ef2052f81e33ca98e85a60f8a78cdd19a7c1c35c \ No newline at end of file +be85b82499f63dce4378f29adf8366651e965325 \ No newline at end of file From eb37567fe212b453d9e3c76d6fd91f09d5df8a6e Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 26 Jun 2015 16:42:17 +0000 Subject: [PATCH 169/206] Remove fts5 initialization code from core file main.c. FossilOrigin-Name: c91a93b343b4d3b1a19afdb5d23d1c508f9779d2 --- ext/fts5/test/fts5_common.tcl | 7 +++++-- ext/fts5/test/fts5fault1.test | 2 +- manifest | 18 +++++++++--------- manifest.uuid | 2 +- src/main.c | 9 --------- src/test1.c | 4 ++++ 6 files changed, 20 insertions(+), 22 deletions(-) diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index ddc8060ecc..a86222a94e 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -15,8 +15,11 @@ if {![info exists testdir]} { } source $testdir/tester.tcl - -catch { sqlite3_fts5_may_be_corrupt 0 } +catch { + sqlite3_fts5_may_be_corrupt 0 + append G(perm:dbconfig) "; load_static_extension \$::dbhandle fts5" + reset_db +} proc fts5_test_poslist {cmd} { set res [list] diff --git a/ext/fts5/test/fts5fault1.test b/ext/fts5/test/fts5fault1.test index 56f73c3ab7..13f36803e1 100644 --- a/ext/fts5/test/fts5fault1.test +++ b/ext/fts5/test/fts5fault1.test @@ -279,7 +279,7 @@ do_faultsim_test 7.0 -faults oom* -prep { } -body { sqlite3 db test.db } -test { - faultsim_test_result [list 0 {}] [list 1 {}] + faultsim_test_result [list 0 {}] {1 {}} {1 {initialization of fts5 failed: }} } #------------------------------------------------------------------------- diff --git a/manifest b/manifest index 81ca8a12d1..6b53ebf4a5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sMakefile.in\sto\saccount\sfor\srecent\schanges.\sUpdate\sfts5\stest\sscripts\sso\sthat\sthey\sdo\snot\srun\sif\sSQLITE_ENABLE_FTS5\sis\snot\sdefined. 
-D 2015-06-25T20:36:36.724 +C Remove\sfts5\sinitialization\scode\sfrom\score\sfile\smain.c. +D 2015-06-26T16:42:17.368 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -122,7 +122,7 @@ F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796 F ext/fts5/fts5_vocab.c e454fa58c6d591024659a9b61eece0d708e8b575 F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl 407ab0852318cda22544834a09db7af265085ecb +F ext/fts5/test/fts5_common.tcl 9553cce0757092d194307c2168d4edd100eab578 F ext/fts5/test/fts5aa.test 0be21c89fd66b588db355a6398911fd875bdcc6c F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 0990ae7497ebaea2ab5f7fd5caedd93a71a905fc @@ -150,7 +150,7 @@ F ext/fts5/test/fts5dlidx.test 59b80bbe34169a082c575d9c26f0a7019a7b79c1 F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b F ext/fts5/test/fts5ea.test 451bb37310ee6df8ef72e4354fda5621b3b51448 F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e -F ext/fts5/test/fts5fault1.test b42d3296be8a75f557cf2cbce0d8b483fc9db45b +F ext/fts5/test/fts5fault1.test 7a562367cb4a735b57b410dbdb62dcc8d971faec F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341 F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3 F ext/fts5/test/fts5fault4.test 762991d526ee67c2b374351a17248097ea38bee7 @@ -292,7 +292,7 @@ F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c ba1863ea58c4c840335a84ec276fc2b25e22bc4e F src/lempar.c 7274c97d24bb46631e504332ccd3bd1b37841770 F src/loadext.c e722f4b832f923744788365df5fb8515c0bc8a47 -F src/main.c 37e3083001abc7a09de353186db990c2ee0ead80 +F src/main.c c0061a4f8ba86f957534be93b7026dab324f12c2 F src/malloc.c 
908c780fdddd472163c2d1b1820ae4081f01ad20 F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c abe6ee469b6c5a35c7f22bfeb9c9bac664a1c987 @@ -337,7 +337,7 @@ F src/sqliteLimit.h 216557999cb45f2e3578ed53ebefe228d779cb46 F src/status.c f266ad8a2892d659b74f0f50cb6a88b6e7c12179 F src/table.c 51b46b2a62d1b3a959633d593b89bab5e2c9155e F src/tclsqlite.c 13b9c2aa725882de807377fa889682eff2a74114 -F src/test1.c a8e09b811f70184ce65012f27f30cfee7e54f268 +F src/test1.c e055ab594a48d25720ed31daa5eced1163544488 F src/test2.c 577961fe48961b2f2e5c8b56ee50c3f459d3359d F src/test3.c 64d2afdd68feac1bb5e2ffb8226c8c639f798622 F src/test4.c d168f83cc78d02e8d35567bb5630e40dcd85ac1e @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fcd8f7ce601729dc51d880d16b97040c1be16aa2 -R d44c4485fa78076f45f58b5b04b05b9b +P 3175220747f5d4dadd17e09fb1899dc782c90926 +R 91fd07ffaf7094308b4ec5a4e0e61dc1 U dan -Z cdc9c7cdbdc63c34e596c23a4d9ea399 +Z ca6474591f774c998709d70d57deb075 diff --git a/manifest.uuid b/manifest.uuid index 70e4e52c4d..35291f1b95 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3175220747f5d4dadd17e09fb1899dc782c90926 \ No newline at end of file +c91a93b343b4d3b1a19afdb5d23d1c508f9779d2 \ No newline at end of file diff --git a/src/main.c b/src/main.c index 711ca8f885..c047943c93 100644 --- a/src/main.c +++ b/src/main.c @@ -19,9 +19,6 @@ #ifdef SQLITE_ENABLE_FTS3 # include "fts3.h" #endif -#ifdef SQLITE_ENABLE_FTS5 -int sqlite3_fts5_init(sqlite3*, char**, const void*); -#endif #ifdef SQLITE_ENABLE_RTREE # include "rtree.h" #endif @@ -2872,12 +2869,6 @@ static int openDatabase( } #endif -#ifdef SQLITE_ENABLE_FTS5 - if( !db->mallocFailed && rc==SQLITE_OK ){ - rc = sqlite3_fts5_init(db, 0, 0); - } -#endif - #ifdef SQLITE_ENABLE_ICU if( 
!db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3IcuInit(db); diff --git a/src/test1.c b/src/test1.c index 732ad9e049..3885b71ca7 100644 --- a/src/test1.c +++ b/src/test1.c @@ -6343,6 +6343,7 @@ static int tclLoadStaticExtensionCmd( extern int sqlite3_spellfix_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_totype_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_wholenumber_init(sqlite3*,char**,const sqlite3_api_routines*); + extern int sqlite3_fts5_init(sqlite3*,char**,const sqlite3_api_routines*); static const struct { const char *zExtName; int (*pInit)(sqlite3*,char**,const sqlite3_api_routines*); @@ -6350,6 +6351,9 @@ static int tclLoadStaticExtensionCmd( { "amatch", sqlite3_amatch_init }, { "closure", sqlite3_closure_init }, { "eval", sqlite3_eval_init }, +#ifdef SQLITE_ENABLE_FTS5 + { "fts5", sqlite3_fts5_init }, +#endif { "fileio", sqlite3_fileio_init }, { "fuzzer", sqlite3_fuzzer_init }, { "ieee754", sqlite3_ieee_init }, From 9c7c913cd6d2df5203d4b9426c96ebbee8bd4e32 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 26 Jun 2015 18:16:52 +0000 Subject: [PATCH 170/206] Cache the sqlite3_context structure in the P4 operand of VDBE programs for faster SQL function dispatch. FossilOrigin-Name: 2abc44eb3b9d489321baa50bc25e17dafbda3687 --- manifest | 30 ++++---- manifest.uuid | 2 +- mkopcodeh.awk | 4 +- src/analyze.c | 6 +- src/attach.c | 2 +- src/expr.c | 2 +- src/select.c | 2 +- src/vdbe.c | 201 +++++++++++++++++++++++++++++++++----------------- src/vdbe.h | 4 +- src/vdbeInt.h | 18 +++-- src/vdbeaux.c | 14 ++-- 11 files changed, 179 insertions(+), 106 deletions(-) diff --git a/manifest b/manifest index bb80efcf48..af874d7e51 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sharmless\scompiler\swarning\sin\sassert\sstatement. -D 2015-06-26T03:12:27.469 +C Cache\sthe\ssqlite3_context\sstructure\sin\sthe\sP4\soperand\sof\sVDBE\sprograms\nfor\sfaster\sSQL\sfunction\sdispatch. 
+D 2015-06-26T18:16:52.781 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -173,7 +173,7 @@ F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 F main.mk 68f86c21505d6b66765a13c193f00a53dde6a212 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea -F mkopcodeh.awk d5e22023b5238985bb54a72d33e0ac71fe4f8a32 +F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271 F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504 @@ -186,8 +186,8 @@ F sqlite.pc.in 42b7bf0d02e08b9e77734a47798d1a55a9e0716b F sqlite3.1 fc7ad8990fc8409983309bb80de8c811a7506786 F sqlite3.pc.in 48fed132e7cb71ab676105d2a4dc77127d8c1f3a F src/alter.c 48e14b8aea28dc58baafe3cfcb8889c086b7744a -F src/analyze.c d23790787f80ebed58df7774744b4cf96401498b -F src/attach.c c38ac5a520a231d5d0308fd7f2ad95191c867bae +F src/analyze.c f89727c36f997bd2bf6c5e546c2f51dc94e6f2a4 +F src/attach.c e944d0052b577703b9b83aac1638452ff42a8395 F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d @@ -202,7 +202,7 @@ F src/ctime.c 5a0b735dc95604766f5dac73973658eef782ee8b F src/date.c e4d50b3283696836ec1036b695ead9a19e37a5ac F src/dbstat.c f402e77e25089c6003d0c60b3233b9b3947d599a F src/delete.c 8857a6f27560718f65d43bdbec86c967ae1f8dfa -F src/expr.c 32c836d9fa22c25371039febf074849dcefb3de9 +F src/expr.c c5c58e4d01c7ceb2266791d8d877f1b23a88e316 F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb F src/fkey.c c9b63a217d86582c22121699a47f22f524608869 F src/func.c a98ea5880dc50e9ca6dd6f57079a37b9cfcdecf1 @@ -250,7 +250,7 @@ F src/printf.c db11b5960105ee661dcac690f2ae6276e49bf251 F src/random.c 
ba2679f80ec82c4190062d756f22d0c358180696 F src/resolve.c 2d47554370de8de6dd5be060cef9559eec315005 F src/rowset.c eccf6af6d620aaa4579bd3b72c1b6395d9e9fa1e -F src/select.c 9baeda79f93cfd180d471273a2f9c82c682a37a2 +F src/select.c 009c6138be8788449d4f911f380d99e8608040e2 F src/shell.c 8af3cced094aebb5f57a8ad739b9dafc7867eed7 F src/sqlite.h.in 76d2f5637eb795b6300d9dd3c3ec3632ffafd721 F src/sqlite3.rc 992c9f5fb8285ae285d6be28240a7e8d3a7f2bad @@ -313,11 +313,11 @@ F src/update.c 487747b328b7216bb7f6af0695d6937d5c9e605f F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c F src/util.c a6431c92803b975b7322724a7b433e538d243539 F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701 -F src/vdbe.c 3af2d06e2b36012631dc3331957df52febdf8678 -F src/vdbe.h 90048aea1910f9df93e6044592bd4a466dc9c5e7 -F src/vdbeInt.h 20295e482121d13437f69985f77db211cdc8bac1 +F src/vdbe.c 8fde5281f304c31fd635891b3cb138e6b79ce9f5 +F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 +F src/vdbeInt.h 8b54e01ad0463590e7cffabce0bc36da9ee4f816 F src/vdbeapi.c 6a0d7757987018ff6b1b81bc5293219cd26bb299 -F src/vdbeaux.c 4c82d6f686f72ea7d266d26d528a171b728626f7 +F src/vdbeaux.c 316e6bc773559d164155848f086c4b7d146f483a F src/vdbeblob.c 4f2e8e075d238392df98c5e03a64342465b03f90 F src/vdbemem.c ae38a0d35ae71cf604381a887c170466ba518090 F src/vdbesort.c f5009e7a35e3065635d8918b9a31f498a499976b @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 015302f15e46a087ec92f3644c6741600dbf4306 -R 868fbc5146ce4734a4f8cc498ff855f9 -U mistachkin -Z 40a758e35083225387fb9eeb28b88d0c +P 7097716caed9d4aef49c7e766e41ea74abf5967f +R 61b5a8d7a0dc65b1ab9c06045c3a6290 +U drh +Z 9dd787a20069829fde828ff0d1be6044 diff --git a/manifest.uuid b/manifest.uuid index 0666abfec8..ad385475d2 100644 --- a/manifest.uuid +++ 
b/manifest.uuid @@ -1 +1 @@ -7097716caed9d4aef49c7e766e41ea74abf5967f \ No newline at end of file +2abc44eb3b9d489321baa50bc25e17dafbda3687 \ No newline at end of file diff --git a/mkopcodeh.awk b/mkopcodeh.awk index 94db75e607..474ae4f3b4 100644 --- a/mkopcodeh.awk +++ b/mkopcodeh.awk @@ -122,9 +122,7 @@ END { for(i=0; i=0 ) continue; - if( name=="OP_Function" \ - || name=="OP_AggStep" \ - || name=="OP_Transaction" \ + if( name=="OP_Transaction" \ || name=="OP_AutoCommit" \ || name=="OP_Savepoint" \ || name=="OP_Checkpoint" \ diff --git a/src/analyze.c b/src/analyze.c index 2a0d6d2fb7..59518cdc3f 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -943,7 +943,7 @@ static void callStatGet(Vdbe *v, int regStat4, int iParam, int regOut){ #else UNUSED_PARAMETER( iParam ); #endif - sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4, regOut); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4, regOut); sqlite3VdbeChangeP4(v, -1, (char*)&statGetFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 1 + IsStat34); } @@ -1098,7 +1098,7 @@ static void analyzeOneTable( #endif sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1); sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2); - sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4+1, regStat4); sqlite3VdbeChangeP4(v, -1, (char*)&statInitFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 2+IsStat34); @@ -1194,7 +1194,7 @@ static void analyzeOneTable( } #endif assert( regChng==(regStat4+1) ); - sqlite3VdbeAddOp3(v, OP_Function, 1, regStat4, regTemp); + sqlite3VdbeAddOp3(v, OP_Function0, 1, regStat4, regTemp); sqlite3VdbeChangeP4(v, -1, (char*)&statPushFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 2+IsStat34); sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, addrNextRow); VdbeCoverage(v); diff --git a/src/attach.c b/src/attach.c index efc9eb9a81..2ab55e6ed6 100644 --- a/src/attach.c +++ b/src/attach.c @@ -359,7 +359,7 @@ static void codeAttach( assert( v || db->mallocFailed ); if( v ){ - 
sqlite3VdbeAddOp3(v, OP_Function, 0, regArgs+3-pFunc->nArg, regArgs+3); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regArgs+3-pFunc->nArg, regArgs+3); assert( pFunc->nArg==-1 || (pFunc->nArg&0xff)==pFunc->nArg ); sqlite3VdbeChangeP5(v, (u8)(pFunc->nArg)); sqlite3VdbeChangeP4(v, -1, (char *)pFunc, P4_FUNCDEF); diff --git a/src/expr.c b/src/expr.c index 8f697e4d45..5acb909667 100644 --- a/src/expr.c +++ b/src/expr.c @@ -2925,7 +2925,7 @@ int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target){ if( !pColl ) pColl = db->pDfltColl; sqlite3VdbeAddOp4(v, OP_CollSeq, 0, 0, 0, (char *)pColl, P4_COLLSEQ); } - sqlite3VdbeAddOp4(v, OP_Function, constMask, r1, target, + sqlite3VdbeAddOp4(v, OP_Function0, constMask, r1, target, (char*)pDef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, (u8)nFarg); if( nFarg && constMask==0 ){ diff --git a/src/select.c b/src/select.c index 19edb19745..bb8bdd42b8 100644 --- a/src/select.c +++ b/src/select.c @@ -4686,7 +4686,7 @@ static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){ if( regHit==0 && pAggInfo->nAccumulator ) regHit = ++pParse->nMem; sqlite3VdbeAddOp4(v, OP_CollSeq, regHit, 0, 0, (char *)pColl, P4_COLLSEQ); } - sqlite3VdbeAddOp4(v, OP_AggStep, 0, regAgg, pF->iMem, + sqlite3VdbeAddOp4(v, OP_AggStep0, 0, regAgg, pF->iMem, (void*)pF->pFunc, P4_FUNCDEF); sqlite3VdbeChangeP5(v, (u8)nArg); sqlite3ExprCacheAffinityChange(pParse, regAgg, nArg); diff --git a/src/vdbe.c b/src/vdbe.c index 4fd19327ed..c1f2cee5fe 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -1546,7 +1546,7 @@ case OP_CollSeq: { break; } -/* Opcode: Function P1 P2 P3 P4 P5 +/* Opcode: Function0 P1 P2 P3 P4 P5 ** Synopsis: r[P3]=func(r[P2@P5]) ** ** Invoke a user function (P4 is a pointer to a Function structure that @@ -1561,59 +1561,99 @@ case OP_CollSeq: { ** sqlite3_set_auxdata() API may be safely retained until the next ** invocation of this opcode. 
** -** See also: AggStep and AggFinal +** See also: Function, AggStep, AggFinal */ -case OP_Function: { - int i; - Mem *pArg; - sqlite3_context ctx; - sqlite3_value **apVal; +/* Opcode: Function P1 P2 P3 P4 P5 +** Synopsis: r[P3]=func(r[P2@P5]) +** +** Invoke a user function (P4 is a pointer to an sqlite3_context object that +** contains a pointer to the function to be run) with P5 arguments taken +** from register P2 and successors. The result of the function is stored +** in register P3. Register P3 must not be one of the function inputs. +** +** P1 is a 32-bit bitmask indicating whether or not each argument to the +** function was determined to be constant at compile time. If the first +** argument was constant then bit 0 of P1 is set. This is used to determine +** whether meta data associated with a user function argument using the +** sqlite3_set_auxdata() API may be safely retained until the next +** invocation of this opcode. +** +** SQL functions are initially coded as OP_Function0 with P4 pointing +** to the function itself. But on first evaluation, the P4 operand is +** automatically converted into an sqlite3_context object and the operation +** changed to this OP_Function opcode. In this way, the initialization of +** the sqlite3_context object occurs only once, rather than once for each +** evaluation of the function. 
+** +** See also: Function0, AggStep, AggFinal +*/ +case OP_Function0: { int n; - - n = pOp->p5; - apVal = p->apArg; - assert( apVal || n==0 ); - assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); - ctx.pOut = &aMem[pOp->p3]; - memAboutToChange(p, ctx.pOut); - - assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) ); - assert( pOp->p3p2 || pOp->p3>=pOp->p2+n ); - pArg = &aMem[pOp->p2]; - for(i=0; ip2+i, pArg); - } + sqlite3_context *pCtx; assert( pOp->p4type==P4_FUNCDEF ); - ctx.pFunc = pOp->p4.pFunc; - ctx.iOp = (int)(pOp - aOp); - ctx.pVdbe = p; - MemSetTypeFlag(ctx.pOut, MEM_Null); - ctx.fErrorOrAux = 0; + n = pOp->p5; + assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); + assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) ); + assert( pOp->p3p2 || pOp->p3>=pOp->p2+n ); + pCtx = sqlite3DbMallocRaw(db, sizeof(*pCtx) + (n-1)*sizeof(sqlite3_value*)); + if( pCtx==0 ) goto no_mem; + pCtx->pOut = 0; + pCtx->pFunc = pOp->p4.pFunc; + pCtx->iOp = (int)(pOp - aOp); + pCtx->pVdbe = p; + pCtx->argc = n; + pOp->p4type = P4_FUNCCTX; + pOp->p4.pCtx = pCtx; + pOp->opcode = OP_Function; + /* Fall through into OP_Function */ +} +case OP_Function: { + int i; + sqlite3_context *pCtx; + + assert( pOp->p4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. 
The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + if( pCtx->pOut != &aMem[pOp->p3] ){ + pCtx->pOut = &aMem[pOp->p3]; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; + } + + memAboutToChange(p, pCtx->pOut); +#ifdef SQLITE_DEBUG + for(i=0; iargc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + MemSetTypeFlag(pCtx->pOut, MEM_Null); + pCtx->fErrorOrAux = 0; db->lastRowid = lastRowid; - (*ctx.pFunc->xFunc)(&ctx, n, apVal); /* IMP: R-24505-23230 */ + (*pCtx->pFunc->xFunc)(pCtx, pCtx->argc, pCtx->argv); /* IMP: R-24505-23230 */ lastRowid = db->lastRowid; /* Remember rowid changes made by xFunc */ /* If the function returned an error, throw an exception */ - if( ctx.fErrorOrAux ){ - if( ctx.isError ){ - sqlite3VdbeError(p, "%s", sqlite3_value_text(ctx.pOut)); - rc = ctx.isError; + if( pCtx->fErrorOrAux ){ + if( pCtx->isError ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(pCtx->pOut)); + rc = pCtx->isError; } sqlite3VdbeDeleteAuxData(p, (int)(pOp - aOp), pOp->p1); } /* Copy the result of the function into register P3 */ - sqlite3VdbeChangeEncoding(ctx.pOut, encoding); - if( sqlite3VdbeMemTooBig(ctx.pOut) ){ + sqlite3VdbeChangeEncoding(pCtx->pOut, encoding); + if( sqlite3VdbeMemTooBig(pCtx->pOut) ){ goto too_big; } - REGISTER_TRACE(pOp->p3, ctx.pOut); - UPDATE_MAX_BLOBSIZE(ctx.pOut); + REGISTER_TRACE(pOp->p3, pCtx->pOut); + UPDATE_MAX_BLOBSIZE(pCtx->pOut); break; } @@ -5708,46 +5748,73 @@ case OP_JumpZeroIncr: { /* jump, in1 */ ** The P5 arguments are taken from register P2 and its ** successors. 
*/ -case OP_AggStep: { +case OP_AggStep0: { int n; - int i; - Mem *pMem; - Mem *pRec; - Mem t; - sqlite3_context ctx; - sqlite3_value **apVal; + sqlite3_context *pCtx; + assert( pOp->p4type==P4_FUNCDEF ); n = pOp->p5; - assert( n>=0 ); - pRec = &aMem[pOp->p2]; - apVal = p->apArg; - assert( apVal || n==0 ); - for(i=0; ip4.pFunc; assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); - ctx.pMem = pMem = &aMem[pOp->p3]; + assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) ); + assert( pOp->p3p2 || pOp->p3>=pOp->p2+n ); + pCtx = sqlite3DbMallocRaw(db, sizeof(*pCtx) + (n-1)*sizeof(sqlite3_value*)); + if( pCtx==0 ) goto no_mem; + pCtx->pMem = 0; + pCtx->pFunc = pOp->p4.pFunc; + pCtx->iOp = (int)(pOp - aOp); + pCtx->pVdbe = p; + pCtx->argc = n; + pOp->p4type = P4_FUNCCTX; + pOp->p4.pCtx = pCtx; + pOp->opcode = OP_AggStep; + /* Fall through into OP_AggStep */ +} +case OP_AggStep: { + int i; + sqlite3_context *pCtx; + Mem *pMem; + Mem t; + + assert( pOp->p4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + pMem = &aMem[pOp->p3]; + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. 
The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + if( pCtx->pMem != pMem ){ + pCtx->pMem = pMem; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; + } + +#ifdef SQLITE_DEBUG + for(i=0; iargc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + pMem->n++; sqlite3VdbeMemInit(&t, db, MEM_Null); - ctx.pOut = &t; - ctx.isError = 0; - ctx.pVdbe = p; - ctx.iOp = (int)(pOp - aOp); - ctx.skipFlag = 0; - (ctx.pFunc->xStep)(&ctx, n, apVal); /* IMP: R-24505-23230 */ - if( ctx.isError ){ - sqlite3VdbeError(p, "%s", sqlite3_value_text(&t)); - rc = ctx.isError; + pCtx->pOut = &t; + pCtx->fErrorOrAux = 0; + pCtx->skipFlag = 0; + (pCtx->pFunc->xStep)(pCtx,pCtx->argc,pCtx->argv); /* IMP: R-24505-23230 */ + if( pCtx->fErrorOrAux ){ + if( pCtx->isError ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(&t)); + rc = pCtx->isError; + } + sqlite3VdbeMemRelease(&t); + }else{ + assert( t.flags==MEM_Null ); } - if( ctx.skipFlag ){ + if( pCtx->skipFlag ){ assert( pOp[-1].opcode==OP_CollSeq ); i = pOp[-1].p1; if( i ) sqlite3VdbeMemSetInt64(&aMem[i], 1); } - sqlite3VdbeMemRelease(&t); break; } diff --git a/src/vdbe.h b/src/vdbe.h index da1410282a..c489fd04a6 100644 --- a/src/vdbe.h +++ b/src/vdbe.h @@ -46,13 +46,14 @@ struct VdbeOp { int p1; /* First operand */ int p2; /* Second parameter (often the jump destination) */ int p3; /* The third parameter */ - union { /* fourth parameter */ + union p4union { /* fourth parameter */ int i; /* Integer value if p4type==P4_INT32 */ void *p; /* Generic pointer */ char *z; /* Pointer to data for string (char array) types */ i64 *pI64; /* Used when p4type is P4_INT64 */ double *pReal; /* Used when p4type is P4_REAL */ FuncDef *pFunc; /* Used when p4type is P4_FUNCDEF */ + sqlite3_context *pCtx; /* Used when p4type is P4_FUNCCTX */ CollSeq *pColl; /* Used when p4type is P4_COLLSEQ 
*/ Mem *pMem; /* Used when p4type is P4_MEM */ VTable *pVtab; /* Used when p4type is P4_VTAB */ @@ -119,6 +120,7 @@ typedef struct VdbeOpList VdbeOpList; #define P4_INTARRAY (-15) /* P4 is a vector of 32-bit integers */ #define P4_SUBPROGRAM (-18) /* P4 is a pointer to a SubProgram structure */ #define P4_ADVANCE (-19) /* P4 is a pointer to BtreeNext() or BtreePrev() */ +#define P4_FUNCCTX (-20) /* P4 is a pointer to an sqlite3_context object */ /* Error message codes for OP_Halt */ #define P5_ConstraintNotNull 1 diff --git a/src/vdbeInt.h b/src/vdbeInt.h index 7ebd41d6ea..4a90ed6483 100644 --- a/src/vdbeInt.h +++ b/src/vdbeInt.h @@ -279,14 +279,16 @@ struct AuxData { ** (Mem) which are only defined there. */ struct sqlite3_context { - Mem *pOut; /* The return value is stored here */ - FuncDef *pFunc; /* Pointer to function information */ - Mem *pMem; /* Memory cell used to store aggregate context */ - Vdbe *pVdbe; /* The VM that owns this context */ - int iOp; /* Instruction number of OP_Function */ - int isError; /* Error code returned by the function. */ - u8 skipFlag; /* Skip accumulator loading if true */ - u8 fErrorOrAux; /* isError!=0 or pVdbe->pAuxData modified */ + Mem *pOut; /* The return value is stored here */ + FuncDef *pFunc; /* Pointer to function information */ + Mem *pMem; /* Memory cell used to store aggregate context */ + Vdbe *pVdbe; /* The VM that owns this context */ + int iOp; /* Instruction number of OP_Function */ + int isError; /* Error code returned by the function. 
*/ + u8 skipFlag; /* Skip accumulator loading if true */ + u8 fErrorOrAux; /* isError!=0 or pVdbe->pAuxData modified */ + u8 argc; /* Number of arguments */ + sqlite3_value *argv[1]; /* Argument set */ }; /* diff --git a/src/vdbeaux.c b/src/vdbeaux.c index e11981e16e..19e2392720 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -489,11 +489,6 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ /* NOTE: Be sure to update mkopcodeh.awk when adding or removing ** cases from this switch! */ switch( opcode ){ - case OP_Function: - case OP_AggStep: { - if( pOp->p5>nMaxArgs ) nMaxArgs = pOp->p5; - break; - } case OP_Transaction: { if( pOp->p2!=0 ) p->readOnly = 0; /* fall thru */ @@ -737,6 +732,10 @@ static void freeP4(sqlite3 *db, int p4type, void *p4){ if( p4 ){ assert( db ); switch( p4type ){ + case P4_FUNCCTX: { + freeEphemeralFunction(db, ((sqlite3_context*)p4)->pFunc); + /* Fall through into the next case */ + } case P4_REAL: case P4_INT64: case P4_DYNAMIC: @@ -1121,6 +1120,11 @@ static char *displayP4(Op *pOp, char *zTemp, int nTemp){ sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); break; } + case P4_FUNCCTX: { + FuncDef *pDef = pOp->p4.pCtx->pFunc; + sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); + break; + } case P4_INT64: { sqlite3_snprintf(nTemp, zTemp, "%lld", *pOp->p4.pI64); break; From e2d9e7cfd026c97da0731d5302da58b9c8543a7e Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 26 Jun 2015 18:47:53 +0000 Subject: [PATCH 171/206] Further optimization of SQL function dispatch. Improvements to opcode documentation. 
FossilOrigin-Name: eaddbf296aee98ffca82adade1b0d2fbefd09d7b --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/vdbe.c | 40 +++++++++++++++++++++++++++++----------- src/vdbeaux.c | 2 ++ 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/manifest b/manifest index af874d7e51..4b5530ecfd 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Cache\sthe\ssqlite3_context\sstructure\sin\sthe\sP4\soperand\sof\sVDBE\sprograms\nfor\sfaster\sSQL\sfunction\sdispatch. -D 2015-06-26T18:16:52.781 +C Further\soptimization\sof\sSQL\sfunction\sdispatch.\s\sImprovements\sto\sopcode\ndocumentation. +D 2015-06-26T18:47:53.814 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -313,11 +313,11 @@ F src/update.c 487747b328b7216bb7f6af0695d6937d5c9e605f F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c F src/util.c a6431c92803b975b7322724a7b433e538d243539 F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701 -F src/vdbe.c 8fde5281f304c31fd635891b3cb138e6b79ce9f5 +F src/vdbe.c b425feab69fb29b3cf4d40c5c8ea585bce883307 F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 F src/vdbeInt.h 8b54e01ad0463590e7cffabce0bc36da9ee4f816 F src/vdbeapi.c 6a0d7757987018ff6b1b81bc5293219cd26bb299 -F src/vdbeaux.c 316e6bc773559d164155848f086c4b7d146f483a +F src/vdbeaux.c 13261b7597c7f189232f84a1e175a3268ea2c32b F src/vdbeblob.c 4f2e8e075d238392df98c5e03a64342465b03f90 F src/vdbemem.c ae38a0d35ae71cf604381a887c170466ba518090 F src/vdbesort.c f5009e7a35e3065635d8918b9a31f498a499976b @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7097716caed9d4aef49c7e766e41ea74abf5967f -R 61b5a8d7a0dc65b1ab9c06045c3a6290 +P 
2abc44eb3b9d489321baa50bc25e17dafbda3687 +R a98b4348d07c70a554120b6bb6158a33 U drh -Z 9dd787a20069829fde828ff0d1be6044 +Z 2205c193de128862e6e7711ff0d4d13b diff --git a/manifest.uuid b/manifest.uuid index ad385475d2..37b4dbdf5a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2abc44eb3b9d489321baa50bc25e17dafbda3687 \ No newline at end of file +eaddbf296aee98ffca82adade1b0d2fbefd09d7b \ No newline at end of file diff --git a/src/vdbe.c b/src/vdbe.c index c1f2cee5fe..86a2244ed5 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -1549,7 +1549,7 @@ case OP_CollSeq: { /* Opcode: Function0 P1 P2 P3 P4 P5 ** Synopsis: r[P3]=func(r[P2@P5]) ** -** Invoke a user function (P4 is a pointer to a Function structure that +** Invoke a user function (P4 is a pointer to a FuncDef object that ** defines the function) with P5 arguments taken from register P2 and ** successors. The result of the function is stored in register P3. ** Register P3 must not be one of the function inputs. @@ -1579,7 +1579,7 @@ case OP_CollSeq: { ** invocation of this opcode. ** ** SQL functions are initially coded as OP_Function0 with P4 pointing -** to the function itself. But on first evaluation, the P4 operand is +** to a FuncDef object. But on first evaluation, the P4 operand is ** automatically converted into an sqlite3_context object and the operation ** changed to this OP_Function opcode. In this way, the initialization of ** the sqlite3_context object occurs only once, rather than once for each @@ -1619,8 +1619,9 @@ case OP_Function: { ** might change from one evaluation to the next. 
The next block of code ** checks to see if the register array has changed, and if so it ** reinitializes the relavant parts of the sqlite3_context object */ - if( pCtx->pOut != &aMem[pOp->p3] ){ - pCtx->pOut = &aMem[pOp->p3]; + pOut = &aMem[pOp->p3]; + if( pCtx->pOut != pOut ){ + pCtx->pOut = pOut; for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; } @@ -1643,13 +1644,13 @@ case OP_Function: { sqlite3VdbeError(p, "%s", sqlite3_value_text(pCtx->pOut)); rc = pCtx->isError; } - sqlite3VdbeDeleteAuxData(p, (int)(pOp - aOp), pOp->p1); + sqlite3VdbeDeleteAuxData(p, pCtx->iOp, pOp->p1); } /* Copy the result of the function into register P3 */ - sqlite3VdbeChangeEncoding(pCtx->pOut, encoding); - if( sqlite3VdbeMemTooBig(pCtx->pOut) ){ - goto too_big; + if( pOut->flags & (MEM_Str|MEM_Blob) ){ + sqlite3VdbeChangeEncoding(pCtx->pOut, encoding); + if( sqlite3VdbeMemTooBig(pCtx->pOut) ) goto too_big; } REGISTER_TRACE(pOp->p3, pCtx->pOut); @@ -5737,17 +5738,34 @@ case OP_JumpZeroIncr: { /* jump, in1 */ break; } -/* Opcode: AggStep * P2 P3 P4 P5 +/* Opcode: AggStep0 * P2 P3 P4 P5 ** Synopsis: accum=r[P3] step(r[P2@P5]) ** ** Execute the step function for an aggregate. The ** function has P5 arguments. P4 is a pointer to the FuncDef -** structure that specifies the function. Use register -** P3 as the accumulator. +** structure that specifies the function. Register P3 is the +** accumulator. ** ** The P5 arguments are taken from register P2 and its ** successors. */ +/* Opcode: AggStep * P2 P3 P4 P5 +** Synopsis: accum=r[P3] step(r[P2@P5]) +** +** Execute the step function for an aggregate. The +** function has P5 arguments. P4 is a pointer to an sqlite3_context +** object that is used to run the function. Register P3 is +** as the accumulator. +** +** The P5 arguments are taken from register P2 and its +** successors. +** +** This opcode is initially coded as OP_AggStep0. 
On first evaluation, +** the FuncDef stored in P4 is converted into an sqlite3_context and +** the opcode is changed. In this way, the initialization of the +** sqlite3_context only happens once, instead of on each call to the +** step function. +*/ case OP_AggStep0: { int n; sqlite3_context *pCtx; diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 19e2392720..05a6952334 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -1120,11 +1120,13 @@ static char *displayP4(Op *pOp, char *zTemp, int nTemp){ sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); break; } +#ifdef SQLITE_DEBUG case P4_FUNCCTX: { FuncDef *pDef = pOp->p4.pCtx->pFunc; sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); break; } +#endif case P4_INT64: { sqlite3_snprintf(nTemp, zTemp, "%lld", *pOp->p4.pI64); break; From 6fbeb2259692e42438e73805db10eb44dabe79cd Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 26 Jun 2015 18:50:29 +0000 Subject: [PATCH 172/206] Fix some cases in the fts5 code where a corrupt database could cause a buffer overread. 
FossilOrigin-Name: 360c57bbb8bad6cc84af140f61e0282fa06462b4 --- ext/fts5/fts5_index.c | 47 +++++++++++++++++++++++++++---------------- manifest | 15 +++++++------- manifest.uuid | 2 +- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index d82d194a52..ff66dce441 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1847,7 +1847,7 @@ static void fts5SegIterNext( fts5SegIterNextPage(p, pIter); pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; - if( (iOff = fts5GetU16(&pLeaf->p[0])) ){ + if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOffn ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } @@ -1855,6 +1855,10 @@ static void fts5SegIterNext( pIter->iLeafOffset = iOff; bNewTerm = 1; } + if( iOff>=pLeaf->n ){ + p->rc = FTS5_CORRUPT; + return; + } } } @@ -2025,6 +2029,7 @@ static void fts5SegIterSeekInit( int h; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ + Fts5Data *pLeaf; assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); @@ -2063,21 +2068,25 @@ static void fts5SegIterSeekInit( pIter->iLeafPgno = iPg - 1; fts5SegIterNextPage(p, pIter); - if( pIter->pLeaf ){ + if( (pLeaf = pIter->pLeaf) ){ int res; - pIter->iLeafOffset = fts5GetU16(&pIter->pLeaf->p[2]); - fts5SegIterLoadTerm(p, pIter, 0); - fts5SegIterLoadNPos(p, pIter); - do { - res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); - if( res>=0 ) break; - fts5SegIterNext(p, pIter, 0); - }while( pIter->pLeaf && p->rc==SQLITE_OK ); + pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]); + if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){ + p->rc = FTS5_CORRUPT; + }else{ + fts5SegIterLoadTerm(p, pIter, 0); + fts5SegIterLoadNPos(p, pIter); + do { + res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); + if( res>=0 ) break; + fts5SegIterNext(p, pIter, 0); + }while( pIter->pLeaf && p->rc==SQLITE_OK ); - if( bGe==0 && res ){ - 
/* Set iterator to point to EOF */ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; + if( bGe==0 && res ){ + /* Set iterator to point to EOF */ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + } } } @@ -2525,7 +2534,7 @@ static void fts5MultiIterNew( int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5MultiSegIter **ppOut /* New object */ ){ - int nSeg; /* Number of segment-iters in use */ + int nSeg = 0; /* Number of segment-iters in use */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl; @@ -4928,8 +4937,12 @@ static void fts5IndexIntegrityCheckSegment( if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); - fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; + if( iRowidOff>=pLeaf->n ){ + p->rc = FTS5_CORRUPT; + }else{ + fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); + if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; + } fts5DataRelease(pLeaf); } } diff --git a/manifest b/manifest index 77f03ee837..841501ef8d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\schanges\sto\sget\sFTS5\sworking\swith\sMSVC. -D 2015-06-26T17:10:12.989 +C Fix\ssome\scases\sin\sthe\sfts5\scode\swhere\sa\scorrupt\sdatabase\scould\scause\sa\sbuffer\soverread. 
+D 2015-06-26T18:50:29.818 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 7428b0bcb257641cbecc3bacce7f40686cf99f36 F ext/fts5/fts5_config.c d40da53f5e94214c553b2af3d6fd9aebac5f3f05 F ext/fts5/fts5_expr.c 3386ab0a71dbab7e1259c3b16d6113c97d14123e F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc -F ext/fts5/fts5_index.c 0c5a5218eb9fddd65c7d5bbad72c383d102c9648 +F ext/fts5/fts5_index.c ad32235180757f182050b8d24c9dbe61056385d2 F ext/fts5/fts5_main.c c5b2a219d65967c07fd1bc8fd45206863a2fe360 F ext/fts5/fts5_storage.c 3e672a0d35f63979556903861b324e7b8932cecc F ext/fts5/fts5_tcl.c b82f13f73a30f0959f539743f8818bece994a970 @@ -1364,8 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c91a93b343b4d3b1a19afdb5d23d1c508f9779d2 bfcd16089cf8c65130c011a9718bf7812685c841 -R 297861c6724b6a9194bdf2ab25858763 -T +closed bfcd16089cf8c65130c011a9718bf7812685c841 -U mistachkin -Z d2f82b98bab3ebc4b4555fb7502228b7 +P 954231d29d60460d423ecb132bbfb725b0ea375a +R f24cf4dc2b8231a9cc4f2e0f89642df3 +U dan +Z eeb5dd5cce2b08a289c0b394982d9461 diff --git a/manifest.uuid b/manifest.uuid index 8cd37282a3..9fa8bd840d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -954231d29d60460d423ecb132bbfb725b0ea375a \ No newline at end of file +360c57bbb8bad6cc84af140f61e0282fa06462b4 \ No newline at end of file From abb78fbd889dd88d6c3eccca6b0cd9a557267d3b Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 26 Jun 2015 19:43:55 +0000 Subject: [PATCH 173/206] Small size reduction and performance increase on the OP_IdxInsert opcode. 
FossilOrigin-Name: b6bedc2e9c2f87709673799db9401b95fdb386b0 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/vdbe.c | 8 +++----- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/manifest b/manifest index 4b5530ecfd..9662ec543a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Further\soptimization\sof\sSQL\sfunction\sdispatch.\s\sImprovements\sto\sopcode\ndocumentation. -D 2015-06-26T18:47:53.814 +C Small\ssize\sreduction\sand\sperformance\sincrease\son\sthe\sOP_IdxInsert\sopcode. +D 2015-06-26T19:43:55.327 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -313,7 +313,7 @@ F src/update.c 487747b328b7216bb7f6af0695d6937d5c9e605f F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c F src/util.c a6431c92803b975b7322724a7b433e538d243539 F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701 -F src/vdbe.c b425feab69fb29b3cf4d40c5c8ea585bce883307 +F src/vdbe.c 3d5a78d39b15dc91ea2c11017d560a4224eb2f75 F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 F src/vdbeInt.h 8b54e01ad0463590e7cffabce0bc36da9ee4f816 F src/vdbeapi.c 6a0d7757987018ff6b1b81bc5293219cd26bb299 @@ -1286,7 +1286,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2abc44eb3b9d489321baa50bc25e17dafbda3687 -R a98b4348d07c70a554120b6bb6158a33 +P eaddbf296aee98ffca82adade1b0d2fbefd09d7b +R e2390f27840a2165ad1c292ca6931905 U drh -Z 2205c193de128862e6e7711ff0d4d13b +Z 46d8302c1ebdc3ef48ddf35916e18889 diff --git a/manifest.uuid b/manifest.uuid index 37b4dbdf5a..568be4203b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -eaddbf296aee98ffca82adade1b0d2fbefd09d7b \ No newline at end of file 
+b6bedc2e9c2f87709673799db9401b95fdb386b0 \ No newline at end of file diff --git a/src/vdbe.c b/src/vdbe.c index 86a2244ed5..66578ec6a4 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -4797,7 +4797,6 @@ next_tail: case OP_SorterInsert: /* in2 */ case OP_IdxInsert: { /* in2 */ VdbeCursor *pC; - BtCursor *pCrsr; int nKey; const char *zKey; @@ -4807,18 +4806,17 @@ case OP_IdxInsert: { /* in2 */ assert( isSorter(pC)==(pOp->opcode==OP_SorterInsert) ); pIn2 = &aMem[pOp->p2]; assert( pIn2->flags & MEM_Blob ); - pCrsr = pC->pCursor; if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++; - assert( pCrsr!=0 ); + assert( pC->pCursor!=0 ); assert( pC->isTable==0 ); rc = ExpandBlob(pIn2); if( rc==SQLITE_OK ){ - if( isSorter(pC) ){ + if( pOp->opcode==OP_SorterInsert ){ rc = sqlite3VdbeSorterWrite(pC, pIn2); }else{ nKey = pIn2->n; zKey = pIn2->z; - rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, + rc = sqlite3BtreeInsert(pC->pCursor, zKey, nKey, "", 0, 0, pOp->p3, ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) ); assert( pC->deferredMoveto==0 ); From 6394d99a0e3b56a8d1bca5b38e2bff8ffe277d90 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 26 Jun 2015 20:08:25 +0000 Subject: [PATCH 174/206] Fix a segfault that could follow an OOM error in fts5. 
FossilOrigin-Name: 713239b8cf2900e8f7d97646c7f350248b4e804f --- ext/fts5/fts5_config.c | 1 + manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 6342b92cb0..8103c308c3 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -650,6 +650,7 @@ int sqlite3Fts5Tokenize( void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int) /* Callback */ ){ + if( pText==0 ) return SQLITE_OK; return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } diff --git a/manifest b/manifest index 3988d45592..3110d93df4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. -D 2015-06-26T19:33:04.560 +C Fix\sa\ssegfault\sthat\scould\sfollow\san\sOOM\serror\sin\sfts5. +D 2015-06-26T20:08:25.582 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -109,7 +109,7 @@ F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba F ext/fts5/fts5Int.h 918e947c0c20122ed5eb9ea695d83c6c8cf7239a F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971 F ext/fts5/fts5_buffer.c 7428b0bcb257641cbecc3bacce7f40686cf99f36 -F ext/fts5/fts5_config.c d40da53f5e94214c553b2af3d6fd9aebac5f3f05 +F ext/fts5/fts5_config.c 7d19f4516cd79f1f8b58d38aa051b70195404422 F ext/fts5/fts5_expr.c 3386ab0a71dbab7e1259c3b16d6113c97d14123e F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc F ext/fts5/fts5_index.c ad32235180757f182050b8d24c9dbe61056385d2 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 
360c57bbb8bad6cc84af140f61e0282fa06462b4 eaddbf296aee98ffca82adade1b0d2fbefd09d7b -R 46e30302e9c721b7ade798b428e70680 +P 2a1af0f29e4c387721ec1fb3a2b55e8605b8401d +R 9bc9b11118f27b38560ab00f58af8bdf U dan -Z 99ffda12a690ac2d5944e0d64e334d3f +Z 9ce0be1aae80e8832fceff3554ce4cad diff --git a/manifest.uuid b/manifest.uuid index 2ca7108032..b6cf8bd524 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2a1af0f29e4c387721ec1fb3a2b55e8605b8401d \ No newline at end of file +713239b8cf2900e8f7d97646c7f350248b4e804f \ No newline at end of file From 018a93d216cfd380e98fa7ba57a6c054c1e8dff0 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 26 Jun 2015 20:13:56 +0000 Subject: [PATCH 175/206] Remove unnecessary lines from fts3.h and main.mk. FossilOrigin-Name: c1b268ced38259706674ed2fcbf842d3fbd9fb8c --- ext/fts3/fts3.h | 1 - main.mk | 1 - manifest | 14 +++++++------- manifest.uuid | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/ext/fts3/fts3.h b/ext/fts3/fts3.h index e99457eebd..c1aa8caf09 100644 --- a/ext/fts3/fts3.h +++ b/ext/fts3/fts3.h @@ -20,7 +20,6 @@ extern "C" { #endif /* __cplusplus */ int sqlite3Fts3Init(sqlite3 *db); -int sqlite3Fts5Init(sqlite3 *db); #ifdef __cplusplus } /* extern "C" */ diff --git a/main.mk b/main.mk index ed90e25575..5b93a99771 100644 --- a/main.mk +++ b/main.mk @@ -47,7 +47,6 @@ TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3 TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth -TCCX += -I$(TOP)/ext/fts5 # Object files for the SQLite library. # diff --git a/manifest b/manifest index 3110d93df4..119f2e8ac5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\ssegfault\sthat\scould\sfollow\san\sOOM\serror\sin\sfts5. -D 2015-06-26T20:08:25.582 +C Remove\sunnecessary\slines\sfrom\sfts3.h\sand\smain.mk. 
+D 2015-06-26T20:13:56.403 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -79,7 +79,7 @@ F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d F ext/fts3/fts3.c a95de5190cf52f4fa9d5952890399cab63e632b9 -F ext/fts3/fts3.h 62a77d880cf06a2865052726f8325c8fabcecad7 +F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3Int.h 601743955ac43a0e82e6828a931c07bb3b0c95ff F ext/fts3/fts3_aux.c 9edc3655fcb287f0467d0a4b886a01c6185fe9f1 F ext/fts3/fts3_expr.c 71c063da9c2a4167fb54aec089dd5ef33a58c9cb @@ -248,7 +248,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk d585680c45db1f68ec5ef88199fb10694d117e17 +F main.mk 934da670dac2fe2b40986a83741cec770c91fdc0 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2a1af0f29e4c387721ec1fb3a2b55e8605b8401d -R 9bc9b11118f27b38560ab00f58af8bdf +P 713239b8cf2900e8f7d97646c7f350248b4e804f +R aa60ec33f0edbb2ca0c0b2937abf8e18 U dan -Z 9ce0be1aae80e8832fceff3554ce4cad +Z d664e62992da247f3e87e6811881dab7 diff --git a/manifest.uuid b/manifest.uuid index b6cf8bd524..1c3941fb23 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-713239b8cf2900e8f7d97646c7f350248b4e804f \ No newline at end of file +c1b268ced38259706674ed2fcbf842d3fbd9fb8c \ No newline at end of file From 95ae9587f1442e799c4423f0c3b1d48d2646a483 Mon Sep 17 00:00:00 2001 From: mistachkin Date: Fri, 26 Jun 2015 20:45:43 +0000 Subject: [PATCH 176/206] Treat compilation of FTS5 for the loadable extension specially with MSVC. FossilOrigin-Name: 7c610276bb41dbc80fe169d35fe9a3a3f6525635 --- Makefile.msc | 7 +++++-- manifest | 14 +++++++------- manifest.uuid | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Makefile.msc b/Makefile.msc index c17b89d526..95c0cf18d4 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -1693,10 +1693,13 @@ fts5.c: $(FTS5_SRC) $(TCLSH_CMD) $(TOP)\ext\fts5\tool\mkfts5c.tcl fts5.lo: fts5.c $(HDR) $(EXTHDR) + $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c fts5.c + +fts5_ext.lo: fts5.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(NO_WARN) -DSQLITE_ENABLE_FTS5 -c fts5.c -fts5.dll: fts5.lo - $(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL /OUT:$@ fts5.lo +fts5.dll: fts5_ext.lo + $(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL /OUT:$@ fts5_ext.lo # Rules to build the 'testfixture' application. # diff --git a/manifest b/manifest index b736989fb2..1d586dd805 100644 --- a/manifest +++ b/manifest @@ -1,9 +1,9 @@ -C Merge\sfts5\sbranch\sinto\strunk. -D 2015-06-26T20:25:59.799 +C Treat\scompilation\sof\sFTS5\sfor\sthe\sloadable\sextension\sspecially\swith\sMSVC. 
+D 2015-06-26T20:45:43.839 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 -F Makefile.msc de7668a4f537f91bdb185e7194a798b1d59f101f +F Makefile.msc 0a0568c12cf37fd9683241dfe7aa484fcdf27554 F Makefile.vxworks e1b65dea203f054e71653415bd8f96dcaed47858 F README.md 8ecc12493ff9f820cdea6520a9016001cb2e59b7 F VERSION ce0ae95abd7121c534f6917c1c8f2b70d9acd4db @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b6bedc2e9c2f87709673799db9401b95fdb386b0 8671b9e137275d247e67e3d7d5d9c4cb1f380ff7 -R 0da9483025b8e801a1ae1ccbcdc9a8a2 -U dan -Z d74fcb200391e22b2c75ffc9a8c44934 +P d27d9965b5404cd32be1113215fd9feeb5b66acc +R 3acd5e6aa6095e21a23b200982f0e53b +U mistachkin +Z ca7f05b6600c190f8e4e9359f2b7edca diff --git a/manifest.uuid b/manifest.uuid index 52f47d959f..a120ef2388 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d27d9965b5404cd32be1113215fd9feeb5b66acc \ No newline at end of file +7c610276bb41dbc80fe169d35fe9a3a3f6525635 \ No newline at end of file From f44890a7c46cf849498338eb729458ebbea33af8 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 27 Jun 2015 03:58:15 +0000 Subject: [PATCH 177/206] Add the Mempage.aDataOfst field and use it in sqlite3BtreeMovetoUnpacked() for about a 2 million cycle gain. 
FossilOrigin-Name: bee94dc3510745ba2efa044e8f3299793cfc7e34 --- manifest | 16 ++++++++-------- manifest.uuid | 2 +- src/btree.c | 12 ++++++++++-- src/btreeInt.h | 1 + 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/manifest b/manifest index 1d586dd805..0c5f8a6318 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Treat\scompilation\sof\sFTS5\sfor\sthe\sloadable\sextension\sspecially\swith\sMSVC. -D 2015-06-26T20:45:43.839 +C Add\sthe\sMempage.aDataOfst\sfield\sand\suse\sit\sin\ssqlite3BtreeMovetoUnpacked()\nfor\sabout\sa\s2\smillion\scycle\sgain. +D 2015-06-27T03:58:15.601 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,9 +269,9 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c c113f73fc4048038529cc30ed7147b24c34c2c5d +F src/btree.c 2a7554d5607f8ded06bf282c8e9e609a32d9ce55 F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 -F src/btreeInt.h fdd1aff02fb2a63812bd95716e7f579fc3759107 +F src/btreeInt.h 426d1e0d1a15d06b3ad2304f4bedc5bb71e5b4a2 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 F src/complete.c a5cf5b4b56390cfb7b8636e8f7ddef90258dd575 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d27d9965b5404cd32be1113215fd9feeb5b66acc -R 3acd5e6aa6095e21a23b200982f0e53b -U mistachkin -Z ca7f05b6600c190f8e4e9359f2b7edca +P 7c610276bb41dbc80fe169d35fe9a3a3f6525635 +R 762529df533e804d7973bafd804366fc +U drh +Z 
0b74e27756355a1dbfb2275608b79e03 diff --git a/manifest.uuid b/manifest.uuid index a120ef2388..34069c5388 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7c610276bb41dbc80fe169d35fe9a3a3f6525635 \ No newline at end of file +bee94dc3510745ba2efa044e8f3299793cfc7e34 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 7e6f750b02..bbec2dd0a2 100644 --- a/src/btree.c +++ b/src/btree.c @@ -954,10 +954,16 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ ** the page, 1 means the second cell, and so forth) return a pointer ** to the cell content. ** +** findCellPastPtr() does the same except it skips past the initial +** 4-byte child pointer found on interior pages, if there is one. +** ** This routine works only for pages that do not contain overflow cells. */ #define findCell(P,I) \ ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) +#define findCellPastPtr(P,I) \ + ((P)->aDataOfst + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) + /* ** This is common tail processing for btreeParseCellPtr() and @@ -1703,6 +1709,7 @@ static int btreeInitPage(MemPage *pPage){ pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize; pPage->aDataEnd = &data[usableSize]; pPage->aCellIdx = &data[cellOffset]; + pPage->aDataOfst = &data[pPage->childPtrSize]; /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates ** the start of the cell content area. A zero value for this integer is ** interpreted as 65536. 
*/ @@ -1822,6 +1829,7 @@ static void zeroPage(MemPage *pPage, int flags){ pPage->cellOffset = first; pPage->aDataEnd = &data[pBt->usableSize]; pPage->aCellIdx = &data[first]; + pPage->aDataOfst = &data[pPage->childPtrSize]; pPage->nOverflow = 0; assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); pPage->maskPage = (u16)(pBt->pageSize - 1); @@ -5014,7 +5022,7 @@ int sqlite3BtreeMovetoUnpacked( if( xRecordCompare==0 ){ for(;;){ i64 nCellKey; - pCell = findCell(pPage, idx) + pPage->childPtrSize; + pCell = findCellPastPtr(pPage, idx); if( pPage->intKeyLeaf ){ while( 0x80 <= *(pCell++) ){ if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; @@ -5047,7 +5055,7 @@ int sqlite3BtreeMovetoUnpacked( }else{ for(;;){ int nCell; /* Size of the pCell cell in bytes */ - pCell = findCell(pPage, idx) + pPage->childPtrSize; + pCell = findCellPastPtr(pPage, idx); /* The maximum supported page-size is 65536 bytes. This means that ** the maximum number of record bytes stored on an index B-Tree diff --git a/src/btreeInt.h b/src/btreeInt.h index 6fc8c45ea3..c4a7f25518 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -295,6 +295,7 @@ struct MemPage { u8 *aData; /* Pointer to disk image of the page data */ u8 *aDataEnd; /* One byte past the end of usable data */ u8 *aCellIdx; /* The cell index area */ + u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */ DbPage *pDbPage; /* Pager page handle */ u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */ void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */ From bbf0f867d2142b02cfb9a1a654ccbf74daaf7e6f Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 27 Jun 2015 14:59:26 +0000 Subject: [PATCH 178/206] Add releasePageNotNull() as an alternative to releasePage() when the argument is guaranteed to be non-NULL, and use it for a small performance gain. 
FossilOrigin-Name: 9383a688701ac2c366a308efc287c9c0f8977d4e --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 30 ++++++++++++++++-------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/manifest b/manifest index 0c5f8a6318..7a00537b8d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sMempage.aDataOfst\sfield\sand\suse\sit\sin\ssqlite3BtreeMovetoUnpacked()\nfor\sabout\sa\s2\smillion\scycle\sgain. -D 2015-06-27T03:58:15.601 +C Add\sreleasePageNotNull()\sas\san\salternative\sto\sreleasePage()\swhen\sthe\sargument\nis\sguaranteed\sto\sbe\snon-NULL,\sand\suse\sit\sfor\sa\ssmall\sperformance\sgain. +D 2015-06-27T14:59:26.791 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,7 +269,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 2a7554d5607f8ded06bf282c8e9e609a32d9ce55 +F src/btree.c bcd36697aeef08a27603e1c3b49cba947dc989ff F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 426d1e0d1a15d06b3ad2304f4bedc5bb71e5b4a2 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7c610276bb41dbc80fe169d35fe9a3a3f6525635 -R 762529df533e804d7973bafd804366fc +P bee94dc3510745ba2efa044e8f3299793cfc7e34 +R 94fda06abca767c903bae476355c8d33 U drh -Z 0b74e27756355a1dbfb2275608b79e03 +Z 5cbb493cc80b630214e6dea5f8f50f0d diff --git a/manifest.uuid b/manifest.uuid index 34069c5388..f45d7f1d33 100644 --- 
a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bee94dc3510745ba2efa044e8f3299793cfc7e34 \ No newline at end of file +9383a688701ac2c366a308efc287c9c0f8977d4e \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index bbec2dd0a2..cd96ba0f67 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1947,16 +1947,17 @@ static int getAndInitPage( ** Release a MemPage. This should be called once for each prior ** call to btreeGetPage. */ +static void releasePageNotNull(MemPage *pPage){ + assert( pPage->aData ); + assert( pPage->pBt ); + assert( pPage->pDbPage!=0 ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + sqlite3PagerUnrefNotNull(pPage->pDbPage); +} static void releasePage(MemPage *pPage){ - if( pPage ){ - assert( pPage->aData ); - assert( pPage->pBt ); - assert( pPage->pDbPage!=0 ); - assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); - assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - sqlite3PagerUnrefNotNull(pPage->pDbPage); - } + if( pPage ) releasePageNotNull(pPage); } /* @@ -2931,7 +2932,7 @@ static void unlockBtreeIfUnused(BtShared *pBt){ assert( pPage1->aData ); assert( sqlite3PagerRefcount(pBt->pPager)==1 ); pBt->pPage1 = 0; - releasePage(pPage1); + releasePageNotNull(pPage1); } } @@ -4705,11 +4706,9 @@ static void moveToParent(BtCursor *pCur){ pCur->apPage[pCur->iPage]->pgno ); testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); - - releasePage(pCur->apPage[pCur->iPage]); - pCur->iPage--; pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + releasePageNotNull(pCur->apPage[pCur->iPage--]); } /* @@ -4750,7 +4749,10 @@ static int moveToRoot(BtCursor *pCur){ } if( pCur->iPage>=0 ){ - while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]); + while( pCur->iPage ){ + assert( 
pCur->apPage[pCur->iPage]!=0 ); + releasePageNotNull(pCur->apPage[pCur->iPage--]); + } }else if( pCur->pgnoRoot==0 ){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; From 375beb0ec03206a7b98553457d079fe960229406 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 27 Jun 2015 15:51:06 +0000 Subject: [PATCH 179/206] Manually inline the call from getAndInitPage() to btreeGetPage() for a savings of 2.5 million cycles at a cost of less than 100 bytes. FossilOrigin-Name: 7f65b96b4017413bd19624570efe8fb2b0f7b991 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 9 ++++++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/manifest b/manifest index 7a00537b8d..667d396c17 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sreleasePageNotNull()\sas\san\salternative\sto\sreleasePage()\swhen\sthe\sargument\nis\sguaranteed\sto\sbe\snon-NULL,\sand\suse\sit\sfor\sa\ssmall\sperformance\sgain. -D 2015-06-27T14:59:26.791 +C Manually\sinline\sthe\scall\sfrom\sgetAndInitPage()\sto\sbtreeGetPage()\sfor\sa\nsavings\sof\s2.5\smillion\scycles\sat\sa\scost\sof\sless\sthan\s100\sbytes. 
+D 2015-06-27T15:51:06.913 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,7 +269,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c bcd36697aeef08a27603e1c3b49cba947dc989ff +F src/btree.c be03b25d28cf714e81a825e60f7abcd8825e9b4e F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 426d1e0d1a15d06b3ad2304f4bedc5bb71e5b4a2 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bee94dc3510745ba2efa044e8f3299793cfc7e34 -R 94fda06abca767c903bae476355c8d33 +P 9383a688701ac2c366a308efc287c9c0f8977d4e +R 16848ebaab74906a500d4e5002f1b1ec U drh -Z 5cbb493cc80b630214e6dea5f8f50f0d +Z a8b43d2118aa53b4955ba2ebec95dffb diff --git a/manifest.uuid b/manifest.uuid index f45d7f1d33..79ebb3045b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9383a688701ac2c366a308efc287c9c0f8977d4e \ No newline at end of file +7f65b96b4017413bd19624570efe8fb2b0f7b991 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index cd96ba0f67..579c259540 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1848,7 +1848,7 @@ static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ pPage->pDbPage = pDbPage; pPage->pBt = pBt; pPage->pgno = pgno; - pPage->hdrOffset = pPage->pgno==1 ? 100 : 0; + pPage->hdrOffset = pgno==1 ? 
100 : 0; return pPage; } @@ -1929,8 +1929,11 @@ static int getAndInitPage( if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, pgno, ppPage, bReadonly); - if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ + DbPage *pDbPage; + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadonly); + if( rc ) return rc; + *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); + if( (*ppPage)->isInit==0 ){ rc = btreeInitPage(*ppPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); From 28f58dd60e6a6ab04828f741dea1fe1a9f7ca66e Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 27 Jun 2015 19:45:03 +0000 Subject: [PATCH 180/206] Performance improvements in moveToChild() by shifting some work over to getAndInitPage(). Net improvement is about 800K cycles at cost of 30 bytes. FossilOrigin-Name: 1956a4ce8eca650d98a7f68fd2d82eb8a3d6069f --- manifest | 14 ++++----- manifest.uuid | 2 +- src/btree.c | 82 ++++++++++++++++++++++++++++++-------------------- src/btreeInt.h | 1 + 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/manifest b/manifest index 667d396c17..c4b89c19df 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Manually\sinline\sthe\scall\sfrom\sgetAndInitPage()\sto\sbtreeGetPage()\sfor\sa\nsavings\sof\s2.5\smillion\scycles\sat\sa\scost\sof\sless\sthan\s100\sbytes. -D 2015-06-27T15:51:06.913 +C Performance\simprovements\sin\smoveToChild()\sby\sshifting\ssome\swork\sover\nto\sgetAndInitPage().\s\sNet\simprovement\sis\sabout\s800K\scycles\sat\scost\sof\s30\sbytes. 
+D 2015-06-27T19:45:03.070 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,9 +269,9 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c be03b25d28cf714e81a825e60f7abcd8825e9b4e +F src/btree.c b1ba9f65eba193ecae9519be29cce63bc1daef4c F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 -F src/btreeInt.h 426d1e0d1a15d06b3ad2304f4bedc5bb71e5b4a2 +F src/btreeInt.h 30f611b87ff873f47a28ce0bf66dd778c3274d9a F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 F src/complete.c a5cf5b4b56390cfb7b8636e8f7ddef90258dd575 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 9383a688701ac2c366a308efc287c9c0f8977d4e -R 16848ebaab74906a500d4e5002f1b1ec +P 7f65b96b4017413bd19624570efe8fb2b0f7b991 +R 4df1944eb9c633cad9fdc689344e2d7e U drh -Z a8b43d2118aa53b4955ba2ebec95dffb +Z 093d236679649777c7df295c5a4c84ba diff --git a/manifest.uuid b/manifest.uuid index 79ebb3045b..16fb8bbb5b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7f65b96b4017413bd19624570efe8fb2b0f7b991 \ No newline at end of file +1956a4ce8eca650d98a7f68fd2d82eb8a3d6069f \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 579c259540..b2ae9b5ec6 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1909,40 +1909,63 @@ u32 sqlite3BtreeLastPage(Btree *p){ } /* -** Get a page from the pager and initialize it. 
This routine is just a -** convenience wrapper around separate calls to btreeGetPage() and -** btreeInitPage(). +** Get a page from the pager and initialize it. ** -** If an error occurs, then the value *ppPage is set to is undefined. It +** If pCur!=0 then the page acquired will be added to that cursor. +** If the fetch fails, this routine must decrement pCur->iPage. +** +** The page is fetched as read-write unless pCur is not NULL and is +** a read-only cursor. +** +** If an error occurs, then *ppPage is undefined. It ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( BtShared *pBt, /* The database file */ Pgno pgno, /* Number of the page to get */ MemPage **ppPage, /* Write the page pointer here */ - int bReadonly /* PAGER_GET_READONLY or 0 */ + BtCursor *pCur, /* Cursor to receive the page, or NULL */ + int bReadOnly /* True for a read-only page */ ){ int rc; + DbPage *pDbPage; assert( sqlite3_mutex_held(pBt->mutex) ); - assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 ); + assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] ); + assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; - }else{ - DbPage *pDbPage; - rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadonly); - if( rc ) return rc; - *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); - if( (*ppPage)->isInit==0 ){ - rc = btreeInitPage(*ppPage); - if( rc!=SQLITE_OK ){ - releasePage(*ppPage); - } + goto getAndInitPage_error; + } + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); + if( rc ){ + goto getAndInitPage_error; + } + *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); + if( (*ppPage)->isInit==0 ){ + rc = btreeInitPage(*ppPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + goto getAndInitPage_error; } } + /* If obtaining a page for a cursor, we must verify that the page is + ** compatible with the cursor */ + if( pCur && pCur->iPage>0 + && 
((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->apPage[0]->intKey) + ){ + rc = SQLITE_CORRUPT_BKPT; + releasePage(*ppPage); + goto getAndInitPage_error; + } + testcase( pgno==0 ); assert( pgno!=0 || rc==SQLITE_CORRUPT ); + return SQLITE_OK; + +getAndInitPage_error: + if( pCur ) pCur->iPage--; return rc; } @@ -4028,6 +4051,7 @@ static int btreeCursor( pCur->pBt = pBt; assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); pCur->curFlags = wrFlag; + pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY; pCur->pNext = pBt->pCursor; if( pCur->pNext ){ pCur->pNext->pPrev = pCur; @@ -4641,9 +4665,6 @@ const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){ ** vice-versa). */ static int moveToChild(BtCursor *pCur, u32 newPgno){ - int rc; - int i = pCur->iPage; - MemPage *pNewPage; BtShared *pBt = pCur->pBt; assert( cursorHoldsMutex(pCur) ); @@ -4653,19 +4674,12 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, newPgno, &pNewPage, - (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); - if( rc ) return rc; - pCur->apPage[i+1] = pNewPage; - pCur->aiIdx[i+1] = 0; - pCur->iPage++; - pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); - if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){ - return SQLITE_CORRUPT_BKPT; - } - return SQLITE_OK; + pCur->iPage++; + pCur->aiIdx[pCur->iPage] = 0; + return getAndInitPage(pBt, newPgno, &pCur->apPage[pCur->iPage], + pCur, pCur->curPagerFlags); } #if SQLITE_DEBUG @@ -4760,8 +4774,10 @@ static int moveToRoot(BtCursor *pCur){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ + assert( pCur->iPage==(-1) ); + pCur->iPage = 0; rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], - (pCur->curFlags & BTCF_WriteFlag)==0 ? 
PAGER_GET_READONLY : 0); + pCur, pCur->curPagerFlags); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; @@ -6963,7 +6979,7 @@ static int balance_nonroot( } pgno = get4byte(pRight); while( 1 ){ - rc = getAndInitPage(pBt, pgno, &apOld[i], 0); + rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; @@ -8280,7 +8296,7 @@ static int clearDatabasePage( if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage, 0); + rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); if( rc ) return rc; if( pPage->bBusy ){ rc = SQLITE_CORRUPT_BKPT; diff --git a/src/btreeInt.h b/src/btreeInt.h index c4a7f25518..fa8d2c1fb4 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -518,6 +518,7 @@ struct BtCursor { int skipNext; /* Prev() is noop if negative. Next() is noop if positive. ** Error code if eState==CURSOR_FAULT */ u8 curFlags; /* zero or more BTCF_* flags defined below */ + u8 curPagerFlags; /* Flags to send to sqlite3PagerAcquire() */ u8 eState; /* One of the CURSOR_XXX constants (see below) */ u8 hints; /* As configured by CursorSetHints() */ i16 iPage; /* Index of current page in apPage */ From 15a0021b3e1c54e6352759a7848f1fc6e1ea10c8 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 27 Jun 2015 20:55:00 +0000 Subject: [PATCH 181/206] Enhancements to the previous check-in to make it a little smaller and faster. FossilOrigin-Name: 291d9e0c328a7bd0f255b0b7e819ca2c909701a3 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 15 ++++++++------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/manifest b/manifest index c4b89c19df..f672f228e4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Performance\simprovements\sin\smoveToChild()\sby\sshifting\ssome\swork\sover\nto\sgetAndInitPage().\s\sNet\simprovement\sis\sabout\s800K\scycles\sat\scost\sof\s30\sbytes. 
-D 2015-06-27T19:45:03.070 +C Enhancements\sto\sthe\sprevious\scheck-in\sto\smake\sit\sa\slittle\ssmaller\sand\sfaster. +D 2015-06-27T20:55:00.648 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,7 +269,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c b1ba9f65eba193ecae9519be29cce63bc1daef4c +F src/btree.c a6b0259834076783c7a22a0963bd91abcf27ef0f F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 30f611b87ff873f47a28ce0bf66dd778c3274d9a F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7f65b96b4017413bd19624570efe8fb2b0f7b991 -R 4df1944eb9c633cad9fdc689344e2d7e +P 1956a4ce8eca650d98a7f68fd2d82eb8a3d6069f +R 72a9a3c8bf73162e5f9bead1ad9abb3a U drh -Z 093d236679649777c7df295c5a4c84ba +Z d1566e69624f823fa39952a25235aa04 diff --git a/manifest.uuid b/manifest.uuid index 16fb8bbb5b..dd783bb52c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1956a4ce8eca650d98a7f68fd2d82eb8a3d6069f \ No newline at end of file +291d9e0c328a7bd0f255b0b7e819ca2c909701a3 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index b2ae9b5ec6..80a8ace510 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1911,8 +1911,9 @@ u32 sqlite3BtreeLastPage(Btree *p){ /* ** Get a page from the pager and initialize it. ** -** If pCur!=0 then the page acquired will be added to that cursor. 
-** If the fetch fails, this routine must decrement pCur->iPage. +** If pCur!=0 then the page is being fetched as part of a moveToChild() +** call. Do additional sanity checking on the page in this case. +** And if the fetch fails, this routine must decrement pCur->iPage. ** ** The page is fetched as read-write unless pCur is not NULL and is ** a read-only cursor. @@ -1932,6 +1933,7 @@ static int getAndInitPage( assert( sqlite3_mutex_held(pBt->mutex) ); assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] ); assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); + assert( pCur==0 || pCur->iPage>0 ); if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; @@ -1950,9 +1952,9 @@ static int getAndInitPage( } } - /* If obtaining a page for a cursor, we must verify that the page is - ** compatible with the cursor */ - if( pCur && pCur->iPage>0 + /* If obtaining a child page for a cursor, we must verify that the page is + ** compatible with the root page. */ + if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->apPage[0]->intKey) ){ rc = SQLITE_CORRUPT_BKPT; @@ -4775,9 +4777,8 @@ static int moveToRoot(BtCursor *pCur){ return SQLITE_OK; }else{ assert( pCur->iPage==(-1) ); - pCur->iPage = 0; rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], - pCur, pCur->curPagerFlags); + 0, pCur->curPagerFlags); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; From 408efc066498d96c924672ef28f552d583f1925d Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 27 Jun 2015 22:49:10 +0000 Subject: [PATCH 182/206] Add the BtCursor.curIntKey field and use it for a small size reduction and performance boost. 
FossilOrigin-Name: 4a17df139ac41e29c9a2e58afbd1238a5e94bd36 --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/btree.c | 3 ++- src/btreeInt.h | 10 +++++++--- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/manifest b/manifest index f672f228e4..24c66a974b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Enhancements\sto\sthe\sprevious\scheck-in\sto\smake\sit\sa\slittle\ssmaller\sand\sfaster. -D 2015-06-27T20:55:00.648 +C Add\sthe\sBtCursor.curIntKey\sfield\sand\suse\sit\sfor\sa\ssmall\ssize\sreduction\sand\nperformance\sboost. +D 2015-06-27T22:49:10.437 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,9 +269,9 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c a6b0259834076783c7a22a0963bd91abcf27ef0f +F src/btree.c 124fb2cf3712a32e3cfec4441dac82218d267419 F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 -F src/btreeInt.h 30f611b87ff873f47a28ce0bf66dd778c3274d9a +F src/btreeInt.h 8ca7124af9ee2ce27747a4e5500c27a254dea8eb F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 F src/complete.c a5cf5b4b56390cfb7b8636e8f7ddef90258dd575 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1956a4ce8eca650d98a7f68fd2d82eb8a3d6069f -R 72a9a3c8bf73162e5f9bead1ad9abb3a +P 291d9e0c328a7bd0f255b0b7e819ca2c909701a3 +R 2c201a19b31719b4111966366152c8e1 U drh -Z d1566e69624f823fa39952a25235aa04 +Z 3864de7c8c7143c5030e74933e17298b 
diff --git a/manifest.uuid b/manifest.uuid index dd783bb52c..7646601c90 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -291d9e0c328a7bd0f255b0b7e819ca2c909701a3 \ No newline at end of file +4a17df139ac41e29c9a2e58afbd1238a5e94bd36 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 80a8ace510..f9dbfa14ad 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1955,7 +1955,7 @@ static int getAndInitPage( /* If obtaining a child page for a cursor, we must verify that the page is ** compatible with the root page. */ if( pCur - && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->apPage[0]->intKey) + && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){ rc = SQLITE_CORRUPT_BKPT; releasePage(*ppPage); @@ -4784,6 +4784,7 @@ static int moveToRoot(BtCursor *pCur){ return rc; } pCur->iPage = 0; + pCur->curIntKey = pCur->apPage[0]->intKey; } pRoot = pCur->apPage[0]; assert( pRoot->pgno==pCur->pgnoRoot ); diff --git a/src/btreeInt.h b/src/btreeInt.h index fa8d2c1fb4..70aa937c7d 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -508,7 +508,6 @@ struct BtCursor { Btree *pBtree; /* The Btree to which this cursor belongs */ BtShared *pBt; /* The BtShared this cursor points to */ BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */ - struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ Pgno *aOverflow; /* Cache of overflow page locations */ CellInfo info; /* A parse of the cell we are pointing at */ i64 nKey; /* Size of pKey, or last integer key */ @@ -520,8 +519,13 @@ struct BtCursor { u8 curFlags; /* zero or more BTCF_* flags defined below */ u8 curPagerFlags; /* Flags to send to sqlite3PagerAcquire() */ u8 eState; /* One of the CURSOR_XXX constants (see below) */ - u8 hints; /* As configured by CursorSetHints() */ - i16 iPage; /* Index of current page in apPage */ + u8 hints; /* As configured by CursorSetHints() */ + /* All fields above are zeroed when the cursor is allocated. See + ** sqlite3BtreeCursorZero(). 
Fields that follow must be manually + ** initialized. */ + i8 iPage; /* Index of current page in apPage */ + u8 curIntKey; /* Value of apPage[0]->intKey */ + struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */ MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */ }; From c75d886b051565c5a9533c7726fe7512de7edc86 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 27 Jun 2015 23:55:20 +0000 Subject: [PATCH 183/206] Make greater use of BtCursor.curIntKey. FossilOrigin-Name: 63998471d023dd846d5583ac856e2acc47ad41ea --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 9 +++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/manifest b/manifest index 24c66a974b..e93f7ee73e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sBtCursor.curIntKey\sfield\sand\suse\sit\sfor\sa\ssmall\ssize\sreduction\sand\nperformance\sboost. -D 2015-06-27T22:49:10.437 +C Make\sgreater\suse\sof\sBtCursor.curIntKey. 
+D 2015-06-27T23:55:20.211 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,7 +269,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 124fb2cf3712a32e3cfec4441dac82218d267419 +F src/btree.c 8f86dae090ec737d71c31c2ed79e767bd3638bcb F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 8ca7124af9ee2ce27747a4e5500c27a254dea8eb F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 291d9e0c328a7bd0f255b0b7e819ca2c909701a3 -R 2c201a19b31719b4111966366152c8e1 +P 4a17df139ac41e29c9a2e58afbd1238a5e94bd36 +R 951a5590bfe7a8b4e737119af9769202 U drh -Z 3864de7c8c7143c5030e74933e17298b +Z 51f9425184670e22671d083e0bae99bd diff --git a/manifest.uuid b/manifest.uuid index 7646601c90..02124d128b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4a17df139ac41e29c9a2e58afbd1238a5e94bd36 \ No newline at end of file +63998471d023dd846d5583ac856e2acc47ad41ea \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index f9dbfa14ad..a23bd853fc 100644 --- a/src/btree.c +++ b/src/btree.c @@ -620,7 +620,7 @@ static int saveCursorPosition(BtCursor *pCur){ ** table, then malloc space for and store the pCur->nKey bytes of key ** data. 
*/ - if( 0==pCur->apPage[0]->intKey ){ + if( 0==pCur->curIntKey ){ void *pKey = sqlite3Malloc( pCur->nKey ); if( pKey ){ rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); @@ -633,7 +633,7 @@ static int saveCursorPosition(BtCursor *pCur){ rc = SQLITE_NOMEM; } } - assert( !pCur->apPage[0]->intKey || !pCur->pKey ); + assert( !pCur->curIntKey || !pCur->pKey ); if( rc==SQLITE_OK ){ btreeReleaseAllCursorPages(pCur); @@ -4987,7 +4987,7 @@ int sqlite3BtreeMovetoUnpacked( /* If the cursor is already positioned at the point we are trying ** to move to, then just return without doing any work */ if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 - && pCur->apPage[0]->intKey + && pCur->curIntKey ){ if( pCur->info.nKey==intKey ){ *pRes = 0; @@ -5022,7 +5022,8 @@ int sqlite3BtreeMovetoUnpacked( assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); return SQLITE_OK; } - assert( pCur->apPage[0]->intKey || pIdxKey ); + assert( pCur->apPage[0]->intKey==pCur->curIntKey ); + assert( pCur->curIntKey || pIdxKey ); for(;;){ int lwr, upr, idx, c; Pgno chldPg; From c5ef7151b0cf99cabaa0d2ad7e1a2035102b9541 Mon Sep 17 00:00:00 2001 From: drh Date: Sun, 28 Jun 2015 02:58:51 +0000 Subject: [PATCH 184/206] Avoid unnecessary calls to sqlite3VdbeSerialTypeLen() for integer serial types, for a small size reduction and a speed increase. FossilOrigin-Name: 9cd30d33b1d02dc8c55c1d74bdbcefab63ebf2a7 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/vdbeaux.c | 14 ++++++++++---- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/manifest b/manifest index e93f7ee73e..31eb85101a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Make\sgreater\suse\sof\sBtCursor.curIntKey. -D 2015-06-27T23:55:20.211 +C Avoid\sunnecessary\scalls\sto\ssqlite3VdbeSerialTypeLen()\sfor\sinteger\nserial\stypes,\sfor\sa\ssmall\ssize\sreduction\sand\sa\sspeed\sincrease. 
+D 2015-06-28T02:58:51.359 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -394,7 +394,7 @@ F src/vdbe.c 3d5a78d39b15dc91ea2c11017d560a4224eb2f75 F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 F src/vdbeInt.h 8b54e01ad0463590e7cffabce0bc36da9ee4f816 F src/vdbeapi.c 6a0d7757987018ff6b1b81bc5293219cd26bb299 -F src/vdbeaux.c 13261b7597c7f189232f84a1e175a3268ea2c32b +F src/vdbeaux.c 54bcc56d368b2d0bebc523cff514893156c09daf F src/vdbeblob.c 4f2e8e075d238392df98c5e03a64342465b03f90 F src/vdbemem.c ae38a0d35ae71cf604381a887c170466ba518090 F src/vdbesort.c f5009e7a35e3065635d8918b9a31f498a499976b @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 4a17df139ac41e29c9a2e58afbd1238a5e94bd36 -R 951a5590bfe7a8b4e737119af9769202 +P 63998471d023dd846d5583ac856e2acc47ad41ea +R 212ca1fcb98a576a11f4bdf11ee3ebc0 U drh -Z 51f9425184670e22671d083e0bae99bd +Z 171de1cc147222ccddab9bfa4a1ca423 diff --git a/manifest.uuid b/manifest.uuid index 02124d128b..64424134b8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -63998471d023dd846d5583ac856e2acc47ad41ea \ No newline at end of file +9cd30d33b1d02dc8c55c1d74bdbcefab63ebf2a7 \ No newline at end of file diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 05a6952334..5dbbadaa2b 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -2961,6 +2961,13 @@ u32 sqlite3VdbeSerialType(Mem *pMem, int file_format){ return ((n*2) + 12 + ((flags&MEM_Str)!=0)); } +/* +** The sizes for serial types less than 12 +*/ +static const u8 sqlite3SmallTypeSizes[] = { + 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 +}; + /* ** Return the length of the data corresponding to the supplied serial-type. 
*/ @@ -2968,8 +2975,7 @@ u32 sqlite3VdbeSerialTypeLen(u32 serial_type){ if( serial_type>=12 ){ return (serial_type-12)/2; }else{ - static const u8 aSize[] = { 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 }; - return aSize[serial_type]; + return sqlite3SmallTypeSizes[serial_type]; } } @@ -3053,7 +3059,7 @@ u32 sqlite3VdbeSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ }else{ v = pMem->u.i; } - len = i = sqlite3VdbeSerialTypeLen(serial_type); + len = i = sqlite3SmallTypeSizes[serial_type]; assert( i>0 ); do{ buf[--i] = (u8)(v&0xFF); @@ -4082,7 +4088,7 @@ int sqlite3VdbeIdxRowid(sqlite3 *db, BtCursor *pCur, i64 *rowid){ if( unlikely(typeRowid<1 || typeRowid>9 || typeRowid==7) ){ goto idx_rowid_corruption; } - lenRowid = sqlite3VdbeSerialTypeLen(typeRowid); + lenRowid = sqlite3SmallTypeSizes[typeRowid]; testcase( (u32)m.n==szHdr+lenRowid ); if( unlikely((u32)m.n Date: Sun, 28 Jun 2015 17:33:11 +0000 Subject: [PATCH 185/206] Comment typo fix and minor cleanup in the pager. No logic changes. FossilOrigin-Name: 94ef17012855b7be725594c34bcea75f34c40190 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/pager.c | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/manifest b/manifest index 31eb85101a..2505603254 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\sunnecessary\scalls\sto\ssqlite3VdbeSerialTypeLen()\sfor\sinteger\nserial\stypes,\sfor\sa\ssmall\ssize\sreduction\sand\sa\sspeed\sincrease. -D 2015-06-28T02:58:51.359 +C Comment\stypo\sfix\sand\sminor\scleanup\sin\sthe\spager.\s\sNo\slogic\schanges. 
+D 2015-06-28T17:33:11.230 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,7 +314,7 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 9bc918a009285f96ec6dac62dd764c7063552455 +F src/pager.c db79b64a5498e2b3a72f0f6bc74faebe48da3e95 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c d8b19632706dd6b81b03d0c5fd1e6bab8c13d0b9 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 63998471d023dd846d5583ac856e2acc47ad41ea -R 212ca1fcb98a576a11f4bdf11ee3ebc0 +P 9cd30d33b1d02dc8c55c1d74bdbcefab63ebf2a7 +R b8484b94b4d1a85c0081f4649a289df8 U drh -Z 171de1cc147222ccddab9bfa4a1ca423 +Z b085b905d03794e34ce8f566e5412526 diff --git a/manifest.uuid b/manifest.uuid index 64424134b8..b03653669c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9cd30d33b1d02dc8c55c1d74bdbcefab63ebf2a7 \ No newline at end of file +94ef17012855b7be725594c34bcea75f34c40190 \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 291c7e4968..a50272d377 100644 --- a/src/pager.c +++ b/src/pager.c @@ -5131,7 +5131,7 @@ int sqlite3PagerSharedLock(Pager *pPager){ ** occurring on the very first access to a file, in order to save a ** single unnecessary sqlite3OsRead() call at the start-up. 
** - ** Database changes is detected by looking at 15 bytes beginning + ** Database changes are detected by looking at 15 bytes beginning ** at offset 24 into the file. The first 4 of these 16 bytes are ** a 32-bit counter that is incremented with each change. The ** other bytes change randomly with each file change when @@ -5352,10 +5352,11 @@ int sqlite3PagerAcquire( pPg = 0; goto pager_acquire_err; } - assert( (*ppPage)->pgno==pgno ); - assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 ); + assert( pPg==(*ppPage) ); + assert( pPg->pgno==pgno ); + assert( pPg->pPager==pPager || pPg->pPager==0 ); - if( (*ppPage)->pPager && !noContent ){ + if( pPg->pPager && !noContent ){ /* In this case the pcache already contains an initialized copy of ** the page. Return without further ado. */ assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) ); @@ -5366,7 +5367,6 @@ int sqlite3PagerAcquire( /* The pager cache has created a new page. Its content needs to ** be initialized. */ - pPg = *ppPage; pPg->pPager = pPager; /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page From 95c91e145adac9a16280d575b2b806266803c437 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 00:21:00 +0000 Subject: [PATCH 186/206] Use tail recursion in pcache1Unpin() to get a small size reduction and speed improvement. FossilOrigin-Name: fde70472aebc09b34a517131f676e6847d02b747 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/pcache1.c | 15 +++++++-------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/manifest b/manifest index 2505603254..d8f09afc9d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Comment\stypo\sfix\sand\sminor\scleanup\sin\sthe\spager.\s\sNo\slogic\schanges. -D 2015-06-28T17:33:11.230 +C Use\stail\srecursion\sin\spcache1Unpin()\sto\sget\sa\ssmall\ssize\sreduction\sand\nspeed\simprovement. 
+D 2015-06-29T00:21:00.874 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -319,7 +319,7 @@ F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c d8b19632706dd6b81b03d0c5fd1e6bab8c13d0b9 F src/pcache.h b44658c9c932d203510279439d891a2a83e12ba8 -F src/pcache1.c 8e3799b33c41d517d86444d4abefc80d4f02adca +F src/pcache1.c 9ec20f98f50ed7415019303ae9bd3745d4b7bd9b F src/pragma.c c1f4d012ea9f6b1ce52d341b2cd0ad72d560afd7 F src/pragma.h b8632d7cdda7b25323fa580e3e558a4f0d4502cc F src/prepare.c 82e5db1013846a819f198336fed72c44c974e7b1 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 9cd30d33b1d02dc8c55c1d74bdbcefab63ebf2a7 -R b8484b94b4d1a85c0081f4649a289df8 +P 94ef17012855b7be725594c34bcea75f34c40190 +R 33d15daee3cafade223d75c1a4d78a0b U drh -Z b085b905d03794e34ce8f566e5412526 +Z 83eec004ab67e81db298053e8cf0783c diff --git a/manifest.uuid b/manifest.uuid index b03653669c..8003997611 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -94ef17012855b7be725594c34bcea75f34c40190 \ No newline at end of file +fde70472aebc09b34a517131f676e6847d02b747 \ No newline at end of file diff --git a/src/pcache1.c b/src/pcache1.c index c394ff1787..7185ab441b 100644 --- a/src/pcache1.c +++ b/src/pcache1.c @@ -462,10 +462,11 @@ static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){ /* ** Remove the page supplied as an argument from the hash table ** (PCache1.apHash structure) that it is currently stored in. +** Also free the page if freePage is true. ** ** The PGroup mutex must be held when this function is called. 
*/ -static void pcache1RemoveFromHash(PgHdr1 *pPage){ +static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){ unsigned int h; PCache1 *pCache = pPage->pCache; PgHdr1 **pp; @@ -476,6 +477,7 @@ static void pcache1RemoveFromHash(PgHdr1 *pPage){ *pp = (*pp)->pNext; pCache->nPage--; + if( freeFlag ) pcache1FreePage(pPage); } /* @@ -489,8 +491,7 @@ static void pcache1EnforceMaxPage(PGroup *pGroup){ assert( p->pCache->pGroup==pGroup ); assert( p->isPinned==0 ); pcache1PinPage(p); - pcache1RemoveFromHash(p); - pcache1FreePage(p); + pcache1RemoveFromHash(p, 1); } } @@ -714,7 +715,7 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( PCache1 *pOther; pPage = pGroup->pLruTail; assert( pPage->isPinned==0 ); - pcache1RemoveFromHash(pPage); + pcache1RemoveFromHash(pPage, 0); pcache1PinPage(pPage); pOther = pPage->pCache; @@ -912,8 +913,7 @@ static void pcache1Unpin( assert( pPage->isPinned==1 ); if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){ - pcache1RemoveFromHash(pPage); - pcache1FreePage(pPage); + pcache1RemoveFromHash(pPage, 1); }else{ /* Add the page to the PGroup LRU list. */ if( pGroup->pLruHead ){ @@ -1067,8 +1067,7 @@ int sqlite3PcacheReleaseMemory(int nReq){ #endif assert( p->isPinned==0 ); pcache1PinPage(p); - pcache1RemoveFromHash(p); - pcache1FreePage(p); + pcache1RemoveFromHash(p, 1); } pcache1LeaveMutex(&pcache1.grp); } From 325d087aa7e4f5ef34ba3860b219e227d883f79b Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 00:52:33 +0000 Subject: [PATCH 187/206] Move an assert() and testcase() into their correct position within getAndInitPage(). 
FossilOrigin-Name: 18115164e12509ec21f34598089a7f1310048819 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 5 ++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/manifest b/manifest index d8f09afc9d..b33b447eed 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Use\stail\srecursion\sin\spcache1Unpin()\sto\sget\sa\ssmall\ssize\sreduction\sand\nspeed\simprovement. -D 2015-06-29T00:21:00.874 +C Move\san\sassert()\sand\stestcase()\sinto\stheir\scorrect\sposition\swithin\ngetAndInitPage(). +D 2015-06-29T00:52:33.038 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,7 +269,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 8f86dae090ec737d71c31c2ed79e767bd3638bcb +F src/btree.c ce342e156716fb64b9cf5c040260d0b47989f37b F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 8ca7124af9ee2ce27747a4e5500c27a254dea8eb F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 94ef17012855b7be725594c34bcea75f34c40190 -R 33d15daee3cafade223d75c1a4d78a0b +P fde70472aebc09b34a517131f676e6847d02b747 +R 2759f4b6497579e6ac5cdd2dfc68ef6f U drh -Z 83eec004ab67e81db298053e8cf0783c +Z 7a56a5c524bee1a78dcf76e001f644d9 diff --git a/manifest.uuid b/manifest.uuid index 8003997611..cbb6f69659 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fde70472aebc09b34a517131f676e6847d02b747 \ No newline at end of file 
+18115164e12509ec21f34598089a7f1310048819 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index a23bd853fc..027012e33a 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1961,13 +1961,12 @@ static int getAndInitPage( releasePage(*ppPage); goto getAndInitPage_error; } - - testcase( pgno==0 ); - assert( pgno!=0 || rc==SQLITE_CORRUPT ); return SQLITE_OK; getAndInitPage_error: if( pCur ) pCur->iPage--; + testcase( pgno==0 ); + assert( pgno!=0 || rc==SQLITE_CORRUPT ); return rc; } From 234a93fc9c332230726284a1d5cb31d689985ff5 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 03:28:43 +0000 Subject: [PATCH 188/206] Very small size reduction and performance increase in sqlite3BitvecTest(). FossilOrigin-Name: 9b3a7281bd45994edf813a687e4b7a0761697929 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/bitvec.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/manifest b/manifest index b33b447eed..36bf514e30 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Move\san\sassert()\sand\stestcase()\sinto\stheir\scorrect\sposition\swithin\ngetAndInitPage(). -D 2015-06-29T00:52:33.038 +C Very\ssmall\ssize\sreduction\sand\sperformance\sincrease\sin\ssqlite3BitvecTest(). 
+D 2015-06-29T03:28:43.213 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -267,7 +267,7 @@ F src/analyze.c f89727c36f997bd2bf6c5e546c2f51dc94e6f2a4 F src/attach.c e944d0052b577703b9b83aac1638452ff42a8395 F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 -F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d +F src/bitvec.c 828b218d3fc39f8ce9c9c5f4de4106fe08c92303 F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 F src/btree.c ce342e156716fb64b9cf5c040260d0b47989f37b F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fde70472aebc09b34a517131f676e6847d02b747 -R 2759f4b6497579e6ac5cdd2dfc68ef6f +P 18115164e12509ec21f34598089a7f1310048819 +R 01e6ba6910c7ae6426ef3c4845c78ace U drh -Z 7a56a5c524bee1a78dcf76e001f644d9 +Z 0a8f5dc054956bcb3a50cf01c1349d73 diff --git a/manifest.uuid b/manifest.uuid index cbb6f69659..137b220802 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -18115164e12509ec21f34598089a7f1310048819 \ No newline at end of file +9b3a7281bd45994edf813a687e4b7a0761697929 \ No newline at end of file diff --git a/src/bitvec.c b/src/bitvec.c index c348974546..f1f347fef0 100644 --- a/src/bitvec.c +++ b/src/bitvec.c @@ -128,8 +128,8 @@ Bitvec *sqlite3BitvecCreate(u32 iSize){ */ int sqlite3BitvecTest(Bitvec *p, u32 i){ if( p==0 ) return 0; - if( i>p->iSize || i==0 ) return 0; i--; + if( i>=p->iSize ) return 0; while( p->iDivisor ){ u32 bin = i/p->iDivisor; i = i%p->iDivisor; From c78ae916b956be781aaf26b75004ab051fb46291 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 
29 Jun 2015 04:21:15 +0000 Subject: [PATCH 189/206] Add the new PGHDR_CLEAN bit to PgHdr.flags in pcache.c. This bit is always the opposite of PGHDR_DIRTY. Use the extra bit to avoid a comparison for a small performance boost. FossilOrigin-Name: 8619fc346d9a5a66a3c4566b4cc032b6b6bf73fd --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/pcache.c | 16 +++++++++++----- src/pcache.h | 1 + 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/manifest b/manifest index 36bf514e30..14e35070b6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Very\ssmall\ssize\sreduction\sand\sperformance\sincrease\sin\ssqlite3BitvecTest(). -D 2015-06-29T03:28:43.213 +C Add\sthe\snew\sPGHDR_CLEAN\sbit\sto\sPgHdr.flags\sin\spcache.c.\s\sThis\sbit\sis\salways\nthe\sopposite\sof\sPGHDR_DIRTY.\s\sUse\sthe\sextra\sbit\sto\savoid\sa\scomparison\nfor\sa\ssmall\sperformance\sboost. +D 2015-06-29T04:21:15.041 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -317,8 +317,8 @@ F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca F src/pager.c db79b64a5498e2b3a72f0f6bc74faebe48da3e95 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 -F src/pcache.c d8b19632706dd6b81b03d0c5fd1e6bab8c13d0b9 -F src/pcache.h b44658c9c932d203510279439d891a2a83e12ba8 +F src/pcache.c 994f15b465337a079feb04aac34c199dbc610247 +F src/pcache.h 445374bcf296515fb970c8bbf47c36222196d197 F src/pcache1.c 9ec20f98f50ed7415019303ae9bd3745d4b7bd9b F src/pragma.c c1f4d012ea9f6b1ce52d341b2cd0ad72d560afd7 F src/pragma.h b8632d7cdda7b25323fa580e3e558a4f0d4502cc @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix 
deb315d026cc8400325c5863eef847784a219a2f -P 18115164e12509ec21f34598089a7f1310048819 -R 01e6ba6910c7ae6426ef3c4845c78ace +P 9b3a7281bd45994edf813a687e4b7a0761697929 +R 07d5c4c2284ae7b5a16a8d841b25dd0d U drh -Z 0a8f5dc054956bcb3a50cf01c1349d73 +Z fa5a6a0b1c08c7af0b43f02924ea472a diff --git a/manifest.uuid b/manifest.uuid index 137b220802..1562298e0b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9b3a7281bd45994edf813a687e4b7a0761697929 \ No newline at end of file +8619fc346d9a5a66a3c4566b4cc032b6b6bf73fd \ No newline at end of file diff --git a/src/pcache.c b/src/pcache.c index d1b3f22a11..220f0bf523 100644 --- a/src/pcache.c +++ b/src/pcache.c @@ -328,6 +328,7 @@ static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit( memset(pPgHdr->pExtra, 0, pCache->szExtra); pPgHdr->pCache = pCache; pPgHdr->pgno = pgno; + pPgHdr->flags = PGHDR_CLEAN; return sqlite3PcacheFetchFinish(pCache,pgno,pPage); } @@ -366,7 +367,7 @@ void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ p->nRef--; if( p->nRef==0 ){ p->pCache->nRef--; - if( (p->flags&PGHDR_DIRTY)==0 ){ + if( p->flags&PGHDR_CLEAN ){ pcacheUnpin(p); }else if( p->pDirtyPrev!=0 ){ /* Move the page to the head of the dirty list. */ @@ -402,11 +403,14 @@ void sqlite3PcacheDrop(PgHdr *p){ ** make it so. 
*/ void sqlite3PcacheMakeDirty(PgHdr *p){ - p->flags &= ~PGHDR_DONT_WRITE; assert( p->nRef>0 ); - if( 0==(p->flags & PGHDR_DIRTY) ){ - p->flags |= PGHDR_DIRTY; - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){ + p->flags &= ~PGHDR_DONT_WRITE; + if( p->flags & PGHDR_CLEAN ){ + p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN); + assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY ); + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + } } } @@ -416,8 +420,10 @@ void sqlite3PcacheMakeDirty(PgHdr *p){ */ void sqlite3PcacheMakeClean(PgHdr *p){ if( (p->flags & PGHDR_DIRTY) ){ + assert( (p->flags & PGHDR_CLEAN)==0 ); pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC); + p->flags |= PGHDR_CLEAN; if( p->nRef==0 ){ pcacheUnpin(p); } diff --git a/src/pcache.h b/src/pcache.h index 9ed62a88ff..14053c06f3 100644 --- a/src/pcache.h +++ b/src/pcache.h @@ -46,6 +46,7 @@ struct PgHdr { }; /* Bit values for PgHdr.flags */ +#define PGHDR_CLEAN 0x001 /* Page is unchanged */ #define PGHDR_DIRTY 0x002 /* Page has changed */ #define PGHDR_NEED_SYNC 0x004 /* Fsync the rollback journal before ** writing this page to the database */ From 82ef8775c731081e8377ceb8cbea23f7406d2d8a Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 14:11:50 +0000 Subject: [PATCH 190/206] Simplifications and performance improvement in pager_write(). 
FossilOrigin-Name: ab7aeeead395a05b91a921ef9ebe9252fffad667 --- manifest | 16 ++--- manifest.uuid | 2 +- src/bitvec.c | 7 +- src/pager.c | 169 ++++++++++++++++++++++++------------------------ src/sqliteInt.h | 1 + 5 files changed, 101 insertions(+), 94 deletions(-) diff --git a/manifest b/manifest index 14e35070b6..b0a37ff3cc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\snew\sPGHDR_CLEAN\sbit\sto\sPgHdr.flags\sin\spcache.c.\s\sThis\sbit\sis\salways\nthe\sopposite\sof\sPGHDR_DIRTY.\s\sUse\sthe\sextra\sbit\sto\savoid\sa\scomparison\nfor\sa\ssmall\sperformance\sboost. -D 2015-06-29T04:21:15.041 +C Simplifications\sand\sperformance\simprovement\sin\spager_write(). +D 2015-06-29T14:11:50.755 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -267,7 +267,7 @@ F src/analyze.c f89727c36f997bd2bf6c5e546c2f51dc94e6f2a4 F src/attach.c e944d0052b577703b9b83aac1638452ff42a8395 F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 -F src/bitvec.c 828b218d3fc39f8ce9c9c5f4de4106fe08c92303 +F src/bitvec.c d1f21d7d91690747881f03940584f4cc548c9d3d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 F src/btree.c ce342e156716fb64b9cf5c040260d0b47989f37b F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 @@ -314,7 +314,7 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c db79b64a5498e2b3a72f0f6bc74faebe48da3e95 +F src/pager.c 7e745749c375e3c35ef92051e2c1f7425cfcb3de F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c 994f15b465337a079feb04aac34c199dbc610247 @@ -332,7 +332,7 @@ F src/shell.c 
8af3cced094aebb5f57a8ad739b9dafc7867eed7 F src/sqlite.h.in 76d2f5637eb795b6300d9dd3c3ec3632ffafd721 F src/sqlite3.rc 992c9f5fb8285ae285d6be28240a7e8d3a7f2bad F src/sqlite3ext.h be1a718b7d2ce40ceba725ae92c8eb5f18003066 -F src/sqliteInt.h d5df694bc33870e77fb08f389d12309597fe3059 +F src/sqliteInt.h 89768198547bdd70a160e47643a0e493f711e8d0 F src/sqliteLimit.h 216557999cb45f2e3578ed53ebefe228d779cb46 F src/status.c f266ad8a2892d659b74f0f50cb6a88b6e7c12179 F src/table.c 51b46b2a62d1b3a959633d593b89bab5e2c9155e @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 9b3a7281bd45994edf813a687e4b7a0761697929 -R 07d5c4c2284ae7b5a16a8d841b25dd0d +P 8619fc346d9a5a66a3c4566b4cc032b6b6bf73fd +R 0aa742facd6911aa1b0a57fdcda4cf1c U drh -Z fa5a6a0b1c08c7af0b43f02924ea472a +Z 44e348c1758353ed5733624fd5385fad diff --git a/manifest.uuid b/manifest.uuid index 1562298e0b..a250a1de9a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8619fc346d9a5a66a3c4566b4cc032b6b6bf73fd \ No newline at end of file +ab7aeeead395a05b91a921ef9ebe9252fffad667 \ No newline at end of file diff --git a/src/bitvec.c b/src/bitvec.c index f1f347fef0..fd908f791b 100644 --- a/src/bitvec.c +++ b/src/bitvec.c @@ -126,8 +126,8 @@ Bitvec *sqlite3BitvecCreate(u32 iSize){ ** If p is NULL (if the bitmap has not been created) or if ** i is out of range, then return false. */ -int sqlite3BitvecTest(Bitvec *p, u32 i){ - if( p==0 ) return 0; +int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){ + assert( p!=0 ); i--; if( i>=p->iSize ) return 0; while( p->iDivisor ){ @@ -149,6 +149,9 @@ int sqlite3BitvecTest(Bitvec *p, u32 i){ return 0; } } +int sqlite3BitvecTest(Bitvec *p, u32 i){ + return p!=0 && sqlite3BitvecTestNotNull(p,i); +} /* ** Set the i-th bit. 
Return 0 on success and an error code if diff --git a/src/pager.c b/src/pager.c index a50272d377..164fd6106f 100644 --- a/src/pager.c +++ b/src/pager.c @@ -808,7 +808,7 @@ static const unsigned char aJournalMagic[] = { ** ** if( pPager->jfd->pMethods ){ ... */ -#define isOpen(pFd) ((pFd)->pMethods) +#define isOpen(pFd) ((pFd)->pMethods!=0) /* ** Return true if this pager uses a write-ahead log instead of the usual @@ -1031,19 +1031,21 @@ static int subjRequiresPage(PgHdr *pPg){ int i; for(i=0; inSavepoint; i++){ p = &pPager->aSavepoint[i]; - if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){ + if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){ return 1; } } return 0; } +#ifdef SQLITE_DEBUG /* ** Return true if the page is already in the journal file. */ static int pageInJournal(Pager *pPager, PgHdr *pPg){ return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno); } +#endif /* ** Read a 32-bit integer from the given file descriptor. Store the integer @@ -5648,6 +5650,59 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ return rc; } +/* +** Write page pPg onto the end of the rollback journal. +*/ +static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + int rc; + u32 cksum; + char *pData2; + i64 iOff = pPager->journalOff; + + /* We should never write to the journal file the page that + ** contains the database locks. The following assert verifies + ** that we do not. */ + assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); + + assert( pPager->journalHdr<=pPager->journalOff ); + CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); + cksum = pager_cksum(pPager, (u8*)pData2); + + /* Even if an IO or diskfull error occurs while journalling the + ** page in the block above, set the need-sync flag for the page. 
+ ** Otherwise, when the transaction is rolled back, the logic in + ** playback_one_page() will think that the page needs to be restored + ** in the database file. And if an IO error occurs while doing so, + ** then corruption may follow. + */ + pPg->flags |= PGHDR_NEED_SYNC; + + rc = write32bits(pPager->jfd, iOff, pPg->pgno); + if( rc!=SQLITE_OK ) return rc; + rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); + if( rc!=SQLITE_OK ) return rc; + rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); + if( rc!=SQLITE_OK ) return rc; + + IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, + pPager->journalOff, pPager->pageSize)); + PAGER_INCR(sqlite3_pager_writej_count); + PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); + + pPager->journalOff += 8 + pPager->pageSize; + pPager->nRec++; + assert( pPager->pInJournal!=0 ); + rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); + testcase( rc==SQLITE_NOMEM ); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + rc |= addToSavepointBitvecs(pPager, pPg->pgno); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + return rc; +} + /* ** Mark a single data page as writeable. The page is written into the ** main journal or sub-journal as required. If the page is written into @@ -5658,7 +5713,6 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ static int pager_write(PgHdr *pPg){ Pager *pPager = pPg->pPager; int rc = SQLITE_OK; - int inJournal; /* This routine is not called unless a write-transaction has already ** been started. The journal file may or may not be open at this point. @@ -5671,7 +5725,6 @@ static int pager_write(PgHdr *pPg){ assert( assert_pager_state(pPager) ); assert( pPager->errCode==0 ); assert( pPager->readOnly==0 ); - CHECK_PAGE(pPg); /* The journal file needs to be opened. 
Higher level routines have already @@ -5690,91 +5743,41 @@ static int pager_write(PgHdr *pPg){ assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); assert( assert_pager_state(pPager) ); - /* Mark the page as dirty. If the page has already been written - ** to the journal then we can return right away. - */ + /* Mark the page that is about to be modified as dirty. */ sqlite3PcacheMakeDirty(pPg); - inJournal = pageInJournal(pPager, pPg); - if( inJournal && (pPager->nSavepoint==0 || !subjRequiresPage(pPg)) ){ - assert( !pagerUseWal(pPager) ); - }else{ - - /* The transaction journal now exists and we have a RESERVED or an - ** EXCLUSIVE lock on the main database file. Write the current page to - ** the transaction journal if it is not there already. - */ - if( !inJournal && !pagerUseWal(pPager) ){ - assert( pagerUseWal(pPager)==0 ); - if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){ - u32 cksum; - char *pData2; - i64 iOff = pPager->journalOff; - /* We should never write to the journal file the page that - ** contains the database locks. The following assert verifies - ** that we do not. */ - assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); - - assert( pPager->journalHdr<=pPager->journalOff ); - CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); - cksum = pager_cksum(pPager, (u8*)pData2); - - /* Even if an IO or diskfull error occurs while journalling the - ** page in the block above, set the need-sync flag for the page. - ** Otherwise, when the transaction is rolled back, the logic in - ** playback_one_page() will think that the page needs to be restored - ** in the database file. And if an IO error occurs while doing so, - ** then corruption may follow. 
- */ - pPg->flags |= PGHDR_NEED_SYNC; - - rc = write32bits(pPager->jfd, iOff, pPg->pgno); - if( rc!=SQLITE_OK ) return rc; - rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); - if( rc!=SQLITE_OK ) return rc; - rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); - if( rc!=SQLITE_OK ) return rc; - - IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, - pPager->journalOff, pPager->pageSize)); - PAGER_INCR(sqlite3_pager_writej_count); - PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", - PAGERID(pPager), pPg->pgno, - ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); - - pPager->journalOff += 8 + pPager->pageSize; - pPager->nRec++; - assert( pPager->pInJournal!=0 ); - rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); - testcase( rc==SQLITE_NOMEM ); - assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); - rc |= addToSavepointBitvecs(pPager, pPg->pgno); - if( rc!=SQLITE_OK ){ - assert( rc==SQLITE_NOMEM ); - return rc; - } - }else{ - if( pPager->eState!=PAGER_WRITER_DBMOD ){ - pPg->flags |= PGHDR_NEED_SYNC; - } - PAGERTRACE(("APPEND %d page %d needSync=%d\n", - PAGERID(pPager), pPg->pgno, - ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); + /* If a rollback journal is in use, them make sure the page that is about + ** to change is in the rollback journal, or if the page is a new page off + ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. + */ + assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); + if( pPager->pInJournal!=0 /* Journal open */ + && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 /* pPg not in jrnl */ + ){ + assert( pagerUseWal(pPager)==0 ); + if( pPg->pgno<=pPager->dbOrigSize ){ + rc = pagerAddPageToRollbackJournal(pPg); + if( rc!=SQLITE_OK ){ + return rc; } - } - - /* If the statement journal is open and the page is not in it, - ** then write the current page to the statement journal. 
Note that - ** the statement journal format differs from the standard journal format - ** in that it omits the checksums and the header. - */ - if( pPager->nSavepoint>0 && subjRequiresPage(pPg) ){ - rc = subjournalPage(pPg); + }else{ + if( pPager->eState!=PAGER_WRITER_DBMOD ){ + pPg->flags |= PGHDR_NEED_SYNC; + } + PAGERTRACE(("APPEND %d page %d needSync=%d\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); } } - - /* Update the database size and return. + + /* If the statement journal is open and the page is not in it, + ** then write the page into the statement journal. */ + if( pPager->nSavepoint>0 && subjRequiresPage(pPg) ){ + rc = subjournalPage(pPg); + } + + /* Update the database size and return. */ if( pPager->dbSize<pPg->pgno ){ pPager->dbSize = pPg->pgno; } diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 3ebd8deb80..aed6557cdb 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -3238,6 +3238,7 @@ int sqlite3CodeOnce(Parse *); Bitvec *sqlite3BitvecCreate(u32); int sqlite3BitvecTest(Bitvec*, u32); +int sqlite3BitvecTestNotNull(Bitvec*, u32); int sqlite3BitvecSet(Bitvec*, u32); void sqlite3BitvecClear(Bitvec*, u32, void*); void sqlite3BitvecDestroy(Bitvec*); From ef648698c8d91fea1b9565bbab323f1a25e0f18a Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 29 Jun 2015 15:41:50 +0000 Subject: [PATCH 191/206] Fix minor problems in the ota demo application. FossilOrigin-Name: 6aaaec6e63cdf713b0d67e24a892088ff251c82a --- ext/ota/ota.c | 2 +- ext/ota/sqlite3ota.c | 4 ++-- manifest | 16 ++++++++-------- manifest.uuid | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ext/ota/ota.c b/ext/ota/ota.c index a6df182855..fffc1267bd 100644 --- a/ext/ota/ota.c +++ b/ext/ota/ota.c @@ -89,7 +89,7 @@ int main(int argc, char **argv){ ** sqlite3ota_step() until either the OTA has been completely applied ** or an error occurs. Or, if nStep is greater than zero, call ** sqlite3ota_step() a maximum of nStep times. 
*/ - pOta = sqlite3ota_open(zTarget, zOta); + pOta = sqlite3ota_open(zTarget, zOta, 0); report_ota_vfs(pOta); for(i=0; (nStep<=0 || ipReal->pMethods->xFileControl; int rc; - assert( p->openFlags & - (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB|SQLITE_OPEN_TRANSIENT_DB) + assert( p->openFlags & (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB) + || p->openFlags & (SQLITE_OPEN_TRANSIENT_DB|SQLITE_OPEN_TEMP_JOURNAL) ); if( op==SQLITE_FCNTL_OTA ){ sqlite3ota *pOta = (sqlite3ota*)pArg; diff --git a/manifest b/manifest index b0a37ff3cc..e35c40e4d1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Simplifications\sand\sperformance\simprovement\sin\spager_write(). -D 2015-06-29T14:11:50.755 +C Fix\sminor\sproblems\sin\sthe\sota\sdemo\sapplication. +D 2015-06-29T15:41:50.815 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -200,7 +200,7 @@ F ext/misc/totype.c 4a167594e791abeed95e0a8db028822b5e8fe512 F ext/misc/vfslog.c fe40fab5c077a40477f7e5eba994309ecac6cc95 F ext/misc/vtshim.c babb0dc2bf116029e3e7c9a618b8a1377045303e F ext/misc/wholenumber.c 784b12543d60702ebdd47da936e278aa03076212 -F ext/ota/ota.c c47352838b967384a81eda5de75c352922a0dd6e +F ext/ota/ota.c 3a849c3b0a4ad6e63125668be9f67be03621216e F ext/ota/ota1.test abdcbe746db4c7f7b51e842b576cacb33eef28f5 F ext/ota/ota10.test 85e0f6e7964db5007590c1b299e75211ed4240d4 F ext/ota/ota11.test 2f606cd2b4af260a86b549e91b9f395450fc75cb @@ -216,7 +216,7 @@ F ext/ota/otaA.test ab67f7f53670b81c750dcc946c5b704f51c429a4 F ext/ota/otacrash.test 8346192b2d46cbe7787d5d65904d81d3262a3cbf F ext/ota/otafault.test 8c43586c2b96ca16bbce00b5d7e7d67316126db8 F ext/ota/otafault2.test fa202a98ca221faec318f3e5c5f39485b1256561 -F ext/ota/sqlite3ota.c 2246b779f46ab20d5e7876f5b96c378c601d20f4 +F ext/ota/sqlite3ota.c 21575d86eac30482a9bfbb2a531f433015e0e03c F ext/ota/sqlite3ota.h 
00028de37eede471ff1947d455cc3f33d3a911c6 F ext/ota/test_ota.c a876f88550d7d59a3ef62d4c1a5c04c4c2f1ebe1 F ext/rtree/README 6315c0d73ebf0ec40dedb5aa0e942bc8b54e3761 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8619fc346d9a5a66a3c4566b4cc032b6b6bf73fd -R 0aa742facd6911aa1b0a57fdcda4cf1c -U drh -Z 44e348c1758353ed5733624fd5385fad +P ab7aeeead395a05b91a921ef9ebe9252fffad667 +R cf8c1f0870ca2f41a49b1e8959952d21 +U dan +Z 5ca3fbc5a6a82f3d1d52496384d0d8e0 diff --git a/manifest.uuid b/manifest.uuid index a250a1de9a..719f6fc651 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ab7aeeead395a05b91a921ef9ebe9252fffad667 \ No newline at end of file +6aaaec6e63cdf713b0d67e24a892088ff251c82a \ No newline at end of file From e399ac2e1e72c6cc1dce09a273b8218332d57418 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 18:14:02 +0000 Subject: [PATCH 192/206] Fix over-length source code lines in pager.c. FossilOrigin-Name: 14de3d39267a4005a0fa900bab4adc4c104e4084 --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/pager.c | 47 ++++++++++++++++++++++++----------------------- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/manifest b/manifest index e35c40e4d1..52d2f83173 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sminor\sproblems\sin\sthe\sota\sdemo\sapplication. -D 2015-06-29T15:41:50.815 +C Fix\sover-length\ssource\scode\slines\sin\spager.c. 
+D 2015-06-29T18:14:02.529 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,7 +314,7 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 7e745749c375e3c35ef92051e2c1f7425cfcb3de +F src/pager.c 81099edb051a46330cb4f526aaa782ac31d5f576 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c 994f15b465337a079feb04aac34c199dbc610247 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ab7aeeead395a05b91a921ef9ebe9252fffad667 -R cf8c1f0870ca2f41a49b1e8959952d21 -U dan -Z 5ca3fbc5a6a82f3d1d52496384d0d8e0 +P 6aaaec6e63cdf713b0d67e24a892088ff251c82a +R 00b6fa5674088c81a24f73d93b973d9c +U drh +Z 08a04bd3dea2dc293fe05f9e7c08921e diff --git a/manifest.uuid b/manifest.uuid index 719f6fc651..106562fbad 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6aaaec6e63cdf713b0d67e24a892088ff251c82a \ No newline at end of file +14de3d39267a4005a0fa900bab4adc4c104e4084 \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 164fd6106f..687a87ce88 100644 --- a/src/pager.c +++ b/src/pager.c @@ -456,9 +456,9 @@ struct PagerSavepoint { /* ** Bits of the Pager.doNotSpill flag. See further description below. */ -#define SPILLFLAG_OFF 0x01 /* Never spill cache. 
Set via pragma */ -#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ -#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ +#define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ +#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ +#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ /* ** An open page cache is an instance of struct Pager. A description of @@ -540,11 +540,11 @@ struct PagerSavepoint { ** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF ** case is a user preference. ** -** If the SPILLFLAG_NOSYNC bit is set, writing to the database from pagerStress() -** is permitted, but syncing the journal file is not. This flag is set -** by sqlite3PagerWrite() when the file-system sector-size is larger than -** the database page-size in order to prevent a journal sync from happening -** in between the journalling of two pages on the same sector. +** If the SPILLFLAG_NOSYNC bit is set, writing to the database from +** pagerStress() is permitted, but syncing the journal file is not. +** This flag is set by sqlite3PagerWrite() when the file-system sector-size +** is larger than the database page-size in order to prevent a journal sync +** from happening in between the journalling of two pages on the same sector. 
** ** subjInMemory ** @@ -647,7 +647,7 @@ struct Pager { u8 doNotSpill; /* Do not spill the cache when non-zero */ u8 subjInMemory; /* True to use in-memory sub-journals */ u8 bUseFetch; /* True to use xFetch() */ - u8 hasBeenUsed; /* True if any content previously read from this pager*/ + u8 hasBeenUsed; /* True if any content previously read */ Pgno dbSize; /* Number of pages in the database */ Pgno dbOrigSize; /* dbSize before the current transaction */ Pgno dbFileSize; /* Number of pages in the database file */ @@ -1657,7 +1657,8 @@ static int writeMasterJournal(Pager *pPager, const char *zMaster){ || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum))) - || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8))) + || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, + iHdrOff+4+nMaster+8))) ){ return rc; } @@ -4727,7 +4728,7 @@ int sqlite3PagerOpen( act_like_temp_file: tempFile = 1; pPager->eState = PAGER_READER; /* Pretend we already have a lock */ - pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE locking mode */ + pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE mode */ pPager->noLock = 1; /* Do no locking */ readOnly = (vfsFlags&SQLITE_OPEN_READONLY); } @@ -4746,7 +4747,7 @@ act_like_temp_file: assert( nExtra<1000 ); nExtra = ROUND8(nExtra); rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, - !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); + !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); } /* If an error occurred above, free the Pager structure and close the file. @@ -5751,8 +5752,8 @@ static int pager_write(PgHdr *pPg){ ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. 
*/ assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); - if( pPager->pInJournal!=0 /* Journal open */ - && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 /* pPg not in jrnl */ + if( pPager->pInJournal!=0 + && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 ){ assert( pagerUseWal(pPager)==0 ); if( pPg->pgno<=pPager->dbOrigSize ){ @@ -5792,17 +5793,17 @@ static int pager_write(PgHdr *pPg){ ** a write. ** ** Usually, the sector size is less than or equal to the page size, in which -** case pages can be individually written. This routine only runs in the exceptional -** case where the page size is smaller than the sector size. +** case pages can be individually written. This routine only runs in the +** exceptional case where the page size is smaller than the sector size. */ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ - int rc = SQLITE_OK; /* Return code */ - Pgno nPageCount; /* Total number of pages in database file */ - Pgno pg1; /* First page of the sector pPg is located on. */ - int nPage = 0; /* Number of pages starting at pg1 to journal */ - int ii; /* Loop counter */ - int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ - Pager *pPager = pPg->pPager; /* The pager that owns pPg */ + int rc = SQLITE_OK; /* Return code */ + Pgno nPageCount; /* Total number of pages in database file */ + Pgno pg1; /* First page of the sector pPg is located on. */ + int nPage = 0; /* Number of pages starting at pg1 to journal */ + int ii; /* Loop counter */ + int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ + Pager *pPager = pPg->pPager; /* The pager that owns pPg */ Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); /* Set the doNotSpill NOSYNC bit to 1. 
This is because we cannot allow From 1aacbdb3745e04b3b59ef36fb81034c054b8f154 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 18:29:10 +0000 Subject: [PATCH 193/206] Add the PGHDR_WRITEABLE bit for PgHdr.flags which is used to distinguish between pages that are on the dirty list and pages that are safe to modify. FossilOrigin-Name: 7c4ef7b7c8744af19075bb96d1e0b63e35978ed1 --- manifest | 16 ++++++++-------- manifest.uuid | 2 +- src/pager.c | 9 ++++++++- src/pcache.c | 2 +- src/pcache.h | 17 ++++++++--------- 5 files changed, 26 insertions(+), 20 deletions(-) diff --git a/manifest b/manifest index 52d2f83173..38f44bf307 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sover-length\ssource\scode\slines\sin\spager.c. -D 2015-06-29T18:14:02.529 +C Add\sthe\sPGHDR_WRITEABLE\sbit\sfor\sPgHdr.flags\swhich\sis\sused\sto\s\ndistinguish\sbetween\spages\sthat\sare\son\sthe\sdirty\slist\sand\spages\sthat\sare\nsafe\sto\smodify. +D 2015-06-29T18:29:10.051 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,11 +314,11 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 81099edb051a46330cb4f526aaa782ac31d5f576 +F src/pager.c 349cc089392bd0111e575bb0abacae0038a193c9 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 -F src/pcache.c 994f15b465337a079feb04aac34c199dbc610247 -F src/pcache.h 445374bcf296515fb970c8bbf47c36222196d197 +F src/pcache.c 379fd77feb732b39750eb733260d9c227d8a4314 +F src/pcache.h 9968603796240cdf83da7e7bef76edf90619cea9 F src/pcache1.c 9ec20f98f50ed7415019303ae9bd3745d4b7bd9b F src/pragma.c c1f4d012ea9f6b1ce52d341b2cd0ad72d560afd7 F src/pragma.h 
b8632d7cdda7b25323fa580e3e558a4f0d4502cc @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 6aaaec6e63cdf713b0d67e24a892088ff251c82a -R 00b6fa5674088c81a24f73d93b973d9c +P 14de3d39267a4005a0fa900bab4adc4c104e4084 +R 87e4d0501eab929e3fc91ca03b341d4f U drh -Z 08a04bd3dea2dc293fe05f9e7c08921e +Z 78f063c06b61cea5c0d3875d2ce428dd diff --git a/manifest.uuid b/manifest.uuid index 106562fbad..4c09287aba 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -14de3d39267a4005a0fa900bab4adc4c104e4084 \ No newline at end of file +7c4ef7b7c8744af19075bb96d1e0b63e35978ed1 \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 687a87ce88..7fe8c92def 100644 --- a/src/pager.c +++ b/src/pager.c @@ -5770,6 +5770,13 @@ static int pager_write(PgHdr *pPg){ ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); } } + + /* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list + ** and before writing the page into the rollback journal. Wait until now, + ** after the page has been successfully journalled, before setting the + ** PGHDR_WRITEABLE bit that indicates that the page can be safely modified. + */ + pPg->flags |= PGHDR_WRITEABLE; /* If the statement journal is open and the page is not in it, ** then write the page into the statement journal. 
@@ -5909,7 +5916,7 @@ int sqlite3PagerWrite(PgHdr *pPg){ */ #ifndef NDEBUG int sqlite3PagerIswriteable(DbPage *pPg){ - return pPg->flags&PGHDR_DIRTY; + return pPg->flags & PGHDR_WRITEABLE; } #endif diff --git a/src/pcache.c b/src/pcache.c index 220f0bf523..9b23bd8433 100644 --- a/src/pcache.c +++ b/src/pcache.c @@ -422,7 +422,7 @@ void sqlite3PcacheMakeClean(PgHdr *p){ if( (p->flags & PGHDR_DIRTY) ){ assert( (p->flags & PGHDR_CLEAN)==0 ); pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); - p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC); + p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE); p->flags |= PGHDR_CLEAN; if( p->nRef==0 ){ pcacheUnpin(p); diff --git a/src/pcache.h b/src/pcache.h index 14053c06f3..a0724df22f 100644 --- a/src/pcache.h +++ b/src/pcache.h @@ -46,15 +46,14 @@ struct PgHdr { }; /* Bit values for PgHdr.flags */ -#define PGHDR_CLEAN 0x001 /* Page is unchanged */ -#define PGHDR_DIRTY 0x002 /* Page has changed */ -#define PGHDR_NEED_SYNC 0x004 /* Fsync the rollback journal before - ** writing this page to the database */ -#define PGHDR_NEED_READ 0x008 /* Content is unread */ -#define PGHDR_REUSE_UNLIKELY 0x010 /* A hint that reuse is unlikely */ -#define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ - -#define PGHDR_MMAP 0x040 /* This is an mmap page object */ +#define PGHDR_CLEAN 0x001 /* Page not on the PCache.pDirty list */ +#define PGHDR_DIRTY 0x002 /* Page is on the PCache.pDirty list */ +#define PGHDR_WRITEABLE 0x004 /* Journaled and ready to modify */ +#define PGHDR_NEED_SYNC 0x008 /* Fsync the rollback journal before + ** writing this page to the database */ +#define PGHDR_NEED_READ 0x010 /* Content is unread */ +#define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ +#define PGHDR_MMAP 0x040 /* This is an mmap page object */ /* Initialize and shutdown the page cache subsystem */ int sqlite3PcacheInitialize(void); From 60e32edba5b50692af25ad56f849d6b41c2b2c72 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 
2015 19:08:18 +0000 Subject: [PATCH 194/206] Combine subjRequiresPage() and subjournalPage() into a single subjournalPageIfRequired() routine. FossilOrigin-Name: 3b65eb56c422855ca47f709247205f0c77d98a5c --- manifest | 12 +++++------ manifest.uuid | 2 +- src/pager.c | 55 +++++++++++---------------------------------------- 3 files changed, 19 insertions(+), 50 deletions(-) diff --git a/manifest b/manifest index 38f44bf307..97944da70e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sPGHDR_WRITEABLE\sbit\sfor\sPgHdr.flags\swhich\sis\sused\sto\s\ndistinguish\sbetween\spages\sthat\sare\son\sthe\sdirty\slist\sand\spages\sthat\sare\nsafe\sto\smodify. -D 2015-06-29T18:29:10.051 +C Combine\ssubjRequiresPage()\sand\ssubjournalPage()\sinto\sa\ssingle\nsubjournalPageIfRequired()\sroutine. +D 2015-06-29T19:08:18.213 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,7 +314,7 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 349cc089392bd0111e575bb0abacae0038a193c9 +F src/pager.c 4cf1b151727f5f12898927c6688b167fd4999d14 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c 379fd77feb732b39750eb733260d9c227d8a4314 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 14de3d39267a4005a0fa900bab4adc4c104e4084 -R 87e4d0501eab929e3fc91ca03b341d4f +P 7c4ef7b7c8744af19075bb96d1e0b63e35978ed1 +R f62d201ad6eea9a89949366a04424df3 U drh -Z 
78f063c06b61cea5c0d3875d2ce428dd +Z d7a21ab6fd66c107b4716f65f30e363a diff --git a/manifest.uuid b/manifest.uuid index 4c09287aba..76f6ffe525 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7c4ef7b7c8744af19075bb96d1e0b63e35978ed1 \ No newline at end of file +3b65eb56c422855ca47f709247205f0c77d98a5c \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 7fe8c92def..bf6f8f4d69 100644 --- a/src/pager.c +++ b/src/pager.c @@ -4320,8 +4320,6 @@ static int openSubJournal(Pager *pPager){ /* ** Append a record of the current state of page pPg to the sub-journal. -** It is the callers responsibility to use subjRequiresPage() to check -** that it is really required before calling this function. ** ** If successful, set the bit corresponding to pPg->pgno in the bitvecs ** for all open savepoints before returning. @@ -4368,6 +4366,13 @@ static int subjournalPage(PgHdr *pPg){ } return rc; } +static int subjournalPageIfRequired(PgHdr *pPg){ + if( subjRequiresPage(pPg) ){ + return subjournalPage(pPg); + }else{ + return SQLITE_OK; + } +} /* ** This function is called by the pcache layer when it has reached some @@ -4425,9 +4430,7 @@ static int pagerStress(void *p, PgHdr *pPg){ pPg->pDirty = 0; if( pagerUseWal(pPager) ){ /* Write a single frame for this page to the log. */ - if( subjRequiresPage(pPg) ){ - rc = subjournalPage(pPg); - } + rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0); } @@ -4440,39 +4443,6 @@ static int pagerStress(void *p, PgHdr *pPg){ rc = syncJournal(pPager, 1); } - /* If the page number of this page is larger than the current size of - ** the database image, it may need to be written to the sub-journal. - ** This is because the call to pager_write_pagelist() below will not - ** actually write data to the file in this case. 
- ** - ** Consider the following sequence of events: - ** - ** BEGIN; - ** - ** - ** SAVEPOINT sp; - ** - ** pagerStress(page X) - ** ROLLBACK TO sp; - ** - ** If (X>Y), then when pagerStress is called page X will not be written - ** out to the database file, but will be dropped from the cache. Then, - ** following the "ROLLBACK TO sp" statement, reading page X will read - ** data from the database file. This will be the copy of page X as it - ** was when the transaction started, not as it was when "SAVEPOINT sp" - ** was executed. - ** - ** The solution is to write the current data for page X into the - ** sub-journal file now (if it is not already there), so that it will - ** be restored to its current value when the "ROLLBACK TO sp" is - ** executed. - */ - if( NEVER( - rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) - ) ){ - rc = subjournalPage(pPg); - } - /* Write the contents of the page out to the database file. */ if( rc==SQLITE_OK ){ assert( (pPg->flags&PGHDR_NEED_SYNC)==0 ); @@ -5781,8 +5751,8 @@ static int pager_write(PgHdr *pPg){ /* If the statement journal is open and the page is not in it, ** then write the page into the statement journal. */ - if( pPager->nSavepoint>0 && subjRequiresPage(pPg) ){ - rc = subjournalPage(pPg); + if( pPager->nSavepoint>0 ){ + rc = subjournalPageIfRequired(pPg); } /* Update the database size and return. */ @@ -6772,9 +6742,8 @@ int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){ ** one or more savepoint bitvecs. This is the reason this function ** may return SQLITE_NOMEM. 
*/ - if( pPg->flags&PGHDR_DIRTY - && subjRequiresPage(pPg) - && SQLITE_OK!=(rc = subjournalPage(pPg)) + if( (pPg->flags & PGHDR_DIRTY)!=0 + && SQLITE_OK!=(rc = subjournalPageIfRequired(pPg)) ){ return rc; } From b34755308cd6f874870a017316051542a09369cf Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 20:53:18 +0000 Subject: [PATCH 195/206] Add a bypass path in sqlite3PagerWrite() for pages with the PGHDR_WRITEABLE bit set, for about a 1% performance increase. FossilOrigin-Name: ba425a6abb9886e6af87b5f6205202db450beba8 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/pager.c | 9 +++++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/manifest b/manifest index 97944da70e..0e5abb1e89 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Combine\ssubjRequiresPage()\sand\ssubjournalPage()\sinto\sa\ssingle\nsubjournalPageIfRequired()\sroutine. -D 2015-06-29T19:08:18.213 +C Add\sa\sbypass\spath\sin\ssqlite3PagerWrite()\sfor\spages\swith\sthe\sPGHDR_WRITEABLE\nbit\sset,\sfor\sabout\sa\s1%\sperformance\sincrease. 
+D 2015-06-29T20:53:18.096 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,7 +314,7 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 4cf1b151727f5f12898927c6688b167fd4999d14 +F src/pager.c 6ae566c373f74be311b5975eef8f6dd130551bd5 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c 379fd77feb732b39750eb733260d9c227d8a4314 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 7c4ef7b7c8744af19075bb96d1e0b63e35978ed1 -R f62d201ad6eea9a89949366a04424df3 +P 3b65eb56c422855ca47f709247205f0c77d98a5c +R 7a2e6c81396cabeccb9e57205ef7b5ff U drh -Z d7a21ab6fd66c107b4716f65f30e363a +Z 8e32abe3d4ffc02aca1a13a0a405eb07 diff --git a/manifest.uuid b/manifest.uuid index 76f6ffe525..ec2a9d057c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3b65eb56c422855ca47f709247205f0c77d98a5c \ No newline at end of file +ba425a6abb9886e6af87b5f6205202db450beba8 \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index bf6f8f4d69..6873a92044 100644 --- a/src/pager.c +++ b/src/pager.c @@ -2218,7 +2218,7 @@ static int pager_playback_one_page( } } - /* If this page has already been played by before during the current + /* If this page has already been played back before during the current ** rollback, then don't bother to play it back again. 
*/ if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){ @@ -5872,7 +5872,11 @@ int sqlite3PagerWrite(PgHdr *pPg){ assert( pPg->pPager->eState>=PAGER_WRITER_LOCKED ); assert( pPg->pPager->eState!=PAGER_ERROR ); assert( assert_pager_state(pPg->pPager) ); - if( pPg->pPager->sectorSize > (u32)pPg->pPager->pageSize ){ + Pager *pPager = pPg->pPager; + if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ + if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); + return SQLITE_OK; + }else if( pPager->sectorSize > (u32)pPager->pageSize ){ return pagerWriteLargeSector(pPg); }else{ return pager_write(pPg); @@ -5910,6 +5914,7 @@ void sqlite3PagerDontWrite(PgHdr *pPg){ PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) pPg->flags |= PGHDR_DONT_WRITE; + pPg->flags &= ~PGHDR_WRITEABLE; pager_set_pagehash(pPg); } } From 207c8173652fb66b72aed5cb2bea8f10406104ba Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 29 Jun 2015 23:01:32 +0000 Subject: [PATCH 196/206] From sqlite3BtreeInsert(), bypass the btreeMoveto() routine for table inserts and go directly to sqlite3BtreeMovetoUnpacked(). FossilOrigin-Name: 53fe685d56368af78238c8981cc41704929f7660 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/btree.c | 14 ++++++++------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/manifest b/manifest index 0e5abb1e89..9d8e547563 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\sbypass\spath\sin\ssqlite3PagerWrite()\sfor\spages\swith\sthe\sPGHDR_WRITEABLE\nbit\sset,\sfor\sabout\sa\s1%\sperformance\sincrease. -D 2015-06-29T20:53:18.096 +C From\ssqlite3BtreeInsert(),\sbypass\sthe\sbtreeMoveto()\sroutine\sfor\stable\sinserts\nand\sgo\sdirectly\sto\ssqlite3BtreeMovetoUnpacked(). 
+D 2015-06-29T23:01:32.059 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,7 +269,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c d1f21d7d91690747881f03940584f4cc548c9d3d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c ce342e156716fb64b9cf5c040260d0b47989f37b +F src/btree.c 9d8ae0dbbd0fc11c31f8ebdf52268b294d07aa9b F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 8ca7124af9ee2ce27747a4e5500c27a254dea8eb F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 3b65eb56c422855ca47f709247205f0c77d98a5c -R 7a2e6c81396cabeccb9e57205ef7b5ff +P ba425a6abb9886e6af87b5f6205202db450beba8 +R e5f91453b5eeda72698cccaefb998050 U drh -Z 8e32abe3d4ffc02aca1a13a0a405eb07 +Z 1793079077cd0546fcb9227de9d5c3a3 diff --git a/manifest.uuid b/manifest.uuid index ec2a9d057c..8990887238 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ba425a6abb9886e6af87b5f6205202db450beba8 \ No newline at end of file +53fe685d56368af78238c8981cc41704929f7660 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 027012e33a..95c18ff547 100644 --- a/src/btree.c +++ b/src/btree.c @@ -7903,20 +7903,22 @@ int sqlite3BtreeInsert( if( rc ) return rc; if( pCur->pKeyInfo==0 ){ + assert( pKey==0 ); /* If this is an insert into a table b-tree, invalidate any incrblob ** cursors open on the row being replaced */ invalidateIncrblobCursors(p, nKey, 0); /* If the cursor is currently on the last row and we are appending a - ** new row onto the 
end, set the "loc" to avoid an unnecessary btreeMoveto() - ** call */ + ** new row onto the end, set the "loc" to avoid an unnecessary + ** btreeMoveto() call */ if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 && pCur->info.nKey==nKey-1 ){ - loc = -1; + loc = -1; + }else if( loc==0 ){ + rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc); + if( rc ) return rc; } - } - - if( !loc ){ + }else if( loc==0 ){ rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); if( rc ) return rc; } From 98ef0f6c489cb8a93f24895de4d391c4016ee5f9 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 01:25:52 +0000 Subject: [PATCH 197/206] Convert several 5-year-old and older NEVER() macros into assert(). FossilOrigin-Name: ed54c14ea8a72d69d69e0c0d7f6936f54efc04c2 --- manifest | 16 ++++++++-------- manifest.uuid | 2 +- src/btree.c | 16 ++++------------ src/lempar.c | 2 +- src/vdbe.c | 5 ++--- 5 files changed, 16 insertions(+), 25 deletions(-) diff --git a/manifest b/manifest index 9d8e547563..f1e3876e00 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C From\ssqlite3BtreeInsert(),\sbypass\sthe\sbtreeMoveto()\sroutine\sfor\stable\sinserts\nand\sgo\sdirectly\sto\ssqlite3BtreeMovetoUnpacked(). -D 2015-06-29T23:01:32.059 +C Convert\sseveral\s5-year-old\sand\solder\sNEVER()\smacros\sinto\sassert(). 
+D 2015-06-30T01:25:52.068 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,7 +269,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c d1f21d7d91690747881f03940584f4cc548c9d3d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 9d8ae0dbbd0fc11c31f8ebdf52268b294d07aa9b +F src/btree.c 5d5d48461b3838988bd1f5b197c145df66786805 F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h 8ca7124af9ee2ce27747a4e5500c27a254dea8eb F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 @@ -290,7 +290,7 @@ F src/hwtime.h d32741c8f4df852c7d959236615444e2b1063b08 F src/insert.c b5f8b35a1b7924020e48cade5b2b5017bca7906b F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c ba1863ea58c4c840335a84ec276fc2b25e22bc4e -F src/lempar.c 7274c97d24bb46631e504332ccd3bd1b37841770 +F src/lempar.c 92bafa308607dd985ca389a788cd9e0a2b608712 F src/loadext.c e722f4b832f923744788365df5fb8515c0bc8a47 F src/main.c c0061a4f8ba86f957534be93b7026dab324f12c2 F src/malloc.c 908c780fdddd472163c2d1b1820ae4081f01ad20 @@ -390,7 +390,7 @@ F src/update.c 487747b328b7216bb7f6af0695d6937d5c9e605f F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c F src/util.c a6431c92803b975b7322724a7b433e538d243539 F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701 -F src/vdbe.c 3d5a78d39b15dc91ea2c11017d560a4224eb2f75 +F src/vdbe.c 5ee4a2bf871418f61d06dc256b9b3a0084b5ec46 F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 F src/vdbeInt.h 8b54e01ad0463590e7cffabce0bc36da9ee4f816 F src/vdbeapi.c 6a0d7757987018ff6b1b81bc5293219cd26bb299 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b 
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ba425a6abb9886e6af87b5f6205202db450beba8 -R e5f91453b5eeda72698cccaefb998050 +P 53fe685d56368af78238c8981cc41704929f7660 +R 0ca8cb603f507b8c27049a9ebf015fe3 U drh -Z 1793079077cd0546fcb9227de9d5c3a3 +Z e0e6efeb2c6c43faed874d1e7ab03448 diff --git a/manifest.uuid b/manifest.uuid index 8990887238..412ac12f4f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -53fe685d56368af78238c8981cc41704929f7660 \ No newline at end of file +ed54c14ea8a72d69d69e0c0d7f6936f54efc04c2 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 95c18ff547..fded6343e4 100644 --- a/src/btree.c +++ b/src/btree.c @@ -4030,10 +4030,8 @@ static int btreeCursor( assert( p->inTrans>TRANS_NONE ); assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); assert( pBt->pPage1 && pBt->pPage1->aData ); + assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); - if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){ - return SQLITE_READONLY; - } if( wrFlag ){ allocateTempSpace(pBt); if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; @@ -6016,9 +6014,7 @@ static int fillInCell( nSrc = nData; nData = 0; }else{ - if( NEVER(nKey>0x7fffffff || pKey==0) ){ - return SQLITE_CORRUPT_BKPT; - } + assert( nKey<=0x7fffffff && pKey!=0 ); nPayload = (int)nKey; pSrc = pKey; nSrc = (int)nKey; @@ -8020,12 +8016,8 @@ int sqlite3BtreeDelete(BtCursor *pCur){ assert( pCur->curFlags & BTCF_WriteFlag ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); assert( !hasReadConflicts(p, pCur->pgnoRoot) ); - - if( NEVER(pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell) - || NEVER(pCur->eState!=CURSOR_VALID) - ){ - return SQLITE_ERROR; /* Something has gone awry. 
*/ - } + assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); + assert( pCur->eState==CURSOR_VALID ); iCellDepth = pCur->iPage; iCellIdx = pCur->aiIdx[iCellDepth]; diff --git a/src/lempar.c b/src/lempar.c index ba0837c0ab..b6c60a25b6 100644 --- a/src/lempar.c +++ b/src/lempar.c @@ -329,7 +329,7 @@ static int yy_pop_parser_stack(yyParser *pParser){ /* There is no mechanism by which the parser stack can be popped below ** empty in SQLite. */ - if( NEVER(pParser->yyidx<0) ) return 0; + assert( pParser->yyidx>=0 ); #ifndef NDEBUG if( yyTraceFILE && pParser->yyidx>=0 ){ fprintf(yyTraceFILE,"%sPopping %s\n", diff --git a/src/vdbe.c b/src/vdbe.c index 66578ec6a4..296a12bc9f 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -4056,9 +4056,8 @@ case OP_NewRowid: { /* out2 */ assert( pOp->p1>=0 && pOp->p1<p->nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); - if( NEVER(pC->pCursor==0) ){ - /* The zero initialization above is all that is needed */ - }else{ + assert( pC->pCursor!=0 ); + { /* The next rowid or record number (different terms for the same ** thing) is obtained in a two-step algorithm. ** From 27fb746cde8eda3889ebbda44f576db991ba2b5b Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 02:47:36 +0000 Subject: [PATCH 198/206] Put BtCursor objects on a singly-linked list instead of a doubly-linked list. Add the BTCF_Multiple flag. Only invoke saveAllCursors() when using a cursor that has BTCF_Multiple set. FossilOrigin-Name: 429ccef2b36fc46e92914eb54afd5f800b1a40ff --- manifest | 14 ++++++------- manifest.uuid | 2 +- src/btree.c | 54 +++++++++++++++++++++++++++++++++++++------------- src/btreeInt.h | 4 +++- 4 files changed, 51 insertions(+), 23 deletions(-) diff --git a/manifest b/manifest index f1e3876e00..4290efa141 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Convert\sseveral\s5-year-old\sand\solder\sNEVER()\smacros\sinto\sassert().
-D 2015-06-30T01:25:52.068 +C Put\sBtCursor\sobjects\son\sa\ssingly-linked\slist\sinstead\sof\sa\sdoubly-linked\slist.\nAdd\sthe\sBTCF_Multiple\sflag.\s\sOnly\sinvoke\ssaveAllCursors()\swhen\susing\sa\scursor\nthat\shas\sBTCF_Multiple\sset. +D 2015-06-30T02:47:36.537 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,9 +269,9 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c d1f21d7d91690747881f03940584f4cc548c9d3d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 5d5d48461b3838988bd1f5b197c145df66786805 +F src/btree.c 08ae64b0387e72b8201981a2d7b0f58b92fdff29 F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 -F src/btreeInt.h 8ca7124af9ee2ce27747a4e5500c27a254dea8eb +F src/btreeInt.h e4eabc722b1ae017ac9c266a75769d4c6a9afde6 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 F src/complete.c a5cf5b4b56390cfb7b8636e8f7ddef90258dd575 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 53fe685d56368af78238c8981cc41704929f7660 -R 0ca8cb603f507b8c27049a9ebf015fe3 +P ed54c14ea8a72d69d69e0c0d7f6936f54efc04c2 +R f3f8b025f26bf8ce4de670023e83b4e0 U drh -Z e0e6efeb2c6c43faed874d1e7ab03448 +Z aa2cb7c94aea3021edb11ed06abf028f diff --git a/manifest.uuid b/manifest.uuid index 412ac12f4f..556c262e4c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ed54c14ea8a72d69d69e0c0d7f6936f54efc04c2 \ No newline at end of file +429ccef2b36fc46e92914eb54afd5f800b1a40ff \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 
fded6343e4..ef2a861528 100644 --- a/src/btree.c +++ b/src/btree.c @@ -655,6 +655,15 @@ static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); ** routine is called just before cursor pExcept is used to modify the ** table, for example in BtreeDelete() or BtreeInsert(). ** +** If there are two or more cursors on the same btree, then all such +** cursors should have their BTCF_Multiple flag set. The btreeCursor() +** routine enforces that rule. This routine only needs to be called in +** the uncommon case when pExpect has the BTCF_Multiple flag set. +** +** If pExpect!=NULL and if no other cursors are found on the same root-page, +** then the BTCF_Multiple flag on pExpect is cleared, to avoid another +** pointless call to this routine. +** ** Implementation note: This routine merely checks to see if any cursors ** need to be saved. It calls out to saveCursorsOnList() in the (unusual) ** event that cursors are in need to being saved. @@ -666,7 +675,9 @@ static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ for(p=pBt->pCursor; p; p=p->pNext){ if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; } - return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK; + if( p ) return saveCursorsOnList(p, iRoot, pExcept); + if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple; + return SQLITE_OK; } /* This helper routine to saveAllCursors does the actual work of saving @@ -4015,6 +4026,7 @@ static int btreeCursor( BtCursor *pCur /* Space for new cursor */ ){ BtShared *pBt = p->pBt; /* Shared b-tree handle */ + BtCursor *pX; /* Looping over other all cursors */ assert( sqlite3BtreeHoldsMutex(p) ); assert( wrFlag==0 || wrFlag==1 ); @@ -4051,10 +4063,15 @@ static int btreeCursor( assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); pCur->curFlags = wrFlag; pCur->curPagerFlags = wrFlag ? 
0 : PAGER_GET_READONLY; - pCur->pNext = pBt->pCursor; - if( pCur->pNext ){ - pCur->pNext->pPrev = pCur; + /* If there are two or more cursors on the same btree, then all such + ** cursors *must* have the BTCF_Multiple flag set. */ + for(pX=pBt->pCursor; pX; pX=pX->pNext){ + if( pX->pgnoRoot==(Pgno)iTable ){ + pX->curFlags |= BTCF_Multiple; + pCur->curFlags |= BTCF_Multiple; + } } + pCur->pNext = pBt->pCursor; pBt->pCursor = pCur; pCur->eState = CURSOR_INVALID; return SQLITE_OK; @@ -4112,13 +4129,18 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ BtShared *pBt = pCur->pBt; sqlite3BtreeEnter(pBtree); sqlite3BtreeClearCursor(pCur); - if( pCur->pPrev ){ - pCur->pPrev->pNext = pCur->pNext; - }else{ + assert( pBt->pCursor!=0 ); + if( pBt->pCursor==pCur ){ pBt->pCursor = pCur->pNext; - } - if( pCur->pNext ){ - pCur->pNext->pPrev = pCur->pPrev; + }else{ + BtCursor *pPrev = pBt->pCursor; + do{ + if( pPrev->pNext==pCur ){ + pPrev->pNext = pCur->pNext; + break; + } + pPrev = pPrev->pNext; + }while( ALWAYS(pPrev) ); } for(i=0; i<=pCur->iPage; i++){ releasePage(pCur->apPage[i]); @@ -7895,8 +7917,10 @@ int sqlite3BtreeInsert( ** doing any work. To avoid thwarting these optimizations, it is important ** not to clear the cursor here. */ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); - if( rc ) return rc; + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } if( pCur->pKeyInfo==0 ){ assert( pKey==0 ); @@ -8042,8 +8066,10 @@ int sqlite3BtreeDelete(BtCursor *pCur){ ** deleted writable. Then free any overflow pages associated with the ** entry and finally remove the cell itself from within the page. */ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); - if( rc ) return rc; + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } /* If this is a delete operation to remove a row from a table b-tree, ** invalidate any incrblob cursors open on the row being deleted. 
*/ diff --git a/src/btreeInt.h b/src/btreeInt.h index 70aa937c7d..92a4d44692 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -507,7 +507,7 @@ struct CellInfo { struct BtCursor { Btree *pBtree; /* The Btree to which this cursor belongs */ BtShared *pBt; /* The BtShared this cursor points to */ - BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */ + BtCursor *pNext; /* Forms a linked list of all cursors */ Pgno *aOverflow; /* Cache of overflow page locations */ CellInfo info; /* A parse of the cell we are pointing at */ i64 nKey; /* Size of pKey, or last integer key */ @@ -526,6 +526,7 @@ struct BtCursor { i8 iPage; /* Index of current page in apPage */ u8 curIntKey; /* Value of apPage[0]->intKey */ struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ + void *padding1; /* Make object size a multiple of 16 */ u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */ MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */ }; @@ -538,6 +539,7 @@ struct BtCursor { #define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ #define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ #define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ +#define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ /* ** Potential values for BtCursor.eState. From 597d2b6412a38ca8ce0312579d0acaf4035aaee9 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 03:13:47 +0000 Subject: [PATCH 199/206] Change sqlite3ApiExit() so that its first argument is never NULL. 
FossilOrigin-Name: 791b706ec6c3e80885666e48e01524f0e9a7557e --- manifest | 16 ++++++++-------- manifest.uuid | 2 +- src/complete.c | 2 +- src/main.c | 4 ++-- src/malloc.c | 13 ++++++------- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/manifest b/manifest index 4290efa141..0c3d3a246e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Put\sBtCursor\sobjects\son\sa\ssingly-linked\slist\sinstead\sof\sa\sdoubly-linked\slist.\nAdd\sthe\sBTCF_Multiple\sflag.\s\sOnly\sinvoke\ssaveAllCursors()\swhen\susing\sa\scursor\nthat\shas\sBTCF_Multiple\sset. -D 2015-06-30T02:47:36.537 +C Change\ssqlite3ApiExit()\sso\sthat\sits\sfirst\sargument\sis\snever\sNULL. +D 2015-06-30T03:13:47.365 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -274,7 +274,7 @@ F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 F src/btreeInt.h e4eabc722b1ae017ac9c266a75769d4c6a9afde6 F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 -F src/complete.c a5cf5b4b56390cfb7b8636e8f7ddef90258dd575 +F src/complete.c addcd8160b081131005d5bc2d34adf20c1c5c92f F src/ctime.c 5a0b735dc95604766f5dac73973658eef782ee8b F src/date.c e4d50b3283696836ec1036b695ead9a19e37a5ac F src/dbstat.c f402e77e25089c6003d0c60b3233b9b3947d599a @@ -292,8 +292,8 @@ F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c ba1863ea58c4c840335a84ec276fc2b25e22bc4e F src/lempar.c 92bafa308607dd985ca389a788cd9e0a2b608712 F src/loadext.c e722f4b832f923744788365df5fb8515c0bc8a47 -F src/main.c c0061a4f8ba86f957534be93b7026dab324f12c2 -F src/malloc.c 908c780fdddd472163c2d1b1820ae4081f01ad20 +F src/main.c 92d79bfa1a36c7c554700bb58eb8327abff1ac5c +F src/malloc.c 9be4e645f2fb411e5a04cf97e91f68b4faa6dc81 F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c 
abe6ee469b6c5a35c7f22bfeb9c9bac664a1c987 F src/mem2.c f1940d9e91948dd6a908fbb9ce3835c36b5d83c3 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ed54c14ea8a72d69d69e0c0d7f6936f54efc04c2 -R f3f8b025f26bf8ce4de670023e83b4e0 +P 429ccef2b36fc46e92914eb54afd5f800b1a40ff +R 76fff8f8ba7a3e292fd9a942df7fcd8e U drh -Z aa2cb7c94aea3021edb11ed06abf028f +Z f5ae2d48a4b628f4b044b90e76fe7af0 diff --git a/manifest.uuid b/manifest.uuid index 556c262e4c..952f484670 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -429ccef2b36fc46e92914eb54afd5f800b1a40ff \ No newline at end of file +791b706ec6c3e80885666e48e01524f0e9a7557e \ No newline at end of file diff --git a/src/complete.c b/src/complete.c index a12184e64d..b120b7e811 100644 --- a/src/complete.c +++ b/src/complete.c @@ -284,7 +284,7 @@ int sqlite3_complete16(const void *zSql){ rc = SQLITE_NOMEM; } sqlite3ValueFree(pVal); - return sqlite3ApiExit(0, rc); + return rc & 0xff; } #endif /* SQLITE_OMIT_UTF16 */ #endif /* SQLITE_OMIT_COMPLETE */ diff --git a/src/main.c b/src/main.c index c047943c93..faeee6af2e 100644 --- a/src/main.c +++ b/src/main.c @@ -2928,7 +2928,7 @@ opendb_out: sqlite3GlobalConfig.xSqllog(pArg, db, zFilename, 0); } #endif - return sqlite3ApiExit(0, rc); + return rc & 0xff; } /* @@ -2986,7 +2986,7 @@ int sqlite3_open16( } sqlite3ValueFree(pVal); - return sqlite3ApiExit(0, rc); + return rc & 0xff; } #endif /* SQLITE_OMIT_UTF16 */ diff --git a/src/malloc.c b/src/malloc.c index 1b9a20956b..97b9cd5778 100644 --- a/src/malloc.c +++ b/src/malloc.c @@ -796,17 +796,16 @@ static SQLITE_NOINLINE int apiOomError(sqlite3 *db){ ** function. However, if a malloc() failure has occurred since the previous ** invocation SQLITE_NOMEM is returned instead. 
** -** If the first argument, db, is not NULL and a malloc() error has occurred, -** then the connection error-code (the value returned by sqlite3_errcode()) -** is set to SQLITE_NOMEM. +** If an OOM as occurred, then the connection error-code (the value +** returned by sqlite3_errcode()) is set to SQLITE_NOMEM. */ int sqlite3ApiExit(sqlite3* db, int rc){ - /* If the db handle is not NULL, then we must hold the connection handle - ** mutex here. Otherwise the read (and possible write) of db->mallocFailed + /* If the db handle must hold the connection handle mutex here. + ** Otherwise the read (and possible write) of db->mallocFailed ** is unsafe, as is the call to sqlite3Error(). */ - assert( !db || sqlite3_mutex_held(db->mutex) ); - if( db==0 ) return rc & 0xff; + assert( db!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); if( db->mallocFailed || rc==SQLITE_IOERR_NOMEM ){ return apiOomError(db); } From 1116bf13597165d8fa8f5558e6b8b6952da61dfd Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 03:18:33 +0000 Subject: [PATCH 200/206] Implement sqlite3Strlen30() using strlen() from the C library. FossilOrigin-Name: 8001aa52bd12f900092387fe3571463e89efd977 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/util.c | 4 +--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/manifest b/manifest index 0c3d3a246e..3619261167 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\ssqlite3ApiExit()\sso\sthat\sits\sfirst\sargument\sis\snever\sNULL. -D 2015-06-30T03:13:47.365 +C Implement\ssqlite3Strlen30()\susing\sstrlen()\sfrom\sthe\sC\slibrary. 
+D 2015-06-30T03:18:33.065 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -388,7 +388,7 @@ F src/treeview.c c84b1a8ebc7f1d00cd76ce4958eeb3ae1021beed F src/trigger.c 322f23aad694e8f31d384dcfa386d52a48d3c52f F src/update.c 487747b328b7216bb7f6af0695d6937d5c9e605f F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c -F src/util.c a6431c92803b975b7322724a7b433e538d243539 +F src/util.c a5471ac4834ca3abe9c569745700d3f4dd5f5574 F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701 F src/vdbe.c 5ee4a2bf871418f61d06dc256b9b3a0084b5ec46 F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 429ccef2b36fc46e92914eb54afd5f800b1a40ff -R 76fff8f8ba7a3e292fd9a942df7fcd8e +P 791b706ec6c3e80885666e48e01524f0e9a7557e +R 890e3b877cdeef90793b2b79488c4b65 U drh -Z f5ae2d48a4b628f4b044b90e76fe7af0 +Z 7d695210e1c17d010d5e00878a5ee246 diff --git a/manifest.uuid b/manifest.uuid index 952f484670..98e3a983f6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -791b706ec6c3e80885666e48e01524f0e9a7557e \ No newline at end of file +8001aa52bd12f900092387fe3571463e89efd977 \ No newline at end of file diff --git a/src/util.c b/src/util.c index 0bc1eeacf4..8fdaf26785 100644 --- a/src/util.c +++ b/src/util.c @@ -105,10 +105,8 @@ int sqlite3IsNaN(double x){ ** than 1GiB) the value returned might be less than the true string length. 
*/ int sqlite3Strlen30(const char *z){ - const char *z2 = z; if( z==0 ) return 0; - while( *z2 ){ z2++; } - return 0x3fffffff & (int)(z2 - z); + return 0x3fffffff & (int)strlen(z); } /* From 50642b1debf7db2b24a9ad28a124f891bd838cb4 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 03:34:13 +0000 Subject: [PATCH 201/206] Put all variable declarations for sqlite3PagerWrite() at the beginning of the block. Syntax only - no logic changes. FossilOrigin-Name: a0fa6b3ba59716e53b8c035b497602050ed0ccb8 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/pager.c | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/manifest b/manifest index 3619261167..4773cc0264 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Implement\ssqlite3Strlen30()\susing\sstrlen()\sfrom\sthe\sC\slibrary. -D 2015-06-30T03:18:33.065 +C Put\sall\svariable\sdeclarations\sfor\ssqlite3PagerWrite()\nat\sthe\sbeginning\sof\sthe\sblock.\s\sSyntax\sonly\s-\sno\slogic\schanges. +D 2015-06-30T03:34:13.376 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,7 +314,7 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 6ae566c373f74be311b5975eef8f6dd130551bd5 +F src/pager.c fbd0dd63da21d62c51e845b919548d6eb14dd2ff F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c 379fd77feb732b39750eb733260d9c227d8a4314 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix 
deb315d026cc8400325c5863eef847784a219a2f -P 791b706ec6c3e80885666e48e01524f0e9a7557e -R 890e3b877cdeef90793b2b79488c4b65 +P 8001aa52bd12f900092387fe3571463e89efd977 +R 3ab730a0fd8ce3dab29c0f5735db472f U drh -Z 7d695210e1c17d010d5e00878a5ee246 +Z 2e2284bbdb4fcaa6c0dd63e70a1d9358 diff --git a/manifest.uuid b/manifest.uuid index 98e3a983f6..c6fb62560a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8001aa52bd12f900092387fe3571463e89efd977 \ No newline at end of file +a0fa6b3ba59716e53b8c035b497602050ed0ccb8 \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 6873a92044..dd3cac40a1 100644 --- a/src/pager.c +++ b/src/pager.c @@ -5868,11 +5868,11 @@ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ ** as appropriate. Otherwise, SQLITE_OK. */ int sqlite3PagerWrite(PgHdr *pPg){ - assert( (pPg->flags & PGHDR_MMAP)==0 ); - assert( pPg->pPager->eState>=PAGER_WRITER_LOCKED ); - assert( pPg->pPager->eState!=PAGER_ERROR ); - assert( assert_pager_state(pPg->pPager) ); Pager *pPager = pPg->pPager; + assert( (pPg->flags & PGHDR_MMAP)==0 ); + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( pPager->eState!=PAGER_ERROR ); + assert( assert_pager_state(pPager) ); if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); return SQLITE_OK; From d8c0ba3b02aa16e9f248db1acc6a233eef62cc4e Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 03:57:59 +0000 Subject: [PATCH 202/206] Optimization to the sqlite3PcacheFetch() logic. 
FossilOrigin-Name: d9a0481ce0fd9824b1566f182d68ec46ef091b1b --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/pager.c | 6 ++++++ src/pcache.c | 4 ++-- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/manifest b/manifest index 4773cc0264..6a222cd1bf 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Put\sall\svariable\sdeclarations\sfor\ssqlite3PagerWrite()\nat\sthe\sbeginning\sof\sthe\sblock.\s\sSyntax\sonly\s-\sno\slogic\schanges. -D 2015-06-30T03:34:13.376 +C Optimization\sto\sthe\ssqlite3PcacheFetch()\slogic. +D 2015-06-30T03:57:59.045 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,10 +314,10 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c fbd0dd63da21d62c51e845b919548d6eb14dd2ff +F src/pager.c 3fa869dafdb59ece9d318177e3b9972039dfee39 F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 -F src/pcache.c 379fd77feb732b39750eb733260d9c227d8a4314 +F src/pcache.c cde06aa50962595e412d497e22fd2e07878ba1f0 F src/pcache.h 9968603796240cdf83da7e7bef76edf90619cea9 F src/pcache1.c 9ec20f98f50ed7415019303ae9bd3745d4b7bd9b F src/pragma.c c1f4d012ea9f6b1ce52d341b2cd0ad72d560afd7 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8001aa52bd12f900092387fe3571463e89efd977 -R 3ab730a0fd8ce3dab29c0f5735db472f +P a0fa6b3ba59716e53b8c035b497602050ed0ccb8 +R ef96145c17347794d738ef415b1d0f04 U drh -Z 2e2284bbdb4fcaa6c0dd63e70a1d9358 
+Z a3fb1d60e02fc08f324c2aa0fa9d650d diff --git a/manifest.uuid b/manifest.uuid index c6fb62560a..caf2e1d807 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a0fa6b3ba59716e53b8c035b497602050ed0ccb8 \ No newline at end of file +d9a0481ce0fd9824b1566f182d68ec46ef091b1b \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index dd3cac40a1..0ac05f715b 100644 --- a/src/pager.c +++ b/src/pager.c @@ -5312,6 +5312,11 @@ int sqlite3PagerAcquire( if( pBase==0 ){ rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase); if( rc!=SQLITE_OK ) goto pager_acquire_err; + if( pBase==0 ){ + pPg = *ppPage = 0; + rc = SQLITE_NOMEM; + goto pager_acquire_err; + } } pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); if( pPg==0 ) rc = SQLITE_NOMEM; @@ -5418,6 +5423,7 @@ DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ assert( pPager->pPCache!=0 ); pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); assert( pPage==0 || pPager->hasBeenUsed ); + if( pPage==0 ) return 0; return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); } diff --git a/src/pcache.c b/src/pcache.c index 9b23bd8433..58c05ac2a4 100644 --- a/src/pcache.c +++ b/src/pcache.c @@ -321,7 +321,7 @@ static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit( assert( pPage!=0 ); pPgHdr = (PgHdr*)pPage->pExtra; assert( pPgHdr->pPage==0 ); - memset(pPgHdr, 0, sizeof(PgHdr)); + memset(pPgHdr, 0, sizeof(PgHdr)); pPgHdr->pPage = pPage; pPgHdr->pData = pPage->pBuf; pPgHdr->pExtra = (void *)&pPgHdr[1]; @@ -345,7 +345,7 @@ PgHdr *sqlite3PcacheFetchFinish( ){ PgHdr *pPgHdr; - if( pPage==0 ) return 0; + assert( pPage!=0 ); pPgHdr = (PgHdr *)pPage->pExtra; if( !pPgHdr->pPage ){ From 3169906d061c60903ca654b5bdbc6355f7cee1e7 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 11:07:32 +0000 Subject: [PATCH 203/206] Change an unreachable branch into an assert() in sqlite3PagerAcquire() and optimize sqlite3PcacheOpenSavepoint() by factoring out rarely used code into a subroutine. 
FossilOrigin-Name: b406b20ecdf0bff63c5c222fab11cb9acee86765 --- manifest | 12 ++++---- manifest.uuid | 2 +- src/pager.c | 80 ++++++++++++++++++++++++++++----------------------- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/manifest b/manifest index 6a222cd1bf..c106e26357 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Optimization\sto\sthe\ssqlite3PcacheFetch()\slogic. -D 2015-06-30T03:57:59.045 +C Change\san\sunreachable\sbranch\sinto\san\sassert()\sin\ssqlite3PagerAcquire()\nand\soptimize\ssqlite3PcacheOpenSavepoint()\sby\sfactoring\sout\srarely\sused\scode\ninto\sa\ssubroutine. +D 2015-06-30T11:07:32.245 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -314,7 +314,7 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8 F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca -F src/pager.c 3fa869dafdb59ece9d318177e3b9972039dfee39 +F src/pager.c 922d8ea28387b79a117488da06ee84f77d50d71e F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77 F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8 F src/pcache.c cde06aa50962595e412d497e22fd2e07878ba1f0 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a0fa6b3ba59716e53b8c035b497602050ed0ccb8 -R ef96145c17347794d738ef415b1d0f04 +P d9a0481ce0fd9824b1566f182d68ec46ef091b1b +R 24b6e6de7d9b089958239ab689d72050 U drh -Z a3fb1d60e02fc08f324c2aa0fa9d650d +Z 9400732c29ac033e0975f1d6b596266c diff --git a/manifest.uuid b/manifest.uuid index caf2e1d807..bc33da0418 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 
+1 @@ -d9a0481ce0fd9824b1566f182d68ec46ef091b1b \ No newline at end of file +b406b20ecdf0bff63c5c222fab11cb9acee86765 \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 0ac05f715b..982413ca8a 100644 --- a/src/pager.c +++ b/src/pager.c @@ -5319,7 +5319,7 @@ int sqlite3PagerAcquire( } } pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); - if( pPg==0 ) rc = SQLITE_NOMEM; + assert( pPg!=0 ); } } @@ -6475,54 +6475,62 @@ int sqlite3PagerIsMemdb(Pager *pPager){ ** occurs while opening the sub-journal file, then an IO error code is ** returned. Otherwise, SQLITE_OK. */ -int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ +static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){ int rc = SQLITE_OK; /* Return code */ int nCurrent = pPager->nSavepoint; /* Current number of savepoints */ + int ii; /* Iterator variable */ + PagerSavepoint *aNew; /* New Pager.aSavepoint array */ assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); + assert( nSavepoint>nCurrent && pPager->useJournal ); - if( nSavepoint>nCurrent && pPager->useJournal ){ - int ii; /* Iterator variable */ - PagerSavepoint *aNew; /* New Pager.aSavepoint array */ + /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM + ** if the allocation fails. Otherwise, zero the new portion in case a + ** malloc failure occurs while populating it in the for(...) loop below. + */ + aNew = (PagerSavepoint *)sqlite3Realloc( + pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint + ); + if( !aNew ){ + return SQLITE_NOMEM; + } + memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); + pPager->aSavepoint = aNew; - /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM - ** if the allocation fails. Otherwise, zero the new portion in case a - ** malloc failure occurs while populating it in the for(...) loop below. 
- */ - aNew = (PagerSavepoint *)sqlite3Realloc( - pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint - ); - if( !aNew ){ + /* Populate the PagerSavepoint structures just allocated. */ + for(ii=nCurrent; ii<nSavepoint; ii++){ + aNew[ii].nOrig = pPager->dbSize; + if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ + aNew[ii].iOffset = pPager->journalOff; + }else{ + aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); + } + aNew[ii].iSubRec = pPager->nSubRec; + aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); + if( !aNew[ii].pInSavepoint ){ return SQLITE_NOMEM; } - memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); - pPager->aSavepoint = aNew; - - /* Populate the PagerSavepoint structures just allocated. */ - for(ii=nCurrent; ii<nSavepoint; ii++){ - aNew[ii].nOrig = pPager->dbSize; - if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ - aNew[ii].iOffset = pPager->journalOff; - }else{ - aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); - } - aNew[ii].iSubRec = pPager->nSubRec; - aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); - if( !aNew[ii].pInSavepoint ){ - return SQLITE_NOMEM; - } - if( pagerUseWal(pPager) ){ - sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); - } - pPager->nSavepoint = ii+1; + if( pagerUseWal(pPager) ){ + sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); } - assert( pPager->nSavepoint==nSavepoint ); - assertTruncateConstraint(pPager); + pPager->nSavepoint = ii+1; } - + assert( pPager->nSavepoint==nSavepoint ); + assertTruncateConstraint(pPager); return rc; } +int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + + if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ + return pagerOpenSavepoint(pPager, nSavepoint); + }else{ + return SQLITE_OK; + } +} + /* ** This function is called to rollback or release (commit) a savepoint.
From 5372e4d4f9a6407de9075e29ecbd0730dc6193c7 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 12:47:09 +0000 Subject: [PATCH 204/206] Make use of htonl() and __builtin_bswap32() for faster implementations of sqlite3Get4byte() and sqlite3Put4byte(). FossilOrigin-Name: bc27ebd7f73e9fc8e00da6ec82632e439fcce812 --- manifest | 12 ++++++------ manifest.uuid | 2 +- src/util.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/manifest b/manifest index c106e26357..a786e15afe 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\san\sunreachable\sbranch\sinto\san\sassert()\sin\ssqlite3PagerAcquire()\nand\soptimize\ssqlite3PcacheOpenSavepoint()\sby\sfactoring\sout\srarely\sused\scode\ninto\sa\ssubroutine. -D 2015-06-30T11:07:32.245 +C Make\suse\sof\shtonl()\sand\s__builtin_bswap32()\sfor\sfaster\nimplementations\sof\ssqlite3Get4byte()\sand\ssqlite3Put4byte(). +D 2015-06-30T12:47:09.735 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -388,7 +388,7 @@ F src/treeview.c c84b1a8ebc7f1d00cd76ce4958eeb3ae1021beed F src/trigger.c 322f23aad694e8f31d384dcfa386d52a48d3c52f F src/update.c 487747b328b7216bb7f6af0695d6937d5c9e605f F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c -F src/util.c a5471ac4834ca3abe9c569745700d3f4dd5f5574 +F src/util.c 99396e37038a68073b11bd41b041eace5c1728c2 F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701 F src/vdbe.c 5ee4a2bf871418f61d06dc256b9b3a0084b5ec46 F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d9a0481ce0fd9824b1566f182d68ec46ef091b1b -R 
24b6e6de7d9b089958239ab689d72050 +P b406b20ecdf0bff63c5c222fab11cb9acee86765 +R 52c32250d08365fc3595da7414b917a8 U drh -Z 9400732c29ac033e0975f1d6b596266c +Z a9454dee991519551f365a2fb6736c41 diff --git a/manifest.uuid b/manifest.uuid index bc33da0418..e1a76f6457 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b406b20ecdf0bff63c5c222fab11cb9acee86765 \ No newline at end of file +bc27ebd7f73e9fc8e00da6ec82632e439fcce812 \ No newline at end of file diff --git a/src/util.c b/src/util.c index 8fdaf26785..f2d3e91e76 100644 --- a/src/util.c +++ b/src/util.c @@ -1078,14 +1078,38 @@ int sqlite3VarintLen(u64 v){ ** Read or write a four-byte big-endian integer value. */ u32 sqlite3Get4byte(const u8 *p){ +#if SQLITE_BYTEORDER==4321 + u32 x; + memcpy(&x,p,4); + return x; +#elif defined(_MSC_VER) + u32 x; + memcpy(&x,p,4); + return htonl(x); +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) + u32 x; + memcpy(&x,p,4); + return __builtin_bswap32(x); +#else testcase( p[0]&0x80 ); return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; +#endif } void sqlite3Put4byte(unsigned char *p, u32 v){ +#if SQLITE_BYTEORDER==4321 + memcpy(p,&v,4); +#elif defined(_MSC_VER) + u32 x = htonl(v); + memcpy(&x,p,4); +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) + u32 x = __builtin_bswap32(v); + memcpy(p,&x,4); +#else p[0] = (u8)(v>>24); p[1] = (u8)(v>>16); p[2] = (u8)(v>>8); p[3] = (u8)v; +#endif } From 329428e2088aabb1db2dc6e48108b76551405a8e Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 13:28:18 +0000 Subject: [PATCH 205/206] Remove the use of htonl() in the previous check-in due to linkage issues. Add the get2byteAligned() macro and use it for access to the cell offsets on btree pages for about a 1% performance gain. 
FossilOrigin-Name: 79ff36b7170c9e7e7a9935c8b9d1665867771087 --- manifest | 16 ++++++++-------- manifest.uuid | 2 +- src/btree.c | 14 +++++++------- src/btreeInt.h | 13 +++++++++++++ src/util.c | 7 ------- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/manifest b/manifest index a786e15afe..cfb68a89fa 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Make\suse\sof\shtonl()\sand\s__builtin_bswap32()\sfor\sfaster\nimplementations\sof\ssqlite3Get4byte()\sand\ssqlite3Put4byte(). -D 2015-06-30T12:47:09.735 +C Remove\sthe\suse\sof\shtonl()\sin\sthe\sprevious\scheck-in\sdue\sto\slinkage\sissues.\nAdd\sthe\sget2byteAligned()\smacro\sand\suse\sit\sfor\saccess\sto\sthe\scell\soffsets\non\sbtree\spages\sfor\sabout\sa\s1%\sperformance\sgain. +D 2015-06-30T13:28:18.237 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -269,9 +269,9 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240 F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3 F src/bitvec.c d1f21d7d91690747881f03940584f4cc548c9d3d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 -F src/btree.c 08ae64b0387e72b8201981a2d7b0f58b92fdff29 +F src/btree.c 3a535c0118872c3ee4e198b80a62d09315381dab F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 -F src/btreeInt.h e4eabc722b1ae017ac9c266a75769d4c6a9afde6 +F src/btreeInt.h 583240896ca96ba3b371b42e8cb6bd73c9e4717f F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 F src/complete.c addcd8160b081131005d5bc2d34adf20c1c5c92f @@ -388,7 +388,7 @@ F src/treeview.c c84b1a8ebc7f1d00cd76ce4958eeb3ae1021beed F src/trigger.c 322f23aad694e8f31d384dcfa386d52a48d3c52f F src/update.c 487747b328b7216bb7f6af0695d6937d5c9e605f F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c -F src/util.c 99396e37038a68073b11bd41b041eace5c1728c2 +F src/util.c 
89bfe78b4610d456ba431a0865309a20acc115f3 F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701 F src/vdbe.c 5ee4a2bf871418f61d06dc256b9b3a0084b5ec46 F src/vdbe.h 7a75045d879118b9d3af7e8b3c108f2f27c51473 @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b406b20ecdf0bff63c5c222fab11cb9acee86765 -R 52c32250d08365fc3595da7414b917a8 +P bc27ebd7f73e9fc8e00da6ec82632e439fcce812 +R be192770615d2f155ef5ba2c23f2b23e U drh -Z a9454dee991519551f365a2fb6736c41 +Z ba4fa6154f0804a80e26fea56b904f4a diff --git a/manifest.uuid b/manifest.uuid index e1a76f6457..0e8e099549 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bc27ebd7f73e9fc8e00da6ec82632e439fcce812 \ No newline at end of file +79ff36b7170c9e7e7a9935c8b9d1665867771087 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index ef2a861528..fe2c067db2 100644 --- a/src/btree.c +++ b/src/btree.c @@ -971,9 +971,9 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ ** This routine works only for pages that do not contain overflow cells. 
*/ #define findCell(P,I) \ - ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) + ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) #define findCellPastPtr(P,I) \ - ((P)->aDataOfst + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) + ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) /* @@ -1754,7 +1754,7 @@ static int btreeInitPage(MemPage *pPage){ if( !pPage->leaf ) iCellLast--; for(i=0; inCell; i++){ - pc = get2byte(&data[cellOffset+i*2]); + pc = get2byteAligned(&data[cellOffset+i*2]); testcase( pc==iCellFirst ); testcase( pc==iCellLast ); if( pciCellLast ){ @@ -6625,7 +6625,7 @@ static int editPage( #ifdef SQLITE_DEBUG for(i=0; iapCell[i+iNew]; - int iOff = get2byte(&pPg->aCellIdx[i*2]); + int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ pCell = &pTmp[pCell - aData]; } @@ -7127,7 +7127,7 @@ static int balance_nonroot( memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow); limit = pOld->aiOvfl[0]; for(j=0; jcellOffset + 2*pOld->nCell; while( piCellxCellSize(pPage, &data[pc]); diff --git a/src/btreeInt.h b/src/btreeInt.h index 92a4d44692..1ccdb08398 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -691,3 +691,16 @@ struct IntegrityCk { #define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v)) #define get4byte sqlite3Get4byte #define put4byte sqlite3Put4byte + +/* +** get2byteAligned(), unlike get2byte(), requires that its argument point to a +** two-byte aligned address. get2byteAligned() is only used for accessing the +** cell addresses in a btree header. 
+*/ +#if SQLITE_BYTEORDER==4321 +# define get2byteAligned(x) (*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) +# define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) +#else +# define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) +#endif diff --git a/src/util.c b/src/util.c index f2d3e91e76..4e029b0e01 100644 --- a/src/util.c +++ b/src/util.c @@ -1082,10 +1082,6 @@ u32 sqlite3Get4byte(const u8 *p){ u32 x; memcpy(&x,p,4); return x; -#elif defined(_MSC_VER) - u32 x; - memcpy(&x,p,4); - return htonl(x); #elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) u32 x; memcpy(&x,p,4); @@ -1098,9 +1094,6 @@ u32 sqlite3Get4byte(const u8 *p){ void sqlite3Put4byte(unsigned char *p, u32 v){ #if SQLITE_BYTEORDER==4321 memcpy(p,&v,4); -#elif defined(_MSC_VER) - u32 x = htonl(v); - memcpy(&x,p,4); #elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) u32 x = __builtin_bswap32(v); memcpy(p,&x,4); From ad265296ffbaa5d238f3fd9c4e09398be92c3af1 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 30 Jun 2015 14:01:20 +0000 Subject: [PATCH 206/206] Only use __builtin_bswap16() with GCC 4.8 and later. FossilOrigin-Name: ce8177e3e64d2ac4cd5d9e7757cdd5fcecd7d0ea --- manifest | 14 +++++++------- manifest.uuid | 2 +- src/btreeInt.h | 2 +- src/sqliteInt.h | 7 +++++++ 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/manifest b/manifest index cfb68a89fa..aa0967343b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sthe\suse\sof\shtonl()\sin\sthe\sprevious\scheck-in\sdue\sto\slinkage\sissues.\nAdd\sthe\sget2byteAligned()\smacro\sand\suse\sit\sfor\saccess\sto\sthe\scell\soffsets\non\sbtree\spages\sfor\sabout\sa\s1%\sperformance\sgain. -D 2015-06-30T13:28:18.237 +C Only\suse\s__builtin_bswap16()\swith\sGCC\s4.8\sand\slater. 
+D 2015-06-30T14:01:20.529 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 285a0a234ed7610d431d91671c136098c2bd86a9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -271,7 +271,7 @@ F src/bitvec.c d1f21d7d91690747881f03940584f4cc548c9d3d F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79 F src/btree.c 3a535c0118872c3ee4e198b80a62d09315381dab F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1 -F src/btreeInt.h 583240896ca96ba3b371b42e8cb6bd73c9e4717f +F src/btreeInt.h 64b5723d8410a1f02c94d99c46b1b74cd547e6ef F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70 F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0 F src/complete.c addcd8160b081131005d5bc2d34adf20c1c5c92f @@ -332,7 +332,7 @@ F src/shell.c 8af3cced094aebb5f57a8ad739b9dafc7867eed7 F src/sqlite.h.in 76d2f5637eb795b6300d9dd3c3ec3632ffafd721 F src/sqlite3.rc 992c9f5fb8285ae285d6be28240a7e8d3a7f2bad F src/sqlite3ext.h be1a718b7d2ce40ceba725ae92c8eb5f18003066 -F src/sqliteInt.h 89768198547bdd70a160e47643a0e493f711e8d0 +F src/sqliteInt.h cf643b5a34633de5d4703eba1e2b1a205af53c8a F src/sqliteLimit.h 216557999cb45f2e3578ed53ebefe228d779cb46 F src/status.c f266ad8a2892d659b74f0f50cb6a88b6e7c12179 F src/table.c 51b46b2a62d1b3a959633d593b89bab5e2c9155e @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P bc27ebd7f73e9fc8e00da6ec82632e439fcce812 -R be192770615d2f155ef5ba2c23f2b23e +P 79ff36b7170c9e7e7a9935c8b9d1665867771087 +R e9f634c6724f1e6a3b5c72453520bbb0 U drh -Z ba4fa6154f0804a80e26fea56b904f4a +Z f2b5d98d916a7a92a030423af1030c7f diff --git a/manifest.uuid b/manifest.uuid index 0e8e099549..818c8d4f8b 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -79ff36b7170c9e7e7a9935c8b9d1665867771087 \ No newline at 
end of file +ce8177e3e64d2ac4cd5d9e7757cdd5fcecd7d0ea \ No newline at end of file diff --git a/src/btreeInt.h b/src/btreeInt.h index 1ccdb08398..74117848ac 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -699,7 +699,7 @@ struct IntegrityCk { */ #if SQLITE_BYTEORDER==4321 # define get2byteAligned(x) (*(u16*)(x)) -#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000 # define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) #else # define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) diff --git a/src/sqliteInt.h b/src/sqliteInt.h index aed6557cdb..907434889d 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -60,6 +60,13 @@ # define _LARGEFILE_SOURCE 1 #endif +/* What version of GCC is being used. 0 means GCC is not being used */ +#ifdef __GNUC__ +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif + /* Needed for various definitions... */ #if defined(__GNUC__) && !defined(_GNU_SOURCE) # define _GNU_SOURCE