mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-01 06:27:03 +03:00
Merge the latest trunk changes into the sessions branch.
FossilOrigin-Name: 4c5e276c902e0b93cfc05bf2e1db966ecdac0ed0
This commit is contained in:
3056
ext/fts3/fts3.c
3056
ext/fts3/fts3.c
File diff suppressed because it is too large
Load Diff
@ -11,7 +11,6 @@
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
|
||||
#ifndef _FTSINT_H
|
||||
#define _FTSINT_H
|
||||
|
||||
@ -19,6 +18,16 @@
|
||||
# define NDEBUG 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
** FTS4 is really an extension for FTS3. It is enabled using the
|
||||
** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also all
|
||||
** the SQLITE_ENABLE_FTS4 macro to serve as an alisse for SQLITE_ENABLE_FTS3.
|
||||
*/
|
||||
#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
|
||||
# define SQLITE_ENABLE_FTS3
|
||||
#endif
|
||||
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
#include "sqlite3.h"
|
||||
#include "fts3_tokenizer.h"
|
||||
#include "fts3_hash.h"
|
||||
@ -47,12 +56,35 @@
|
||||
*/
|
||||
#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
|
||||
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(x,y) ((x)<(y)?(x):(y))
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Maximum length of a varint encoded integer. The varint format is different
|
||||
** from that used by SQLite, so the maximum length is 10, not 9.
|
||||
*/
|
||||
#define FTS3_VARINT_MAX 10
|
||||
|
||||
/*
|
||||
** FTS4 virtual tables may maintain multiple indexes - one index of all terms
|
||||
** in the document set and zero or more prefix indexes. All indexes are stored
|
||||
** as one or more b+-trees in the %_segments and %_segdir tables.
|
||||
**
|
||||
** It is possible to determine which index a b+-tree belongs to based on the
|
||||
** value stored in the "%_segdir.level" column. Given this value L, the index
|
||||
** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with
|
||||
** level values between 0 and 1023 (inclusive) belong to index 0, all levels
|
||||
** between 1024 and 2047 to index 1, and so on.
|
||||
**
|
||||
** It is considered impossible for an index to use more than 1024 levels. In
|
||||
** theory though this may happen, but only after at least
|
||||
** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
|
||||
*/
|
||||
#define FTS3_SEGDIR_MAXLEVEL 1024
|
||||
#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
|
||||
|
||||
/*
|
||||
** The testcase() macro is only used by the amalgamation. If undefined,
|
||||
** make it a no-op.
|
||||
@ -124,10 +156,11 @@ typedef struct Fts3Expr Fts3Expr;
|
||||
typedef struct Fts3Phrase Fts3Phrase;
|
||||
typedef struct Fts3PhraseToken Fts3PhraseToken;
|
||||
|
||||
typedef struct Fts3Doclist Fts3Doclist;
|
||||
typedef struct Fts3SegFilter Fts3SegFilter;
|
||||
typedef struct Fts3DeferredToken Fts3DeferredToken;
|
||||
typedef struct Fts3SegReader Fts3SegReader;
|
||||
typedef struct Fts3SegReaderCursor Fts3SegReaderCursor;
|
||||
typedef struct Fts3MultiSegReader Fts3MultiSegReader;
|
||||
|
||||
/*
|
||||
** A connection to a fulltext index is an instance of the following
|
||||
@ -148,7 +181,7 @@ struct Fts3Table {
|
||||
/* Precompiled statements used by the implementation. Each of these
|
||||
** statements is run and reset within a single virtual table API call.
|
||||
*/
|
||||
sqlite3_stmt *aStmt[24];
|
||||
sqlite3_stmt *aStmt[27];
|
||||
|
||||
char *zReadExprlist;
|
||||
char *zWriteExprlist;
|
||||
@ -156,21 +189,33 @@ struct Fts3Table {
|
||||
int nNodeSize; /* Soft limit for node size */
|
||||
u8 bHasStat; /* True if %_stat table exists */
|
||||
u8 bHasDocsize; /* True if %_docsize table exists */
|
||||
u8 bDescIdx; /* True if doclists are in reverse order */
|
||||
int nPgsz; /* Page size for host database */
|
||||
char *zSegmentsTbl; /* Name of %_segments table */
|
||||
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
|
||||
|
||||
/* The following hash table is used to buffer pending index updates during
|
||||
/* TODO: Fix the first paragraph of this comment.
|
||||
**
|
||||
** The following hash table is used to buffer pending index updates during
|
||||
** transactions. Variable nPendingData estimates the memory size of the
|
||||
** pending data, including hash table overhead, but not malloc overhead.
|
||||
** When nPendingData exceeds nMaxPendingData, the buffer is flushed
|
||||
** automatically. Variable iPrevDocid is the docid of the most recently
|
||||
** inserted record.
|
||||
**
|
||||
** A single FTS4 table may have multiple full-text indexes. For each index
|
||||
** there is an entry in the aIndex[] array. Index 0 is an index of all the
|
||||
** terms that appear in the document set. Each subsequent index in aIndex[]
|
||||
** is an index of prefixes of a specific length.
|
||||
*/
|
||||
int nMaxPendingData;
|
||||
int nPendingData;
|
||||
sqlite_int64 iPrevDocid;
|
||||
Fts3Hash pendingTerms;
|
||||
int nIndex; /* Size of aIndex[] */
|
||||
struct Fts3Index {
|
||||
int nPrefix; /* Prefix length (0 for main terms index) */
|
||||
Fts3Hash hPending; /* Pending terms table for this index */
|
||||
} *aIndex;
|
||||
int nMaxPendingData; /* Max pending data before flush to disk */
|
||||
int nPendingData; /* Current bytes of pending data */
|
||||
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
|
||||
|
||||
#if defined(SQLITE_DEBUG)
|
||||
/* State variables used for validating that the transaction control
|
||||
@ -201,9 +246,10 @@ struct Fts3Cursor {
|
||||
char *pNextId; /* Pointer into the body of aDoclist */
|
||||
char *aDoclist; /* List of docids for full-text queries */
|
||||
int nDoclist; /* Size of buffer at aDoclist */
|
||||
int desc; /* True to sort in descending order */
|
||||
u8 bDesc; /* True to sort in descending order */
|
||||
int eEvalmode; /* An FTS3_EVAL_XX constant */
|
||||
int nRowAvg; /* Average size of database rows, in pages */
|
||||
int nDoc; /* Documents in table */
|
||||
|
||||
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
|
||||
u32 *aMatchinfo; /* Information about most recent match */
|
||||
@ -234,47 +280,70 @@ struct Fts3Cursor {
|
||||
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
|
||||
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
|
||||
|
||||
|
||||
struct Fts3Doclist {
|
||||
char *aAll; /* Array containing doclist (or NULL) */
|
||||
int nAll; /* Size of a[] in bytes */
|
||||
char *pNextDocid; /* Pointer to next docid */
|
||||
|
||||
sqlite3_int64 iDocid; /* Current docid (if pList!=0) */
|
||||
int bFreeList; /* True if pList should be sqlite3_free()d */
|
||||
char *pList; /* Pointer to position list following iDocid */
|
||||
int nList; /* Length of position list */
|
||||
} doclist;
|
||||
|
||||
/*
|
||||
** A "phrase" is a sequence of one or more tokens that must match in
|
||||
** sequence. A single token is the base case and the most common case.
|
||||
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
|
||||
** nToken will be the number of tokens in the string.
|
||||
**
|
||||
** The nDocMatch and nMatch variables contain data that may be used by the
|
||||
** matchinfo() function. They are populated when the full-text index is
|
||||
** queried for hits on the phrase. If one or more tokens in the phrase
|
||||
** are deferred, the nDocMatch and nMatch variables are populated based
|
||||
** on the assumption that the
|
||||
*/
|
||||
struct Fts3PhraseToken {
|
||||
char *z; /* Text of the token */
|
||||
int n; /* Number of bytes in buffer z */
|
||||
int isPrefix; /* True if token ends with a "*" character */
|
||||
int bFulltext; /* True if full-text index was used */
|
||||
Fts3SegReaderCursor *pSegcsr; /* Segment-reader for this token */
|
||||
|
||||
/* Variables above this point are populated when the expression is
|
||||
** parsed (by code in fts3_expr.c). Below this point the variables are
|
||||
** used when evaluating the expression. */
|
||||
Fts3DeferredToken *pDeferred; /* Deferred token object for this token */
|
||||
Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */
|
||||
};
|
||||
|
||||
struct Fts3Phrase {
|
||||
/* Variables populated by fts3_expr.c when parsing a MATCH expression */
|
||||
/* Cache of doclist for this phrase. */
|
||||
Fts3Doclist doclist;
|
||||
int bIncr; /* True if doclist is loaded incrementally */
|
||||
int iDoclistToken;
|
||||
|
||||
/* Variables below this point are populated by fts3_expr.c when parsing
|
||||
** a MATCH expression. Everything above is part of the evaluation phase.
|
||||
*/
|
||||
int nToken; /* Number of tokens in the phrase */
|
||||
int iColumn; /* Index of column this phrase must match */
|
||||
int isNot; /* Phrase prefixed by unary not (-) operator */
|
||||
Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
|
||||
};
|
||||
|
||||
/*
|
||||
** A tree of these objects forms the RHS of a MATCH operator.
|
||||
**
|
||||
** If Fts3Expr.eType is either FTSQUERY_NEAR or FTSQUERY_PHRASE and isLoaded
|
||||
** is true, then aDoclist points to a malloced buffer, size nDoclist bytes,
|
||||
** containing the results of the NEAR or phrase query in FTS3 doclist
|
||||
** format. As usual, the initial "Length" field found in doclists stored
|
||||
** on disk is omitted from this buffer.
|
||||
** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist
|
||||
** points to a malloced buffer, size nDoclist bytes, containing the results
|
||||
** of this phrase query in FTS3 doclist format. As usual, the initial
|
||||
** "Length" field found in doclists stored on disk is omitted from this
|
||||
** buffer.
|
||||
**
|
||||
** Variable pCurrent always points to the start of a docid field within
|
||||
** aDoclist. Since the doclist is usually scanned in docid order, this can
|
||||
** be used to accelerate seeking to the required docid within the doclist.
|
||||
** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global
|
||||
** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
|
||||
** where nCol is the number of columns in the queried FTS table. The array
|
||||
** is populated as follows:
|
||||
**
|
||||
** aMI[iCol*3 + 0] = Undefined
|
||||
** aMI[iCol*3 + 1] = Number of occurrences
|
||||
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
|
||||
**
|
||||
** The aMI array is allocated using sqlite3_malloc(). It should be freed
|
||||
** when the expression node is.
|
||||
*/
|
||||
struct Fts3Expr {
|
||||
int eType; /* One of the FTSQUERY_XXX values defined below */
|
||||
@ -284,12 +353,13 @@ struct Fts3Expr {
|
||||
Fts3Expr *pRight; /* Right operand */
|
||||
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
|
||||
|
||||
int isLoaded; /* True if aDoclist/nDoclist are initialized. */
|
||||
char *aDoclist; /* Buffer containing doclist */
|
||||
int nDoclist; /* Size of aDoclist in bytes */
|
||||
/* The following are used by the fts3_eval.c module. */
|
||||
sqlite3_int64 iDocid; /* Current docid */
|
||||
u8 bEof; /* True this expression is at EOF already */
|
||||
u8 bStart; /* True if iDocid is valid */
|
||||
u8 bDeferred; /* True if this expression is entirely deferred */
|
||||
|
||||
sqlite3_int64 iCurrent;
|
||||
char *pCurrent;
|
||||
u32 *aMI;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -317,12 +387,12 @@ void sqlite3Fts3PendingTermsClear(Fts3Table *);
|
||||
int sqlite3Fts3Optimize(Fts3Table *);
|
||||
int sqlite3Fts3SegReaderNew(int, sqlite3_int64,
|
||||
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
|
||||
int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**);
|
||||
int sqlite3Fts3SegReaderPending(
|
||||
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
|
||||
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
|
||||
int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *);
|
||||
int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **);
|
||||
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **);
|
||||
int sqlite3Fts3ReadLock(Fts3Table *);
|
||||
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*);
|
||||
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
|
||||
|
||||
int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
|
||||
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
|
||||
@ -331,17 +401,18 @@ void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
|
||||
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
|
||||
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
|
||||
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
|
||||
char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *);
|
||||
void sqlite3Fts3SegmentsClose(Fts3Table *);
|
||||
|
||||
#define FTS3_SEGCURSOR_PENDING -1
|
||||
#define FTS3_SEGCURSOR_ALL -2
|
||||
/* Special values interpreted by sqlite3SegReaderCursor() */
|
||||
#define FTS3_SEGCURSOR_PENDING -1
|
||||
#define FTS3_SEGCURSOR_ALL -2
|
||||
|
||||
int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
|
||||
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
|
||||
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
|
||||
|
||||
int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, Fts3SegFilter*);
|
||||
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *);
|
||||
void sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor *);
|
||||
int sqlite3Fts3SegReaderCursor(
|
||||
Fts3Table *, int, const char *, int, int, int, Fts3SegReaderCursor *);
|
||||
Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *);
|
||||
|
||||
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
|
||||
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
|
||||
@ -358,7 +429,7 @@ struct Fts3SegFilter {
|
||||
int flags;
|
||||
};
|
||||
|
||||
struct Fts3SegReaderCursor {
|
||||
struct Fts3MultiSegReader {
|
||||
/* Used internally by sqlite3Fts3SegReaderXXX() calls */
|
||||
Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */
|
||||
int nSegment; /* Size of apSegment array */
|
||||
@ -367,8 +438,12 @@ struct Fts3SegReaderCursor {
|
||||
char *aBuffer; /* Buffer to merge doclists in */
|
||||
int nBuffer; /* Allocated size of aBuffer[] in bytes */
|
||||
|
||||
/* Cost of running this iterator. Used by fts3.c only. */
|
||||
int nCost;
|
||||
int iColFilter; /* If >=0, filter for this column */
|
||||
int bRestart;
|
||||
|
||||
/* Used by fts3.c only. */
|
||||
int nCost; /* Cost of running iterator */
|
||||
int bLookup; /* True if a lookup of a single entry. */
|
||||
|
||||
/* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
|
||||
char *zTerm; /* Pointer to term buffer */
|
||||
@ -383,11 +458,9 @@ int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
|
||||
int sqlite3Fts3GetVarint32(const char *, int *);
|
||||
int sqlite3Fts3VarintLen(sqlite3_uint64);
|
||||
void sqlite3Fts3Dequote(char *);
|
||||
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
|
||||
|
||||
char *sqlite3Fts3FindPositions(Fts3Cursor *, Fts3Expr *, sqlite3_int64, int);
|
||||
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *);
|
||||
int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *);
|
||||
int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);
|
||||
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
|
||||
|
||||
/* fts3_tokenizer.c */
|
||||
const char *sqlite3Fts3NextToken(const char *, int *);
|
||||
@ -417,4 +490,28 @@ int sqlite3Fts3InitTerm(sqlite3 *db);
|
||||
/* fts3_aux.c */
|
||||
int sqlite3Fts3InitAux(sqlite3 *db);
|
||||
|
||||
int sqlite3Fts3TermSegReaderCursor(
|
||||
Fts3Cursor *pCsr, /* Virtual table cursor handle */
|
||||
const char *zTerm, /* Term to query for */
|
||||
int nTerm, /* Size of zTerm in bytes */
|
||||
int isPrefix, /* True for a prefix search */
|
||||
Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */
|
||||
);
|
||||
|
||||
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
|
||||
|
||||
int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int);
|
||||
int sqlite3Fts3EvalNext(Fts3Cursor *pCsr);
|
||||
|
||||
int sqlite3Fts3MsrIncrStart(
|
||||
Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
|
||||
int sqlite3Fts3MsrIncrNext(
|
||||
Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
|
||||
char *sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol);
|
||||
int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
|
||||
int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
|
||||
|
||||
int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
|
||||
|
||||
#endif /* SQLITE_ENABLE_FTS3 */
|
||||
#endif /* _FTSINT_H */
|
||||
|
@ -11,10 +11,9 @@
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include "fts3Int.h"
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
@ -28,7 +27,7 @@ struct Fts3auxTable {
|
||||
|
||||
struct Fts3auxCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
Fts3SegReaderCursor csr; /* Must be right after "base" */
|
||||
Fts3MultiSegReader csr; /* Must be right after "base" */
|
||||
Fts3SegFilter filter;
|
||||
char *zStop;
|
||||
int nStop; /* Byte-length of string zStop */
|
||||
@ -96,6 +95,7 @@ static int fts3auxConnectMethod(
|
||||
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
|
||||
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
|
||||
p->pFts3Tab->db = db;
|
||||
p->pFts3Tab->nIndex = 1;
|
||||
|
||||
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
|
||||
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
|
||||
@ -376,7 +376,7 @@ static int fts3auxFilterMethod(
|
||||
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
|
||||
}
|
||||
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL,
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL,
|
||||
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
|
||||
);
|
||||
if( rc==SQLITE_OK ){
|
||||
|
@ -15,6 +15,7 @@
|
||||
** syntax is relatively simple, the whole tokenizer/parser system is
|
||||
** hand-coded.
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
/*
|
||||
@ -77,16 +78,24 @@ int sqlite3_fts3_enable_parentheses = 0;
|
||||
*/
|
||||
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
|
||||
|
||||
#include "fts3Int.h"
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
** isNot:
|
||||
** This variable is used by function getNextNode(). When getNextNode() is
|
||||
** called, it sets ParseContext.isNot to true if the 'next node' is a
|
||||
** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
|
||||
** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
|
||||
** zero.
|
||||
*/
|
||||
typedef struct ParseContext ParseContext;
|
||||
struct ParseContext {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
|
||||
const char **azCol; /* Array of column names for fts3 table */
|
||||
int nCol; /* Number of entries in azCol[] */
|
||||
int iDefaultCol; /* Default column to query */
|
||||
int isNot; /* True if getNextNode() sees a unary - */
|
||||
sqlite3_context *pCtx; /* Write error message here */
|
||||
int nNest; /* Number of nested brackets */
|
||||
};
|
||||
@ -172,7 +181,7 @@ static int getNextToken(
|
||||
iEnd++;
|
||||
}
|
||||
if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
|
||||
pRet->pPhrase->isNot = 1;
|
||||
pParse->isNot = 1;
|
||||
}
|
||||
}
|
||||
nConsumed = iEnd;
|
||||
@ -224,36 +233,55 @@ static int getNextString(
|
||||
char *zTemp = 0;
|
||||
int nTemp = 0;
|
||||
|
||||
const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
|
||||
int nToken = 0;
|
||||
|
||||
/* The final Fts3Expr data structure, including the Fts3Phrase,
|
||||
** Fts3PhraseToken structures token buffers are all stored as a single
|
||||
** allocation so that the expression can be freed with a single call to
|
||||
** sqlite3_free(). Setting this up requires a two pass approach.
|
||||
**
|
||||
** The first pass, in the block below, uses a tokenizer cursor to iterate
|
||||
** through the tokens in the expression. This pass uses fts3ReallocOrFree()
|
||||
** to assemble data in two dynamic buffers:
|
||||
**
|
||||
** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
|
||||
** structure, followed by the array of Fts3PhraseToken
|
||||
** structures. This pass only populates the Fts3PhraseToken array.
|
||||
**
|
||||
** Buffer zTemp: Contains copies of all tokens.
|
||||
**
|
||||
** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
|
||||
** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
|
||||
** structures.
|
||||
*/
|
||||
rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
|
||||
if( rc==SQLITE_OK ){
|
||||
int ii;
|
||||
pCursor->pTokenizer = pTokenizer;
|
||||
for(ii=0; rc==SQLITE_OK; ii++){
|
||||
const char *zToken;
|
||||
int nToken, iBegin, iEnd, iPos;
|
||||
rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
|
||||
const char *zByte;
|
||||
int nByte, iBegin, iEnd, iPos;
|
||||
rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
|
||||
if( rc==SQLITE_OK ){
|
||||
int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
|
||||
p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken));
|
||||
zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
|
||||
if( !p || !zTemp ){
|
||||
goto no_mem;
|
||||
}
|
||||
if( ii==0 ){
|
||||
memset(p, 0, nByte);
|
||||
p->pPhrase = (Fts3Phrase *)&p[1];
|
||||
}
|
||||
p->pPhrase = (Fts3Phrase *)&p[1];
|
||||
memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken));
|
||||
p->pPhrase->nToken = ii+1;
|
||||
p->pPhrase->aToken[ii].n = nToken;
|
||||
memcpy(&zTemp[nTemp], zToken, nToken);
|
||||
nTemp += nToken;
|
||||
if( iEnd<nInput && zInput[iEnd]=='*' ){
|
||||
p->pPhrase->aToken[ii].isPrefix = 1;
|
||||
}else{
|
||||
p->pPhrase->aToken[ii].isPrefix = 0;
|
||||
}
|
||||
Fts3PhraseToken *pToken;
|
||||
|
||||
p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
|
||||
if( !p ) goto no_mem;
|
||||
|
||||
zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
|
||||
if( !zTemp ) goto no_mem;
|
||||
|
||||
assert( nToken==ii );
|
||||
pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
|
||||
memset(pToken, 0, sizeof(Fts3PhraseToken));
|
||||
|
||||
memcpy(&zTemp[nTemp], zByte, nByte);
|
||||
nTemp += nByte;
|
||||
|
||||
pToken->n = nByte;
|
||||
pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
|
||||
nToken = ii+1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -263,28 +291,24 @@ static int getNextString(
|
||||
|
||||
if( rc==SQLITE_DONE ){
|
||||
int jj;
|
||||
char *zNew = NULL;
|
||||
int nNew = 0;
|
||||
int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
|
||||
nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken);
|
||||
p = fts3ReallocOrFree(p, nByte + nTemp);
|
||||
if( !p ){
|
||||
goto no_mem;
|
||||
}
|
||||
if( zTemp ){
|
||||
zNew = &(((char *)p)[nByte]);
|
||||
memcpy(zNew, zTemp, nTemp);
|
||||
}else{
|
||||
memset(p, 0, nByte+nTemp);
|
||||
}
|
||||
p->pPhrase = (Fts3Phrase *)&p[1];
|
||||
for(jj=0; jj<p->pPhrase->nToken; jj++){
|
||||
p->pPhrase->aToken[jj].z = &zNew[nNew];
|
||||
nNew += p->pPhrase->aToken[jj].n;
|
||||
}
|
||||
sqlite3_free(zTemp);
|
||||
char *zBuf = 0;
|
||||
|
||||
p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
|
||||
if( !p ) goto no_mem;
|
||||
memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
|
||||
p->eType = FTSQUERY_PHRASE;
|
||||
p->pPhrase = (Fts3Phrase *)&p[1];
|
||||
p->pPhrase->iColumn = pParse->iDefaultCol;
|
||||
p->pPhrase->nToken = nToken;
|
||||
|
||||
zBuf = (char *)&p->pPhrase->aToken[nToken];
|
||||
memcpy(zBuf, zTemp, nTemp);
|
||||
sqlite3_free(zTemp);
|
||||
|
||||
for(jj=0; jj<p->pPhrase->nToken; jj++){
|
||||
p->pPhrase->aToken[jj].z = zBuf;
|
||||
zBuf += p->pPhrase->aToken[jj].n;
|
||||
}
|
||||
rc = SQLITE_OK;
|
||||
}
|
||||
|
||||
@ -341,6 +365,8 @@ static int getNextNode(
|
||||
const char *zInput = z;
|
||||
int nInput = n;
|
||||
|
||||
pParse->isNot = 0;
|
||||
|
||||
/* Skip over any whitespace before checking for a keyword, an open or
|
||||
** close bracket, or a quoted string.
|
||||
*/
|
||||
@ -559,7 +585,7 @@ static int fts3ExprParse(
|
||||
int isPhrase;
|
||||
|
||||
if( !sqlite3_fts3_enable_parentheses
|
||||
&& p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot
|
||||
&& p->eType==FTSQUERY_PHRASE && pParse->isNot
|
||||
){
|
||||
/* Create an implicit NOT operator. */
|
||||
Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
|
||||
@ -577,7 +603,6 @@ static int fts3ExprParse(
|
||||
p = pPrev;
|
||||
}else{
|
||||
int eType = p->eType;
|
||||
assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
|
||||
isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
|
||||
|
||||
/* The isRequirePhrase variable is set to true if a phrase or
|
||||
@ -740,9 +765,11 @@ int sqlite3Fts3ExprParse(
|
||||
*/
|
||||
void sqlite3Fts3ExprFree(Fts3Expr *p){
|
||||
if( p ){
|
||||
assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
|
||||
sqlite3Fts3ExprFree(p->pLeft);
|
||||
sqlite3Fts3ExprFree(p->pRight);
|
||||
sqlite3_free(p->aDoclist);
|
||||
sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
|
||||
sqlite3_free(p->aMI);
|
||||
sqlite3_free(p);
|
||||
}
|
||||
}
|
||||
@ -800,7 +827,7 @@ static char *exprToString(Fts3Expr *pExpr, char *zBuf){
|
||||
Fts3Phrase *pPhrase = pExpr->pPhrase;
|
||||
int i;
|
||||
zBuf = sqlite3_mprintf(
|
||||
"%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot);
|
||||
"%zPHRASE %d 0", zBuf, pPhrase->iColumn);
|
||||
for(i=0; zBuf && i<pPhrase->nToken; i++){
|
||||
zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
|
||||
pPhrase->aToken[i].n, pPhrase->aToken[i].z,
|
||||
|
@ -23,6 +23,7 @@
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
|
@ -10,10 +10,8 @@
|
||||
**
|
||||
*************************************************************************
|
||||
** This file implements a tokenizer for fts3 based on the ICU library.
|
||||
**
|
||||
** $Id: fts3_icu.c,v 1.3 2008/09/01 18:34:20 danielk1977 Exp $
|
||||
*/
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
#ifdef SQLITE_ENABLE_ICU
|
||||
|
||||
|
@ -22,9 +22,8 @@
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -11,9 +11,9 @@
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include "fts3Int.h"
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
@ -176,51 +176,6 @@ static int fts3ExprIterate(
|
||||
return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
|
||||
}
|
||||
|
||||
/*
|
||||
** The argument to this function is always a phrase node. Its doclist
|
||||
** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes
|
||||
** to the left of this one in the query tree have already been loaded.
|
||||
**
|
||||
** If this phrase node is part of a series of phrase nodes joined by
|
||||
** NEAR operators (and is not the left-most of said series), then elements are
|
||||
** removed from the phrases doclist consistent with the NEAR restriction. If
|
||||
** required, elements may be removed from the doclists of phrases to the
|
||||
** left of this one that are part of the same series of NEAR operator
|
||||
** connected phrases.
|
||||
**
|
||||
** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
|
||||
*/
|
||||
static int fts3ExprNearTrim(Fts3Expr *pExpr){
|
||||
int rc = SQLITE_OK;
|
||||
Fts3Expr *pParent = pExpr->pParent;
|
||||
|
||||
assert( pExpr->eType==FTSQUERY_PHRASE );
|
||||
while( rc==SQLITE_OK
|
||||
&& pParent
|
||||
&& pParent->eType==FTSQUERY_NEAR
|
||||
&& pParent->pRight==pExpr
|
||||
){
|
||||
/* This expression (pExpr) is the right-hand-side of a NEAR operator.
|
||||
** Find the expression to the left of the same operator.
|
||||
*/
|
||||
int nNear = pParent->nNear;
|
||||
Fts3Expr *pLeft = pParent->pLeft;
|
||||
|
||||
if( pLeft->eType!=FTSQUERY_PHRASE ){
|
||||
assert( pLeft->eType==FTSQUERY_NEAR );
|
||||
assert( pLeft->pRight->eType==FTSQUERY_PHRASE );
|
||||
pLeft = pLeft->pRight;
|
||||
}
|
||||
|
||||
rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear);
|
||||
|
||||
pExpr = pLeft;
|
||||
pParent = pExpr->pParent;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** This is an fts3ExprIterate() callback used while loading the doclists
|
||||
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
|
||||
@ -228,20 +183,13 @@ static int fts3ExprNearTrim(Fts3Expr *pExpr){
|
||||
*/
|
||||
static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
int rc = SQLITE_OK;
|
||||
Fts3Phrase *pPhrase = pExpr->pPhrase;
|
||||
LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
|
||||
|
||||
UNUSED_PARAMETER(iPhrase);
|
||||
|
||||
p->nPhrase++;
|
||||
p->nToken += pExpr->pPhrase->nToken;
|
||||
|
||||
if( pExpr->isLoaded==0 ){
|
||||
rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr);
|
||||
pExpr->isLoaded = 1;
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts3ExprNearTrim(pExpr);
|
||||
}
|
||||
}
|
||||
p->nToken += pPhrase->nToken;
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -415,7 +363,7 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
|
||||
pPhrase->nToken = pExpr->pPhrase->nToken;
|
||||
|
||||
pCsr = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->pCsr->iPrevId, p->iCol);
|
||||
pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
|
||||
if( pCsr ){
|
||||
int iFirst = 0;
|
||||
pPhrase->pList = pCsr;
|
||||
@ -772,26 +720,6 @@ static int fts3ColumnlistCount(char **ppCollist){
|
||||
return nEntry;
|
||||
}
|
||||
|
||||
static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){
|
||||
char *pCsr = *pp;
|
||||
while( *pCsr ){
|
||||
int nHit;
|
||||
sqlite3_int64 iCol = 0;
|
||||
if( *pCsr==0x01 ){
|
||||
pCsr++;
|
||||
pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
|
||||
}
|
||||
nHit = fts3ColumnlistCount(&pCsr);
|
||||
assert( nHit>0 );
|
||||
if( isGlobal ){
|
||||
aOut[iCol*3+1]++;
|
||||
}
|
||||
aOut[iCol*3] += nHit;
|
||||
}
|
||||
pCsr++;
|
||||
*pp = pCsr;
|
||||
}
|
||||
|
||||
/*
|
||||
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
|
||||
** for a single query.
|
||||
@ -825,48 +753,9 @@ static int fts3ExprGlobalHitsCb(
|
||||
void *pCtx /* Pointer to MatchInfo structure */
|
||||
){
|
||||
MatchInfo *p = (MatchInfo *)pCtx;
|
||||
Fts3Cursor *pCsr = p->pCursor;
|
||||
char *pIter;
|
||||
char *pEnd;
|
||||
char *pFree = 0;
|
||||
u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol];
|
||||
|
||||
assert( pExpr->isLoaded );
|
||||
assert( pExpr->eType==FTSQUERY_PHRASE );
|
||||
|
||||
if( pCsr->pDeferred ){
|
||||
Fts3Phrase *pPhrase = pExpr->pPhrase;
|
||||
int ii;
|
||||
for(ii=0; ii<pPhrase->nToken; ii++){
|
||||
if( pPhrase->aToken[ii].bFulltext ) break;
|
||||
}
|
||||
if( ii<pPhrase->nToken ){
|
||||
int nFree = 0;
|
||||
int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
pIter = pFree;
|
||||
pEnd = &pFree[nFree];
|
||||
}else{
|
||||
int iCol; /* Column index */
|
||||
for(iCol=0; iCol<p->nCol; iCol++){
|
||||
aOut[iCol*3 + 1] = (u32)p->nDoc;
|
||||
aOut[iCol*3 + 2] = (u32)p->nDoc;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}else{
|
||||
pIter = pExpr->aDoclist;
|
||||
pEnd = &pExpr->aDoclist[pExpr->nDoclist];
|
||||
}
|
||||
|
||||
/* Fill in the global hit count matrix row for this phrase. */
|
||||
while( pIter<pEnd ){
|
||||
while( *pIter++ & 0x80 ); /* Skip past docid. */
|
||||
fts3LoadColumnlistCounts(&pIter, &aOut[1], 1);
|
||||
}
|
||||
|
||||
sqlite3_free(pFree);
|
||||
return SQLITE_OK;
|
||||
return sqlite3Fts3EvalPhraseStats(
|
||||
p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -883,14 +772,13 @@ static int fts3ExprLocalHitsCb(
|
||||
int iStart = iPhrase * p->nCol * 3;
|
||||
int i;
|
||||
|
||||
for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0;
|
||||
|
||||
if( pExpr->aDoclist ){
|
||||
for(i=0; i<p->nCol; i++){
|
||||
char *pCsr;
|
||||
|
||||
pCsr = sqlite3Fts3FindPositions(p->pCursor, pExpr, p->pCursor->iPrevId, -1);
|
||||
pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i);
|
||||
if( pCsr ){
|
||||
fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0);
|
||||
p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
|
||||
}else{
|
||||
p->aMatchinfo[iStart+i*3] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -976,9 +864,8 @@ static int fts3MatchinfoSelectDoctotal(
|
||||
typedef struct LcsIterator LcsIterator;
|
||||
struct LcsIterator {
|
||||
Fts3Expr *pExpr; /* Pointer to phrase expression */
|
||||
char *pRead; /* Cursor used to iterate through aDoclist */
|
||||
int iPosOffset; /* Tokens count up to end of this phrase */
|
||||
int iCol; /* Current column number */
|
||||
char *pRead; /* Cursor used to iterate through aDoclist */
|
||||
int iPos; /* Current position */
|
||||
};
|
||||
|
||||
@ -1009,17 +896,10 @@ static int fts3LcsIteratorAdvance(LcsIterator *pIter){
|
||||
int rc = 0;
|
||||
|
||||
pRead += sqlite3Fts3GetVarint(pRead, &iRead);
|
||||
if( iRead==0 ){
|
||||
pIter->iCol = LCS_ITERATOR_FINISHED;
|
||||
if( iRead==0 || iRead==1 ){
|
||||
pRead = 0;
|
||||
rc = 1;
|
||||
}else{
|
||||
if( iRead==1 ){
|
||||
pRead += sqlite3Fts3GetVarint(pRead, &iRead);
|
||||
pIter->iCol = (int)iRead;
|
||||
pIter->iPos = pIter->iPosOffset;
|
||||
pRead += sqlite3Fts3GetVarint(pRead, &iRead);
|
||||
rc = 1;
|
||||
}
|
||||
pIter->iPos += (int)(iRead-2);
|
||||
}
|
||||
|
||||
@ -1051,42 +931,34 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
|
||||
if( !aIter ) return SQLITE_NOMEM;
|
||||
memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
|
||||
|
||||
for(i=0; i<pInfo->nPhrase; i++){
|
||||
LcsIterator *pIter = &aIter[i];
|
||||
nToken -= pIter->pExpr->pPhrase->nToken;
|
||||
pIter->iPosOffset = nToken;
|
||||
pIter->pRead = sqlite3Fts3FindPositions(pCsr,pIter->pExpr,pCsr->iPrevId,-1);
|
||||
if( pIter->pRead ){
|
||||
pIter->iPos = pIter->iPosOffset;
|
||||
fts3LcsIteratorAdvance(&aIter[i]);
|
||||
}else{
|
||||
pIter->iCol = LCS_ITERATOR_FINISHED;
|
||||
}
|
||||
}
|
||||
|
||||
for(iCol=0; iCol<pInfo->nCol; iCol++){
|
||||
int nLcs = 0; /* LCS value for this column */
|
||||
int nLive = 0; /* Number of iterators in aIter not at EOF */
|
||||
|
||||
/* Loop through the iterators in aIter[]. Set nLive to the number of
|
||||
** iterators that point to a position-list corresponding to column iCol.
|
||||
*/
|
||||
for(i=0; i<pInfo->nPhrase; i++){
|
||||
assert( aIter[i].iCol>=iCol );
|
||||
if( aIter[i].iCol==iCol ) nLive++;
|
||||
LcsIterator *pIt = &aIter[i];
|
||||
pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol);
|
||||
if( pIt->pRead ){
|
||||
pIt->iPos = pIt->iPosOffset;
|
||||
fts3LcsIteratorAdvance(&aIter[i]);
|
||||
nLive++;
|
||||
}
|
||||
}
|
||||
|
||||
/* The following loop runs until all iterators in aIter[] have finished
|
||||
** iterating through positions in column iCol. Exactly one of the
|
||||
** iterators is advanced each time the body of the loop is run.
|
||||
*/
|
||||
while( nLive>0 ){
|
||||
LcsIterator *pAdv = 0; /* The iterator to advance by one position */
|
||||
int nThisLcs = 0; /* LCS for the current iterator positions */
|
||||
|
||||
for(i=0; i<pInfo->nPhrase; i++){
|
||||
LcsIterator *pIter = &aIter[i];
|
||||
if( iCol!=pIter->iCol ){
|
||||
if( pIter->pRead==0 ){
|
||||
/* This iterator is already at EOF for this column. */
|
||||
nThisLcs = 0;
|
||||
}else{
|
||||
@ -1426,7 +1298,7 @@ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
int iPos = 0; /* First position in position-list */
|
||||
|
||||
UNUSED_PARAMETER(iPhrase);
|
||||
pList = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->iDocid, p->iCol);
|
||||
pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
|
||||
nTerm = pExpr->pPhrase->nToken;
|
||||
if( pList ){
|
||||
fts3GetDeltaPosition(&pList, &iPos);
|
||||
|
@ -15,10 +15,10 @@
|
||||
** access to the full-text index of an FTS table.
|
||||
*/
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#include "fts3Int.h"
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
@ -27,12 +27,13 @@ typedef struct Fts3termCursor Fts3termCursor;
|
||||
|
||||
struct Fts3termTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
int iIndex; /* Index for Fts3Table.aIndex[] */
|
||||
Fts3Table *pFts3Tab;
|
||||
};
|
||||
|
||||
struct Fts3termCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
Fts3SegReaderCursor csr; /* Must be right after "base" */
|
||||
Fts3MultiSegReader csr; /* Must be right after "base" */
|
||||
Fts3SegFilter filter;
|
||||
|
||||
int isEof; /* True if cursor is at EOF */
|
||||
@ -56,7 +57,7 @@ struct Fts3termCursor {
|
||||
*/
|
||||
static int fts3termConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pUnused, /* Unused */
|
||||
void *pCtx, /* Non-zero for an fts4prefix table */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
@ -69,8 +70,12 @@ static int fts3termConnectMethod(
|
||||
int nByte; /* Bytes of space to allocate here */
|
||||
int rc; /* value returned by declare_vtab() */
|
||||
Fts3termTable *p; /* Virtual table object to return */
|
||||
int iIndex = 0;
|
||||
|
||||
UNUSED_PARAMETER(pUnused);
|
||||
if( argc==5 ){
|
||||
iIndex = atoi(argv[4]);
|
||||
argc--;
|
||||
}
|
||||
|
||||
/* The user should specify a single argument - the name of an fts3 table. */
|
||||
if( argc!=4 ){
|
||||
@ -97,6 +102,8 @@ static int fts3termConnectMethod(
|
||||
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
|
||||
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
|
||||
p->pFts3Tab->db = db;
|
||||
p->pFts3Tab->nIndex = iIndex+1;
|
||||
p->iIndex = iIndex;
|
||||
|
||||
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
|
||||
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
|
||||
@ -244,7 +251,8 @@ static int fts3termFilterMethod(
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
|
||||
Fts3termTable *p = (Fts3termTable *)pCursor->pVtab;
|
||||
Fts3Table *pFts3 = p->pFts3Tab;
|
||||
int rc;
|
||||
|
||||
UNUSED_PARAMETER(nVal);
|
||||
@ -262,7 +270,7 @@ static int fts3termFilterMethod(
|
||||
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
|
||||
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
|
||||
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, FTS3_SEGCURSOR_ALL,
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL,
|
||||
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
|
||||
);
|
||||
if( rc==SQLITE_OK ){
|
||||
|
320
ext/fts3/fts3_test.c
Normal file
320
ext/fts3/fts3_test.c
Normal file
@ -0,0 +1,320 @@
|
||||
/*
|
||||
** 2011 Jun 13
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file is not part of the production FTS code. It is only used for
|
||||
** testing. It contains a Tcl command that can be used to test if a document
|
||||
** matches an FTS NEAR expression.
|
||||
*/
|
||||
|
||||
#include <tcl.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define NM_MAX_TOKEN 12
|
||||
|
||||
typedef struct NearPhrase NearPhrase;
|
||||
typedef struct NearDocument NearDocument;
|
||||
typedef struct NearToken NearToken;
|
||||
|
||||
struct NearDocument {
|
||||
int nToken; /* Length of token in bytes */
|
||||
NearToken *aToken; /* Token array */
|
||||
};
|
||||
|
||||
struct NearToken {
|
||||
int n; /* Length of token in bytes */
|
||||
const char *z; /* Pointer to token string */
|
||||
};
|
||||
|
||||
struct NearPhrase {
|
||||
int nNear; /* Preceding NEAR value */
|
||||
int nToken; /* Number of tokens in this phrase */
|
||||
NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
|
||||
};
|
||||
|
||||
static int nm_phrase_match(
|
||||
NearPhrase *p,
|
||||
NearToken *aToken
|
||||
){
|
||||
int ii;
|
||||
|
||||
for(ii=0; ii<p->nToken; ii++){
|
||||
NearToken *pToken = &p->aToken[ii];
|
||||
if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
|
||||
if( aToken[ii].n<(pToken->n-1) ) return 0;
|
||||
if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
|
||||
}else{
|
||||
if( aToken[ii].n!=pToken->n ) return 0;
|
||||
if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int nm_near_chain(
|
||||
int iDir, /* Direction to iterate through aPhrase[] */
|
||||
NearDocument *pDoc, /* Document to match against */
|
||||
int iPos, /* Position at which iPhrase was found */
|
||||
int nPhrase, /* Size of phrase array */
|
||||
NearPhrase *aPhrase, /* Phrase array */
|
||||
int iPhrase /* Index of phrase found */
|
||||
){
|
||||
int iStart;
|
||||
int iStop;
|
||||
int ii;
|
||||
int nNear;
|
||||
int iPhrase2;
|
||||
NearPhrase *p;
|
||||
NearPhrase *pPrev;
|
||||
|
||||
assert( iDir==1 || iDir==-1 );
|
||||
|
||||
if( iDir==1 ){
|
||||
if( (iPhrase+1)==nPhrase ) return 1;
|
||||
nNear = aPhrase[iPhrase+1].nNear;
|
||||
}else{
|
||||
if( iPhrase==0 ) return 1;
|
||||
nNear = aPhrase[iPhrase].nNear;
|
||||
}
|
||||
pPrev = &aPhrase[iPhrase];
|
||||
iPhrase2 = iPhrase+iDir;
|
||||
p = &aPhrase[iPhrase2];
|
||||
|
||||
iStart = iPos - nNear - p->nToken;
|
||||
iStop = iPos + nNear + pPrev->nToken;
|
||||
|
||||
if( iStart<0 ) iStart = 0;
|
||||
if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
|
||||
|
||||
for(ii=iStart; ii<=iStop; ii++){
|
||||
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
|
||||
if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nm_match_count(
|
||||
NearDocument *pDoc, /* Document to match against */
|
||||
int nPhrase, /* Size of phrase array */
|
||||
NearPhrase *aPhrase, /* Phrase array */
|
||||
int iPhrase /* Index of phrase to count matches for */
|
||||
){
|
||||
int nOcc = 0;
|
||||
int ii;
|
||||
NearPhrase *p = &aPhrase[iPhrase];
|
||||
|
||||
for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
|
||||
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
|
||||
/* Test forward NEAR chain (i>iPhrase) */
|
||||
if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
|
||||
|
||||
/* Test reverse NEAR chain (i<iPhrase) */
|
||||
if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
|
||||
|
||||
/* This is a real match. Increment the counter. */
|
||||
nOcc++;
|
||||
}
|
||||
}
|
||||
|
||||
return nOcc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
|
||||
*/
|
||||
static int fts3_near_match_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
int nTotal = 0;
|
||||
int rc;
|
||||
int ii;
|
||||
int nPhrase;
|
||||
NearPhrase *aPhrase = 0;
|
||||
NearDocument doc = {0, 0};
|
||||
Tcl_Obj **apDocToken;
|
||||
Tcl_Obj *pRet;
|
||||
Tcl_Obj *pPhrasecount = 0;
|
||||
|
||||
Tcl_Obj **apExprToken;
|
||||
int nExprToken;
|
||||
|
||||
/* Must have 3 or more arguments. */
|
||||
if( objc<3 || (objc%2)==0 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
|
||||
rc = TCL_ERROR;
|
||||
goto near_match_out;
|
||||
}
|
||||
|
||||
for(ii=3; ii<objc; ii+=2){
|
||||
enum NM_enum { NM_PHRASECOUNTS };
|
||||
struct TestnmSubcmd {
|
||||
char *zName;
|
||||
enum NM_enum eOpt;
|
||||
} aOpt[] = {
|
||||
{ "-phrasecountvar", NM_PHRASECOUNTS },
|
||||
{ 0, 0 }
|
||||
};
|
||||
int iOpt;
|
||||
if( Tcl_GetIndexFromObjStruct(
|
||||
interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt)
|
||||
){
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
switch( aOpt[iOpt].eOpt ){
|
||||
case NM_PHRASECOUNTS:
|
||||
pPhrasecount = objv[ii+1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
|
||||
for(ii=0; ii<doc.nToken; ii++){
|
||||
doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
|
||||
}
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
|
||||
nPhrase = (nExprToken + 1) / 2;
|
||||
aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
|
||||
memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
|
||||
for(ii=0; ii<nPhrase; ii++){
|
||||
Tcl_Obj *pPhrase = apExprToken[ii*2];
|
||||
Tcl_Obj **apToken;
|
||||
int nToken;
|
||||
int jj;
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
if( nToken>NM_MAX_TOKEN ){
|
||||
Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
|
||||
rc = TCL_ERROR;
|
||||
goto near_match_out;
|
||||
}
|
||||
for(jj=0; jj<nToken; jj++){
|
||||
NearToken *pT = &aPhrase[ii].aToken[jj];
|
||||
pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
|
||||
}
|
||||
aPhrase[ii].nToken = nToken;
|
||||
}
|
||||
for(ii=1; ii<nPhrase; ii++){
|
||||
Tcl_Obj *pNear = apExprToken[2*ii-1];
|
||||
int nNear;
|
||||
rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
aPhrase[ii].nNear = nNear;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
for(ii=0; ii<nPhrase; ii++){
|
||||
int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
|
||||
Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
|
||||
nTotal += nOcc;
|
||||
}
|
||||
if( pPhrasecount ){
|
||||
Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
|
||||
|
||||
near_match_out:
|
||||
ckfree((char *)aPhrase);
|
||||
ckfree((char *)doc.aToken);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
|
||||
**
|
||||
** Normally, FTS uses hard-coded values to determine the minimum doclist
|
||||
** size eligible for incremental loading, and the size of the chunks loaded
|
||||
** when a doclist is incrementally loaded. This command allows the built-in
|
||||
** values to be overridden for testing purposes.
|
||||
**
|
||||
** If present, the first argument is the chunksize in bytes to load doclists
|
||||
** in. The second argument is the minimum doclist size in bytes to use
|
||||
** incremental loading with.
|
||||
**
|
||||
** Whether or not the arguments are present, this command returns a list of
|
||||
** two integers - the initial chunksize and threshold when the command is
|
||||
** invoked. This can be used to restore the default behaviour after running
|
||||
** tests. For example:
|
||||
**
|
||||
** # Override incr-load settings for testing:
|
||||
** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
|
||||
**
|
||||
** .... run tests ....
|
||||
**
|
||||
** # Restore initial incr-load settings:
|
||||
** eval fts3_configure_incr_load $cfg
|
||||
*/
|
||||
static int fts3_configure_incr_load_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
extern int test_fts3_node_chunksize;
|
||||
extern int test_fts3_node_chunk_threshold;
|
||||
int iArg1;
|
||||
int iArg2;
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
if( objc!=1 && objc!=3 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
Tcl_ListObjAppendElement(
|
||||
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
|
||||
Tcl_ListObjAppendElement(
|
||||
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
|
||||
|
||||
if( objc==3 ){
|
||||
int iArg1;
|
||||
int iArg2;
|
||||
if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
|
||||
|| Tcl_GetIntFromObj(interp, objv[2], &iArg2)
|
||||
){
|
||||
Tcl_DecrRefCount(pRet);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
test_fts3_node_chunksize = iArg1;
|
||||
test_fts3_node_chunk_threshold = iArg2;
|
||||
}
|
||||
|
||||
Tcl_SetObjResult(interp, pRet);
|
||||
Tcl_DecrRefCount(pRet);
|
||||
#endif
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
int Sqlitetestfts3_Init(Tcl_Interp *interp){
|
||||
Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
|
||||
Tcl_CreateObjCommand(interp,
|
||||
"fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
|
||||
);
|
||||
return TCL_OK;
|
||||
}
|
@ -23,14 +23,14 @@
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include "sqlite3ext.h"
|
||||
#ifndef SQLITE_CORE
|
||||
SQLITE_EXTENSION_INIT1
|
||||
#endif
|
||||
|
||||
#include "fts3Int.h"
|
||||
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
|
@ -22,9 +22,8 @@
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user