1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-22 20:22:44 +03:00

Add the tokendata=1 option and related APIs to fts5.

FossilOrigin-Name: a76a636b23c0ebd95d47fdf8358de4729e51a5f68f1a730cd4d89b378e94ac0d
This commit is contained in:
dan
2023-12-06 14:36:34 +00:00
19 changed files with 2329 additions and 176 deletions

View File

@@ -261,9 +261,30 @@ struct Fts5PhraseIter {
**
** xPhraseNextColumn()
** See xPhraseFirstColumn above.
**
** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken)
** This is used to access token iToken of phrase iPhrase of the current
** query. Before returning, output parameter *ppToken is set to point
** to a buffer containing the requested token, and *pnToken to the
** size of this buffer in bytes.
**
** The output text is not a copy of the query text that specified the
** token. It is the output of the tokenizer module. For tokendata=1
** tables, this includes any embedded 0x00 and trailing data.
**
** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken)
** This is used to access token iToken of phrase hit iIdx within the
** current row.
**
** The output text is not a copy of the document text that was tokenized.
** It is the output of the tokenizer module. For tokendata=1 tables, this
** includes any embedded 0x00 and trailing data.
**
** This API can be quite slow if used with an FTS5 table created with the
** "detail=none" or "detail=column" option.
*/
struct Fts5ExtensionApi {
int iVersion; /* Currently always set to 2 */
int iVersion; /* Currently always set to 3 */
void *(*xUserData)(Fts5Context*);
@@ -298,6 +319,13 @@ struct Fts5ExtensionApi {
int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
/* Below this point are iVersion>=3 only */
int (*xQueryToken)(Fts5Context*,
int iPhrase, int iToken,
const char **ppToken, int *pnToken
);
int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*);
};
/*

View File

@@ -196,6 +196,7 @@ struct Fts5Config {
char *zContent; /* content table */
char *zContentRowid; /* "content_rowid=" option value */
int bColumnsize; /* "columnsize=" option value (dflt==1) */
int bTokendata; /* "tokendata=" option value (dflt==0) */
int eDetail; /* FTS5_DETAIL_XXX value */
char *zContentExprlist;
Fts5Tokenizer *pTok;
@@ -384,17 +385,19 @@ struct Fts5IndexIter {
/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
/* The following are used internally by the fts5_index.c module. They are
** defined here only to make it easier to avoid clashes with the flags
** above. */
#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
#define FTS5INDEX_QUERY_NOOUTPUT 0x0020
#define FTS5INDEX_QUERY_SKIPHASH 0x0040
#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
#define FTS5INDEX_QUERY_NOOUTPUT 0x0020
#define FTS5INDEX_QUERY_SKIPHASH 0x0040
#define FTS5INDEX_QUERY_NOTOKENDATA 0x0080
#define FTS5INDEX_QUERY_SCANONETERM 0x0100
/*
** Create/destroy an Fts5Index object.
@@ -463,6 +466,10 @@ void *sqlite3Fts5StructureRef(Fts5Index*);
void sqlite3Fts5StructureRelease(void*);
int sqlite3Fts5StructureTest(Fts5Index*, void*);
/*
** Used by xInstToken():
*/
int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*);
/*
** Insert or remove data to or from the index. Each time a document is
@@ -540,6 +547,13 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin);
int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid);
void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*);
/* Used to populate hash tables for xInstToken in detail=none/column mode. */
int sqlite3Fts5IndexIterWriteTokendata(
Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff
);
/*
** End of interface to code in fts5_index.c.
**************************************************************************/
@@ -645,6 +659,7 @@ void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
const char **pzTerm, /* OUT: term (nul-terminated) */
int *pnTerm, /* OUT: Size of term in bytes */
const u8 **ppDoclist, /* OUT: pointer to doclist */
int *pnDoclist /* OUT: size of doclist in bytes */
);
@@ -771,6 +786,10 @@ int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);
int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*);
int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*);
void sqlite3Fts5ExprClearTokens(Fts5Expr*);
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by

View File

@@ -398,6 +398,16 @@ static int fts5ConfigParseSpecial(
return rc;
}
if( sqlite3_strnicmp("tokendata", zCmd, nCmd)==0 ){
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
*pzErr = sqlite3_mprintf("malformed tokendata=... directive");
rc = SQLITE_ERROR;
}else{
pConfig->bTokendata = (zArg[0]=='1');
}
return rc;
}
*pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
return SQLITE_ERROR;
}

View File

@@ -100,7 +100,9 @@ struct Fts5ExprNode {
struct Fts5ExprTerm {
u8 bPrefix; /* True for a prefix term */
u8 bFirst; /* True if token must be first in column */
char *zTerm; /* nul-terminated term */
char *pTerm; /* Term data */
int nQueryTerm; /* Effective size of term in bytes */
int nFullTerm; /* Size of term in bytes incl. tokendata */
Fts5IndexIter *pIter; /* Iterator for this term */
Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
};
@@ -967,7 +969,7 @@ static int fts5ExprNearInitAll(
p->pIter = 0;
}
rc = sqlite3Fts5IndexQuery(
pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
pExpr->pIndex, p->pTerm, p->nQueryTerm,
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
pNear->pColset,
@@ -1604,7 +1606,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
Fts5ExprTerm *pSyn;
Fts5ExprTerm *pNext;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
sqlite3_free(pTerm->zTerm);
sqlite3_free(pTerm->pTerm);
sqlite3Fts5IterClose(pTerm->pIter);
for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
pNext = pSyn->pSynonym;
@@ -1702,6 +1704,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset(
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
Fts5ExprPhrase *pPhrase;
Fts5Config *pConfig;
int rc;
};
@@ -1735,8 +1738,10 @@ static int fts5ParseTokenize(
rc = SQLITE_NOMEM;
}else{
memset(pSyn, 0, (size_t)nByte);
pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
memcpy(pSyn->zTerm, pToken, nToken);
pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
pSyn->nFullTerm = pSyn->nQueryTerm = nToken;
if( pCtx->pConfig->bTokendata ) pSyn->nQueryTerm = strlen(pSyn->pTerm);
memcpy(pSyn->pTerm, pToken, nToken);
pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
}
@@ -1761,7 +1766,11 @@ static int fts5ParseTokenize(
if( rc==SQLITE_OK ){
pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
memset(pTerm, 0, sizeof(Fts5ExprTerm));
pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
pTerm->nFullTerm = pTerm->nQueryTerm = nToken;
if( pCtx->pConfig->bTokendata && rc==SQLITE_OK ){
pTerm->nQueryTerm = strlen(pTerm->pTerm);
}
}
}
@@ -1828,6 +1837,7 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
memset(&sCtx, 0, sizeof(TokenCtx));
sCtx.pPhrase = pAppend;
sCtx.pConfig = pConfig;
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
@@ -1877,8 +1887,7 @@ int sqlite3Fts5ExprClonePhrase(
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */
pOrig = pExpr->apExprPhrase[iPhrase];
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
if( rc==SQLITE_OK ){
@@ -1909,13 +1918,12 @@ int sqlite3Fts5ExprClonePhrase(
if( pOrig->nTerm ){
int i; /* Used to iterate through phrase terms */
sCtx.pConfig = pExpr->pConfig;
for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
int tflags = 0;
Fts5ExprTerm *p;
for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
const char *zTerm = p->zTerm;
rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
0, 0);
rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm,p->nFullTerm,0,0);
tflags = FTS5_TOKEN_COLOCATED;
}
if( rc==SQLITE_OK ){
@@ -2296,11 +2304,13 @@ static Fts5ExprNode *fts5ParsePhraseToAnd(
if( parseGrowPhraseArray(pParse) ){
fts5ExprPhraseFree(pPhrase);
}else{
Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii];
Fts5ExprTerm *pTo = &pPhrase->aTerm[0];
pParse->apPhrase[pParse->nPhrase++] = pPhrase;
pPhrase->nTerm = 1;
pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup(
&pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1
);
pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm);
pTo->nQueryTerm = p->nQueryTerm;
pTo->nFullTerm = p->nFullTerm;
pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
);
@@ -2485,16 +2495,17 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
/* Determine the maximum amount of space required. */
for(p=pTerm; p; p=p->pSynonym){
nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2;
nByte += pTerm->nQueryTerm * 2 + 3 + 2;
}
zQuoted = sqlite3_malloc64(nByte);
if( zQuoted ){
int i = 0;
for(p=pTerm; p; p=p->pSynonym){
char *zIn = p->zTerm;
char *zIn = p->pTerm;
char *zEnd = &zIn[p->nQueryTerm];
zQuoted[i++] = '"';
while( *zIn ){
while( zIn<zEnd ){
if( *zIn=='"' ) zQuoted[i++] = '"';
zQuoted[i++] = *zIn++;
}
@@ -2572,8 +2583,10 @@ static char *fts5ExprPrintTcl(
zRet = fts5PrintfAppend(zRet, " {");
for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
char *zTerm = pPhrase->aTerm[iTerm].zTerm;
zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
Fts5ExprTerm *p = &pPhrase->aTerm[iTerm];
zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ",
p->nQueryTerm, p->pTerm
);
if( pPhrase->aTerm[iTerm].bPrefix ){
zRet = fts5PrintfAppend(zRet, "*");
}
@@ -2974,6 +2987,17 @@ static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
return 0;
}
/*
** Buffer pToken is nToken bytes in size. Return the number of bytes that
** precede the first 0x00 byte in the buffer, or nToken if the buffer
** contains no 0x00 byte at all.
*/
static int fts5QueryTerm(const char *pToken, int nToken){
  int nQuery = 0;
  while( nQuery<nToken && pToken[nQuery]!='\0' ){
    nQuery++;
  }
  return nQuery;
}
static int fts5ExprPopulatePoslistsCb(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
@@ -2985,22 +3009,33 @@ static int fts5ExprPopulatePoslistsCb(
Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
Fts5Expr *pExpr = p->pExpr;
int i;
int nQuery = nToken;
i64 iRowid = pExpr->pRoot->iRowid;
UNUSED_PARAM2(iUnused1, iUnused2);
if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
if( nQuery>FTS5_MAX_TOKEN_SIZE ) nQuery = FTS5_MAX_TOKEN_SIZE;
if( pExpr->pConfig->bTokendata ){
nQuery = fts5QueryTerm(pToken, nQuery);
}
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
for(i=0; i<pExpr->nPhrase; i++){
Fts5ExprTerm *pTerm;
Fts5ExprTerm *pT;
if( p->aPopulator[i].bOk==0 ) continue;
for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
int nTerm = (int)strlen(pTerm->zTerm);
if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))
&& memcmp(pTerm->zTerm, pToken, nTerm)==0
for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){
if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix))
&& memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0
){
int rc = sqlite3Fts5PoslistWriterAppend(
&pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
);
if( rc==SQLITE_OK && pExpr->pConfig->bTokendata && !pT->bPrefix ){
int iCol = p->iOff>>32;
int iTokOff = p->iOff & 0x7FFFFFFF;
rc = sqlite3Fts5IndexIterWriteTokendata(
pT->pIter, pToken, nToken, iRowid, iCol, iTokOff
);
}
if( rc ) return rc;
break;
}
@@ -3135,3 +3170,81 @@ int sqlite3Fts5ExprPhraseCollist(
return rc;
}
/*
** Implementation of the fts5_api.xQueryToken() API method.
**
** If iPhrase identifies a phrase of expression pExpr and iToken a term
** within that phrase, set (*ppOut) to point to the term buffer and
** (*pnOut) to its full size in bytes (Fts5ExprTerm.nFullTerm) and
** return SQLITE_OK. Otherwise, if either index is out of range, return
** SQLITE_RANGE and leave the output variables untouched.
*/
int sqlite3Fts5ExprQueryToken(
  Fts5Expr *pExpr,
  int iPhrase,
  int iToken,
  const char **ppOut,
  int *pnOut
){
  if( iPhrase>=0 && iPhrase<pExpr->nPhrase ){
    Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
    if( iToken>=0 && iToken<pPhrase->nTerm ){
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[iToken];
      *ppOut = pTerm->pTerm;
      *pnOut = pTerm->nFullTerm;
      return SQLITE_OK;
    }
  }
  return SQLITE_RANGE;
}
/*
** Does the work of the fts5_api.xInstToken() API method.
**
** Parameter iPhrase identifies a phrase of expression pExpr and iToken a
** term within that phrase. If either is out of range, SQLITE_RANGE is
** returned and the output variables are left untouched.
**
** Otherwise, for a non-prefix term:
**
**   * If this is a tokendata=1 table, the token is looked up by
**     sqlite3Fts5IterToken() using the term's iterator, iRowid, iCol and
**     token offset (iOff+iToken). Its return code is returned.
**
**   * Otherwise, (*ppOut) and (*pnOut) are set to the term as it appears
**     in the query (Fts5ExprTerm.pTerm/nFullTerm).
**
** For a prefix term no token text is available. In that case (*ppOut) is
** set to NULL and (*pnOut) to 0, so that a caller that did not initialize
** its output variables never reads indeterminate values after an
** SQLITE_OK return.
*/
int sqlite3Fts5ExprInstToken(
  Fts5Expr *pExpr,
  i64 iRowid,
  int iPhrase,
  int iCol,
  int iOff,
  int iToken,
  const char **ppOut,
  int *pnOut
){
  Fts5ExprPhrase *pPhrase = 0;
  Fts5ExprTerm *pTerm = 0;
  int rc = SQLITE_OK;

  if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
    return SQLITE_RANGE;
  }
  pPhrase = pExpr->apExprPhrase[iPhrase];
  if( iToken<0 || iToken>=pPhrase->nTerm ){
    return SQLITE_RANGE;
  }

  pTerm = &pPhrase->aTerm[iToken];
  if( pTerm->bPrefix ){
    /* Nothing to report for a prefix term. Make the outputs well-defined
    ** rather than leaving them unwritten. */
    *ppOut = 0;
    *pnOut = 0;
  }else if( pExpr->pConfig->bTokendata ){
    rc = sqlite3Fts5IterToken(
        pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut
    );
  }else{
    *ppOut = pTerm->pTerm;
    *pnOut = pTerm->nFullTerm;
  }
  return rc;
}
/*
** Clear the token mappings held by the Fts5IndexIter objects managed by
** the expression passed as the only argument. For each phrase, this
** visits the first term of the phrase and each of its synonyms.
*/
void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){
  int iPhrase = 0;
  while( iPhrase<pExpr->nPhrase ){
    Fts5ExprTerm *pTerm = &pExpr->apExprPhrase[iPhrase]->aTerm[0];
    while( pTerm ){
      sqlite3Fts5IndexIterClearTokendata(pTerm->pIter);
      pTerm = pTerm->pSynonym;
    }
    iPhrase++;
  }
}

View File

@@ -36,10 +36,15 @@ struct Fts5Hash {
/*
** Each entry in the hash table is represented by an object of the
** following type. Each object, its key (a nul-terminated string) and
** its current data are stored in a single memory allocation. The
** key immediately follows the object in memory. The position list
** data immediately follows the key data in memory.
** following type. Each object, its key, and its current data are stored
** in a single memory allocation. The key immediately follows the object
** in memory. The position list data immediately follows the key data
** in memory.
**
** The key is Fts5HashEntry.nKey bytes in size. It consists of a single
** byte identifying the index (either the main term index or a prefix-index),
** followed by the term data. For example: "0token". There is no
** nul-terminator - in this case nKey=6.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. It is:
@@ -174,8 +179,7 @@ static int fts5HashResize(Fts5Hash *pHash){
unsigned int iHash;
Fts5HashEntry *p = apOld[i];
apOld[i] = p->pHashNext;
iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p),
(int)strlen(fts5EntryKey(p)));
iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), p->nKey);
p->pHashNext = apNew[iHash];
apNew[iHash] = p;
}
@@ -259,7 +263,7 @@ int sqlite3Fts5HashWrite(
for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
char *zKey = fts5EntryKey(p);
if( zKey[0]==bByte
&& p->nKey==nToken
&& p->nKey==nToken+1
&& memcmp(&zKey[1], pToken, nToken)==0
){
break;
@@ -289,9 +293,9 @@ int sqlite3Fts5HashWrite(
zKey[0] = bByte;
memcpy(&zKey[1], pToken, nToken);
assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) );
p->nKey = nToken;
p->nKey = nToken+1;
zKey[nToken+1] = '\0';
p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
p->nData = nToken+1 + sizeof(Fts5HashEntry);
p->pHashNext = pHash->aSlot[iHash];
pHash->aSlot[iHash] = p;
pHash->nEntry++;
@@ -408,12 +412,17 @@ static Fts5HashEntry *fts5HashEntryMerge(
*ppOut = p1;
p1 = 0;
}else{
int i = 0;
char *zKey1 = fts5EntryKey(p1);
char *zKey2 = fts5EntryKey(p2);
while( zKey1[i]==zKey2[i] ) i++;
int nMin = MIN(p1->nKey, p2->nKey);
if( ((u8)zKey1[i])>((u8)zKey2[i]) ){
int cmp = memcmp(zKey1, zKey2, nMin);
if( cmp==0 ){
cmp = p1->nKey - p2->nKey;
}
assert( cmp!=0 );
if( cmp>0 ){
/* p2 is smaller */
*ppOut = p2;
ppOut = &p2->pScanNext;
@@ -455,7 +464,7 @@ static int fts5HashEntrySort(
Fts5HashEntry *pIter;
for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
if( pTerm==0
|| (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm))
|| (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm))
){
Fts5HashEntry *pEntry = pIter;
pEntry->pScanNext = 0;
@@ -494,12 +503,11 @@ int sqlite3Fts5HashQuery(
for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
zKey = fts5EntryKey(p);
assert( p->nKey+1==(int)strlen(zKey) );
if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break;
if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break;
}
if( p ){
int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1;
int nHashPre = sizeof(Fts5HashEntry) + nTerm;
int nList = p->nData - nHashPre;
u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10));
if( pRet ){
@@ -560,19 +568,22 @@ int sqlite3Fts5HashScanEof(Fts5Hash *p){
void sqlite3Fts5HashScanEntry(
Fts5Hash *pHash,
const char **pzTerm, /* OUT: term (nul-terminated) */
int *pnTerm, /* OUT: Size of term in bytes */
const u8 **ppDoclist, /* OUT: pointer to doclist */
int *pnDoclist /* OUT: size of doclist in bytes */
){
Fts5HashEntry *p;
if( (p = pHash->pScan) ){
char *zKey = fts5EntryKey(p);
int nTerm = (int)strlen(zKey);
int nTerm = p->nKey;
fts5HashAddPoslistSize(pHash, p, 0);
*pzTerm = zKey;
*ppDoclist = (const u8*)&zKey[nTerm+1];
*pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1);
*pnTerm = nTerm;
*ppDoclist = (const u8*)&zKey[nTerm];
*pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm);
}else{
*pzTerm = 0;
*pnTerm = 0;
*ppDoclist = 0;
*pnDoclist = 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -656,12 +656,15 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
}
idxStr[iIdxStr] = '\0';
/* Set idxFlags flags for the ORDER BY clause */
/* Set idxFlags flags for the ORDER BY clause
**
** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC".
*/
if( pInfo->nOrderBy==1 ){
int iSort = pInfo->aOrderBy[0].iColumn;
if( iSort==(pConfig->nCol+1) && bSeenMatch ){
idxFlags |= FTS5_BI_ORDER_RANK;
}else if( iSort==-1 ){
}else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){
idxFlags |= FTS5_BI_ORDER_ROWID;
}
if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
@@ -913,6 +916,16 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
);
assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) );
/* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table,
** clear any token mappings accumulated at the fts5_index.c level. In
** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH,
** we need to retain the mappings for the entire query. */
if( pCsr->ePlan==FTS5_PLAN_MATCH
&& ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata
){
sqlite3Fts5ExprClearTokens(pCsr->pExpr);
}
if( pCsr->ePlan<3 ){
int bSkip = 0;
if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
@@ -2063,12 +2076,6 @@ static int fts5ApiInst(
){
if( iIdx<0 || iIdx>=pCsr->nInstCount ){
rc = SQLITE_RANGE;
#if 0
}else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){
*piPhrase = pCsr->aInst[iIdx*3];
*piCol = pCsr->aInst[iIdx*3 + 2];
*piOff = -1;
#endif
}else{
*piPhrase = pCsr->aInst[iIdx*3];
*piCol = pCsr->aInst[iIdx*3 + 1];
@@ -2323,13 +2330,56 @@ static int fts5ApiPhraseFirstColumn(
return rc;
}
/*
** xQueryToken() API implementation.
**
** The context handle is the cursor itself. The request is forwarded to
** sqlite3Fts5ExprQueryToken() for the cursor's expression, and that
** function's return code is returned unchanged.
*/
static int fts5ApiQueryToken(
  Fts5Context* pCtx,
  int iPhrase,
  int iToken,
  const char **ppOut,
  int *pnOut
){
  Fts5Cursor *pCursor = (Fts5Cursor*)pCtx;
  int rc;
  rc = sqlite3Fts5ExprQueryToken(
      pCursor->pExpr, iPhrase, iToken, ppOut, pnOut
  );
  return rc;
}
/*
** xInstToken() API implementation.
**
** If the cursor's FTS5CSR_REQUIRE_INST flag is set, the phrase-instance
** array is (re)built first; any error from that step is returned. If iIdx
** is not a valid index into the instance array, SQLITE_RANGE is returned.
** Otherwise the (phrase, column, offset) triple stored for instance iIdx
** and the cursor's current rowid are handed to sqlite3Fts5ExprInstToken().
*/
static int fts5ApiInstToken(
  Fts5Context *pCtx,
  int iIdx,
  int iToken,
  const char **ppOut, int *pnOut
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int iBase;                      /* Index of first aInst[] entry for iIdx */
  int iPhrase;
  int iCol;
  int iOff;
  i64 iRowid;

  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST) ){
    int rc = fts5CacheInstArray(pCsr);
    if( rc!=SQLITE_OK ) return rc;
  }
  if( iIdx<0 || iIdx>=pCsr->nInstCount ){
    return SQLITE_RANGE;
  }

  /* Each instance occupies three consecutive aInst[] entries. */
  iBase = iIdx*3;
  iPhrase = pCsr->aInst[iBase];
  iCol = pCsr->aInst[iBase + 1];
  iOff = pCsr->aInst[iBase + 2];
  iRowid = fts5CursorRowid(pCsr);
  return sqlite3Fts5ExprInstToken(
      pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut
  );
}
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);
static const Fts5ExtensionApi sFts5Api = {
2, /* iVersion */
3, /* iVersion */
fts5ApiUserData,
fts5ApiColumnCount,
fts5ApiRowCount,
@@ -2349,6 +2399,8 @@ static const Fts5ExtensionApi sFts5Api = {
fts5ApiPhraseNext,
fts5ApiPhraseFirstColumn,
fts5ApiPhraseNextColumn,
fts5ApiQueryToken,
fts5ApiInstToken
};
/*

View File

@@ -244,6 +244,9 @@ static int SQLITE_TCLAPI xF5tApi(
{ "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */
{ "xPhraseForeach", 4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */
{ "xPhraseColumnForeach", 3, "IPHRASE COLVAR SCRIPT" }, /* 17 */
{ "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */
{ "xInstToken", 2, "IDX ITERM" }, /* 19 */
{ 0, 0, 0}
};
@@ -500,6 +503,38 @@ static int SQLITE_TCLAPI xF5tApi(
break;
}
CASE(18, "xQueryToken") {
const char *pTerm = 0;
int nTerm = 0;
int iPhrase = 0;
int iTerm = 0;
if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR;
rc = p->pApi->xQueryToken(p->pFts, iPhrase, iTerm, &pTerm, &nTerm);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm));
}
break;
}
CASE(19, "xInstToken") {
const char *pTerm = 0;
int nTerm = 0;
int iIdx = 0;
int iTerm = 0;
if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ) return TCL_ERROR;
if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR;
rc = p->pApi->xInstToken(p->pFts, iIdx, iTerm, &pTerm, &nTerm);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm));
}
break;
}
default:
assert( 0 );
break;
@@ -1117,6 +1152,176 @@ static int SQLITE_TCLAPI f5tRegisterTok(
return TCL_OK;
}
/*
** Context object registered along with the "origintext" tokenizer module
** by f5tRegisterOriginText(). It is passed back as the first argument to
** f5tOrigintextCreate() and freed by f5tOrigintextTokenizerDelete().
*/
typedef struct OriginTextCtx OriginTextCtx;
struct OriginTextCtx {
sqlite3 *db;                    /* Database handle the module is registered with */
fts5_api *pApi;                 /* fts5 API; used to locate the wrapped tokenizer */
};
/*
** An instance of the "origintext" tokenizer. It wraps an instance of
** another tokenizer, created by f5tOrigintextCreate() and destroyed by
** f5tOrigintextDelete().
*/
typedef struct OriginTextTokenizer OriginTextTokenizer;
struct OriginTextTokenizer {
Fts5Tokenizer *pTok; /* Underlying tokenizer object */
fts5_tokenizer tokapi; /* API implementation for pTok */
};
/*
** Destructor for the OriginTextCtx object passed (as a void*) as the only
** argument. The object was allocated with ckalloc(), so it is released
** with ckfree().
*/
static void f5tOrigintextTokenizerDelete(void *pCtx){
  OriginTextCtx *pOrigin = (OriginTextCtx*)pCtx;
  ckfree(pOrigin);
}
/*
** xCreate method for the "origintext" tokenizer.
**
** azArg[0] names the tokenizer to wrap. Any further arguments are passed
** through to that tokenizer's xCreate method. On success (*ppOut) is set
** to the new OriginTextTokenizer object and SQLITE_OK is returned.
** Otherwise (*ppOut) is set to NULL and an SQLite error code is returned:
** SQLITE_NOMEM if the allocation fails, SQLITE_ERROR if no tokenizer name
** was supplied, or whatever error the underlying xFindTokenizer/xCreate
** call reported.
*/
static int f5tOrigintextCreate(
  void *pCtx,
  const char **azArg,
  int nArg,
  Fts5Tokenizer **ppOut
){
  OriginTextCtx *p = (OriginTextCtx*)pCtx;
  OriginTextTokenizer *pNew;
  void *pInnerCtx = 0;            /* Context for wrapped tokenizer module */
  int rc;

  pNew = (OriginTextTokenizer*)sqlite3_malloc(sizeof(OriginTextTokenizer));
  if( pNew==0 ){
    *ppOut = 0;
    return SQLITE_NOMEM;
  }

  rc = SQLITE_ERROR;
  if( nArg>=1 ){
    /* Locate the underlying tokenizer, then create an instance of it */
    rc = p->pApi->xFindTokenizer(p->pApi, azArg[0], &pInnerCtx, &pNew->tokapi);
    if( rc==SQLITE_OK ){
      rc = pNew->tokapi.xCreate(pInnerCtx, &azArg[1], nArg-1, &pNew->pTok);
    }
  }

  if( rc!=SQLITE_OK ){
    sqlite3_free(pNew);
    pNew = 0;
  }
  *ppOut = (Fts5Tokenizer*)pNew;
  return rc;
}
/*
** xDelete method for the "origintext" tokenizer. Destroy the wrapped
** tokenizer instance, if there is one, then free the wrapper itself.
*/
static void f5tOrigintextDelete(Fts5Tokenizer *pTokenizer){
  OriginTextTokenizer *pOrigin = (OriginTextTokenizer*)pTokenizer;
  Fts5Tokenizer *pInner = pOrigin->pTok;
  if( pInner!=0 ){
    pOrigin->tokapi.xDelete(pInner);
  }
  sqlite3_free(pOrigin);
}
/*
** Context passed to xOriginToken() while f5tOrigintextTokenize() runs the
** wrapped tokenizer. It records the caller's callback and context, the
** text being tokenized, and a scratch buffer used to assemble tokens.
*/
typedef struct OriginTextCb OriginTextCb;
struct OriginTextCb {
void *pCtx;                     /* Context to pass to xToken() */
const char *pText;              /* Text being tokenized */
int nText;                      /* Size of pText in bytes */
int (*xToken)(void *, int, const char *, int, int, int);  /* Caller's callback */
char *aBuf; /* Buffer to use */
int nBuf; /* Allocated size of aBuf[] */
};
/*
** Token callback handed to the wrapped tokenizer by f5tOrigintextTokenize().
**
** If the token is byte-for-byte identical to the text it was extracted
** from (bytes iStart..iEnd of the input), it is forwarded unchanged.
** Otherwise the token forwarded is the concatenation of:
**
**     <token> 0x00 <original text>
**
** assembled in the scratch buffer OriginTextCb.aBuf, which is grown as
** required. Returns the value returned by the downstream callback, or
** SQLITE_NOMEM if the scratch buffer cannot be allocated.
*/
static int xOriginToken(
  void *pCtx,                     /* Copy of 2nd argument to xTokenize() */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Pointer to buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Byte offset of token within input text */
  int iEnd                        /* Byte offset of end of token within input */
){
  OriginTextCb *p = (OriginTextCb*)pCtx;
  int nOrig = iEnd - iStart;      /* Size of original text in bytes */
  int nReq;                       /* Bytes required for combined token */

  if( nToken==nOrig && 0==memcmp(pToken, &p->pText[iStart], nToken) ){
    /* Token exactly matches document text. Pass it through as is. */
    return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
  }

  nReq = nToken + 1 + nOrig;
  if( nReq>p->nBuf ){
    /* Scratch buffer too small. Replace it with one twice the size
    ** currently required. */
    sqlite3_free(p->aBuf);
    p->aBuf = sqlite3_malloc(nReq*2);
    if( p->aBuf==0 ) return SQLITE_NOMEM;
    p->nBuf = nReq*2;
  }

  memcpy(p->aBuf, pToken, nToken);
  p->aBuf[nToken] = '\0';
  memcpy(&p->aBuf[nToken+1], &p->pText[iStart], nOrig);
  return p->xToken(p->pCtx, tflags, p->aBuf, nReq, iStart, iEnd);
}
/*
** xTokenize method for the "origintext" tokenizer.
**
** Run the wrapped tokenizer over the input text with xOriginToken()
** interposed as the token callback, so that each token can be augmented
** with its original text before being passed on to the caller's xToken.
** The scratch buffer used by xOriginToken() is freed before returning.
** Returns the value returned by the wrapped tokenizer's xTokenize method.
*/
static int f5tOrigintextTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int flags,                      /* Mask of FTS5_TOKENIZE_* flags */
  const char *pText, int nText,
  int (*xToken)(void *, int, const char *, int, int, int)
){
  OriginTextTokenizer *pOrigin = (OriginTextTokenizer*)pTokenizer;
  OriginTextCb sCb;
  int rc;

  sCb.pCtx = pCtx;
  sCb.pText = pText;
  sCb.nText = nText;
  sCb.xToken = xToken;
  sCb.aBuf = 0;
  sCb.nBuf = 0;

  rc = pOrigin->tokapi.xTokenize(
      pOrigin->pTok, (void*)&sCb, flags, pText, nText, xOriginToken
  );
  sqlite3_free(sCb.aBuf);
  return rc;
}
/*
**      sqlite3_fts5_register_origintext DB
**
** Register a tokenizer module named "origintext" with the fts5 extension
** of database handle DB. The module wraps another tokenizer (named by its
** first argument) and forwards tokens through xOriginToken().
**
** On failure the interpreter result is set to "error: " followed by the
** database error message and TCL_ERROR is returned.
*/
static int SQLITE_TCLAPI f5tRegisterOriginText(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  sqlite3 *db = 0;
  fts5_api *pApi = 0;
  fts5_tokenizer tok = {0, 0, 0};
  OriginTextCtx *pNew = 0;
  int rc;

  if( objc!=2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
    return TCL_ERROR;
  }

  /* Context object handed to the module. The destructor callback passed
  ** to xCreateTokenizer() below is f5tOrigintextTokenizerDelete(). */
  pNew = (OriginTextCtx*)ckalloc(sizeof(OriginTextCtx));
  pNew->db = db;
  pNew->pApi = pApi;

  tok.xCreate = f5tOrigintextCreate;
  tok.xDelete = f5tOrigintextDelete;
  tok.xTokenize = f5tOrigintextTokenize;
  rc = pApi->xCreateTokenizer(
      pApi, "origintext", (void*)pNew, &tok, f5tOrigintextTokenizerDelete
  );

  Tcl_ResetResult(interp);
  if( rc==SQLITE_OK ){
    return TCL_OK;
  }
  Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
  return TCL_ERROR;
}
/*
** Entry point.
*/
@@ -1133,7 +1338,8 @@ int Fts5tcl_Init(Tcl_Interp *interp){
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },
{ "sqlite3_fts5_token_hash", f5tTokenHash, 0 },
{ "sqlite3_fts5_register_matchinfo", f5tRegisterMatchinfo, 0 },
{ "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 }
{ "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 },
{ "sqlite3_fts5_register_origintext",f5tRegisterOriginText, 0 }
};
int i;
F5tTokenizerContext *pContext;

View File

@@ -438,6 +438,20 @@ proc detail_is_none {} { detail_check ; expr {$::detail == "none"} }
proc detail_is_col {} { detail_check ; expr {$::detail == "col" } }
proc detail_is_full {} { detail_check ; expr {$::detail == "full"} }
# Usage: foreach_tokenizer_mode PREFIX SCRIPT
#
# Evaluate SCRIPT in the caller's scope once for each tokenizer mode listed
# below. Before each evaluation, every occurrence of the string %TOKENIZER%
# in SCRIPT is replaced by the extra CREATE VIRTUAL TABLE options for that
# mode (empty for the default mode), ::testprefix is set to PREFIX followed
# by the mode suffix, the database is reset and the "origintext" tokenizer
# is registered with it. ::testprefix is restored before returning.
proc foreach_tokenizer_mode {prefix script} {
set saved $::testprefix
foreach {d mapping} {
"" {}
"-origintext" {, tokenize="origintext unicode61", tokendata=1}
} {
set s [string map [list %TOKENIZER% $mapping] $script]
set ::testprefix "$prefix$d"
reset_db
sqlite3_fts5_register_origintext db
uplevel $s
}
set ::testprefix $saved
}
#-------------------------------------------------------------------------
# Convert a poslist of the type returned by fts5_test_poslist() to a

View File

@@ -22,6 +22,7 @@ ifcapable !fts5 {
}
foreach_detail_mode $::testprefix {
foreach_tokenizer_mode $::testprefix {
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, c);
@@ -44,7 +45,7 @@ do_execsql_test 1.1 {
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%);
}
do_execsql_test 2.1 {
INSERT INTO t1 VALUES('a b c', 'd e f');
@@ -73,8 +74,9 @@ do_execsql_test 2.4 {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
}
foreach {i x y} {
1 {g f d b f} {h h e i a}
@@ -97,8 +99,9 @@ foreach {i x y} {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
@@ -121,8 +124,9 @@ foreach {i x y} {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
@@ -145,8 +149,9 @@ foreach {i x y} {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
@@ -181,6 +186,7 @@ do_execsql_test 6.6 {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
expr srand(0)
do_execsql_test 7.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y,z);
@@ -222,6 +228,7 @@ for {set i 1} {$i <= 10} {incr i} {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
@@ -236,6 +243,7 @@ do_execsql_test 8.1 {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
expr srand(0)
@@ -280,8 +288,9 @@ for {set i 1} {$i <= 10} {incr i} {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 10.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
}
set d10 {
1 {g f d b f} {h h e i a}
@@ -314,19 +323,19 @@ do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
#-------------------------------------------------------------------------
#
do_catchsql_test 11.1 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL%);
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL% %TOKENIZER%);
} {1 {reserved fts5 column name: rank}}
do_catchsql_test 11.2 {
CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL%);
CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL% %TOKENIZER%);
} {1 {reserved fts5 table name: rank}}
do_catchsql_test 11.3 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL%);
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL% %TOKENIZER%);
} {1 {reserved fts5 column name: rowid}}
#-------------------------------------------------------------------------
#
do_execsql_test 12.1 {
CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
} {}
do_catchsql_test 12.2 {
@@ -341,8 +350,9 @@ do_test 12.3 {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 13.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o');
} {}
@@ -365,8 +375,9 @@ do_execsql_test 13.6 {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 14.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
WITH d(x,y) AS (
SELECT NULL, 'xyz xyz xyz xyz xyz xyz'
@@ -449,8 +460,9 @@ do_catchsql_test 16.2 {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 17.1 {
CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO b2 VALUES('a');
INSERT INTO b2 VALUES('b');
INSERT INTO b2 VALUES('c');
@@ -466,8 +478,9 @@ do_test 17.2 {
if {[string match n* %DETAIL%]==0} {
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 17.3 {
CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL%);
CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO c2 VALUES('x x x', 'x x x');
SELECT rowid FROM c2 WHERE c2 MATCH 'y:x';
} {1}
@@ -476,8 +489,9 @@ if {[string match n* %DETAIL%]==0} {
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 17.1 {
CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL%);
CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL% %TOKENIZER%);
INSERT INTO uio VALUES(NULL);
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
@@ -524,8 +538,8 @@ do_execsql_test 17.9 {
#--------------------------------------------------------------------
#
do_execsql_test 18.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%);
CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL%);
CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL% %TOKENIZER%);
CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1 VALUES('abc*', NULL);
INSERT INTO t2 VALUES(1, 'abcdefg');
}
@@ -540,8 +554,9 @@ do_execsql_test 18.3 {
# fts5 table in the temp schema.
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 19.0 {
CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1 VALUES('x y z');
INSERT INTO t1 VALUES('w x 1');
SELECT rowid FROM t1 WHERE t1 MATCH 'x';
@@ -551,8 +566,9 @@ do_execsql_test 19.0 {
# Test that 6 and 7 byte varints can be read.
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 20.0 {
CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL% %TOKENIZER%);
}
set ::ids [list \
0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43]
@@ -570,7 +586,7 @@ do_test 20.1 {
#
do_execsql_test 21.0 {
CREATE TEMP TABLE t8(a, b);
CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL% %TOKENIZER%);
}
do_execsql_test 21.1 {
@@ -581,7 +597,7 @@ do_execsql_test 21.1 {
}
do_execsql_test 22.0 {
CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t9(rowid, x) VALUES(2, 'bbb');
BEGIN;
INSERT INTO t9(rowid, x) VALUES(1, 'aaa');
@@ -596,7 +612,7 @@ do_execsql_test 22.1 {
#-------------------------------------------------------------------------
do_execsql_test 23.0 {
CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
CREATE TABLE t11(x);
}
do_execsql_test 23.1 {
@@ -608,7 +624,7 @@ do_execsql_test 23.2 {
#-------------------------------------------------------------------------
do_execsql_test 24.0 {
CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t12 VALUES('aaaa');
}
do_execsql_test 24.1 {
@@ -618,6 +634,9 @@ do_execsql_test 24.1 {
INSERT INTO t12 VALUES('aaaa');
END;
}
execsql_pp {
SELECT rowid, hex(block) FROM t12_data
}
do_execsql_test 24.2 {
INSERT INTO t12(t12) VALUES('integrity-check');
}
@@ -627,7 +646,7 @@ do_execsql_test 24.3 {
#-------------------------------------------------------------------------
do_execsql_test 25.0 {
CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL%);
CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
}
do_execsql_test 25.1 {
BEGIN;
@@ -638,6 +657,7 @@ SELECT * FROM t13('BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
}
}
}
expand_all_sql db

View File

@@ -0,0 +1,93 @@
# 2010 June 15
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
# Test-case name prefix for this file (was a copy-paste of fts5faultG).
set testprefix fts5faultH
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
set ::testprefix fts5faultH
sqlite3_fts5_register_origintext db
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(
x, tokenize="origintext unicode61", tokendata=1
);
BEGIN;
INSERT INTO t1 VALUES('oNe tWo thRee');
INSERT INTO t1 VALUES('One Two Three');
INSERT INTO t1 VALUES('onE twO threE');
COMMIT;
BEGIN;
INSERT INTO t1 VALUES('one two three');
INSERT INTO t1 VALUES('one two three');
INSERT INTO t1 VALUES('one two three');
COMMIT;
}
do_faultsim_test 1 -faults oom* -prep {
} -body {
execsql {
SELECT rowid FROM t1('three');
}
} -test {
faultsim_integrity_check
faultsim_test_result {0 {1 2 3 4 5 6}}
}
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(
x, tokenize="origintext unicode61", tokendata=1
);
INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
BEGIN;
INSERT INTO t1(rowid, x) VALUES(10, 'aaa bbb BBB');
INSERT INTO t1(rowid, x) VALUES(12, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(13, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(14, 'bbb BBB bbb');
INSERT INTO t1(rowid, x) VALUES(15, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(16, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(17, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(18, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(19, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(20, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(21, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(22, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(23, 'bbb bbb bbb');
INSERT INTO t1(rowid, x) VALUES(24, 'aaa bbb BBB');
COMMIT;
}
do_faultsim_test 2 -faults oom* -prep {
} -body {
execsql {
SELECT rowid FROM t1('BBB AND AAA');
}
} -test {
faultsim_integrity_check
faultsim_test_result {0 {10 24}}
}
finish_test

View File

@@ -0,0 +1,297 @@
# 2014 Jan 08
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests for the "origintext" tokenizer wrapper, the tokendata=1 option and
# the xQueryToken() and xInstToken() extension APIs.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5origintext
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
foreach_detail_mode $testprefix {
sqlite3_fts5_register_origintext db
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize="origintext unicode61", detail=%DETAIL%
);
CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
}
do_execsql_test 1.1 {
INSERT INTO ft VALUES('Hello world');
}
do_execsql_test 1.2 {
INSERT INTO ft(ft) VALUES('integrity-check');
}
proc b {x} {
  # Replace every embedded 0x00 byte in $x with "." so that the value can
  # be compared against plain-text expected results.
  return [string map [list "\0" "."] $x]
}
db func b b
do_execsql_test 1.3 {
select b(term) from vocab;
} {
hello.Hello
world
}
do_execsql_test 1.4 {
SELECT rowid FROM ft('Hello');
} {1}
#-------------------------------------------------------------------------
reset_db
# Return a random integer between 0 and n-1.
#
proc random {n} {
  # Scale a single rand() sample by $n, truncate to an integer and take
  # the absolute value.
  set scaled [expr {rand() * $n}]
  return [expr {abs(int($scaled))}]
}
proc select_one {list} {
  # Pick the element at an index chosen by [random] from the range
  # 0 .. (list length - 1).
  set idx [random [llength $list]]
  return [lindex $list $idx]
}
proc term {} {
  # A term is one randomly selected ASCII letter (either case) followed by
  # the decimal value returned by [random 100].
  set alphabet {
    a b c d e f g h i j k l m n o p q r s t u v w x y z
    A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  }
  set word [select_one $alphabet]
  append word [random 100]
  return $word
}
proc document {} {
  # Build a list of ([random 5] + 5) terms, each produced by [term].
  set remaining [expr {[random 5] + 5}]
  set words [list]
  while {$remaining > 0} {
    lappend words [term]
    incr remaining -1
  }
  return $words
}
db func document document
sqlite3_fts5_register_origintext db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize="origintext unicode61", detail=%DETAIL%
);
INSERT INTO ft(ft, rank) VALUES('pgsz', 128);
CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
}
do_test 2.1 {
for {set ii 0} {$ii < 500} {incr ii} {
execsql { INSERT INTO ft VALUES( document() ) }
}
} {}
do_execsql_test 2.2 {
INSERT INTO ft(ft) VALUES('integrity-check');
}
do_execsql_test 2.3 {
INSERT INTO ft(ft, rank) VALUES('merge', 16);
}
do_execsql_test 2.4 {
INSERT INTO ft(ft) VALUES('integrity-check');
}
do_execsql_test 2.5 {
INSERT INTO ft(ft) VALUES('optimize');
}
#-------------------------------------------------------------------------
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize="origintext unicode61", detail=%DETAIL%
);
CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
INSERT INTO ft(rowid, x) VALUES(1, 'hello');
INSERT INTO ft(rowid, x) VALUES(2, 'Hello');
INSERT INTO ft(rowid, x) VALUES(3, 'HELLO');
}
#proc b {x} { string map [list "\0" "."] $x }
#db func b b
#execsql_pp { SELECT b(term) FROM vocab }
do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1
do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2
do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3
do_execsql_test 3.2 {
CREATE VIRTUAL TABLE ft2 USING fts5(x,
tokenize="origintext unicode61",
tokendata=1,
detail=%DETAIL%
);
CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance);
INSERT INTO ft2(rowid, x) VALUES(1, 'hello');
INSERT INTO ft2(rowid, x) VALUES(2, 'Hello');
INSERT INTO ft2(rowid, x) VALUES(3, 'HELLO');
INSERT INTO ft2(rowid, x) VALUES(10, 'helloooo');
}
#proc b {x} { string map [list "\0" "."] $x }
#db func b b
#execsql_pp { SELECT b(term) FROM vocab }
do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3}
do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3}
do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3}
do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10}
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
proc querytoken {cmd iPhrase iToken} {
  # Fetch token $iToken of phrase $iPhrase through the xQueryToken
  # sub-command of $cmd, then make embedded 0x00 bytes visible as ".".
  set nulToDot [list "\0" "."]
  set raw [$cmd xQueryToken $iPhrase $iToken]
  return [string map $nulToDot $raw]
}
sqlite3_fts5_create_function db querytoken querytoken
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
);
INSERT INTO ft VALUES('one two three four');
}
do_execsql_test 4.1 {
SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO')
} {1 two.TwO}
do_execsql_test 4.2 {
SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE')
} {1 one}
do_execsql_test 4.3 {
SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE')
} {1 two.TWO}
if {"%DETAIL%"=="full"} {
# Phrase queries are only supported for detail=full.
#
do_execsql_test 4.4 {
SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"')
} {1 three.ThreE}
do_catchsql_test 4.5 {
SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"')
} {1 SQLITE_RANGE}
do_catchsql_test 4.6 {
SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"')
} {1 SQLITE_RANGE}
do_catchsql_test 4.7 {
SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"')
} {1 SQLITE_RANGE}
}
#-------------------------------------------------------------------------
#
reset_db
sqlite3_fts5_register_origintext db
proc insttoken {cmd iIdx iToken} {
  # Fetch token $iToken of phrase hit $iIdx through the xInstToken
  # sub-command of $cmd, then make embedded 0x00 bytes visible as ".".
  set nulToDot [list "\0" "."]
  set raw [$cmd xInstToken $iIdx $iToken]
  return [string map $nulToDot $raw]
}
sqlite3_fts5_create_function db insttoken insttoken
fts5_aux_test_functions db
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
);
INSERT INTO ft VALUES('one ONE One oNe oNE one');
}
do_execsql_test 5.1 {
SELECT insttoken(ft, 0, 0),
insttoken(ft, 1, 0),
insttoken(ft, 2, 0),
insttoken(ft, 3, 0),
insttoken(ft, 4, 0),
insttoken(ft, 5, 0)
FROM ft('one');
} {
one one.ONE one.One one.oNe one.oNE one
}
do_execsql_test 5.2 {
SELECT insttoken(ft, 1, 0) FROM ft('one');
} {
one.ONE
}
do_execsql_test 5.3 {
SELECT fts5_test_poslist(ft) FROM ft('one');
} {
{0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5}
}
#-------------------------------------------------------------------------
# Test the xInstToken() API with:
#
# * a non tokendata=1 table.
# * prefix queries.
#
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, y, tokenize='origintext unicode61', detail=%DETAIL%
);
INSERT INTO ft VALUES('One Two', 'Three two');
INSERT INTO ft VALUES('three Three', 'one One');
}
proc tokens {cmd} {
  # Collect token 0 of every phrase hit reported by $cmd (xInstCount /
  # xInstToken), with embedded 0x00 bytes replaced by ".".
  set out {}
  set idx 0
  while {$idx < [$cmd xInstCount]} {
    lappend out [string map [list "\0" "."] [$cmd xInstToken $idx 0]]
    incr idx
  }
  return $out
}
sqlite3_fts5_create_function db tokens tokens
do_execsql_test 6.1 {
SELECT rowid, tokens(ft) FROM ft('One');
} {1 one.One 2 one.One}
do_execsql_test 6.2 {
SELECT rowid, tokens(ft) FROM ft('on*');
} {1 {{}} 2 {{} {}}}
do_execsql_test 6.3 {
SELECT rowid, tokens(ft) FROM ft('Three*');
} {1 {{}} 2 {{}}}
}
finish_test

View File

@@ -0,0 +1,146 @@
# 2014 Jan 08
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests for tokendata=1 tables, including the xInstToken() API when used
# with detail=none.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5origintext2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
sqlite3_fts5_register_origintext db
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize="origintext unicode61", tokendata=1
);
}
do_execsql_test 1.1 {
BEGIN;
INSERT INTO ft VALUES('Hello');
INSERT INTO ft VALUES('hello');
INSERT INTO ft VALUES('HELLO');
INSERT INTO ft VALUES('today');
INSERT INTO ft VALUES('today');
INSERT INTO ft VALUES('today');
INSERT INTO ft VALUES('World');
INSERT INTO ft VALUES('world');
INSERT INTO ft VALUES('WORLD');
COMMIT;
}
do_execsql_test 1.2 { SELECT rowid FROM ft('hello'); } {1 2 3}
do_execsql_test 1.3 { SELECT rowid FROM ft('today'); } {4 5 6}
do_execsql_test 1.4 { SELECT rowid FROM ft('world'); } {7 8 9}
do_execsql_test 1.5 {
SELECT count(*) FROM ft_data
} 3
do_execsql_test 1.6 {
DELETE FROM ft;
INSERT INTO ft(ft, rank) VALUES('pgsz', 64);
BEGIN;
WITH s(i) AS (
SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100
)
INSERT INTO ft SELECT 'Hello Hello Hello Hello Hello Hello Hello' FROM s;
INSERT INTO ft VALUES ('hELLO hELLO hELLO');
INSERT INTO ft VALUES('today today today today today today today');
INSERT INTO ft VALUES('today today today today today today today');
INSERT INTO ft VALUES('today today today today today today today');
INSERT INTO ft VALUES('today today today today today today today');
INSERT INTO ft VALUES('today today today today today today today');
INSERT INTO ft VALUES('today today today today today today today');
INSERT INTO ft VALUES('World World World World World World World');
INSERT INTO ft VALUES('world world world world world world world');
INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD');
INSERT INTO ft VALUES('World World World World World World World');
INSERT INTO ft VALUES('world world world world world world world');
INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD');
COMMIT;
}
do_execsql_test 1.7 {
SELECT count(*) FROM ft_data;
} 23
do_execsql_test 1.8 { SELECT rowid FROM ft('hello') WHERE rowid>100; } {101}
do_execsql_test 1.9 {
DELETE FROM ft;
INSERT INTO ft(ft) VALUES('optimize');
SELECT count(*) FROM ft_data;
} {2}
do_execsql_test 1.10 {
BEGIN;
INSERT INTO ft VALUES('Hello');
INSERT INTO ft VALUES('hello');
INSERT INTO ft VALUES('HELLO');
INSERT INTO ft VALUES('today');
INSERT INTO ft VALUES('today');
INSERT INTO ft VALUES('today');
INSERT INTO ft VALUES('World');
INSERT INTO ft VALUES('world');
INSERT INTO ft VALUES('WORLD');
}
do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3}
do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6}
do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9}
do_execsql_test 1.14 { SELECT rowid FROM ft('hello') ORDER BY rank; } {1 2 3}
#------------------------------------------------------------------------
reset_db
sqlite3_fts5_register_origintext db
proc tokens {cmd} {
  # Return a list holding token 0 of each phrase hit in the current row.
  # Embedded 0x00 bytes are rendered as "." for readability.
  set result {}
  set hit 0
  while {$hit < [$cmd xInstCount]} {
    set raw [$cmd xInstToken $hit 0]
    lappend result [string map [list "\0" "."] $raw]
    incr hit
  }
  return $result
}
sqlite3_fts5_create_function db tokens tokens
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE x1 USING fts5(
v, tokenize="origintext unicode61", tokendata=1, detail=none
);
INSERT INTO x1 VALUES('xxx Xxx XXX yyy YYY yyy');
INSERT INTO x1 VALUES('xxx yyy xxx yyy yyy yyy');
}
do_execsql_test 2.1 {
SELECT tokens(x1) FROM x1('xxx');
} {
{xxx xxx.Xxx xxx.XXX} {xxx xxx}
}
do_execsql_test 2.2 {
UPDATE x1_content SET c0 = 'xxx xxX xxx yyy yyy yyy' WHERE id=1;
}
do_execsql_test 2.3 {
SELECT tokens(x1) FROM x1('xxx');
} {
{xxx {} xxx} {xxx xxx}
}
finish_test

View File

@@ -0,0 +1,101 @@
# 2023 November 22
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests for the xInstToken() API and AND queries on tokendata=1 tables.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5origintext3
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
foreach_detail_mode $testprefix {
reset_db
sqlite3_fts5_register_origintext db
fts5_aux_test_functions db
proc insttoken {cmd iIdx iToken} {
  # Wrapper around the xInstToken sub-command of $cmd that renders any
  # embedded 0x00 bytes in the returned token as ".".
  set token [$cmd xInstToken $iIdx $iToken]
  set printable [string map [list "\0" "."] $token]
  return $printable
}
sqlite3_fts5_create_function db insttoken insttoken
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
);
}
do_execsql_test 1.1 {
INSERT INTO ft VALUES('Hello world HELLO WORLD hello');
}
do_execsql_test 1.2 {
SELECT fts5_test_poslist(ft) FROM ft('hello');
} {{0.0.0 0.0.2 0.0.4}}
do_execsql_test 1.3 {
SELECT
insttoken(ft, 0, 0),
insttoken(ft, 1, 0),
insttoken(ft, 2, 0)
FROM ft('hello');
} {hello.Hello hello.HELLO hello}
do_execsql_test 1.4 {
SELECT
insttoken(ft, 0, 0),
insttoken(ft, 1, 0),
insttoken(ft, 2, 0)
FROM ft('hello') ORDER BY rank;
} {hello.Hello hello.HELLO hello}
do_execsql_test 1.5 {
CREATE VIRTUAL TABLE ft2 USING fts5(
x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
);
INSERT INTO ft2(rowid, x) VALUES(1, 'ONE one two three ONE');
INSERT INTO ft2(rowid, x) VALUES(2, 'TWO one two three TWO');
INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE');
}
do_execsql_test 1.6 {
SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank;
} {three.THREE 3 three 1 three 2}
do_execsql_test 1.7 {
INSERT INTO ft2(rowid, x) VALUES(10, 'aaa bbb BBB');
INSERT INTO ft2(rowid, x) VALUES(12, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(13, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(14, 'bbb BBB bbb');
INSERT INTO ft2(rowid, x) VALUES(15, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(16, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(17, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(18, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(19, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(20, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(21, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(22, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(23, 'bbb bbb bbb');
INSERT INTO ft2(rowid, x) VALUES(24, 'aaa bbb BBB');
}
do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24}
do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24}
}
finish_test

View File

@@ -0,0 +1,66 @@
# 2023 November 22
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests of the page-cache memory (CACHE_USED) consumed by queries on a
# tokendata=1 table.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5origintext4
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
sqlite3_fts5_register_origintext db
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize="origintext unicode61", tokendata=1
);
}
do_execsql_test 1.1 {
BEGIN;
INSERT INTO ft SELECT 'the first thing';
WITH s(i) AS (
SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<90000
)
INSERT INTO ft SELECT 'The second thing' FROM s;
INSERT INTO ft SELECT 'the first thing';
COMMIT;
INSERT INTO ft(ft) VALUES('optimize');
}
foreach {tn sql expr} {
1 { SELECT rowid FROM ft('the') } {$mem > 250000}
2 { SELECT rowid FROM ft('first') } {$mem < 50000}
3 { SELECT rowid FROM ft('the first') } {$mem < 50000}
} {
db close
sqlite3 db test.db
sqlite3_fts5_register_origintext db
execsql $sql
do_test 1.2.$tn {
set mem [lindex [sqlite3_db_status db CACHE_USED 0] 1]
expr $expr
} 1
}
proc b {x} {
  # Render embedded 0x00 bytes in $x as ".".
  set nulToDot [list "\0" "."]
  return [string map $nulToDot $x]
}
db func b b
# execsql_pp { SELECT segid, b(term), pgno from ft_idx }
finish_test

View File

@@ -0,0 +1,273 @@
# 2023 Dec 04
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests for tables that use both tokendata=1 and contentless_delete=1.
#
source [file join [file dirname [info script]] fts5_common.tcl]
# Use a prefix distinct from the other fts5origintext*.test scripts so that
# test-case names reported by this file are unambiguous.
set testprefix fts5origintext5
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Return a random integer between 0 and n-1.
#
proc random {n} {
  set scaled [expr {rand() * $n}]
  return [expr {abs(int($scaled))}]
}
# Select an element of the list passed as the only argument at random and
# return it.
#
proc select_one {list} {
  set count [llength $list]
  set pick [random $count]
  return [lindex $list $pick]
}
# Given a term that consists entirely of alphabet characters, return all
# upper/lower case variants of the term. e.g.
#
# "abc" -> {ABC ABc AbC Abc aBC aBc abC abc}
#
proc casify {term {lRet {{}}}} {
  # Iterative form: start from the list of prefixes in $lRet and, for each
  # character of $term in order, extend every prefix first with the
  # upper-case and then with the lower-case form of that character.
  set variants $lRet
  foreach ch [split $term ""] {
    set upper [string toupper $ch]
    set lower [string tolower $ch]
    set extended [list]
    foreach prefix $variants {
      lappend extended "$prefix$upper" "$prefix$lower"
    }
    set variants $extended
  }
  return $variants
}
proc vocab {} {
  # The base (lower-case) vocabulary from which documents are generated.
  return {abc def ghi jkl mno pqr stu vwx yza}
}
# Return a random 3 letter term.
#
proc term {} {
  # On first use, populate the global list ::expanded_vocab with every
  # case variant (see [casify]) of every entry returned by [vocab]. Then
  # return one randomly selected entry from that list.
  global expanded_vocab
  if {![info exists expanded_vocab]} {
    foreach base [vocab] {
      foreach variant [casify $base] {
        lappend expanded_vocab $variant
      }
    }
  }
  return [select_one $expanded_vocab]
}
# Return a document - between 7 and 9 terms.
#
proc document {} {
  # The document is a list of ([random 3] + 7) terms from [term].
  set remaining [expr {[random 3] + 7}]
  set words [list]
  while {$remaining > 0} {
    lappend words [term]
    incr remaining -1
  }
  return $words
}
db func document document
#-------------------------------------------------------------------------
expr srand(6)
set NDOC 200
set NLOOP 50
sqlite3_fts5_register_origintext db
proc tokens {cmd} {
  # Gather token 0 of each phrase hit for the current row, mapping any
  # embedded 0x00 byte to "." so results can be compared as text.
  set collected {}
  set i 0
  while {$i < [$cmd xInstCount]} {
    set printable [string map [list "\0" "."] [$cmd xInstToken $i 0]]
    lappend collected $printable
    incr i
  }
  return $collected
}
sqlite3_fts5_create_function db tokens tokens
proc rankfunc {cmd} {
  # The rank of a row is whatever the xRowid sub-command of $cmd returns.
  return [$cmd xRowid]
}
sqlite3_fts5_create_function db rankfunc rankfunc
proc ctrl_tokens {term args} {
  # For each word of each document in $args that matches $term when both
  # are folded to lower case, append either the word itself (if it is
  # already equal to the folded term) or "<folded>.<word>" to the result.
  set folded [string tolower $term]
  set out {}
  foreach doc $args {
    foreach word $doc {
      set lc [string tolower $word]
      if {$lc != $folded} continue
      if {$word == $folded} {
        lappend out $word
      } else {
        lappend out $lc.$word
      }
    }
  }
  return $out
}
db func ctrl_tokens ctrl_tokens
# Cross-check fts5 table "ft" against control table "ctrl".
#
# For every term returned by [vocab], plus the extra term "nnn" (which
# [vocab] does not contain), the expected result is computed from ctrl
# using the ctrl_tokens() SQL function. It is then compared with the
# output of "SELECT rowid, tokens(ft)" for the same term, once without an
# ORDER BY clause and once with "ORDER BY rank". Test cases are named
# $tn.<term>.1 and $tn.<term>.2.
proc do_all_vocab_test {tn} {
foreach ::v [concat [vocab] nnn] {
set answer [execsql {
SELECT id, ctrl_tokens($::v, x) FROM ctrl WHERE x LIKE '%' || $::v || '%'
}]
do_execsql_test $tn.$::v.1 {
SELECT rowid, tokens(ft) FROM ft($::v)
} $answer
do_execsql_test $tn.$::v.2 {
SELECT rowid, tokens(ft) FROM ft($::v) ORDER BY rank
} $answer
}
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft USING fts5(
x, tokenize="origintext unicode61", content=, contentless_delete=1,
tokendata=1
);
CREATE TABLE ctrl(id INTEGER PRIMARY KEY, x TEXT);
INSERT INTO ft(ft, rank) VALUES('pgsz', 64);
INSERT INTO ft(ft, rank) VALUES('rank', 'rankfunc()');
}
do_test 1.1 {
for {set ii 0} {$ii < $NDOC} {incr ii} {
set doc [document]
execsql {
INSERT INTO ft(rowid, x) VALUES($ii, $doc);
INSERT INTO ctrl(id, x) VALUES($ii, $doc);
}
}
} {}
#execsql_pp { SELECT * FROM ctrl }
#execsql_pp { SELECT * FROM ft }
#fts5_aux_test_functions db
#execsql_pp { SELECT rowid, tokens(ft), fts5_test_poslist(ft) FROM ft('ghi'); }
do_all_vocab_test 1.2
for {set ii 0} {$ii < $NLOOP} {incr ii} {
set lRowid [execsql { SELECT id FROM ctrl WHERE random() % 2 }]
foreach r $lRowid {
execsql { DELETE FROM ft WHERE rowid = $r }
execsql { DELETE FROM ctrl WHERE rowid = $r }
set doc [document]
execsql { INSERT INTO ft(rowid, x) VALUES($r, $doc) }
execsql { INSERT INTO ctrl(id, x) VALUES($r, $doc) }
}
do_all_vocab_test 1.3.$ii
}
#-------------------------------------------------------------------------
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE ft2 USING fts5(
x, y, tokenize="origintext unicode61", content=, contentless_delete=1,
tokendata=1
);
CREATE TABLE ctrl2(id INTEGER PRIMARY KEY, x TEXT, y TEXT);
INSERT INTO ft2(ft2, rank) VALUES('pgsz', 64);
INSERT INTO ft2(ft2, rank) VALUES('rank', 'rankfunc()');
}
# Populate ft2 and ctrl2 with $NDOC identical rows of two freshly generated
# documents each. Column x must use $doc1; the original used $doc, a stale
# variable left over from the earlier single-column tests.
do_test 2.1 {
  for {set ii 0} {$ii < $NDOC} {incr ii} {
    set doc1 [document]
    set doc2 [document]
    execsql {
      INSERT INTO ft2(rowid, x, y) VALUES($ii, $doc1, $doc2);
      INSERT INTO ctrl2(id, x, y) VALUES($ii, $doc1, $doc2);
    }
  }
} {}
# Two-column variant of the vocabulary cross-check: compares fts5 table
# "ft2" against control table "ctrl2".
#
# For every term returned by [vocab], the expected result is computed from
# ctrl2 by passing both columns (x, y) to ctrl_tokens() for each row in
# which either column matches the term. It is compared with the output of
# "SELECT rowid, tokens(ft2)", once without an ORDER BY clause and once
# with "ORDER BY rank". Test cases are named $tn.<term>.1 and $tn.<term>.2.
proc do_all_vocab_test2 {tn} {
foreach ::v [vocab] {
set answer [execsql {
SELECT id, ctrl_tokens($::v, x, y) FROM ctrl2
WHERE x LIKE '%' || $::v || '%' OR y LIKE '%' || $::v || '%';
}]
do_execsql_test $tn.$::v.1 {
SELECT rowid, tokens(ft2) FROM ft2($::v)
} $answer
do_execsql_test $tn.$::v.2 {
SELECT rowid, tokens(ft2) FROM ft2($::v) ORDER BY rank
} $answer
}
}
do_all_vocab_test2 2.2
# Repeatedly replace a random subset of the rows in ft2 and ctrl2 with
# freshly generated documents, then verify that the two tables still agree.
#
# Both tables must receive the same (x, y) pair for a given rowid, and the
# two-column checker (do_all_vocab_test2) must be used; otherwise the
# updates made to ft2 by this loop are never verified.
for {set ii 0} {$ii < $NLOOP} {incr ii} {
  set lRowid [execsql { SELECT id FROM ctrl2 WHERE random() % 2 }]
  foreach r $lRowid {
    execsql { DELETE FROM ft2 WHERE rowid = $r }
    execsql { DELETE FROM ctrl2 WHERE rowid = $r }
    set doc1 [document]
    set doc2 [document]
    execsql { INSERT INTO ft2(rowid, x, y) VALUES($r, $doc1, $doc2) }
    execsql { INSERT INTO ctrl2(id, x, y) VALUES($r, $doc1, $doc2) }
  }
  do_all_vocab_test2 2.3.$ii
}
#-------------------------------------------------------------------------
unset -nocomplain ::expanded_vocab
proc vocab {} {
  # Replacement vocabulary made up of three five-letter terms.
  return {abcde fghij klmno}
}
# Variant of the vocabulary cross-check for fts5 table "ft3" and control
# table "ctrl3" (single column "w", rows identified by rowid).
#
# For every term returned by [vocab], plus the extra term "nnn" (which
# [vocab] does not contain), the expected result is computed from ctrl3
# using ctrl_tokens() and compared with "SELECT rowid, tokens(ft3)", once
# without an ORDER BY clause and once with "ORDER BY rank". Test cases are
# named $tn.<term>.1 and $tn.<term>.2.
proc do_all_vocab_test3 {tn} {
foreach ::v [concat [vocab] nnn] {
set answer [execsql {
SELECT rowid, ctrl_tokens($::v, w) FROM ctrl3 WHERE w LIKE '%' || $::v || '%'
}]
do_execsql_test $tn.$::v.1 {
SELECT rowid, tokens(ft3) FROM ft3($::v)
} $answer
do_execsql_test $tn.$::v.2 {
SELECT rowid, tokens(ft3) FROM ft3($::v) ORDER BY rank
} $answer
}
}
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE ft3 USING fts5(
w, tokenize="origintext unicode61", content=, contentless_delete=1,
tokendata=1
);
INSERT INTO ft3(ft3, rank) VALUES('rank', 'rankfunc()');
CREATE TABLE ctrl3(w);
}
do_execsql_test 3.1 {
WITH s(i) AS (
SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<2
)
INSERT INTO ctrl3 SELECT document() FROM s;
INSERT INTO ft3(rowid, w) SELECT rowid, w FROM ctrl3;
}
do_all_vocab_test3 3.2
finish_test

View File

@@ -343,7 +343,9 @@ do_execsql_test 17.0 {
INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb');
COMMIT;
}
do_execsql_test 17.1 { SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20 }
do_execsql_test 17.1 {
SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20
}
do_execsql_test 17.2 {
BEGIN;
INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb');

View File

@@ -1,5 +1,5 @@
C README.md\stypo\sfix\sreported\sin\sthe\sforum\sand\supdate\sall\slinks\sfrom\shttp:\sto\shttps:.
D 2023-12-06T12:30:28.174
C Add\sthe\stokendata=1\soption\sand\srelated\sAPIs\sto\sfts5.
D 2023-12-06T14:36:34.858
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -89,17 +89,17 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h 05501612cc655504c5dce8ba765ab621d50fc478490089beaa0d75e00b23e520
F ext/fts5/fts5Int.h 78a63cc0795186cde5384816a9403a68c65774b35d952e05b81a1b4b158e07c8
F ext/fts5/fts5.h ff90acaa97f8e865b66d1177d1b56b8c110fd5548ab5863bab43f055a1d745fe
F ext/fts5/fts5Int.h defa43c0932265138ee910ca416e6baccf8b774e0f3d610e74be1ab2880e9834
F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad
F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5
F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081
F ext/fts5/fts5_expr.c bd3b81ce669c4104e34ffe66570af1999a317b142c15fccb112de9fb0caa57a6
F ext/fts5/fts5_hash.c 076058f93327051952a752dc765df1acfe783eb11b419b30652aa1fc1f987902
F ext/fts5/fts5_index.c 458cbed8a3e17617cbf7e80cdfb7612000b9bb3781f286b345fb9655858658cf
F ext/fts5/fts5_main.c a07ed863b8bd9e6fefb62db2fd40a3518eb30a5f7dcfda5be915dd2db45efa2f
F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf
F ext/fts5/fts5_expr.c b1ec526371b9ffde82341423a5b9753c42cbea629a41b69f26fa377d13b95a8e
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c be39b44ff8773cff56bcbc01f74701a83e068c20d773cafd01e8bb2fa0fc1bc5
F ext/fts5/fts5_main.c fb7ec495d663f40d18e420e1986316591041a70e1e4b4696ab2a7384e4c7fd7a
F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d
F ext/fts5/fts5_tcl.c b1445cbe69908c411df8084a10b2485500ac70a9c747cdc8cda175a3da59d8ae
F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c 83cfcede3898001cab84432a36ce1503e3080cf9b1c682b022ec82e267ea4c13
@@ -108,8 +108,8 @@ F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d0988
F ext/fts5/fts5_vocab.c aed56169ae5c1aa9b8189c779ffeef04ed516d3c712c06914e6d91a6759f4e4a
F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl a9de9c2209cc4e7ae3c753e783504e67206c6c1467d08f209cd0c5923d3e8d8b
F ext/fts5/test/fts5aa.test ba5158eba7d61359becdfca895ef471072c7bf7b20e5e60dcb4d024c8419c926
F ext/fts5/test/fts5_common.tcl 8b1848ac2baad10e444e4183034a52050b52d20b3796d9d30e78f01ab0d05583
F ext/fts5/test/fts5aa.test 4db81519863244a3cab35795fe65ab6b592e7970c7409eba098b23ebbfc08d95
F ext/fts5/test/fts5ab.test bd932720c748383277456b81f91bc00453de2174f9762cd05f95d0495dc50390
F ext/fts5/test/fts5ac.test a7aa7e1fefc6e1918aa4d3111d5c44a09177168e962c5fd2cca9620de8a7ed6d
F ext/fts5/test/fts5ad.test e8cf959dfcd57c8e46d6f5f25665686f3b6627130a9a981371dafdf6482790de
@@ -170,6 +170,7 @@ F ext/fts5/test/fts5faultD.test e7ed7895abfe6bc98a5e853826f6b74956e7ba7f594f1860
F ext/fts5/test/fts5faultE.test 844586ce71dab4be85bb86880e87b624d089f851654cd22e4710c77eb8ce7075
F ext/fts5/test/fts5faultF.test 4abef99f86e99d9f0c6460dd68c586a766b6b9f1f660ada55bf2e8266bd1bbc1
F ext/fts5/test/fts5faultG.test d2e5a4d9a34e08dcaadcaeafef74d10cbc2abdd11aa2659a18af0294bf2812d3
F ext/fts5/test/fts5faultH.test d845f45dac3e1a3f20c7e0a2be95280c95d3204c06802f86ab2c110e52ed3d14
F ext/fts5/test/fts5first.test 3fcf2365c00a15fc9704233674789a3b95131d12de18a9b996159f6909dc8079
F ext/fts5/test/fts5full.test e1701a112354e0ff9a1fdffb0c940c576530c33732ee20ac5e8361777070d717
F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e
@@ -190,6 +191,11 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618
F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1
F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785
F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca
F ext/fts5/test/fts5origintext.test d2796fa08ee7aecfabdc0c45bb8a2fb16a00ea8757e63fbc153b718dbe430a39
F ext/fts5/test/fts5origintext2.test f3b9436de540828d01f0672df855b09ebc0863e126d5b56234701d71dfa73634
F ext/fts5/test/fts5origintext3.test 0d25933506600452a5ab3873cbb418ed5f2de2446c3672b9997b1ea104b0e7f0
F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871
F ext/fts5/test/fts5origintext5.test a037bdf7235a22033c4663837bdb12d9738245464a3ac2f60c71fc40d07ede7d
F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b
F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a
F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15
@@ -212,7 +218,7 @@ F ext/fts5/test/fts5secure7.test fd03d0868d64340a1db8615b02e5508fea409de13910114
F ext/fts5/test/fts5secure8.test eb3579e9d58b0acad97e8082dee1f99b2d393198f03500b453c2b25761c0c298
F ext/fts5/test/fts5securefault.test dbca2b6a1c16700017f5051138991b705410889933f2a37c57ae8a23b296b10b
F ext/fts5/test/fts5simple.test a298670508c1458b88ce6030440f26a30673931884eb5f4094ac1773b3ba217b
F ext/fts5/test/fts5simple2.test 258a1b0c590409bfa5271e872c79572b319d2a56554d0585f68f146a0da603f0
F ext/fts5/test/fts5simple2.test 8dd2389ee75e21a1429fe87e5f8c7d9a97ad1470304a8a2d3ba4b8c3c345fecd
F ext/fts5/test/fts5simple3.test d5c74a9d3ca71bd5dd5cacb7c55b86ea12cdddfc8b1910e3de2995206898380f
F ext/fts5/test/fts5synonym.test 1651815b8008de170e8e600dcacc17521d765482ea8f074ae82cfa870d8bb7fb
F ext/fts5/test/fts5synonym2.test 8f891fc49cc1e8daed727051e77e1f42849c784a6a54bef82564761b2cb3e016
@@ -2147,8 +2153,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 7f0c79b94e8f55e5013e52ba64ba8b32dad1dc4e2224d2099733cbc561de1810
R e4c4253b529b92ef6212320f71e1570e
U stephan
Z 02e09a8447ded58f933fad2e169fffdc
P 5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf 8f46eace86e7b2e556913575aa3cd6f7987ac0efcc880f0af649d42c253aeb81
R aee08254c1ed5ae187dbc54a7e67d0a2
T +closed 8f46eace86e7b2e556913575aa3cd6f7987ac0efcc880f0af649d42c253aeb81
U dan
Z b8398992a8dd36d240de5bcbcb58489b
# Remove this line to create a well-formed Fossil manifest.

View File

@@ -1 +1 @@
5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf
a76a636b23c0ebd95d47fdf8358de4729e51a5f68f1a730cd4d89b378e94ac0d