From a35ae44150c9afa5b74dd364f015767371dbe97d Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 30 Sep 2023 18:13:35 +0000 Subject: [PATCH 01/24] Changes so that fts5 can handle tokens with embedded '\0' bytes. FossilOrigin-Name: c027c092c4af53bd6ae3cc6e2b4439167d9eeb0f9de549b6a2c2a72a67ee886c --- ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_hash.c | 49 +++++---- ext/fts5/fts5_index.c | 23 ++-- ext/fts5/fts5_tcl.c | 173 +++++++++++++++++++++++++++++- ext/fts5/test/fts5origintext.test | 116 ++++++++++++++++++++ manifest | 24 +++-- manifest.uuid | 2 +- 7 files changed, 345 insertions(+), 43 deletions(-) create mode 100644 ext/fts5/test/fts5origintext.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 8bbafbaaf4..1687168d5f 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -645,6 +645,7 @@ void sqlite3Fts5HashScanNext(Fts5Hash*); int sqlite3Fts5HashScanEof(Fts5Hash*); void sqlite3Fts5HashScanEntry(Fts5Hash *, const char **pzTerm, /* OUT: term (nul-terminated) */ + int *pnTerm, /* OUT: Size of term in bytes */ const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ); diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 7e50c36608..f6224f1275 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -36,10 +36,15 @@ struct Fts5Hash { /* ** Each entry in the hash table is represented by an object of the -** following type. Each object, its key (a nul-terminated string) and -** its current data are stored in a single memory allocation. The -** key immediately follows the object in memory. The position list -** data immediately follows the key data in memory. +** following type. Each object, its key, and its current data are stored +** in a single memory allocation. The key immediately follows the object +** in memory. The position list data immediately follows the key data +** in memory. +** +** The key is Fts5HashEntry.nKey bytes in size. 
It consists of a single +** byte identifying the index (either the main term index or a prefix-index), +** followed by the term data. For example: "0token". There is no +** nul-terminator - in this case nKey=6. ** ** The data that follows the key is in a similar, but not identical format ** to the doclist data stored in the database. It is: @@ -174,8 +179,7 @@ static int fts5HashResize(Fts5Hash *pHash){ unsigned int iHash; Fts5HashEntry *p = apOld[i]; apOld[i] = p->pHashNext; - iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), - (int)strlen(fts5EntryKey(p))); + iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), p->nKey); p->pHashNext = apNew[iHash]; apNew[iHash] = p; } @@ -259,7 +263,7 @@ int sqlite3Fts5HashWrite( for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ char *zKey = fts5EntryKey(p); if( zKey[0]==bByte - && p->nKey==nToken + && p->nKey==nToken+1 && memcmp(&zKey[1], pToken, nToken)==0 ){ break; @@ -289,9 +293,9 @@ int sqlite3Fts5HashWrite( zKey[0] = bByte; memcpy(&zKey[1], pToken, nToken); assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) ); - p->nKey = nToken; + p->nKey = nToken+1; zKey[nToken+1] = '\0'; - p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry); + p->nData = nToken+1 + sizeof(Fts5HashEntry); p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; @@ -408,12 +412,17 @@ static Fts5HashEntry *fts5HashEntryMerge( *ppOut = p1; p1 = 0; }else{ - int i = 0; char *zKey1 = fts5EntryKey(p1); char *zKey2 = fts5EntryKey(p2); - while( zKey1[i]==zKey2[i] ) i++; + int nMin = MIN(p1->nKey, p2->nKey); - if( ((u8)zKey1[i])>((u8)zKey2[i]) ){ + int cmp = memcmp(zKey1, zKey2, nMin); + if( cmp==0 ){ + cmp = p1->nKey - p2->nKey; + } + assert( cmp!=0 ); + + if( cmp>0 ){ /* p2 is smaller */ *ppOut = p2; ppOut = &p2->pScanNext; @@ -457,7 +466,7 @@ static int fts5HashEntrySort( Fts5HashEntry *pIter; for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ if( pTerm==0 - || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm)) 
+ || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm)) ){ Fts5HashEntry *pEntry = pIter; pEntry->pScanNext = 0; @@ -496,12 +505,11 @@ int sqlite3Fts5HashQuery( for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ zKey = fts5EntryKey(p); - assert( p->nKey+1==(int)strlen(zKey) ); - if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break; + if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break; } if( p ){ - int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1; + int nHashPre = sizeof(Fts5HashEntry) + nTerm; int nList = p->nData - nHashPre; u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10)); if( pRet ){ @@ -562,19 +570,22 @@ int sqlite3Fts5HashScanEof(Fts5Hash *p){ void sqlite3Fts5HashScanEntry( Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ + int *pnTerm, /* OUT: Size of term in bytes */ const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ char *zKey = fts5EntryKey(p); - int nTerm = (int)strlen(zKey); + int nTerm = p->nKey; fts5HashAddPoslistSize(pHash, p, 0); *pzTerm = zKey; - *ppDoclist = (const u8*)&zKey[nTerm+1]; - *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1); + *pnTerm = nTerm; + *ppDoclist = (const u8*)&zKey[nTerm]; + *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm); }else{ *pzTerm = 0; + *pnTerm = 0; *ppDoclist = 0; *pnDoclist = 0; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index f527709237..9db5925b94 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2177,15 +2177,16 @@ static void fts5SegIterNext_None( }else{ const u8 *pList = 0; const char *zTerm = 0; + int nTerm = 0; int nList; sqlite3Fts5HashScanNext(p->pHash); - sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); if( pList==0 ) goto next_none_eof; pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->nn = nList; 
pIter->pLeaf->szLeaf = nList; pIter->iEndofDoclist = nList; - sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); + sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } @@ -2251,11 +2252,12 @@ static void fts5SegIterNext( }else if( pIter->pSeg==0 ){ const u8 *pList = 0; const char *zTerm = 0; + int nTerm = 0; int nList = 0; assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ sqlite3Fts5HashScanNext(p->pHash); - sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); } if( pList==0 ){ fts5DataRelease(pIter->pLeaf); @@ -2265,8 +2267,7 @@ static void fts5SegIterNext( pIter->pLeaf->nn = nList; pIter->pLeaf->szLeaf = nList; pIter->iEndofDoclist = nList+1; - sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), - (u8*)zTerm); + sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); *pbNewTerm = 1; } @@ -2711,8 +2712,7 @@ static void fts5SegIterHashInit( const u8 *pList = 0; p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); - sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); - n = (z ? 
(int)strlen((const char*)z) : 0); + sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList); if( pList ){ pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf ){ @@ -5313,10 +5313,10 @@ static void fts5FlushSecureDelete( Fts5Index *p, Fts5Structure *pStruct, const char *zTerm, + int nTerm, i64 iRowid ){ const int f = FTS5INDEX_QUERY_SKIPHASH; - int nTerm = (int)strlen(zTerm); Fts5Iter *pIter = 0; /* Used to find term instance */ fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter); @@ -5390,8 +5390,7 @@ static void fts5FlushOneHash(Fts5Index *p){ int nDoclist; /* Size of doclist in bytes */ /* Get the term and doclist for this entry. */ - sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - nTerm = (int)strlen(zTerm); + sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist); if( bSecureDelete==0 ){ fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); if( p->rc!=SQLITE_OK ) break; @@ -5421,7 +5420,7 @@ static void fts5FlushOneHash(Fts5Index *p){ if( bSecureDelete ){ if( eDetail==FTS5_DETAIL_NONE ){ if( iOffrc!=SQLITE_OK || pDoclist[iOff]==0x01 ){ iOff++; continue; diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 80c600dbb1..fb4bea8e9e 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -1117,6 +1117,176 @@ static int SQLITE_TCLAPI f5tRegisterTok( return TCL_OK; } +typedef struct OriginTextCtx OriginTextCtx; +struct OriginTextCtx { + sqlite3 *db; + fts5_api *pApi; +}; + +typedef struct OriginTextTokenizer OriginTextTokenizer; +struct OriginTextTokenizer { + Fts5Tokenizer *pTok; /* Underlying tokenizer object */ + fts5_tokenizer tokapi; /* API implementation for pTok */ +}; + +/* +** Delete the OriginTextCtx object indicated by the only argument. 
+*/ +static void f5tOrigintextTokenizerDelete(void *pCtx){ + OriginTextCtx *p = (OriginTextCtx*)pCtx; + ckfree(p); +} + +static int f5tOrigintextCreate( + void *pCtx, + const char **azArg, + int nArg, + Fts5Tokenizer **ppOut +){ + OriginTextCtx *p = (OriginTextCtx*)pCtx; + OriginTextTokenizer *pTok = 0; + void *pTokCtx = 0; + int rc = SQLITE_OK; + + pTok = (OriginTextTokenizer*)sqlite3_malloc(sizeof(OriginTextTokenizer)); + if( pTok==0 ){ + rc = SQLITE_NOMEM; + }else if( nArg<1 ){ + rc = SQLITE_ERROR; + }else{ + /* Locate the underlying tokenizer */ + rc = p->pApi->xFindTokenizer(p->pApi, azArg[0], &pTokCtx, &pTok->tokapi); + } + + /* Create the new tokenizer instance */ + if( rc==SQLITE_OK ){ + rc = pTok->tokapi.xCreate(pTokCtx, &azArg[1], nArg-1, &pTok->pTok); + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(pTok); + pTok = 0; + } + *ppOut = (Fts5Tokenizer*)pTok; + return rc; +} + +static void f5tOrigintextDelete(Fts5Tokenizer *pTokenizer){ + OriginTextTokenizer *p = (OriginTextTokenizer*)pTokenizer; + if( p->pTok ){ + p->tokapi.xDelete(p->pTok); + } + sqlite3_free(p); +} + +typedef struct OriginTextCb OriginTextCb; +struct OriginTextCb { + void *pCtx; + const char *pText; + int nText; + int (*xToken)(void *, int, const char *, int, int, int); + + char *aBuf; /* Buffer to use */ + int nBuf; /* Allocated size of aBuf[] */ +}; + +static int xOriginToken( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd /* Byte offset of end of token within input */ +){ + OriginTextCb *p = (OriginTextCb*)pCtx; + int ret = 0; + + if( nToken==(iEnd-iStart) && 0==memcmp(pToken, &p->pText[iStart], nToken) ){ + /* Token exactly matches document text. Pass it through as is. 
*/ + ret = p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); + }else{ + int nReq = nToken + 1 + (iEnd-iStart); + if( nReq>p->nBuf ){ + sqlite3_free(p->aBuf); + p->aBuf = sqlite3_malloc(nReq*2); + if( p->aBuf==0 ) return SQLITE_NOMEM; + p->nBuf = nReq*2; + } + + memcpy(p->aBuf, pToken, nToken); + p->aBuf[nToken] = '\0'; + memcpy(&p->aBuf[nToken+1], &p->pText[iStart], iEnd-iStart); + ret = p->xToken(p->pCtx, tflags, p->aBuf, nReq, iStart, iEnd); + } + + return ret; +} + + +static int f5tOrigintextTokenize( + Fts5Tokenizer *pTokenizer, + void *pCtx, + int flags, /* Mask of FTS5_TOKENIZE_* flags */ + const char *pText, int nText, + int (*xToken)(void *, int, const char *, int, int, int) +){ + OriginTextTokenizer *p = (OriginTextTokenizer*)pTokenizer; + OriginTextCb cb; + int ret; + + memset(&cb, 0, sizeof(cb)); + cb.pCtx = pCtx; + cb.pText = pText; + cb.nText = nText; + cb.xToken = xToken; + + ret = p->tokapi.xTokenize(p->pTok,(void*)&cb,flags,pText,nText,xOriginToken); + sqlite3_free(cb.aBuf); + return ret; +} + +/* +** sqlite3_fts5_register_origintext DB +** +** Description... +*/ +static int SQLITE_TCLAPI f5tRegisterOriginText( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3 *db = 0; + fts5_api *pApi = 0; + int rc; + fts5_tokenizer tok = {0, 0, 0}; + OriginTextCtx *pCtx = 0; + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB"); + return TCL_ERROR; + } + if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; + + pCtx = (OriginTextCtx*)ckalloc(sizeof(OriginTextCtx)); + pCtx->db = db; + pCtx->pApi = pApi; + + tok.xCreate = f5tOrigintextCreate; + tok.xDelete = f5tOrigintextDelete; + tok.xTokenize = f5tOrigintextTokenize; + rc = pApi->xCreateTokenizer( + pApi, "origintext", (void*)pCtx, &tok, f5tOrigintextTokenizerDelete + ); + + Tcl_ResetResult(interp); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); + return TCL_ERROR; + } + return TCL_OK; +} + /* ** Entry point. 
*/ @@ -1133,7 +1303,8 @@ int Fts5tcl_Init(Tcl_Interp *interp){ { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 }, { "sqlite3_fts5_token_hash", f5tTokenHash, 0 }, { "sqlite3_fts5_register_matchinfo", f5tRegisterMatchinfo, 0 }, - { "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 } + { "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 }, + { "sqlite3_fts5_register_origintext",f5tRegisterOriginText, 0 } }; int i; F5tTokenizerContext *pContext; diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test new file mode 100644 index 0000000000..791b850c76 --- /dev/null +++ b/ext/fts5/test/fts5origintext.test @@ -0,0 +1,116 @@ +# 2014 Jan 08 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize="origintext unicode61"); + CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); +} + +do_execsql_test 1.1 { + INSERT INTO ft VALUES('Hello world'); +} + +do_execsql_test 1.2 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +proc b {x} { string map [list "\0" "."] $x } +db func b b + +do_execsql_test 1.3 { + select b(term) from vocab; +} { + hello.Hello + world +} + +#------------------------------------------------------------------------- +reset_db + +# Return a random integer between 0 and n-1. 
+# +proc random {n} { + expr {abs(int(rand()*$n))} +} + +proc select_one {list} { + set n [llength $list] + lindex $list [random $n] +} + +proc term {} { + set first_letter { + a b c d e f g h i j k l m n o p q r s t u v w x y z + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + } + + set term [select_one $first_letter] + append term [random 100] +} + +proc document {} { + set nTerm [expr [random 5] + 5] + set doc "" + for {set ii 0} {$ii < $nTerm} {incr ii} { + lappend doc [term] + } + set doc +} +db func document document + +sqlite3_fts5_register_origintext db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize="origintext unicode61"); + INSERT INTO ft(ft, rank) VALUES('pgsz', 128); + CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); +} + +do_test 2.1 { + for {set ii 0} {$ii < 500} {incr ii} { + execsql { INSERT INTO ft VALUES( document() ) } + } +} {} + +do_execsql_test 2.2 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +do_execsql_test 2.3 { + INSERT INTO ft(ft, rank) VALUES('merge', 16); +} + +do_execsql_test 2.4 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +do_execsql_test 2.5 { + INSERT INTO ft(ft) VALUES('optimize'); +} + +proc b {x} { string map [list "\0" "."] $x } +db func b b +#execsql_pp { SELECT b(term) FROM vocab } + +finish_test + diff --git a/manifest b/manifest index 2eae6bfcf8..bf21569e58 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sJNI\sbinding\sto\scompile\swithout\sSQLITE_ENABLE_PREUPDATE_HOOK.\sAdd\sbuild\soption\sto\sdisable\sall\soptional\sENABLE\sflags. -D 2023-09-30T17:08:29.126 +C Changes\sso\sthat\sfts5\scan\shandle\stokens\swith\sembedded\s'\\0'\sbytes. 
+D 2023-09-30T18:13:35.306 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -88,16 +88,16 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h 05501612cc655504c5dce8ba765ab621d50fc478490089beaa0d75e00b23e520 -F ext/fts5/fts5Int.h 78a63cc0795186cde5384816a9403a68c65774b35d952e05b81a1b4b158e07c8 +F ext/fts5/fts5Int.h 66a38b285e2b860baa29745d8eff27f5b0809268e7820498494d9acfaccf8a5c F ext/fts5/fts5_aux.c 572d5ec92ba7301df2fea3258576332f2f4d2dfd66d8263afd157d9deceac480 F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081 F ext/fts5/fts5_expr.c bd3b81ce669c4104e34ffe66570af1999a317b142c15fccb112de9fb0caa57a6 -F ext/fts5/fts5_hash.c 65e7707bc8774706574346d18c20218facf87de3599b995963c3e6d6809f203d -F ext/fts5/fts5_index.c a86bcd5637625ce1037649d55974ab8da1fa8d1375cb334aae47ef376642e93b +F ext/fts5/fts5_hash.c 76765856397eff56f526b0640b23a1677d737d35e07bc00e4b4b2e0fc5fda60d +F ext/fts5/fts5_index.c 16d775ecbccf7d3698a03bcae3c3fbee0749df748b93b29d0e82a37e02eaaa94 F ext/fts5/fts5_main.c 799ec88d2309055f6406bddb0bd6ed80148c5da5eb14594c3c5309a6e944d489 F ext/fts5/fts5_storage.c 3c9b41fce41b6410f2e8f82eb035c6a29b2560483f773e6dc98cf3cb2e4ddbb5 -F ext/fts5/fts5_tcl.c b1445cbe69908c411df8084a10b2485500ac70a9c747cdc8cda175a3da59d8ae +F ext/fts5/fts5_tcl.c 0d2bb0ff7bf6ee136015be118167f0bd956ddd05a8f02c68bd34299b50648f9f F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F 
ext/fts5/fts5_test_tok.c a2bed8edb25f6432e8cdb62aad5916935c19dba8dac2b8324950cfff397e25ff F ext/fts5/fts5_tokenize.c 5e251efb0f1af99a25ed50010ba6b1ad1250aca5921af1988fdcabe5ebc3cb43 @@ -187,6 +187,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca +F ext/fts5/test/fts5origintext.test 9a6edc85ccc4afb10e71d54d98d8170f850272e55b120520f367afbb12526674 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2122,8 +2123,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 5e387275f69ab2d3159b4b67b8cbfc6270410b61e5ac1f988616e8d051f6572e -R 02618be495064fd0c511f49fba8a92b2 -U stephan -Z fb900a6927398da79962f35041fce8dc +P c04022b7407f77eaf0175e831ebcd6bbdc0af1cef0d42c5c11102aa8484f24ca +R ee3b13ddf778c77c1640cd7c7844c1f5 +T *branch * fts5-token-data +T *sym-fts5-token-data * +T -sym-trunk * +U dan +Z 7d5bd217a552215de3d888e155abaef5 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 92d537682b..9db9eb3c64 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c04022b7407f77eaf0175e831ebcd6bbdc0af1cef0d42c5c11102aa8484f24ca \ No newline at end of file +c027c092c4af53bd6ae3cc6e2b4439167d9eeb0f9de549b6a2c2a72a67ee886c \ No newline at end of file From eb28787b5ff64a634a70ae7d649eb540f068b0e0 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 3 Oct 2023 17:07:54 +0000 Subject: [PATCH 02/24] Update fts5_decode() to allow for embedded 0x00 bytes in tokens. FossilOrigin-Name: e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 --- ext/fts5/fts5_index.c | 28 ++++++++++++++++++++++------ manifest | 15 ++++++--------- manifest.uuid | 2 +- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 9db5925b94..6cf30b5a00 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -7745,6 +7745,24 @@ static void fts5DecodeRowidList( } #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) +static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){ + int ii; + fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1); + if( *pRc==SQLITE_OK ){ + for(ii=0; ii<pTerm->n; ii++){ + if( pTerm->p[ii]==0x00 ){ + pBuf->p[pBuf->n++] = '\\'; + pBuf->p[pBuf->n++] = '0'; + }else{ + pBuf->p[pBuf->n++] = pTerm->p[ii]; + } + } + pBuf->p[pBuf->n] = 0x00; + } +} +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ + #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** The implementation of user-defined scalar function fts5_decode(). 
@@ -7852,9 +7870,8 @@ static void fts5DecodeFunction( iOff += fts5GetVarint32(&a[iOff], nAppend); term.n = nKeep; fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]); - sqlite3Fts5BufferAppendPrintf( - &rc, &s, " term=%.*s", term.n, (const char*)term.p - ); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); + fts5BufferAppendTerm(&rc, &s, &term); iOff += nAppend; /* Figure out where the doclist for this term ends */ @@ -7962,9 +7979,8 @@ static void fts5DecodeFunction( fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; - sqlite3Fts5BufferAppendPrintf( - &rc, &s, " term=%.*s", term.n, (const char*)term.p - ); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); + fts5BufferAppendTerm(&rc, &s, &term); iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); } diff --git a/manifest b/manifest index bf21569e58..431d902181 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Changes\sso\sthat\sfts5\scan\shandle\stokens\swith\sembedded\s'\\0'\sbytes. -D 2023-09-30T18:13:35.306 +C Update\sfts5_decode()\sto\sallow\sfor\sembedded\s0x00\sbytes\sin\stokens. 
+D 2023-10-03T17:07:54.562 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -94,7 +94,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081 F ext/fts5/fts5_expr.c bd3b81ce669c4104e34ffe66570af1999a317b142c15fccb112de9fb0caa57a6 F ext/fts5/fts5_hash.c 76765856397eff56f526b0640b23a1677d737d35e07bc00e4b4b2e0fc5fda60d -F ext/fts5/fts5_index.c 16d775ecbccf7d3698a03bcae3c3fbee0749df748b93b29d0e82a37e02eaaa94 +F ext/fts5/fts5_index.c e472083d371f420d52ec80445b9d2a99b16b23548205cb4064ddcd41bd79f63e F ext/fts5/fts5_main.c 799ec88d2309055f6406bddb0bd6ed80148c5da5eb14594c3c5309a6e944d489 F ext/fts5/fts5_storage.c 3c9b41fce41b6410f2e8f82eb035c6a29b2560483f773e6dc98cf3cb2e4ddbb5 F ext/fts5/fts5_tcl.c 0d2bb0ff7bf6ee136015be118167f0bd956ddd05a8f02c68bd34299b50648f9f @@ -2123,11 +2123,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c04022b7407f77eaf0175e831ebcd6bbdc0af1cef0d42c5c11102aa8484f24ca -R ee3b13ddf778c77c1640cd7c7844c1f5 -T *branch * fts5-token-data -T *sym-fts5-token-data * -T -sym-trunk * +P c027c092c4af53bd6ae3cc6e2b4439167d9eeb0f9de549b6a2c2a72a67ee886c +R 9e81ed5ff713a928831c6c73df8f7a54 U dan -Z 7d5bd217a552215de3d888e155abaef5 +Z 0a0daf2566a3400fafbefb55947df637 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 9db9eb3c64..53a545a52f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c027c092c4af53bd6ae3cc6e2b4439167d9eeb0f9de549b6a2c2a72a67ee886c \ No newline at end of file +e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 \ No newline at end of file From 1846de49a46bbe1487679d5fdc4283acd38ba307 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 3 Oct 2023 19:06:52 +0000 Subject: [PATCH 03/24] Fixes for fts5 expression parser module to allow embedded 0x00 bytes in tokens. FossilOrigin-Name: 342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f --- ext/fts5/fts5_expr.c | 43 +++++++++++++++++-------------- ext/fts5/test/fts5origintext.test | 4 +++ manifest | 14 +++++----- manifest.uuid | 2 +- 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index f5101ba065..745a5d9fa6 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -100,7 +100,8 @@ struct Fts5ExprNode { struct Fts5ExprTerm { u8 bPrefix; /* True for a prefix term */ u8 bFirst; /* True if token must be first in column */ - char *zTerm; /* nul-terminated term */ + char *pTerm; /* Term data */ + int nTerm; /* Size of term in bytes */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; @@ -967,7 +968,7 @@ static int fts5ExprNearInitAll( p->pIter = 0; } rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), + pExpr->pIndex, p->pTerm, p->nTerm, (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? 
FTS5INDEX_QUERY_DESC : 0), pNear->pColset, @@ -1604,7 +1605,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; - sqlite3_free(pTerm->zTerm); + sqlite3_free(pTerm->pTerm); sqlite3Fts5IterClose(pTerm->pIter); for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ pNext = pSyn->pSynonym; @@ -1735,8 +1736,9 @@ static int fts5ParseTokenize( rc = SQLITE_NOMEM; }else{ memset(pSyn, 0, (size_t)nByte); - pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); - memcpy(pSyn->zTerm, pToken, nToken); + pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); + pSyn->nTerm = nToken; + memcpy(pSyn->pTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; } @@ -1761,7 +1763,8 @@ static int fts5ParseTokenize( if( rc==SQLITE_OK ){ pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + pTerm->nTerm = nToken; } } @@ -1913,9 +1916,7 @@ int sqlite3Fts5ExprClonePhrase( int tflags = 0; Fts5ExprTerm *p; for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ - const char *zTerm = p->zTerm; - rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), - 0, 0); + rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm, p->nTerm, 0, 0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ @@ -2296,11 +2297,13 @@ static Fts5ExprNode *fts5ParsePhraseToAnd( if( parseGrowPhraseArray(pParse) ){ fts5ExprPhraseFree(pPhrase); }else{ + Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; pParse->apPhrase[pParse->nPhrase++] = pPhrase; pPhrase->nTerm = 1; - pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup( - &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1 + pPhrase->aTerm[0].pTerm = sqlite3Fts5Strndup( + &pParse->rc, p->pTerm, p->nTerm ); + 
pPhrase->aTerm[0].nTerm = p->nTerm; pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) ); @@ -2485,16 +2488,17 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ /* Determine the maximum amount of space required. */ for(p=pTerm; p; p=p->pSynonym){ - nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2; + nByte += pTerm->nTerm * 2 + 3 + 2; } zQuoted = sqlite3_malloc64(nByte); if( zQuoted ){ int i = 0; for(p=pTerm; p; p=p->pSynonym){ - char *zIn = p->zTerm; + char *zIn = p->pTerm; + char *zEnd = &zIn[p->nTerm]; zQuoted[i++] = '"'; - while( *zIn ){ + while( zInnTerm; iTerm++){ - char *zTerm = pPhrase->aTerm[iTerm].zTerm; - zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); + Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; + zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", + p->nTerm, p->pTerm + ); if( pPhrase->aTerm[iTerm].bPrefix ){ zRet = fts5PrintfAppend(zRet, "*"); } @@ -2994,9 +3000,8 @@ static int fts5ExprPopulatePoslistsCb( Fts5ExprTerm *pTerm; if( p->aPopulator[i].bOk==0 ) continue; for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ - int nTerm = (int)strlen(pTerm->zTerm); - if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) - && memcmp(pTerm->zTerm, pToken, nTerm)==0 + if( (pTerm->nTerm==nToken || (pTerm->nTerm<nToken && pTerm->bPrefix)) + && memcmp(pTerm->pTerm, pToken, pTerm->nTerm)==0 ){ int rc = sqlite3Fts5PoslistWriterAppend( &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 791b850c76..155d74c025 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -45,6 +45,10 @@ do_execsql_test 1.3 { world } +do_execsql_test 1.4 { + SELECT rowid FROM ft('Hello'); +} {1} + #------------------------------------------------------------------------- reset_db diff --git a/manifest b/manifest index 431d902181..e0c37bf1e2 100644 --- a/manifest +++ b/manifest @@ -1,5 
+1,5 @@ -C Update\sfts5_decode()\sto\sallow\sfor\sembedded\s0x00\sbytes\sin\stokens. -D 2023-10-03T17:07:54.562 +C Fixes\sfor\sfts5\sexpression\sparser\smodule\sto\sallow\sembedded\s0x00\sbytes\sin\stokens. +D 2023-10-03T19:06:52.966 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -92,7 +92,7 @@ F ext/fts5/fts5Int.h 66a38b285e2b860baa29745d8eff27f5b0809268e7820498494d9acfacc F ext/fts5/fts5_aux.c 572d5ec92ba7301df2fea3258576332f2f4d2dfd66d8263afd157d9deceac480 F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081 -F ext/fts5/fts5_expr.c bd3b81ce669c4104e34ffe66570af1999a317b142c15fccb112de9fb0caa57a6 +F ext/fts5/fts5_expr.c cc215d39714b428523d2f2ef42b713c83095a28a67bc7f6f2dc4ac036a29f460 F ext/fts5/fts5_hash.c 76765856397eff56f526b0640b23a1677d737d35e07bc00e4b4b2e0fc5fda60d F ext/fts5/fts5_index.c e472083d371f420d52ec80445b9d2a99b16b23548205cb4064ddcd41bd79f63e F ext/fts5/fts5_main.c 799ec88d2309055f6406bddb0bd6ed80148c5da5eb14594c3c5309a6e944d489 @@ -187,7 +187,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 9a6edc85ccc4afb10e71d54d98d8170f850272e55b120520f367afbb12526674 +F ext/fts5/test/fts5origintext.test 3e1ac3230f65a0d644e9bf0738bebb09b4db9d9f123e1307d8630e42269b4afb F ext/fts5/test/fts5phrase.test 
13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2123,8 +2123,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c027c092c4af53bd6ae3cc6e2b4439167d9eeb0f9de549b6a2c2a72a67ee886c -R 9e81ed5ff713a928831c6c73df8f7a54 +P e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 +R 5cce41f02eae121cb66e72942ef56113 U dan -Z 0a0daf2566a3400fafbefb55947df637 +Z 80b8e2664e768ed2ac03913fcf0180ea # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 53a545a52f..4798938837 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 \ No newline at end of file +342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f \ No newline at end of file From 03204e910680096d982139bda04c6f8a1b0f8f67 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 11 Oct 2023 21:08:12 +0000 Subject: [PATCH 04/24] Add the tokendata=1 option to ignore trailing token-data when querying an fts5 table. 
FossilOrigin-Name: 122935182ad5869ce3a4c6d796c38a0509f6f3384dd1b3e60a3f2f0f366cc5f5 --- ext/fts5/fts5Int.h | 1 + ext/fts5/fts5_config.c | 10 +++++ ext/fts5/fts5_expr.c | 41 +++++++++++-------- ext/fts5/fts5_index.c | 19 +++++++-- ext/fts5/test/fts5_common.tcl | 14 +++++++ ext/fts5/test/fts5aa.test | 68 ++++++++++++++++++++----------- ext/fts5/test/fts5origintext.test | 45 +++++++++++++++++++- manifest | 24 +++++------ manifest.uuid | 2 +- 9 files changed, 164 insertions(+), 60 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 1687168d5f..4aa578559b 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -196,6 +196,7 @@ struct Fts5Config { char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ + int bTokendata; /* "tokendata=" option value (dflt==0) */ int eDetail; /* FTS5_DETAIL_XXX value */ char *zContentExprlist; Fts5Tokenizer *pTok; diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 5d0770502e..d2e8309cd2 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -398,6 +398,16 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("tokendata", zCmd, nCmd)==0 ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ + *pzErr = sqlite3_mprintf("malformed tokendata=... 
directive"); + rc = SQLITE_ERROR; + }else{ + pConfig->bTokendata = (zArg[0]=='1'); + } + return rc; + } + *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 745a5d9fa6..a2c6320719 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -101,7 +101,8 @@ struct Fts5ExprTerm { u8 bPrefix; /* True for a prefix term */ u8 bFirst; /* True if token must be first in column */ char *pTerm; /* Term data */ - int nTerm; /* Size of term in bytes */ + int nQueryTerm; /* Effective size of term in bytes */ + int nFullTerm; /* Size of term in bytes incl. tokendata */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; @@ -968,7 +969,7 @@ static int fts5ExprNearInitAll( p->pIter = 0; } rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, p->pTerm, p->nTerm, + pExpr->pIndex, p->pTerm, p->nQueryTerm, (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? 
FTS5INDEX_QUERY_DESC : 0), pNear->pColset, @@ -1703,6 +1704,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; + Fts5Config *pConfig; int rc; }; @@ -1737,7 +1739,8 @@ static int fts5ParseTokenize( }else{ memset(pSyn, 0, (size_t)nByte); pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); - pSyn->nTerm = nToken; + pSyn->nFullTerm = pSyn->nQueryTerm = nToken; + if( pCtx->pConfig->bTokendata ) pSyn->nQueryTerm = strlen(pSyn->pTerm); memcpy(pSyn->pTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; @@ -1764,7 +1767,8 @@ static int fts5ParseTokenize( pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); - pTerm->nTerm = nToken; + pTerm->nFullTerm = pTerm->nQueryTerm = nToken; + if( pCtx->pConfig->bTokendata ) pTerm->nQueryTerm = strlen(pTerm->pTerm); } } @@ -1831,6 +1835,7 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; + sCtx.pConfig = pConfig; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ @@ -1880,8 +1885,7 @@ int sqlite3Fts5ExprClonePhrase( int rc = SQLITE_OK; /* Return code */ Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ - TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ - + TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */ pOrig = pExpr->apExprPhrase[iPhrase]; pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); if( rc==SQLITE_OK ){ @@ -1912,11 +1916,12 @@ int sqlite3Fts5ExprClonePhrase( if( pOrig->nTerm ){ int i; /* Used to iterate through phrase terms */ + sCtx.pConfig = pExpr->pConfig; for(i=0; rc==SQLITE_OK && inTerm; i++){ int tflags = 0; Fts5ExprTerm *p; for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ - rc 
= fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm, p->nTerm, 0, 0); + rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm,p->nFullTerm,0,0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ @@ -2298,12 +2303,12 @@ static Fts5ExprNode *fts5ParsePhraseToAnd( fts5ExprPhraseFree(pPhrase); }else{ Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; + Fts5ExprTerm *pTo = &pPhrase->aTerm[0]; pParse->apPhrase[pParse->nPhrase++] = pPhrase; pPhrase->nTerm = 1; - pPhrase->aTerm[0].pTerm = sqlite3Fts5Strndup( - &pParse->rc, p->pTerm, p->nTerm - ); - pPhrase->aTerm[0].nTerm = p->nTerm; + pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm); + pTo->nQueryTerm = p->nQueryTerm; + pTo->nFullTerm = p->nFullTerm; pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) ); @@ -2488,7 +2493,7 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ /* Determine the maximum amount of space required. */ for(p=pTerm; p; p=p->pSynonym){ - nByte += pTerm->nTerm * 2 + 3 + 2; + nByte += pTerm->nQueryTerm * 2 + 3 + 2; } zQuoted = sqlite3_malloc64(nByte); @@ -2496,7 +2501,7 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ int i = 0; for(p=pTerm; p; p=p->pSynonym){ char *zIn = p->pTerm; - char *zEnd = &zIn[p->nTerm]; + char *zEnd = &zIn[p->nQueryTerm]; zQuoted[i++] = '"'; while( zInnTerm; iTerm++){ Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", - p->nTerm, p->pTerm + p->nQueryTerm, p->pTerm ); if( pPhrase->aTerm[iTerm].bPrefix ){ zRet = fts5PrintfAppend(zRet, "*"); @@ -2997,11 +3002,11 @@ static int fts5ExprPopulatePoslistsCb( if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; inPhrase; i++){ - Fts5ExprTerm *pTerm; + Fts5ExprTerm *pT; if( p->aPopulator[i].bOk==0 ) continue; - for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ - if( (pTerm->nTerm==nToken || 
(pTerm->nTermbPrefix)) - && memcmp(pTerm->pTerm, pToken, pTerm->nTerm)==0 + for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ + if( (pT->nFullTerm==nToken || (pT->nFullTermbPrefix)) + && memcmp(pT->pTerm, pToken, pT->nFullTerm)==0 ){ int rc = sqlite3Fts5PoslistWriterAppend( &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 6cf30b5a00..b90a66b88f 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6037,11 +6037,12 @@ static void fts5MergePrefixLists( static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ + int bTokenscan, int iIdx, /* Index to scan for data */ u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ - Fts5Iter **ppIter /* OUT: New iterator */ + Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; @@ -6060,6 +6061,8 @@ static void fts5SetupPrefixIter( xAppend = fts5AppendPoslist; } + assert( bTokenscan==0 || iIdx==0 ); + aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); @@ -6075,6 +6078,12 @@ static void fts5SetupPrefixIter( int bNewTerm = 1; memset(&doclist, 0, sizeof(doclist)); + + /* If iIdx is non-zero, then it is the number of a prefix-index for + ** prefixes 1 character longer than the prefix being queried for. That + ** index contains all the doclists required, except for the one + ** corresponding to the prefix itself. That one is extracted from the + ** main term index here. 
*/ if( iIdx!=0 ){ int dummy = 0; const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; @@ -6110,6 +6119,7 @@ static void fts5SetupPrefixIter( assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( bNewTerm ){ if( nTermnToken && pTerm[nToken]!=0x00 ) break; } if( p1->base.nData==0 ) continue; @@ -6438,7 +6448,7 @@ int sqlite3Fts5IndexQuery( } } - if( iIdx<=pConfig->nPrefix ){ + if( iIdx<=pConfig->nPrefix && (pConfig->bTokendata==0 || iIdx!=0) ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); @@ -6451,7 +6461,10 @@ int sqlite3Fts5IndexQuery( }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; - fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); + int bTokenscan = (iIdx==0); + fts5SetupPrefixIter( + p, bDesc, bTokenscan, iPrefixIdx, buf.p, nToken+1, pColset, &pRet + ); if( pRet==0 ){ assert( p->rc!=SQLITE_OK ); }else{ diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 9c012932da..001cad1de2 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -438,6 +438,20 @@ proc detail_is_none {} { detail_check ; expr {$::detail == "none"} } proc detail_is_col {} { detail_check ; expr {$::detail == "col" } } proc detail_is_full {} { detail_check ; expr {$::detail == "full"} } +proc foreach_tokenizer_mode {prefix script} { + set saved $::testprefix + foreach {d mapping} { + "" {} + "-origintext" {, tokenize="origintext unicode61", tokendata=1} + } { + set s [string map [list %TOKENIZER% $mapping] $script] + set ::testprefix "$prefix$d" + reset_db + sqlite3_fts5_register_origintext db + uplevel $s + } + set ::testprefix $saved +} #------------------------------------------------------------------------- # Convert a poslist of the type returned by fts5_test_poslist() to a diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index e1551fc516..a80a307a49 
100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -22,6 +22,7 @@ ifcapable !fts5 { } foreach_detail_mode $::testprefix { +foreach_tokenizer_mode $::testprefix { do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); @@ -44,7 +45,7 @@ do_execsql_test 1.1 { # do_execsql_test 2.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); @@ -73,8 +74,9 @@ do_execsql_test 2.4 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 3.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } foreach {i x y} { 1 {g f d b f} {h h e i a} @@ -97,8 +99,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 4.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { @@ -121,8 +124,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 5.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { @@ -145,8 +149,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 6.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } 
@@ -181,6 +186,7 @@ do_execsql_test 6.6 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db expr srand(0) do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); @@ -222,6 +228,7 @@ for {set i 1} {$i <= 10} {incr i} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); @@ -236,6 +243,7 @@ do_execsql_test 8.1 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db expr srand(0) @@ -280,8 +288,9 @@ for {set i 1} {$i <= 10} {incr i} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 10.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } set d10 { 1 {g f d b f} {h h e i a} @@ -314,19 +323,19 @@ do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') } #------------------------------------------------------------------------- # do_catchsql_test 11.1 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 column name: rank}} do_catchsql_test 11.2 { - CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL%); + CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 table name: rank}} do_catchsql_test 11.3 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 column name: rowid}} #------------------------------------------------------------------------- # 
do_execsql_test 12.1 { - CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } {} do_catchsql_test 12.2 { @@ -341,8 +350,9 @@ do_test 12.3 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 13.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o'); } {} @@ -365,8 +375,9 @@ do_execsql_test 13.6 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 14.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); WITH d(x,y) AS ( SELECT NULL, 'xyz xyz xyz xyz xyz xyz' @@ -449,8 +460,9 @@ do_catchsql_test 16.2 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO b2 VALUES('a'); INSERT INTO b2 VALUES('b'); INSERT INTO b2 VALUES('c'); @@ -466,8 +478,9 @@ do_test 17.2 { if {[string match n* %DETAIL%]==0} { reset_db + sqlite3_fts5_register_origintext db do_execsql_test 17.3 { - CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); INSERT INTO c2 VALUES('x x x', 'x x x'); SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; } {1} @@ -476,8 +489,9 @@ if {[string match n* %DETAIL%]==0} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE uio USING fts5(ttt, 
detail=%DETAIL%); + CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL% %TOKENIZER%); INSERT INTO uio VALUES(NULL); INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; @@ -524,8 +538,8 @@ do_execsql_test 17.9 { #-------------------------------------------------------------------- # do_execsql_test 18.1 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); - CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL% %TOKENIZER%); + CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1 VALUES('abc*', NULL); INSERT INTO t2 VALUES(1, 'abcdefg'); } @@ -540,8 +554,9 @@ do_execsql_test 18.3 { # fts5 table in the temp schema. # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 19.0 { - CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1 VALUES('x y z'); INSERT INTO t1 VALUES('w x 1'); SELECT rowid FROM t1 WHERE t1 MATCH 'x'; @@ -551,8 +566,9 @@ do_execsql_test 19.0 { # Test that 6 and 7 byte varints can be read. 
# reset_db +sqlite3_fts5_register_origintext db do_execsql_test 20.0 { - CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL% %TOKENIZER%); } set ::ids [list \ 0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43] @@ -570,7 +586,7 @@ do_test 20.1 { # do_execsql_test 21.0 { CREATE TEMP TABLE t8(a, b); - CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 21.1 { @@ -581,7 +597,7 @@ do_execsql_test 21.1 { } do_execsql_test 22.0 { - CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t9(rowid, x) VALUES(2, 'bbb'); BEGIN; INSERT INTO t9(rowid, x) VALUES(1, 'aaa'); @@ -596,7 +612,7 @@ do_execsql_test 22.1 { #------------------------------------------------------------------------- do_execsql_test 23.0 { - CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL% %TOKENIZER%); CREATE TABLE t11(x); } do_execsql_test 23.1 { @@ -608,7 +624,7 @@ do_execsql_test 23.2 { #------------------------------------------------------------------------- do_execsql_test 24.0 { - CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t12 VALUES('aaaa'); } do_execsql_test 24.1 { @@ -618,6 +634,9 @@ do_execsql_test 24.1 { INSERT INTO t12 VALUES('aaaa'); END; } +execsql_pp { + SELECT rowid, hex(block) FROM t12_data +} do_execsql_test 24.2 { INSERT INTO t12(t12) VALUES('integrity-check'); } @@ -627,7 +646,7 @@ do_execsql_test 24.3 { #------------------------------------------------------------------------- do_execsql_test 25.0 { - CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 25.1 { BEGIN; @@ -638,6 +657,7 @@ 
SELECT * FROM t13('BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB } +} } expand_all_sql db diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 155d74c025..3fd5f17e7b 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -112,9 +112,50 @@ do_execsql_test 2.5 { INSERT INTO ft(ft) VALUES('optimize'); } -proc b {x} { string map [list "\0" "."] $x } -db func b b +#------------------------------------------------------------------------- +reset_db + +sqlite3_fts5_register_origintext db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, tokenize="origintext unicode61"); + CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); + + INSERT INTO ft(rowid, x) VALUES(1, 'hello'); + INSERT INTO ft(rowid, x) VALUES(2, 'Hello'); + INSERT INTO ft(rowid, x) VALUES(3, 'HELLO'); +} + +#proc b {x} { string map [list "\0" "."] $x } +#db func b b #execsql_pp { SELECT b(term) FROM vocab } +do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1 +do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2 +do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, + tokenize="origintext unicode61", + tokendata=1 + ); + CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance); + + INSERT INTO ft2(rowid, x) VALUES(1, 'hello'); + INSERT INTO ft2(rowid, x) VALUES(2, 'Hello'); + INSERT INTO ft2(rowid, x) VALUES(3, 'HELLO'); + + INSERT INTO ft2(rowid, x) VALUES(10, 'helloooo'); +} + +#proc b {x} { string map [list "\0" "."] $x } +#db func b b +#execsql_pp { SELECT b(term) FROM vocab } + +do_execsql_test 3.1.1 { SELECT rowid FROM ft2('hello') } {1 2 3} +do_execsql_test 3.1.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} +do_execsql_test 3.1.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} + +do_execsql_test 3.1.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} + finish_test diff --git a/manifest b/manifest index e0c37bf1e2..92f3bfcf68 
100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sfor\sfts5\sexpression\sparser\smodule\sto\sallow\sembedded\s0x00\sbytes\sin\stokens. -D 2023-10-03T19:06:52.966 +C Add\sthe\stokendata=1\soption\sto\signore\strailing\stoken-data\swhen\squerying\san\sfts5\stable. +D 2023-10-11T21:08:12.656 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -88,13 +88,13 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h 05501612cc655504c5dce8ba765ab621d50fc478490089beaa0d75e00b23e520 -F ext/fts5/fts5Int.h 66a38b285e2b860baa29745d8eff27f5b0809268e7820498494d9acfaccf8a5c +F ext/fts5/fts5Int.h a21eb1cf036ac9eb943e45ed307762901ea86f0159bf0848baa2079a112ddc2f F ext/fts5/fts5_aux.c 572d5ec92ba7301df2fea3258576332f2f4d2dfd66d8263afd157d9deceac480 F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 -F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081 -F ext/fts5/fts5_expr.c cc215d39714b428523d2f2ef42b713c83095a28a67bc7f6f2dc4ac036a29f460 +F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf +F ext/fts5/fts5_expr.c fd091d0558fda2517602ed5886ec615ce3e1bd76fb0bb0e5d1aa85ba8db287a8 F ext/fts5/fts5_hash.c 76765856397eff56f526b0640b23a1677d737d35e07bc00e4b4b2e0fc5fda60d -F ext/fts5/fts5_index.c e472083d371f420d52ec80445b9d2a99b16b23548205cb4064ddcd41bd79f63e +F ext/fts5/fts5_index.c 79a8e45771d0be24f0399b12268299f132ce0970ade941ba8a2d40b1d1aee4d7 F ext/fts5/fts5_main.c 799ec88d2309055f6406bddb0bd6ed80148c5da5eb14594c3c5309a6e944d489 F 
ext/fts5/fts5_storage.c 3c9b41fce41b6410f2e8f82eb035c6a29b2560483f773e6dc98cf3cb2e4ddbb5 F ext/fts5/fts5_tcl.c 0d2bb0ff7bf6ee136015be118167f0bd956ddd05a8f02c68bd34299b50648f9f @@ -106,8 +106,8 @@ F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d0988 F ext/fts5/fts5_vocab.c 12138e84616b56218532e3e8feb1d3e0e7ae845e33408dbe911df520424dc9d6 F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl a9de9c2209cc4e7ae3c753e783504e67206c6c1467d08f209cd0c5923d3e8d8b -F ext/fts5/test/fts5aa.test ba5158eba7d61359becdfca895ef471072c7bf7b20e5e60dcb4d024c8419c926 +F ext/fts5/test/fts5_common.tcl 8b1848ac2baad10e444e4183034a52050b52d20b3796d9d30e78f01ab0d05583 +F ext/fts5/test/fts5aa.test 4db81519863244a3cab35795fe65ab6b592e7970c7409eba098b23ebbfc08d95 F ext/fts5/test/fts5ab.test bd932720c748383277456b81f91bc00453de2174f9762cd05f95d0495dc50390 F ext/fts5/test/fts5ac.test a7aa7e1fefc6e1918aa4d3111d5c44a09177168e962c5fd2cca9620de8a7ed6d F ext/fts5/test/fts5ad.test e8cf959dfcd57c8e46d6f5f25665686f3b6627130a9a981371dafdf6482790de @@ -187,7 +187,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 3e1ac3230f65a0d644e9bf0738bebb09b4db9d9f123e1307d8630e42269b4afb +F ext/fts5/test/fts5origintext.test 646df137f1aa5b3d7032374ebe82bfdbe88d9f825d73ce8d44bead480317a9c5 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F 
ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2123,8 +2123,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 -R 5cce41f02eae121cb66e72942ef56113 +P 342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f +R 28168382a793dd4cf732de7d6442ef72 U dan -Z 80b8e2664e768ed2ac03913fcf0180ea +Z cded15b3b6aeefc28aa13f7856e47e8d # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 4798938837..8bc375eea2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f \ No newline at end of file +122935182ad5869ce3a4c6d796c38a0509f6f3384dd1b3e60a3f2f0f366cc5f5 \ No newline at end of file From 43d05ccc314472ece8dfcb2eaac36c6e93ceee23 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 8 Nov 2023 14:55:20 +0000 Subject: [PATCH 05/24] Add declarations for new API functions. 
FossilOrigin-Name: b8a48cc18c94d15017f898c820fdd784efbaac20d7a45c4d97269333e8f2ec60 --- ext/fts5/fts5.h | 5 +++++ manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 323d73a28f..730a2fb5e3 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -298,6 +298,11 @@ struct Fts5ExtensionApi { int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); + + /* Below this point are iVersion>=3 only */ + int (*xQueryToken)(Fts5Context*, int iPhrase, int iToken, const char**, int*); + int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*); + int (*xPhraseToken)(Fts5Context*, Fts5PhraseIter*, int, const char**, int*); }; /* diff --git a/manifest b/manifest index 8eba96ed3c..baff022d13 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\sinto\sthis\sbranch. -D 2023-11-06T19:16:38.711 +C Add\sdeclarations\sfor\snew\sAPI\sfunctions. 
+D 2023-11-08T14:55:20.854 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -87,7 +87,7 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7 F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 -F ext/fts5/fts5.h 05501612cc655504c5dce8ba765ab621d50fc478490089beaa0d75e00b23e520 +F ext/fts5/fts5.h 68256dd94eaba3e874762a63578922fd62a5ca87c76a28f7636effee4d9bd781 F ext/fts5/fts5Int.h a21eb1cf036ac9eb943e45ed307762901ea86f0159bf0848baa2079a112ddc2f F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 @@ -2143,8 +2143,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P ac5570614ebb77feff4943c7a78d877508e31087b550f337c11b45f1016e3c55 c2058a045b57571b2b5d342adb212fe606717c633a0422755691ae6bf5725d25 -R 5cb5a1a54d74cfe313891abc677e111f +P 3a869cf1f84b0e9bdcc4de53685430ab41eafacbba1ca7b87e727aa98811c6c5 +R 9e06a9e0b44056d476c02db915884372 U dan -Z d0201f081f93fb8379f972a79c65f95a +Z 6c0dbde9f9a0f05c4a27f8bd01924beb # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 412eb8af05..cf6088cc7c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3a869cf1f84b0e9bdcc4de53685430ab41eafacbba1ca7b87e727aa98811c6c5 \ No newline at end of file +b8a48cc18c94d15017f898c820fdd784efbaac20d7a45c4d97269333e8f2ec60 \ No newline at end of file From e108029332f7d403961b3f5c9bbe1c8fe3a9e6d5 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 13 Nov 2023 14:29:12 +0000 Subject: [PATCH 06/24] Add new fts5 API xQueryToken(). FossilOrigin-Name: 828566392b3ea8db603cb1ae5eccbc8ac035efaa284bc7c15ba89874f634aec9 --- ext/fts5/fts5.h | 2 +- ext/fts5/fts5Int.h | 2 ++ ext/fts5/fts5_expr.c | 26 +++++++++++++++++++ ext/fts5/fts5_main.c | 16 +++++++++++- ext/fts5/fts5_tcl.c | 18 +++++++++++++ ext/fts5/test/fts5origintext.test | 43 +++++++++++++++++++++++++++++++ manifest | 22 ++++++++-------- manifest.uuid | 2 +- 8 files changed, 117 insertions(+), 14 deletions(-) diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 730a2fb5e3..5a2008882f 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -263,7 +263,7 @@ struct Fts5PhraseIter { ** See xPhraseFirstColumn above. */ struct Fts5ExtensionApi { - int iVersion; /* Currently always set to 2 */ + int iVersion; /* Currently always set to 3 */ void *(*xUserData)(Fts5Context*); diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 4aa578559b..df25fd9ffa 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -773,6 +773,8 @@ int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); +int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); + /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. 
The interfaces below this point are called by diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index a2c6320719..80387a90fd 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3145,3 +3145,29 @@ int sqlite3Fts5ExprPhraseCollist( return rc; } + +/* +** Does the work of the fts5_api.xQueryToken() API method. +*/ +int sqlite3Fts5ExprQueryToken( + Fts5Expr *pExpr, + int iPhrase, + int iToken, + const char **ppOut, + int *pnOut +){ + Fts5ExprPhrase *pPhrase = 0; + + if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ + return SQLITE_RANGE; + } + pPhrase = pExpr->apExprPhrase[iPhrase]; + if( iToken<0 || iToken>=pPhrase->nTerm ){ + return SQLITE_RANGE; + } + + *ppOut = pPhrase->aTerm[iToken].pTerm; + *pnOut = pPhrase->aTerm[iToken].nFullTerm; + return SQLITE_OK; +} + diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 6e86ca5951..7ddc7b6fcf 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -2323,13 +2323,24 @@ static int fts5ApiPhraseFirstColumn( return rc; } +static int fts5ApiQueryToken( + Fts5Context* pCtx, + int iPhrase, + int iToken, + const char **ppOut, + int *pnOut +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut); +} + static int fts5ApiQueryPhrase(Fts5Context*, int, void*, int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) ); static const Fts5ExtensionApi sFts5Api = { - 2, /* iVersion */ + 3, /* iVersion */ fts5ApiUserData, fts5ApiColumnCount, fts5ApiRowCount, @@ -2349,6 +2360,9 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiPhraseNext, fts5ApiPhraseFirstColumn, fts5ApiPhraseNextColumn, + fts5ApiQueryToken, + 0, + 0 }; /* diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index fb4bea8e9e..a9390adc9e 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -244,6 +244,8 @@ static int SQLITE_TCLAPI xF5tApi( { "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */ { "xPhraseForeach", 4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */ { 
"xPhraseColumnForeach", 3, "IPHRASE COLVAR SCRIPT" }, /* 17 */ + + { "xQueryToken", 2, "IPHRASE ITERM" }, /* 19 */ { 0, 0, 0} }; @@ -500,6 +502,22 @@ static int SQLITE_TCLAPI xF5tApi( break; } + CASE(18, "xQueryToken") { + const char *pTerm = 0; + int nTerm = 0; + int iPhrase = 0; + int iTerm = 0; + + if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR; + if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR; + rc = p->pApi->xQueryToken(p->pFts, iPhrase, iTerm, &pTerm, &nTerm); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm)); + } + + break; + } + default: assert( 0 ); break; diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 3fd5f17e7b..2c9a4ba5a3 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -157,5 +157,48 @@ do_execsql_test 3.1.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} do_execsql_test 3.1.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} +#------------------------------------------------------------------------- +# +reset_db +sqlite3_fts5_register_origintext db +proc querytoken {cmd iPhrase iToken} { + set txt [$cmd xQueryToken $iPhrase $iToken] + string map [list "\0" "."] $txt +} +sqlite3_fts5_create_function db querytoken querytoken + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize='origintext unicode61', tokendata=1 + ); + INSERT INTO ft VALUES('one two three four'); +} + +do_execsql_test 3.1 { + SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO') +} {1 two.TwO} +do_execsql_test 3.2 { + SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE') +} {1 one} +do_execsql_test 3.3 { + SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE') +} {1 two.TWO} +do_execsql_test 3.4 { + SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"') +} {1 three.ThreE} + +do_catchsql_test 3.5 { + SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"') +} {1 SQLITE_RANGE} +do_catchsql_test 3.6 { + 
SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"') +} {1 SQLITE_RANGE} +do_catchsql_test 3.7 { + SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"') +} {1 SQLITE_RANGE} + + + + finish_test diff --git a/manifest b/manifest index baff022d13..4a539764cf 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sdeclarations\sfor\snew\sAPI\sfunctions. -D 2023-11-08T14:55:20.854 +C Add\snew\sfts5\sAPI\sxQueryToken(). +D 2023-11-13T14:29:12.382 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -87,17 +87,17 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7 F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 -F ext/fts5/fts5.h 68256dd94eaba3e874762a63578922fd62a5ca87c76a28f7636effee4d9bd781 -F ext/fts5/fts5Int.h a21eb1cf036ac9eb943e45ed307762901ea86f0159bf0848baa2079a112ddc2f +F ext/fts5/fts5.h e27cdb10e38d87cb041dcb56cef97addf7d902aeab07e84e7102f5fc65d3357c +F ext/fts5/fts5Int.h 19b198459a2791415919428d44ebf4c830b59b2da6f27f8faaffe39a876b7ecf F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c fd091d0558fda2517602ed5886ec615ce3e1bd76fb0bb0e5d1aa85ba8db287a8 +F ext/fts5/fts5_expr.c 69c81af515ce1cedccf093c7c76f8b3b4f24bafbfb1d03a431af9f5c69a81834 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F 
ext/fts5/fts5_index.c 145723e22ffee28dbe2a24933e74ad998d32419223c0ddb8506a1f0c39b952c4 -F ext/fts5/fts5_main.c a07ed863b8bd9e6fefb62db2fd40a3518eb30a5f7dcfda5be915dd2db45efa2f +F ext/fts5/fts5_main.c ddac85dbd28167af81f64e568bfe9020bf9708d650de207d1465ed19938316d1 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d -F ext/fts5/fts5_tcl.c 0d2bb0ff7bf6ee136015be118167f0bd956ddd05a8f02c68bd34299b50648f9f +F ext/fts5/fts5_tcl.c 71641a0c5693c64acfad9d10e64475ec92d9f464d06ba7fd350552de373586d8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b F ext/fts5/fts5_tokenize.c 83cfcede3898001cab84432a36ce1503e3080cf9b1c682b022ec82e267ea4c13 @@ -188,7 +188,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 646df137f1aa5b3d7032374ebe82bfdbe88d9f825d73ce8d44bead480317a9c5 +F ext/fts5/test/fts5origintext.test 8296984d268d1d20f85c9de316f422ffb6ebc12020d3f8a0a18144d6ca7b347f F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2143,8 +2143,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx 
e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 3a869cf1f84b0e9bdcc4de53685430ab41eafacbba1ca7b87e727aa98811c6c5 -R 9e06a9e0b44056d476c02db915884372 +P b8a48cc18c94d15017f898c820fdd784efbaac20d7a45c4d97269333e8f2ec60 +R 177f59b3c48b3865f7f595e0ec855966 U dan -Z 6c0dbde9f9a0f05c4a27f8bd01924beb +Z ac818dc8fd8dc9007ff77eae486bea2e # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index cf6088cc7c..f6858a05cd 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b8a48cc18c94d15017f898c820fdd784efbaac20d7a45c4d97269333e8f2ec60 \ No newline at end of file +828566392b3ea8db603cb1ae5eccbc8ac035efaa284bc7c15ba89874f634aec9 \ No newline at end of file From 50b0e25a556c34115337e7f3eca85dabea6b463d Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 15 Nov 2023 11:45:19 +0000 Subject: [PATCH 07/24] Add implementation of xInstToken() API. FossilOrigin-Name: a34b26fe7f60b74e7ae5cf64900920a3d352a20da2496401bcbc27041689cd07 --- ext/fts5/fts5Int.h | 5 + ext/fts5/fts5_expr.c | 24 ++++ ext/fts5/fts5_index.c | 206 +++++++++++++++++++++++++++++- ext/fts5/fts5_main.c | 55 ++++++-- ext/fts5/fts5_tcl.c | 19 ++- ext/fts5/test/fts5origintext.test | 55 ++++++-- manifest | 22 ++-- manifest.uuid | 2 +- 8 files changed, 356 insertions(+), 32 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index df25fd9ffa..19d9579eb7 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -464,6 +464,10 @@ void *sqlite3Fts5StructureRef(Fts5Index*); void sqlite3Fts5StructureRelease(void*); int sqlite3Fts5StructureTest(Fts5Index*, void*); +/* +** Used by xInstToken() and xPhraseToken(). +*/ +int sqlite3Fts5IterToken(Fts5IndexIter*, int, int, const char**, int*); /* ** Insert or remove data to or from the index. 
Each time a document is @@ -774,6 +778,7 @@ int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); +int sqlite3Fts5ExprInstToken(Fts5Expr*, int, int, int, int, const char**, int*); /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 80387a90fd..ae851a877a 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3171,3 +3171,27 @@ int sqlite3Fts5ExprQueryToken( return SQLITE_OK; } +int sqlite3Fts5ExprInstToken( + Fts5Expr *pExpr, + int iPhrase, + int iCol, + int iOff, + int iToken, + const char **ppOut, + int *pnOut +){ + Fts5ExprPhrase *pPhrase = 0; + Fts5IndexIter *pIter = 0; + + if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ + return SQLITE_RANGE; + } + pPhrase = pExpr->apExprPhrase[iPhrase]; + if( iToken<0 || iToken>=pPhrase->nTerm ){ + return SQLITE_RANGE; + } + pIter = pPhrase->aTerm[iToken].pIter; + + return sqlite3Fts5IterToken(pIter, iCol, iOff+iToken, ppOut, pnOut); +} + diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 6cc2c13927..2d6d561b7b 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -323,6 +323,9 @@ typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; +typedef struct Fts5TokenMapEntry Fts5TokenMapEntry; +typedef struct Fts5TokenMapToken Fts5TokenMapToken; +typedef struct Fts5TokenMap Fts5TokenMap; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ @@ -605,11 +608,34 @@ struct Fts5Iter { int bRev; /* True to iterate in reverse order */ u8 bSkipEmpty; /* True to skip deleted entries */ + Fts5TokenMap *pTokenMap; i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ 
Fts5CResult *aFirst; /* Current merge state (see above) */ Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; +struct Fts5TokenMapEntry { + i64 iRowid; + u16 iCol; + int iOff; + int iTok; /* Offset into aToken[] + 1 */ +}; + +struct Fts5TokenMapToken { + u8 *pTerm; + int nTerm; +}; + +struct Fts5TokenMap { + int nEntryAlloc; + int nEntry; + Fts5TokenMapEntry *aEntry; + + int nTokenAlloc; + int nToken; + Fts5TokenMapToken *aToken; +}; + /* ** An instance of the following type is used to iterate through the contents @@ -3023,6 +3049,20 @@ static void fts5SegIterNextFrom( }while( p->rc==SQLITE_OK ); } +/* +** Free the Fts5TokenMap object passed as the only argument. +*/ +static void fts5TokenMapFree(Fts5TokenMap *pMap){ + if( pMap ){ + int ii; + for(ii=0; ii<pMap->nToken; ii++){ + sqlite3_free(pMap->aToken[ii].pTerm); + } + sqlite3_free(pMap->aToken); + sqlite3_free(pMap->aEntry); + sqlite3_free(pMap); + } +} /* ** Free the iterator object passed as the second argument. @@ -3034,6 +3074,7 @@ static void fts5MultiIterFree(Fts5Iter *pIter){ fts5SegIterClear(&pIter->aSeg[i]); } fts5BufferFree(&pIter->poslist); + fts5TokenMapFree(pIter->pTokenMap); sqlite3_free(pIter); } } @@ -3847,6 +3888,7 @@ fts5MultiIterNew_post_check: static void fts5MultiIterNew2( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Data *pData, /* Doclist to iterate through */ + Fts5TokenMap *pMap, /* Token-map, if any */ int bDesc, /* True for descending rowid order */ Fts5Iter **ppOut /* New object */ ){ @@ -3854,7 +3896,8 @@ static void fts5MultiIterNew2( pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; - + pNew->pTokenMap = pMap; + pMap = 0; pIter->flags = FTS5_SEGITER_ONETERM; if( pData->szLeaf>0 ){ pIter->pLeaf = pData; @@ -3877,6 +3920,7 @@ static void fts5MultiIterNew2( *ppOut = pNew; } + fts5TokenMapFree(pMap); fts5DataRelease(pData); } @@ -6063,6 +6107,128 @@ static void fts5MergePrefixLists( *p1 = out; } +static u8 *fts5IdxBufferDup(Fts5Index *p, const
u8 *pDup, int nDup){ + u8 *pRet = fts5IdxMalloc(p, nDup+1); + if( pRet ){ + memcpy(pRet, pDup, nDup); + } + return pRet; +} + +static void fts5TokenMapTerm( + Fts5Index *p, + Fts5TokenMap *pMap, + const u8 *pTerm, + int nTerm +){ + if( p->rc==SQLITE_OK ){ + Fts5TokenMapToken *pToken = 0; + if( pMap->nToken==pMap->nTokenAlloc ){ + i64 nNew = (pMap->nTokenAlloc ? pMap->nTokenAlloc * 2 : 32); + Fts5TokenMapToken *aNew = sqlite3_realloc64( + pMap->aToken, nNew*sizeof(Fts5TokenMapToken) + ); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + pMap->nTokenAlloc = nNew; + pMap->aToken = aNew; + } + pToken = &pMap->aToken[pMap->nToken++]; + pToken->nTerm = nTerm; + pToken->pTerm = fts5IdxBufferDup(p, pTerm, nTerm); + } +} + + +static void fts5TokenMapPoslist( + Fts5Index *p, + Fts5TokenMap *pMap, + Fts5Iter *p1 +){ + if( p->rc==SQLITE_OK ){ + const u8 *a = p1->base.pData; + i64 iPos = 0; + int iOff = 0; + + while( 0==sqlite3Fts5PoslistNext64(a, p1->base.nData, &iOff, &iPos) ){ + Fts5TokenMapEntry *pEntry = 0; + int iCol = FTS5_POS2COLUMN(iPos); + int iTokOff = FTS5_POS2OFFSET(iPos); + + if( pMap->nEntry==pMap->nEntryAlloc ){ + i64 nNew = (pMap->nEntryAlloc ? 
pMap->nEntryAlloc * 2 : 32); + Fts5TokenMapEntry *aNew = sqlite3_realloc64( + pMap->aEntry, nNew*sizeof(Fts5TokenMapEntry) + ); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + pMap->nEntryAlloc = nNew; + pMap->aEntry = aNew; + } + pEntry = &pMap->aEntry[pMap->nEntry++]; + pEntry->iRowid = p1->base.iRowid; + pEntry->iCol = iCol; + pEntry->iOff = iTokOff; + pEntry->iTok = pMap->nToken; + } + } +} + +static int fts5TokenMapHash(i64 iRowid, int iCol, int iOff){ + return iRowid + (iRowid << 3) + (iCol << 6) + (iOff << 9); +} + +static void fts5TokenMapHashify(Fts5Index *p, Fts5TokenMap *pMap){ + int nHash = pMap->nEntry*2; + Fts5TokenMapEntry *aHash = 0; + + aHash = (Fts5TokenMapEntry*)fts5IdxMalloc(p, nHash*sizeof(Fts5TokenMapEntry)); + if( aHash ){ + int ii; + for(ii=0; ii<pMap->nEntry; ii++){ + Fts5TokenMapEntry *pEntry = &pMap->aEntry[ii]; + Fts5TokenMapEntry *pCopy = 0; + int iHash = fts5TokenMapHash(pEntry->iRowid, pEntry->iCol, pEntry->iOff); + + while( aHash[iHash % nHash].iTok ){ + iHash++; + } + pCopy = &aHash[iHash % nHash]; + memcpy(pCopy, pEntry, sizeof(Fts5TokenMapEntry)); + } + + sqlite3_free(pMap->aEntry); + pMap->aEntry = aHash; + pMap->nEntry = pMap->nEntryAlloc = nHash; + } +} + +static const u8 *fts5TokenMapLookup( + Fts5TokenMap *pMap, + i64 iRowid, + int iCol, + int iOff, + int *pnOut +){ + int iHash = fts5TokenMapHash(iRowid, iCol, iOff) % pMap->nEntry; + + for(; pMap->aEntry[iHash].iTok!=0; iHash = (iHash+1)%pMap->nEntry){ + Fts5TokenMapEntry *pEntry = &pMap->aEntry[iHash]; + if( pEntry->iRowid==iRowid && pEntry->iCol==iCol && pEntry->iOff==iOff ){ + *pnOut = pMap->aToken[pEntry->iTok-1].nTerm; + return pMap->aToken[pEntry->iTok-1].pTerm; + } + } + + *pnOut = 0; + return 0; +} + + static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ @@ -6077,6 +6243,7 @@ static void fts5SetupPrefixIter( Fts5Buffer *aBuf; int nBuf = 32; int nMerge = 1; + Fts5TokenMap *pMap = 0; void
(*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); @@ -6094,8 +6261,12 @@ static void fts5SetupPrefixIter( aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); + if( iIdx==0 ){ + pMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); + } + assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); - if( aBuf && pStruct ){ + if( p->rc==SQLITE_OK ){ const int flags = FTS5INDEX_QUERY_SCAN | FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_NOOUTPUT; @@ -6113,6 +6284,7 @@ static void fts5SetupPrefixIter( ** index contains all the doclists required, except for the one ** corresponding to the prefix itself. That one is extracted from the ** main term index here. */ + assert( iIdx==0 || pMap==0 ); if( iIdx!=0 ){ int dummy = 0; const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; @@ -6145,6 +6317,13 @@ static void fts5SetupPrefixIter( const u8 *pTerm = pSeg->term.p; p1->xSetOutputs(p1, pSeg); + if( pMap ){ + if( bNewTerm ){ + fts5TokenMapTerm(p, pMap, &pTerm[1], nTerm-1); + } + fts5TokenMapPoslist(p, pMap, p1); + } + assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( bNewTerm ){ if( nTermp = (u8*)&pData[1]; pData->nn = pData->szLeaf = doclist.n; if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); - fts5MultiIterNew2(p, pData, bDesc, ppIter); + if( pMap ) fts5TokenMapHashify(p, pMap); + fts5MultiIterNew2(p, pData, pMap, bDesc, ppIter); } fts5BufferFree(&doclist); } @@ -6575,6 +6755,26 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ return (z ? 
&z[1] : 0); } +/* +** +*/ +int sqlite3Fts5IterToken( + Fts5IndexIter *pIndexIter, + int iCol, + int iOff, + const char **ppOut, int *pnOut +){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + if( pIter->pTokenMap ){ + *ppOut = fts5TokenMapLookup( + pIter->pTokenMap, pIndexIter->iRowid, iCol, iOff, pnOut + ); + }else{ + *ppOut = sqlite3Fts5IterTerm(pIndexIter, pnOut); + } + return SQLITE_OK; +} + /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 7ddc7b6fcf..5ef80719ae 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -2063,12 +2063,6 @@ static int fts5ApiInst( ){ if( iIdx<0 || iIdx>=pCsr->nInstCount ){ rc = SQLITE_RANGE; -#if 0 - }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){ - *piPhrase = pCsr->aInst[iIdx*3]; - *piCol = pCsr->aInst[iIdx*3 + 2]; - *piOff = -1; -#endif }else{ *piPhrase = pCsr->aInst[iIdx*3]; *piCol = pCsr->aInst[iIdx*3 + 1]; @@ -2323,6 +2317,9 @@ static int fts5ApiPhraseFirstColumn( return rc; } +/* +** xQueryToken() API implemenetation. +*/ static int fts5ApiQueryToken( Fts5Context* pCtx, int iPhrase, @@ -2334,6 +2331,48 @@ static int fts5ApiQueryToken( return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut); } +/* +** xInstToken() API implemenetation. +*/ +static int fts5ApiInstToken( + Fts5Context *pCtx, + int iIdx, + int iToken, + const char **ppOut, int *pnOut +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + int rc = SQLITE_OK; + if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 + || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) + ){ + if( iIdx<0 || iIdx>=pCsr->nInstCount ){ + rc = SQLITE_RANGE; + }else{ + int iPhrase = pCsr->aInst[iIdx*3]; + int iCol = pCsr->aInst[iIdx*3 + 1]; + int iOff = pCsr->aInst[iIdx*3 + 2]; + + rc = sqlite3Fts5ExprInstToken( + pCsr->pExpr, iPhrase, iCol, iOff, iToken, ppOut, pnOut + ); + } + } + return rc; +} + +/* +** xPhraseToken() API implemenetation. 
+*/ +static int fts5ApiPhraseToken( + Fts5Context *pCtx, + Fts5PhraseIter *pIter, + int iToken, + const char **ppOut, + int *pnOut +){ + return SQLITE_OK; +} + static int fts5ApiQueryPhrase(Fts5Context*, int, void*, int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) @@ -2361,8 +2400,8 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiPhraseFirstColumn, fts5ApiPhraseNextColumn, fts5ApiQueryToken, - 0, - 0 + fts5ApiInstToken, + fts5ApiPhraseToken }; /* diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index a9390adc9e..853a41865e 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -245,7 +245,8 @@ static int SQLITE_TCLAPI xF5tApi( { "xPhraseForeach", 4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */ { "xPhraseColumnForeach", 3, "IPHRASE COLVAR SCRIPT" }, /* 17 */ - { "xQueryToken", 2, "IPHRASE ITERM" }, /* 19 */ + { "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */ + { "xInstToken", 2, "IDX ITERM" }, /* 19 */ { 0, 0, 0} }; @@ -518,6 +519,22 @@ static int SQLITE_TCLAPI xF5tApi( break; } + CASE(19, "xInstToken") { + const char *pTerm = 0; + int nTerm = 0; + int iIdx = 0; + int iTerm = 0; + + if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ) return TCL_ERROR; + if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR; + rc = p->pApi->xInstToken(p->pFts, iIdx, iTerm, &pTerm, &nTerm); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm)); + } + + break; + } + default: assert( 0 ); break; diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 2c9a4ba5a3..07e5b4fb2e 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -167,38 +167,77 @@ proc querytoken {cmd iPhrase iToken} { } sqlite3_fts5_create_function db querytoken querytoken -do_execsql_test 3.0 { +do_execsql_test 4.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize='origintext unicode61', tokendata=1 ); INSERT INTO ft VALUES('one two three four'); } -do_execsql_test 3.1 { +do_execsql_test 4.1 { SELECT 
rowid, querytoken(ft, 0, 0) FROM ft('TwO') } {1 two.TwO} -do_execsql_test 3.2 { +do_execsql_test 4.2 { SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE') } {1 one} -do_execsql_test 3.3 { +do_execsql_test 4.3 { SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE') } {1 two.TWO} -do_execsql_test 3.4 { +do_execsql_test 4.4 { SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"') } {1 three.ThreE} -do_catchsql_test 3.5 { +do_catchsql_test 4.5 { SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} -do_catchsql_test 3.6 { +do_catchsql_test 4.6 { SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} -do_catchsql_test 3.7 { +do_catchsql_test 4.7 { SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"') } {1 SQLITE_RANGE} +#------------------------------------------------------------------------- +# +reset_db +sqlite3_fts5_register_origintext db +proc insttoken {cmd iIdx iToken} { + set txt [$cmd xInstToken $iIdx $iToken] + string map [list "\0" "."] $txt +} +sqlite3_fts5_create_function db insttoken insttoken +fts5_aux_test_functions db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize='origintext unicode61', tokendata=1 + ); + INSERT INTO ft VALUES('one ONE One oNe oNE one'); +} +do_execsql_test 5.1 { + SELECT insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0), + insttoken(ft, 3, 0), + insttoken(ft, 4, 0), + insttoken(ft, 5, 0) + FROM ft('one'); +} { + one one.ONE one.One one.oNe one.oNE one +} + +do_execsql_test 5.2 { + SELECT insttoken(ft, 1, 0) FROM ft('one'); +} { + one.ONE +} + +do_execsql_test 5.3 { + SELECT fts5_test_poslist(ft) FROM ft('one'); +} { + {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5} +} finish_test diff --git a/manifest b/manifest index 4a539764cf..3477a07b6e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\snew\sfts5\sAPI\sxQueryToken(). -D 2023-11-13T14:29:12.382 +C Add\simplementation\sof\sxInstToken()\sAPI. 
+D 2023-11-15T11:45:19.681 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -88,16 +88,16 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h e27cdb10e38d87cb041dcb56cef97addf7d902aeab07e84e7102f5fc65d3357c -F ext/fts5/fts5Int.h 19b198459a2791415919428d44ebf4c830b59b2da6f27f8faaffe39a876b7ecf +F ext/fts5/fts5Int.h 88ab1ee1eefa6f98e4c7fd3c96c99ef76ed2819cc3058736c87bb01e4a301628 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c 69c81af515ce1cedccf093c7c76f8b3b4f24bafbfb1d03a431af9f5c69a81834 +F ext/fts5/fts5_expr.c 4b50ed0c724cb160f086e20e964ed2d57b99d0d3c1cb1b029901c0300b11bd9f F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 145723e22ffee28dbe2a24933e74ad998d32419223c0ddb8506a1f0c39b952c4 -F ext/fts5/fts5_main.c ddac85dbd28167af81f64e568bfe9020bf9708d650de207d1465ed19938316d1 +F ext/fts5/fts5_index.c 3b51c2f0554a665694e777c8f2765cb5b1283d4bc960dde350a604af3e5e5d98 +F ext/fts5/fts5_main.c f151eb2c6d27418d907c88cd623ad4508bdcf518a79d504e850270754c228b74 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d -F ext/fts5/fts5_tcl.c 71641a0c5693c64acfad9d10e64475ec92d9f464d06ba7fd350552de373586d8 +F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F 
ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b F ext/fts5/fts5_tokenize.c 83cfcede3898001cab84432a36ce1503e3080cf9b1c682b022ec82e267ea4c13 @@ -188,7 +188,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 8296984d268d1d20f85c9de316f422ffb6ebc12020d3f8a0a18144d6ca7b347f +F ext/fts5/test/fts5origintext.test 908a1fb6b1106e4b6ed0f9cf683c2ad7f986cce1aea1e0a13b3309c6f568932b F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2143,8 +2143,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P b8a48cc18c94d15017f898c820fdd784efbaac20d7a45c4d97269333e8f2ec60 -R 177f59b3c48b3865f7f595e0ec855966 +P 828566392b3ea8db603cb1ae5eccbc8ac035efaa284bc7c15ba89874f634aec9 +R 7870d9470a55737470bd92d95fe480a9 U dan -Z ac818dc8fd8dc9007ff77eae486bea2e +Z d10d6cf5b22c051f4553454e4a3996a4 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index f6858a05cd..96d818fc70 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -828566392b3ea8db603cb1ae5eccbc8ac035efaa284bc7c15ba89874f634aec9 \ No newline at end of file +a34b26fe7f60b74e7ae5cf64900920a3d352a20da2496401bcbc27041689cd07 \ No newline at end of file From a0764f63a8fa984cc1c90b377916f8dbe11207ec Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 16 Nov 2023 21:11:56 +0000 Subject: [PATCH 08/24] When querying a tokendata=1 fts5 table, do not use a prefix cursor for the case where the term has only one variant. FossilOrigin-Name: d711c96ba855686d6881a50498418de3492144f005684b5ae55bca24413dce47 --- ext/fts5/fts5_index.c | 232 +++++++++++++++++++++-------- ext/fts5/test/fts5origintext2.test | 107 +++++++++++++ manifest | 13 +- manifest.uuid | 2 +- 4 files changed, 282 insertions(+), 72 deletions(-) create mode 100644 ext/fts5/test/fts5origintext2.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 2d6d561b7b..887bb75dac 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -368,6 +368,7 @@ struct Fts5Index { sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */ sqlite3_stmt *pIdxSelect; + sqlite3_stmt *pIdxProbe; int nRead; /* Total number of blocks read */ sqlite3_stmt *pDeleteFromIdx; @@ -2629,6 +2630,18 @@ static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ return p->pIdxSelect; } +static sqlite3_stmt *fts5IdxProbeStmt(Fts5Index *p){ + if( p->pIdxProbe==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxProbe, sqlite3_mprintf( + "SELECT 1 FROM '%q'.'%q_idx' WHERE " + "segid=? AND term>? AND term<?", + pConfig->zDb, pConfig->zName + )); + } + return p->pIdxProbe; +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF.
@@ -3846,7 +3859,7 @@ static void fts5MultiIterNew( assert( iIter==nSeg ); } - /* If the above was successful, each component iterators now points + /* If the above was successful, each component iterator now points ** to the first entry in its segment. In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. */ @@ -6179,7 +6192,7 @@ static void fts5TokenMapPoslist( } static int fts5TokenMapHash(i64 iRowid, int iCol, int iOff){ - return iRowid + (iRowid << 3) + (iCol << 6) + (iOff << 9); + return (iRowid + (iRowid << 3) + (iCol << 6) + (iOff << 9)) & 0x7FFFFFFF; } static void fts5TokenMapHashify(Fts5Index *p, Fts5TokenMap *pMap){ @@ -6228,6 +6241,84 @@ static const u8 *fts5TokenMapLookup( return 0; } +/* +** The iterator passed as the second argument has been opened to scan and +** merge doclists for a series of tokens in tokendata=1 mode. This function +** tests whether or not, instead of using the cursor to read doclists to +** merge, it can be used directly by the upper layer. This is the case +** if the cursor currently points to the only token that corresponds to +** the queried term. i.e. if the next token that will be visited by the +** iterator does not match the query. +*/ +int fts5TokendataIterIsOk( + Fts5Index *p, + Fts5Iter *pIter, + const u8 *pToken, + int nToken +){ + int ii; + Fts5Buffer buf = {0, 0, 0}; + int bRet = 1; + Fts5Buffer *pTerm = 0; + + /* Iterator is not usable if it uses the hash table */ + if( pIter->aSeg[0].pSeg==0 ) return 0; + + for(ii=0; bRet && iinSeg; ii++){ + Fts5SegIter *pSeg = &pIter->aSeg[ii]; + Fts5Data *pLeaf = pSeg->pLeaf; + if( pLeaf ){ + + if( pTerm==0 ){ + pTerm = &pSeg->term; + }else{ + if( pSeg->term.n!=pTerm->n + || memcmp(pSeg->term.p, pTerm->p, pTerm->n) + ){ + bRet = 0; + break; + } + } + + if( pSeg->iEndofDoclistszLeaf ){ + /* Next term is on this node. Check it directly. 
*/ + int nPrefix = 0; + fts5GetVarint32(&pLeaf->p[pSeg->iEndofDoclist], nPrefix); + if( nPrefix>=nToken ) bRet = 0; + }else{ + /* Next term is on a subsequent page. In this case query the %_idx + ** table to discover exactly what that next term is. */ + sqlite3_stmt *pProbe = fts5IdxProbeStmt(p); + if( pProbe ){ + int rc = SQLITE_OK; + if( buf.n==0 ){ + sqlite3Fts5BufferAppendBlob(&p->rc, &buf, nToken, pToken); + sqlite3Fts5BufferAppendBlob(&p->rc, &buf, 1, (const u8*)"\1"); + } + sqlite3_bind_int(pProbe, 1, pSeg->pSeg->iSegid); + sqlite3_bind_blob(pProbe,2, pSeg->term.p,pSeg->term.n, SQLITE_STATIC); + sqlite3_bind_blob(pProbe,3, buf.p, buf.n, SQLITE_STATIC); + + if( sqlite3_step(pProbe)==SQLITE_ROW ){ + bRet = 0; + } + rc = sqlite3_reset(pProbe); + if( p->rc==SQLITE_OK ) p->rc = rc; + } + } + } + } + + if( bRet ){ + for(ii=0; iinSeg; ii++){ + Fts5SegIter *pSeg = &pIter->aSeg[ii]; + pSeg->flags |= FTS5_SEGITER_ONETERM; + } + } + + fts5BufferFree(&buf); + return bRet; +} static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ @@ -6261,9 +6352,6 @@ static void fts5SetupPrefixIter( aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); - if( iIdx==0 ){ - pMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); - } assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); if( p->rc==SQLITE_OK ){ @@ -6308,79 +6396,92 @@ static void fts5SetupPrefixIter( pToken[0] = FTS5_MAIN_PREFIX + iIdx; fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); fts5IterSetOutputCb(&p->rc, p1); - for( /* no-op */ ; - fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext2(p, p1, &bNewTerm) - ){ - Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; - int nTerm = pSeg->term.n; - const u8 *pTerm = pSeg->term.p; - p1->xSetOutputs(p1, pSeg); - if( pMap ){ + if( bDesc==0 && bTokenscan && fts5TokendataIterIsOk(p, p1, pToken,nToken) ){ + /* In this case iterator p1 may be used as is. 
*/ + *ppIter = p1; + }else{ + + if( iIdx==0 && p->pConfig->eDetail==FTS5_DETAIL_FULL ){ + pMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); + } + assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); + + for( /* no-op */ ; + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext2(p, p1, &bNewTerm) + ){ + Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; + int nTerm = pSeg->term.n; + const u8 *pTerm = pSeg->term.p; + p1->xSetOutputs(p1, pSeg); + + assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( bNewTerm ){ - fts5TokenMapTerm(p, pMap, &pTerm[1], nTerm-1); + if( nTermnToken && pTerm[nToken]!=0x00 ) break; } - fts5TokenMapPoslist(p, pMap, p1); - } - - assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); - if( bNewTerm ){ - if( nTermnToken && pTerm[nToken]!=0x00 ) break; - } - - if( p1->base.nData==0 ) continue; - - if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ - for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ - int i1 = i*nMerge; - int iStore; - assert( i1+nMerge<=nBuf ); - for(iStore=i1; iStorebase.nData==0 ) continue; + if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ + for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ + int i1 = i*nMerge; + int iStore; + assert( i1+nMerge<=nBuf ); for(iStore=i1; iStorebase.iRowid-(u64)iLastRowid, p1, &doclist); + iLastRowid = p1->base.iRowid; } - - xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); - iLastRowid = p1->base.iRowid; - } - - assert( (nBuf%nMerge)==0 ); - for(i=0; irc==SQLITE_OK ){ - xMerge(p, &doclist, nMerge, &aBuf[i]); + + assert( (nBuf%nMerge)==0 ); + for(i=0; irc==SQLITE_OK ){ + xMerge(p, &doclist, nMerge, &aBuf[i]); + } + for(iFree=i; iFreep = (u8*)&pData[1]; + pData->nn = pData->szLeaf = doclist.n; + if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); + if( pMap ) fts5TokenMapHashify(p, pMap); + fts5MultiIterNew2(p, pData, pMap, bDesc, ppIter); + pMap = 0; } + fts5BufferFree(&doclist); } - fts5MultiIterFree(p1); - - pData = fts5IdxMalloc(p, 
sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING); - if( pData ){ - pData->p = (u8*)&pData[1]; - pData->nn = pData->szLeaf = doclist.n; - if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); - if( pMap ) fts5TokenMapHashify(p, pMap); - fts5MultiIterNew2(p, pData, pMap, bDesc, ppIter); - } - fts5BufferFree(&doclist); } + fts5TokenMapFree(pMap); fts5StructureRelease(pStruct); sqlite3_free(aBuf); } @@ -6514,6 +6615,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ sqlite3_finalize(p->pIdxWriter); sqlite3_finalize(p->pIdxDeleter); sqlite3_finalize(p->pIdxSelect); + sqlite3_finalize(p->pIdxProbe); sqlite3_finalize(p->pDataVersion); sqlite3_finalize(p->pDeleteFromIdx); sqlite3Fts5HashFree(p->pHash); @@ -6766,7 +6868,7 @@ int sqlite3Fts5IterToken( ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; if( pIter->pTokenMap ){ - *ppOut = fts5TokenMapLookup( + *ppOut = (const char*)fts5TokenMapLookup( pIter->pTokenMap, pIndexIter->iRowid, iCol, iOff, pnOut ); }else{ diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test new file mode 100644 index 0000000000..7cf8d80071 --- /dev/null +++ b/ext/fts5/test/fts5origintext2.test @@ -0,0 +1,107 @@ +# 2014 Jan 08 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); +} + +do_execsql_test 1.1 { + BEGIN; + INSERT INTO ft VALUES('Hello'); + INSERT INTO ft VALUES('hello'); + INSERT INTO ft VALUES('HELLO'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('World'); + INSERT INTO ft VALUES('world'); + INSERT INTO ft VALUES('WORLD'); + COMMIT; +} + +do_execsql_test 1.2 { SELECT rowid FROM ft('hello'); } {1 2 3} +do_execsql_test 1.3 { SELECT rowid FROM ft('today'); } {4 5 6} +do_execsql_test 1.4 { SELECT rowid FROM ft('world'); } {7 8 9} + +do_execsql_test 1.5 { + SELECT count(*) FROM ft_data +} 3 + +do_execsql_test 1.6 { + DELETE FROM ft; + INSERT INTO ft(ft, rank) VALUES('pgsz', 64); + BEGIN; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO ft SELECT 'Hello Hello Hello Hello Hello Hello Hello' FROM s; + INSERT INTO ft VALUES ('hELLO hELLO hELLO'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('World World World World World World World'); + INSERT INTO ft VALUES('world world world world world world world'); + INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD'); + INSERT INTO ft VALUES('World World World World World World World'); + INSERT INTO ft VALUES('world world world world world world world'); + INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD'); + COMMIT; +} + +do_execsql_test 1.7 { + SELECT 
count(*) FROM ft_data; +} 23 + +do_execsql_test 1.8 { SELECT rowid FROM ft('hello') WHERE rowid>100; } {101} + +do_execsql_test 1.9 { + DELETE FROM ft; + INSERT INTO ft(ft) VALUES('optimize'); + SELECT count(*) FROM ft_data; +} {2} +do_execsql_test 1.10 { + BEGIN; + INSERT INTO ft VALUES('Hello'); + INSERT INTO ft VALUES('hello'); + INSERT INTO ft VALUES('HELLO'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('World'); + INSERT INTO ft VALUES('world'); + INSERT INTO ft VALUES('WORLD'); +} + +breakpoint +do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3} +do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6} +do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9} + +finish_test + diff --git a/manifest b/manifest index 3477a07b6e..7763b46700 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\simplementation\sof\sxInstToken()\sAPI. -D 2023-11-15T11:45:19.681 +C When\squerying\sa\stokendata=1\sfts5\stable,\sdo\snot\suse\sa\sprefix\scursor\sfor\sthe\scase\swhere\sthe\sterm\shas\sonly\sone\svariant. 
+D 2023-11-16T21:11:56.608 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -94,7 +94,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf F ext/fts5/fts5_expr.c 4b50ed0c724cb160f086e20e964ed2d57b99d0d3c1cb1b029901c0300b11bd9f F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 3b51c2f0554a665694e777c8f2765cb5b1283d4bc960dde350a604af3e5e5d98 +F ext/fts5/fts5_index.c 70fa4a6d8a062ca4b63a62d0721d72ce2f6336413c6e8b0703881c708797d24d F ext/fts5/fts5_main.c f151eb2c6d27418d907c88cd623ad4508bdcf518a79d504e850270754c228b74 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -189,6 +189,7 @@ F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca F ext/fts5/test/fts5origintext.test 908a1fb6b1106e4b6ed0f9cf683c2ad7f986cce1aea1e0a13b3309c6f568932b +F ext/fts5/test/fts5origintext2.test a654c77f1548ccd8eab7f6d07230655c0070cdf32dcd4740ccdf496f77d5282c F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2143,8 +2144,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F 
vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 828566392b3ea8db603cb1ae5eccbc8ac035efaa284bc7c15ba89874f634aec9 -R 7870d9470a55737470bd92d95fe480a9 +P a34b26fe7f60b74e7ae5cf64900920a3d352a20da2496401bcbc27041689cd07 +R d7c277a055a404d272fdcb5090bf371a U dan -Z d10d6cf5b22c051f4553454e4a3996a4 +Z 0e1bf556ad9eba9db356685a09c7ab31 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 96d818fc70..6373f95ef5 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a34b26fe7f60b74e7ae5cf64900920a3d352a20da2496401bcbc27041689cd07 \ No newline at end of file +d711c96ba855686d6881a50498418de3492144f005684b5ae55bca24413dce47 \ No newline at end of file From 5c268bbf67d8bfd5383a16c4618b9aaefd86abeb Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 22 Nov 2023 19:02:54 +0000 Subject: [PATCH 09/24] Fix tokendata=1 and xInstToken() APIs for detail=none and detail=column tables. 
FossilOrigin-Name: 37b271c19d772bd06524db816ded03377b426efed7a7783c8a96f6fb156ecd86 --- ext/fts5/fts5Int.h | 7 ++++ ext/fts5/fts5_expr.c | 48 +++++++++++++++++++-- ext/fts5/fts5_index.c | 67 ++++++++++++++++++++++++++++++ ext/fts5/test/fts5origintext.test | 51 +++++++++++++++-------- ext/fts5/test/fts5origintext2.test | 2 +- ext/fts5/test/fts5origintext3.test | 60 ++++++++++++++++++++++++++ manifest | 21 +++++----- manifest.uuid | 2 +- 8 files changed, 224 insertions(+), 34 deletions(-) create mode 100644 ext/fts5/test/fts5origintext3.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 19d9579eb7..317d66db99 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -545,6 +545,13 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); +/* Used to populate hash tables for xInstToken in detail=none/column mode. */ +void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); +int sqlite3Fts5IndexIterWriteTokendata( + Fts5IndexIter*, const char*, int, int iCol, int iOff +); +int sqlite3Fts5IndexIterHashifyTokendata(Fts5IndexIter*); + /* ** End of interface to code in fts5_index.c. 
**************************************************************************/ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index ae851a877a..9889bccb32 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -2985,6 +2985,12 @@ static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ return 0; } +static int fts5QueryTerm(const char *pToken, int nToken){ + int ii; + for(ii=0; iipExpr; int i; + int nQuery = nToken; UNUSED_PARAM2(iUnused1, iUnused2); - if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; + if( nQuery>FTS5_MAX_TOKEN_SIZE ) nQuery = FTS5_MAX_TOKEN_SIZE; + if( pExpr->pConfig->bTokendata ){ + nQuery = fts5QueryTerm(pToken, nQuery); + } if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; inPhrase; i++){ Fts5ExprTerm *pT; if( p->aPopulator[i].bOk==0 ) continue; for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ - if( (pT->nFullTerm==nToken || (pT->nFullTermbPrefix)) - && memcmp(pT->pTerm, pToken, pT->nFullTerm)==0 + if( (pT->nQueryTerm==nQuery || (pT->nQueryTermbPrefix)) + && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0 ){ int rc = sqlite3Fts5PoslistWriterAppend( &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff ); + if( rc==SQLITE_OK && pExpr->pConfig->bTokendata ){ + int iCol = p->iOff>>32; + int iTokOff = p->iOff & 0x7FFFFFFF; + rc = sqlite3Fts5IndexIterWriteTokendata( + pT->pIter, pToken, nToken, iCol, iTokOff + ); + } if( rc ) return rc; break; } @@ -3027,11 +3044,23 @@ int sqlite3Fts5ExprPopulatePoslists( const char *z, int n ){ int i; + int rc = SQLITE_OK; Fts5ExprCtx sCtx; sCtx.pExpr = pExpr; sCtx.aPopulator = aPopulator; sCtx.iOff = (((i64)iCol) << 32) - 1; + /* If this is a tokendata=1 table, clear out the hash tables of + ** full-terms. 
*/ + if( pConfig->bTokendata ){ + for(i=0; inPhrase; i++){ + Fts5ExprTerm *pT; + for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ + sqlite3Fts5IndexIterClearTokendata(pT->pIter); + } + } + } + for(i=0; inPhrase; i++){ Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; Fts5Colset *pColset = pNode->pNear->pColset; @@ -3044,9 +3073,20 @@ int sqlite3Fts5ExprPopulatePoslists( } } - return sqlite3Fts5Tokenize(pConfig, + rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb ); + + if( pConfig->bTokendata ){ + for(i=0; inPhrase; i++){ + Fts5ExprTerm *pT; + for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ + sqlite3Fts5IndexIterHashifyTokendata(pT->pIter); + } + } + } + + return rc; } static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 887bb75dac..f206b8116d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6877,6 +6877,73 @@ int sqlite3Fts5IterToken( return SQLITE_OK; } +/* +** Clear any existing entries from the token-map associated with the +** iterator passed as the only argument. 
+*/ +void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL ); + if( pIter->pTokenMap ){ + pIter->pTokenMap->nEntry = 0; + } +} + +int sqlite3Fts5IndexIterWriteTokendata( + Fts5IndexIter *pIndexIter, + const char *pToken, int nToken, + int iCol, int iOff +){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + Fts5Index *p = pIter->pIndex; + assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL ); + if( pIter->pTokenMap==0 ){ + pIter->pTokenMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); + } + if( p->rc==SQLITE_OK ){ + Fts5TokenMap *pMap = pIter->pTokenMap; + int ii; + for(ii=0; iinToken; ii++){ + if( nToken==pMap->aToken[ii].nTerm + && 0==memcmp(pMap->aToken[ii].pTerm, pToken, nToken) + ){ + break; + } + } + if( ii==pMap->nToken ){ + fts5TokenMapTerm(p, pMap, (const u8*)pToken, nToken); + } + if( pMap->nEntry>=pMap->nEntryAlloc ){ + int nNew = pMap->nEntryAlloc ? pMap->nEntryAlloc*2 : 32; + Fts5TokenMapEntry *aNew = (Fts5TokenMapEntry*)sqlite3_realloc( + pMap->aEntry, nNew * sizeof(Fts5TokenMapEntry) + ); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + pMap->aEntry = aNew; + pMap->nEntryAlloc = nNew; + } + } + if( p->rc==SQLITE_OK ){ + Fts5TokenMapEntry *pEntry = &pMap->aEntry[pMap->nEntry++]; + pEntry->iRowid = pIndexIter->iRowid; + pEntry->iCol = iCol; + pEntry->iOff = iOff; + pEntry->iTok = ii+1; + } + } + return fts5IndexReturn(p); +} + +int sqlite3Fts5IndexIterHashifyTokendata(Fts5IndexIter *pIndexIter){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + if( pIter->pTokenMap ){ + fts5TokenMapHashify(pIter->pIndex, pIter->pTokenMap); + } + return fts5IndexReturn(pIter->pIndex); +} + /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). 
*/ diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 07e5b4fb2e..845e8145db 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -21,9 +21,13 @@ ifcapable !fts5 { return } +foreach_detail_mode $testprefix { + sqlite3_fts5_register_origintext db do_execsql_test 1.0 { - CREATE VIRTUAL TABLE ft USING fts5(x, tokenize="origintext unicode61"); + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", detail=%DETAIL% + ); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); } @@ -85,7 +89,9 @@ db func document document sqlite3_fts5_register_origintext db do_execsql_test 2.0 { - CREATE VIRTUAL TABLE ft USING fts5(x, tokenize="origintext unicode61"); + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", detail=%DETAIL% + ); INSERT INTO ft(ft, rank) VALUES('pgsz', 128); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); } @@ -117,7 +123,9 @@ reset_db sqlite3_fts5_register_origintext db do_execsql_test 3.0 { - CREATE VIRTUAL TABLE ft USING fts5(x, tokenize="origintext unicode61"); + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", detail=%DETAIL% + ); CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); INSERT INTO ft(rowid, x) VALUES(1, 'hello'); @@ -136,7 +144,8 @@ do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 do_execsql_test 3.0 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize="origintext unicode61", - tokendata=1 + tokendata=1, + detail=%DETAIL% ); CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance); @@ -169,7 +178,7 @@ sqlite3_fts5_create_function db querytoken querytoken do_execsql_test 4.0 { CREATE VIRTUAL TABLE ft USING fts5( - x, tokenize='origintext unicode61', tokendata=1 + x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% ); INSERT INTO ft VALUES('one two three four'); } @@ -183,19 +192,23 @@ do_execsql_test 4.2 { do_execsql_test 4.3 { SELECT rowid, querytoken(ft, 1, 0) 
FROM ft('one TWO ThreE') } {1 two.TWO} -do_execsql_test 4.4 { - SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"') -} {1 three.ThreE} -do_catchsql_test 4.5 { - SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"') -} {1 SQLITE_RANGE} -do_catchsql_test 4.6 { - SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"') -} {1 SQLITE_RANGE} -do_catchsql_test 4.7 { - SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"') -} {1 SQLITE_RANGE} +if {"%DETAIL%"=="full"} { + # Phrase queries are only supported for detail=full. + # + do_execsql_test 4.4 { + SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"') + } {1 three.ThreE} + do_catchsql_test 4.5 { + SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"') + } {1 SQLITE_RANGE} + do_catchsql_test 4.6 { + SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"') + } {1 SQLITE_RANGE} + do_catchsql_test 4.7 { + SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"') + } {1 SQLITE_RANGE} +} #------------------------------------------------------------------------- # @@ -210,7 +223,7 @@ fts5_aux_test_functions db do_execsql_test 5.0 { CREATE VIRTUAL TABLE ft USING fts5( - x, tokenize='origintext unicode61', tokendata=1 + x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% ); INSERT INTO ft VALUES('one ONE One oNe oNE one'); } @@ -239,5 +252,7 @@ do_execsql_test 5.3 { {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5} } +} + finish_test diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test index 7cf8d80071..26f9864098 100644 --- a/ext/fts5/test/fts5origintext2.test +++ b/ext/fts5/test/fts5origintext2.test @@ -13,7 +13,7 @@ # source [file join [file dirname [info script]] fts5_common.tcl] -set testprefix fts5origintext +set testprefix fts5origintext2 # If SQLITE_ENABLE_FTS5 is defined, omit this file. 
ifcapable !fts5 { diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test new file mode 100644 index 0000000000..57b5984f4d --- /dev/null +++ b/ext/fts5/test/fts5origintext3.test @@ -0,0 +1,60 @@ +# 2023 November 22 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext3 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach_detail_mode $testprefix { + reset_db + + sqlite3_fts5_register_origintext db + fts5_aux_test_functions db + proc insttoken {cmd iIdx iToken} { + set txt [$cmd xInstToken $iIdx $iToken] + string map [list "\0" "."] $txt + } + sqlite3_fts5_create_function db insttoken insttoken + + do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% + ); + } + + do_execsql_test 1.1 { + INSERT INTO ft VALUES('Hello world HELLO WORLD hello'); + } + + do_execsql_test 1.2 { + SELECT fts5_test_poslist(ft) FROM ft('hello'); + } {{0.0.0 0.0.2 0.0.4}} + + do_execsql_test 1.3 { + SELECT + insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0) + FROM ft('hello'); + } {hello.Hello hello.HELLO hello} + +} + +finish_test + diff --git a/manifest b/manifest index 7763b46700..e729bc7cf1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C When\squerying\sa\stokendata=1\sfts5\stable,\sdo\snot\suse\sa\sprefix\scursor\sfor\sthe\scase\swhere\sthe\sterm\shas\sonly\sone\svariant. -D 2023-11-16T21:11:56.608 +C Fix\stokendata=1\sand\sxInstToken()\sAPIs\sfor\sdetail=none\sand\sdetail=column\stables. 
+D 2023-11-22T19:02:54.078 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -88,13 +88,13 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h e27cdb10e38d87cb041dcb56cef97addf7d902aeab07e84e7102f5fc65d3357c -F ext/fts5/fts5Int.h 88ab1ee1eefa6f98e4c7fd3c96c99ef76ed2819cc3058736c87bb01e4a301628 +F ext/fts5/fts5Int.h d330c2e20051c300b26325b8ba29aa89e99d301c80e2f51092e5bb70346a17cd F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c 4b50ed0c724cb160f086e20e964ed2d57b99d0d3c1cb1b029901c0300b11bd9f +F ext/fts5/fts5_expr.c 0d846134eafeeb1f0724b9c8cc02a2ef9c4082519aa3923173deadd5155910b1 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 70fa4a6d8a062ca4b63a62d0721d72ce2f6336413c6e8b0703881c708797d24d +F ext/fts5/fts5_index.c 7b87808d788238eff4a0a68728e6ed49817e71bbfb328a18050d7d8e92a5d66a F ext/fts5/fts5_main.c f151eb2c6d27418d907c88cd623ad4508bdcf518a79d504e850270754c228b74 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -188,8 +188,9 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 
36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 908a1fb6b1106e4b6ed0f9cf683c2ad7f986cce1aea1e0a13b3309c6f568932b -F ext/fts5/test/fts5origintext2.test a654c77f1548ccd8eab7f6d07230655c0070cdf32dcd4740ccdf496f77d5282c +F ext/fts5/test/fts5origintext.test 7caef7634889bab8b44d145141c0d9325299398fb89b116bccd6262fde5659db +F ext/fts5/test/fts5origintext2.test 26482f4af1f2785cb01d06af9aae202289b6e8cf7b708d18aea305b459c2f302 +F ext/fts5/test/fts5origintext3.test 87a212b8235794348c56cb70f21e122d182a5af688c56057b90b7c151d0aa347 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2144,8 +2145,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P a34b26fe7f60b74e7ae5cf64900920a3d352a20da2496401bcbc27041689cd07 -R d7c277a055a404d272fdcb5090bf371a +P d711c96ba855686d6881a50498418de3492144f005684b5ae55bca24413dce47 +R 161c23f360cac1706a2c5f6b11155312 U dan -Z 0e1bf556ad9eba9db356685a09c7ab31 +Z 281a4d74e3cce55af028079774718a8b # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 6373f95ef5..38bacc7670 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d711c96ba855686d6881a50498418de3492144f005684b5ae55bca24413dce47 \ No newline at end of file +37b271c19d772bd06524db816ded03377b426efed7a7783c8a96f6fb156ecd86 \ No newline at end of file From af54826e4a76259a37d396e88dc3c582ef76664c Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 22 Nov 2023 20:02:55 +0000 Subject: [PATCH 10/24] Defer building xInstToken() hash-table until it is to be used. FossilOrigin-Name: 9b005085ff4a53cda0a1dff0c836630d6d3b95b9c40658ffd2a886f3e1b37faa --- ext/fts5/fts5Int.h | 1 - ext/fts5/fts5_expr.c | 14 +------------- ext/fts5/fts5_index.c | 24 ++++++++++++------------ manifest | 16 ++++++++-------- manifest.uuid | 2 +- 5 files changed, 22 insertions(+), 35 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 317d66db99..9d2622448d 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -550,7 +550,6 @@ void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); int sqlite3Fts5IndexIterWriteTokendata( Fts5IndexIter*, const char*, int, int iCol, int iOff ); -int sqlite3Fts5IndexIterHashifyTokendata(Fts5IndexIter*); /* ** End of interface to code in fts5_index.c. 
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 9889bccb32..6589d1b3e6 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3044,7 +3044,6 @@ int sqlite3Fts5ExprPopulatePoslists( const char *z, int n ){ int i; - int rc = SQLITE_OK; Fts5ExprCtx sCtx; sCtx.pExpr = pExpr; sCtx.aPopulator = aPopulator; @@ -3073,20 +3072,9 @@ int sqlite3Fts5ExprPopulatePoslists( } } - rc = sqlite3Fts5Tokenize(pConfig, + return sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb ); - - if( pConfig->bTokendata ){ - for(i=0; inPhrase; i++){ - Fts5ExprTerm *pT; - for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ - sqlite3Fts5IndexIterHashifyTokendata(pT->pIter); - } - } - } - - return rc; } static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index f206b8116d..a4150d3d5b 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -628,6 +628,8 @@ struct Fts5TokenMapToken { }; struct Fts5TokenMap { + int bHashed; /* True once hashed */ + int nEntryAlloc; int nEntry; Fts5TokenMapEntry *aEntry; @@ -6372,7 +6374,6 @@ static void fts5SetupPrefixIter( ** index contains all the doclists required, except for the one ** corresponding to the prefix itself. That one is extracted from the ** main term index here. 
*/ - assert( iIdx==0 || pMap==0 ); if( iIdx!=0 ){ int dummy = 0; const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; @@ -6402,7 +6403,7 @@ static void fts5SetupPrefixIter( *ppIter = p1; }else{ - if( iIdx==0 && p->pConfig->eDetail==FTS5_DETAIL_FULL ){ + if( iIdx==0 && p->pConfig->eDetail==FTS5_DETAIL_FULL && bTokenscan ){ pMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); } assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); @@ -6473,7 +6474,6 @@ static void fts5SetupPrefixIter( pData->p = (u8*)&pData[1]; pData->nn = pData->szLeaf = doclist.n; if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); - if( pMap ) fts5TokenMapHashify(p, pMap); fts5MultiIterNew2(p, pData, pMap, bDesc, ppIter); pMap = 0; } @@ -6867,7 +6867,15 @@ int sqlite3Fts5IterToken( const char **ppOut, int *pnOut ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - if( pIter->pTokenMap ){ + Fts5TokenMap *pMap = pIter->pTokenMap; + if( pMap ){ + if( pMap->bHashed==0 ){ + Fts5Index *p = pIter->pIndex; + fts5TokenMapHashify(p, pMap); + if( p->rc ){ + return fts5IndexReturn(p); + } + } *ppOut = (const char*)fts5TokenMapLookup( pIter->pTokenMap, pIndexIter->iRowid, iCol, iOff, pnOut ); @@ -6936,14 +6944,6 @@ int sqlite3Fts5IndexIterWriteTokendata( return fts5IndexReturn(p); } -int sqlite3Fts5IndexIterHashifyTokendata(Fts5IndexIter *pIndexIter){ - Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - if( pIter->pTokenMap ){ - fts5TokenMapHashify(pIter->pIndex, pIter->pTokenMap); - } - return fts5IndexReturn(pIter->pIndex); -} - /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ diff --git a/manifest b/manifest index e729bc7cf1..dfaf7bb154 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\stokendata=1\sand\sxInstToken()\sAPIs\sfor\sdetail=none\sand\sdetail=column\stables. -D 2023-11-22T19:02:54.078 +C Defer\sbuilding\sxInstToken()\shash-table\suntil\sit\sis\sto\sbe\sused. 
+D 2023-11-22T20:02:55.862 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -88,13 +88,13 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h e27cdb10e38d87cb041dcb56cef97addf7d902aeab07e84e7102f5fc65d3357c -F ext/fts5/fts5Int.h d330c2e20051c300b26325b8ba29aa89e99d301c80e2f51092e5bb70346a17cd +F ext/fts5/fts5Int.h 782151060d176be22861f57bf38e087a82cfb0dfc4b2fa6f9ccbc2641b6d01e3 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c 0d846134eafeeb1f0724b9c8cc02a2ef9c4082519aa3923173deadd5155910b1 +F ext/fts5/fts5_expr.c 5d557c7ebefaeac5a5111cc47d4fee8a2fc6bc15245d5c99eebf53dd04bf794e F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 7b87808d788238eff4a0a68728e6ed49817e71bbfb328a18050d7d8e92a5d66a +F ext/fts5/fts5_index.c 710b022dcdf152eb7bbbc3f83eb662e1e67c25e0643416096ed070b10d7829fb F ext/fts5/fts5_main.c f151eb2c6d27418d907c88cd623ad4508bdcf518a79d504e850270754c228b74 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -2145,8 +2145,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F 
vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P d711c96ba855686d6881a50498418de3492144f005684b5ae55bca24413dce47 -R 161c23f360cac1706a2c5f6b11155312 +P 37b271c19d772bd06524db816ded03377b426efed7a7783c8a96f6fb156ecd86 +R 0d85011f1dd994f228f63f6f67d53fdc U dan -Z 281a4d74e3cce55af028079774718a8b +Z f3ab4e11005318d9a7fa0f164d5ead9f # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 38bacc7670..81d2b68945 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -37b271c19d772bd06524db816ded03377b426efed7a7783c8a96f6fb156ecd86 \ No newline at end of file +9b005085ff4a53cda0a1dff0c836630d6d3b95b9c40658ffd2a886f3e1b37faa \ No newline at end of file From 41c9e0b767dbd637ac8c5f7b82d9190799d24cc2 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 22 Nov 2023 21:15:05 +0000 Subject: [PATCH 11/24] Add documentation for new fts5 auxiliary function APIs. FossilOrigin-Name: 9be8969edd49e3da96fb8ac2279aff6fe2e215d6ac55162b4734aca1b6316580 --- ext/fts5/fts5.h | 27 +++++++++++++++++++++++++-- ext/fts5/fts5_main.c | 16 +--------------- manifest | 14 +++++++------- manifest.uuid | 2 +- 4 files changed, 34 insertions(+), 25 deletions(-) diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 5a2008882f..9feedbba19 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -261,6 +261,27 @@ struct Fts5PhraseIter { ** ** xPhraseNextColumn() ** See xPhraseFirstColumn above. +** +** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken) +** This is used to access token iToken of phrase iPhrase of the current +** query. Before returning, output parameter *ppToken is set to point +** to a buffer containing the requested token, and *pnToken to the +** size of this buffer in bytes. +** +** The output text is not a copy of the query text that specified the +** token. It is the output of the tokenizer module. 
For tokendata=1 +** tables, this includes any embedded 0x00 and trailing data. +** +** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken) +** This is used to access token iToken of phrase hit iIdx within the +** current row. +** +** The output text is not a copy of the document text that was tokenized. +** It is the output of the tokenizer module. For tokendata=1 tables, this +** includes any embedded 0x00 and trailing data. +** +** This API can be quite slow if used with an FTS5 table created with the +** "detail=none" or "detail=column" option. */ struct Fts5ExtensionApi { int iVersion; /* Currently always set to 3 */ @@ -300,9 +321,11 @@ struct Fts5ExtensionApi { void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); /* Below this point are iVersion>=3 only */ - int (*xQueryToken)(Fts5Context*, int iPhrase, int iToken, const char**, int*); + int (*xQueryToken)(Fts5Context*, + int iPhrase, int iToken, + const char **ppToken, int *pnToken + ); int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*); - int (*xPhraseToken)(Fts5Context*, Fts5PhraseIter*, int, const char**, int*); }; /* diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 5ef80719ae..9f19b24b88 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -2360,19 +2360,6 @@ static int fts5ApiInstToken( return rc; } -/* -** xPhraseToken() API implemenetation. 
-*/ -static int fts5ApiPhraseToken( - Fts5Context *pCtx, - Fts5PhraseIter *pIter, - int iToken, - const char **ppOut, - int *pnOut -){ - return SQLITE_OK; -} - static int fts5ApiQueryPhrase(Fts5Context*, int, void*, int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) @@ -2400,8 +2387,7 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiPhraseFirstColumn, fts5ApiPhraseNextColumn, fts5ApiQueryToken, - fts5ApiInstToken, - fts5ApiPhraseToken + fts5ApiInstToken }; /* diff --git a/manifest b/manifest index dfaf7bb154..8adb17e981 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Defer\sbuilding\sxInstToken()\shash-table\suntil\sit\sis\sto\sbe\sused. -D 2023-11-22T20:02:55.862 +C Add\sdocumentation\sfor\snew\sfts5\sauxiliary\sfunction\sAPIs. +D 2023-11-22T21:15:05.459 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -87,7 +87,7 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7 F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 -F ext/fts5/fts5.h e27cdb10e38d87cb041dcb56cef97addf7d902aeab07e84e7102f5fc65d3357c +F ext/fts5/fts5.h 5e5630fc81e212f658afaa5b2650dac939d2729d0723aef1eeaff908f1725648 F ext/fts5/fts5Int.h 782151060d176be22861f57bf38e087a82cfb0dfc4b2fa6f9ccbc2641b6d01e3 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 @@ -95,7 +95,7 @@ F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532 F ext/fts5/fts5_expr.c 
5d557c7ebefaeac5a5111cc47d4fee8a2fc6bc15245d5c99eebf53dd04bf794e F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c 710b022dcdf152eb7bbbc3f83eb662e1e67c25e0643416096ed070b10d7829fb -F ext/fts5/fts5_main.c f151eb2c6d27418d907c88cd623ad4508bdcf518a79d504e850270754c228b74 +F ext/fts5/fts5_main.c 55b53085dbd1693b5735463198a8d124dfbc27f08311c839637b44b8254ef7cb F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -2145,8 +2145,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 37b271c19d772bd06524db816ded03377b426efed7a7783c8a96f6fb156ecd86 -R 0d85011f1dd994f228f63f6f67d53fdc +P 9b005085ff4a53cda0a1dff0c836630d6d3b95b9c40658ffd2a886f3e1b37faa +R 2f037553902ade27b74f0aa5f4f0c2dc U dan -Z f3ab4e11005318d9a7fa0f164d5ead9f +Z 56eb427eba85d076d9cd44ffbac445d7 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 81d2b68945..eca2a6957f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9b005085ff4a53cda0a1dff0c836630d6d3b95b9c40658ffd2a886f3e1b37faa \ No newline at end of file +9be8969edd49e3da96fb8ac2279aff6fe2e215d6ac55162b4734aca1b6316580 \ No newline at end of file From 89fcfbb424b1c36b97acd151267adb7c574f8240 Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 29 Nov 2023 16:22:39 +0000 Subject: [PATCH 12/24] Fix signed integer overflow in fts5. 
FossilOrigin-Name: 60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4 --- ext/fts5/fts5_index.c | 2 +- manifest | 14 +++++++------- manifest.uuid | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 993069f490..4b7c8d3335 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4276,7 +4276,7 @@ static void fts5WriteDlidxAppend( } if( pDlidx->bPrevValid ){ - iVal = iRowid - pDlidx->iPrev; + iVal = (u64)iRowid - (u64)pDlidx->iPrev; }else{ i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); assert( pDlidx->buf.n==0 ); diff --git a/manifest b/manifest index e69bda2b13..13a7da6a20 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\supdates\sinto\sthis\sbranch. -D 2023-11-28T19:43:08.965 +C Fix\ssigned\sinteger\soverflow\sin\sfts5. +D 2023-11-29T16:22:39.960 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -96,7 +96,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf F ext/fts5/fts5_expr.c 5d557c7ebefaeac5a5111cc47d4fee8a2fc6bc15245d5c99eebf53dd04bf794e F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c de7bc351d9b7e0c8891db93b211fd9e137edef2cec832dcedfb32d4a2929c0d2 +F ext/fts5/fts5_index.c bafdef8be40a20bb86a131af4f09b56919f416fa13d1a86af1bf92bad9a2870d F ext/fts5/fts5_main.c 55b53085dbd1693b5735463198a8d124dfbc27f08311c839637b44b8254ef7cb F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -264,7 +264,7 @@ F 
ext/jni/src/org/sqlite/jni/capi/CollationCallback.java e29bcfc540fdd343e2f5cca F ext/jni/src/org/sqlite/jni/capi/CollationNeededCallback.java 5bfa226a8e7a92e804fd52d6e42b4c7b875fa7a94f8e2c330af8cc244a8920ab F ext/jni/src/org/sqlite/jni/capi/CommitHookCallback.java 482f53dfec9e3ac2a9070d3fceebd56250932aaaf7c4f5bc8de29fc011416e0c F ext/jni/src/org/sqlite/jni/capi/ConfigLogCallback.java b995ca412f59b631803b93aa5b3684fce62e335d1e123207084c054abfd488d4 -F ext/jni/src/org/sqlite/jni/capi/ConfigSqlLogCallback.java e5723900b6458bc6288f52187090a78ebe0a20f403ac7c887ec9061dfe51aba7 w ext/jni/src/org/sqlite/jni/capi/ConfigSqllogCallback.java +F ext/jni/src/org/sqlite/jni/capi/ConfigSqlLogCallback.java e5723900b6458bc6288f52187090a78ebe0a20f403ac7c887ec9061dfe51aba7 F ext/jni/src/org/sqlite/jni/capi/NativePointerHolder.java b7036dcb1ef1b39f1f36ac605dde0ff1a24a9a01ade6aa1a605039443e089a61 F ext/jni/src/org/sqlite/jni/capi/OutputPointer.java 246b0e66c4603f41c567105a21189d138aaf8c58203ecd4928802333da553e7c F ext/jni/src/org/sqlite/jni/capi/PrepareMultiCallback.java 97352091abd7556167f4799076396279a51749fdae2b72a6ba61cd39b3df0359 @@ -2146,8 +2146,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 9be8969edd49e3da96fb8ac2279aff6fe2e215d6ac55162b4734aca1b6316580 4c055b7a6e4533e1e571773456226ca7038ce372df3eedbbbcd9a81e8652a6cf -R ec1a7fd48afc240687b4ab2330f993e9 +P 554fc13f2ca5f2ebd9ad0206034c25b556ff40db3106051c5e539f2e142e88ea +R 5c7c1632cd09d2b131adef895eab90a8 U dan -Z 65a112da054f34cd19f6d265591002ed +Z 009b023eba91b6acc35beb7d051a677a # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 4ca4cbb4d0..df2c2789ad 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -554fc13f2ca5f2ebd9ad0206034c25b556ff40db3106051c5e539f2e142e88ea \ No newline at end of file +60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4 \ No newline at end of file From b5effc0605abcf3f67a4c62ef42d0ded03a214fd Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 Dec 2023 20:09:59 +0000 Subject: [PATCH 13/24] Different approach to querying a tokendata=1 table. Saves cpu and memory. FossilOrigin-Name: c523f40895866e6fc979a26483dbea8206126b4bbdf4b73b77263c09e13c855e --- ext/fts5/fts5Int.h | 19 +- ext/fts5/fts5_expr.c | 6 +- ext/fts5/fts5_index.c | 553 ++++++++++++++++++++++++++--- ext/fts5/fts5_main.c | 7 +- ext/fts5/test/fts5origintext.test | 10 +- ext/fts5/test/fts5origintext2.test | 1 + ext/fts5/test/fts5origintext3.test | 9 + manifest | 24 +- manifest.uuid | 2 +- 9 files changed, 541 insertions(+), 90 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 9d2622448d..cee9528858 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -385,17 +385,18 @@ struct Fts5IndexIter { /* ** Values used as part of the flags argument passed to IndexQuery(). */ -#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ -#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ -#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ -#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ +#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ +#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ +#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ +#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* The following are used internally by the fts5_index.c module. They are ** defined here only to make it easier to avoid clashes with the flags ** above. 
*/ -#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 -#define FTS5INDEX_QUERY_NOOUTPUT 0x0020 -#define FTS5INDEX_QUERY_SKIPHASH 0x0040 +#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 +#define FTS5INDEX_QUERY_NOOUTPUT 0x0020 +#define FTS5INDEX_QUERY_SKIPHASH 0x0040 +#define FTS5INDEX_QUERY_NOTOKENDATA 0x0080 /* ** Create/destroy an Fts5Index object. @@ -467,7 +468,7 @@ int sqlite3Fts5StructureTest(Fts5Index*, void*); /* ** Used by xInstToken() and xPhraseToken(). */ -int sqlite3Fts5IterToken(Fts5IndexIter*, int, int, const char**, int*); +int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*); /* ** Insert or remove data to or from the index. Each time a document is @@ -548,7 +549,7 @@ int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); /* Used to populate hash tables for xInstToken in detail=none/column mode. */ void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); int sqlite3Fts5IndexIterWriteTokendata( - Fts5IndexIter*, const char*, int, int iCol, int iOff + Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff ); /* diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 6589d1b3e6..89e7cbf364 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3003,6 +3003,7 @@ static int fts5ExprPopulatePoslistsCb( Fts5Expr *pExpr = p->pExpr; int i; int nQuery = nToken; + i64 iRowid = pExpr->pRoot->iRowid; UNUSED_PARAM2(iUnused1, iUnused2); @@ -3025,7 +3026,7 @@ static int fts5ExprPopulatePoslistsCb( int iCol = p->iOff>>32; int iTokOff = p->iOff & 0x7FFFFFFF; rc = sqlite3Fts5IndexIterWriteTokendata( - pT->pIter, pToken, nToken, iCol, iTokOff + pT->pIter, pToken, nToken, iRowid, iCol, iTokOff ); } if( rc ) return rc; @@ -3210,6 +3211,7 @@ int sqlite3Fts5ExprInstToken( ){ Fts5ExprPhrase *pPhrase = 0; Fts5IndexIter *pIter = 0; + i64 iRowid = pExpr->pRoot->iRowid; if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ return SQLITE_RANGE; @@ -3220,6 +3222,6 @@ int sqlite3Fts5ExprInstToken( } pIter = 
pPhrase->aTerm[iToken].pIter; - return sqlite3Fts5IterToken(pIter, iCol, iOff+iToken, ppOut, pnOut); + return sqlite3Fts5IterToken(pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut); } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 4b7c8d3335..5f39a1e764 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -327,6 +327,8 @@ typedef struct Fts5TokenMapEntry Fts5TokenMapEntry; typedef struct Fts5TokenMapToken Fts5TokenMapToken; typedef struct Fts5TokenMap Fts5TokenMap; +typedef struct Fts5TokenDataIter Fts5TokenDataIter; + struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ int nn; /* Size of record in bytes */ @@ -594,9 +596,16 @@ struct Fts5SegIter { ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. +** +** pColset: +** If not NULL, points to an object containing a set of column indices. +** Only matches that occur in one of these columns will be returned. +** The Fts5Iter does not own the Fts5Colset object, and so it is not +** freed when the iterator is closed - it is owned by the upper layer. 
*/ struct Fts5Iter { Fts5IndexIter base; /* Base class containing output vars */ + Fts5TokenDataIter *pTokenDataIter; Fts5Index *pIndex; /* Index that owns this iterator */ Fts5Buffer poslist; /* Buffer containing current poslist */ @@ -3780,6 +3789,28 @@ static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ } } +static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ + int iIter; + for(iIter=pIter->nSeg-1; iIter>0; iIter--){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){ + Fts5SegIter *pSeg = &pIter->aSeg[iEq]; + if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); + fts5MultiIterAdvanced(p, pIter, iEq, iIter); + } + } + fts5MultiIterSetEof(pIter); + fts5AssertMultiIterSetup(p, pIter); + + if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter)) + || fts5MultiIterIsDeleted(pIter) + ){ + fts5MultiIterNext(p, pIter, 0, 0); + }else if( pIter->base.bEof==0 ){ + Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + pIter->xSetOutputs(pIter, pSeg); + } +} /* ** Allocate a new Fts5Iter object. @@ -3866,26 +3897,7 @@ static void fts5MultiIterNew( ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. 
*/ if( p->rc==SQLITE_OK ){ - for(iIter=pNew->nSeg-1; iIter>0; iIter--){ - int iEq; - if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ - Fts5SegIter *pSeg = &pNew->aSeg[iEq]; - if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); - fts5MultiIterAdvanced(p, pNew, iEq, iIter); - } - } - fts5MultiIterSetEof(pNew); - fts5AssertMultiIterSetup(p, pNew); - - if( (pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew)) - || fts5MultiIterIsDeleted(pNew) - ){ - fts5MultiIterNext(p, pNew, 0, 0); - }else if( pNew->base.bEof==0 ){ - Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; - pNew->xSetOutputs(pNew, pSeg); - } - + fts5MultiIterFinishSetup(p, pNew); }else{ fts5MultiIterFree(pNew); *ppOut = 0; @@ -6718,6 +6730,431 @@ int sqlite3Fts5IndexWrite( return rc; } +/* +** pToken points to a buffer of size nToken bytes containing a search +** term, including the index number at the start, used on a tokendata=1 +** table. This function returns true if the term in buffer pBuf matches +** token pToken/nToken. +*/ +static int fts5IsTokendataPrefix( + Fts5Buffer *pBuf, + const u8 *pToken, + int nToken +){ + return ( + pBuf->n>=nToken + && 0==memcmp(pBuf->p, pToken, nToken) + && (pBuf->n==nToken || pBuf->p[nToken]==0x00) + ); +} + +/* +** Ensure the segment-iterator passed as the only argument points to EOF. +*/ +static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ + fts5DataRelease(pSeg->pLeaf); + pSeg->pLeaf = 0; +} + +typedef struct Fts5TokenDataMap Fts5TokenDataMap; +struct Fts5TokenDataMap { + i64 iRowid; + i64 iPos; + int iIter; +}; + +struct Fts5TokenDataIter { + int nIter; + int nIterAlloc; + + int nMap; + int nMapAlloc; + Fts5TokenDataMap *aMap; + + Fts5PoslistReader *aPoslistReader; + Fts5Iter *apIter[1]; +}; + +static Fts5TokenDataIter *fts5AppendTokendataIter( + Fts5Index *p, + Fts5TokenDataIter *pIn, + Fts5Iter *pAppend +){ + Fts5TokenDataIter *pRet = pIn; + + if( p->rc==SQLITE_OK ){ + if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){ + int nAlloc = pIn ? 
pIn->nIterAlloc*2 : 16; + int nByte = nAlloc * sizeof(Fts5Iter*); + Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte); + + if( pNew==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + if( pIn==0 ) memset(pNew, 0, nByte); + pRet = pNew; + pNew->nIterAlloc = nAlloc; + } + } + } + if( p->rc ){ + sqlite3Fts5IterClose((Fts5IndexIter*)pAppend); + }else{ + pRet->apIter[pRet->nIter++] = pAppend; + } + + return pRet; +} + +static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ + if( pSet ){ + int ii; + for(ii=0; iinIter; ii++){ + fts5MultiIterFree(pSet->apIter[ii]); + } + sqlite3_free(pSet->aPoslistReader); + sqlite3_free(pSet->aMap); + sqlite3_free(pSet); + } +} + +static int fts5TokendataIterToken( + Fts5Iter *pIter, + i64 iRowid, + int iCol, int iOff, + const char **ppOut, int *pnOut +){ + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + Fts5TokenDataMap *aMap = pT->aMap; + i64 iPos = (((i64)iCol)<<32) + iOff; + + int i1 = 0; + int i2 = pT->nMap; + int iTest = 0; + + while( i2>i1 ){ + iTest = (i1 + i2) / 2; + + if( aMap[iTest].iRowidiRowid ){ + i2 = iTest; + }else{ + if( aMap[iTest].iPosiPos ){ + i2 = iTest; + }else{ + break; + } + } + } + + if( i2>i1 ){ + Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; + *ppOut = (const char*)pMap->aSeg[0].term.p+1; + *pnOut = pMap->aSeg[0].term.n-1; + } + + return SQLITE_OK; +} + +static void fts5TokendataIterAppendMap( + Fts5Index *p, + Fts5TokenDataIter *pT, + int iIter, + i64 iRowid, + i64 iPos +){ + if( p->rc==SQLITE_OK ){ + if( pT->nMap==pT->nMapAlloc ){ + int nNew = pT->nMapAlloc ? 
pT->nMapAlloc*2 : 64; + int nByte = nNew * sizeof(Fts5TokenDataMap); + Fts5TokenDataMap *aNew; + + aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pT->aMap[pT->nMap].iRowid = iRowid; + pT->aMap[pT->nMap].iPos = iPos; + pT->aMap[pT->nMap].iIter = iIter; + pT->nMap++; + } +} + +static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ + int ii; + int nHit = 0; + i64 iRowid = SMALLEST_INT64; + int iMin = 0; + + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + + pIter->base.nData = 0; + pIter->base.pData = 0; + + for(ii=0; iinIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( p->base.bEof==0 ){ + if( nHit==0 || p->base.iRowidbase.iRowid; + nHit = 1; + pIter->base.pData = p->base.pData; + pIter->base.nData = p->base.nData; + iMin = ii; + }else if( p->base.iRowid==iRowid ){ + nHit++; + } + } + } + + if( nHit==0 ){ + pIter->base.bEof = 1; + }else{ + int eDetail = pIter->pIndex->pConfig->eDetail; + pIter->base.bEof = 0; + pIter->base.iRowid = iRowid; + + if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){ + fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1); + }else + if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){ + int nReader = 0; + int nByte = 0; + i64 iPrev = 0; + + /* Allocate array of iterators if they are not already allocated. 
*/ + if( pT->aPoslistReader==0 ){ + pT->aPoslistReader = sqlite3Fts5MallocZero( + &pIter->pIndex->rc, sizeof(Fts5PoslistReader) * pT->nIter + ); + if( pT->aPoslistReader==0 ) return; + } + + /* Populate an iterator for each poslist that will be merged */ + for(ii=0; iinIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( iRowid==p->base.iRowid ){ + sqlite3Fts5PoslistReaderInit( + p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++] + ); + nByte += p->base.nData; + } + } + + /* Ensure the output buffer is large enough */ + if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10) ){ + return; + } + + /* Ensure the token-mapping is large enough */ + if( eDetail==FTS5_DETAIL_FULL && pT->nMapAlloc<(pT->nMap + nByte) ){ + int nNew = (pT->nMapAlloc + nByte) * 2; + Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_realloc( + pT->aMap, nNew*sizeof(Fts5TokenDataMap) + ); + if( aNew==0 ){ + pIter->pIndex->rc = SQLITE_NOMEM; + return; + } + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pIter->poslist.n = 0; + + while( 1 ){ + i64 iMinPos = LARGEST_INT64; + + /* Find smallest position */ + iMin = 0; + for(ii=0; iiaPoslistReader[ii]; + if( pReader->bEof==0 ){ + if( pReader->iPosiPos; + iMin = ii; + } + } + } + + /* If all readers were at EOF, break out of the loop. 
*/ + if( iMinPos==LARGEST_INT64 ) break; + + sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos); + sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]); + + if( eDetail==FTS5_DETAIL_FULL ){ + pT->aMap[pT->nMap].iPos = iMinPos; + pT->aMap[pT->nMap].iIter = iMin; + pT->aMap[pT->nMap].iRowid = iRowid; + pT->nMap++; + } + } + + pIter->base.pData = pIter->poslist.p; + pIter->base.nData = pIter->poslist.n; + } + } +} + +static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ + int ii; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + + for(ii=0; iinIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( p->base.bEof==0 + && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowidpIndex, p, bFrom, iFrom); + while( bFrom && p->base.bEof==0 + && p->base.iRowidpIndex->rc==SQLITE_OK + ){ + fts5MultiIterNext(p->pIndex, p, 0, 0); + } + } + } + + fts5IterSetOutputsTokendata(pIter); +} + +static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ + if( pIter && pIter->aSeg[0].pLeaf==0 ){ + fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p); + } +} + +static Fts5Iter *fts5SetupTokendataIter( + Fts5Index *p, /* FTS index to query */ + int bDesc, /* True for "ORDER BY rowid DESC" */ + const u8 *pToken, /* Buffer containing query term */ + int nToken, /* Size of buffer pToken in bytes */ + Fts5Colset *pColset /* Colset to filter on */ +){ + Fts5Iter *pRet = 0; + Fts5TokenDataIter *pSet = 0; + Fts5Structure *pStruct = 0; + const int flags = FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_SCAN; + + assert( bDesc==0 ); + + Fts5Buffer bSeek = {0, 0, 0}; + Fts5Buffer *pSmall = 0; + + fts5IndexFlush(p); + pStruct = fts5StructureRead(p); + + while( 1 ){ + Fts5Iter *pPrev = pSet ? 
pSet->apIter[pSet->nIter-1] : 0; + Fts5Iter *pNew = 0; + Fts5SegIter *pNewIter = 0; + Fts5SegIter *pPrevIter = 0; + + int iLvl, iSeg, ii; + + pNew = fts5MultiIterAlloc(p, pStruct->nSegment); + if( pNew==0 ) break; + + if( pSmall ){ + fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p); + fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0"); + }else{ + fts5BufferSet(&p->rc, &bSeek, nToken, pToken); + } + + pNewIter = &pNew->aSeg[0]; + pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0); + for(iLvl=0; iLvlnLevel; iLvl++){ + for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); + + pNewIter++; + if( pPrevIter ){ + if( fts5BufferCompare(pSmall, &pPrevIter->term) ){ + fts5SegIterSetEOF(pPrevIter); + } + pPrevIter++; + } + } + } + fts5TokendataSetTermIfEof(pPrev, pSmall); + + pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY)); + pNew->pColset = pColset; + fts5IterSetOutputCb(&p->rc, pNew); + + /* Loop through all segments in the new iterator. Find the smallest + ** term that any segment-iterator points to. Iterator pNew will be + ** used for this term. Also, set any iterator that points to a term that + ** does not match pToken/nToken to point to EOF */ + pSmall = 0; + for(ii=0; iinSeg; ii++){ + Fts5SegIter *pII = &pNew->aSeg[ii]; + if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){ + fts5SegIterSetEOF(pII); + } + if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){ + pSmall = &pII->term; + } + } + + /* If pSmall is still NULL at this point, then the new iterator does + ** not point to any terms that match the query. So delete it and break + ** out of the loop - all required iterators have been collected. */ + if( pSmall==0 ){ + sqlite3Fts5IterClose((Fts5IndexIter*)pNew); + break; + } + + /* Append this iterator to the set and continue. 
*/ + pSet = fts5AppendTokendataIter(p, pSet, pNew); + } + + if( p->rc==SQLITE_OK && pSet ){ + int ii; + for(ii=0; iinIter; ii++){ + Fts5Iter *pIter = pSet->apIter[ii]; + int iSeg; + for(iSeg=0; iSegnSeg; iSeg++){ + pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM; + } + fts5MultiIterFinishSetup(p, pIter); + } + } + + if( p->rc==SQLITE_OK ){ + pRet = fts5MultiIterAlloc(p, 0); + } + if( pRet ){ + pRet->pTokenDataIter = pSet; + if( pSet ){ + fts5IterSetOutputsTokendata(pRet); + }else{ + pRet->base.bEof = 1; + } + }else{ + fts5TokendataIterDelete(pSet); + } + + fts5StructureRelease(pStruct); + fts5BufferFree(&bSeek); + return pRet; +} + + /* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. @@ -6739,6 +7176,7 @@ int sqlite3Fts5IndexQuery( if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ int iIdx = 0; /* Index to search */ int iPrefixIdx = 0; /* +1 prefix index */ + int bTokendata = (flags&FTS5INDEX_QUERY_NOTOKENDATA)?0:pConfig->bTokendata; if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); /* Figure out which index to search and set iIdx accordingly. 
If this @@ -6766,7 +7204,7 @@ int sqlite3Fts5IndexQuery( } } - if( iIdx<=pConfig->nPrefix && (pConfig->bTokendata==0 || iIdx!=0) ){ + if( iIdx<=pConfig->nPrefix && (bTokendata==0 || iIdx!=0) ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); @@ -6776,6 +7214,10 @@ int sqlite3Fts5IndexQuery( ); fts5StructureRelease(pStruct); } + }else if( bTokendata && iIdx==0 ){ + int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; + buf.p[0] = '0'; + pRet = fts5SetupTokendataIter(p, bDesc, buf.p, nToken+1, pColset); }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; @@ -6816,7 +7258,11 @@ int sqlite3Fts5IndexQuery( int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->rc==SQLITE_OK ); - fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); + if( pIter->pTokenDataIter ){ + fts5TokendataIterNext(pIter, 0, 0); + }else{ + fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); + } return fts5IndexReturn(pIter->pIndex); } @@ -6849,7 +7295,11 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); + if( pIter->pTokenDataIter ){ + fts5TokendataIterNext(pIter, 1, iMatch); + }else{ + fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); + } return fts5IndexReturn(pIter->pIndex); } @@ -6869,12 +7319,18 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ */ int sqlite3Fts5IterToken( Fts5IndexIter *pIndexIter, + i64 iRowid, int iCol, int iOff, const char **ppOut, int *pnOut ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5TokenMap *pMap = pIter->pTokenMap; + + if( pIter->pTokenDataIter ){ + return fts5TokendataIterToken(pIter, iRowid, iCol, iOff, ppOut, pnOut); + } + if( pMap ){ if( pMap->bHashed==0 ){ Fts5Index *p = pIter->pIndex; @@ -6899,53 +7355,29 
@@ int sqlite3Fts5IterToken( void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL ); - if( pIter->pTokenMap ){ - pIter->pTokenMap->nEntry = 0; + if( pIter->pTokenDataIter ){ + pIter->pTokenDataIter->nMap = 0; } } int sqlite3Fts5IndexIterWriteTokendata( Fts5IndexIter *pIndexIter, const char *pToken, int nToken, - int iCol, int iOff + i64 iRowid, int iCol, int iOff ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; Fts5Index *p = pIter->pIndex; + assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL ); - if( pIter->pTokenMap==0 ){ - pIter->pTokenMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); - } - if( p->rc==SQLITE_OK ){ - Fts5TokenMap *pMap = pIter->pTokenMap; + if( pT ){ int ii; - for(ii=0; iinToken; ii++){ - if( nToken==pMap->aToken[ii].nTerm - && 0==memcmp(pMap->aToken[ii].pTerm, pToken, nToken) - ){ - break; - } + for(ii=0; iinIter; ii++){ + Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; + if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; } - if( ii==pMap->nToken ){ - fts5TokenMapTerm(p, pMap, (const u8*)pToken, nToken); - } - if( pMap->nEntry>=pMap->nEntryAlloc ){ - int nNew = pMap->nEntryAlloc ? 
pMap->nEntryAlloc*2 : 32; - Fts5TokenMapEntry *aNew = (Fts5TokenMapEntry*)sqlite3_realloc( - pMap->aEntry, nNew * sizeof(Fts5TokenMapEntry) - ); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - pMap->aEntry = aNew; - pMap->nEntryAlloc = nNew; - } - } - if( p->rc==SQLITE_OK ){ - Fts5TokenMapEntry *pEntry = &pMap->aEntry[pMap->nEntry++]; - pEntry->iRowid = pIndexIter->iRowid; - pEntry->iCol = iCol; - pEntry->iOff = iOff; - pEntry->iTok = ii+1; + if( iinIter ){ + fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff); } } return fts5IndexReturn(p); @@ -6958,6 +7390,7 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ if( pIndexIter ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *pIndex = pIter->pIndex; + fts5TokendataIterDelete(pIter->pTokenDataIter); fts5MultiIterFree(pIter); sqlite3Fts5IndexCloseReader(pIndex); } @@ -7465,7 +7898,9 @@ static int fts5QueryCksum( int eDetail = p->pConfig->eDetail; u64 cksum = *pCksum; Fts5IndexIter *pIter = 0; - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); + int rc = sqlite3Fts5IndexQuery( + p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA), 0, &pIter + ); while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){ i64 rowid = pIter->iRowid; diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 9f19b24b88..34050474f8 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -656,12 +656,15 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ } idxStr[iIdxStr] = '\0'; - /* Set idxFlags flags for the ORDER BY clause */ + /* Set idxFlags flags for the ORDER BY clause + ** + ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC". 
+ */ if( pInfo->nOrderBy==1 ){ int iSort = pInfo->aOrderBy[0].iColumn; if( iSort==(pConfig->nCol+1) && bSeenMatch ){ idxFlags |= FTS5_BI_ORDER_RANK; - }else if( iSort==-1 ){ + }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){ idxFlags |= FTS5_BI_ORDER_ROWID; } if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){ diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 845e8145db..8273b3ca4d 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -141,7 +141,7 @@ do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1 do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2 do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 -do_execsql_test 3.0 { +do_execsql_test 3.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize="origintext unicode61", tokendata=1, @@ -160,11 +160,11 @@ do_execsql_test 3.0 { #db func b b #execsql_pp { SELECT b(term) FROM vocab } -do_execsql_test 3.1.1 { SELECT rowid FROM ft2('hello') } {1 2 3} -do_execsql_test 3.1.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} -do_execsql_test 3.1.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} +do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3} +do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} +do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} -do_execsql_test 3.1.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} +do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} #------------------------------------------------------------------------- # diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test index 26f9864098..948db1c519 100644 --- a/ext/fts5/test/fts5origintext2.test +++ b/ext/fts5/test/fts5origintext2.test @@ -102,6 +102,7 @@ breakpoint do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3} do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6} do_execsql_test 1.13 { SELECT rowid FROM 
ft('world'); } {7 8 9} +do_execsql_test 1.14 { SELECT rowid FROM ft('hello') ORDER BY rank; } {1 2 3} finish_test diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test index 57b5984f4d..2b1e5c6387 100644 --- a/ext/fts5/test/fts5origintext3.test +++ b/ext/fts5/test/fts5origintext3.test @@ -46,6 +46,7 @@ foreach_detail_mode $testprefix { SELECT fts5_test_poslist(ft) FROM ft('hello'); } {{0.0.0 0.0.2 0.0.4}} +breakpoint do_execsql_test 1.3 { SELECT insttoken(ft, 0, 0), @@ -54,6 +55,14 @@ foreach_detail_mode $testprefix { FROM ft('hello'); } {hello.Hello hello.HELLO hello} + do_execsql_test 1.4 { + SELECT + insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0) + FROM ft('hello') ORDER BY rank; + } {hello.Hello hello.HELLO hello} + } finish_test diff --git a/manifest b/manifest index 13a7da6a20..04745fb5d2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssigned\sinteger\soverflow\sin\sfts5. -D 2023-11-29T16:22:39.960 +C Different\sapproach\sto\squerying\sa\stokendata=1\stable.\sSaves\scpu\sand\smemory. 
+D 2023-12-01T20:09:59.031 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -90,14 +90,14 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h 5e5630fc81e212f658afaa5b2650dac939d2729d0723aef1eeaff908f1725648 -F ext/fts5/fts5Int.h 782151060d176be22861f57bf38e087a82cfb0dfc4b2fa6f9ccbc2641b6d01e3 +F ext/fts5/fts5Int.h 2dc73393460e5c5cab67adc7e32e1387cc225b57e05f629d490e65cddea1a8c5 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c 5d557c7ebefaeac5a5111cc47d4fee8a2fc6bc15245d5c99eebf53dd04bf794e +F ext/fts5/fts5_expr.c aac8026aedf56c9a6e32b31c89f9dd7e5548378457085093307d06be14f1a176 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c bafdef8be40a20bb86a131af4f09b56919f416fa13d1a86af1bf92bad9a2870d -F ext/fts5/fts5_main.c 55b53085dbd1693b5735463198a8d124dfbc27f08311c839637b44b8254ef7cb +F ext/fts5/fts5_index.c b6012920df8963245226bb536db7ddea62dfd7a860d6112887b175f7aaf55b82 +F ext/fts5/fts5_main.c 20596de592af135f68b9be875f0a28715f6562bbdedd215e1c89eac1b42e97f9 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -190,9 
+190,9 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 7caef7634889bab8b44d145141c0d9325299398fb89b116bccd6262fde5659db -F ext/fts5/test/fts5origintext2.test 26482f4af1f2785cb01d06af9aae202289b6e8cf7b708d18aea305b459c2f302 -F ext/fts5/test/fts5origintext3.test 87a212b8235794348c56cb70f21e122d182a5af688c56057b90b7c151d0aa347 +F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0703aff5981625b527e62 +F ext/fts5/test/fts5origintext2.test 3259b331073fec918e02fd4d14d50586f9a3531da047a2a8f4624983eb654229 +F ext/fts5/test/fts5origintext3.test cb0f5835f8dff5954ee20570b68ee520cf04a08f6f9ca967b9d01d27e532da37 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2146,8 +2146,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 554fc13f2ca5f2ebd9ad0206034c25b556ff40db3106051c5e539f2e142e88ea -R 5c7c1632cd09d2b131adef895eab90a8 +P 60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4 +R e2c2c11ec4bab5e354a94b25a41ae3cd U dan -Z 009b023eba91b6acc35beb7d051a677a +Z f7d96142774ad25f02aad90982aa0fc8 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index df2c2789ad..e669309199 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4 \ No newline at end of file +c523f40895866e6fc979a26483dbea8206126b4bbdf4b73b77263c09e13c855e \ No newline at end of file From f4c2962558e19b1b86734919a74c50856121d3d6 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 Dec 2023 20:37:11 +0000 Subject: [PATCH 14/24] Remove old code for tokendata=1 queries. FossilOrigin-Name: b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b --- ext/fts5/fts5_index.c | 423 +++++---------------------------- ext/fts5/test/fts5simple2.test | 4 +- manifest | 14 +- manifest.uuid | 2 +- 4 files changed, 67 insertions(+), 376 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 5f39a1e764..05f4c5e6aa 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -323,10 +323,6 @@ typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; -typedef struct Fts5TokenMapEntry Fts5TokenMapEntry; -typedef struct Fts5TokenMapToken Fts5TokenMapToken; -typedef struct Fts5TokenMap Fts5TokenMap; - typedef struct Fts5TokenDataIter Fts5TokenDataIter; struct Fts5Data { @@ -370,7 +366,6 @@ struct Fts5Index { sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" 
*/ sqlite3_stmt *pIdxSelect; - sqlite3_stmt *pIdxProbe; int nRead; /* Total number of blocks read */ sqlite3_stmt *pDeleteFromIdx; @@ -618,37 +613,11 @@ struct Fts5Iter { int bRev; /* True to iterate in reverse order */ u8 bSkipEmpty; /* True to skip deleted entries */ - Fts5TokenMap *pTokenMap; i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ Fts5CResult *aFirst; /* Current merge state (see above) */ Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; -struct Fts5TokenMapEntry { - i64 iRowid; - u16 iCol; - int iOff; - int iTok; /* Offset into aToken[] + 1 */ -}; - -struct Fts5TokenMapToken { - u8 *pTerm; - int nTerm; -}; - -struct Fts5TokenMap { - int bHashed; /* True once hashed */ - - int nEntryAlloc; - int nEntry; - Fts5TokenMapEntry *aEntry; - - int nTokenAlloc; - int nToken; - Fts5TokenMapToken *aToken; -}; - - /* ** An instance of the following type is used to iterate through the contents ** of a doclist-index record. @@ -2641,18 +2610,6 @@ static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ return p->pIdxSelect; } -static sqlite3_stmt *fts5IdxProbeStmt(Fts5Index *p){ - if( p->pIdxProbe==0 ){ - Fts5Config *pConfig = p->pConfig; - fts5IndexPrepareStmt(p, &p->pIdxProbe, sqlite3_mprintf( - "SELECT 1 FROM '%q'.'%q_idx' WHERE " - "segid=? AND term>? AND termzDb, pConfig->zName - )); - } - return p->pIdxProbe; -} - /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. @@ -3073,21 +3030,6 @@ static void fts5SegIterNextFrom( }while( p->rc==SQLITE_OK ); } -/* -** Free the Fts5TokenMap object passed as the only argument. -*/ -static void fts5TokenMapFree(Fts5TokenMap *pMap){ - if( pMap ){ - int ii; - for(ii=0; iinToken; ii++){ - sqlite3_free(pMap->aToken[ii].pTerm); - } - sqlite3_free(pMap->aToken); - sqlite3_free(pMap->aEntry); - sqlite3_free(pMap); - } -} - /* ** Free the iterator object passed as the second argument. 
*/ @@ -3098,7 +3040,6 @@ static void fts5MultiIterFree(Fts5Iter *pIter){ fts5SegIterClear(&pIter->aSeg[i]); } fts5BufferFree(&pIter->poslist); - fts5TokenMapFree(pIter->pTokenMap); sqlite3_free(pIter); } } @@ -3915,7 +3856,6 @@ fts5MultiIterNew_post_check: static void fts5MultiIterNew2( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Data *pData, /* Doclist to iterate through */ - Fts5TokenMap *pMap, /* Token-map, if any */ int bDesc, /* True for descending rowid order */ Fts5Iter **ppOut /* New object */ ){ @@ -3923,8 +3863,6 @@ static void fts5MultiIterNew2( pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; - pNew->pTokenMap = pMap; - pMap = 0; pIter->flags = FTS5_SEGITER_ONETERM; if( pData->szLeaf>0 ){ pIter->pLeaf = pData; @@ -3947,7 +3885,6 @@ static void fts5MultiIterNew2( *ppOut = pNew; } - fts5TokenMapFree(pMap); fts5DataRelease(pData); } @@ -6141,210 +6078,9 @@ static void fts5MergePrefixLists( *p1 = out; } -static u8 *fts5IdxBufferDup(Fts5Index *p, const u8 *pDup, int nDup){ - u8 *pRet = fts5IdxMalloc(p, nDup+1); - if( pRet ){ - memcpy(pRet, pDup, nDup); - } - return pRet; -} - -static void fts5TokenMapTerm( - Fts5Index *p, - Fts5TokenMap *pMap, - const u8 *pTerm, - int nTerm -){ - if( p->rc==SQLITE_OK ){ - Fts5TokenMapToken *pToken = 0; - if( pMap->nToken==pMap->nTokenAlloc ){ - i64 nNew = (pMap->nTokenAlloc ? 
pMap->nTokenAlloc * 2 : 32); - Fts5TokenMapToken *aNew = sqlite3_realloc64( - pMap->aToken, nNew*sizeof(Fts5TokenMapToken) - ); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - return; - } - pMap->nTokenAlloc = nNew; - pMap->aToken = aNew; - } - pToken = &pMap->aToken[pMap->nToken++]; - pToken->nTerm = nTerm; - pToken->pTerm = fts5IdxBufferDup(p, pTerm, nTerm); - } -} - - -static void fts5TokenMapPoslist( - Fts5Index *p, - Fts5TokenMap *pMap, - Fts5Iter *p1 -){ - if( p->rc==SQLITE_OK ){ - const u8 *a = p1->base.pData; - i64 iPos = 0; - int iOff = 0; - - while( 0==sqlite3Fts5PoslistNext64(a, p1->base.nData, &iOff, &iPos) ){ - Fts5TokenMapEntry *pEntry = 0; - int iCol = FTS5_POS2COLUMN(iPos); - int iTokOff = FTS5_POS2OFFSET(iPos); - - if( pMap->nEntry==pMap->nEntryAlloc ){ - i64 nNew = (pMap->nEntryAlloc ? pMap->nEntryAlloc * 2 : 32); - Fts5TokenMapEntry *aNew = sqlite3_realloc64( - pMap->aEntry, nNew*sizeof(Fts5TokenMapEntry) - ); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - return; - } - pMap->nEntryAlloc = nNew; - pMap->aEntry = aNew; - } - pEntry = &pMap->aEntry[pMap->nEntry++]; - pEntry->iRowid = p1->base.iRowid; - pEntry->iCol = iCol; - pEntry->iOff = iTokOff; - pEntry->iTok = pMap->nToken; - } - } -} - -static int fts5TokenMapHash(i64 iRowid, int iCol, int iOff){ - return (iRowid + (iRowid << 3) + (iCol << 6) + (iOff << 9)) & 0x7FFFFFFF; -} - -static void fts5TokenMapHashify(Fts5Index *p, Fts5TokenMap *pMap){ - int nHash = pMap->nEntry*2; - Fts5TokenMapEntry *aHash = 0; - - aHash = (Fts5TokenMapEntry*)fts5IdxMalloc(p, nHash*sizeof(Fts5TokenMapEntry)); - if( aHash ){ - int ii; - for(ii=0; iinEntry; ii++){ - Fts5TokenMapEntry *pEntry = &pMap->aEntry[ii]; - Fts5TokenMapEntry *pCopy = 0; - int iHash = fts5TokenMapHash(pEntry->iRowid, pEntry->iCol, pEntry->iOff); - - while( aHash[iHash % nHash].iTok ){ - iHash++; - } - pCopy = &aHash[iHash % nHash]; - memcpy(pCopy, pEntry, sizeof(Fts5TokenMapEntry)); - } - - sqlite3_free(pMap->aEntry); - pMap->aEntry = aHash; - 
pMap->nEntry = pMap->nEntryAlloc = nHash; - } -} - -static const u8 *fts5TokenMapLookup( - Fts5TokenMap *pMap, - i64 iRowid, - int iCol, - int iOff, - int *pnOut -){ - int iHash = fts5TokenMapHash(iRowid, iCol, iOff) % pMap->nEntry; - - for(; pMap->aEntry[iHash].iTok!=0; iHash = (iHash+1)%pMap->nEntry){ - Fts5TokenMapEntry *pEntry = &pMap->aEntry[iHash]; - if( pEntry->iRowid==iRowid && pEntry->iCol==iCol && pEntry->iOff==iOff ){ - *pnOut = pMap->aToken[pEntry->iTok-1].nTerm; - return pMap->aToken[pEntry->iTok-1].pTerm; - } - } - - *pnOut = 0; - return 0; -} - -/* -** The iterator passed as the second argument has been opened to scan and -** merge doclists for a series of tokens in tokendata=1 mode. This function -** tests whether or not, instead of using the cursor to read doclists to -** merge, it can be used directly by the upper layer. This is the case -** if the cursor currently points to the only token that corresponds to -** the queried term. i.e. if the next token that will be visited by the -** iterator does not match the query. -*/ -int fts5TokendataIterIsOk( - Fts5Index *p, - Fts5Iter *pIter, - const u8 *pToken, - int nToken -){ - int ii; - Fts5Buffer buf = {0, 0, 0}; - int bRet = 1; - Fts5Buffer *pTerm = 0; - - /* Iterator is not usable if it uses the hash table */ - if( pIter->aSeg[0].pSeg==0 ) return 0; - - for(ii=0; bRet && iinSeg; ii++){ - Fts5SegIter *pSeg = &pIter->aSeg[ii]; - Fts5Data *pLeaf = pSeg->pLeaf; - if( pLeaf ){ - - if( pTerm==0 ){ - pTerm = &pSeg->term; - }else{ - if( pSeg->term.n!=pTerm->n - || memcmp(pSeg->term.p, pTerm->p, pTerm->n) - ){ - bRet = 0; - break; - } - } - - if( pSeg->iEndofDoclistszLeaf ){ - /* Next term is on this node. Check it directly. */ - int nPrefix = 0; - fts5GetVarint32(&pLeaf->p[pSeg->iEndofDoclist], nPrefix); - if( nPrefix>=nToken ) bRet = 0; - }else{ - /* Next term is on a subsequent page. In this case query the %_idx - ** table to discover exactly what that next term is. 
*/ - sqlite3_stmt *pProbe = fts5IdxProbeStmt(p); - if( pProbe ){ - int rc = SQLITE_OK; - if( buf.n==0 ){ - sqlite3Fts5BufferAppendBlob(&p->rc, &buf, nToken, pToken); - sqlite3Fts5BufferAppendBlob(&p->rc, &buf, 1, (const u8*)"\1"); - } - sqlite3_bind_int(pProbe, 1, pSeg->pSeg->iSegid); - sqlite3_bind_blob(pProbe,2, pSeg->term.p,pSeg->term.n, SQLITE_STATIC); - sqlite3_bind_blob(pProbe,3, buf.p, buf.n, SQLITE_STATIC); - - if( sqlite3_step(pProbe)==SQLITE_ROW ){ - bRet = 0; - } - rc = sqlite3_reset(pProbe); - if( p->rc==SQLITE_OK ) p->rc = rc; - } - } - } - } - - if( bRet ){ - for(ii=0; iinSeg; ii++){ - Fts5SegIter *pSeg = &pIter->aSeg[ii]; - pSeg->flags |= FTS5_SEGITER_ONETERM; - } - } - - fts5BufferFree(&buf); - return bRet; -} - static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ - int bTokenscan, int iIdx, /* Index to scan for data */ u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ @@ -6355,7 +6091,6 @@ static void fts5SetupPrefixIter( Fts5Buffer *aBuf; int nBuf = 32; int nMerge = 1; - Fts5TokenMap *pMap = 0; void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); @@ -6369,8 +6104,6 @@ static void fts5SetupPrefixIter( xAppend = fts5AppendPoslist; } - assert( bTokenscan==0 || iIdx==0 ); - aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); @@ -6417,90 +6150,69 @@ static void fts5SetupPrefixIter( fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); fts5IterSetOutputCb(&p->rc, p1); - if( bDesc==0 && bTokenscan && fts5TokendataIterIsOk(p, p1, pToken,nToken) ){ - /* In this case iterator p1 may be used as is. 
*/ - *ppIter = p1; - }else{ + for( /* no-op */ ; + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext2(p, p1, &bNewTerm) + ){ + Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; + int nTerm = pSeg->term.n; + const u8 *pTerm = pSeg->term.p; + p1->xSetOutputs(p1, pSeg); - if( iIdx==0 && p->pConfig->eDetail==FTS5_DETAIL_FULL && bTokenscan ){ - pMap = (Fts5TokenMap*)fts5IdxMalloc(p, sizeof(Fts5TokenMap)); + assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); + if( bNewTerm ){ + if( nTermrc!=SQLITE_OK || (aBuf && pStruct) ); - for( /* no-op */ ; - fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext2(p, p1, &bNewTerm) - ){ - Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; - int nTerm = pSeg->term.n; - const u8 *pTerm = pSeg->term.p; - p1->xSetOutputs(p1, pSeg); - - assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); - if( bNewTerm ){ - if( nTermnToken && pTerm[nToken]!=0x00 ) break; - } - - if( pMap ){ - if( bNewTerm ){ - fts5TokenMapTerm(p, pMap, &pTerm[1], nTerm-1); + if( p1->base.nData==0 ) continue; + if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ + for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ + int i1 = i*nMerge; + int iStore; + assert( i1+nMerge<=nBuf ); + for(iStore=i1; iStorebase.nData==0 ) continue; - if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ - for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ - int i1 = i*nMerge; - int iStore; - assert( i1+nMerge<=nBuf ); + if( iStore==i1+nMerge ){ + xMerge(p, &doclist, nMerge, &aBuf[i1]); for(iStore=i1; iStorebase.iRowid-(u64)iLastRowid, p1, &doclist); - iLastRowid = p1->base.iRowid; + iLastRowid = 0; } - - assert( (nBuf%nMerge)==0 ); - for(i=0; irc==SQLITE_OK ){ - xMerge(p, &doclist, nMerge, &aBuf[i]); - } - for(iFree=i; iFreep = (u8*)&pData[1]; - pData->nn = pData->szLeaf = doclist.n; - if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); - fts5MultiIterNew2(p, pData, pMap, bDesc, ppIter); - pMap = 0; - } - fts5BufferFree(&doclist); + + xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, 
&doclist); + iLastRowid = p1->base.iRowid; } + + assert( (nBuf%nMerge)==0 ); + for(i=0; irc==SQLITE_OK ){ + xMerge(p, &doclist, nMerge, &aBuf[i]); + } + for(iFree=i; iFreep = (u8*)&pData[1]; + pData->nn = pData->szLeaf = doclist.n; + if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); + fts5MultiIterNew2(p, pData, bDesc, ppIter); + } + fts5BufferFree(&doclist); } - fts5TokenMapFree(pMap); fts5StructureRelease(pStruct); sqlite3_free(aBuf); } @@ -6634,7 +6346,6 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ sqlite3_finalize(p->pIdxWriter); sqlite3_finalize(p->pIdxDeleter); sqlite3_finalize(p->pIdxSelect); - sqlite3_finalize(p->pIdxProbe); sqlite3_finalize(p->pDataVersion); sqlite3_finalize(p->pDeleteFromIdx); sqlite3Fts5HashFree(p->pHash); @@ -7038,7 +6749,6 @@ static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ static Fts5Iter *fts5SetupTokendataIter( Fts5Index *p, /* FTS index to query */ - int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing query term */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset /* Colset to filter on */ @@ -7048,8 +6758,6 @@ static Fts5Iter *fts5SetupTokendataIter( Fts5Structure *pStruct = 0; const int flags = FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_SCAN; - assert( bDesc==0 ); - Fts5Buffer bSeek = {0, 0, 0}; Fts5Buffer *pSmall = 0; @@ -7204,7 +6912,10 @@ int sqlite3Fts5IndexQuery( } } - if( iIdx<=pConfig->nPrefix && (bTokendata==0 || iIdx!=0) ){ + if( bTokendata && iIdx==0 ){ + buf.p[0] = '0'; + pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset); + }else if( iIdx<=pConfig->nPrefix ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); @@ -7214,17 +6925,10 @@ int sqlite3Fts5IndexQuery( ); fts5StructureRelease(pStruct); } - }else if( bTokendata && iIdx==0 ){ - int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; - buf.p[0] = '0'; - pRet = fts5SetupTokendataIter(p, bDesc, buf.p, nToken+1, 
pColset); }else{ /* Scan multiple terms in the main index */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; - int bTokenscan = (iIdx==0); - fts5SetupPrefixIter( - p, bDesc, bTokenscan, iPrefixIdx, buf.p, nToken+1, pColset, &pRet - ); + fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); if( pRet==0 ){ assert( p->rc!=SQLITE_OK ); }else{ @@ -7325,26 +7029,11 @@ int sqlite3Fts5IterToken( const char **ppOut, int *pnOut ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - Fts5TokenMap *pMap = pIter->pTokenMap; if( pIter->pTokenDataIter ){ return fts5TokendataIterToken(pIter, iRowid, iCol, iOff, ppOut, pnOut); } - if( pMap ){ - if( pMap->bHashed==0 ){ - Fts5Index *p = pIter->pIndex; - fts5TokenMapHashify(p, pMap); - if( p->rc ){ - return fts5IndexReturn(p); - } - } - *ppOut = (const char*)fts5TokenMapLookup( - pIter->pTokenMap, pIndexIter->iRowid, iCol, iOff, pnOut - ); - }else{ - *ppOut = sqlite3Fts5IterTerm(pIndexIter, pnOut); - } return SQLITE_OK; } diff --git a/ext/fts5/test/fts5simple2.test b/ext/fts5/test/fts5simple2.test index e57cea70fa..6c0e0e1662 100644 --- a/ext/fts5/test/fts5simple2.test +++ b/ext/fts5/test/fts5simple2.test @@ -343,7 +343,9 @@ do_execsql_test 17.0 { INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb'); COMMIT; } -do_execsql_test 17.1 { SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20 } +do_execsql_test 17.1 { + SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20 +} do_execsql_test 17.2 { BEGIN; INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb'); diff --git a/manifest b/manifest index 07814168f5..92cc5ca8d2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\swith\sthis\sbranch. -D 2023-12-01T20:10:20.295 +C Remove\sold\scode\sfor\stokendata=1\squeries. 
+D 2023-12-01T20:37:11.688 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -96,7 +96,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf F ext/fts5/fts5_expr.c aac8026aedf56c9a6e32b31c89f9dd7e5548378457085093307d06be14f1a176 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c b6012920df8963245226bb536db7ddea62dfd7a860d6112887b175f7aaf55b82 +F ext/fts5/fts5_index.c 2296bcd6736eaf093212892474619dfdb7ac1e262732e2b72cb528172c0b13d6 F ext/fts5/fts5_main.c 20596de592af135f68b9be875f0a28715f6562bbdedd215e1c89eac1b42e97f9 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -215,7 +215,7 @@ F ext/fts5/test/fts5secure7.test fd03d0868d64340a1db8615b02e5508fea409de13910114 F ext/fts5/test/fts5secure8.test eb3579e9d58b0acad97e8082dee1f99b2d393198f03500b453c2b25761c0c298 F ext/fts5/test/fts5securefault.test dbca2b6a1c16700017f5051138991b705410889933f2a37c57ae8a23b296b10b F ext/fts5/test/fts5simple.test a298670508c1458b88ce6030440f26a30673931884eb5f4094ac1773b3ba217b -F ext/fts5/test/fts5simple2.test 258a1b0c590409bfa5271e872c79572b319d2a56554d0585f68f146a0da603f0 +F ext/fts5/test/fts5simple2.test 8dd2389ee75e21a1429fe87e5f8c7d9a97ad1470304a8a2d3ba4b8c3c345fecd F ext/fts5/test/fts5simple3.test d5c74a9d3ca71bd5dd5cacb7c55b86ea12cdddfc8b1910e3de2995206898380f F ext/fts5/test/fts5synonym.test 1651815b8008de170e8e600dcacc17521d765482ea8f074ae82cfa870d8bb7fb F ext/fts5/test/fts5synonym2.test 8f891fc49cc1e8daed727051e77e1f42849c784a6a54bef82564761b2cb3e016 
@@ -2148,8 +2148,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c523f40895866e6fc979a26483dbea8206126b4bbdf4b73b77263c09e13c855e 883990e7938c1f63906300a6113f0fadce143913b7c384e8aeb5f886f0be7c62 -R 070accca4f04b7da788d8a587600de24 +P 8258967411d3ff212424b25fec79ded0d8ae83e773cd35a0bbf300c94923f25b +R d3ee6e0cbe8554b06da39b239b5142bc U dan -Z 0994bbca14fc53076151cd58ec1e7d74 +Z cbe01276ff2d44c618e4cc899051c453 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 1398e08598..359f2cca45 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8258967411d3ff212424b25fec79ded0d8ae83e773cd35a0bbf300c94923f25b \ No newline at end of file +b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b \ No newline at end of file From c44041e03bc4d7ad0a5edbe8277a325eaaf5f5e6 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 2 Dec 2023 17:32:16 +0000 Subject: [PATCH 15/24] Ensure that tokendata=1 queries avoid loading large doclists for queries like "common AND uncommon", just as tokendata=0 queries do. 
FossilOrigin-Name: 7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e --- ext/fts5/fts5Int.h | 2 + ext/fts5/fts5_expr.c | 25 ++++--- ext/fts5/fts5_index.c | 109 ++++++++++++++++++++++++++--- ext/fts5/fts5_main.c | 10 +++ ext/fts5/test/fts5origintext2.test | 1 - ext/fts5/test/fts5origintext3.test | 13 +++- ext/fts5/test/fts5origintext4.test | 66 +++++++++++++++++ manifest | 23 +++--- manifest.uuid | 2 +- 9 files changed, 217 insertions(+), 34 deletions(-) create mode 100644 ext/fts5/test/fts5origintext4.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index cee9528858..911f547d17 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -397,6 +397,7 @@ struct Fts5IndexIter { #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 #define FTS5INDEX_QUERY_SKIPHASH 0x0040 #define FTS5INDEX_QUERY_NOTOKENDATA 0x0080 +#define FTS5INDEX_QUERY_SCANONETERM 0x0100 /* ** Create/destroy an Fts5Index object. @@ -786,6 +787,7 @@ int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); int sqlite3Fts5ExprInstToken(Fts5Expr*, int, int, int, int, const char**, int*); +void sqlite3Fts5ExprClearTokens(Fts5Expr*); /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 89e7cbf364..95d102062d 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3050,17 +3050,6 @@ int sqlite3Fts5ExprPopulatePoslists( sCtx.aPopulator = aPopulator; sCtx.iOff = (((i64)iCol) << 32) - 1; - /* If this is a tokendata=1 table, clear out the hash tables of - ** full-terms. 
*/ - if( pConfig->bTokendata ){ - for(i=0; i<pExpr->nPhrase; i++){ - Fts5ExprTerm *pT; - for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ - sqlite3Fts5IndexIterClearTokendata(pT->pIter); - } - } - } - for(i=0; i<pExpr->nPhrase; i++){ Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; Fts5Colset *pColset = pNode->pNear->pColset; @@ -3225,3 +3214,17 @@ int sqlite3Fts5ExprInstToken( return sqlite3Fts5IterToken(pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut); } +/* +** Clear the token mappings for all Fts5IndexIter objects managed by +** the expression passed as the only argument. +*/ +void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){ + int ii; + for(ii=0; ii<pExpr->nPhrase; ii++){ + Fts5ExprTerm *pT; + for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){ + sqlite3Fts5IndexIterClearTokendata(pT->pIter); + } + } +} + diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 05f4c5e6aa..94b4767677 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -366,6 +366,7 @@ struct Fts5Index { sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */ sqlite3_stmt *pIdxSelect; + sqlite3_stmt *pIdxNextSelect; int nRead; /* Total number of blocks read */ sqlite3_stmt *pDeleteFromIdx; @@ -2660,7 +2661,7 @@ static void fts5SegIterSeekInit( fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); } - if( p->rc==SQLITE_OK && bGe==0 ){ + if( p->rc==SQLITE_OK && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM)) ){ pIter->flags |= FTS5_SEGITER_ONETERM; if( pIter->pLeaf ){ if( flags & FTS5INDEX_QUERY_DESC ){ @@ -2693,6 +2694,79 @@ static void fts5SegIterSeekInit( ); } + +/* +** SQL used by fts5SegIterNextInit() to find the page to open. +*/ +static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){ + if( p->pIdxNextSelect==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintf( + "SELECT pgno FROM '%q'.'%q_idx' WHERE " + "segid=? AND term>? 
ORDER BY term ASC LIMIT 1", + pConfig->zDb, pConfig->zName + )); + + } + return p->pIdxNextSelect; +} + +/* +** This is similar to fts5SegIterSeekInit(), except that it initializes +** the segment iterator to point to the first term following the page +** with pToken/nToken on it. +*/ +static void fts5SegIterNextInit( + Fts5Index *p, + const char *pTerm, int nTerm, + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + int iPg = -1; /* Page of segment to open */ + int bDlidx = 0; + sqlite3_stmt *pSel = 0; /* SELECT to find iPg */ + + pSel = fts5IdxNextStmt(p); + if( pSel ){ + assert( p->rc==SQLITE_OK ); + sqlite3_bind_int(pSel, 1, pSeg->iSegid); + sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC); + + if( sqlite3_step(pSel)==SQLITE_ROW ){ + i64 val = sqlite3_column_int64(pSel, 0); + iPg = (int)(val>>1); + bDlidx = (val & 0x0001); + } + p->rc = sqlite3_reset(pSel); + if( p->rc ) return; + } + + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->flags |= FTS5_SEGITER_ONETERM; + if( iPg>=0 ){ + pIter->iLeafPgno = iPg - 1; + fts5SegIterNextPage(p, pIter); + fts5SegIterSetNext(p, pIter); + fts5SegIterAllocTombstone(p, pIter); + } + if( pIter->pLeaf ){ + const u8 *a = pIter->pLeaf->p; + int iTermOff = 0; + + pIter->iPgidxOff = pIter->pLeaf->szLeaf; + pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff); + pIter->iLeafOffset = iTermOff; + fts5SegIterLoadTerm(p, pIter, 0); + fts5SegIterLoadNPos(p, pIter); + if( bDlidx ) fts5SegIterLoadDlidx(p, pIter); + + assert( p->rc!=SQLITE_OK || + fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0 + ); + } +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within the ** in-memory hash table. 
If there is no such term in the hash-table, the @@ -6346,6 +6420,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ sqlite3_finalize(p->pIdxWriter); sqlite3_finalize(p->pIdxDeleter); sqlite3_finalize(p->pIdxSelect); + sqlite3_finalize(p->pIdxNextSelect); sqlite3_finalize(p->pDataVersion); sqlite3_finalize(p->pDeleteFromIdx); sqlite3Fts5HashFree(p->pHash); @@ -6496,7 +6571,7 @@ static Fts5TokenDataIter *fts5AppendTokendataIter( if( p->rc==SQLITE_OK ){ if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){ int nAlloc = pIn ? pIn->nIterAlloc*2 : 16; - int nByte = nAlloc * sizeof(Fts5Iter*); + int nByte = nAlloc * sizeof(Fts5Iter*) + sizeof(Fts5TokenDataIter); Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte); if( pNew==0 ){ @@ -6513,6 +6588,7 @@ static Fts5TokenDataIter *fts5AppendTokendataIter( }else{ pRet->apIter[pRet->nIter++] = pAppend; } + assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc ); return pRet; } @@ -6747,6 +6823,10 @@ static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ } } +/* +** This function sets up an iterator to use for a non-prefix query on a +** tokendata=1 table. 
+*/ static Fts5Iter *fts5SetupTokendataIter( Fts5Index *p, /* FTS index to query */ const u8 *pToken, /* Buffer containing query term */ @@ -6756,7 +6836,7 @@ static Fts5Iter *fts5SetupTokendataIter( Fts5Iter *pRet = 0; Fts5TokenDataIter *pSet = 0; Fts5Structure *pStruct = 0; - const int flags = FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_SCAN; + const int flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN; Fts5Buffer bSeek = {0, 0, 0}; Fts5Buffer *pSmall = 0; @@ -6787,20 +6867,32 @@ static Fts5Iter *fts5SetupTokendataIter( for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; - fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); + int bDone = 0; - pNewIter++; if( pPrevIter ){ if( fts5BufferCompare(pSmall, &pPrevIter->term) ){ - fts5SegIterSetEOF(pPrevIter); + memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter)); + memset(pPrevIter, 0, sizeof(Fts5SegIter)); + bDone = 1; + }else if( pPrevIter->pLeaf + && pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf + ){ + fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter); + bDone = 1; } - pPrevIter++; } + + if( bDone==0 ){ + fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); + } + + pNewIter++; + if( pPrevIter ) pPrevIter++; } } fts5TokendataSetTermIfEof(pPrev, pSmall); - pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY)); + pNew->bSkipEmpty = 1; pNew->pColset = pColset; fts5IterSetOutputCb(&p->rc, pNew); @@ -7043,7 +7135,6 @@ int sqlite3Fts5IterToken( */ void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL ); if( pIter->pTokenDataIter ){ pIter->pTokenDataIter->nMap = 0; } diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 34050474f8..e911b0c0f9 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -916,6 +916,16 @@ static int 
fts5NextMethod(sqlite3_vtab_cursor *pCursor){ ); assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) ); + /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table, + ** clear any token mappings accumulated at the fts5_index.c level. In + ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH, + ** we need to retain the mappings for the entire query. */ + if( pCsr->ePlan==FTS5_PLAN_MATCH + && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata + ){ + sqlite3Fts5ExprClearTokens(pCsr->pExpr); + } + if( pCsr->ePlan<3 ){ int bSkip = 0; if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test index 948db1c519..a27309fe0c 100644 --- a/ext/fts5/test/fts5origintext2.test +++ b/ext/fts5/test/fts5origintext2.test @@ -98,7 +98,6 @@ do_execsql_test 1.10 { INSERT INTO ft VALUES('WORLD'); } -breakpoint do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3} do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6} do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9} diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test index 2b1e5c6387..ac00bfabc0 100644 --- a/ext/fts5/test/fts5origintext3.test +++ b/ext/fts5/test/fts5origintext3.test @@ -46,7 +46,6 @@ foreach_detail_mode $testprefix { SELECT fts5_test_poslist(ft) FROM ft('hello'); } {{0.0.0 0.0.2 0.0.4}} -breakpoint do_execsql_test 1.3 { SELECT insttoken(ft, 0, 0), @@ -63,6 +62,18 @@ breakpoint FROM ft('hello') ORDER BY rank; } {hello.Hello hello.HELLO hello} + do_execsql_test 1.5 { + CREATE VIRTUAL TABLE ft2 USING fts5( + x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% + ); + INSERT INTO ft2(rowid, x) VALUES(1, 'ONE one two three ONE'); + INSERT INTO ft2(rowid, x) VALUES(2, 'TWO one two three TWO'); + INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE'); + } + + do_execsql_test 1.6 { + SELECT insttoken(ft2, 0, 0), rowid FROM 
ft2('three') ORDER BY rank; + } {three.THREE 3 three 1 three 2} } finish_test diff --git a/ext/fts5/test/fts5origintext4.test b/ext/fts5/test/fts5origintext4.test new file mode 100644 index 0000000000..8973a24b05 --- /dev/null +++ b/ext/fts5/test/fts5origintext4.test @@ -0,0 +1,66 @@ +# 2023 November 22 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext4 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); +} + +do_execsql_test 1.1 { + BEGIN; + INSERT INTO ft SELECT 'the first thing'; + + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<90000 + ) + INSERT INTO ft SELECT 'The second thing' FROM s; + + INSERT INTO ft SELECT 'the first thing'; + COMMIT; + INSERT INTO ft(ft) VALUES('optimize'); +} + +foreach {tn sql expr} { + 1 { SELECT rowid FROM ft('the') } {$mem > 250000} + 2 { SELECT rowid FROM ft('first') } {$mem < 50000} + 3 { SELECT rowid FROM ft('the first') } {$mem < 50000} +} { + db close + sqlite3 db test.db + sqlite3_fts5_register_origintext db + + execsql $sql + do_test 1.2.$tn { + set mem [lindex [sqlite3_db_status db CACHE_USED 0] 1] + expr $expr + } 1 +} + +proc b {x} { string map [list "\0" "."] $x } +db func b b +# execsql_pp { SELECT segid, b(term), pgno from ft_idx } + +finish_test + diff --git a/manifest b/manifest index 92cc5ca8d2..60fd22f769 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C 
Remove\sold\scode\sfor\stokendata=1\squeries. -D 2023-12-01T20:37:11.688 +C Ensure\sthat\stokendata=1\squeries\savoid\sloading\slarge\sdoclists\sfor\squeries\slike\s"common\sAND\suncommon",\sjust\sas\stokendata=0\squeries\sdo. +D 2023-12-02T17:32:16.568 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -90,14 +90,14 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h 5e5630fc81e212f658afaa5b2650dac939d2729d0723aef1eeaff908f1725648 -F ext/fts5/fts5Int.h 2dc73393460e5c5cab67adc7e32e1387cc225b57e05f629d490e65cddea1a8c5 +F ext/fts5/fts5Int.h 285118aa6dfccb382e84eaeb9f7bec334e4f7104efa9303240605447003445c9 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c aac8026aedf56c9a6e32b31c89f9dd7e5548378457085093307d06be14f1a176 +F ext/fts5/fts5_expr.c f83259b52b7b3e76768b835fe155cb7e345affdfafb96574372b4127d5f5496a F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 2296bcd6736eaf093212892474619dfdb7ac1e262732e2b72cb528172c0b13d6 -F ext/fts5/fts5_main.c 20596de592af135f68b9be875f0a28715f6562bbdedd215e1c89eac1b42e97f9 +F ext/fts5/fts5_index.c a02b6ff2d391dd9c2119f437eba1e8af5ac4b2f1798c7c39a93d73de95ad2337 +F ext/fts5/fts5_main.c 075995302198fe6f591fdbbedd415dfac564a9bfc20aea81e6fa0503b2d94af0 F ext/fts5/fts5_storage.c 
5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -191,8 +191,9 @@ F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0703aff5981625b527e62 -F ext/fts5/test/fts5origintext2.test 3259b331073fec918e02fd4d14d50586f9a3531da047a2a8f4624983eb654229 -F ext/fts5/test/fts5origintext3.test cb0f5835f8dff5954ee20570b68ee520cf04a08f6f9ca967b9d01d27e532da37 +F ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969 +F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a +F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2148,8 +2149,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 8258967411d3ff212424b25fec79ded0d8ae83e773cd35a0bbf300c94923f25b -R d3ee6e0cbe8554b06da39b239b5142bc +P b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b +R aa740197e8da931b3ffaf4ddf853a1ed U dan -Z 
cbe01276ff2d44c618e4cc899051c453 +Z 62655ccf950056e7477fcaff4611778d # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 359f2cca45..7fc4017fab 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b \ No newline at end of file +7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e \ No newline at end of file From 94c521295aa898eb07dcfc4cf4ccdeb04ff7d735 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 2 Dec 2023 18:14:07 +0000 Subject: [PATCH 16/24] When tokendata=1 queries require multiple segment-cursors, allow those cursors to share a single array of in-memory tombstone pages. FossilOrigin-Name: e0175d07e4094db5ea4b0378a5ff480dafb6ba9da86a113fa767c4c89c3c866f --- ext/fts5/fts5_index.c | 65 ++++++++++++++++++++++++++++++++----------- manifest | 12 ++++---- manifest.uuid | 2 +- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 94b4767677..9edf184c71 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -324,6 +324,7 @@ typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; typedef struct Fts5TokenDataIter Fts5TokenDataIter; +typedef struct Fts5TombstoneArray Fts5TombstoneArray; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ @@ -520,8 +521,7 @@ struct Fts5SegIter { Fts5Data *pLeaf; /* Current leaf data */ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ i64 iLeafOffset; /* Byte offset within current leaf */ - Fts5Data **apTombstone; /* Array of tombstone pages */ - int nTombstone; + Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */ /* Next method */ void (*xNext)(Fts5Index*, Fts5SegIter*, int*); @@ -548,6 +548,12 @@ struct Fts5SegIter { u8 bDel; /* True if the delete flag is set */ }; +struct Fts5TombstoneArray { + int 
nRef; /* Number of pointers to this object */ + int nTombstone; + Fts5Data *apTombstone[1]; /* Array of tombstone pages */ +}; + /* ** Argument is a pointer to an Fts5Data structure that contains a ** leaf page. @@ -1924,11 +1930,13 @@ static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){ const int nTomb = pIter->pSeg->nPgTombstone; if( nTomb>0 ){ - Fts5Data **apTomb = 0; - apTomb = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data)*nTomb); - if( apTomb ){ - pIter->apTombstone = apTomb; - pIter->nTombstone = nTomb; + int nByte = nTomb * sizeof(Fts5Data*) + sizeof(Fts5TombstoneArray); + Fts5TombstoneArray *pNew; + pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( pNew ){ + pNew->nTombstone = nTomb; + pNew->nRef = 1; + pIter->pTombArray = pNew; } } } @@ -2677,7 +2685,9 @@ static void fts5SegIterSeekInit( } fts5SegIterSetNext(p, pIter); - fts5SegIterAllocTombstone(p, pIter); + if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM) ){ + fts5SegIterAllocTombstone(p, pIter); + } /* Either: ** @@ -2852,6 +2862,19 @@ static void fts5IndexFreeArray(Fts5Data **ap, int n){ } } +static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){ + if( p ){ + p->nRef--; + if( p->nRef<=0 ){ + int ii; + for(ii=0; ii<p->nTombstone; ii++){ + fts5DataRelease(p->apTombstone[ii]); + } + sqlite3_free(p); + } + } +} + /* ** Zero the iterator passed as the only argument. 
*/ @@ -2859,7 +2882,7 @@ static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); fts5DataRelease(pIter->pNextLeaf); - fts5IndexFreeArray(pIter->apTombstone, pIter->nTombstone); + fts5TombstoneArrayDelete(pIter->pTombArray); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); @@ -3248,24 +3271,25 @@ static int fts5IndexTombstoneQuery( static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ int iFirst = pIter->aFirst[1].iFirst; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + Fts5TombstoneArray *pArray = pSeg->pTombArray; - if( pSeg->pLeaf && pSeg->nTombstone ){ + if( pSeg->pLeaf && pArray ){ /* Figure out which page the rowid might be present on. */ - int iPg = ((u64)pSeg->iRowid) % pSeg->nTombstone; + int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone; assert( iPg>=0 ); /* If tombstone hash page iPg has not yet been loaded from the ** database, load it now. */ - if( pSeg->apTombstone[iPg]==0 ){ - pSeg->apTombstone[iPg] = fts5DataRead(pIter->pIndex, + if( pArray->apTombstone[iPg]==0 ){ + pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex, FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg) ); - if( pSeg->apTombstone[iPg]==0 ) return 0; + if( pArray->apTombstone[iPg]==0 ) return 0; } return fts5IndexTombstoneQuery( - pSeg->apTombstone[iPg], - pSeg->nTombstone, + pArray->apTombstone[iPg], + pArray->nTombstone, pSeg->iRowid ); } @@ -6886,6 +6910,15 @@ static Fts5Iter *fts5SetupTokendataIter( fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); } + if( pPrevIter ){ + if( pPrevIter->pTombArray ){ + pNewIter->pTombArray = pPrevIter->pTombArray; + pNewIter->pTombArray->nRef++; + } + }else{ + fts5SegIterAllocTombstone(p, pNewIter); + } + pNewIter++; if( pPrevIter ) pPrevIter++; } diff --git a/manifest b/manifest index 60fd22f769..699d024ca1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C 
Ensure\sthat\stokendata=1\squeries\savoid\sloading\slarge\sdoclists\sfor\squeries\slike\s"common\sAND\suncommon",\sjust\sas\stokendata=0\squeries\sdo. -D 2023-12-02T17:32:16.568 +C When\stokendata=1\squeries\srequire\smultiple\ssegment-cursors,\sallow\sthose\scursors\sto\sshare\sa\ssingle\sarray\sof\sin-memory\stombstone\spages. +D 2023-12-02T18:14:07.393 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -96,7 +96,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf F ext/fts5/fts5_expr.c f83259b52b7b3e76768b835fe155cb7e345affdfafb96574372b4127d5f5496a F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c a02b6ff2d391dd9c2119f437eba1e8af5ac4b2f1798c7c39a93d73de95ad2337 +F ext/fts5/fts5_index.c 21f8f449666ac44c12d5051e153ad84a886a729cb2f5d6ad02a113095c3f8ec6 F ext/fts5/fts5_main.c 075995302198fe6f591fdbbedd415dfac564a9bfc20aea81e6fa0503b2d94af0 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -2149,8 +2149,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b -R aa740197e8da931b3ffaf4ddf853a1ed +P 7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e +R 1ce6343b4aa590c869e9b9aa51095415 U dan -Z 
62655ccf950056e7477fcaff4611778d +Z 8664660cde606ab1a6fdc20b18622c4a # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 7fc4017fab..0cbd084480 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e \ No newline at end of file +e0175d07e4094db5ea4b0378a5ff480dafb6ba9da86a113fa767c4c89c3c866f \ No newline at end of file From c22d2b7b7f2aaccb25e0fb1885f2af6703c3ada3 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 2 Dec 2023 20:35:04 +0000 Subject: [PATCH 17/24] Fix various compiler warnings and other problems with the new code on this branch. FossilOrigin-Name: 3a623cfa173b4035c759cb84985d11d8727053beb383648503987d6ab15c0ef0 --- ext/fts5/fts5.h | 2 +- ext/fts5/fts5Int.h | 5 ++- ext/fts5/fts5_expr.c | 8 ++++ ext/fts5/fts5_index.c | 92 +++++++++++++++++++++++++++++++++++++------ manifest | 18 ++++----- manifest.uuid | 2 +- 6 files changed, 102 insertions(+), 25 deletions(-) diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 9feedbba19..63c9765eb6 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -281,7 +281,7 @@ struct Fts5PhraseIter { ** includes any embedded 0x00 and trailing data. ** ** This API can be quite slow if used with an FTS5 table created with the -** "detail=none" or "detail=column" option. +** "detail=none" or "detail=column" option. */ struct Fts5ExtensionApi { int iVersion; /* Currently always set to 3 */ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 911f547d17..4e4385ce2b 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -467,7 +467,7 @@ void sqlite3Fts5StructureRelease(void*); int sqlite3Fts5StructureTest(Fts5Index*, void*); /* -** Used by xInstToken() and xPhraseToken(). 
+** Used by xInstToken(): */ int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*); @@ -547,8 +547,9 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); -/* Used to populate hash tables for xInstToken in detail=none/column mode. */ void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); + +/* Used to populate hash tables for xInstToken in detail=none/column mode. */ int sqlite3Fts5IndexIterWriteTokendata( Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff ); diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 95d102062d..6f58cf8735 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -2985,6 +2985,11 @@ static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ return 0; } +/* +** pToken is a buffer nToken bytes in size that may or may not contain +** an embedded 0x00 byte. If it does, return the number of bytes in +** the buffer before the 0x00. If it does not, return nToken. +*/ static int fts5QueryTerm(const char *pToken, int nToken){ int ii; for(ii=0; iiapTombstone) for the -** iterator passed as the second argument. If an OOM error occurs, leave -** an error in the Fts5Index object. +** Allocate a tombstone hash page array object (pIter->pTombArray) for +** the iterator passed as the second argument. If an OOM error occurs, +** leave an error in the Fts5Index object. 
*/ static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){ const int nTomb = pIter->pSeg->nPgTombstone; @@ -2748,6 +2752,7 @@ static void fts5SegIterNextInit( bDlidx = (val & 0x0001); } p->rc = sqlite3_reset(pSel); + sqlite3_bind_null(pSel, 2); if( p->rc ) return; } @@ -2772,7 +2777,7 @@ static void fts5SegIterNextInit( if( bDlidx ) fts5SegIterLoadDlidx(p, pIter); assert( p->rc!=SQLITE_OK || - fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0 + fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0 ); } } @@ -2862,6 +2867,10 @@ static void fts5IndexFreeArray(Fts5Data **ap, int n){ } } +/* +** Decrement the ref-count of the object passed as the only argument. If it +** reaches 0, free it and its contents. +*/ static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){ if( p ){ p->nRef--; @@ -3828,6 +3837,10 @@ static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ } } +/* +** All the component segment-iterators of pIter have been set up. This +** function finishes setup for iterator pIter itself. +*/ static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ int iIter; for(iIter=pIter->nSeg-1; iIter>0; iIter--){ @@ -6566,13 +6579,25 @@ static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ pSeg->pLeaf = 0; } -typedef struct Fts5TokenDataMap Fts5TokenDataMap; +/* +** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an +** array of these for each row it visits. Or, for an iterator used by an +** "ORDER BY rank" query, it accumulates an array of these for the entire +** query. +** +** Each instance in the array indicates the iterator (and therefore term) +** associated with position iPos of rowid iRowid. This is used by the +** xInstToken() API. 
+*/ struct Fts5TokenDataMap { - i64 iRowid; - i64 iPos; - int iIter; + i64 iRowid; /* Row this token is located in */ + i64 iPos; /* Position of token */ + int iIter; /* Iterator token was read from */ }; +/* +** An object used to supplement Fts5Iter for tokendata=1 iterators. +*/ struct Fts5TokenDataIter { int nIter; int nIterAlloc; @@ -6585,10 +6610,14 @@ struct Fts5TokenDataIter { Fts5Iter *apIter[1]; }; +/* +** This function appends iterator pAppend to Fts5TokenDataIter pIn and +** returns the result. +*/ static Fts5TokenDataIter *fts5AppendTokendataIter( - Fts5Index *p, - Fts5TokenDataIter *pIn, - Fts5Iter *pAppend + Fts5Index *p, /* Index object (for error code) */ + Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */ + Fts5Iter *pAppend /* Append this iterator */ ){ Fts5TokenDataIter *pRet = pIn; @@ -6617,6 +6646,9 @@ static Fts5TokenDataIter *fts5AppendTokendataIter( return pRet; } +/* +** Delete an Fts5TokenDataIter structure and its contents. +*/ static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ if( pSet ){ int ii; @@ -6629,6 +6661,13 @@ static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ } } +/* +** The iterator passed as the first argument must be a tokendata=1 iterator +** (pIter->pTokenDataIter!=0). This function is used to access the token +** instance located at offset iOff of column iCol of row iRowid. It is +** returned via output pointers *ppOut and *pnOut. This is used by the +** xInstToken() API. +*/ static int fts5TokendataIterToken( Fts5Iter *pIter, i64 iRowid, @@ -6673,6 +6712,9 @@ static int fts5TokendataIterToken( return SQLITE_OK; } +/* +** Append a mapping to the token-map belonging to object pT. +*/ static void fts5TokendataIterAppendMap( Fts5Index *p, Fts5TokenDataIter *pT, @@ -6703,6 +6745,12 @@ static void fts5TokendataIterAppendMap( } } +/* +** The iterator passed as the only argument must be a tokendata=1 iterator +** (pIter->pTokenDataIter!=0). 
This function sets the iterator output +** variables (pIter->base.*) according to the contents of the current +** row. +*/ static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ int ii; int nHit = 0; @@ -6819,6 +6867,13 @@ static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ } } +/* +** The iterator passed as the only argument must be a tokendata=1 iterator +** (pIter->pTokenDataIter!=0). This function advances the iterator. If +** argument bFrom is false, then the iterator is advanced to the next +** entry. Or, if bFrom is true, it is advanced to the first entry with +** a rowid of iFrom or greater. +*/ static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ int ii; Fts5TokenDataIter *pT = pIter->pTokenDataIter; @@ -6841,6 +6896,10 @@ static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ fts5IterSetOutputsTokendata(pIter); } +/* +** If the segment-iterator passed as the first argument is at EOF, then +** set pIter->term to a copy of buffer pTerm. +*/ static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ if( pIter && pIter->aSeg[0].pLeaf==0 ){ fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p); @@ -7144,7 +7203,9 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ } /* -** +** This is used by xInstToken() to access the token at offset iOff, column +** iCol of row iRowid. The token is returned via output variables *ppOut +** and *pnOut. */ int sqlite3Fts5IterToken( Fts5IndexIter *pIndexIter, @@ -7173,6 +7234,13 @@ void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ } } +/* +** Set a token-mapping for the iterator passed as the first argument. This +** is used in detail=column or detail=none mode when a token is requested +** using the xInstToken() API. In this case the caller tokenizes the +** current row and configures the token-mapping via multiple calls to this +** function.
+*/ int sqlite3Fts5IndexIterWriteTokendata( Fts5IndexIter *pIndexIter, const char *pToken, int nToken, diff --git a/manifest b/manifest index 699d024ca1..2353c55e4c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C When\stokendata=1\squeries\srequire\smultiple\ssegment-cursors,\sallow\sthose\scursors\sto\sshare\sa\ssingle\sarray\sof\sin-memory\stombstone\spages. -D 2023-12-02T18:14:07.393 +C Fix\svarious\scompiler\swarnings\sand\sother\sproblems\swith\sthe\snew\scode\son\sthis\sbranch. +D 2023-12-02T20:35:04.768 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -89,14 +89,14 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7 F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 -F ext/fts5/fts5.h 5e5630fc81e212f658afaa5b2650dac939d2729d0723aef1eeaff908f1725648 -F ext/fts5/fts5Int.h 285118aa6dfccb382e84eaeb9f7bec334e4f7104efa9303240605447003445c9 +F ext/fts5/fts5.h ff90acaa97f8e865b66d1177d1b56b8c110fd5548ab5863bab43f055a1d745fe +F ext/fts5/fts5Int.h 1fdbf3d16bdd481fe2ee99927919e4c3db835efae00f8efd7efb5e6a93277459 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c f83259b52b7b3e76768b835fe155cb7e345affdfafb96574372b4127d5f5496a +F ext/fts5/fts5_expr.c 5619c3fab45a78eb5ed3021e3b40ec3b435ef3669293e8700354aa8dd3e6c796 F ext/fts5/fts5_hash.c 
adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 21f8f449666ac44c12d5051e153ad84a886a729cb2f5d6ad02a113095c3f8ec6 +F ext/fts5/fts5_index.c b31bf4f0fb51a15cc1aa54c2f337197740f4f8898347266781ca6970ca751302 F ext/fts5/fts5_main.c 075995302198fe6f591fdbbedd415dfac564a9bfc20aea81e6fa0503b2d94af0 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -2149,8 +2149,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e -R 1ce6343b4aa590c869e9b9aa51095415 +P e0175d07e4094db5ea4b0378a5ff480dafb6ba9da86a113fa767c4c89c3c866f +R a72f98879c56fe0b2489dc56b6289649 U dan -Z 8664660cde606ab1a6fdc20b18622c4a +Z 6a8664529c8f348c40cce12fb229aa10 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 0cbd084480..b4349493de 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e0175d07e4094db5ea4b0378a5ff480dafb6ba9da86a113fa767c4c89c3c866f \ No newline at end of file +3a623cfa173b4035c759cb84985d11d8727053beb383648503987d6ab15c0ef0 \ No newline at end of file From 43b4864a98da1537203eab92e5112d88451884f4 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Dec 2023 15:08:21 +0000 Subject: [PATCH 18/24] Add tests for using tokendata=1 and contentless_delete=1 together. 
FossilOrigin-Name: a2506b8c9718054912270055638204753c4156bbc115e55194e6df9d7e76cb10 --- ext/fts5/test/fts5origintext5.test | 135 +++++++++++++++++++++++++++++ manifest | 11 +-- manifest.uuid | 2 +- 3 files changed, 142 insertions(+), 6 deletions(-) create mode 100644 ext/fts5/test/fts5origintext5.test diff --git a/ext/fts5/test/fts5origintext5.test b/ext/fts5/test/fts5origintext5.test new file mode 100644 index 0000000000..a48dfbb3ab --- /dev/null +++ b/ext/fts5/test/fts5origintext5.test @@ -0,0 +1,135 @@ +# 2023 Dec 04 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests for tables that use both tokendata=1 and contentless_delete=1. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +# Return a random integer between 0 and n-1. +# +proc random {n} { expr {abs(int(rand()*$n))} } + +# Select an element of the list passed as the only argument at random and +# return it. +# +proc select_one {list} { + set n [llength $list] + lindex $list [random $n] +} + +# Given a term that consists entirely of alphabet characters, return all +# permutations of the term using upper and lower case characters. e.g.
+# +# "abc" -> {CBA cBA CbA cbA CBa cBa Cba cba} +# +proc casify {term {lRet {{}}}} { + if {$term==""} { return $lRet } + set t [string range $term 1 end] + set f1 [string toupper [string range $term 0 0]] + set f2 [string tolower [string range $term 0 0]] + set ret [list] + foreach x $lRet { + lappend ret "$x$f1" + lappend ret "$x$f2" + } + return [casify $t $ret] +} + +proc vocab {} { + list abc def ghi jkl mno pqr stu vwx yza +} + +# Return a random 3 letter term. +# +proc term {} { + if {[info exists ::expanded_vocab]==0} { + foreach v [vocab] { lappend ::expanded_vocab {*}[casify $v] } + } + + select_one $::expanded_vocab +} + +# Return a document - between 3 and 6 terms. +# +proc document {} { + set nTerm [expr [random 3] + 3] + set doc "" + for {set ii 0} {$ii < $nTerm} {incr ii} { + lappend doc [term] + } + set doc +} +db func document document + +#------------------------------------------------------------------------- + +set NDOC 200 +set NLOOP 100 + + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", contentless_delete=1, content=, + tokendata=1 + ); + + CREATE TABLE ctrl(id INTEGER PRIMARY KEY, x TEXT); + INSERT INTO ft(ft, rank) VALUES('pgsz', 64); +} +do_test 1.1 { + for {set ii 0} {$ii < $NDOC} {incr ii} { + set doc [document] + execsql { + INSERT INTO ft(rowid, x) VALUES($ii, $doc); + INSERT INTO ctrl(id, x) VALUES($ii, $doc); + } + } +} {} + +proc do_all_vocab_test {tn} { + foreach ::v [vocab] { + set answer [execsql {SELECT id FROM ctrl WHERE x LIKE '%' || $::v || '%'}] + do_execsql_test $tn.$::v { + SELECT rowid FROM ft($::v) + } $answer + } +} + +do_all_vocab_test 1.2 + +for {set ii 0} {$ii < $NLOOP} {incr ii} { + set lRowid [execsql { SELECT id FROM ctrl WHERE random() % 2 }] + foreach r $lRowid { + execsql { DELETE FROM ft WHERE rowid = $r } + execsql { DELETE FROM ctrl WHERE rowid = $r } + + set doc [document] + execsql { INSERT INTO ft(rowid, x) VALUES($r, 
$doc) } + execsql { INSERT INTO ctrl(id, x) VALUES($r, $doc) } + } + do_all_vocab_test 1.3.$ii +} + + + + + +finish_test + diff --git a/manifest b/manifest index 2353c55e4c..61b1c976fd 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\svarious\scompiler\swarnings\sand\sother\sproblems\swith\sthe\snew\scode\son\sthis\sbranch. -D 2023-12-02T20:35:04.768 +C Add\stests\sfor\susing\stokendata=1\sand\scontentless_delete=1\stogether. +D 2023-12-04T15:08:21.125 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -194,6 +194,7 @@ F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0 F ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969 F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 +F ext/fts5/test/fts5origintext5.test 5b9fa1b7d2f8c5f933076000c30aea5b104c00c3f1b767334b87b76d46492e59 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2149,8 +2150,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P e0175d07e4094db5ea4b0378a5ff480dafb6ba9da86a113fa767c4c89c3c866f -R a72f98879c56fe0b2489dc56b6289649 +P 
3a623cfa173b4035c759cb84985d11d8727053beb383648503987d6ab15c0ef0 +R 977c39d056cdc1226d7f3117e124e785 U dan -Z 6a8664529c8f348c40cce12fb229aa10 +Z 8168409535c63203870031d6c1a58cc5 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index b4349493de..0187da7a6e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3a623cfa173b4035c759cb84985d11d8727053beb383648503987d6ab15c0ef0 \ No newline at end of file +a2506b8c9718054912270055638204753c4156bbc115e55194e6df9d7e76cb10 \ No newline at end of file From 9d373ca1c5eee2771e97af122f23264ba2f92576 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Dec 2023 17:05:37 +0000 Subject: [PATCH 19/24] Fix bug in xInstToken() causing the wrong token to be returned. FossilOrigin-Name: da78d07e77cbc783fbc725758911c230fd6a1c1885d9576125de955dcc2bd37f --- ext/fts5/fts5_index.c | 10 +++++-- ext/fts5/test/fts5origintext5.test | 47 ++++++++++++++++++++++++++---- manifest | 14 ++++----- manifest.uuid | 2 +- 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index df08ca2127..095b945838 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6607,6 +6607,7 @@ struct Fts5TokenDataIter { Fts5TokenDataMap *aMap; Fts5PoslistReader *aPoslistReader; + int *aPoslistToIter; Fts5Iter *apIter[1]; }; @@ -6794,16 +6795,19 @@ static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ /* Allocate array of iterators if they are not already allocated. 
*/ if( pT->aPoslistReader==0 ){ - pT->aPoslistReader = sqlite3Fts5MallocZero( - &pIter->pIndex->rc, sizeof(Fts5PoslistReader) * pT->nIter + int nByte = pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int)); + pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero( + &pIter->pIndex->rc, nByte ); if( pT->aPoslistReader==0 ) return; + pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter]; } /* Populate an iterator for each poslist that will be merged */ for(ii=0; iinIter; ii++){ Fts5Iter *p = pT->apIter[ii]; if( iRowid==p->base.iRowid ){ + pT->aPoslistToIter[nReader] = ii; sqlite3Fts5PoslistReaderInit( p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++] ); @@ -6855,7 +6859,7 @@ static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ if( eDetail==FTS5_DETAIL_FULL ){ pT->aMap[pT->nMap].iPos = iMinPos; - pT->aMap[pT->nMap].iIter = iMin; + pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin]; pT->aMap[pT->nMap].iRowid = iRowid; pT->nMap++; } diff --git a/ext/fts5/test/fts5origintext5.test b/ext/fts5/test/fts5origintext5.test index a48dfbb3ab..9421758f88 100644 --- a/ext/fts5/test/fts5origintext5.test +++ b/ext/fts5/test/fts5origintext5.test @@ -65,10 +65,10 @@ proc term {} { select_one $::expanded_vocab } -# Return a document - between 3 and 6 terms. +# Return a document - between 3 and 10 terms. 
# proc document {} { - set nTerm [expr [random 3] + 3] + set nTerm [expr [random 3] + 7] set doc "" for {set ii 0} {$ii < $nTerm} {incr ii} { lappend doc [term] @@ -82,11 +82,39 @@ db func document document set NDOC 200 set NLOOP 100 - sqlite3_fts5_register_origintext db + +proc tokens {cmd} { + for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} { + set txt [$cmd xInstToken $iTok 0] + if {$txt==""} break + set txt [string map [list "\0" "."] $txt] + lappend ret $txt + } + set ret +} +sqlite3_fts5_create_function db tokens tokens + +proc ctrl_tokens {doc term} { + set ret [list] + set term [string tolower $term] + foreach a $doc { + if {[string tolower $a]==$term} { + if {$a==$term} { + lappend ret $a + } else { + lappend ret [string tolower $a].$a + } + } + } + set ret +} +db func ctrl_tokens ctrl_tokens + + do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft USING fts5( - x, tokenize="origintext unicode61", contentless_delete=1, content=, + x, tokenize="origintext unicode61", tokendata=1 ); @@ -105,13 +133,20 @@ do_test 1.1 { proc do_all_vocab_test {tn} { foreach ::v [vocab] { - set answer [execsql {SELECT id FROM ctrl WHERE x LIKE '%' || $::v || '%'}] + set answer [execsql { + SELECT id, ctrl_tokens(x, $::v) FROM ctrl WHERE x LIKE '%' || $::v || '%' + }] do_execsql_test $tn.$::v { - SELECT rowid FROM ft($::v) + SELECT rowid, tokens(ft) FROM ft($::v) } $answer } } +#execsql_pp { SELECT * FROM ctrl } +#execsql_pp { SELECT * FROM ft } +#fts5_aux_test_functions db +#execsql_pp { SELECT rowid, tokens(ft), fts5_test_poslist(ft) FROM ft('ghi'); } + do_all_vocab_test 1.2 for {set ii 0} {$ii < $NLOOP} {incr ii} { diff --git a/manifest b/manifest index 61b1c976fd..4ecc82d692 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sfor\susing\stokendata=1\sand\scontentless_delete=1\stogether. -D 2023-12-04T15:08:21.125 +C Fix\sbug\sin\sxInstToken()\scausing\sthe\swrong\stoken\sto\sbe\sreturned. 
+D 2023-12-04T17:05:37.721 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -96,7 +96,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf F ext/fts5/fts5_expr.c 5619c3fab45a78eb5ed3021e3b40ec3b435ef3669293e8700354aa8dd3e6c796 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c b31bf4f0fb51a15cc1aa54c2f337197740f4f8898347266781ca6970ca751302 +F ext/fts5/fts5_index.c 072666814be04485445c07e0e75362d13245cef6c66e07aa2060c532190d0c10 F ext/fts5/fts5_main.c 075995302198fe6f591fdbbedd415dfac564a9bfc20aea81e6fa0503b2d94af0 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -194,7 +194,7 @@ F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0 F ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969 F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 -F ext/fts5/test/fts5origintext5.test 5b9fa1b7d2f8c5f933076000c30aea5b104c00c3f1b767334b87b76d46492e59 +F ext/fts5/test/fts5origintext5.test f4377b67debb10e3731030ce51245b9843ffb31f85725615b3b8820bd5912702 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 
8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2150,8 +2150,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 3a623cfa173b4035c759cb84985d11d8727053beb383648503987d6ab15c0ef0 -R 977c39d056cdc1226d7f3117e124e785 +P a2506b8c9718054912270055638204753c4156bbc115e55194e6df9d7e76cb10 +R 135aeb3f5184e1f0131c446ddc8eab37 U dan -Z 8168409535c63203870031d6c1a58cc5 +Z 33215e49f8892bbcd31004428ffced69 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 0187da7a6e..60beff1eed 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a2506b8c9718054912270055638204753c4156bbc115e55194e6df9d7e76cb10 \ No newline at end of file +da78d07e77cbc783fbc725758911c230fd6a1c1885d9576125de955dcc2bd37f \ No newline at end of file From 3dfc063705277f68540fc8aa3d55967481ebf042 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Dec 2023 17:45:33 +0000 Subject: [PATCH 20/24] Fix a problem with the xInstCount() API and "ORDER BY rank" queries. 
FossilOrigin-Name: 317a50563d9e8586fda136e513727241b414e7267d50a06571c8ebd0eae710bc --- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_expr.c | 2 +- ext/fts5/fts5_main.c | 3 ++- ext/fts5/test/fts5origintext5.test | 17 ++++++++++++++--- manifest | 18 +++++++++--------- manifest.uuid | 2 +- 6 files changed, 28 insertions(+), 16 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 4e4385ce2b..9beb26e056 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -787,7 +787,7 @@ int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); -int sqlite3Fts5ExprInstToken(Fts5Expr*, int, int, int, int, const char**, int*); +int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*); void sqlite3Fts5ExprClearTokens(Fts5Expr*); /******************************************* diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 6f58cf8735..02d7e78cbe 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3199,6 +3199,7 @@ int sqlite3Fts5ExprQueryToken( */ int sqlite3Fts5ExprInstToken( Fts5Expr *pExpr, + i64 iRowid, int iPhrase, int iCol, int iOff, @@ -3208,7 +3209,6 @@ int sqlite3Fts5ExprInstToken( ){ Fts5ExprPhrase *pPhrase = 0; Fts5IndexIter *pIter = 0; - i64 iRowid = pExpr->pRoot->iRowid; if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ return SQLITE_RANGE; diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index e911b0c0f9..c183f3c579 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -2364,9 +2364,10 @@ static int fts5ApiInstToken( int iPhrase = pCsr->aInst[iIdx*3]; int iCol = pCsr->aInst[iIdx*3 + 1]; int iOff = pCsr->aInst[iIdx*3 + 2]; + i64 iRowid = fts5CursorRowid(pCsr); rc = sqlite3Fts5ExprInstToken( - pCsr->pExpr, iPhrase, iCol, iOff, iToken, ppOut, pnOut + pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut ); } } diff --git 
a/ext/fts5/test/fts5origintext5.test b/ext/fts5/test/fts5origintext5.test index 9421758f88..b425c582e8 100644 --- a/ext/fts5/test/fts5origintext5.test +++ b/ext/fts5/test/fts5origintext5.test @@ -79,15 +79,17 @@ db func document document #------------------------------------------------------------------------- +expr srand(6) + set NDOC 200 set NLOOP 100 sqlite3_fts5_register_origintext db proc tokens {cmd} { + set ret [list] for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} { set txt [$cmd xInstToken $iTok 0] - if {$txt==""} break set txt [string map [list "\0" "."] $txt] lappend ret $txt } @@ -95,6 +97,11 @@ proc tokens {cmd} { } sqlite3_fts5_create_function db tokens tokens +proc rankfunc {cmd} { + $cmd xRowid +} +sqlite3_fts5_create_function db rankfunc rankfunc + proc ctrl_tokens {doc term} { set ret [list] set term [string tolower $term] @@ -114,12 +121,13 @@ db func ctrl_tokens ctrl_tokens do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft USING fts5( - x, tokenize="origintext unicode61", + x, tokenize="origintext unicode61", content=, contentless_delete=1, tokendata=1 ); CREATE TABLE ctrl(id INTEGER PRIMARY KEY, x TEXT); INSERT INTO ft(ft, rank) VALUES('pgsz', 64); + INSERT INTO ft(ft, rank) VALUES('rank', 'rankfunc()'); } do_test 1.1 { for {set ii 0} {$ii < $NDOC} {incr ii} { @@ -136,9 +144,12 @@ proc do_all_vocab_test {tn} { set answer [execsql { SELECT id, ctrl_tokens(x, $::v) FROM ctrl WHERE x LIKE '%' || $::v || '%' }] - do_execsql_test $tn.$::v { + do_execsql_test $tn.$::v.1 { SELECT rowid, tokens(ft) FROM ft($::v) } $answer + do_execsql_test $tn.$::v.2 { + SELECT rowid, tokens(ft) FROM ft($::v) ORDER BY rank + } $answer } } diff --git a/manifest b/manifest index 4ecc82d692..00c355a167 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sbug\sin\sxInstToken()\scausing\sthe\swrong\stoken\sto\sbe\sreturned. -D 2023-12-04T17:05:37.721 +C Fix\sa\sproblem\swith\sthe\sxInstCount()\sAPI\sand\s"ORDER\sBY\srank"\squeries. 
+D 2023-12-04T17:45:33.714 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -90,14 +90,14 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h ff90acaa97f8e865b66d1177d1b56b8c110fd5548ab5863bab43f055a1d745fe -F ext/fts5/fts5Int.h 1fdbf3d16bdd481fe2ee99927919e4c3db835efae00f8efd7efb5e6a93277459 +F ext/fts5/fts5Int.h defa43c0932265138ee910ca416e6baccf8b774e0f3d610e74be1ab2880e9834 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c 5619c3fab45a78eb5ed3021e3b40ec3b435ef3669293e8700354aa8dd3e6c796 +F ext/fts5/fts5_expr.c aec893108d90cc8eae6801283d38d4e705d8c22aba12717fcc56e1910b8e3b32 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c 072666814be04485445c07e0e75362d13245cef6c66e07aa2060c532190d0c10 -F ext/fts5/fts5_main.c 075995302198fe6f591fdbbedd415dfac564a9bfc20aea81e6fa0503b2d94af0 +F ext/fts5/fts5_main.c 1fbadf63a7381fd68753efdccce19683f587668489b1c5a9c7bf53d6e9b64872 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -194,7 +194,7 @@ F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0 F 
ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969 F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 -F ext/fts5/test/fts5origintext5.test f4377b67debb10e3731030ce51245b9843ffb31f85725615b3b8820bd5912702 +F ext/fts5/test/fts5origintext5.test f9dfb005248d764fdc52859308875f680b523a897258a2cc40100f9e2356b5a9 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2150,8 +2150,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P a2506b8c9718054912270055638204753c4156bbc115e55194e6df9d7e76cb10 -R 135aeb3f5184e1f0131c446ddc8eab37 +P da78d07e77cbc783fbc725758911c230fd6a1c1885d9576125de955dcc2bd37f +R 17d195e142931273edf7c88c21ff2164 U dan -Z 33215e49f8892bbcd31004428ffced69 +Z 7652ccb47cdd7a8e28db861627789490 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 60beff1eed..f1f57381d1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -da78d07e77cbc783fbc725758911c230fd6a1c1885d9576125de955dcc2bd37f \ No newline at end of file +317a50563d9e8586fda136e513727241b414e7267d50a06571c8ebd0eae710bc \ No newline at end of file From 910c77b049f6d64866bc8d0a0745f8ed446e0dfa Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Dec 2023 17:58:56 +0000 Subject: [PATCH 21/24] Fix memory leak in new code on this branch. 
FossilOrigin-Name: ebc160b9a05568df66f86e30804399ee29d34b44a60c57e062f98cb92826353f --- ext/fts5/fts5_index.c | 1 - manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 095b945838..a24308534c 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2763,7 +2763,6 @@ static void fts5SegIterNextInit( pIter->iLeafPgno = iPg - 1; fts5SegIterNextPage(p, pIter); fts5SegIterSetNext(p, pIter); - fts5SegIterAllocTombstone(p, pIter); } if( pIter->pLeaf ){ const u8 *a = pIter->pLeaf->p; diff --git a/manifest b/manifest index 00c355a167..355143083f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sthe\sxInstCount()\sAPI\sand\s"ORDER\sBY\srank"\squeries. -D 2023-12-04T17:45:33.714 +C Fix\smemory\sleak\sin\snew\scode\son\sthis\sbranch. +D 2023-12-04T17:58:56.324 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -96,7 +96,7 @@ F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b7292 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf F ext/fts5/fts5_expr.c aec893108d90cc8eae6801283d38d4e705d8c22aba12717fcc56e1910b8e3b32 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 072666814be04485445c07e0e75362d13245cef6c66e07aa2060c532190d0c10 +F ext/fts5/fts5_index.c 7111fed6c01048e227cc8c681306fa2db1e5ad29429b1373e665b8e95a7868ba F ext/fts5/fts5_main.c 1fbadf63a7381fd68753efdccce19683f587668489b1c5a9c7bf53d6e9b64872 F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -2150,8 
+2150,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P da78d07e77cbc783fbc725758911c230fd6a1c1885d9576125de955dcc2bd37f -R 17d195e142931273edf7c88c21ff2164 +P 317a50563d9e8586fda136e513727241b414e7267d50a06571c8ebd0eae710bc +R 4cc8f92a68d4e11e53934c982deb6dc3 U dan -Z 7652ccb47cdd7a8e28db861627789490 +Z c8f15e3d33aff662e610fc8982efce75 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index f1f57381d1..bb8887198f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -317a50563d9e8586fda136e513727241b414e7267d50a06571c8ebd0eae710bc \ No newline at end of file +ebc160b9a05568df66f86e30804399ee29d34b44a60c57e062f98cb92826353f \ No newline at end of file From 54318b382a982fa08dfe3d034156afa20550aea9 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Dec 2023 18:45:14 +0000 Subject: [PATCH 22/24] Fixes for xInstToken() with tokendata=0 tables. And with prefix queries. 
FossilOrigin-Name: 78fbb71598b1ca756acc078253880a1d0f7983a5a26b9efc683e6488122505a1 --- ext/fts5/fts5_expr.c | 18 ++++++++++---- ext/fts5/fts5_main.c | 1 - ext/fts5/test/fts5origintext.test | 39 +++++++++++++++++++++++++++++++ manifest | 16 ++++++------- manifest.uuid | 2 +- 5 files changed, 62 insertions(+), 14 deletions(-) diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 02d7e78cbe..c5de6eba6c 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -3208,7 +3208,8 @@ int sqlite3Fts5ExprInstToken( int *pnOut ){ Fts5ExprPhrase *pPhrase = 0; - Fts5IndexIter *pIter = 0; + Fts5ExprTerm *pTerm = 0; + int rc = SQLITE_OK; if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ return SQLITE_RANGE; @@ -3217,9 +3218,18 @@ int sqlite3Fts5ExprInstToken( if( iToken<0 || iToken>=pPhrase->nTerm ){ return SQLITE_RANGE; } - pIter = pPhrase->aTerm[iToken].pIter; - - return sqlite3Fts5IterToken(pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut); + pTerm = &pPhrase->aTerm[iToken]; + if( pTerm->bPrefix==0 ){ + if( pExpr->pConfig->bTokendata ){ + rc = sqlite3Fts5IterToken( + pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut + ); + }else{ + *ppOut = pTerm->pTerm; + *pnOut = pTerm->nFullTerm; + } + } + return rc; } /* diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index c183f3c579..d35e998da0 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -2365,7 +2365,6 @@ static int fts5ApiInstToken( int iCol = pCsr->aInst[iIdx*3 + 1]; int iOff = pCsr->aInst[iIdx*3 + 2]; i64 iRowid = fts5CursorRowid(pCsr); - rc = sqlite3Fts5ExprInstToken( pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut ); diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 8273b3ca4d..9752f35d34 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -252,6 +252,45 @@ do_execsql_test 5.3 { {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5} } +#------------------------------------------------------------------------- +# Test 
the xInstToken() API with: +# +# * a non tokendata=1 table. +# * prefix queries. +# +reset_db +sqlite3_fts5_register_origintext db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, y, tokenize='origintext unicode61', detail=%DETAIL% + ); + + INSERT INTO ft VALUES('One Two', 'Three two'); + INSERT INTO ft VALUES('three Three', 'one One'); +} +proc tokens {cmd} { + set ret [list] + for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} { + set txt [$cmd xInstToken $iTok 0] + set txt [string map [list "\0" "."] $txt] + lappend ret $txt + } + set ret +} +sqlite3_fts5_create_function db tokens tokens + +do_execsql_test 6.1 { + SELECT rowid, tokens(ft) FROM ft('One'); +} {1 one.One 2 one.One} + +do_execsql_test 6.2 { + SELECT rowid, tokens(ft) FROM ft('on*'); +} {1 {{}} 2 {{} {}}} + +do_execsql_test 6.3 { + SELECT rowid, tokens(ft) FROM ft('Three*'); +} {1 {{}} 2 {{}}} + } finish_test diff --git a/manifest b/manifest index 355143083f..52485ee8e6 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\smemory\sleak\sin\snew\scode\son\sthis\sbranch. -D 2023-12-04T17:58:56.324 +C Fixes\sfor\sxInstToken()\swith\stokendata=0\stables.\sAnd\swith\sprefix\squeries. 
+D 2023-12-04T18:45:14.192 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -94,10 +94,10 @@ F ext/fts5/fts5Int.h defa43c0932265138ee910ca416e6baccf8b774e0f3d610e74be1ab2880 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c aec893108d90cc8eae6801283d38d4e705d8c22aba12717fcc56e1910b8e3b32 +F ext/fts5/fts5_expr.c 920516be4aac44eccdd9e747fe26f367442848b9a58971e4b36edb0c8284b6b8 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c 7111fed6c01048e227cc8c681306fa2db1e5ad29429b1373e665b8e95a7868ba -F ext/fts5/fts5_main.c 1fbadf63a7381fd68753efdccce19683f587668489b1c5a9c7bf53d6e9b64872 +F ext/fts5/fts5_main.c fb7ec495d663f40d18e420e1986316591041a70e1e4b4696ab2a7384e4c7fd7a F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -190,7 +190,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 6574e8d2121460cda72866afe3e582693d9992f150b0703aff5981625b527e62 +F 
ext/fts5/test/fts5origintext.test d2796fa08ee7aecfabdc0c45bb8a2fb16a00ea8757e63fbc153b718dbe430a39 F ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969 F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 @@ -2150,8 +2150,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 317a50563d9e8586fda136e513727241b414e7267d50a06571c8ebd0eae710bc -R 4cc8f92a68d4e11e53934c982deb6dc3 +P ebc160b9a05568df66f86e30804399ee29d34b44a60c57e062f98cb92826353f +R 4a97cb4432f9a2f4a872da3bc4853a9c U dan -Z c8f15e3d33aff662e610fc8982efce75 +Z d5e27dba49298422b322a3619fa6b5f7 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index bb8887198f..667968c045 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ebc160b9a05568df66f86e30804399ee29d34b44a60c57e062f98cb92826353f \ No newline at end of file +78fbb71598b1ca756acc078253880a1d0f7983a5a26b9efc683e6488122505a1 \ No newline at end of file From 49bfbc1ef37784f9f205f06c272716b058d5be3e Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Dec 2023 19:48:08 +0000 Subject: [PATCH 23/24] Add further tests for xInstToken(). 
FossilOrigin-Name: 8582707f16133f003a6687f68cbea03d4eb6c2a0e2e07746b7cace0c44e84fa4 --- ext/fts5/test/fts5origintext5.test | 96 +++++++++++++++++++++++------- manifest | 12 ++-- manifest.uuid | 2 +- 3 files changed, 81 insertions(+), 29 deletions(-) diff --git a/ext/fts5/test/fts5origintext5.test b/ext/fts5/test/fts5origintext5.test index b425c582e8..89a5d8f39c 100644 --- a/ext/fts5/test/fts5origintext5.test +++ b/ext/fts5/test/fts5origintext5.test @@ -82,7 +82,7 @@ db func document document expr srand(6) set NDOC 200 -set NLOOP 100 +set NLOOP 50 sqlite3_fts5_register_origintext db @@ -102,15 +102,17 @@ proc rankfunc {cmd} { } sqlite3_fts5_create_function db rankfunc rankfunc -proc ctrl_tokens {doc term} { +proc ctrl_tokens {term args} { set ret [list] set term [string tolower $term] - foreach a $doc { - if {[string tolower $a]==$term} { - if {$a==$term} { - lappend ret $a - } else { - lappend ret [string tolower $a].$a + foreach doc $args { + foreach a $doc { + if {[string tolower $a]==$term} { + if {$a==$term} { + lappend ret $a + } else { + lappend ret [string tolower $a].$a + } } } } @@ -118,6 +120,20 @@ proc ctrl_tokens {doc term} { } db func ctrl_tokens ctrl_tokens +proc do_all_vocab_test {tn} { + foreach ::v [vocab] { + set answer [execsql { + SELECT id, ctrl_tokens($::v, x) FROM ctrl WHERE x LIKE '%' || $::v || '%' + }] + do_execsql_test $tn.$::v.1 { + SELECT rowid, tokens(ft) FROM ft($::v) + } $answer + do_execsql_test $tn.$::v.2 { + SELECT rowid, tokens(ft) FROM ft($::v) ORDER BY rank + } $answer + } +} + do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft USING fts5( @@ -139,20 +155,6 @@ do_test 1.1 { } } {} -proc do_all_vocab_test {tn} { - foreach ::v [vocab] { - set answer [execsql { - SELECT id, ctrl_tokens(x, $::v) FROM ctrl WHERE x LIKE '%' || $::v || '%' - }] - do_execsql_test $tn.$::v.1 { - SELECT rowid, tokens(ft) FROM ft($::v) - } $answer - do_execsql_test $tn.$::v.2 { - SELECT rowid, tokens(ft) FROM ft($::v) ORDER BY rank - } $answer - } -} - 
#execsql_pp { SELECT * FROM ctrl } #execsql_pp { SELECT * FROM ft } #fts5_aux_test_functions db @@ -173,9 +175,59 @@ for {set ii 0} {$ii < $NLOOP} {incr ii} { do_all_vocab_test 1.3.$ii } +#------------------------------------------------------------------------- +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft2 USING fts5( + x, y, tokenize="origintext unicode61", content=, contentless_delete=1, + tokendata=1 + ); + CREATE TABLE ctrl2(id INTEGER PRIMARY KEY, x TEXT, y TEXT); + INSERT INTO ft2(ft2, rank) VALUES('pgsz', 64); + INSERT INTO ft2(ft2, rank) VALUES('rank', 'rankfunc()'); +} +do_test 2.1 { + for {set ii 0} {$ii < $NDOC} {incr ii} { + set doc1 [document] + set doc2 [document] + execsql { + INSERT INTO ft2(rowid, x, y) VALUES($ii, $doc, $doc2); + INSERT INTO ctrl2(id, x, y) VALUES($ii, $doc, $doc2); + } + } +} {} +proc do_all_vocab_test2 {tn} { + foreach ::v [vocab] { + set answer [execsql { + SELECT id, ctrl_tokens($::v, x, y) FROM ctrl2 + WHERE x LIKE '%' || $::v || '%' OR y LIKE '%' || $::v || '%'; + }] + do_execsql_test $tn.$::v.1 { + SELECT rowid, tokens(ft2) FROM ft2($::v) + } $answer + do_execsql_test $tn.$::v.2 { + SELECT rowid, tokens(ft2) FROM ft2($::v) ORDER BY rank + } $answer + } +} + +do_all_vocab_test2 2.2 + +for {set ii 0} {$ii < $NLOOP} {incr ii} { + set lRowid [execsql { SELECT id FROM ctrl2 WHERE random() % 2 }] + foreach r $lRowid { + execsql { DELETE FROM ft2 WHERE rowid = $r } + execsql { DELETE FROM ctrl2 WHERE rowid = $r } + + set doc1 [document] + set doc2 [document] + execsql { INSERT INTO ft2(rowid, x, y) VALUES($r, $doc, $doc1) } + execsql { INSERT INTO ctrl2(id, x, y) VALUES($r, $doc, $doc2) } + } + do_all_vocab_test 2.3.$ii +} finish_test diff --git a/manifest b/manifest index 52485ee8e6..11d57c4176 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sfor\sxInstToken()\swith\stokendata=0\stables.\sAnd\swith\sprefix\squeries. -D 2023-12-04T18:45:14.192 +C Add\sfurther\stests\sfor\sxInstToken(). 
+D 2023-12-04T19:48:08.530 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -194,7 +194,7 @@ F ext/fts5/test/fts5origintext.test d2796fa08ee7aecfabdc0c45bb8a2fb16a00ea8757e6 F ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969 F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 -F ext/fts5/test/fts5origintext5.test f9dfb005248d764fdc52859308875f680b523a897258a2cc40100f9e2356b5a9 +F ext/fts5/test/fts5origintext5.test 067bfb3008323585df640ab29e8ef7c4ca6dec62c597be07a9f896d88f98cd10 F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2150,8 +2150,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P ebc160b9a05568df66f86e30804399ee29d34b44a60c57e062f98cb92826353f -R 4a97cb4432f9a2f4a872da3bc4853a9c +P 78fbb71598b1ca756acc078253880a1d0f7983a5a26b9efc683e6488122505a1 +R ced1cc2c59284a9ef3026043e080f745 U dan -Z d5e27dba49298422b322a3619fa6b5f7 +Z e2cce7acff968ef3c9472723192ae452 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 667968c045..780ed88175 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -78fbb71598b1ca756acc078253880a1d0f7983a5a26b9efc683e6488122505a1 \ No newline at end of file +8582707f16133f003a6687f68cbea03d4eb6c2a0e2e07746b7cace0c44e84fa4 \ No newline at end of file From fb923fc4ccb79fe27d6daa9477ac2ef55b1da9bc Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 5 Dec 2023 18:36:23 +0000 Subject: [PATCH 24/24] Further tests for the new code on this branch. FossilOrigin-Name: 59d008b6c23ab900377bc696ee19381feb7614bac80546eae361e401c3620c4e --- ext/fts5/fts5_expr.c | 6 +- ext/fts5/fts5_index.c | 113 ++++++++++++----------------- ext/fts5/test/fts5faultH.test | 93 ++++++++++++++++++++++++ ext/fts5/test/fts5origintext2.test | 39 ++++++++++ ext/fts5/test/fts5origintext3.test | 21 ++++++ ext/fts5/test/fts5origintext5.test | 44 ++++++++++- manifest | 21 +++--- manifest.uuid | 2 +- 8 files changed, 257 insertions(+), 82 deletions(-) create mode 100644 ext/fts5/test/fts5faultH.test diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index c5de6eba6c..bc7c9741ee 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1768,7 +1768,9 @@ static int fts5ParseTokenize( memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); pTerm->nFullTerm = pTerm->nQueryTerm = nToken; - if( pCtx->pConfig->bTokendata ) pTerm->nQueryTerm = strlen(pTerm->pTerm); + if( pCtx->pConfig->bTokendata && rc==SQLITE_OK ){ + pTerm->nQueryTerm = strlen(pTerm->pTerm); + } } } @@ -3027,7 +3029,7 @@ static int fts5ExprPopulatePoslistsCb( int rc = sqlite3Fts5PoslistWriterAppend( &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff ); - if( rc==SQLITE_OK && pExpr->pConfig->bTokendata ){ + if( rc==SQLITE_OK && pExpr->pConfig->bTokendata && !pT->bPrefix ){ int iCol = p->iOff>>32; int iTokOff = p->iOff & 0x7FFFFFFF; rc = sqlite3Fts5IndexIterWriteTokendata( diff --git a/ext/fts5/fts5_index.c 
b/ext/fts5/fts5_index.c index a24308534c..9a2cc026b9 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6661,57 +6661,6 @@ static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ } } -/* -** The iterator passed as the first argument must be a tokendata=1 iterator -** (pIter->pTokenDataIter!=0). This function is used to access the token -** instance located at offset iOff of column iCol of row iRowid. It is -** returned via output pointers *ppOut and *pnOut. This is used by the -** xInstToken() API. -*/ -static int fts5TokendataIterToken( - Fts5Iter *pIter, - i64 iRowid, - int iCol, int iOff, - const char **ppOut, int *pnOut -){ - Fts5TokenDataIter *pT = pIter->pTokenDataIter; - Fts5TokenDataMap *aMap = pT->aMap; - i64 iPos = (((i64)iCol)<<32) + iOff; - - int i1 = 0; - int i2 = pT->nMap; - int iTest = 0; - - while( i2>i1 ){ - iTest = (i1 + i2) / 2; - - if( aMap[iTest].iRowidiRowid ){ - i2 = iTest; - }else{ - if( aMap[iTest].iPosiPos ){ - i2 = iTest; - }else{ - break; - } - } - } - - if( i2>i1 ){ - Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; - *ppOut = (const char*)pMap->aSeg[0].term.p+1; - *pnOut = pMap->aSeg[0].term.n-1; - } - - return SQLITE_OK; -} - /* ** Append a mapping to the token-map belonging to object pT. 
*/ @@ -6960,9 +6909,7 @@ static Fts5Iter *fts5SetupTokendataIter( memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter)); memset(pPrevIter, 0, sizeof(Fts5SegIter)); bDone = 1; - }else if( pPrevIter->pLeaf - && pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf - ){ + }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){ fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter); bDone = 1; } @@ -7071,7 +7018,7 @@ int sqlite3Fts5IndexQuery( if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ int iIdx = 0; /* Index to search */ int iPrefixIdx = 0; /* +1 prefix index */ - int bTokendata = (flags&FTS5INDEX_QUERY_NOTOKENDATA)?0:pConfig->bTokendata; + int bTokendata = pConfig->bTokendata; if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); /* Figure out which index to search and set iIdx accordingly. If this @@ -7085,6 +7032,7 @@ int sqlite3Fts5IndexQuery( ** for internal sanity checking by the integrity-check in debug ** mode only. */ #ifdef SQLITE_DEBUG + if( flags & FTS5INDEX_QUERY_NOTOKENDATA ) bTokendata = 0; if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; @@ -7208,7 +7156,8 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ /* ** This is used by xInstToken() to access the token at offset iOff, column ** iCol of row iRowid. The token is returned via output variables *ppOut -** and *pnOut. +** and *pnOut. The iterator passed as the first argument must be a tokendata=1 +** iterator (pIter->pTokenDataIter!=0). 
*/ int sqlite3Fts5IterToken( Fts5IndexIter *pIndexIter, @@ -7218,9 +7167,39 @@ int sqlite3Fts5IterToken( const char **ppOut, int *pnOut ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + Fts5TokenDataMap *aMap = pT->aMap; + i64 iPos = (((i64)iCol)<<32) + iOff; - if( pIter->pTokenDataIter ){ - return fts5TokendataIterToken(pIter, iRowid, iCol, iOff, ppOut, pnOut); + int i1 = 0; + int i2 = pT->nMap; + int iTest = 0; + + while( i2>i1 ){ + iTest = (i1 + i2) / 2; + + if( aMap[iTest].iRowidiRowid ){ + i2 = iTest; + }else{ + if( aMap[iTest].iPosiPos ){ + i2 = iTest; + }else{ + break; + } + } + } + + if( i2>i1 ){ + Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; + *ppOut = (const char*)pMap->aSeg[0].term.p+1; + *pnOut = pMap->aSeg[0].term.n-1; } return SQLITE_OK; @@ -7252,17 +7231,17 @@ int sqlite3Fts5IndexIterWriteTokendata( Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5TokenDataIter *pT = pIter->pTokenDataIter; Fts5Index *p = pIter->pIndex; + int ii; assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL ); - if( pT ){ - int ii; - for(ii=0; iinIter; ii++){ - Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; - if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; - } - if( iinIter ){ - fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff); - } + assert( pIter->pTokenDataIter ); + + for(ii=0; iinIter; ii++){ + Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; + if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; + } + if( iinIter ){ + fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff); } return fts5IndexReturn(p); } diff --git a/ext/fts5/test/fts5faultH.test b/ext/fts5/test/fts5faultH.test new file mode 100644 index 0000000000..9dd4cac0d6 --- /dev/null +++ b/ext/fts5/test/fts5faultH.test @@ -0,0 +1,93 @@ +# 2010 June 15 +# +# The author disclaims copyright to this source code. 
In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5faultG + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +set ::testprefix fts5faultH + +sqlite3_fts5_register_origintext db + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); + + BEGIN; + INSERT INTO t1 VALUES('oNe tWo thRee'); + INSERT INTO t1 VALUES('One Two Three'); + INSERT INTO t1 VALUES('onE twO threE'); + COMMIT; + BEGIN; + INSERT INTO t1 VALUES('one two three'); + INSERT INTO t1 VALUES('one two three'); + INSERT INTO t1 VALUES('one two three'); + COMMIT; +} + +do_faultsim_test 1 -faults oom* -prep { +} -body { + execsql { + SELECT rowid FROM t1('three'); + } +} -test { + faultsim_integrity_check + faultsim_test_result {0 {1 2 3 4 5 6}} +} + + +reset_db +sqlite3_fts5_register_origintext db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); + + BEGIN; + INSERT INTO t1(rowid, x) VALUES(10, 'aaa bbb BBB'); + INSERT INTO t1(rowid, x) VALUES(12, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(13, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(14, 'bbb BBB bbb'); + INSERT INTO t1(rowid, x) VALUES(15, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(16, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(17, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(18, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(19, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(20, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(21, 'bbb bbb 
bbb'); + INSERT INTO t1(rowid, x) VALUES(22, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(23, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(24, 'aaa bbb BBB'); + COMMIT; +} + +do_faultsim_test 2 -faults oom* -prep { +} -body { + execsql { + SELECT rowid FROM t1('BBB AND AAA'); + } +} -test { + faultsim_integrity_check + faultsim_test_result {0 {10 24}} +} + + + +finish_test diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test index a27309fe0c..a8c5d4eb50 100644 --- a/ext/fts5/test/fts5origintext2.test +++ b/ext/fts5/test/fts5origintext2.test @@ -103,5 +103,44 @@ do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6} do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9} do_execsql_test 1.14 { SELECT rowid FROM ft('hello') ORDER BY rank; } {1 2 3} +#------------------------------------------------------------------------ +reset_db +sqlite3_fts5_register_origintext db +proc tokens {cmd} { + set ret [list] + for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} { + set txt [$cmd xInstToken $iTok 0] + set txt [string map [list "\0" "."] $txt] + lappend ret $txt + } + set ret +} +sqlite3_fts5_create_function db tokens tokens + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE x1 USING fts5( + v, tokenize="origintext unicode61", tokendata=1, detail=none + ); + + INSERT INTO x1 VALUES('xxx Xxx XXX yyy YYY yyy'); + INSERT INTO x1 VALUES('xxx yyy xxx yyy yyy yyy'); +} + +do_execsql_test 2.1 { + SELECT tokens(x1) FROM x1('xxx'); +} { + {xxx xxx.Xxx xxx.XXX} {xxx xxx} +} + +do_execsql_test 2.2 { + UPDATE x1_content SET c0 = 'xxx xxX xxx yyy yyy yyy' WHERE id=1; +} + +do_execsql_test 2.3 { + SELECT tokens(x1) FROM x1('xxx'); +} { + {xxx {} xxx} {xxx xxx} +} + finish_test diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test index ac00bfabc0..834844595d 100644 --- a/ext/fts5/test/fts5origintext3.test +++ b/ext/fts5/test/fts5origintext3.test @@ -74,6 +74,27 @@ foreach_detail_mode $testprefix 
{ do_execsql_test 1.6 { SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank; } {three.THREE 3 three 1 three 2} + + do_execsql_test 1.7 { + INSERT INTO ft2(rowid, x) VALUES(10, 'aaa bbb BBB'); + INSERT INTO ft2(rowid, x) VALUES(12, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(13, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(14, 'bbb BBB bbb'); + INSERT INTO ft2(rowid, x) VALUES(15, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(16, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(17, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(18, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(19, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(20, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(21, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(22, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(23, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(24, 'aaa bbb BBB'); + } + + do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24} + do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24} + } finish_test diff --git a/ext/fts5/test/fts5origintext5.test b/ext/fts5/test/fts5origintext5.test index 89a5d8f39c..03d5bee215 100644 --- a/ext/fts5/test/fts5origintext5.test +++ b/ext/fts5/test/fts5origintext5.test @@ -121,7 +121,7 @@ proc ctrl_tokens {term args} { db func ctrl_tokens ctrl_tokens proc do_all_vocab_test {tn} { - foreach ::v [vocab] { + foreach ::v [concat [vocab] nnn] { set answer [execsql { SELECT id, ctrl_tokens($::v, x) FROM ctrl WHERE x LIKE '%' || $::v || '%' }] @@ -134,7 +134,6 @@ proc do_all_vocab_test {tn} { } } - do_execsql_test 1.0 { CREATE VIRTUAL TABLE ft USING fts5( x, tokenize="origintext unicode61", content=, contentless_delete=1, @@ -229,5 +228,46 @@ for {set ii 0} {$ii < $NLOOP} {incr ii} { do_all_vocab_test 2.3.$ii } +#------------------------------------------------------------------------- + +unset -nocomplain ::expanded_vocab +proc vocab {} { + list abcde fghij klmno +} + 
+proc do_all_vocab_test3 {tn} { + foreach ::v [concat [vocab] nnn] { + set answer [execsql { + SELECT rowid, ctrl_tokens($::v, w) FROM ctrl3 WHERE w LIKE '%' || $::v || '%' + }] + do_execsql_test $tn.$::v.1 { + SELECT rowid, tokens(ft3) FROM ft3($::v) + } $answer + do_execsql_test $tn.$::v.2 { + SELECT rowid, tokens(ft3) FROM ft3($::v) ORDER BY rank + } $answer + } +} + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft3 USING fts5( + w, tokenize="origintext unicode61", content=, contentless_delete=1, + tokendata=1 + ); + INSERT INTO ft3(ft3, rank) VALUES('rank', 'rankfunc()'); + CREATE TABLE ctrl3(w); +} + +do_execsql_test 3.1 { + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<2 + ) + INSERT INTO ctrl3 SELECT document() FROM s; + INSERT INTO ft3(rowid, w) SELECT rowid, w FROM ctrl3; +} + +do_all_vocab_test3 3.2 + + finish_test diff --git a/manifest b/manifest index 11d57c4176..91a29a053c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sfurther\stests\sfor\sxInstToken(). -D 2023-12-04T19:48:08.530 +C Further\stests\sfor\sthe\snew\scode\son\sthis\sbranch. 
+D 2023-12-05T18:36:23.618 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -94,9 +94,9 @@ F ext/fts5/fts5Int.h defa43c0932265138ee910ca416e6baccf8b774e0f3d610e74be1ab2880 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf -F ext/fts5/fts5_expr.c 920516be4aac44eccdd9e747fe26f367442848b9a58971e4b36edb0c8284b6b8 +F ext/fts5/fts5_expr.c b1ec526371b9ffde82341423a5b9753c42cbea629a41b69f26fa377d13b95a8e F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 7111fed6c01048e227cc8c681306fa2db1e5ad29429b1373e665b8e95a7868ba +F ext/fts5/fts5_index.c be39b44ff8773cff56bcbc01f74701a83e068c20d773cafd01e8bb2fa0fc1bc5 F ext/fts5/fts5_main.c fb7ec495d663f40d18e420e1986316591041a70e1e4b4696ab2a7384e4c7fd7a F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 @@ -170,6 +170,7 @@ F ext/fts5/test/fts5faultD.test e7ed7895abfe6bc98a5e853826f6b74956e7ba7f594f1860 F ext/fts5/test/fts5faultE.test 844586ce71dab4be85bb86880e87b624d089f851654cd22e4710c77eb8ce7075 F ext/fts5/test/fts5faultF.test 4abef99f86e99d9f0c6460dd68c586a766b6b9f1f660ada55bf2e8266bd1bbc1 F ext/fts5/test/fts5faultG.test d2e5a4d9a34e08dcaadcaeafef74d10cbc2abdd11aa2659a18af0294bf2812d3 +F ext/fts5/test/fts5faultH.test d845f45dac3e1a3f20c7e0a2be95280c95d3204c06802f86ab2c110e52ed3d14 F ext/fts5/test/fts5first.test 3fcf2365c00a15fc9704233674789a3b95131d12de18a9b996159f6909dc8079 F ext/fts5/test/fts5full.test 
e1701a112354e0ff9a1fdffb0c940c576530c33732ee20ac5e8361777070d717 F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e @@ -191,10 +192,10 @@ F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca F ext/fts5/test/fts5origintext.test d2796fa08ee7aecfabdc0c45bb8a2fb16a00ea8757e63fbc153b718dbe430a39 -F ext/fts5/test/fts5origintext2.test 43b07dd62d087743322b0003a27c8efdbda6c8659a27fde71f32ead27b5a0969 -F ext/fts5/test/fts5origintext3.test e0d47c187e7c279d25aa27aa3de8dd0d26b050a74db90670c9b20d0ecfcfb52a +F ext/fts5/test/fts5origintext2.test f3b9436de540828d01f0672df855b09ebc0863e126d5b56234701d71dfa73634 +F ext/fts5/test/fts5origintext3.test 0d25933506600452a5ab3873cbb418ed5f2de2446c3672b9997b1ea104b0e7f0 F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 -F ext/fts5/test/fts5origintext5.test 067bfb3008323585df640ab29e8ef7c4ca6dec62c597be07a9f896d88f98cd10 +F ext/fts5/test/fts5origintext5.test a037bdf7235a22033c4663837bdb12d9738245464a3ac2f60c71fc40d07ede7d F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2150,8 +2151,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 78fbb71598b1ca756acc078253880a1d0f7983a5a26b9efc683e6488122505a1 -R ced1cc2c59284a9ef3026043e080f745 +P 
8582707f16133f003a6687f68cbea03d4eb6c2a0e2e07746b7cace0c44e84fa4 +R c18539c880ee946e73119264163283e7 U dan -Z e2cce7acff968ef3c9472723192ae452 +Z 86bd4754d1465fbe5cee9e784de6e900 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 780ed88175..c1e3ee2ba4 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8582707f16133f003a6687f68cbea03d4eb6c2a0e2e07746b7cace0c44e84fa4 \ No newline at end of file +59d008b6c23ab900377bc696ee19381feb7614bac80546eae361e401c3620c4e \ No newline at end of file