From 7d56669bc48ba12c23713219a2e10923758b915f Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 28 Sep 2024 20:45:11 +0000 Subject: [PATCH] Rationalize some of the new code on this branch. FossilOrigin-Name: 66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075 --- ext/fts5/fts5_index.c | 309 ++++++++++++++++++++++++------------------ manifest | 12 +- manifest.uuid | 2 +- 3 files changed, 181 insertions(+), 142 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 1efbe5a7b4..ede091650d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -6198,6 +6198,101 @@ static void fts5MergePrefixLists( *p1 = out; } + +static int fts5VisitPrefixRange( + Fts5Index *p, + Fts5Colset *pColset, + u8 *pToken, + int nToken, + void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int), + void *pCtx +){ + const int flags = FTS5INDEX_QUERY_SCAN + | FTS5INDEX_QUERY_SKIPEMPTY + | FTS5INDEX_QUERY_NOOUTPUT; + Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ + int bNewTerm = 1; + Fts5Structure *pStruct = fts5StructureRead(p); + + fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); + fts5IterSetOutputCb(&p->rc, p1); + for( /* no-op */ ; + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext2(p, p1, &bNewTerm) + ){ + Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; + int nNew = 0; + const u8 *pNew = 0; + + p1->xSetOutputs(p1, pSeg); + + + if( bNewTerm ){ + nNew = pSeg->term.n; + pNew = pSeg->term.p; + if( nNewrc; +} + +typedef struct PrefixSetupCtx PrefixSetupCtx; +struct PrefixSetupCtx { + void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); + void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); + i64 iLastRowid; + int nMerge; + Fts5Buffer *aBuf; + int nBuf; + Fts5Buffer doclist; +}; + +static void prefixIterSetupCb( + Fts5Index *p, + void *pCtx, + Fts5Iter *p1, + const u8 *pNew, + int nNew +){ + PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx; + const int nMerge = pSetup->nMerge; + + if( p1->base.nData>0 ){ + if( p1->base.iRowid<=pSetup->iLastRowid && pSetup->doclist.n>0 ){ + int i; + for(i=0; p->rc==SQLITE_OK && pSetup->doclist.n; i++){ + int i1 = i*nMerge; + int iStore; + assert( i1+nMerge<=pSetup->nBuf ); + for(iStore=i1; iStoreaBuf[iStore].n==0 ){ + fts5BufferSwap(&pSetup->doclist, &pSetup->aBuf[iStore]); + fts5BufferZero(&pSetup->doclist); + break; + } + } + if( iStore==i1+nMerge ){ + pSetup->xMerge(p, &pSetup->doclist, nMerge, &pSetup->aBuf[i1]); + for(iStore=i1; iStoreaBuf[iStore]); + } + } + } + pSetup->iLastRowid = 0; + } + + pSetup->xAppend( + p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, &pSetup->doclist + ); + pSetup->iLastRowid = p1->base.iRowid; + } +} + static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ @@ -6208,38 +6303,30 @@ static void fts5SetupPrefixIter( Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; - Fts5Buffer *aBuf; - int nBuf = 32; - int nMerge = 1; + PrefixSetupCtx s; + + memset(&s, 0, sizeof(s)); + s.nMerge = 1; + s.iLastRowid = 0; + s.nBuf = 32; - void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); - void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ - xMerge = fts5MergeRowidLists; - xAppend = fts5AppendRowid; + s.xMerge = fts5MergeRowidLists; + s.xAppend = fts5AppendRowid; }else{ - nMerge = FTS5_MERGE_NLIST-1; - nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */ - xMerge = fts5MergePrefixLists; - xAppend = fts5AppendPoslist; + s.nMerge = FTS5_MERGE_NLIST-1; + s.nBuf = s.nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */ + s.xMerge = fts5MergePrefixLists; + s.xAppend = fts5AppendPoslist; } - aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); + s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf); pStruct = fts5StructureRead(p); - assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); + assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) ); if( p->rc==SQLITE_OK ){ - const int flags = FTS5INDEX_QUERY_SCAN - | FTS5INDEX_QUERY_SKIPEMPTY - | FTS5INDEX_QUERY_NOOUTPUT; int i; - i64 iLastRowid = 0; - Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ Fts5Data *pData; - Fts5Buffer doclist; - int bNewTerm = 1; - - memset(&doclist, 0, sizeof(doclist)); /* If iIdx is non-zero, then it is the number of a prefix-index for ** prefixes 1 character longer than the prefix being queried for. That @@ -6247,6 +6334,7 @@ static void fts5SetupPrefixIter( ** corresponding to the prefix itself. That one is extracted from the ** main term index here. */ if( iIdx!=0 ){ + Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ int dummy = 0; const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; pToken[0] = FTS5_MAIN_PREFIX; @@ -6259,82 +6347,41 @@ static void fts5SetupPrefixIter( Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; p1->xSetOutputs(p1, pSeg); if( p1->base.nData ){ - xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); - iLastRowid = p1->base.iRowid; + s.xAppend(p, (u64)p1->base.iRowid-(u64)s.iLastRowid, p1, &s.doclist); + s.iLastRowid = p1->base.iRowid; } } fts5MultiIterFree(p1); } pToken[0] = FTS5_MAIN_PREFIX + iIdx; - fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); - fts5IterSetOutputCb(&p->rc, p1); + fts5VisitPrefixRange( + p, pColset, pToken, nToken, prefixIterSetupCb, (void*)&s + ); - for( /* no-op */ ; - fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext2(p, p1, &bNewTerm) - ){ - Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; - int nTerm = pSeg->term.n; - const u8 *pTerm = pSeg->term.p; - p1->xSetOutputs(p1, pSeg); - - assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); - if( bNewTerm ){ - if( nTermbase.nData==0 ) continue; - if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ - for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ - int i1 = i*nMerge; - int iStore; - assert( i1+nMerge<=nBuf ); - for(iStore=i1; iStorebase.iRowid-(u64)iLastRowid, p1, &doclist); - iLastRowid = p1->base.iRowid; - } - - assert( (nBuf%nMerge)==0 ); - for(i=0; irc==SQLITE_OK ){ - xMerge(p, &doclist, nMerge, &aBuf[i]); + s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]); } - for(iFree=i; iFreep = (u8*)&pData[1]; - pData->nn = pData->szLeaf = doclist.n; - if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); + pData->nn = pData->szLeaf = s.doclist.n; + if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n); fts5MultiIterNew2(p, pData, bDesc, ppIter); } - fts5BufferFree(&doclist); } + fts5BufferFree(&s.doclist); fts5StructureRelease(pStruct); - sqlite3_free(aBuf); + sqlite3_free(s.aBuf); } @@ -7021,7 +7068,6 @@ static Fts5Iter *fts5SetupTokendataIter( return pRet; } - /* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. @@ -7046,9 +7092,11 @@ int sqlite3Fts5IndexQuery( int bTokendata = pConfig->bTokendata; if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); - /* The NOTOKENDATA flag is set when it is known that tokendata data will - ** not be required. e.g. for queries performed as part of an - ** integrity-check, or by the fts5vocab module. */ + /* The NOTOKENDATA flag is set when each token in a tokendata=1 table + ** should be treated individually, instead of merging all those with + ** a common prefix into a single entry. This is used, for example, by + ** queries performed as part of an integrity-check, or by the fts5vocab + ** module. */ if( flags & (FTS5INDEX_QUERY_NOTOKENDATA|FTS5INDEX_QUERY_SCAN) ){ bTokendata = 0; } @@ -7092,7 +7140,7 @@ int sqlite3Fts5IndexQuery( fts5StructureRelease(pStruct); } }else{ - /* Scan multiple terms in the main index */ + /* Scan multiple terms in the main index for a prefix query. */ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); if( pRet==0 ){ @@ -7250,6 +7298,39 @@ static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ } } +typedef struct TokendataSetupCtx TokendataSetupCtx; +struct TokendataSetupCtx { + Fts5TokenDataIter *pT; + int iTermOff; + int nTermByte; +}; + +static void prefixIterSetupTokendataCb( + Fts5Index *p, + void *pCtx, + Fts5Iter *p1, + const u8 *pNew, + int nNew +){ + TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; + int iPosOff = 0; + i64 iPos = 0; + + if( pNew ){ + pSetup->nTermByte = nNew-1; + pSetup->iTermOff = pSetup->pT->terms.n; + fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1); + } + + while( 0==sqlite3Fts5PoslistNext64( + p1->base.pData, p1->base.nData, &iPosOff, &iPos + ) ){ + fts5TokendataIterAppendMap(p, + pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos + ); + } +} + static int fts5SetupPrefixIterTokendata( Fts5Iter *pIter, const char *pToken, @@ -7257,73 +7338,31 @@ static int fts5SetupPrefixIterTokendata( ){ Fts5Index *p = pIter->pIndex; Fts5Buffer token = {0, 0, 0}; - Fts5TokenDataIter *pT = 0; + TokendataSetupCtx ctx; + + memset(&ctx, 0, sizeof(ctx)); fts5BufferGrow(&p->rc, &token, nToken+1); - pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*pT)); + ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*ctx.pT)); if( p->rc==SQLITE_OK ){ - const int flags = FTS5INDEX_QUERY_SCAN - | FTS5INDEX_QUERY_SKIPEMPTY - | FTS5INDEX_QUERY_NOOUTPUT; - Fts5Structure *pStruct = 0; - Fts5Iter *p1 = 0; /* Iterator used to find tokendata */ - - int bNewTerm = 1; - int iTermOff = 0; - int nTermByte = 0; /* Fill in the token prefix to search for */ token.p[0] = FTS5_MAIN_PREFIX; memcpy(&token.p[1], pToken, nToken); token.n = nToken+1; - /* Grab a reference to the table structure. That will be released before - ** this function returns. */ - pStruct = fts5StructureRead(p); + fts5VisitPrefixRange( + p, 0, token.p, token.n, prefixIterSetupTokendataCb, (void*)&ctx + ); - fts5MultiIterNew(p, pStruct, flags, 0, token.p, token.n, -1, 0, &p1); - fts5IterSetOutputCb(&p->rc, p1); - for( /* no-op */ ; - fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext2(p, p1, &bNewTerm) - ){ - i64 iPos = 0; - int iPosOff = 0; - - Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; - p1->xSetOutputs(p1, pSeg); - - if( bNewTerm ){ - int nTerm = pSeg->term.n; - const u8 *pTerm = pSeg->term.p; - assert_nc( memcmp(token.p, pTerm, MIN(token.n, nTerm))<=0 ); - if( nTermterms.n; - fts5BufferAppendBlob(&p->rc, &pT->terms, nTermByte, pTerm+1); - } - - while( 0==sqlite3Fts5PoslistNext64( - p1->base.pData, p1->base.nData, &iPosOff, &iPos - ) ){ - fts5TokendataIterAppendMap( - p, pT, iTermOff, nTermByte, p1->base.iRowid, iPos - ); - } - } - - /* fts5SetupPrefixIter */ - fts5MultiIterFree(p1); - fts5StructureRelease(pStruct); - - fts5TokendataIterSortMap(p, pT); + fts5TokendataIterSortMap(p, ctx.pT); } if( p->rc==SQLITE_OK ){ - pIter->pTokenDataIter = pT; + pIter->pTokenDataIter = ctx.pT; }else{ - fts5TokendataIterDelete(pT); + fts5TokendataIterDelete(ctx.pT); } fts5BufferFree(&token); diff --git a/manifest b/manifest index 91a17cd96b..0d2ad1c4fb 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\sway\stokendata\sindexes\sare\scollected\sfor\sprefix\squeries. -D 2024-09-25T18:55:11.223 +C Rationalize\ssome\sof\sthe\snew\scode\son\sthis\sbranch. +D 2024-09-28T20:45:11.387 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -99,7 +99,7 @@ F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70 F ext/fts5/fts5_config.c da21548ddbc1a457cb42545f527065221ede8ada6a734891b8c34317a7a9506b F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 -F ext/fts5/fts5_index.c 8dfb22c5e42cd56d3abbe107a5561fc3b4f731fc4c821ac049482d9dedc50acc +F ext/fts5/fts5_index.c c1005920192146452a3545500761ecc8cfab84572d251e8536103a01899f67d5 F ext/fts5/fts5_main.c 4503498d3453e29a3cd89dacaba029011e89cb8c481a6241611d106e7a369bd4 F ext/fts5/fts5_storage.c 3332497823c3d171cf56379f2bd8c971ce15a19aadacff961106462022c92470 F ext/fts5/fts5_tcl.c 4db9258a7882c5eac0da4433042132aaf15b87dd1e1636c7a6ca203abd2c8bfe @@ -2214,8 +2214,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 9945206e6e26a48a49b9747650d299eb983cc21a3a61c621cd81f0bbc85a74d7 -R d12c6f9d3e41d3b7f32c957f52650189 +P 204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff +R da06610bae74973a44f69a92b9b60e12 U dan -Z 0b9676f39cb90827333f01676ed89ac5 +Z ca9c653ea5575a07920ac6ddffa15d1d # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 92e4aa62cf..f550314bcd 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff +66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075