1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Rationalize some of the new code on this branch.

FossilOrigin-Name: 66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075
This commit is contained in:
dan
2024-09-28 20:45:11 +00:00
parent 2eff8f2252
commit 7d56669bc4
3 changed files with 181 additions and 142 deletions

View File

@ -6198,6 +6198,101 @@ static void fts5MergePrefixLists(
*p1 = out;
}
/*
** Scan the index using an iterator opened on the nToken byte buffer
** pToken (restricted by pColset) and invoke xVisit() once for each entry
** the iterator returns. The scan stops at the first new term that is
** shorter than nToken bytes or whose first nToken bytes differ from
** pToken.
**
** The last two arguments passed to xVisit() are the term buffer and its
** size in bytes when the entry is the first visited for a term, or
** (NULL, 0) for the following entries of the same term. pCtx is passed
** through to xVisit() unchanged.
**
** The value of p->rc at the end of the scan is returned.
*/
static int fts5VisitPrefixRange(
  Fts5Index *p,
  Fts5Colset *pColset,
  u8 *pToken,
  int nToken,
  void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int),
  void *pCtx
){
  const int scanFlags = FTS5INDEX_QUERY_NOOUTPUT
                      | FTS5INDEX_QUERY_SKIPEMPTY
                      | FTS5INDEX_QUERY_SCAN;
  Fts5Structure *pStruct = fts5StructureRead(p);
  Fts5Iter *pScan = 0;            /* Iterator used to walk the index */
  int bNewTerm = 1;               /* True if iterator moved to a new term */

  fts5MultiIterNew(
      p, pStruct, scanFlags, pColset, pToken, nToken, -1, 0, &pScan
  );
  fts5IterSetOutputCb(&p->rc, pScan);

  while( fts5MultiIterEof(p, pScan)==0 ){
    Fts5SegIter *pSeg = &pScan->aSeg[ pScan->aFirst[1].iFirst ];
    const u8 *pTerm = 0;
    int nTerm = 0;

    pScan->xSetOutputs(pScan, pSeg);

    if( bNewTerm ){
      /* A new term has been reached. Stop scanning unless it still
      ** begins with the requested token. */
      nTerm = pSeg->term.n;
      pTerm = pSeg->term.p;
      if( nTerm<nToken ) break;
      if( memcmp(pToken, pTerm, nToken)!=0 ) break;
    }

    xVisit(p, pCtx, pScan, pTerm, nTerm);
    fts5MultiIterNext2(p, pScan, &bNewTerm);
  }

  fts5MultiIterFree(pScan);
  fts5StructureRelease(pStruct);
  return p->rc;
}
/*
** Context object handed to prefixIterSetupCb() through its pCtx argument.
** It holds the state that callback needs to accumulate a doclist across
** successive invocations.
*/
typedef struct PrefixSetupCtx PrefixSetupCtx;
struct PrefixSetupCtx {
/* Invoked as xMerge(p, &doclist, nMerge, &aBuf[i]) to combine a group of
** nMerge entries of aBuf[] with doclist. */
void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
/* Invoked with a rowid delta and the current iterator to add the current
** entry to doclist. */
void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
/* Rowid passed to the most recent xAppend() call. Reset to 0 each time
** doclist is moved into aBuf[]. */
i64 iLastRowid;
/* Number of aBuf[] entries that make up one merge group */
int nMerge;
/* Array of nBuf buffers used to set aside completed doclists */
Fts5Buffer *aBuf;
/* Number of entries in aBuf[] */
int nBuf;
/* Doclist currently being appended to */
Fts5Buffer doclist;
};
/*
** Visitor callback that accumulates entries into the doclist held by the
** PrefixSetupCtx object passed as pCtx. Entries for which the iterator
** reports no data are ignored.
**
** If the rowid of the current entry is not larger than the previously
** appended rowid and the doclist is not empty, the doclist is first set
** aside in PrefixSetupCtx.aBuf[]: it is swapped into the first empty slot
** of a group of nMerge buffers or, if the group is full, merged with the
** whole group via xMerge() and the search repeated with the next group.
** The current entry is then appended to the doclist via xAppend().
**
** Parameters pNew and nNew are not used by this callback.
*/
static void prefixIterSetupCb(
  Fts5Index *p,
  void *pCtx,
  Fts5Iter *p1,
  const u8 *pNew,
  int nNew
){
  PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx;
  Fts5Buffer *pDoclist = &pSetup->doclist;
  const int nGroup = pSetup->nMerge;
  int iLevel;

  /* Nothing to do for entries that carry no data */
  if( p1->base.nData<=0 ) return;

  if( pDoclist->n>0 && p1->base.iRowid<=pSetup->iLastRowid ){
    for(iLevel=0; p->rc==SQLITE_OK && pDoclist->n; iLevel++){
      Fts5Buffer *aGroup;           /* First buffer of the current group */
      int iFirst = iLevel*nGroup;   /* Index of aGroup[0] within aBuf[] */
      int iSlot = 0;                /* Index within aGroup[] */

      assert( iFirst+nGroup<=pSetup->nBuf );
      aGroup = &pSetup->aBuf[iFirst];

      /* Search the group for an empty slot */
      while( iSlot<nGroup && aGroup[iSlot].n!=0 ) iSlot++;

      if( iSlot<nGroup ){
        /* Park the doclist in the empty slot. This leaves the doclist
        ** empty, which terminates the enclosing loop. */
        fts5BufferSwap(pDoclist, &aGroup[iSlot]);
        fts5BufferZero(pDoclist);
      }else{
        /* The group is full. Fold it into the doclist, clear the group
        ** and carry on with the next one. */
        pSetup->xMerge(p, pDoclist, nGroup, aGroup);
        for(iSlot=0; iSlot<nGroup; iSlot++){
          fts5BufferZero(&aGroup[iSlot]);
        }
      }
    }
    pSetup->iLastRowid = 0;
  }

  pSetup->xAppend(
      p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, pDoclist
  );
  pSetup->iLastRowid = p1->base.iRowid;
}
static void fts5SetupPrefixIter(
Fts5Index *p, /* Index to read from */
int bDesc, /* True for "ORDER BY rowid DESC" */
@ -6208,38 +6303,30 @@ static void fts5SetupPrefixIter(
Fts5Iter **ppIter /* OUT: New iterator */
){
Fts5Structure *pStruct;
Fts5Buffer *aBuf;
int nBuf = 32;
int nMerge = 1;
PrefixSetupCtx s;
memset(&s, 0, sizeof(s));
s.nMerge = 1;
s.iLastRowid = 0;
s.nBuf = 32;
void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
xMerge = fts5MergeRowidLists;
xAppend = fts5AppendRowid;
s.xMerge = fts5MergeRowidLists;
s.xAppend = fts5AppendRowid;
}else{
nMerge = FTS5_MERGE_NLIST-1;
nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
xMerge = fts5MergePrefixLists;
xAppend = fts5AppendPoslist;
s.nMerge = FTS5_MERGE_NLIST-1;
s.nBuf = s.nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
s.xMerge = fts5MergePrefixLists;
s.xAppend = fts5AppendPoslist;
}
aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf);
pStruct = fts5StructureRead(p);
assert( p->rc!=SQLITE_OK || (aBuf && pStruct) );
assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) );
if( p->rc==SQLITE_OK ){
const int flags = FTS5INDEX_QUERY_SCAN
| FTS5INDEX_QUERY_SKIPEMPTY
| FTS5INDEX_QUERY_NOOUTPUT;
int i;
i64 iLastRowid = 0;
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
Fts5Data *pData;
Fts5Buffer doclist;
int bNewTerm = 1;
memset(&doclist, 0, sizeof(doclist));
/* If iIdx is non-zero, then it is the number of a prefix-index for
** prefixes 1 character longer than the prefix being queried for. That
@ -6247,6 +6334,7 @@ static void fts5SetupPrefixIter(
** corresponding to the prefix itself. That one is extracted from the
** main term index here. */
if( iIdx!=0 ){
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
int dummy = 0;
const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
pToken[0] = FTS5_MAIN_PREFIX;
@ -6259,82 +6347,41 @@ static void fts5SetupPrefixIter(
Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
p1->xSetOutputs(p1, pSeg);
if( p1->base.nData ){
xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
iLastRowid = p1->base.iRowid;
s.xAppend(p, (u64)p1->base.iRowid-(u64)s.iLastRowid, p1, &s.doclist);
s.iLastRowid = p1->base.iRowid;
}
}
fts5MultiIterFree(p1);
}
pToken[0] = FTS5_MAIN_PREFIX + iIdx;
fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
fts5IterSetOutputCb(&p->rc, p1);
fts5VisitPrefixRange(
p, pColset, pToken, nToken, prefixIterSetupCb, (void*)&s
);
for( /* no-op */ ;
fts5MultiIterEof(p, p1)==0;
fts5MultiIterNext2(p, p1, &bNewTerm)
){
Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
int nTerm = pSeg->term.n;
const u8 *pTerm = pSeg->term.p;
p1->xSetOutputs(p1, pSeg);
assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
if( bNewTerm ){
if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
}
if( p1->base.nData==0 ) continue;
if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
int i1 = i*nMerge;
int iStore;
assert( i1+nMerge<=nBuf );
for(iStore=i1; iStore<i1+nMerge; iStore++){
if( aBuf[iStore].n==0 ){
fts5BufferSwap(&doclist, &aBuf[iStore]);
fts5BufferZero(&doclist);
break;
}
}
if( iStore==i1+nMerge ){
xMerge(p, &doclist, nMerge, &aBuf[i1]);
for(iStore=i1; iStore<i1+nMerge; iStore++){
fts5BufferZero(&aBuf[iStore]);
}
}
}
iLastRowid = 0;
}
xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
iLastRowid = p1->base.iRowid;
}
assert( (nBuf%nMerge)==0 );
for(i=0; i<nBuf; i+=nMerge){
assert( (s.nBuf%s.nMerge)==0 );
for(i=0; i<s.nBuf; i+=s.nMerge){
int iFree;
if( p->rc==SQLITE_OK ){
xMerge(p, &doclist, nMerge, &aBuf[i]);
s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]);
}
for(iFree=i; iFree<i+nMerge; iFree++){
fts5BufferFree(&aBuf[iFree]);
for(iFree=i; iFree<i+s.nMerge; iFree++){
fts5BufferFree(&s.aBuf[iFree]);
}
}
fts5MultiIterFree(p1);
pData = fts5IdxMalloc(p, sizeof(*pData)+doclist.n+FTS5_DATA_ZERO_PADDING);
pData = fts5IdxMalloc(p, sizeof(*pData)+s.doclist.n+FTS5_DATA_ZERO_PADDING);
if( pData ){
pData->p = (u8*)&pData[1];
pData->nn = pData->szLeaf = doclist.n;
if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
pData->nn = pData->szLeaf = s.doclist.n;
if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n);
fts5MultiIterNew2(p, pData, bDesc, ppIter);
}
fts5BufferFree(&doclist);
}
fts5BufferFree(&s.doclist);
fts5StructureRelease(pStruct);
sqlite3_free(aBuf);
sqlite3_free(s.aBuf);
}
@ -7021,7 +7068,6 @@ static Fts5Iter *fts5SetupTokendataIter(
return pRet;
}
/*
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
@ -7046,9 +7092,11 @@ int sqlite3Fts5IndexQuery(
int bTokendata = pConfig->bTokendata;
if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
/* The NOTOKENDATA flag is set when it is known that tokendata data will
** not be required. e.g. for queries performed as part of an
** integrity-check, or by the fts5vocab module. */
/* The NOTOKENDATA flag is set when each token in a tokendata=1 table
** should be treated individually, instead of merging all those with
** a common prefix into a single entry. This is used, for example, by
** queries performed as part of an integrity-check, or by the fts5vocab
** module. */
if( flags & (FTS5INDEX_QUERY_NOTOKENDATA|FTS5INDEX_QUERY_SCAN) ){
bTokendata = 0;
}
@ -7092,7 +7140,7 @@ int sqlite3Fts5IndexQuery(
fts5StructureRelease(pStruct);
}
}else{
/* Scan multiple terms in the main index */
/* Scan multiple terms in the main index for a prefix query. */
int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
if( pRet==0 ){
@ -7250,6 +7298,39 @@ static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
}
}
/*
** Context object handed to prefixIterSetupTokendataCb() through its pCtx
** argument.
*/
typedef struct TokendataSetupCtx TokendataSetupCtx;
struct TokendataSetupCtx {
/* Tokendata iterator being populated with terms and map entries */
Fts5TokenDataIter *pT;
/* Offset within pT->terms at which the current term was appended */
int iTermOff;
/* Size in bytes of the current term, not counting its first byte */
int nTermByte;
};
/*
** Visitor callback used to populate the Fts5TokenDataIter object held by
** the TokendataSetupCtx passed as pCtx.
**
** If pNew is not NULL, a new term has been reached. The term, less its
** first byte, is appended to the iterator's terms buffer and its offset
** and size are recorded in the context object. Then, for each position
** read from the current position list of p1, a map entry is added for
** the (term, rowid, position) combination.
*/
static void prefixIterSetupTokendataCb(
  Fts5Index *p,
  void *pCtx,
  Fts5Iter *p1,
  const u8 *pNew,
  int nNew
){
  TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx;
  Fts5TokenDataIter *pT = pSetup->pT;
  i64 iPos = 0;                   /* Current position */
  int iOff = 0;                   /* Read offset within position list */

  if( pNew!=0 ){
    const int nTerm = nNew-1;     /* Term size without its first byte */
    pSetup->nTermByte = nTerm;
    pSetup->iTermOff = pT->terms.n;
    fts5BufferAppendBlob(&p->rc, &pT->terms, nTerm, pNew+1);
  }

  for(;;){
    int bDone = sqlite3Fts5PoslistNext64(
        p1->base.pData, p1->base.nData, &iOff, &iPos
    );
    if( bDone ) break;
    fts5TokendataIterAppendMap(
        p, pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos
    );
  }
}
static int fts5SetupPrefixIterTokendata(
Fts5Iter *pIter,
const char *pToken,
@ -7257,73 +7338,31 @@ static int fts5SetupPrefixIterTokendata(
){
Fts5Index *p = pIter->pIndex;
Fts5Buffer token = {0, 0, 0};
Fts5TokenDataIter *pT = 0;
TokendataSetupCtx ctx;
memset(&ctx, 0, sizeof(ctx));
fts5BufferGrow(&p->rc, &token, nToken+1);
pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*pT));
ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*ctx.pT));
if( p->rc==SQLITE_OK ){
const int flags = FTS5INDEX_QUERY_SCAN
| FTS5INDEX_QUERY_SKIPEMPTY
| FTS5INDEX_QUERY_NOOUTPUT;
Fts5Structure *pStruct = 0;
Fts5Iter *p1 = 0; /* Iterator used to find tokendata */
int bNewTerm = 1;
int iTermOff = 0;
int nTermByte = 0;
/* Fill in the token prefix to search for */
token.p[0] = FTS5_MAIN_PREFIX;
memcpy(&token.p[1], pToken, nToken);
token.n = nToken+1;
/* Grab a reference to the table structure. That will be released before
** this function returns. */
pStruct = fts5StructureRead(p);
fts5VisitPrefixRange(
p, 0, token.p, token.n, prefixIterSetupTokendataCb, (void*)&ctx
);
fts5MultiIterNew(p, pStruct, flags, 0, token.p, token.n, -1, 0, &p1);
fts5IterSetOutputCb(&p->rc, p1);
for( /* no-op */ ;
fts5MultiIterEof(p, p1)==0;
fts5MultiIterNext2(p, p1, &bNewTerm)
){
i64 iPos = 0;
int iPosOff = 0;
Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
p1->xSetOutputs(p1, pSeg);
if( bNewTerm ){
int nTerm = pSeg->term.n;
const u8 *pTerm = pSeg->term.p;
assert_nc( memcmp(token.p, pTerm, MIN(token.n, nTerm))<=0 );
if( nTerm<token.n || memcmp(token.p, pTerm, token.n) ) break;
nTermByte = nTerm-1;
iTermOff = pT->terms.n;
fts5BufferAppendBlob(&p->rc, &pT->terms, nTermByte, pTerm+1);
}
while( 0==sqlite3Fts5PoslistNext64(
p1->base.pData, p1->base.nData, &iPosOff, &iPos
) ){
fts5TokendataIterAppendMap(
p, pT, iTermOff, nTermByte, p1->base.iRowid, iPos
);
}
}
/* fts5SetupPrefixIter */
fts5MultiIterFree(p1);
fts5StructureRelease(pStruct);
fts5TokendataIterSortMap(p, pT);
fts5TokendataIterSortMap(p, ctx.pT);
}
if( p->rc==SQLITE_OK ){
pIter->pTokenDataIter = pT;
pIter->pTokenDataIter = ctx.pT;
}else{
fts5TokendataIterDelete(pT);
fts5TokendataIterDelete(ctx.pT);
}
fts5BufferFree(&token);

View File

@ -1,5 +1,5 @@
C Change\sthe\sway\stokendata\sindexes\sare\scollected\sfor\sprefix\squeries.
D 2024-09-25T18:55:11.223
C Rationalize\ssome\sof\sthe\snew\scode\son\sthis\sbranch.
D 2024-09-28T20:45:11.387
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -99,7 +99,7 @@ F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70
F ext/fts5/fts5_config.c da21548ddbc1a457cb42545f527065221ede8ada6a734891b8c34317a7a9506b
F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c 8dfb22c5e42cd56d3abbe107a5561fc3b4f731fc4c821ac049482d9dedc50acc
F ext/fts5/fts5_index.c c1005920192146452a3545500761ecc8cfab84572d251e8536103a01899f67d5
F ext/fts5/fts5_main.c 4503498d3453e29a3cd89dacaba029011e89cb8c481a6241611d106e7a369bd4
F ext/fts5/fts5_storage.c 3332497823c3d171cf56379f2bd8c971ce15a19aadacff961106462022c92470
F ext/fts5/fts5_tcl.c 4db9258a7882c5eac0da4433042132aaf15b87dd1e1636c7a6ca203abd2c8bfe
@ -2214,8 +2214,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 9945206e6e26a48a49b9747650d299eb983cc21a3a61c621cd81f0bbc85a74d7
R d12c6f9d3e41d3b7f32c957f52650189
P 204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff
R da06610bae74973a44f69a92b9b60e12
U dan
Z 0b9676f39cb90827333f01676ed89ac5
Z ca9c653ea5575a07920ac6ddffa15d1d
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff
66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075