1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-09-11 08:30:57 +03:00

Rationalize code further. And add tests.

FossilOrigin-Name: 0ca002a4ab88f3e7ae1e6e518038157eaa20759f57888c2ed7e50cb92bd96348
This commit is contained in:
dan
2024-10-01 20:38:08 +00:00
parent 7d56669bc4
commit d2a88e961a
4 changed files with 151 additions and 53 deletions

View File

@@ -6199,15 +6199,45 @@ static void fts5MergePrefixLists(
}
static int fts5VisitPrefixRange(
Fts5Index *p,
Fts5Colset *pColset,
u8 *pToken,
int nToken,
/*
** Iterate through a range of entries in the FTS index, invoking the xVisit
** callback for each of them.
**
** Parameter pToken points to an nToken buffer containing an FTS index term
** (i.e. a document term with the preceding 1 byte index identifier -
** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits
** all entries for terms that have pToken/nToken as a prefix. If bPrefix
** is false, then only entries with pToken/nToken as the entire key are
** visited.
**
** If the current table is a tokendata=1 table, then if bPrefix is true then
** each index term is treated separately. However, if bPrefix is false, then
** all index terms corresponding to pToken/nToken are collapsed into a single
** term before the callback is invoked.
**
** The callback invoked for each entry visited is specified by paramter xVisit.
** Each time it is invoked, it is passed a pointer to the Fts5Index object,
** a copy of the 7th paramter to this function (pCtx) and a pointer to the
** iterator that indicates the current entry. If the current entry is the
** first with a new term (i.e. different from that of the previous entry,
** including the very first term), then the final two parameters are passed
** a pointer to the term and its size in bytes, respectively. If the current
** entry is not the first associated with its term, these two parameters
** are passed 0.
**
** If parameter pColset is not NULL, then it is used to filter entries before
** the callback is invoked.
*/
static int fts5VisitEntries(
Fts5Index *p, /* Fts5 index object */
Fts5Colset *pColset, /* Columns filter to apply, or NULL */
u8 *pToken, /* Buffer containing token */
int nToken, /* Size of buffer pToken in bytes */
int bPrefix, /* True for a prefix scan */
void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int),
void *pCtx
void *pCtx /* Passed as second argument to xVisit() */
){
const int flags = FTS5INDEX_QUERY_SCAN
const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN : 0)
| FTS5INDEX_QUERY_SKIPEMPTY
| FTS5INDEX_QUERY_NOOUTPUT;
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
@@ -6226,7 +6256,6 @@ static int fts5VisitPrefixRange(
p1->xSetOutputs(p1, pSeg);
if( bNewTerm ){
nNew = pSeg->term.n;
pNew = pSeg->term.p;
@@ -6241,6 +6270,9 @@ static int fts5VisitPrefixRange(
return p->rc;
}
/*
** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries().
*/
typedef struct PrefixSetupCtx PrefixSetupCtx;
struct PrefixSetupCtx {
void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
@@ -6252,6 +6284,9 @@ struct PrefixSetupCtx {
Fts5Buffer doclist;
};
/*
** fts5VisitEntries() callback used by fts5SetupPrefixIter()
*/
static void prefixIterSetupCb(
Fts5Index *p,
void *pCtx,
@@ -6325,6 +6360,7 @@ static void fts5SetupPrefixIter(
assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) );
if( p->rc==SQLITE_OK ){
void *pCtx = (void*)&s;
int i;
Fts5Data *pData;
@@ -6334,30 +6370,12 @@ static void fts5SetupPrefixIter(
** corresponding to the prefix itself. That one is extracted from the
** main term index here. */
if( iIdx!=0 ){
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
int dummy = 0;
const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
pToken[0] = FTS5_MAIN_PREFIX;
fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
fts5IterSetOutputCb(&p->rc, p1);
for(;
fts5MultiIterEof(p, p1)==0;
fts5MultiIterNext2(p, p1, &dummy)
){
Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
p1->xSetOutputs(p1, pSeg);
if( p1->base.nData ){
s.xAppend(p, (u64)p1->base.iRowid-(u64)s.iLastRowid, p1, &s.doclist);
s.iLastRowid = p1->base.iRowid;
}
}
fts5MultiIterFree(p1);
fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx);
}
pToken[0] = FTS5_MAIN_PREFIX + iIdx;
fts5VisitPrefixRange(
p, pColset, pToken, nToken, prefixIterSetupCb, (void*)&s
);
fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx);
assert( (s.nBuf%s.nMerge)==0 );
for(i=0; i<s.nBuf; i+=s.nMerge){
@@ -6637,29 +6655,57 @@ static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
/*
** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
** array of these for each row it visits. Or, for an iterator used by an
** "ORDER BY rank" query, it accumulates an array of these for the entire
** query.
** array of these for each row it visits (so all iRowid fields are the same).
** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an
** array of these for the entire query (in which case iRowid fields may take
** a variety of values).
**
** Each instance in the array indicates the iterator (and therefore term)
** associated with position iPos of rowid iRowid. This is used by the
** xInstToken() API.
**
** iRowid:
** Rowid for the current entry.
**
** iPos:
** Position of current entry within row. In the usual ((iCol<<32)+iOff)
** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()).
**
** iIter:
** If the Fts5TokenDataIter iterator that the entry is part of is
** actually an iterator (i.e. with nIter>0, not just a container for
** Fts5TokenDataMap structures), then this variable is an index into
** the apIter[] array. The corresponding term is that which the iterator
** at apIter[iIter] currently points to.
**
** Or, if the Fts5TokenDataIter iterator is just a container object
** (nIter==0), then iIter is an index into the term.p[] buffer where
** the term is stored.
**
** nByte:
** In the case where iIter is an index into term.p[], this variable
** is the size of the term in bytes. If iIter is an index into apIter[],
** this variable is unused.
*/
struct Fts5TokenDataMap {
i64 iRowid; /* Row this token is located in */
i64 iPos; /* Position of token */
int iIter; /* Iterator token was read from */
int nByte; /* Length of token in bytes (or 0) */
};
/*
** An object used to supplement Fts5Iter for tokendata=1 iterators.
**
** This object serves two purposes. The first is as a container for an array
** of Fts5TokenDataMap structures, which are used to find the token required
** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and
** aMap[] variables.
*/
struct Fts5TokenDataIter {
int nMap;
int nMapAlloc;
Fts5TokenDataMap *aMap;
int nMapAlloc; /* Allocated size of aMap[] in entries */
int nMap; /* Number of valid entries in aMap[] */
Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */
/* The following are used for prefix-queries only. */
Fts5Buffer terms;
@@ -7234,10 +7280,18 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
return (z ? &z[1] : 0);
}
/*
** The two input arrays - a1[] and a2[] - are in sorted order. This function
** merges the two arrays together and writes the result to output array
** aOut[]. aOut[] is guaranteed to be large enough to hold the result.
**
** Duplicate entries are copied into the output. So the size of the output
** array is always (n1+n2) entries.
*/
static void fts5TokendataMerge(
Fts5TokenDataMap *a1, int n1,
Fts5TokenDataMap *a2, int n2,
Fts5TokenDataMap *aOut
Fts5TokenDataMap *a1, int n1, /* Input array 1 */
Fts5TokenDataMap *a2, int n2, /* Input array 2 */
Fts5TokenDataMap *aOut /* Output array */
){
int i1 = 0;
int i2 = 0;
@@ -7258,6 +7312,12 @@ static void fts5TokendataMerge(
}
}
/*
** Sort the contents of the pT->aMap[] array.
**
** The sorting algorithm requries a malloc(). If this fails, an error code
** is left in Fts5Index.rc before returning.
*/
static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
Fts5TokenDataMap *aTmp = 0;
int nByte = pT->nMap * sizeof(Fts5TokenDataMap);
@@ -7298,13 +7358,23 @@ static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
}
}
/*
** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata()
** to pass data to prefixIterSetupTokendataCb().
*/
typedef struct TokendataSetupCtx TokendataSetupCtx;
struct TokendataSetupCtx {
Fts5TokenDataIter *pT;
int iTermOff;
int nTermByte;
Fts5TokenDataIter *pT; /* Object being populated with mappings */
int iTermOff; /* Offset of current term in terms.p[] */
int nTermByte; /* Size of current term in bytes */
};
/*
** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This
** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each
** position in the current position-list. It doesn't matter that some of
** these may be out of order - they will be sorted later.
*/
static void prefixIterSetupTokendataCb(
Fts5Index *p,
void *pCtx,
@@ -7331,10 +7401,15 @@ static void prefixIterSetupTokendataCb(
}
}
/*
** pIter is a prefix query. This function populates pIter->pTokenDataIter
** with an Fts5TokenDataIter object containing mappings for all rows
** matched by the query.
*/
static int fts5SetupPrefixIterTokendata(
Fts5Iter *pIter,
const char *pToken,
int nToken
const char *pToken, /* Token prefix to search for */
int nToken /* Size of pToken in bytes */
){
Fts5Index *p = pIter->pIndex;
Fts5Buffer token = {0, 0, 0};
@@ -7352,8 +7427,8 @@ static int fts5SetupPrefixIterTokendata(
memcpy(&token.p[1], pToken, nToken);
token.n = nToken+1;
fts5VisitPrefixRange(
p, 0, token.p, token.n, prefixIterSetupTokendataCb, (void*)&ctx
fts5VisitEntries(
p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx
);
fts5TokendataIterSortMap(p, ctx.pT);

View File

@@ -95,6 +95,29 @@ foreach_detail_mode $testprefix {
do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24}
do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24}
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE ft3 USING fts5(
x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%,
prefix=2
);
}
do_execsql_test 2.1 {
INSERT INTO ft3(rowid, x) VALUES(1, 'one');
INSERT INTO ft3(rowid, x) VALUES(2, 'ONE');
INSERT INTO ft3(rowid, x) VALUES(3, 'ONT');
INSERT INTO ft3(rowid, x) VALUES(4, 'on');
INSERT INTO ft3(rowid, x) VALUES(5, 'On');
}
do_execsql_test 2.2 {
SELECT rowid FROM ft3('on*');
} {1 2 3 4 5}
do_execsql_test 2.3 {
SELECT rowid, insttoken(ft3, 0, 0) FROM ft3('on*');
} {1 one 2 one.ONE 3 ont.ONT 4 on 5 on.On}
}
finish_test

View File

@@ -1,5 +1,5 @@
C Rationalize\ssome\sof\sthe\snew\scode\son\sthis\sbranch.
D 2024-09-28T20:45:11.387
C Rationalize\scode\sfurther.\sAnd\sadd\stests.
D 2024-10-01T20:38:08.239
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -99,7 +99,7 @@ F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70
F ext/fts5/fts5_config.c da21548ddbc1a457cb42545f527065221ede8ada6a734891b8c34317a7a9506b
F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c c1005920192146452a3545500761ecc8cfab84572d251e8536103a01899f67d5
F ext/fts5/fts5_index.c 9b2b9636ccefd6140c0ad7a44c51c2ea39f377753a13f06a2e6792215b62cede
F ext/fts5/fts5_main.c 4503498d3453e29a3cd89dacaba029011e89cb8c481a6241611d106e7a369bd4
F ext/fts5/fts5_storage.c 3332497823c3d171cf56379f2bd8c971ce15a19aadacff961106462022c92470
F ext/fts5/fts5_tcl.c 4db9258a7882c5eac0da4433042132aaf15b87dd1e1636c7a6ca203abd2c8bfe
@@ -203,7 +203,7 @@ F ext/fts5/test/fts5optimize2.test 795d4ae5f66a7239cf8d5aef4c2ea96aeb8bcd907bd9b
F ext/fts5/test/fts5optimize3.test 1653029284e10e0715246819893ba30565c4ead0d0fc470adae92c353ea857d3
F ext/fts5/test/fts5origintext.test 63d5b0dc00f0104add8960da0705a70bffd4d86b6feb6ddbb38bff21141d42f0
F ext/fts5/test/fts5origintext2.test f4505ff79bf7369f2b8b10b9cef7476049d844e20b37f29cad3a8b8d5ac6f9ba
F ext/fts5/test/fts5origintext3.test 45c33cf0c91a9ca0e36d298462db3edc7c8fe45fd185649a9dbfd66bb670058b
F ext/fts5/test/fts5origintext3.test 1f5174a9f4cf42f58f833dbfb314940793ca4723854ec2651e7530ddb35a66a6
F ext/fts5/test/fts5origintext4.test 0d3ef0a8038f471dbc83001c34fe5f7ae39b571bfc209670771eb28bc0fc50e8
F ext/fts5/test/fts5origintext5.test ee12b440ec335e5b422d1668aca0051b52ff28b6ee67073e8bbc29f509fd562b
F ext/fts5/test/fts5phrase.test bb2554bb61d15f859678c96dc89a7de415cd5fc3b7b54c29b82a0d0ad138091c
@@ -2214,8 +2214,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff
R da06610bae74973a44f69a92b9b60e12
P 66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075
R 364baf490f2f60461704cce12defe7d5
U dan
Z ca9c653ea5575a07920ac6ddffa15d1d
Z 98928b751e601e5ab0ec38779c287b09
# Remove this line to create a well-formed Fossil manifest.

View File

@@ -1 +1 @@
66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075
0ca002a4ab88f3e7ae1e6e518038157eaa20759f57888c2ed7e50cb92bd96348