mirror of
https://github.com/sqlite/sqlite.git
synced 2025-09-11 08:30:57 +03:00
Rationalize code further. And add tests.
FossilOrigin-Name: 0ca002a4ab88f3e7ae1e6e518038157eaa20759f57888c2ed7e50cb92bd96348
This commit is contained in:
@@ -6199,15 +6199,45 @@ static void fts5MergePrefixLists(
|
||||
}
|
||||
|
||||
|
||||
static int fts5VisitPrefixRange(
|
||||
Fts5Index *p,
|
||||
Fts5Colset *pColset,
|
||||
u8 *pToken,
|
||||
int nToken,
|
||||
/*
|
||||
** Iterate through a range of entries in the FTS index, invoking the xVisit
|
||||
** callback for each of them.
|
||||
**
|
||||
** Parameter pToken points to an nToken byte buffer containing an FTS index term
|
||||
** (i.e. a document term with the preceding 1 byte index identifier -
|
||||
** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits
|
||||
** all entries for terms that have pToken/nToken as a prefix. If bPrefix
|
||||
** is false, then only entries with pToken/nToken as the entire key are
|
||||
** visited.
|
||||
**
|
||||
** If the current table is a tokendata=1 table, then if bPrefix is true then
|
||||
** each index term is treated separately. However, if bPrefix is false, then
|
||||
** all index terms corresponding to pToken/nToken are collapsed into a single
|
||||
** term before the callback is invoked.
|
||||
**
|
||||
** The callback invoked for each entry visited is specified by parameter xVisit.
|
||||
** Each time it is invoked, it is passed a pointer to the Fts5Index object,
|
||||
** a copy of the 7th parameter to this function (pCtx) and a pointer to the
|
||||
** iterator that indicates the current entry. If the current entry is the
|
||||
** first with a new term (i.e. different from that of the previous entry,
|
||||
** including the very first term), then the final two parameters are passed
|
||||
** a pointer to the term and its size in bytes, respectively. If the current
|
||||
** entry is not the first associated with its term, these two parameters
|
||||
** are passed 0.
|
||||
**
|
||||
** If parameter pColset is not NULL, then it is used to filter entries before
|
||||
** the callback is invoked.
|
||||
*/
|
||||
static int fts5VisitEntries(
|
||||
Fts5Index *p, /* Fts5 index object */
|
||||
Fts5Colset *pColset, /* Columns filter to apply, or NULL */
|
||||
u8 *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of buffer pToken in bytes */
|
||||
int bPrefix, /* True for a prefix scan */
|
||||
void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int),
|
||||
void *pCtx
|
||||
void *pCtx /* Passed as second argument to xVisit() */
|
||||
){
|
||||
const int flags = FTS5INDEX_QUERY_SCAN
|
||||
const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN : 0)
|
||||
| FTS5INDEX_QUERY_SKIPEMPTY
|
||||
| FTS5INDEX_QUERY_NOOUTPUT;
|
||||
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
|
||||
@@ -6226,7 +6256,6 @@ static int fts5VisitPrefixRange(
|
||||
|
||||
p1->xSetOutputs(p1, pSeg);
|
||||
|
||||
|
||||
if( bNewTerm ){
|
||||
nNew = pSeg->term.n;
|
||||
pNew = pSeg->term.p;
|
||||
@@ -6241,6 +6270,9 @@ static int fts5VisitPrefixRange(
|
||||
return p->rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries().
|
||||
*/
|
||||
typedef struct PrefixSetupCtx PrefixSetupCtx;
|
||||
struct PrefixSetupCtx {
|
||||
void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
|
||||
@@ -6252,6 +6284,9 @@ struct PrefixSetupCtx {
|
||||
Fts5Buffer doclist;
|
||||
};
|
||||
|
||||
/*
|
||||
** fts5VisitEntries() callback used by fts5SetupPrefixIter()
|
||||
*/
|
||||
static void prefixIterSetupCb(
|
||||
Fts5Index *p,
|
||||
void *pCtx,
|
||||
@@ -6325,6 +6360,7 @@ static void fts5SetupPrefixIter(
|
||||
assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) );
|
||||
|
||||
if( p->rc==SQLITE_OK ){
|
||||
void *pCtx = (void*)&s;
|
||||
int i;
|
||||
Fts5Data *pData;
|
||||
|
||||
@@ -6334,30 +6370,12 @@ static void fts5SetupPrefixIter(
|
||||
** corresponding to the prefix itself. That one is extracted from the
|
||||
** main term index here. */
|
||||
if( iIdx!=0 ){
|
||||
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
|
||||
int dummy = 0;
|
||||
const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
|
||||
pToken[0] = FTS5_MAIN_PREFIX;
|
||||
fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
|
||||
fts5IterSetOutputCb(&p->rc, p1);
|
||||
for(;
|
||||
fts5MultiIterEof(p, p1)==0;
|
||||
fts5MultiIterNext2(p, p1, &dummy)
|
||||
){
|
||||
Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
|
||||
p1->xSetOutputs(p1, pSeg);
|
||||
if( p1->base.nData ){
|
||||
s.xAppend(p, (u64)p1->base.iRowid-(u64)s.iLastRowid, p1, &s.doclist);
|
||||
s.iLastRowid = p1->base.iRowid;
|
||||
}
|
||||
}
|
||||
fts5MultiIterFree(p1);
|
||||
fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx);
|
||||
}
|
||||
|
||||
pToken[0] = FTS5_MAIN_PREFIX + iIdx;
|
||||
fts5VisitPrefixRange(
|
||||
p, pColset, pToken, nToken, prefixIterSetupCb, (void*)&s
|
||||
);
|
||||
fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx);
|
||||
|
||||
assert( (s.nBuf%s.nMerge)==0 );
|
||||
for(i=0; i<s.nBuf; i+=s.nMerge){
|
||||
@@ -6637,29 +6655,57 @@ static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
|
||||
|
||||
/*
|
||||
** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
|
||||
** array of these for each row it visits. Or, for an iterator used by an
|
||||
** "ORDER BY rank" query, it accumulates an array of these for the entire
|
||||
** query.
|
||||
** array of these for each row it visits (so all iRowid fields are the same).
|
||||
** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an
|
||||
** array of these for the entire query (in which case iRowid fields may take
|
||||
** a variety of values).
|
||||
**
|
||||
** Each instance in the array indicates the iterator (and therefore term)
|
||||
** associated with position iPos of rowid iRowid. This is used by the
|
||||
** xInstToken() API.
|
||||
**
|
||||
** iRowid:
|
||||
** Rowid for the current entry.
|
||||
**
|
||||
** iPos:
|
||||
** Position of current entry within row. In the usual ((iCol<<32)+iOff)
|
||||
** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()).
|
||||
**
|
||||
** iIter:
|
||||
** If the Fts5TokenDataIter iterator that the entry is part of is
|
||||
** actually an iterator (i.e. with nIter>0, not just a container for
|
||||
** Fts5TokenDataMap structures), then this variable is an index into
|
||||
** the apIter[] array. The corresponding term is that which the iterator
|
||||
** at apIter[iIter] currently points to.
|
||||
**
|
||||
** Or, if the Fts5TokenDataIter iterator is just a container object
|
||||
** (nIter==0), then iIter is an index into the term.p[] buffer where
|
||||
** the term is stored.
|
||||
**
|
||||
** nByte:
|
||||
** In the case where iIter is an index into term.p[], this variable
|
||||
** is the size of the term in bytes. If iIter is an index into apIter[],
|
||||
** this variable is unused.
|
||||
*/
|
||||
struct Fts5TokenDataMap {
|
||||
i64 iRowid; /* Row this token is located in */
|
||||
i64 iPos; /* Position of token */
|
||||
|
||||
int iIter; /* Iterator token was read from */
|
||||
int nByte; /* Length of token in bytes (or 0) */
|
||||
};
|
||||
|
||||
/*
|
||||
** An object used to supplement Fts5Iter for tokendata=1 iterators.
|
||||
**
|
||||
** This object serves two purposes. The first is as a container for an array
|
||||
** of Fts5TokenDataMap structures, which are used to find the token required
|
||||
** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and
|
||||
** aMap[] variables.
|
||||
*/
|
||||
struct Fts5TokenDataIter {
|
||||
int nMap;
|
||||
int nMapAlloc;
|
||||
Fts5TokenDataMap *aMap;
|
||||
int nMapAlloc; /* Allocated size of aMap[] in entries */
|
||||
int nMap; /* Number of valid entries in aMap[] */
|
||||
Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */
|
||||
|
||||
/* The following are used for prefix-queries only. */
|
||||
Fts5Buffer terms;
|
||||
@@ -7234,10 +7280,18 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
|
||||
return (z ? &z[1] : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
** The two input arrays - a1[] and a2[] - are in sorted order. This function
|
||||
** merges the two arrays together and writes the result to output array
|
||||
** aOut[]. aOut[] is guaranteed to be large enough to hold the result.
|
||||
**
|
||||
** Duplicate entries are copied into the output. So the size of the output
|
||||
** array is always (n1+n2) entries.
|
||||
*/
|
||||
static void fts5TokendataMerge(
|
||||
Fts5TokenDataMap *a1, int n1,
|
||||
Fts5TokenDataMap *a2, int n2,
|
||||
Fts5TokenDataMap *aOut
|
||||
Fts5TokenDataMap *a1, int n1, /* Input array 1 */
|
||||
Fts5TokenDataMap *a2, int n2, /* Input array 2 */
|
||||
Fts5TokenDataMap *aOut /* Output array */
|
||||
){
|
||||
int i1 = 0;
|
||||
int i2 = 0;
|
||||
@@ -7258,6 +7312,12 @@ static void fts5TokendataMerge(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Sort the contents of the pT->aMap[] array.
|
||||
**
|
||||
** The sorting algorithm requires a malloc(). If this fails, an error code
|
||||
** is left in Fts5Index.rc before returning.
|
||||
*/
|
||||
static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
|
||||
Fts5TokenDataMap *aTmp = 0;
|
||||
int nByte = pT->nMap * sizeof(Fts5TokenDataMap);
|
||||
@@ -7298,13 +7358,23 @@ static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata()
|
||||
** to pass data to prefixIterSetupTokendataCb().
|
||||
*/
|
||||
typedef struct TokendataSetupCtx TokendataSetupCtx;
|
||||
struct TokendataSetupCtx {
|
||||
Fts5TokenDataIter *pT;
|
||||
int iTermOff;
|
||||
int nTermByte;
|
||||
Fts5TokenDataIter *pT; /* Object being populated with mappings */
|
||||
int iTermOff; /* Offset of current term in terms.p[] */
|
||||
int nTermByte; /* Size of current term in bytes */
|
||||
};
|
||||
|
||||
/*
|
||||
** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This
|
||||
** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each
|
||||
** position in the current position-list. It doesn't matter that some of
|
||||
** these may be out of order - they will be sorted later.
|
||||
*/
|
||||
static void prefixIterSetupTokendataCb(
|
||||
Fts5Index *p,
|
||||
void *pCtx,
|
||||
@@ -7331,10 +7401,15 @@ static void prefixIterSetupTokendataCb(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** pIter is a prefix query. This function populates pIter->pTokenDataIter
|
||||
** with an Fts5TokenDataIter object containing mappings for all rows
|
||||
** matched by the query.
|
||||
*/
|
||||
static int fts5SetupPrefixIterTokendata(
|
||||
Fts5Iter *pIter,
|
||||
const char *pToken,
|
||||
int nToken
|
||||
const char *pToken, /* Token prefix to search for */
|
||||
int nToken /* Size of pToken in bytes */
|
||||
){
|
||||
Fts5Index *p = pIter->pIndex;
|
||||
Fts5Buffer token = {0, 0, 0};
|
||||
@@ -7352,8 +7427,8 @@ static int fts5SetupPrefixIterTokendata(
|
||||
memcpy(&token.p[1], pToken, nToken);
|
||||
token.n = nToken+1;
|
||||
|
||||
fts5VisitPrefixRange(
|
||||
p, 0, token.p, token.n, prefixIterSetupTokendataCb, (void*)&ctx
|
||||
fts5VisitEntries(
|
||||
p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx
|
||||
);
|
||||
|
||||
fts5TokendataIterSortMap(p, ctx.pT);
|
||||
|
@@ -95,6 +95,29 @@ foreach_detail_mode $testprefix {
|
||||
do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24}
|
||||
do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24}
|
||||
|
||||
do_execsql_test 2.0 {
|
||||
CREATE VIRTUAL TABLE ft3 USING fts5(
|
||||
x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%,
|
||||
prefix=2
|
||||
);
|
||||
}
|
||||
do_execsql_test 2.1 {
|
||||
INSERT INTO ft3(rowid, x) VALUES(1, 'one');
|
||||
INSERT INTO ft3(rowid, x) VALUES(2, 'ONE');
|
||||
INSERT INTO ft3(rowid, x) VALUES(3, 'ONT');
|
||||
INSERT INTO ft3(rowid, x) VALUES(4, 'on');
|
||||
INSERT INTO ft3(rowid, x) VALUES(5, 'On');
|
||||
}
|
||||
|
||||
do_execsql_test 2.2 {
|
||||
SELECT rowid FROM ft3('on*');
|
||||
} {1 2 3 4 5}
|
||||
|
||||
do_execsql_test 2.3 {
|
||||
SELECT rowid, insttoken(ft3, 0, 0) FROM ft3('on*');
|
||||
} {1 one 2 one.ONE 3 ont.ONT 4 on 5 on.On}
|
||||
|
||||
|
||||
}
|
||||
|
||||
finish_test
|
||||
|
14
manifest
14
manifest
@@ -1,5 +1,5 @@
|
||||
C Rationalize\ssome\sof\sthe\snew\scode\son\sthis\sbranch.
|
||||
D 2024-09-28T20:45:11.387
|
||||
C Rationalize\scode\sfurther.\sAnd\sadd\stests.
|
||||
D 2024-10-01T20:38:08.239
|
||||
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
|
||||
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
|
||||
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
|
||||
@@ -99,7 +99,7 @@ F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70
|
||||
F ext/fts5/fts5_config.c da21548ddbc1a457cb42545f527065221ede8ada6a734891b8c34317a7a9506b
|
||||
F ext/fts5/fts5_expr.c 69b8d976058512c07dfe86e229521b7a871768157bd1607cedf1a5038dfd72c9
|
||||
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
|
||||
F ext/fts5/fts5_index.c c1005920192146452a3545500761ecc8cfab84572d251e8536103a01899f67d5
|
||||
F ext/fts5/fts5_index.c 9b2b9636ccefd6140c0ad7a44c51c2ea39f377753a13f06a2e6792215b62cede
|
||||
F ext/fts5/fts5_main.c 4503498d3453e29a3cd89dacaba029011e89cb8c481a6241611d106e7a369bd4
|
||||
F ext/fts5/fts5_storage.c 3332497823c3d171cf56379f2bd8c971ce15a19aadacff961106462022c92470
|
||||
F ext/fts5/fts5_tcl.c 4db9258a7882c5eac0da4433042132aaf15b87dd1e1636c7a6ca203abd2c8bfe
|
||||
@@ -203,7 +203,7 @@ F ext/fts5/test/fts5optimize2.test 795d4ae5f66a7239cf8d5aef4c2ea96aeb8bcd907bd9b
|
||||
F ext/fts5/test/fts5optimize3.test 1653029284e10e0715246819893ba30565c4ead0d0fc470adae92c353ea857d3
|
||||
F ext/fts5/test/fts5origintext.test 63d5b0dc00f0104add8960da0705a70bffd4d86b6feb6ddbb38bff21141d42f0
|
||||
F ext/fts5/test/fts5origintext2.test f4505ff79bf7369f2b8b10b9cef7476049d844e20b37f29cad3a8b8d5ac6f9ba
|
||||
F ext/fts5/test/fts5origintext3.test 45c33cf0c91a9ca0e36d298462db3edc7c8fe45fd185649a9dbfd66bb670058b
|
||||
F ext/fts5/test/fts5origintext3.test 1f5174a9f4cf42f58f833dbfb314940793ca4723854ec2651e7530ddb35a66a6
|
||||
F ext/fts5/test/fts5origintext4.test 0d3ef0a8038f471dbc83001c34fe5f7ae39b571bfc209670771eb28bc0fc50e8
|
||||
F ext/fts5/test/fts5origintext5.test ee12b440ec335e5b422d1668aca0051b52ff28b6ee67073e8bbc29f509fd562b
|
||||
F ext/fts5/test/fts5phrase.test bb2554bb61d15f859678c96dc89a7de415cd5fc3b7b54c29b82a0d0ad138091c
|
||||
@@ -2214,8 +2214,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080
|
||||
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
|
||||
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
|
||||
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
|
||||
P 204ddf4e726b695dd12ab4a945ec2461655aa0bcc38b74e970f07ed2ac43c6ff
|
||||
R da06610bae74973a44f69a92b9b60e12
|
||||
P 66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075
|
||||
R 364baf490f2f60461704cce12defe7d5
|
||||
U dan
|
||||
Z ca9c653ea5575a07920ac6ddffa15d1d
|
||||
Z 98928b751e601e5ab0ec38779c287b09
|
||||
# Remove this line to create a well-formed Fossil manifest.
|
||||
|
@@ -1 +1 @@
|
||||
66f209ba40e7de49b304d7931ff38a4994038452aab08e9347286a234c6f7075
|
||||
0ca002a4ab88f3e7ae1e6e518038157eaa20759f57888c2ed7e50cb92bd96348
|
||||
|
Reference in New Issue
Block a user