mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-05 15:55:57 +03:00
Fix prefix indexes so that they work in characters, not bytes.
FossilOrigin-Name: af8d43a4a08528bbae25ee38fe25de8a86f8a21c
This commit is contained in:
@@ -81,7 +81,7 @@
|
|||||||
** + for each segment from oldest to newest:
|
** + for each segment from oldest to newest:
|
||||||
** + segment id (always > 0)
|
** + segment id (always > 0)
|
||||||
** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
|
** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
|
||||||
** + first leaf page number (often 1)
|
** + first leaf page number (often 1, always greater than 0)
|
||||||
** + final leaf page number
|
** + final leaf page number
|
||||||
**
|
**
|
||||||
** 2. The Averages Record:
|
** 2. The Averages Record:
|
||||||
@@ -4049,6 +4049,39 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes occupied by the first nChar characters
** of the buffer, or 0 if the buffer contains fewer than nChar characters in
** total (this includes the case nChar<=0).
**
** A character is counted the same way fts5IndexCharlen() counts it: one
** byte, plus - if that byte is 0xC0 or greater - any continuation bytes
** (10xxxxxx) that immediately follow it. No byte at or beyond offset nByte
** is ever read, so the buffer does not need to be nul-terminated and the
** return value never exceeds nByte.
*/
static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
  int n = 0;                      /* Bytes consumed so far */
  int i;                          /* Characters consumed so far */
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      /* Lead byte of a multi-byte character. Skip its continuation bytes,
      ** stopping at the end of the buffer so that a character that finishes
      ** exactly on the last byte does not cause a read of p[nByte]. */
      while( n<nByte && (p[n] & 0xc0)==0x80 ) n++;
    }
  }
  return n;
}
|
||||||
|
|
||||||
|
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** Each character is taken to be one byte plus, if that byte is 0xC0 or
** greater, every continuation byte (10xxxxxx) that directly follows it
** within the first nIn bytes. A continuation byte that does not follow a
** lead byte is counted as a character of its own. Bytes at or beyond offset
** nIn are never read.
**
** This is a file-local helper, so it is declared static.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;                  /* Characters counted so far */
  int i = 0;                      /* Offset of next byte to examine */
  while( i<nIn ){
    if( (unsigned char)pIn[i++]>=0xc0 ){
      /* Multi-byte character: consume the trailing continuation bytes */
      while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
    }
    nChar++;
  }
  return nChar;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Calculate and return a checksum that is the XOR of the index entry
|
** Calculate and return a checksum that is the XOR of the index entry
|
||||||
** checksum of all entries that would be generated by the token specified
|
** checksum of all entries that would be generated by the token specified
|
||||||
@@ -4064,10 +4097,12 @@ u64 sqlite3Fts5IndexCksum(
|
|||||||
u64 ret = 0; /* Return value */
|
u64 ret = 0; /* Return value */
|
||||||
int iIdx; /* For iterating through indexes */
|
int iIdx; /* For iterating through indexes */
|
||||||
|
|
||||||
for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
|
ret = fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nTerm);
|
||||||
int n = ((iIdx==pConfig->nPrefix) ? nTerm : pConfig->aPrefix[iIdx]);
|
|
||||||
if( n<=nTerm ){
|
for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){
|
||||||
ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, n);
|
int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]);
|
||||||
|
if( nByte ){
|
||||||
|
ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nByte);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4107,8 +4142,9 @@ int sqlite3Fts5IndexWrite(
|
|||||||
** prefix hash tables that it is large enough for. */
|
** prefix hash tables that it is large enough for. */
|
||||||
fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken);
|
fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken);
|
||||||
for(i=0; i<pConfig->nPrefix; i++){
|
for(i=0; i<pConfig->nPrefix; i++){
|
||||||
if( nToken>=pConfig->aPrefix[i] ){
|
int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
|
||||||
fts5AddTermToHash(p, i+1, iCol, iPos, pToken, pConfig->aPrefix[i]);
|
if( nByte ){
|
||||||
|
fts5AddTermToHash(p, i+1, iCol, iPos, pToken, nByte);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4130,8 +4166,9 @@ int sqlite3Fts5IndexQuery(
|
|||||||
|
|
||||||
if( flags & FTS5INDEX_QUERY_PREFIX ){
|
if( flags & FTS5INDEX_QUERY_PREFIX ){
|
||||||
Fts5Config *pConfig = p->pConfig;
|
Fts5Config *pConfig = p->pConfig;
|
||||||
|
int nChar = fts5IndexCharlen(pToken, nToken);
|
||||||
for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
|
for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
|
||||||
if( pConfig->aPrefix[iIdx-1]==nToken ) break;
|
if( pConfig->aPrefix[iIdx-1]==nChar ) break;
|
||||||
}
|
}
|
||||||
if( iIdx>pConfig->nPrefix ){
|
if( iIdx>pConfig->nPrefix ){
|
||||||
iIdx = -1;
|
iIdx = -1;
|
||||||
@@ -4602,6 +4639,55 @@ static void fts5DecodeFunction(
|
|||||||
fts5BufferFree(&s);
|
fts5BufferFree(&s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
** The implementation of user-defined scalar function fts5_rowid().
|
||||||
|
*/
|
||||||
|
static void fts5RowidFunction(
|
||||||
|
sqlite3_context *pCtx, /* Function call context */
|
||||||
|
int nArg, /* Number of args (always 2) */
|
||||||
|
sqlite3_value **apVal /* Function arguments */
|
||||||
|
){
|
||||||
|
const char *zArg;
|
||||||
|
if( nArg==0 ){
|
||||||
|
sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
|
||||||
|
}else{
|
||||||
|
zArg = (const char*)sqlite3_value_text(apVal[0]);
|
||||||
|
if( 0==sqlite3_stricmp(zArg, "segment") ){
|
||||||
|
i64 iRowid;
|
||||||
|
int idx, segid, height, pgno;
|
||||||
|
if( nArg!=5 ){
|
||||||
|
sqlite3_result_error(pCtx,
|
||||||
|
"should be: fts5_rowid('segment', idx, segid, height, pgno))", -1
|
||||||
|
);
|
||||||
|
}else{
|
||||||
|
idx = sqlite3_value_int(apVal[1]);
|
||||||
|
segid = sqlite3_value_int(apVal[2]);
|
||||||
|
height = sqlite3_value_int(apVal[3]);
|
||||||
|
pgno = sqlite3_value_int(apVal[4]);
|
||||||
|
iRowid = FTS5_SEGMENT_ROWID(idx, segid, height, pgno);
|
||||||
|
sqlite3_result_int64(pCtx, iRowid);
|
||||||
|
}
|
||||||
|
}else if( 0==sqlite3_stricmp(zArg, "start-of-index") ){
|
||||||
|
i64 iRowid;
|
||||||
|
int idx;
|
||||||
|
if( nArg!=2 ){
|
||||||
|
sqlite3_result_error(pCtx,
|
||||||
|
"should be: fts5_rowid('start-of-index', idx)", -1
|
||||||
|
);
|
||||||
|
}else{
|
||||||
|
idx = sqlite3_value_int(apVal[1]);
|
||||||
|
iRowid = FTS5_SEGMENT_ROWID(idx, 1, 0, 0);
|
||||||
|
sqlite3_result_int64(pCtx, iRowid);
|
||||||
|
}
|
||||||
|
}else {
|
||||||
|
sqlite3_result_error(pCtx,
|
||||||
|
"first arg to fts5_rowid() must be 'segment' "
|
||||||
|
"or 'start-of-index' ..."
|
||||||
|
, -1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** This is called as part of registering the FTS5 module with database
|
** This is called as part of registering the FTS5 module with database
|
||||||
@@ -4615,6 +4701,11 @@ int sqlite3Fts5IndexInit(sqlite3 *db){
|
|||||||
int rc = sqlite3_create_function(
|
int rc = sqlite3_create_function(
|
||||||
db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
|
db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
|
||||||
);
|
);
|
||||||
|
if( rc==SQLITE_OK ){
|
||||||
|
rc = sqlite3_create_function(
|
||||||
|
db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
|
||||||
|
);
|
||||||
|
}
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
60
ext/fts5/test/fts5prefix.test
Normal file
60
ext/fts5/test/fts5prefix.test
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# 2015 Jan 13
|
||||||
|
#
|
||||||
|
# The author disclaims copyright to this source code. In place of
|
||||||
|
# a legal notice, here is a blessing:
|
||||||
|
#
|
||||||
|
# May you do good and not evil.
|
||||||
|
# May you find forgiveness for yourself and forgive others.
|
||||||
|
# May you share freely, never taking more than you give.
|
||||||
|
#
|
||||||
|
#***********************************************************************
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5prefix

#-------------------------------------------------------------------------
# Check that prefix indexes really do index n-character prefixes, not
# n-byte prefixes. Use the ascii tokenizer so as not to be confused by
# diacritic removal.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2)
}

# Insert two rows, each a single token made up of four non-ASCII
# characters.
do_test 1.1 {
  foreach {rowid string} {
    1 "\xCA\xCB\xCC\xCD"
    2 "\u1234\u5678\u4321\u8765"
  } {
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
  }
} {}

do_execsql_test 1.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

# Debugging aid: uncomment to dump the decoded contents of the index.
#db eval { select fts5_decode(id, block) AS d FROM t1_data; } { puts $d }

# Run the two-character prefix queries twice. After the first pass, delete
# every t1_data row with a rowid in the range
# [fts5_rowid('start-of-index', 0), fts5_rowid('start-of-index', 1)), so
# that the second pass runs without the index 0 records.
foreach o {1 2} {
  foreach {tn q res} {
    1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
    2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
  } {
    do_execsql_test 1.3.$o.$tn $q $res
  }

  execsql {
    DELETE FROM t1_data WHERE
      rowid>=fts5_rowid('start-of-index', 0) AND
      rowid<fts5_rowid('start-of-index', 1);
  }
}


finish_test
|
||||||
|
|
13
manifest
13
manifest
@@ -1,5 +1,5 @@
|
|||||||
C Optimize\sthe\sunicode61\stokenizer\sso\sthat\sit\shandles\sascii\stext\sfaster.\sMake\sit\sthe\sdefault\stokenizer.\sChange\sthe\sname\sof\sthe\ssimple\stokenizer\sto\s"ascii".
|
C Fix\sprefix\sindexes\sso\sthat\sthey\swork\sin\scharacters,\snot\sbytes.
|
||||||
D 2015-01-12T17:58:04.627
|
D 2015-01-13T17:25:08.235
|
||||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||||
F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0
|
F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0
|
||||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||||
@@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9
|
|||||||
F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399
|
F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399
|
||||||
F ext/fts5/fts5_expr.c 6ba7a2e34a80989cca509bd295de1bc9f8e739a3
|
F ext/fts5/fts5_expr.c 6ba7a2e34a80989cca509bd295de1bc9f8e739a3
|
||||||
F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279
|
F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279
|
||||||
F ext/fts5/fts5_index.c ea36c1e42aaf8038b6139be95575eb7fe01f34e4
|
F ext/fts5/fts5_index.c 6f9f98875b2ee5a16255911e1dc1b0b32cb1c350
|
||||||
F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664
|
F ext/fts5/fts5_storage.c 8bc9e5b6654e1545e9513def277ef3f025921664
|
||||||
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
|
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
|
||||||
F ext/fts5/fts5_tokenize.c bdb6a1f599a94ec6e9c1cad037d1071e823dcb5d
|
F ext/fts5/fts5_tokenize.c bdb6a1f599a94ec6e9c1cad037d1071e823dcb5d
|
||||||
@@ -138,6 +138,7 @@ F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c
|
|||||||
F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947
|
F ext/fts5/test/fts5near.test 3f9f64e16cac82725d03d4e04c661090f0b3b947
|
||||||
F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54
|
F ext/fts5/test/fts5optimize.test 0028c90a7817d3e576d1148fc8dff17d89054e54
|
||||||
F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e
|
F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e
|
||||||
|
F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5
|
||||||
F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4
|
F ext/fts5/test/fts5rebuild.test 2a5e98205393487b4a732c8290999af7c0b907b4
|
||||||
F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c
|
F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c
|
||||||
F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d
|
F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d
|
||||||
@@ -1274,7 +1275,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
|
|||||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||||
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
|
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
|
||||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||||
P 512e1bdb4093b59d1494dfc63391476eadd52aea
|
P f22dbccad9499624880ddd48df1b07fb42b1ad66
|
||||||
R 30a0c3c40d1701cf92ddf5b1410b6af9
|
R 8d592e678c3bea0440cf749de24705b7
|
||||||
U dan
|
U dan
|
||||||
Z 9b7b348d489cfd6e15d4a8bf3e2c22e9
|
Z 3408fdf2714814208d88a4779f5de9eb
|
||||||
|
@@ -1 +1 @@
|
|||||||
f22dbccad9499624880ddd48df1b07fb42b1ad66
|
af8d43a4a08528bbae25ee38fe25de8a86f8a21c
|
Reference in New Issue
Block a user