1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Fix the fts5 trigram tokenizer so that it handles non-nul-terminated strings.

FossilOrigin-Name: 84f4e37178a65e3128ac0240d37ac40df08b4050ab070d10707e35d11dcbeb10
This commit is contained in:
dan
2024-11-11 19:49:26 +00:00
parent be46f935dc
commit 0cd2ffffb7
5 changed files with 45 additions and 18 deletions

View File

@ -730,8 +730,9 @@ static int SQLITE_TCLAPI f5tTokenize(
int objc, int objc,
Tcl_Obj *CONST objv[] Tcl_Obj *CONST objv[]
){ ){
char *zText; char *pCopy = 0;
Tcl_Size nText; char *zText = 0;
Tcl_Size nText = 0;
sqlite3 *db = 0; sqlite3 *db = 0;
fts5_api *pApi = 0; fts5_api *pApi = 0;
Fts5Tokenizer *pTok = 0; Fts5Tokenizer *pTok = 0;
@ -778,22 +779,33 @@ static int SQLITE_TCLAPI f5tTokenize(
return TCL_ERROR; return TCL_ERROR;
} }
if( nText>0 ){
pCopy = sqlite3_malloc(nText);
if( pCopy==0 ){
tokenizer.xDelete(pTok);
Tcl_AppendResult(interp, "error in sqlite3_malloc()", (char*)0);
return TCL_ERROR;
}else{
memcpy(pCopy, zText, nText);
}
}
pRet = Tcl_NewObj(); pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet); Tcl_IncrRefCount(pRet);
ctx.bSubst = (objc==5); ctx.bSubst = (objc==5);
ctx.pRet = pRet; ctx.pRet = pRet;
ctx.zInput = zText; ctx.zInput = pCopy;
rc = tokenizer.xTokenize( rc = tokenizer.xTokenize(
pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText,(int)nText, xTokenizeCb2 pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, pCopy,(int)nText, xTokenizeCb2
); );
tokenizer.xDelete(pTok); tokenizer.xDelete(pTok);
sqlite3_free(pCopy);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", (char*)0); Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", (char*)0);
Tcl_DecrRefCount(pRet); Tcl_DecrRefCount(pRet);
return TCL_ERROR; return TCL_ERROR;
} }
Tcl_Free((void*)azArg); Tcl_Free((void*)azArg);
Tcl_SetObjResult(interp, pRet); Tcl_SetObjResult(interp, pRet);
Tcl_DecrRefCount(pRet); Tcl_DecrRefCount(pRet);

View File

@ -1354,7 +1354,7 @@ static int fts5TriTokenize(
int ii; int ii;
const unsigned char *zIn = (const unsigned char*)pText; const unsigned char *zIn = (const unsigned char*)pText;
const unsigned char *zEof = &zIn[nText]; const unsigned char *zEof = &zIn[nText];
u32 iCode; u32 iCode = 0;
int aStart[3]; /* Input offset of each character in aBuf[] */ int aStart[3]; /* Input offset of each character in aBuf[] */
UNUSED_PARAM(unusedFlags); UNUSED_PARAM(unusedFlags);
@ -1363,8 +1363,8 @@ static int fts5TriTokenize(
for(ii=0; ii<3; ii++){ for(ii=0; ii<3; ii++){
do { do {
aStart[ii] = zIn - (const unsigned char*)pText; aStart[ii] = zIn - (const unsigned char*)pText;
if( zIn>=zEof ) return SQLITE_OK;
READ_UTF8(zIn, zEof, iCode); READ_UTF8(zIn, zEof, iCode);
if( iCode==0 ) return SQLITE_OK;
if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam);
}while( iCode==0 ); }while( iCode==0 );
WRITE_UTF8(zOut, iCode); WRITE_UTF8(zOut, iCode);
@ -1385,8 +1385,11 @@ static int fts5TriTokenize(
/* Read characters from the input up until the first non-diacritic */ /* Read characters from the input up until the first non-diacritic */
do { do {
iNext = zIn - (const unsigned char*)pText; iNext = zIn - (const unsigned char*)pText;
if( zIn>=zEof ){
iCode = 0;
break;
}
READ_UTF8(zIn, zEof, iCode); READ_UTF8(zIn, zEof, iCode);
if( iCode==0 ) break;
if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam);
}while( iCode==0 ); }while( iCode==0 );

View File

@ -350,5 +350,17 @@ do_execsql_test 11.1 {
INSERT INTO t4 VALUES( str('') ); INSERT INTO t4 VALUES( str('') );
} }
do_test 12.0 {
sqlite3_fts5_tokenize db trigram "abcd"
} {abc 0 3 bcd 1 4}
do_test 12.1 {
sqlite3_fts5_tokenize db trigram "a"
} {}
do_test 12.2 {
sqlite3_fts5_tokenize db trigram ""
} {}
finish_test finish_test

View File

@ -1,5 +1,5 @@
C Add\sthe\s".dbtotxt"\scommand\sto\sthe\sCLI. C Fix\sthe\sfts5\strigram\stokenizer\sso\sthat\sit\shandles\snon-nul-terminated\sstrings.
D 2024-11-11T19:07:58.682 D 2024-11-11T19:49:26.299
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d
@ -113,10 +113,10 @@ F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a
F ext/fts5/fts5_index.c 368a968570ce12ba40223e284a588d9f93ee23a0133727f0df1fcd64086b1fb6 F ext/fts5/fts5_index.c 368a968570ce12ba40223e284a588d9f93ee23a0133727f0df1fcd64086b1fb6
F ext/fts5/fts5_main.c 50eb059e51d730e8e0c77df4e568b018079e112a755c094488b0d5b1aa06afbb F ext/fts5/fts5_main.c 50eb059e51d730e8e0c77df4e568b018079e112a755c094488b0d5b1aa06afbb
F ext/fts5/fts5_storage.c 337b05e4c66fc822d031e264d65bde807ec2fab08665ca2cc8aaf9c5fa06792c F ext/fts5/fts5_storage.c 337b05e4c66fc822d031e264d65bde807ec2fab08665ca2cc8aaf9c5fa06792c
F ext/fts5/fts5_tcl.c aee6ae6d0c6968564c392bf0d09aaabb4d8bea9ca69fd224dc9b44243324acbf F ext/fts5/fts5_tcl.c 7fb5a3d3404099075aaa2457307cb459bbc257c0de3dbd52b1e80a5b503e0329
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c 033e2e43b8e852c0ef6cecc611266d61e2346e52ec7dcfb76a428fe56a07efa9 F ext/fts5/fts5_tokenize.c 87ab719f0556172da3414f1741c11bb4d333ebecde157945a55478bfe6e46c44
F ext/fts5/fts5_unicode2.c 6f9b0fb79a8facaed76628ffd4eb9c16d7f2b84b52872784f617cf3422a9b043 F ext/fts5/fts5_unicode2.c 6f9b0fb79a8facaed76628ffd4eb9c16d7f2b84b52872784f617cf3422a9b043
F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80 F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80
F ext/fts5/fts5_vocab.c e4830b00809e5da53bc10f93adc59e321407b0f801c7f4167c0e47f5552267e0 F ext/fts5/fts5_vocab.c e4830b00809e5da53bc10f93adc59e321407b0f801c7f4167c0e47f5552267e0
@ -248,7 +248,7 @@ F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98 F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98
F ext/fts5/test/fts5tokenizer2.test ddb8b10fbe4b84b2a75812671f127774c1d2e3e2bf82d2e0e4f0bb1cd8a2b2d6 F ext/fts5/test/fts5tokenizer2.test ddb8b10fbe4b84b2a75812671f127774c1d2e3e2bf82d2e0e4f0bb1cd8a2b2d6
F ext/fts5/test/fts5tokenizer3.test eea778f7bb7024c3e904e28915f9d53286141671b138722148be22a9c758bdc3 F ext/fts5/test/fts5tokenizer3.test eea778f7bb7024c3e904e28915f9d53286141671b138722148be22a9c758bdc3
F ext/fts5/test/fts5trigram.test 9927c9e9b35116ea00748c8e41d9cbc2b95a6c90845cd82a59c11fedfd16404a F ext/fts5/test/fts5trigram.test a55fde7065ae69a0f82c5a7a5bf5286a97de11ae4bff6537fd3e27ca9a01416f
F ext/fts5/test/fts5trigram2.test 6fde9de7f63a6b4aa18dc731be56dbd6be4e755c9b13dcd55479e200d1df0e61 F ext/fts5/test/fts5trigram2.test 6fde9de7f63a6b4aa18dc731be56dbd6be4e755c9b13dcd55479e200d1df0e61
F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2 F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2
F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602 F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602
@ -2198,8 +2198,8 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350
F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7
F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 073080cae3ea0d12b133d9c9ae98413bb32870a9738c98b89bc345181be81f23 P b43acf5a8cd4a5efbb90b71af7710084f49bb90ffe4f56de168e8c3a6b679124
R 6bdbd63031b22370793e2437aff7acb5 R 823df7bdc2f581383fdd27b861d3511c
U drh U dan
Z d185df0f51d41a6d3859bb43f39c0c62 Z 55fb3f376e1035a7680545a25c6be334
# Remove this line to create a well-formed Fossil manifest. # Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
b43acf5a8cd4a5efbb90b71af7710084f49bb90ffe4f56de168e8c3a6b679124 84f4e37178a65e3128ac0240d37ac40df08b4050ab070d10707e35d11dcbeb10