Mirror of https://github.com/sqlite/sqlite.git (synced 2025-07-30 19:03:16 +03:00)
Fix the fts5 trigram tokenizer so that it handles non-nul-terminated strings.
FossilOrigin-Name: 84f4e37178a65e3128ac0240d37ac40df08b4050ab070d10707e35d11dcbeb10
This commit is contained in:
@ -730,8 +730,9 @@ static int SQLITE_TCLAPI f5tTokenize(
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
char *zText;
|
||||
Tcl_Size nText;
|
||||
char *pCopy = 0;
|
||||
char *zText = 0;
|
||||
Tcl_Size nText = 0;
|
||||
sqlite3 *db = 0;
|
||||
fts5_api *pApi = 0;
|
||||
Fts5Tokenizer *pTok = 0;
|
||||
@ -778,22 +779,33 @@ static int SQLITE_TCLAPI f5tTokenize(
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
if( nText>0 ){
|
||||
pCopy = sqlite3_malloc(nText);
|
||||
if( pCopy==0 ){
|
||||
tokenizer.xDelete(pTok);
|
||||
Tcl_AppendResult(interp, "error in sqlite3_malloc()", (char*)0);
|
||||
return TCL_ERROR;
|
||||
}else{
|
||||
memcpy(pCopy, zText, nText);
|
||||
}
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
ctx.bSubst = (objc==5);
|
||||
ctx.pRet = pRet;
|
||||
ctx.zInput = zText;
|
||||
ctx.zInput = pCopy;
|
||||
rc = tokenizer.xTokenize(
|
||||
pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText,(int)nText, xTokenizeCb2
|
||||
pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, pCopy,(int)nText, xTokenizeCb2
|
||||
);
|
||||
tokenizer.xDelete(pTok);
|
||||
sqlite3_free(pCopy);
|
||||
if( rc!=SQLITE_OK ){
|
||||
Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", (char*)0);
|
||||
Tcl_DecrRefCount(pRet);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
Tcl_Free((void*)azArg);
|
||||
Tcl_SetObjResult(interp, pRet);
|
||||
Tcl_DecrRefCount(pRet);
|
||||
|
@ -1354,7 +1354,7 @@ static int fts5TriTokenize(
|
||||
int ii;
|
||||
const unsigned char *zIn = (const unsigned char*)pText;
|
||||
const unsigned char *zEof = &zIn[nText];
|
||||
u32 iCode;
|
||||
u32 iCode = 0;
|
||||
int aStart[3]; /* Input offset of each character in aBuf[] */
|
||||
|
||||
UNUSED_PARAM(unusedFlags);
|
||||
@ -1363,8 +1363,8 @@ static int fts5TriTokenize(
|
||||
for(ii=0; ii<3; ii++){
|
||||
do {
|
||||
aStart[ii] = zIn - (const unsigned char*)pText;
|
||||
if( zIn>=zEof ) return SQLITE_OK;
|
||||
READ_UTF8(zIn, zEof, iCode);
|
||||
if( iCode==0 ) return SQLITE_OK;
|
||||
if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam);
|
||||
}while( iCode==0 );
|
||||
WRITE_UTF8(zOut, iCode);
|
||||
@ -1385,8 +1385,11 @@ static int fts5TriTokenize(
|
||||
/* Read characters from the input up until the first non-diacritic */
|
||||
do {
|
||||
iNext = zIn - (const unsigned char*)pText;
|
||||
if( zIn>=zEof ){
|
||||
iCode = 0;
|
||||
break;
|
||||
}
|
||||
READ_UTF8(zIn, zEof, iCode);
|
||||
if( iCode==0 ) break;
|
||||
if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam);
|
||||
}while( iCode==0 );
|
||||
|
||||
|
@ -350,5 +350,17 @@ do_execsql_test 11.1 {
|
||||
INSERT INTO t4 VALUES( str('') );
|
||||
}
|
||||
|
||||
do_test 12.0 {
|
||||
sqlite3_fts5_tokenize db trigram "abcd"
|
||||
} {abc 0 3 bcd 1 4}
|
||||
|
||||
do_test 12.1 {
|
||||
sqlite3_fts5_tokenize db trigram "a"
|
||||
} {}
|
||||
|
||||
do_test 12.2 {
|
||||
sqlite3_fts5_tokenize db trigram ""
|
||||
} {}
|
||||
|
||||
finish_test
|
||||
|
||||
|
Reference in New Issue
Block a user