1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Fix handling of U+fffd in the LIKE optimization.

dbsqlfuzz eee57fb9eea1dfa5aa40dfa87865cf8c84d12f96.

FossilOrigin-Name: bce52ce2a6e7f3d3d1b2807d1ea95243d9b655e557c1bb6f0b8a9a6cefb1aed6
This commit is contained in:
drh
2024-10-07 12:19:23 +00:00
parent 29f976432a
commit ce527f2e97
4 changed files with 28 additions and 22 deletions

View File

@ -219,20 +219,25 @@ static int isLikeOrGlob(
z = (u8*)pRight->u.zToken;
}
if( z ){
/* Count the number of prefix characters prior to the first wildcard.
** If the underlying database has a UTF16LE encoding, then only consider
** ASCII characters. Note that the encoding of z[] is UTF8 - we are
** dealing with only UTF8 here in this code, but the database engine
** itself might be processing content using a different encoding. */
/* Count the number of prefix bytes prior to the first wildcard
** or U+fffd character. If the underlying database has a UTF16LE
** encoding, then only consider ASCII characters. Note that the
** encoding of z[] is UTF8 - we are dealing with only UTF8 here in
** this code, but the database engine itself might be processing
** content using a different encoding. */
cnt = 0;
while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
cnt++;
if( c==wc[3] && z[cnt]!=0 ){
cnt++;
}else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){
cnt--;
break;
}else if( c>=0x80 ){
const u8 *z2 = z+cnt-1;
if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){
cnt--;
break;
}else{
cnt = (int)(z2-z);
}
}
}
@ -244,7 +249,7 @@ static int isLikeOrGlob(
** range search. The third is because the caller assumes that the pattern
** consists of at least one character after all escapes have been
** removed. */
if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){
if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){
Expr *pPrefix;
/* A "complete" match if the pattern ends with "*" or "%" */