mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-30 19:03:16 +03:00
Fix corner cases in UTF8 handling in the REGEXP extension.
[forum:/forumpost/3ffe058b04|Forum post 3ffe058b04]. FossilOrigin-Name: abb18f61c5cec0f524acc41453b4c06b61c5af51ff46417588837fc0c3967288
This commit is contained in:
@@ -185,7 +185,7 @@ static unsigned re_next_char(ReInput *p){
|
||||
c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
|
||||
p->i += 2;
|
||||
if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
|
||||
- }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
|
||||
+ }else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80
|
||||
&& (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
|
||||
c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
|
||||
| (p->z[p->i+2]&0x3f);
|
||||
@@ -712,15 +712,15 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
|
||||
** one or more matching characters, enter those matching characters into
|
||||
** zInit[]. The re_match() routine can then search ahead in the input
|
||||
** string looking for the initial match without having to run the whole
|
||||
- ** regex engine over the string. Do not worry able trying to match
|
||||
+ ** regex engine over the string. Do not worry about trying to match
|
||||
** unicode characters beyond plane 0 - those are very rare and this is
|
||||
** just an optimization. */
|
||||
if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){
|
||||
for(j=0, i=1; j<(int)sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
|
||||
unsigned x = pRe->aArg[i];
|
||||
- if( x<=127 ){
|
||||
+ if( x<=0x7f ){
|
||||
pRe->zInit[j++] = (unsigned char)x;
|
||||
- }else if( x<=0xfff ){
|
||||
+ }else if( x<=0x7ff ){
|
||||
pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6));
|
||||
pRe->zInit[j++] = 0x80 | (x&0x3f);
|
||||
}else if( x<=0xffff ){
|
||||
|
Reference in New Issue
Block a user