mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Bug#31081 server crash in regexp function
Problem: The "regex" library written by Henry Spencer does not support tricky character sets like UCS2. Fix: convert tricky character sets to UTF8 before calling regex functions.
This commit is contained in:
42
mysql-test/include/ctype_regex.inc
Normal file
42
mysql-test/include/ctype_regex.inc
Normal file
@ -0,0 +1,42 @@
|
||||
#
|
||||
# To test a desired collation, set session.collation_connection to
|
||||
# this collation before including this file
|
||||
#
|
||||
|
||||
--disable_warnings
|
||||
drop table if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
#
|
||||
# Create a table with two nullable varchar(64) columns,
|
||||
# using current values of
|
||||
# @@character_set_connection and @@collation_connection.
|
||||
#
|
||||
|
||||
create table t1 as
|
||||
select repeat(' ', 64) as s1, repeat(' ',64) as s2
|
||||
union
|
||||
select null, null;
|
||||
show create table t1;
|
||||
delete from t1;
|
||||
|
||||
insert into t1 values('aaa','aaa');
|
||||
insert into t1 values('aaa|qqq','qqq');
|
||||
insert into t1 values('gheis','^[^a-dXYZ]+$');
|
||||
insert into t1 values('aab','^aa?b');
|
||||
insert into t1 values('Baaan','^Ba*n');
|
||||
insert into t1 values('aaa','qqq|aaa');
|
||||
insert into t1 values('qqq','qqq|aaa');
|
||||
|
||||
insert into t1 values('bbb','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq');
|
||||
insert into t1 values('aaa','aba');
|
||||
|
||||
insert into t1 values(null,'abc');
|
||||
insert into t1 values('def',null);
|
||||
insert into t1 values(null,null);
|
||||
insert into t1 values('ghi','ghi[');
|
||||
|
||||
select HIGH_PRIORITY s1 regexp s2 from t1;
|
||||
|
||||
drop table t1;
|
@ -2754,4 +2754,49 @@ a
|
||||
c
|
||||
ch
|
||||
drop table t1;
|
||||
set collation_connection=ucs2_unicode_ci;
|
||||
drop table if exists t1;
|
||||
create table t1 as
|
||||
select repeat(' ', 64) as s1, repeat(' ',64) as s2
|
||||
union
|
||||
select null, null;
|
||||
show create table t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`s1` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL,
|
||||
`s2` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
delete from t1;
|
||||
insert into t1 values('aaa','aaa');
|
||||
insert into t1 values('aaa|qqq','qqq');
|
||||
insert into t1 values('gheis','^[^a-dXYZ]+$');
|
||||
insert into t1 values('aab','^aa?b');
|
||||
insert into t1 values('Baaan','^Ba*n');
|
||||
insert into t1 values('aaa','qqq|aaa');
|
||||
insert into t1 values('qqq','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq');
|
||||
insert into t1 values('aaa','aba');
|
||||
insert into t1 values(null,'abc');
|
||||
insert into t1 values('def',null);
|
||||
insert into t1 values(null,null);
|
||||
insert into t1 values('ghi','ghi[');
|
||||
select HIGH_PRIORITY s1 regexp s2 from t1;
|
||||
s1 regexp s2
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
drop table t1;
|
||||
set names utf8;
|
||||
End for 5.0 tests
|
||||
|
@ -922,4 +922,49 @@ ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_gen
|
||||
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
|
||||
drop table t1;
|
||||
set collation_connection=ucs2_general_ci;
|
||||
drop table if exists t1;
|
||||
create table t1 as
|
||||
select repeat(' ', 64) as s1, repeat(' ',64) as s2
|
||||
union
|
||||
select null, null;
|
||||
show create table t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`s1` varchar(64) character set ucs2 default NULL,
|
||||
`s2` varchar(64) character set ucs2 default NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
delete from t1;
|
||||
insert into t1 values('aaa','aaa');
|
||||
insert into t1 values('aaa|qqq','qqq');
|
||||
insert into t1 values('gheis','^[^a-dXYZ]+$');
|
||||
insert into t1 values('aab','^aa?b');
|
||||
insert into t1 values('Baaan','^Ba*n');
|
||||
insert into t1 values('aaa','qqq|aaa');
|
||||
insert into t1 values('qqq','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq');
|
||||
insert into t1 values('aaa','aba');
|
||||
insert into t1 values(null,'abc');
|
||||
insert into t1 values('def',null);
|
||||
insert into t1 values(null,null);
|
||||
insert into t1 values('ghi','ghi[');
|
||||
select HIGH_PRIORITY s1 regexp s2 from t1;
|
||||
s1 regexp s2
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
drop table t1;
|
||||
set names latin1;
|
||||
End of 5.0 tests
|
||||
|
@ -267,6 +267,51 @@ b
|
||||
select * from t1 where a = 'b' and a != 'b';
|
||||
a
|
||||
drop table t1;
|
||||
set collation_connection=utf8_general_ci;
|
||||
drop table if exists t1;
|
||||
create table t1 as
|
||||
select repeat(' ', 64) as s1, repeat(' ',64) as s2
|
||||
union
|
||||
select null, null;
|
||||
show create table t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`s1` varchar(64) character set utf8 default NULL,
|
||||
`s2` varchar(64) character set utf8 default NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
delete from t1;
|
||||
insert into t1 values('aaa','aaa');
|
||||
insert into t1 values('aaa|qqq','qqq');
|
||||
insert into t1 values('gheis','^[^a-dXYZ]+$');
|
||||
insert into t1 values('aab','^aa?b');
|
||||
insert into t1 values('Baaan','^Ba*n');
|
||||
insert into t1 values('aaa','qqq|aaa');
|
||||
insert into t1 values('qqq','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq');
|
||||
insert into t1 values('aaa','aba');
|
||||
insert into t1 values(null,'abc');
|
||||
insert into t1 values('def',null);
|
||||
insert into t1 values(null,null);
|
||||
insert into t1 values('ghi','ghi[');
|
||||
select HIGH_PRIORITY s1 regexp s2 from t1;
|
||||
s1 regexp s2
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
drop table t1;
|
||||
set names utf8;
|
||||
set names utf8;
|
||||
select 'вася' rlike '[[:<:]]вася[[:>:]]';
|
||||
'вася' rlike '[[:<:]]вася[[:>:]]'
|
||||
|
@ -1,5 +1,17 @@
|
||||
drop table if exists t1;
|
||||
create table t1 (s1 char(64),s2 char(64));
|
||||
set names latin1;
|
||||
drop table if exists t1;
|
||||
create table t1 as
|
||||
select repeat(' ', 64) as s1, repeat(' ',64) as s2
|
||||
union
|
||||
select null, null;
|
||||
show create table t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`s1` varchar(64) default NULL,
|
||||
`s2` varchar(64) default NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
delete from t1;
|
||||
insert into t1 values('aaa','aaa');
|
||||
insert into t1 values('aaa|qqq','qqq');
|
||||
insert into t1 values('gheis','^[^a-dXYZ]+$');
|
||||
|
@ -538,4 +538,8 @@ alter table t1 convert to character set ucs2 collate ucs2_czech_ci;
|
||||
select * from t1 where a like 'c%';
|
||||
drop table t1;
|
||||
|
||||
set collation_connection=ucs2_unicode_ci;
|
||||
--source include/ctype_regex.inc
|
||||
set names utf8;
|
||||
|
||||
-- echo End for 5.0 tests
|
||||
|
@ -651,4 +651,8 @@ select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
|
||||
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
|
||||
drop table t1;
|
||||
|
||||
set collation_connection=ucs2_general_ci;
|
||||
--source include/ctype_regex.inc
|
||||
set names latin1;
|
||||
|
||||
--echo End of 5.0 tests
|
||||
|
@ -185,6 +185,13 @@ select * from t1 where a = 'b' and a = 'b';
|
||||
select * from t1 where a = 'b' and a != 'b';
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing regexp
|
||||
#
|
||||
set collation_connection=utf8_general_ci;
|
||||
--source include/ctype_regex.inc
|
||||
set names utf8;
|
||||
|
||||
#
|
||||
# Bug #3928 regexp [[:>:]] and UTF-8
|
||||
#
|
||||
|
@ -6,28 +6,9 @@
|
||||
drop table if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
create table t1 (s1 char(64),s2 char(64));
|
||||
set names latin1;
|
||||
--source include/ctype_regex.inc
|
||||
|
||||
insert into t1 values('aaa','aaa');
|
||||
insert into t1 values('aaa|qqq','qqq');
|
||||
insert into t1 values('gheis','^[^a-dXYZ]+$');
|
||||
insert into t1 values('aab','^aa?b');
|
||||
insert into t1 values('Baaan','^Ba*n');
|
||||
insert into t1 values('aaa','qqq|aaa');
|
||||
insert into t1 values('qqq','qqq|aaa');
|
||||
|
||||
insert into t1 values('bbb','qqq|aaa');
|
||||
insert into t1 values('bbb','qqq');
|
||||
insert into t1 values('aaa','aba');
|
||||
|
||||
insert into t1 values(null,'abc');
|
||||
insert into t1 values('def',null);
|
||||
insert into t1 values(null,null);
|
||||
insert into t1 values('ghi','ghi[');
|
||||
|
||||
select HIGH_PRIORITY s1 regexp s2 from t1;
|
||||
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# This tests a bug in regexp on Alpha
|
||||
|
@ -4225,6 +4225,51 @@ void Item_func_like::cleanup()
|
||||
|
||||
#ifdef USE_REGEX
|
||||
|
||||
/*
  Compile the pattern held in args[1] into preg.

  send_error: when true, a compilation failure is reported through
              my_error(ER_REGEXP_ERROR) with the regex library's message;
              when false the failure is only signalled via the return value.

  Returns FALSE when preg holds a usable compiled regex (newly compiled, or
  reused because the pattern compares equal to prev_regexp).
  Returns TRUE when the pattern is NULL, when converting it to
  regex_lib_charset fails, or when my_regcomp() reports an error.
*/
bool
|
||||
Item_func_regex::regcomp(bool send_error)
|
||||
{
|
||||
/* buff backs the pattern value first and is reused for the error text. */
char buff[MAX_FIELD_WIDTH];
|
||||
String tmp(buff,sizeof(buff),&my_charset_bin);
|
||||
String *res= args[1]->val_str(&tmp);
|
||||
int error;
|
||||
|
||||
/* A NULL pattern cannot be compiled; the caller treats TRUE as "no result". */
if (args[1]->null_value)
|
||||
return TRUE;
|
||||
|
||||
/*
  A regex is already compiled: keep it if stringcmp() reports no difference
  from the remembered pattern, otherwise remember the new pattern and
  release the old compiled regex before recompiling below.
  NOTE(review): prev_regexp is only refreshed inside this branch, so the
  very first compile does not record its pattern here - confirm intended.
*/
if (regex_compiled)
|
||||
{
|
||||
if (!stringcmp(res, &prev_regexp))
|
||||
return FALSE;
|
||||
prev_regexp.copy(*res);
|
||||
my_regfree(&preg);
|
||||
regex_compiled= 0;
|
||||
}
|
||||
|
||||
if (cmp_collation.collation != regex_lib_charset)
|
||||
{
|
||||
/* Convert the pattern into the character set used by the regex library */
|
||||
uint dummy_errors;
|
||||
if (conv.copy(res->ptr(), res->length(), res->charset(),
|
||||
regex_lib_charset, &dummy_errors))
|
||||
return TRUE;
|
||||
res= &conv;
|
||||
}
|
||||
|
||||
/*
  NOTE(review): c_ptr() is used here while val_int() passes c_ptr_safe() to
  my_regexec(); confirm that res is always safely NUL-terminated at this point.
*/
if ((error= my_regcomp(&preg, res->c_ptr(),
|
||||
regex_lib_flags, regex_lib_charset)))
|
||||
{
|
||||
if (send_error)
|
||||
{
|
||||
(void) my_regerror(error, &preg, buff, sizeof(buff));
|
||||
my_error(ER_REGEXP_ERROR, MYF(0), buff);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
regex_compiled= 1;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
Item_func_regex::fix_fields(THD *thd, Item **ref)
|
||||
{
|
||||
@ -4241,34 +4286,33 @@ Item_func_regex::fix_fields(THD *thd, Item **ref)
|
||||
if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1))
|
||||
return TRUE;
|
||||
|
||||
regex_lib_flags= (cmp_collation.collation->state &
|
||||
(MY_CS_BINSORT | MY_CS_CSSORT)) ?
|
||||
REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE;
|
||||
/*
|
||||
In the case of UCS2 and other non-ASCII character sets,
|
||||
we will convert patterns and strings to UTF8.
|
||||
*/
|
||||
regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
|
||||
&my_charset_utf8_general_ci :
|
||||
cmp_collation.collation;
|
||||
|
||||
used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
|
||||
not_null_tables_cache= (args[0]->not_null_tables() |
|
||||
args[1]->not_null_tables());
|
||||
const_item_cache=args[0]->const_item() && args[1]->const_item();
|
||||
if (!regex_compiled && args[1]->const_item())
|
||||
{
|
||||
char buff[MAX_FIELD_WIDTH];
|
||||
String tmp(buff,sizeof(buff),&my_charset_bin);
|
||||
String *res=args[1]->val_str(&tmp);
|
||||
if (args[1]->null_value)
|
||||
{ // Will always return NULL
|
||||
maybe_null=1;
|
||||
return FALSE;
|
||||
}
|
||||
int error;
|
||||
if ((error= my_regcomp(&preg,res->c_ptr(),
|
||||
((cmp_collation.collation->state &
|
||||
(MY_CS_BINSORT | MY_CS_CSSORT)) ?
|
||||
REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE),
|
||||
cmp_collation.collation)))
|
||||
{
|
||||
(void) my_regerror(error,&preg,buff,sizeof(buff));
|
||||
my_error(ER_REGEXP_ERROR, MYF(0), buff);
|
||||
if (regcomp(TRUE))
|
||||
return TRUE;
|
||||
}
|
||||
regex_compiled=regex_is_const=1;
|
||||
maybe_null=args[0]->maybe_null;
|
||||
regex_is_const= 1;
|
||||
maybe_null= args[0]->maybe_null;
|
||||
}
|
||||
else
|
||||
maybe_null=1;
|
||||
@ -4281,47 +4325,25 @@ longlong Item_func_regex::val_int()
|
||||
{
|
||||
DBUG_ASSERT(fixed == 1);
|
||||
char buff[MAX_FIELD_WIDTH];
|
||||
String *res, tmp(buff,sizeof(buff),&my_charset_bin);
|
||||
String tmp(buff,sizeof(buff),&my_charset_bin);
|
||||
String *res= args[0]->val_str(&tmp);
|
||||
|
||||
res=args[0]->val_str(&tmp);
|
||||
if (args[0]->null_value)
|
||||
{
|
||||
null_value=1;
|
||||
if ((null_value= (args[0]->null_value ||
|
||||
(!regex_is_const && regcomp(FALSE)))))
|
||||
return 0;
|
||||
}
|
||||
if (!regex_is_const)
|
||||
{
|
||||
char buff2[MAX_FIELD_WIDTH];
|
||||
String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin);
|
||||
|
||||
res2= args[1]->val_str(&tmp2);
|
||||
if (args[1]->null_value)
|
||||
if (cmp_collation.collation != regex_lib_charset)
|
||||
{
|
||||
null_value=1;
|
||||
/* Convert UCS2 strings to UTF8 */
|
||||
uint dummy_errors;
|
||||
if (conv.copy(res->ptr(), res->length(), res->charset(),
|
||||
regex_lib_charset, &dummy_errors))
|
||||
{
|
||||
null_value= 1;
|
||||
return 0;
|
||||
}
|
||||
if (!regex_compiled || stringcmp(res2,&prev_regexp))
|
||||
{
|
||||
prev_regexp.copy(*res2);
|
||||
if (regex_compiled)
|
||||
{
|
||||
my_regfree(&preg);
|
||||
regex_compiled=0;
|
||||
res= &conv;
|
||||
}
|
||||
if (my_regcomp(&preg,res2->c_ptr_safe(),
|
||||
((cmp_collation.collation->state &
|
||||
(MY_CS_BINSORT | MY_CS_CSSORT)) ?
|
||||
REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE),
|
||||
cmp_collation.collation))
|
||||
{
|
||||
null_value=1;
|
||||
return 0;
|
||||
}
|
||||
regex_compiled=1;
|
||||
}
|
||||
}
|
||||
null_value=0;
|
||||
return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
|
||||
}
|
||||
|
||||
|
@ -1313,6 +1313,10 @@ class Item_func_regex :public Item_bool_func
|
||||
bool regex_is_const;
|
||||
String prev_regexp;
|
||||
DTCollation cmp_collation;
|
||||
CHARSET_INFO *regex_lib_charset;
|
||||
int regex_lib_flags;
|
||||
String conv;
|
||||
bool regcomp(bool send_error);
|
||||
public:
|
||||
Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b),
|
||||
regex_compiled(0),regex_is_const(0) {}
|
||||
|
Reference in New Issue
Block a user