mirror of
https://github.com/MariaDB/server.git
synced 2026-01-06 05:22:24 +03:00
Bug #3928 regexp [[:>:]] and UTF-8
This commit is contained in:
@@ -218,3 +218,25 @@ b
|
||||
select * from t1 where a = 'b' and a != 'b';
|
||||
a
|
||||
drop table t1;
|
||||
set names utf8;
|
||||
select 'вася' rlike '[[:<:]]вася[[:>:]]';
|
||||
'вася' rlike '[[:<:]]вася[[:>:]]'
|
||||
1
|
||||
select 'вася ' rlike '[[:<:]]вася[[:>:]]';
|
||||
'вася ' rlike '[[:<:]]вася[[:>:]]'
|
||||
1
|
||||
select ' вася' rlike '[[:<:]]вася[[:>:]]';
|
||||
' вася' rlike '[[:<:]]вася[[:>:]]'
|
||||
1
|
||||
select ' вася ' rlike '[[:<:]]вася[[:>:]]';
|
||||
' вася ' rlike '[[:<:]]вася[[:>:]]'
|
||||
1
|
||||
select 'васяz' rlike '[[:<:]]вася[[:>:]]';
|
||||
'васяz' rlike '[[:<:]]вася[[:>:]]'
|
||||
0
|
||||
select 'zвася' rlike '[[:<:]]вася[[:>:]]';
|
||||
'zвася' rlike '[[:<:]]вася[[:>:]]'
|
||||
0
|
||||
select 'zвасяz' rlike '[[:<:]]вася[[:>:]]';
|
||||
'zвасяz' rlike '[[:<:]]вася[[:>:]]'
|
||||
0
|
||||
|
||||
@@ -141,3 +141,19 @@ select * from t1 where a = 'b';
|
||||
select * from t1 where a = 'b' and a = 'b';
|
||||
select * from t1 where a = 'b' and a != 'b';
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug #3928 regexp [[:>:]] and UTF-8
|
||||
#
|
||||
set names utf8;
|
||||
|
||||
# This should return TRUE
|
||||
select 'вася' rlike '[[:<:]]вася[[:>:]]';
|
||||
select 'вася ' rlike '[[:<:]]вася[[:>:]]';
|
||||
select ' вася' rlike '[[:<:]]вася[[:>:]]';
|
||||
select ' вася ' rlike '[[:<:]]вася[[:>:]]';
|
||||
|
||||
# This should return FALSE
|
||||
select 'васяz' rlike '[[:<:]]вася[[:>:]]';
|
||||
select 'zвася' rlike '[[:<:]]вася[[:>:]]';
|
||||
select 'zвасяz' rlike '[[:<:]]вася[[:>:]]';
|
||||
|
||||
@@ -1524,8 +1524,12 @@ MY_UNICASE_INFO *uni_plane[256]={
|
||||
|
||||
#ifdef HAVE_CHARSET_utf8
|
||||
|
||||
/* These arrays are taken from usa7 implementation */
|
||||
|
||||
/*
|
||||
We consider every byte with a code greater than 127 to be a letter.
|
||||
This guarantees that word boundaries work fine with regular
|
||||
expressions. Note, there is no need to mark byte 255 as a
|
||||
letter, because it is an illegal byte in UTF-8.
|
||||
*/
|
||||
static uchar ctype_utf8[] = {
|
||||
0,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
|
||||
@@ -1536,16 +1540,18 @@ static uchar ctype_utf8[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
|
||||
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
|
||||
};
|
||||
|
||||
/* The arrays below are taken from the usa7 implementation */
|
||||
|
||||
static uchar to_lower_utf8[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
|
||||
Reference in New Issue
Block a user