mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
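Use utf8mb4 instead of utf8mb3 as the library charset that text data is converted to for PCRE2, so that supplementary (4-byte) characters are no longer replaced with '?'.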
This commit is contained in:
@ -895,3 +895,12 @@ REGEXP_INSTR('a_kollision', 'o([lm])\\1')
|
|||||||
REGEXP_INSTR('a_kollision', '(oll)')
|
REGEXP_INSTR('a_kollision', '(oll)')
|
||||||
4
|
4
|
||||||
SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1');
|
SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1');
|
||||||
|
REGEXP_INSTR('a_kollision', 'o([lm])\\1')
|
||||||
|
4
|
||||||
|
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
|
||||||
|
a
|
||||||
|
#
|
||||||
|
# MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
|
||||||
|
#
|
||||||
|
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
|
||||||
|
Text
|
||||||
|
@ -470,3 +470,11 @@ SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1');
|
|||||||
#
|
#
|
||||||
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
|
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
|
||||||
--enable_service_connection
|
--enable_service_connection
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
|
||||||
|
--echo #
|
||||||
|
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.6 tests
|
||||||
|
--echo #
|
||||||
|
@ -6096,7 +6096,7 @@ void Regexp_processor_pcre::init(CHARSET_INFO *data_charset, int extra_flags)
|
|||||||
|
|
||||||
// Convert text data to utf-8.
|
// Convert text data to utf-8.
|
||||||
m_library_charset= data_charset == &my_charset_bin ?
|
m_library_charset= data_charset == &my_charset_bin ?
|
||||||
&my_charset_bin : &my_charset_utf8mb3_general_ci;
|
&my_charset_bin : &my_charset_utf8mb4_general_ci;
|
||||||
|
|
||||||
m_conversion_is_needed= (data_charset != &my_charset_bin) &&
|
m_conversion_is_needed= (data_charset != &my_charset_bin) &&
|
||||||
!my_charset_same(data_charset, m_library_charset);
|
!my_charset_same(data_charset, m_library_charset);
|
||||||
|
@ -3057,7 +3057,7 @@ public:
|
|||||||
m_pcre(NULL), m_pcre_match_data(NULL),
|
m_pcre(NULL), m_pcre_match_data(NULL),
|
||||||
m_conversion_is_needed(true), m_is_const(0),
|
m_conversion_is_needed(true), m_is_const(0),
|
||||||
m_library_flags(0),
|
m_library_flags(0),
|
||||||
m_library_charset(&my_charset_utf8mb3_general_ci)
|
m_library_charset(&my_charset_utf8mb4_general_ci)
|
||||||
{}
|
{}
|
||||||
int default_regex_flags();
|
int default_regex_flags();
|
||||||
void init(CHARSET_INFO *data_charset, int extra_flags);
|
void init(CHARSET_INFO *data_charset, int extra_flags);
|
||||||
|
Reference in New Issue
Block a user