1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'

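Use utf8mb4 instead of utf8mb3 as the library character set for PCRE2, so that supplementary (4-byte) characters are not replaced with '?'.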
This commit is contained in:
Sergei Golubchik
2023-12-14 15:17:59 +01:00
parent 6538a91e94
commit 022ae42155
4 changed files with 19 additions and 2 deletions

View File

@@ -895,3 +895,12 @@ REGEXP_INSTR('a_kollision', 'o([lm])\\1')
REGEXP_INSTR('a_kollision', '(oll)') REGEXP_INSTR('a_kollision', '(oll)')
4 4
SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1'); SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1');
REGEXP_INSTR('a_kollision', 'o([lm])\\1')
4
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
a
#
# MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
#
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
Text

View File

@@ -470,3 +470,11 @@ SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1');
# #
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]'; SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
--enable_service_connection --enable_service_connection
--echo #
--echo # MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
--echo #
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
--echo #
--echo # End of 10.6 tests
--echo #

View File

@@ -6096,7 +6096,7 @@ void Regexp_processor_pcre::init(CHARSET_INFO *data_charset, int extra_flags)
// Convert text data to utf-8. // Convert text data to utf-8.
m_library_charset= data_charset == &my_charset_bin ? m_library_charset= data_charset == &my_charset_bin ?
&my_charset_bin : &my_charset_utf8mb3_general_ci; &my_charset_bin : &my_charset_utf8mb4_general_ci;
m_conversion_is_needed= (data_charset != &my_charset_bin) && m_conversion_is_needed= (data_charset != &my_charset_bin) &&
!my_charset_same(data_charset, m_library_charset); !my_charset_same(data_charset, m_library_charset);

View File

@@ -3057,7 +3057,7 @@ public:
m_pcre(NULL), m_pcre_match_data(NULL), m_pcre(NULL), m_pcre_match_data(NULL),
m_conversion_is_needed(true), m_is_const(0), m_conversion_is_needed(true), m_is_const(0),
m_library_flags(0), m_library_flags(0),
m_library_charset(&my_charset_utf8mb3_general_ci) m_library_charset(&my_charset_utf8mb4_general_ci)
{} {}
int default_regex_flags(); int default_regex_flags();
void init(CHARSET_INFO *data_charset, int extra_flags); void init(CHARSET_INFO *data_charset, int extra_flags);