1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-35614: JSON_UNQUOTE doesn't work with emojis

Emojis are encoded as 4-byte UTF-8 sequences, which utf8mb3 cannot represent.
Make JSON_UNQUOTE produce its result in utf8mb4 (collation utf8mb4_bin) by
default instead of utf8mb3_general_ci.
This commit is contained in:
Daniel Black
2024-12-12 15:02:46 +11:00
parent 5a536adb03
commit ccbcafc22e
4 changed files with 63 additions and 4 deletions

View File

@@ -1766,6 +1766,43 @@ FROM JSON_TABLE (@data, '$[*]' COLUMNS (data text PATH '$.Data')) AS t;
data
<root language="de"></root>
#
# MDEV-35614 JSON_UNQUOTE doesn't work with emojis
#
SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley;
hex_smiley
F09F988A
set names utf8mb4;
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley;
smiley
😊
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys;
equal_smileys
1
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys;
less_or_equal_smileys
1
set @v='{ "color":"😊" }';
select @v as v, collation(@v) as collation_v;
v collation_v
{ "color":"😊" } utf8mb4_general_ci
select json_valid(@v) as valid;
valid
1
select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation;
color_extraction color_extraction_collation
"😊" utf8mb4_general_ci
select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation;
unquoted unquoted_collation
😊 utf8mb4_bin
SELECT JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4;
invalid_utf8mb4
"\uc080\ude0a"
Warnings:
Warning 4035 Broken JSON string in argument 1 to function 'json_unquote' at position 13
show warnings;
Level Code Message
Warning 4035 Broken JSON string in argument 1 to function 'json_unquote' at position 13
#
# End of 10.6 tests
#
#

View File

@@ -1194,6 +1194,7 @@ SELECT JSON_EXTRACT('{"a": 1,"b": 2}','$.a');
SET @@collation_connection= @save_collation_connection;
--echo #
--echo # End of 10.5 tests
--echo #
@@ -1231,6 +1232,27 @@ SELECT
data
FROM JSON_TABLE (@data, '$[*]' COLUMNS (data text PATH '$.Data')) AS t;
--echo #
--echo # MDEV-35614 JSON_UNQUOTE doesn't work with emojis
--echo #
# This statement runs before "set names utf8mb4" below, so the result is
# checked through HEX() rather than as a rendered character.
# The escaped surrogate pair \ud83d\ude0a denotes U+1F60A, whose UTF-8
# encoding is the four bytes F0 9F 98 8A.
SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley;
# Switch the connection character set to utf8mb4 for the remaining statements.
set names utf8mb4;
# Select the unescaped surrogate pair directly as a string.
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley;
# Apply comparison operators (= and <=) to two JSON_UNQUOTE results.
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys;
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys;
# Same emoji, this time given as a literal character inside a JSON document
# stored in a user variable; each step also reports the collation it yields.
set @v='{ "color":"😊" }';
select @v as v, collation(@v) as collation_v;
select json_valid(@v) as valid;
select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation;
select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation;
# Negative case: \uc080 is outside the high-surrogate range (D800-DBFF), so
# \uc080\ude0a is not a valid surrogate pair. The column alias marks this as
# the invalid-input case; "show warnings" records whatever diagnostics the
# server raised for it.
SELECT JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4;
show warnings;
--echo #
--echo # End of 10.6 tests
--echo #

View File

@@ -2886,7 +2886,7 @@ json_unquote(json_compact('["a", "b", "c"]'))
["a", "b", "c"]
select charset(json_unquote('"abc"'));
charset(json_unquote('"abc"'))
utf8mb3
utf8mb4
select json_quote(convert(X'e68891' using utf8));
json_quote(convert(X'e68891' using utf8))
"我"

View File

@@ -851,7 +851,7 @@ String *Item_func_json_quote::val_str(String *str)
bool Item_func_json_unquote::fix_length_and_dec(THD *thd)
{
collation.set(&my_charset_utf8mb3_general_ci,
collation.set(&my_charset_utf8mb4_bin,
DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
max_length= args[0]->max_char_length() * collation.collation->mbmaxlen;
set_maybe_null();
@@ -894,12 +894,12 @@ String *Item_func_json_unquote::val_str(String *str)
return js;
str->length(0);
str->set_charset(&my_charset_utf8mb3_general_ci);
str->set_charset(&my_charset_utf8mb4_bin);
if (str->realloc_with_extra_if_needed(je.value_len) ||
(c_len= json_unescape(js->charset(),
je.value, je.value + je.value_len,
&my_charset_utf8mb3_general_ci,
&my_charset_utf8mb4_bin,
(uchar *) str->ptr(), (uchar *) (str->ptr() + je.value_len))) < 0)
goto error;