From ccbcafc22e27e8267243cdd6e7b113ff34ce6563 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Thu, 12 Dec 2024 15:02:46 +1100 Subject: [PATCH] MDEV-35614: JSON_UNQUOTE doesn't work with emojis Emojis are encoded as 4-byte UTF-8 sequences, which utf8mb3 cannot represent. Make JSON_UNQUOTE convert its result to utf8mb4_bin by default instead of utf8mb3_general_ci. --- mysql-test/main/func_json.result | 37 ++++++++++++++++++++ mysql-test/main/func_json.test | 22 ++++++++++++ mysql-test/suite/json/r/json_no_table.result | 2 +- sql/item_jsonfunc.cc | 6 ++-- 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/mysql-test/main/func_json.result b/mysql-test/main/func_json.result index 5b3909b961e..df4685ba9ee 100644 --- a/mysql-test/main/func_json.result +++ b/mysql-test/main/func_json.result @@ -1766,6 +1766,43 @@ FROM JSON_TABLE (@data, '$[*]' COLUMNS (data text PATH '$.Data')) AS t; data # +# MDEV-35614 JSON_UNQUOTE doesn't work with emojis +# +SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley; +hex_smiley +F09F988A +set names utf8mb4; +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley; +smiley +😊 +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys; +equal_smileys +1 +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys; +less_or_equal_smileys +1 +set @v='{ "color":"😊" }'; +select @v as v, collation(@v) as collation_v; +v collation_v +{ "color":"😊" } utf8mb4_general_ci +select json_valid(@v) as valid; +valid +1 +select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation; +color_extraction color_extraction_collation +"😊" utf8mb4_general_ci +select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation; +unquoted unquoted_collation +😊 utf8mb4_bin +SELECT JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4; +invalid_utf8mb4 +"\uc080\ude0a" +Warnings: +Warning 4035 Broken JSON string in argument 1 to 
function 'json_unquote' at position 13 +show warnings; +Level Code Message +Warning 4035 Broken JSON string in argument 1 to function 'json_unquote' at position 13 +# # End of 10.6 tests # # diff --git a/mysql-test/main/func_json.test b/mysql-test/main/func_json.test index 8f7a0e1aa66..bdb53be509f 100644 --- a/mysql-test/main/func_json.test +++ b/mysql-test/main/func_json.test @@ -1194,6 +1194,7 @@ SELECT JSON_EXTRACT('{"a": 1,"b": 2}','$.a'); SET @@collation_connection= @save_collation_connection; + --echo # --echo # End of 10.5 tests --echo # @@ -1231,6 +1232,27 @@ SELECT data FROM JSON_TABLE (@data, '$[*]' COLUMNS (data text PATH '$.Data')) AS t; + +--echo # +--echo # MDEV-35614 JSON_UNQUOTE doesn't work with emojis +--echo # + +SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley; +set names utf8mb4; +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley; + +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys; +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys; + +set @v='{ "color":"😊" }'; +select @v as v, collation(@v) as collation_v; +select json_valid(@v) as valid; +select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation; +select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation; + +SELECT JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4; +show warnings; + --echo # --echo # End of 10.6 tests --echo # diff --git a/mysql-test/suite/json/r/json_no_table.result b/mysql-test/suite/json/r/json_no_table.result index 5819a10ebfa..ab32364b163 100644 --- a/mysql-test/suite/json/r/json_no_table.result +++ b/mysql-test/suite/json/r/json_no_table.result @@ -2886,7 +2886,7 @@ json_unquote(json_compact('["a", "b", "c"]')) ["a", "b", "c"] select charset(json_unquote('"abc"')); charset(json_unquote('"abc"')) -utf8mb3 +utf8mb4 select 
json_quote(convert(X'e68891' using utf8)); json_quote(convert(X'e68891' using utf8)) "我" diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 7408c840dd0..9fa0d66c47e 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -851,7 +851,7 @@ String *Item_func_json_quote::val_str(String *str) bool Item_func_json_unquote::fix_length_and_dec(THD *thd) { - collation.set(&my_charset_utf8mb3_general_ci, + collation.set(&my_charset_utf8mb4_bin, DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); max_length= args[0]->max_char_length() * collation.collation->mbmaxlen; set_maybe_null(); @@ -894,12 +894,12 @@ String *Item_func_json_unquote::val_str(String *str) return js; str->length(0); - str->set_charset(&my_charset_utf8mb3_general_ci); + str->set_charset(&my_charset_utf8mb4_bin); if (str->realloc_with_extra_if_needed(je.value_len) || (c_len= json_unescape(js->charset(), je.value, je.value + je.value_len, - &my_charset_utf8mb3_general_ci, + &my_charset_utf8mb4_bin, (uchar *) str->ptr(), (uchar *) (str->ptr() + je.value_len))) < 0) goto error;