diff --git a/mysql-test/main/ctype_ucs.result b/mysql-test/main/ctype_ucs.result index eb1220f9a56..8f747dfd51e 100644 --- a/mysql-test/main/ctype_ucs.result +++ b/mysql-test/main/ctype_ucs.result @@ -6520,5 +6520,25 @@ SELECT 1 COLLATE latin1_swedish_ci; ERROR 42000: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'ucs2' SET NAMES utf8; # +# MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion +# +SET NAMES utf8mb3, @@collation_connection=ucs2_general_ci; +CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET ucs2; +INSERT INTO t1 VALUES ('a'),('A'); +CREATE OR REPLACE VIEW v1 AS +SELECT COUNT(*) AS cnt, GROUP_CONCAT(c) AS c1 FROM t1 GROUP BY c; +SELECT * FROM v1; +cnt c1 +2 a,A +SELECT HEX(c1) FROM v1; +HEX(c1) +0061002C0041 +SHOW CREATE VIEW v1; +View Create View character_set_client collation_connection +v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select count(0) AS `cnt`,group_concat(`t1`.`c` separator ',') AS `c1` from `t1` group by `t1`.`c` utf8 ucs2_general_ci +DROP VIEW v1; +DROP TABLE t1; +SET NAMES utf8mb3; +# # End of 10.5 tests # diff --git a/mysql-test/main/ctype_ucs.test b/mysql-test/main/ctype_ucs.test index 3df155c0f9d..f2fcee1dbb4 100644 --- a/mysql-test/main/ctype_ucs.test +++ b/mysql-test/main/ctype_ucs.test @@ -1189,6 +1189,23 @@ SELECT HEX(1 COLLATE ucs2_bin); SELECT 1 COLLATE latin1_swedish_ci; SET NAMES utf8; +--echo # +--echo # MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion +--echo # + +SET NAMES utf8mb3, @@collation_connection=ucs2_general_ci; +CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET ucs2; +INSERT INTO t1 VALUES ('a'),('A'); +CREATE OR REPLACE VIEW v1 AS + SELECT COUNT(*) AS cnt, GROUP_CONCAT(c) AS c1 FROM t1 GROUP BY c; +SELECT * FROM v1; +SELECT HEX(c1) FROM v1; +SHOW CREATE VIEW v1; +DROP VIEW v1; +DROP TABLE t1; +SET NAMES utf8mb3; + + --echo # --echo # End of 10.5 tests --echo # diff --git a/mysql-test/main/func_gconcat.result 
b/mysql-test/main/func_gconcat.result index 7bc6441661b..e922134a9da 100644 --- a/mysql-test/main/func_gconcat.result +++ b/mysql-test/main/func_gconcat.result @@ -1517,4 +1517,24 @@ deallocate prepare stmt; set join_cache_level=default; set group_concat_max_len=default; drop table t1,t2; +# +# MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion +# +SET NAMES utf8, @@collation_connection=latin1_swedish_ci; +CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET latin1; +INSERT INTO t1 VALUES ('a'),('A'); +CREATE OR REPLACE VIEW v1 AS +SELECT GROUP_CONCAT(c SEPARATOR 'ß') AS c1 FROM t1 GROUP BY c; +SELECT * FROM v1; +c1 +aßA +SELECT HEX(c1) FROM v1; +HEX(c1) +61DF41 +SHOW CREATE VIEW v1; +View Create View character_set_client collation_connection +v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select group_concat(`t1`.`c` separator 'ß') AS `c1` from `t1` group by `t1`.`c` utf8 latin1_swedish_ci +DROP VIEW v1; +DROP TABLE t1; +SET NAMES latin1; # End of 10.5 tests diff --git a/mysql-test/main/func_gconcat.test b/mysql-test/main/func_gconcat.test index 2e09bddbb8b..c9787ce4471 100644 --- a/mysql-test/main/func_gconcat.test +++ b/mysql-test/main/func_gconcat.test @@ -1105,4 +1105,20 @@ set group_concat_max_len=default; drop table t1,t2; +--echo # +--echo # MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion +--echo # + +SET NAMES utf8, @@collation_connection=latin1_swedish_ci; +CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET latin1; +INSERT INTO t1 VALUES ('a'),('A'); +CREATE OR REPLACE VIEW v1 AS + SELECT GROUP_CONCAT(c SEPARATOR 'ß') AS c1 FROM t1 GROUP BY c; +SELECT * FROM v1; +SELECT HEX(c1) FROM v1; +SHOW CREATE VIEW v1; +DROP VIEW v1; +DROP TABLE t1; +SET NAMES latin1; + --echo # End of 10.5 tests diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 7f39efe6c9a..4cf403c1618 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -4587,7 +4587,7 @@ void 
Item_func_group_concat::print(String *str, enum_query_type query_type) if (sum_func() == GROUP_CONCAT_FUNC) { str->append(STRING_WITH_LEN(" separator \'")); - str->append_for_single_quote(separator->ptr(), separator->length()); + str->append_for_single_quote_opt_convert(*separator); /* unlike the removed call, this hands over separator's charset, so the callee can convert when it differs from str's */ str->append(STRING_WITH_LEN("\'")); } diff --git a/sql/sql_string.cc b/sql/sql_string.cc index b723ac5db66..cf1ed210ca3 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -1126,26 +1126,45 @@ bool String::append_for_single_quote(const char *st, size_t len) int chlen; for (; st < end; st++) { - switch (*st) + char ch2= (char) (uchar) escaped_wc_for_single_quote((uchar) *st); /* 0 unless this byte has to be written as a backslash escape */ + if (ch2) { - case '\\': APPEND(STRING_WITH_LEN("\\\\")); break; - case '\0': APPEND(STRING_WITH_LEN("\\0")); break; - case '\'': APPEND(STRING_WITH_LEN("\\'")); break; - case '\n': APPEND(STRING_WITH_LEN("\\n")); break; - case '\r': APPEND(STRING_WITH_LEN("\\r")); break; - case '\032': APPEND(STRING_WITH_LEN("\\Z")); break; - default: if ((chlen=charset()->charlen(st, end)) > 0) - { - APPEND(st, chlen); - st+= chlen-1; - } - else - APPEND(*st); + if (append('\\') || append(ch2)) + return true; + continue; } + if ((chlen= charset()->charlen(st, end)) > 0) + { + APPEND(st, chlen); + st+= chlen-1; + } + else + APPEND(*st); } return 0; } + +/* Decodes src[0..length) one character at a time with cs->cset->mb_wc() and appends each one via append_wc(); a character for which escaped_wc_for_single_quote() returns non-zero is appended as a backslash followed by that returned character instead. Returns true as soon as an append_wc() call returns true, otherwise false. Decoding stops at the first position where mb_wc() returns <= 0, and the bytes from there on are not appended. NOTE(review): append_wc() is presumed to encode into this string's own charset - confirm. */ bool String::append_for_single_quote_using_mb_wc(const char *src, + size_t length, + CHARSET_INFO *cs) +{ + DBUG_ASSERT(&my_charset_bin != charset()); + DBUG_ASSERT(&my_charset_bin != cs); + const uchar *str= (const uchar *) src; + const uchar *end= (const uchar *) src + length; + int chlen; + my_wc_t wc; + for ( ; (chlen= cs->cset->mb_wc(cs, &wc, str, end)) > 0; str+= chlen) + { + my_wc_t wc2= escaped_wc_for_single_quote(wc); + if (wc2 ? 
(append_wc('\\') || append_wc(wc2)) : append_wc(wc)) + return true; + } + return false; +} + + void String::print(String *str) const { str->append_for_single_quote(Ptr, str_length); diff --git a/sql/sql_string.h b/sql/sql_string.h index 13820329750..3dbeb7b83cf 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -1134,6 +1134,42 @@ public: print_with_conversion(to, cs); } + /* Maps a character that needs a backslash escape to the character written after the backslash: backslash -> backslash, NUL -> 0, single quote -> single quote, LF -> n, CR -> r, 0x1A -> Z. Returns 0 for every other character. */ static my_wc_t escaped_wc_for_single_quote(my_wc_t ch) + { + switch (ch) + { + case '\\': return '\\'; + case '\0': return '0'; + case '\'': return '\''; + case '\n': return 'n'; + case '\r': return 'r'; + case '\032': return 'Z'; + } + return 0; + } + + // Append for single quote using mb_wc/wc_mb Unicode conversion + bool append_for_single_quote_using_mb_wc(const char *str, size_t length, + CHARSET_INFO *cs); + + // Append for single quote with optional mb_wc/wc_mb conversion + bool append_for_single_quote_opt_convert(const char *str, + size_t length, + CHARSET_INFO *cs) + { + return charset() == &my_charset_bin || cs == &my_charset_bin || + my_charset_same(charset(), cs) ? /* no conversion when either charset is my_charset_bin or my_charset_same() holds for the pair */ + append_for_single_quote(str, length) : + append_for_single_quote_using_mb_wc(str, length, cs); + } + + /* Overload that takes the bytes, length and charset from a String. */ bool append_for_single_quote_opt_convert(const String &str) + { + return append_for_single_quote_opt_convert(str.ptr(), + str.length(), + str.charset()); + } + bool append_for_single_quote(const char *st, size_t len); bool append_for_single_quote(const String *s) {