mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
Item_func_group_concat::print() did not take into account that Item_func_group_concat::separator can be of a different character set than the "String *str" (where the printing is being done to).

Therefore, printing did not work correctly for:
- non-ASCII separators when GROUP_CONCAT is done on 8bit data or multi-byte data with mbminlen==1.
- all separators (even including simple ones like comma) when GROUP_CONCAT is done on ucs2/utf16/utf32 data (mbminlen>1).

Because of this problem, VIEW definitions did not print correctly to their FRM files. This later led to a wrong SELECT and SHOW CREATE output.

Fix:
- Adding new String methods:
    bool append_for_single_quote_using_mb_wc(const char *str, size_t length, CHARSET_INFO *cs);
    bool append_for_single_quote_opt_convert(const char *str, size_t length, CHARSET_INFO *cs)
  which perform both escaping and character set conversion at the same time.
- Adding a new String method escaped_wc_for_single_quote(), to reuse the code between the old and the new methods.
- Fixing Item_func_group_concat::print() to use the new method append_for_single_quote_opt_convert().
This commit is contained in:
@ -6520,5 +6520,25 @@ SELECT 1 COLLATE latin1_swedish_ci;
|
|||||||
ERROR 42000: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'ucs2'
|
ERROR 42000: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'ucs2'
|
||||||
SET NAMES utf8;
|
SET NAMES utf8;
|
||||||
#
|
#
|
||||||
|
# MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
|
||||||
|
#
|
||||||
|
SET NAMES utf8mb3, @@collation_connection=ucs2_general_ci;
|
||||||
|
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET ucs2;
|
||||||
|
INSERT INTO t1 VALUES ('a'),('A');
|
||||||
|
CREATE OR REPLACE VIEW v1 AS
|
||||||
|
SELECT COUNT(*) AS cnt, GROUP_CONCAT(c) AS c1 FROM t1 GROUP BY c;
|
||||||
|
SELECT * FROM v1;
|
||||||
|
cnt c1
|
||||||
|
2 a,A
|
||||||
|
SELECT HEX(c1) FROM v1;
|
||||||
|
HEX(c1)
|
||||||
|
0061002C0041
|
||||||
|
SHOW CREATE VIEW v1;
|
||||||
|
View Create View character_set_client collation_connection
|
||||||
|
v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select count(0) AS `cnt`,group_concat(`t1`.`c` separator ',') AS `c1` from `t1` group by `t1`.`c` utf8 ucs2_general_ci
|
||||||
|
DROP VIEW v1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES utf8mb3;
|
||||||
|
#
|
||||||
# End of 10.5 tests
|
# End of 10.5 tests
|
||||||
#
|
#
|
||||||
|
@ -1189,6 +1189,23 @@ SELECT HEX(1 COLLATE ucs2_bin);
|
|||||||
SELECT 1 COLLATE latin1_swedish_ci;
|
SELECT 1 COLLATE latin1_swedish_ci;
|
||||||
SET NAMES utf8;
|
SET NAMES utf8;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
SET NAMES utf8mb3, @@collation_connection=ucs2_general_ci;
|
||||||
|
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET ucs2;
|
||||||
|
INSERT INTO t1 VALUES ('a'),('A');
|
||||||
|
CREATE OR REPLACE VIEW v1 AS
|
||||||
|
SELECT COUNT(*) AS cnt, GROUP_CONCAT(c) AS c1 FROM t1 GROUP BY c;
|
||||||
|
SELECT * FROM v1;
|
||||||
|
SELECT HEX(c1) FROM v1;
|
||||||
|
SHOW CREATE VIEW v1;
|
||||||
|
DROP VIEW v1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES utf8mb3;
|
||||||
|
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.5 tests
|
--echo # End of 10.5 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
@ -1517,4 +1517,24 @@ deallocate prepare stmt;
|
|||||||
set join_cache_level=default;
|
set join_cache_level=default;
|
||||||
set group_concat_max_len=default;
|
set group_concat_max_len=default;
|
||||||
drop table t1,t2;
|
drop table t1,t2;
|
||||||
|
#
|
||||||
|
# MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
|
||||||
|
#
|
||||||
|
SET NAMES utf8, @@collation_connection=latin1_swedish_ci;
|
||||||
|
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET latin1;
|
||||||
|
INSERT INTO t1 VALUES ('a'),('A');
|
||||||
|
CREATE OR REPLACE VIEW v1 AS
|
||||||
|
SELECT GROUP_CONCAT(c SEPARATOR 'ß') AS c1 FROM t1 GROUP BY c;
|
||||||
|
SELECT * FROM v1;
|
||||||
|
c1
|
||||||
|
aßA
|
||||||
|
SELECT HEX(c1) FROM v1;
|
||||||
|
HEX(c1)
|
||||||
|
61DF41
|
||||||
|
SHOW CREATE VIEW v1;
|
||||||
|
View Create View character_set_client collation_connection
|
||||||
|
v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select group_concat(`t1`.`c` separator 'ß') AS `c1` from `t1` group by `t1`.`c` utf8 latin1_swedish_ci
|
||||||
|
DROP VIEW v1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES latin1;
|
||||||
# End of 10.5 tests
|
# End of 10.5 tests
|
||||||
|
@ -1105,4 +1105,20 @@ set group_concat_max_len=default;
|
|||||||
|
|
||||||
drop table t1,t2;
|
drop table t1,t2;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
SET NAMES utf8, @@collation_connection=latin1_swedish_ci;
|
||||||
|
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET latin1;
|
||||||
|
INSERT INTO t1 VALUES ('a'),('A');
|
||||||
|
CREATE OR REPLACE VIEW v1 AS
|
||||||
|
SELECT GROUP_CONCAT(c SEPARATOR 'ß') AS c1 FROM t1 GROUP BY c;
|
||||||
|
SELECT * FROM v1;
|
||||||
|
SELECT HEX(c1) FROM v1;
|
||||||
|
SHOW CREATE VIEW v1;
|
||||||
|
DROP VIEW v1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES latin1;
|
||||||
|
|
||||||
--echo # End of 10.5 tests
|
--echo # End of 10.5 tests
|
||||||
|
@ -4587,7 +4587,7 @@ void Item_func_group_concat::print(String *str, enum_query_type query_type)
|
|||||||
if (sum_func() == GROUP_CONCAT_FUNC)
|
if (sum_func() == GROUP_CONCAT_FUNC)
|
||||||
{
|
{
|
||||||
str->append(STRING_WITH_LEN(" separator \'"));
|
str->append(STRING_WITH_LEN(" separator \'"));
|
||||||
str->append_for_single_quote(separator->ptr(), separator->length());
|
str->append_for_single_quote_opt_convert(*separator);
|
||||||
str->append(STRING_WITH_LEN("\'"));
|
str->append(STRING_WITH_LEN("\'"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1126,26 +1126,45 @@ bool String::append_for_single_quote(const char *st, size_t len)
|
|||||||
int chlen;
|
int chlen;
|
||||||
for (; st < end; st++)
|
for (; st < end; st++)
|
||||||
{
|
{
|
||||||
switch (*st)
|
char ch2= (char) (uchar) escaped_wc_for_single_quote((uchar) *st);
|
||||||
|
if (ch2)
|
||||||
{
|
{
|
||||||
case '\\': APPEND(STRING_WITH_LEN("\\\\")); break;
|
if (append('\\') || append(ch2))
|
||||||
case '\0': APPEND(STRING_WITH_LEN("\\0")); break;
|
return true;
|
||||||
case '\'': APPEND(STRING_WITH_LEN("\\'")); break;
|
continue;
|
||||||
case '\n': APPEND(STRING_WITH_LEN("\\n")); break;
|
|
||||||
case '\r': APPEND(STRING_WITH_LEN("\\r")); break;
|
|
||||||
case '\032': APPEND(STRING_WITH_LEN("\\Z")); break;
|
|
||||||
default: if ((chlen=charset()->charlen(st, end)) > 0)
|
|
||||||
{
|
|
||||||
APPEND(st, chlen);
|
|
||||||
st+= chlen-1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
APPEND(*st);
|
|
||||||
}
|
}
|
||||||
|
if ((chlen= charset()->charlen(st, end)) > 0)
|
||||||
|
{
|
||||||
|
APPEND(st, chlen);
|
||||||
|
st+= chlen-1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
APPEND(*st);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool String::append_for_single_quote_using_mb_wc(const char *src,
|
||||||
|
size_t length,
|
||||||
|
CHARSET_INFO *cs)
|
||||||
|
{
|
||||||
|
DBUG_ASSERT(&my_charset_bin != charset());
|
||||||
|
DBUG_ASSERT(&my_charset_bin != cs);
|
||||||
|
const uchar *str= (const uchar *) src;
|
||||||
|
const uchar *end= (const uchar *) src + length;
|
||||||
|
int chlen;
|
||||||
|
my_wc_t wc;
|
||||||
|
for ( ; (chlen= cs->cset->mb_wc(cs, &wc, str, end)) > 0; str+= chlen)
|
||||||
|
{
|
||||||
|
my_wc_t wc2= escaped_wc_for_single_quote(wc);
|
||||||
|
if (wc2 ? (append_wc('\\') || append_wc(wc2)) : append_wc(wc))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void String::print(String *str) const
|
void String::print(String *str) const
|
||||||
{
|
{
|
||||||
str->append_for_single_quote(Ptr, str_length);
|
str->append_for_single_quote(Ptr, str_length);
|
||||||
|
@ -1134,6 +1134,42 @@ public:
|
|||||||
print_with_conversion(to, cs);
|
print_with_conversion(to, cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
  Tell how a character is escaped inside a single-quoted literal.

  @param ch  character code to examine
  @return    the character that follows the backslash in the escaped form:
             '\\' for a backslash, '0' for NUL, '\'' for a single quote,
             'n' for line feed, 'r' for carriage return,
             'Z' for character code 26 (octal 032);
             0 if "ch" is none of the above
*/
static my_wc_t escaped_wc_for_single_quote(my_wc_t ch)
{
  if (ch == '\\')
    return '\\';
  if (ch == '\0')
    return '0';
  if (ch == '\'')
    return '\'';
  if (ch == '\n')
    return 'n';
  if (ch == '\r')
    return 'r';
  if (ch == '\032')
    return 'Z';
  return 0;
}
|
||||||
|
|
||||||
|
// Append for single quote using mb_wc/wc_mb Unicode conversion
|
||||||
|
bool append_for_single_quote_using_mb_wc(const char *str, size_t length,
|
||||||
|
CHARSET_INFO *cs);
|
||||||
|
|
||||||
|
// Append for single quote with optional mb_wc/wc_mb conversion
|
||||||
|
bool append_for_single_quote_opt_convert(const char *str,
|
||||||
|
size_t length,
|
||||||
|
CHARSET_INFO *cs)
|
||||||
|
{
|
||||||
|
return charset() == &my_charset_bin || cs == &my_charset_bin ||
|
||||||
|
my_charset_same(charset(), cs) ?
|
||||||
|
append_for_single_quote(str, length) :
|
||||||
|
append_for_single_quote_using_mb_wc(str, length, cs);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool append_for_single_quote_opt_convert(const String &str)
|
||||||
|
{
|
||||||
|
return append_for_single_quote_opt_convert(str.ptr(),
|
||||||
|
str.length(),
|
||||||
|
str.charset());
|
||||||
|
}
|
||||||
|
|
||||||
bool append_for_single_quote(const char *st, size_t len);
|
bool append_for_single_quote(const char *st, size_t len);
|
||||||
bool append_for_single_quote(const String *s)
|
bool append_for_single_quote(const String *s)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user