MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion

Item_func_group_concat::print() did not take into account that Item_func_group_concat::separator can be of a different character set than the "String *str" (the string the printing is being done to).

Therefore, printing did not work correctly for:
- non-ASCII separators when GROUP_CONCAT is done on 8bit data or multi-byte data with mbminlen==1.
- all separators (even including simple ones like comma) when GROUP_CONCAT is done on ucs2/utf16/utf32 data (mbminlen>1).

Because of this problem, VIEW definitions did not print correctly to their FRM files. This later led to a wrong SELECT and SHOW CREATE output.

Fix:
- Adding new String methods:
    bool append_for_single_quote_using_mb_wc(const char *str, size_t length, CHARSET_INFO *cs);
    bool append_for_single_quote_opt_convert(const char *str, size_t length, CHARSET_INFO *cs)
  which perform both escaping and character set conversion at the same time.
- Adding a new String method escaped_wc_for_single_quote(), to reuse the code between the old and the new methods.
- Fixing Item_func_group_concat::print() to use the new method append_for_single_quote_opt_convert().
2025-07-29 05:21:33 +03:00 · 2024-03-27 15:22:58 +04:00
parent 58df20974b
commit 0fc123c595
7 changed files with 143 additions and 15 deletions
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@ -1126,26 +1126,45 @@ bool String::append_for_single_quote(const char *st, size_t len)
  int chlen;
  for (; st < end; st++)
  {
-    switch (*st)
+    char ch2= (char) (uchar) escaped_wc_for_single_quote((uchar) *st);
+    if (ch2)
    {
-    case '\\':   APPEND(STRING_WITH_LEN("\\\\")); break;
-    case '\0':   APPEND(STRING_WITH_LEN("\\0")); break;
-    case '\'':   APPEND(STRING_WITH_LEN("\\'")); break;
-    case '\n':   APPEND(STRING_WITH_LEN("\\n")); break;
-    case '\r':   APPEND(STRING_WITH_LEN("\\r")); break;
-    case '\032': APPEND(STRING_WITH_LEN("\\Z")); break;
-    default:     if ((chlen=charset()->charlen(st, end)) > 0)
-                 {
-                   APPEND(st, chlen);
-                   st+= chlen-1;
-                 }
-                 else
-                   APPEND(*st);
+      if (append('\\') || append(ch2))
+        return true;
+      continue;
    }
+    if ((chlen= charset()->charlen(st, end)) > 0)
+    {
+     APPEND(st, chlen);
+      st+= chlen-1;
+    }
+    else
+      APPEND(*st);
  }
  return 0;
 }

+
+bool String::append_for_single_quote_using_mb_wc(const char *src, /* bytes to append, encoded in cs */
+                                                 size_t length, /* length of src in bytes */
+                                                 CHARSET_INFO *cs) /* character set used to decode src */
+{ /* Appends src to this string one decoded character at a time, backslash-escaping where needed; returns true if an append fails, false otherwise. */
+  DBUG_ASSERT(&my_charset_bin != charset()); /* this string's own character set must not be my_charset_bin */
+  DBUG_ASSERT(&my_charset_bin != cs); /* the source character set must not be my_charset_bin either */
+  const uchar *str= (const uchar *) src; /* read cursor into the source bytes */
+  const uchar *end= (const uchar *) src + length; /* one past the last source byte */
+  int chlen; /* byte length of the character just decoded by mb_wc */
+  my_wc_t wc; /* the decoded character */
+  for ( ; (chlen= cs->cset->mb_wc(cs, &wc, str, end)) > 0; str+= chlen) /* decode one character per iteration; stop once mb_wc yields <= 0 */
+  {
+    my_wc_t wc2= escaped_wc_for_single_quote(wc); /* non-zero means wc needs escaping; wc2 is what follows the backslash */
+    if (wc2 ? (append_wc('\\') || append_wc(wc2)) : append_wc(wc)) /* escaped: backslash + wc2; otherwise wc as is */
+      return true; /* an append_wc() call reported failure */
+  }
+  return false; /* NOTE(review): a non-positive mb_wc result (presumably end of input or an undecodable sequence) also lands here, leaving any remaining bytes unappended -- confirm this is intended */
+}
+
+
 void String::print(String *str) const
 {
  str->append_for_single_quote(Ptr, str_length);