Change CHARSET_INFO character set and collation names to LEX_CSTRING

This change removed 68 explicit strlen() calls from the code. The following renames were done to ensure we don't use the old names when merging code from earlier releases, as using the new variables in print functions could result in crashes: - charset->csname renamed to charset->cs_name - charset->name renamed to charset->coll_name Almost everything was a mechanical change, except: - Changed to use the new Protocol::store(LEX_CSTRING..) when possible - Changed to use field->store(LEX_CSTRING*, CHARSET_INFO*) when possible - Changed to use String->append(LEX_CSTRING&) when possible Other things: - There were compiler issues with ensuring that all character set names point to the same string: gcc doesn't allow one to use integer constants when defining global structures (constant char * pointers work fine). To get around this, I declared defines for each character set name length.
2025-07-30 16:24:05 +03:00 · 2020-08-22 02:08:59 +03:00
parent b0910dddf5
commit a206658b98
78 changed files with 1019 additions and 1022 deletions
--- a/sql/sql_type.cc
+++ b/sql/sql_type.cc
@ -6231,17 +6231,15 @@ String *Type_handler::

  StringBuffer<STRING_BUFFER_USUAL_SIZE> buf(result->charset());
  CHARSET_INFO *cs= thd->variables.character_set_client;
-  const char *res_cs_name= result->charset()->csname;
-  const char *collation_name= item->collation.collation->name;

  buf.append('_');
-  buf.append(res_cs_name, strlen(res_cs_name));
+  buf.append(result->charset()->cs_name);
  if (cs->escape_with_backslash_is_dangerous)
    buf.append(' ');
  append_query_string(cs, &buf, result->ptr(), result->length(),
                     thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES);
  buf.append(STRING_WITH_LEN(" COLLATE '"));
-  buf.append(collation_name, strlen(collation_name));
+  buf.append(item->collation.collation->coll_name);
  buf.append('\'');
  str->copy(buf);

@ -9382,33 +9380,27 @@ LEX_CSTRING Charset::collation_specific_name() const
    for character sets and collations, so a collation
    name not necessarily starts with the character set name.
  */
-  LEX_CSTRING retval;
-  size_t csname_length= strlen(m_charset->csname);
-  if (strncmp(m_charset->name, m_charset->csname, csname_length))
-  {
-    retval.str= NULL;
-    retval.length= 0;
-    return retval;
-  }
-  const char *ptr= m_charset->name + csname_length;
-  retval.str= ptr;
-  retval.length= strlen(ptr);
-  return retval;
+  size_t cs_name_length= m_charset->cs_name.length;
+  if (strncmp(m_charset->coll_name.str, m_charset->cs_name.str,
+              cs_name_length))
+    return {NULL, 0};
+  const char *ptr= m_charset->coll_name.str + cs_name_length;
+  return {ptr, m_charset->coll_name.length - cs_name_length };
 }


 bool
 Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const
 {
-  if (!strcmp(m_charset->csname, cs->csname))
+  if (my_charset_same(m_charset, cs))
    return true;

-  if (!strcmp(m_charset->csname, MY_UTF8MB3) &&
-      !strcmp(cs->csname, MY_UTF8MB4))
+  if (!strcmp(m_charset->cs_name.str, MY_UTF8MB3) &&
+      !strcmp(cs->cs_name.str, MY_UTF8MB4))
    return true;

  /*
-    Originally we allowed here instat ALTER for ASCII-to-LATIN1
+    Originally we allowed here instant ALTER for ASCII-to-LATIN1
    and UCS2-to-UTF16, but this was wrong:
    - MariaDB's ascii is not a subset for 8-bit character sets
      like latin1, because it allows storing bytes 0x80..0xFF as