1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-7947 strcmp() takes 0.37% in OLTP RO

This patch ensures that all identical character sets share the same
cs->csname.
This allows us to replace strcmp() in my_charset_same() with comparisons
of pointers. This fixes a long-standing performance issue that could cause
a strcmp() for every item sent through the protocol class to the end user.

One consequence of this patch is that we don't allow one to add a character
set definition in the Index.xml file that changes the csname of an existing
character set. This is by design, as changing character set names of existing
ones is extremely dangerous, especially as some storage engines just record
character set numbers.

As we now have a hash over character sets' csname, we can in the future
use that for faster access to a specific character set. This could be done
by changing the hash to non-unique and using the hash to find the next
character set with the same csname.
This commit is contained in:
Monty
2020-07-20 19:26:31 +03:00
parent 46ffd47f42
commit dbcd3384e0
30 changed files with 386 additions and 245 deletions

View File

@@ -35,7 +35,6 @@
#define HAVE_CHARSET_mb2_or_mb4
#endif
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif
@@ -1278,6 +1277,7 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
#ifdef HAVE_CHARSET_utf16
const char charset_name_utf16le[]= "utf16le";
static inline void
my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
@@ -1599,7 +1599,7 @@ struct charset_info_st my_charset_utf16_general_ci=
{
54,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16", /* cs name */
charset_name_utf16, /* cs name */
"utf16_general_ci", /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
@@ -1632,7 +1632,7 @@ struct charset_info_st my_charset_utf16_bin=
{
55,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16", /* cs name */
charset_name_utf16, /* cs name */
"utf16_bin", /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
@@ -1665,7 +1665,7 @@ struct charset_info_st my_charset_utf16_general_nopad_ci=
{
MY_NOPAD_ID(54),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
"utf16", /* cs name */
charset_name_utf16, /* cs name */
"utf16_general_nopad_ci", /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
@@ -1699,7 +1699,7 @@ struct charset_info_st my_charset_utf16_nopad_bin=
MY_NOPAD_ID(55),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
MY_CS_NOPAD,
"utf16", /* cs name */
charset_name_utf16, /* cs name */
"utf16_nopad_bin", /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
@@ -1940,7 +1940,7 @@ struct charset_info_st my_charset_utf16le_general_ci=
{
56,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16le", /* cs name */
charset_name_utf16le, /* cs name */
"utf16le_general_ci",/* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
@@ -1973,7 +1973,7 @@ struct charset_info_st my_charset_utf16le_bin=
{
62,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16le", /* cs name */
charset_name_utf16le, /* cs name */
"utf16le_bin", /* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
@@ -2006,7 +2006,7 @@ struct charset_info_st my_charset_utf16le_general_nopad_ci=
{
MY_NOPAD_ID(56),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
"utf16le", /* cs name */
charset_name_utf16le, /* cs name */
"utf16le_general_nopad_ci",/* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
@@ -2040,7 +2040,7 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
MY_NOPAD_ID(62),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
MY_CS_NOPAD,
"utf16le", /* cs name */
charset_name_utf16le, /* cs name */
"utf16le_nopad_bin", /* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
@@ -2763,7 +2763,7 @@ struct charset_info_st my_charset_utf32_general_ci=
{
60,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf32", /* cs name */
charset_name_utf32, /* cs name */
"utf32_general_ci", /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
@@ -2796,7 +2796,7 @@ struct charset_info_st my_charset_utf32_bin=
{
61,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf32", /* cs name */
charset_name_utf32, /* cs name */
"utf32_bin", /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
@@ -2829,7 +2829,7 @@ struct charset_info_st my_charset_utf32_general_nopad_ci=
{
MY_NOPAD_ID(60),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
"utf32", /* cs name */
charset_name_utf32, /* cs name */
"utf32_general_nopad_ci", /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
@@ -2863,7 +2863,7 @@ struct charset_info_st my_charset_utf32_nopad_bin=
MY_NOPAD_ID(61),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
MY_CS_NOPAD,
"utf32", /* cs name */
charset_name_utf32, /* cs name */
"utf32_nopad_bin", /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
@@ -3354,7 +3354,7 @@ struct charset_info_st my_charset_ucs2_general_ci=
{
35,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"ucs2", /* cs name */
charset_name_ucs2, /* cs name */
"ucs2_general_ci", /* name */
"", /* comment */
NULL, /* tailoring */
@@ -3387,7 +3387,7 @@ struct charset_info_st my_charset_ucs2_general_mysql500_ci=
{
159, 0, 0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
"ucs2", /* cs name */
charset_name_ucs2, /* cs name */
"ucs2_general_mysql500_ci", /* name */
"", /* comment */
NULL, /* tailoring */
@@ -3420,7 +3420,7 @@ struct charset_info_st my_charset_ucs2_bin=
{
90,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
"ucs2", /* cs name */
charset_name_ucs2, /* cs name */
"ucs2_bin", /* name */
"", /* comment */
NULL, /* tailoring */
@@ -3453,7 +3453,7 @@ struct charset_info_st my_charset_ucs2_general_nopad_ci=
{
MY_NOPAD_ID(35),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
"ucs2", /* cs name */
charset_name_ucs2, /* cs name */
"ucs2_general_nopad_ci", /* name */
"", /* comment */
NULL, /* tailoring */
@@ -3486,7 +3486,7 @@ struct charset_info_st my_charset_ucs2_nopad_bin=
{
MY_NOPAD_ID(90),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
"ucs2", /* cs name */
charset_name_ucs2, /* cs name */
"ucs2_nopad_bin", /* name */
"", /* comment */
NULL, /* tailoring */