MDEV-7947 strcmp() takes 0.37% in OLTP RO

This patch ensures that all identical character sets share the same cs->csname. This allows us to replace strcmp() in my_charset_same() with a comparison of pointers. This fixes a long-standing performance issue that could cause a strcmp() for every item sent through the protocol class to the end user. One consequence of this patch is that we don't allow one to add a character definition in the Index.xml file that changes the csname of an existing character set. This is by design, as changing the character set names of existing ones is extremely dangerous, especially as some storage engines just record character set numbers. As we now have a hash over the character sets' csname, we can in the future use that for faster access to a specific character set. This could be done by changing the hash to non-unique and using the hash to find the next character set with the same csname.
2025-12-01 17:39:21 +03:00 · 2020-07-20 19:26:31 +03:00
parent 46ffd47f42
commit dbcd3384e0
30 changed files with 386 additions and 245 deletions
--- a/mysql-test/suite/innodb/r/innodb_ctype_ldml.result
+++ b/mysql-test/suite/innodb/r/innodb_ctype_ldml.result
@@ -383,7 +383,7 @@ Bug#46448 trailing spaces are not ignored when user collation maps space != 0x20
 set names latin1;
 show collation like 'latin1_test';
 Collation	Charset	Id	Default	Compiled	Sortlen
-latin1_test	latin1	99		Yes	1
+latin1_test	latin1	331			1
 select "foo" = "foo " collate latin1_test;
 "foo" = "foo " collate latin1_test
 1
@@ -402,6 +402,7 @@ utf8mb4_test_ci	utf8mb4	326			8
 utf16_test_ci	utf16	327			8
 utf8mb4_test_400_ci	utf8mb4	328			8
 utf8mb4_test_520_nopad_ci	utf8mb4	329			8
+latin1_test	latin1	331			1
 latin1_test2	latin1	332			1
 latin1_test2_cs	latin1	333			1
 latin1_swedish_nopad2_ci	latin1	334			1
@@ -426,7 +427,7 @@ utf32_test_ci	utf32	391			8
 utf8_maxuserid_ci	utf8	2047			8
 show collation like '%test%';
 Collation	Charset	Id	Default	Compiled	Sortlen
-latin1_test	latin1	99		Yes	1
+latin1_test	latin1	331			1
 latin1_test2	latin1	332			1
 latin1_test2_cs	latin1	333			1
 utf8_test_ci	utf8	353			8
--- a/mysql-test/suite/innodb/t/innodb_ctype_ldml.test
+++ b/mysql-test/suite/innodb/t/innodb_ctype_ldml.test
@@ -13,6 +13,7 @@ drop table if exists t1;
 --disable_query_log
 call mtr.add_suppression("Syntax error at '\\[strength tertiary\\]'");
 call mtr.add_suppression("Can't reset before a primary ignorable character U\\+A48C");
+call mtr.add_suppression("Charset id.*trying to replace");
 --enable_query_log

 --echo In the following tests we change the order of letter "b"