1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-7947 strcmp() takes 0.37% in OLTP RO

This patch ensures that all identical character sets share the same
cs->csname.
This allows us to replace strcmp() in my_charset_same() with a comparison
of pointers. This fixes a long-standing performance issue that could cause
a strcmp() for every item sent through the protocol class to the end user.

One consequence of this patch is that we don't allow one to add a character
set definition in the Index.xml file that changes the csname of an existing
character set. This is by design, as changing character set names of existing
ones is extremely dangerous, especially as some storage engines just record
character set numbers.

As we now have a hash over the character sets' csname, we can in the future
use that for faster access to a specific character set. This could be done
by changing the hash to non-unique and using the hash to find the next
character set with the same csname.
This commit is contained in:
Monty
2020-07-20 19:26:31 +03:00
parent 46ffd47f42
commit dbcd3384e0
30 changed files with 386 additions and 245 deletions

View File

@@ -20,6 +20,7 @@
#include <m_ctype.h>
#include <m_string.h>
#include <my_dir.h>
#include <hash.h>
#include <my_xml.h>
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
@@ -28,6 +29,8 @@
#include <locale.h>
#endif
extern HASH charset_name_hash;
/*
The code below implements this functionality:
@@ -38,15 +41,10 @@
- Setting server default character set
*/
/*
  Tell whether two CHARSET_INFO objects carry the same character set name.

  @param cs1  First character set / collation descriptor
  @param cs2  Second character set / collation descriptor

  @return non-zero if cs1 and cs2 are the same object or their csname
          strings compare equal, 0 otherwise
*/
my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
{
  if (cs1 == cs2)
    return 1;                                   /* Identical object */
  return strcmp(cs1->csname, cs2->csname) == 0;
}
static uint
get_collation_number_internal(const char *name)
{
CHARSET_INFO **cs;
for (cs= all_charsets;
cs < all_charsets + array_elements(all_charsets);
@@ -72,11 +70,10 @@ static my_bool init_state_maps(struct charset_info_st *cs)
uchar *state_map;
uchar *ident_map;
if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256*2, MYF(MY_WME))))
return 1;
if (!(cs->ident_map= ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
return 1;
cs->ident_map= ident_map= state_map + 256;
/* Fill state_map with states to get a faster parser */
for (i=0; i < 256 ; i++)
@@ -153,7 +150,8 @@ static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from)
{
to->number= from->number ? from->number : to->number;
if (from->csname)
/* Don't replace csname if already set */
if (from->csname && !to->csname)
if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
goto err;
@@ -322,7 +320,21 @@ static int add_collation(struct charset_info_st *cs)
return MY_XML_ERROR;
bzero(newcs,sizeof(CHARSET_INFO));
}
else
{
/* Don't allow change of csname */
if (newcs->csname && strcmp(newcs->csname, cs->csname))
{
my_error(EE_DUPLICATE_CHARSET, MYF(ME_WARNING),
cs->number, cs->csname, newcs->csname);
/*
Continue parsing the rest of Index.xml. We got a warning in the log
so the user can fix the wrong character set definition.
*/
return MY_XML_OK;
}
}
if (cs->primary_number == cs->number)
cs->state |= MY_CS_PRIMARY;
@@ -402,8 +414,8 @@ static int add_collation(struct charset_info_st *cs)
{
newcs->state |= MY_CS_LOADED;
}
newcs->state|= MY_CS_AVAILABLE;
}
add_compiled_extra_collation(newcs);
}
else
{
@@ -420,7 +432,7 @@ static int add_collation(struct charset_info_st *cs)
if (cs->comment)
if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
return MY_XML_ERROR;
if (cs->csname)
if (cs->csname && ! newcs->csname)
if (!(newcs->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
return MY_XML_ERROR;
if (cs->name)
@@ -557,14 +569,55 @@ char *get_charsets_dir(char *buf)
CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL};
CHARSET_INFO *default_charset_info = &my_charset_latin1;
/*
  Add a standard character set compiled into the application.

  Registers cs in all_charsets[] under its number, marks it as
  MY_CS_AVAILABLE and tries to insert it into charset_name_hash.

  All related character sets should share the same csname pointer. In
  debug builds, when the insert into the hash is refused, we verify that
  the entry already stored under this csname uses the very same csname
  pointer as cs.
*/
void add_compiled_collation(struct charset_info_st *cs)
{
  DBUG_ASSERT(cs->number < array_elements(all_charsets));
  cs->state|= MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;

  if (!my_hash_insert(&charset_name_hash, (uchar*) cs))
    return;                                     /* Inserted, nothing to check */

#ifndef DBUG_OFF
  {
    CHARSET_INFO *existing;
    existing= (CHARSET_INFO*) my_hash_search(&charset_name_hash,
                                             (uchar*) cs->csname,
                                             strlen(cs->csname));
    DBUG_ASSERT(existing);
    DBUG_ASSERT(existing->csname == cs->csname);
  }
#endif
}
/*
  Add optional character sets from ctype-extra.c

  Registers cs in all_charsets[] under its number, marks it as
  MY_CS_AVAILABLE and tries to insert it into charset_name_hash.

  If the csname is already in use, replace csname in the new object with a
  pointer to the already used csname, to ensure that all csname's point to
  the same string for the same character set.
*/
void add_compiled_extra_collation(struct charset_info_st *cs)
{
  DBUG_ASSERT(cs->number < array_elements(all_charsets));
  all_charsets[cs->number]= cs;
  cs->state|= MY_CS_AVAILABLE;
  if ((my_hash_insert(&charset_name_hash, (uchar*) cs)))
  {
    CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash,
                                                      (uchar*) cs->csname,
                                                      strlen(cs->csname));
    /*
      The insert is expected to fail because the csname already exists, but
      it may presumably also fail for other reasons (e.g. out of memory).
      In that case there is no earlier entry to share the name with, so
      keep cs->csname as it is instead of dereferencing a NULL pointer.
    */
    if (org)
      cs->csname= org->csname;
  }
}
static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
@@ -612,14 +665,31 @@ const char *my_collation_get_tailoring(uint id)
}
HASH charset_name_hash;
/*
  Key callback for charset_name_hash: the key of a CHARSET_INFO entry is
  its csname string.

  @param object    Hash entry; a CHARSET_INFO cast to uchar*
  @param size      Out: length of the key in bytes (strlen of csname)
  @param not_used  Unused

  @return pointer to the csname of the entry
*/
static uchar *get_charset_key(const uchar *object,
                              size_t *size,
                              my_bool not_used __attribute__((unused)))
{
  const char *name= ((CHARSET_INFO*) object)->csname;

  *size= strlen(name);
  return (uchar*) name;
}
static void init_available_charsets(void)
{
char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
struct charset_info_st **cs;
MY_CHARSET_LOADER loader;
DBUG_ENTER("init_available_charsets");
bzero((char*) &all_charsets,sizeof(all_charsets));
bzero((char*) &my_collation_statistics, sizeof(my_collation_statistics));
my_hash_init2(key_memory_charsets, &charset_name_hash, 16,
&my_charset_latin1, 64, 0, 0, get_charset_key,
0, 0, HASH_UNIQUE);
init_compiled_charsets(MYF(0));
/* Copy compiled charsets */
@@ -640,12 +710,14 @@ static void init_available_charsets(void)
my_charset_loader_init_mysys(&loader);
strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
my_read_charset_file(&loader, fname, MYF(0));
DBUG_VOID_RETURN;
}
/*
  Release the character set name hash and reset the once-control guarding
  character set initialization.
*/
void free_charsets(void)
{
/*
  charsets_template is a never-touched MY_PTHREAD_ONCE_INIT copy; assigning
  it back presumably allows the charsets to be initialized again later.
*/
charsets_initialized= charsets_template;
/* Free the hash over csname */
my_hash_free(&charset_name_hash);
}
@@ -1431,4 +1503,4 @@ const char* my_default_csname()
csname = my_os_charset_to_mysql_charset(csname);
#endif
return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME;
}
}