1
0
mirror of https://github.com/MariaDB/server.git synced 2025-06-13 13:01:51 +03:00

Backporting WL#3759 Optimize identifier conversion in client-server protocol

This patch provides performance improvements:
- send_fields() when character_set_results = latin1
  is now about twice faster for column/table/database
  names, consisting on ASCII characters.

Changes:

- Protocol doesn't use "convert" temporary buffer anymore,
  and converts strings directly to "packet".

- General conversion optimization: quick conversion
  of ASCII strings was added.

modified files:

include/m_ctype.h
- Adding a new flag.
- Adding a new function prototype

libmysqld/lib_sql.cc
- Adding quick conversion method for embedded library:
  conversion is now done directly to result buffer,
  without using a temporary buffer.

mysys/charset.c
- Mark all dynamic ucs2 character sets as non-ASCII
- Mark some dymamic 7bit and 8bit charsets as non-ASCII
  (for example swe7 is not fully ASCII compatible).

sql/protocol.cc
- Adding quick method to convert a string directly
  into protocol buffer, without using a temporary buffer.

sql/protocol.h
- Adding a new method prototype

sql/sql_string.cc
  Optimization for conversion between two ASCII-compatible charsets:
- quickly convert ASCII strings,
  switch to mc_wc->wc_mb method only when a non-ASCII character is met.
- copy four ASCII characters at once on i386

strings/conf_to_src.c
- Marking non-ASCII character sets with a flag.

strings/ctype-extra.c
- Regenerating ctype-extra.c by running "conf_to_src".

strings/ctype-uca.c
- Marking UCS2 character set as non-ASCII.

strings/ctype-ucs2.c
- Marking UCS2 character set as non-ASCII.

strings/ctype.c
- A new function to detect if a 7bit or 8bit character set
  is ascii compatible.
This commit is contained in:
Alexander Barkov
2009-09-30 10:09:28 +05:00
parent a8edd0aabb
commit f02800bd97
11 changed files with 222 additions and 118 deletions

View File

@ -794,10 +794,11 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
*/
uint32
copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length, CHARSET_INFO *from_cs,
uint *errors)
static uint32
copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length,
CHARSET_INFO *from_cs,
uint *errors)
{
int cnvres;
my_wc_t wc;
@ -849,6 +850,65 @@ outp:
}
/*
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
*/
uint32
copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length, CHARSET_INFO *from_cs,
uint *errors)
{
/*
If any of the character sets is not ASCII compatible,
immediately switch to slow mb_wc->wc_mb method.
*/
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
return copy_and_convert_extended(to, to_length, to_cs,
from, from_length, from_cs, errors);
uint32 length= min(to_length, from_length), length2= length;
#if defined(__i386__)
/*
Special loop for i386, it allows to refer to a
non-aligned memory block as UINT32, which makes
it possible to copy four bytes at once. This
gives about 10% performance improvement comparing
to byte-by-byte loop.
*/
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
{
if ((*(uint32*)from) & 0x80808080)
break;
*((uint32*) to)= *((const uint32*) from);
}
#endif
for (; ; *to++= *from++, length--)
{
if (!length)
{
*errors= 0;
return length2;
}
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
{
uint32 copied_length= length2 - length;
to_length-= copied_length;
from_length-= copied_length;
return copied_length + copy_and_convert_extended(to, to_length,
to_cs,
from, from_length,
from_cs,
errors);
}
}
DBUG_ASSERT(FALSE); // Should never get to here
return 0; // Make compiler happy
}
/**
Copy string with HEX-encoding of "bad" characters.