1
0
mirror of https://github.com/mariadb-corporation/mariadb-connector-c.git synced 2025-08-08 14:02:17 +03:00

Fix for mariadb_convert_string: the charset names for utf16 and utf32 are changed into names that iconv understands. Also, if the endianness is not specified, the BE charsets are used by default, to avoid BOMs.

Names mapped for both source and destination charsets.
Also the regression test for this change is added to charset.c
This commit is contained in:
Lawrin Novitsky
2015-10-28 15:53:39 +02:00
parent d46a64c4d7
commit fd96479d51
2 changed files with 119 additions and 8 deletions

View File

@@ -51,6 +51,7 @@
#endif #endif
#include <my_global.h> #include <my_global.h>
#include <m_ctype.h> #include <m_ctype.h>
#include <m_string.h>
#include <iconv.h> #include <iconv.h>
@@ -1121,13 +1122,55 @@ int madb_get_windows_cp(const char *charset)
#endif #endif
/* }}} */ /* }}} */
/* {{{ map_charset_name
   Changes a charset name into something iconv understands, if necessary,
   e.g. UTF16 does not work for iconv, while UTF-16 does.
   Another purpose is to avoid BOMs in the result string: if the name does
   not specify the endianness, BE is added.

   Only UTF16 and UTF32 names (optionally followed by an endianness) are
   rewritten; every other name is copied as is.

   @param[in]  cs_name    charset encoding name, e.g. "UTF16", "UTF32LE"
   @param[in]  target_cs  if non-zero, "//TRANSLIT" is appended to the name
   @param[out] buffer     receives the mapped name; it is always
                          NUL-terminated when buff_len > 0, and truncated
                          if it does not fit
   @param[in]  buff_len   size of buffer in bytes
*/
static void map_charset_name(const char *cs_name, my_bool target_cs, char *buffer, size_t buff_len)
{
  char digits[3]= "", endianness[3]= "BE";
  const char *suffix= target_cs ? "//TRANSLIT" : "";

  /* sscanf() returns EOF (a negative, i.e. "true" value) if the input ends
     before the first conversion, e.g. for an empty name. Thus the result has
     to be compared against the number of converted fields, otherwise digits
     would be read uninitialized. */
  if (sscanf(cs_name, "UTF%2[0-9]%2[LBE]", digits, endianness) >= 1 &&
      (strcmp(digits, "16") == 0 || strcmp(digits, "32") == 0))
  {
    /* Endianness is either the default (BE), or what we found in the string */
    snprintf(buffer, buff_len, "UTF-%s%s%s", digits, endianness, suffix);
  }
  else
  {
    /* Not our client - copy as is */
    snprintf(buffer, buff_len, "%s%s", cs_name, suffix);
  }
}
/* }}} */
/* {{{ mariadb_convert_string
Converts string from one charset to another, and writes converted string to given buffer
@param[in] from
@param[in/out] from_len
@param[in] from_cs
@param[out] to
@param[in/out] to_len
@param[in] to_cs
@param[out] errorcode
@return -1 in case of error, bytes used in the "to" buffer, otherwise
*/
size_t STDCALL mariadb_convert_string(const char *from, size_t *from_len, CHARSET_INFO *from_cs, size_t STDCALL mariadb_convert_string(const char *from, size_t *from_len, CHARSET_INFO *from_cs,
char *to, size_t *to_len, CHARSET_INFO *to_cs, int *errorcode) char *to, size_t *to_len, CHARSET_INFO *to_cs, int *errorcode)
{ {
iconv_t conv= 0; iconv_t conv= 0;
size_t rc= -1; size_t rc= -1;
size_t save_len= *to_len; size_t save_len= *to_len;
char to_encoding[128]; char to_encoding[128], from_encoding[128];
*errorcode= 0; *errorcode= 0;
@@ -1138,14 +1181,11 @@ size_t STDCALL mariadb_convert_string(const char *from, size_t *from_len, CHARSE
*errorcode= EINVAL; *errorcode= EINVAL;
return rc; return rc;
} }
/* UTF16 does not work form iconv, while UTF-16 does.
Besides we don't want iconv to generate BOM, thus we used either UTF-16LE or BE by default
TODO: Need to do the same for UTF-32(at leased re BOM) */
snprintf(to_encoding, 128, "%s//TRANSLIT", strncmp(to_cs->encoding, "UTF16", 5) == 0
? (strcmp(to_cs->encoding + 5, "LE") == 0 ? "UTF-16LE" : "UTF-16BE")
: to_cs->encoding);
if ((conv= iconv_open(to_encoding, from_cs->encoding)) == (iconv_t)-1) map_charset_name(to_cs->encoding, 1, to_encoding, sizeof(to_encoding));
map_charset_name(from_cs->encoding, 0, from_encoding, sizeof(from_encoding));
if ((conv= iconv_open(to_encoding, from_encoding)) == (iconv_t)-1)
{ {
*errorcode= errno; *errorcode= errno;
goto error; goto error;
@@ -1161,4 +1201,5 @@ error:
iconv_close(conv); iconv_close(conv);
return rc; return rc;
} }
/* }}} */

View File

@@ -656,6 +656,75 @@ static int test_bug_54100(MYSQL *mysql)
} }
/* We need this internal function for the test */
CHARSET_INFO * mysql_find_charset_name(const char *name);

/* Regression test for mariadb_convert_string() with utf16/utf32 charsets.

   The same character (U+10860) followed by a NUL terminator is kept in
   utf16 (BE), utf16le, utf32 (BE) and utf8. Each of the first three strings
   is converted to utf8, and the utf8 string is converted back to each of
   them. Both the number of written bytes and the bytes themselves have to
   match the expected string exactly, so a BOM added by the conversion makes
   the test fail.

   @param[in] mysql  connection handle, not used by the test itself
   @return OK on success, FAIL otherwise
*/
static int test_utf16_utf32_noboms(MYSQL *mysql)
{
  const char *csname[]= {"utf16", "utf16le", "utf32", "utf8"};
  CHARSET_INFO *csinfo[sizeof(csname)/sizeof(csname[0])];
  const int cs_count= (int)(sizeof(csname)/sizeof(csname[0]));
  /* utf8 is the last entry - every other charset is converted to and from it */
  const int UTF8= cs_count - 1;
  unsigned char in_string[][8]= {"\xd8\x02\xdc\x60\0",       /* utf16(be) */
                                 "\x02\xd8\x60\xdc\0",       /* utf16le */
                                 "\x00\x01\x08\x60\0\0\0",   /* utf32(be) */
                                 "\xF0\x90\xA1\xA0" };       /* utf8 */
  /* Lengths in bytes, including the terminating NUL character */
  size_t in_oct_len[]= {6, 6, 8, 5};
  char buffer[8];
  /* mysql_hex_string() writes 2 characters per byte plus the terminating NUL,
     and up to sizeof(buffer) bytes may have to be dumped */
  char as_hex[2 * sizeof(buffer) + 1];
  int i, error= 0;
  size_t rc, in_len, out_len;

  for (i= 0; i < cs_count; ++i)
  {
    csinfo[i]= mysql_find_charset_name(csname[i]);

    if (csinfo[i] == NULL)
    {
      diag("Could not get cs info for %s", csname[i]);
      return FAIL;
    }
  }

  for (i= 0; i < UTF8; ++i)
  {
    /* utf16/utf32 -> utf8 */
    in_len= in_oct_len[i];
    out_len= sizeof(buffer);

    diag("Converting %s->%s", csname[i], csname[UTF8]);
    rc= mariadb_convert_string((const char *)in_string[i], &in_len, csinfo[i], buffer, &out_len, csinfo[UTF8], &error);

    FAIL_IF(rc == (size_t)-1, "Conversion failed");
    FAIL_IF(rc != in_oct_len[UTF8], "Incorrect number of written bytes");

    if (memcmp(buffer, in_string[UTF8], rc) != 0)
    {
      mysql_hex_string(as_hex, buffer, rc);
      diag("Converted string(%s) does not match the expected one", as_hex);
      return FAIL;
    }

    /* utf8 -> utf16/utf32 */
    in_len= in_oct_len[UTF8];
    out_len= sizeof(buffer);

    diag("Converting %s->%s", csname[UTF8], csname[i]);
    rc= mariadb_convert_string((const char *)in_string[UTF8], &in_len, csinfo[UTF8], buffer, &out_len, csinfo[i], &error);

    FAIL_IF(rc == (size_t)-1, "Conversion failed");
    FAIL_IF(rc != in_oct_len[i], "Incorrect number of written bytes");

    if (memcmp(buffer, in_string[i], rc) != 0)
    {
      mysql_hex_string(as_hex, buffer, rc);
      diag("Converted string(%s) does not match the expected one", as_hex);
      return FAIL;
    }
  }

  return OK;
}
struct my_tests_st my_tests[] = { struct my_tests_st my_tests[] = {
{"bug_8378: mysql_real_escape with gbk", bug_8378, TEST_CONNECTION_NEW, 0, opt_bug8378, NULL}, {"bug_8378: mysql_real_escape with gbk", bug_8378, TEST_CONNECTION_NEW, 0, opt_bug8378, NULL},
{"test_client_character_set", test_client_character_set, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, {"test_client_character_set", test_client_character_set, TEST_CONNECTION_DEFAULT, 0, NULL, NULL},
@@ -667,6 +736,7 @@ struct my_tests_st my_tests[] = {
{"test_bug30472", test_bug30472, TEST_CONNECTION_NEW, 0, NULL, NULL}, {"test_bug30472", test_bug30472, TEST_CONNECTION_NEW, 0, NULL, NULL},
{"test_ps_i18n", test_ps_i18n, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, {"test_ps_i18n", test_ps_i18n, TEST_CONNECTION_DEFAULT, 0, NULL, NULL},
{"test_bug_54100", test_bug_54100, TEST_CONNECTION_NEW, 0, NULL, NULL}, {"test_bug_54100", test_bug_54100, TEST_CONNECTION_NEW, 0, NULL, NULL},
{"test_utf16_utf32_noboms", test_utf16_utf32_noboms, TEST_CONNECTION_DEFAULT, 0, NULL, NULL},
{NULL, NULL, 0, 0, NULL, 0} {NULL, NULL, 0, 0, NULL, 0}
}; };