mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
Bug#15376: Unassigned multibyte codes are converted to U+0000
Merging changes into 5.0. mysql-test/r/ctype_eucjpms.result: Adding test; mysql-test/r/ctype_ujis.result: After-merge fix; mysql-test/t/ctype_eucjpms.test: Adding test; strings/ctype-cp932.c: After-merge fix; strings/ctype-eucjpms.c: Applying changes to eucjpms, similar to ujis. Note: eucjpms appeared in 5.0, so the original patch, which was for 4.1, didn't fix eucjpms.
This commit is contained in:
@ -9819,3 +9819,9 @@ eucjpms_bin 6109
|
|||||||
eucjpms_bin 61
|
eucjpms_bin 61
|
||||||
eucjpms_bin 6120
|
eucjpms_bin 6120
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
select hex(convert(_eucjpms 0xA5FE41 using ucs2));
|
||||||
|
hex(convert(_eucjpms 0xA5FE41 using ucs2))
|
||||||
|
003F0041
|
||||||
|
select hex(convert(_eucjpms 0x8FABF841 using ucs2));
|
||||||
|
hex(convert(_eucjpms 0x8FABF841 using ucs2))
|
||||||
|
003F0041
|
||||||
|
@ -2307,6 +2307,12 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
|
|||||||
c2h
|
c2h
|
||||||
ab_def
|
ab_def
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
select hex(convert(_ujis 0xA5FE41 using ucs2));
|
||||||
|
hex(convert(_ujis 0xA5FE41 using ucs2))
|
||||||
|
003F0041
|
||||||
|
select hex(convert(_ujis 0x8FABF841 using ucs2));
|
||||||
|
hex(convert(_ujis 0x8FABF841 using ucs2))
|
||||||
|
003F0041
|
||||||
DROP TABLE IF EXISTS t1, t2;
|
DROP TABLE IF EXISTS t1, t2;
|
||||||
DROP PROCEDURE IF EXISTS sp1;
|
DROP PROCEDURE IF EXISTS sp1;
|
||||||
set names ujis;
|
set names ujis;
|
||||||
@ -2337,9 +2343,3 @@ DROP TABLE t2;
|
|||||||
set names default;
|
set names default;
|
||||||
set character_set_database=default;
|
set character_set_database=default;
|
||||||
set character_set_server=default;
|
set character_set_server=default;
|
||||||
select hex(convert(_ujis 0xA5FE41 using ucs2));
|
|
||||||
hex(convert(_ujis 0xA5FE41 using ucs2))
|
|
||||||
003F0041
|
|
||||||
select hex(convert(_ujis 0x8FABF841 using ucs2));
|
|
||||||
hex(convert(_ujis 0x8FABF841 using ucs2))
|
|
||||||
003F0041
|
|
||||||
|
@ -363,3 +363,20 @@ SET collation_connection='eucjpms_japanese_ci';
|
|||||||
-- source include/ctype_filesort.inc
|
-- source include/ctype_filesort.inc
|
||||||
SET collation_connection='eucjpms_bin';
|
SET collation_connection='eucjpms_bin';
|
||||||
-- source include/ctype_filesort.inc
|
-- source include/ctype_filesort.inc
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bugs#15375: Unassigned multibyte codes are broken
|
||||||
|
# into parts when converting to Unicode.
|
||||||
|
# This query should return 0x003F0041. I.e. it should
|
||||||
|
# scan unassigned double-byte character 0xA5FE, convert
|
||||||
|
# it as QUESTION MARK 0x003F and then scan the next
|
||||||
|
# character, which is a single byte character 0x41.
|
||||||
|
#
|
||||||
|
select hex(convert(_eucjpms 0xA5FE41 using ucs2));
|
||||||
|
# This one should return 0x003F0041:
|
||||||
|
# scan unassigned three-byte character 0x8FABF8,
|
||||||
|
# convert it as QUESTION MARK 0x003F and then scan
|
||||||
|
# the next character, which is a single byte character 0x41.
|
||||||
|
select hex(convert(_eucjpms 0x8FABF841 using ucs2));
|
||||||
|
|
||||||
|
@ -5355,7 +5355,7 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
int hi=s[0];
|
int hi=s[0];
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
if (hi < 0x80)
|
if (hi < 0x80)
|
||||||
{
|
{
|
||||||
@ -5370,10 +5370,10 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1])))
|
if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1])))
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -243,7 +243,7 @@ my_mb_wc_jisx0201(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
const uchar *e __attribute__((unused)))
|
const uchar *e __attribute__((unused)))
|
||||||
{
|
{
|
||||||
wc[0]=tab_jisx0201_uni[*s];
|
wc[0]=tab_jisx0201_uni[*s];
|
||||||
return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1;
|
return (!wc[0] && s[0]) ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -8473,7 +8473,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
int c1,c2,c3;
|
int c1,c2,c3;
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
c1=s[0];
|
c1=s[0];
|
||||||
|
|
||||||
@ -8485,7 +8485,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
c2=s[1];
|
c2=s[1];
|
||||||
|
|
||||||
@ -8500,7 +8500,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
{
|
{
|
||||||
pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
|
pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
|
||||||
if (!pwc[0])
|
if (!pwc[0])
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -8520,7 +8520,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
|
|
||||||
ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
|
ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
|
||||||
if (ret!=1)
|
if (ret!=1)
|
||||||
return ret;
|
return -2;
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -8531,7 +8531,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
return MY_CS_ILSEQ;
|
return MY_CS_ILSEQ;
|
||||||
|
|
||||||
if (s+3>e)
|
if (s+3>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL3;
|
||||||
|
|
||||||
c3=s[2];
|
c3=s[2];
|
||||||
if (c3 < 0xA1 || c3>=0xFF)
|
if (c3 < 0xA1 || c3>=0xFF)
|
||||||
@ -8540,8 +8540,8 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
if (c2<0xF5)
|
if (c2<0xF5)
|
||||||
{
|
{
|
||||||
pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
|
pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
|
||||||
if (!pwc)
|
if (!pwc[0])
|
||||||
return MY_CS_ILSEQ;
|
return -3;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -8572,7 +8572,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if ((jp=my_uni_jisx0208_onechar(wc)))
|
if ((jp=my_uni_jisx0208_onechar(wc)))
|
||||||
{
|
{
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
jp+=0x8080;
|
jp+=0x8080;
|
||||||
s[0]=jp>>8;
|
s[0]=jp>>8;
|
||||||
@ -8584,7 +8584,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
|
if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
|
||||||
{
|
{
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
s[1]= s[0];
|
s[1]= s[0];
|
||||||
s[0]= 0x8E;
|
s[0]= 0x8E;
|
||||||
return 2;
|
return 2;
|
||||||
@ -8594,7 +8594,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if ((jp=my_uni_jisx0212_onechar(wc)))
|
if ((jp=my_uni_jisx0212_onechar(wc)))
|
||||||
{
|
{
|
||||||
if (s+3>e)
|
if (s+3>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL3;
|
||||||
|
|
||||||
jp+=0x8080;
|
jp+=0x8080;
|
||||||
s[0]=0x8F;
|
s[0]=0x8F;
|
||||||
@ -8608,7 +8608,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if (wc>=0xE000 && wc<0xE3AC)
|
if (wc>=0xE000 && wc<0xE3AC)
|
||||||
{
|
{
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
c1=((unsigned)(wc-0xE000)/94)+0xF5;
|
c1=((unsigned)(wc-0xE000)/94)+0xF5;
|
||||||
s[0]=c1;
|
s[0]=c1;
|
||||||
@ -8622,7 +8622,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if (wc>=0xE3AC && wc<0xE758)
|
if (wc>=0xE3AC && wc<0xE758)
|
||||||
{
|
{
|
||||||
if (s+3>e)
|
if (s+3>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL3;
|
||||||
|
|
||||||
s[0]=0x8F;
|
s[0]=0x8F;
|
||||||
c1=((unsigned)(wc-0xE3AC)/94)+0xF5;
|
c1=((unsigned)(wc-0xE3AC)/94)+0xF5;
|
||||||
|
Reference in New Issue
Block a user