1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode.
m_ctype.h:
  Reorganizing mb_wc return codes to be able
  to return "an unassigned N-byte-long character".
sql_string.cc:
  Adding code to detect and properly handle
  unassigned characters (i.e. those characters
  which are correctly formed according to the
  character specifications, but don't have a Unicode
  mapping).
Many files:
  Fixing conversion function to return new codes.
ctype_ujis.test, ctype_gbk.test, ctype_big5.test:
  Adding a test case.
ctype_ujis.result, ctype_gbk.result, ctype_big5.result:
  Fixing results accordingly.


include/m_ctype.h:
  Reorganizing mb_wc return codes to be able
  to return "an unassigned N-byte long character".
  Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode.
mysql-test/r/ctype_big5.result:
  Fixing results accordingly.
mysql-test/r/ctype_gbk.result:
  Fixing results accordingly.
mysql-test/r/ctype_ujis.result:
  Fixing results accordingly.
mysql-test/t/ctype_big5.test:
  Adding a test case.
mysql-test/t/ctype_gbk.test:
  Adding a test case.
mysql-test/t/ctype_ujis.test:
  Adding a test case.
sql/sql_string.cc:
  Adding code to detect and properly handle
  unassigned characters (i.e. those characters
  which are correctly formed according to the
  character specifications, but don't have a Unicode
  mapping).
strings/ctype-big5.c:
  Fixing conversion function to return new codes.
strings/ctype-bin.c:
  Fixing conversion function to return new codes.
strings/ctype-cp932.c:
  Fixing conversion function to return new codes.
strings/ctype-euc_kr.c:
  Fixing conversion function to return new codes.
strings/ctype-gb2312.c:
  Fixing conversion function to return new codes.
strings/ctype-gbk.c:
  Fixing conversion function to return new codes.
strings/ctype-latin1.c:
  Fixing conversion function to return new codes.
strings/ctype-simple.c:
  Fixing conversion function to return new codes.
strings/ctype-sjis.c:
  Fixing conversion function to return new codes.
strings/ctype-tis620.c:
  Fixing conversion function to return new codes.
strings/ctype-ucs2.c:
  Fixing conversion function to return new codes.
strings/ctype-ujis.c:
  Fixing conversion function to return new codes.
strings/ctype-utf8.c:
  Fixing conversion function to return new codes.
This commit is contained in:
unknown
2005-12-12 21:42:09 +04:00
parent 5aeb69296a
commit a464e01713
21 changed files with 122 additions and 56 deletions

View File

@ -242,7 +242,7 @@ my_mb_wc_jisx0201(CHARSET_INFO *cs __attribute__((unused)),
const uchar *e __attribute__((unused)))
{
wc[0]=tab_jisx0201_uni[*s];
return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1;
return (!wc[0] && s[0]) ? -1 : 1;
}
@ -8341,7 +8341,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
int c1,c2,c3;
if (s >= e)
return MY_CS_TOOFEW(0);
return MY_CS_TOOSMALL;
c1=s[0];
@ -8353,7 +8353,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
}
if (s+2>e)
return MY_CS_TOOFEW(0);
return MY_CS_TOOSMALL2;
c2=s[1];
@ -8368,7 +8368,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
{
pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
if (!pwc[0])
return MY_CS_ILSEQ;
return -2;
}
else
{
@ -8388,7 +8388,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
if (ret!=1)
return ret;
return -2;
return 2;
}
@ -8399,7 +8399,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
return MY_CS_ILSEQ;
if (s+3>e)
return MY_CS_TOOFEW(0);
return MY_CS_TOOSMALL3;
c3=s[2];
if (c3 < 0xA1 || c3>=0xFF)
@ -8408,8 +8408,8 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
if (c2<0xF5)
{
pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
if (!pwc)
return MY_CS_ILSEQ;
if (!pwc[0])
return -3;
}
else
{
@ -8440,7 +8440,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if ((jp=my_uni_jisx0208_onechar(wc)))
{
if (s+2>e)
return MY_CS_TOOSMALL;
return MY_CS_TOOSMALL2;
jp+=0x8080;
s[0]=jp>>8;
@ -8452,7 +8452,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
{
if (s+2>e)
return MY_CS_TOOSMALL;
return MY_CS_TOOSMALL2;
s[1]= s[0];
s[0]= 0x8E;
return 2;
@ -8462,7 +8462,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if ((jp=my_uni_jisx0212_onechar(wc)))
{
if (s+3>e)
return MY_CS_TOOSMALL;
return MY_CS_TOOSMALL3;
jp+=0x8080;
s[0]=0x8F;
@ -8476,7 +8476,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if (wc>=0xE000 && wc<0xE3AC)
{
if (s+2>e)
return MY_CS_TOOSMALL;
return MY_CS_TOOSMALL2;
c1=((unsigned)(wc-0xE000)/94)+0xF5;
s[0]=c1;
@ -8490,7 +8490,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if (wc>=0xE3AC && wc<0xE758)
{
if (s+3>e)
return MY_CS_TOOSMALL;
return MY_CS_TOOSMALL3;
s[0]=0x8F;
c1=((unsigned)(wc-0xE3AC)/94)+0xF5;