1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-17 06:41:24 +03:00

Fix a long-standing bug in the handling of certain Asian multibyte charsets.

See:

Subject: [HACKERS] bugs with certain Asian multibyte charsets
From: Tatsuo Ishii <ishii@sraoss.co.jp>
To: pgsql-hackers@postgresql.org
Date: Sat, 24 Dec 2005 18:25:33 +0900 (JST)

for more details.
This commit is contained in:
Tatsuo Ishii 2005-12-24 10:40:55 +00:00
parent 0862aeaeec
commit b2af5a28cc

View File

@ -1,7 +1,7 @@
/* /*
* conversion functions between pg_wchar and multibyte streams. * conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii * Tatsuo Ishii
* $Id: wchar.c,v 1.34 2003/09/25 06:58:05 petere Exp $ * $Id: wchar.c,v 1.34.2.1 2005/12/24 10:40:55 ishii Exp $
* *
* WIN1250 client encoding updated by Pavel Behal * WIN1250 client encoding updated by Pavel Behal
* *
@ -52,7 +52,6 @@ pg_ascii_mblen(const unsigned char *s)
/* /*
* EUC * EUC
*/ */
static int pg_euc2wchar_with_len static int pg_euc2wchar_with_len
(const unsigned char *from, pg_wchar *to, int len) (const unsigned char *from, pg_wchar *to, int len)
{ {
@ -60,26 +59,26 @@ static int pg_euc2wchar_with_len
while (len > 0 && *from) while (len > 0 && *from)
{ {
if (*from == SS2 && len >= 2) if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte KANA") */
{ {
from++; from++;
*to = 0xff & *from++; *to = (SS2 << 8) | *from++;
len -= 2; len -= 2;
} }
else if (*from == SS3 && len >= 3) else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
{ {
from++; from++;
*to = *from++ << 8; *to = (SS3 << 16) | (*from++ << 8);
*to |= 0x3f & *from++; *to |= *from++;
len -= 3; len -= 3;
} }
else if ((*from & 0x80) && len >= 2) else if ((*from & 0x80) && len >= 2) /* JIS X 0208 KANJI */
{ {
*to = *from++ << 8; *to = *from++ << 8;
*to |= *from++; *to |= *from++;
len -= 2; len -= 2;
} }
else else /* must be ASCII */
{ {
*to = *from++; *to = *from++;
len--; len--;
@ -139,6 +138,7 @@ pg_euckr_mblen(const unsigned char *s)
/* /*
* EUC_CN * EUC_CN
*
*/ */
static int pg_euccn2wchar_with_len static int pg_euccn2wchar_with_len
(const unsigned char *from, pg_wchar *to, int len) (const unsigned char *from, pg_wchar *to, int len)
@ -147,21 +147,21 @@ static int pg_euccn2wchar_with_len
while (len > 0 && *from) while (len > 0 && *from)
{ {
if (*from == SS2 && len >= 3) if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
{ {
from++; from++;
*to = 0x3f00 & (*from++ << 8); *to = (SS2 << 16) | (*from++ << 8);
*to = *from++; *to |= *from++;
len -= 3; len -= 3;
} }
else if (*from == SS3 && len >= 3) else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
{ {
from++; from++;
*to = *from++ << 8; *to = (SS3 << 16) | (*from++ << 8);
*to |= 0x3f & *from++; *to |= *from++;
len -= 3; len -= 3;
} }
else if ((*from & 0x80) && len >= 2) else if ((*from & 0x80) && len >= 2) /* code set 1 */
{ {
*to = *from++ << 8; *to = *from++ << 8;
*to |= *from++; *to |= *from++;
@ -193,6 +193,7 @@ pg_euccn_mblen(const unsigned char *s)
/* /*
* EUC_TW * EUC_TW
*
*/ */
static int pg_euctw2wchar_with_len static int pg_euctw2wchar_with_len
(const unsigned char *from, pg_wchar *to, int len) (const unsigned char *from, pg_wchar *to, int len)
@ -201,22 +202,22 @@ static int pg_euctw2wchar_with_len
while (len > 0 && *from) while (len > 0 && *from)
{ {
if (*from == SS2 && len >= 4) if (*from == SS2 && len >= 4) /* code set 2 */
{ {
from++; from++;
*to = *from++ << 16; *to = (SS2 << 24) | (*from++ << 16) ;
*to |= *from++ << 8; *to |= *from++ << 8;
*to |= *from++; *to |= *from++;
len -= 4; len -= 4;
} }
else if (*from == SS3 && len >= 3) else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
{ {
from++; from++;
*to = *from++ << 8; *to = (SS3 << 16) | (*from++ << 8);
*to |= 0x3f & *from++; *to |= *from++;
len -= 3; len -= 3;
} }
else if ((*from & 0x80) && len >= 2) else if ((*from & 0x80) && len >= 2) /* code set 1 */
{ {
*to = *from++ << 8; *to = *from++ << 8;
*to |= *from++; *to |= *from++;