From b2af5a28cc364d45c8ec41f32ece9e13d0598406 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sat, 24 Dec 2005 10:40:55 +0000 Subject: [PATCH] Fix long standing Asian multibyte charsets bug. See: Subject: [HACKERS] bugs with certain Asian multibyte charsets From: Tatsuo Ishii To: pgsql-hackers@postgresql.org Date: Sat, 24 Dec 2005 18:25:33 +0900 (JST) for more details. --- src/backend/utils/mb/wchar.c | 45 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index 0e19a6075f6..5dbc01032ac 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. * Tatsuo Ishii - * $Id: wchar.c,v 1.34 2003/09/25 06:58:05 petere Exp $ + * $Id: wchar.c,v 1.34.2.1 2005/12/24 10:40:55 ishii Exp $ * * WIN1250 client encoding updated by Pavel Behal * @@ -52,7 +52,6 @@ pg_ascii_mblen(const unsigned char *s) /* * EUC */ - static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) { @@ -60,26 +59,26 @@ static int pg_euc2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 2) + if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte KANA") */ { from++; - *to = 0xff & *from++; + *to = (SS2 << 8) | *from++; len -= 2; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* JIS X 0208 KANJI */ { *to = *from++ << 8; *to |= *from++; len -= 2; } - else + else /* must be ASCII */ { *to = *from++; len--; @@ -139,6 +138,7 @@ pg_euckr_mblen(const unsigned char *s) /* * EUC_CN + * */ static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) @@ -147,21 +147,21 @@ static int 
pg_euccn2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 3) + if (*from == SS2 && len >= 3) /* code set 2 (unused?) */ { from++; - *to = 0x3f00 & (*from++ << 8); - *to = *from++; + *to = (SS2 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* code set 1 */ { *to = *from++ << 8; *to |= *from++; @@ -193,6 +193,7 @@ pg_euccn_mblen(const unsigned char *s) /* * EUC_TW + * */ static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) @@ -201,22 +202,22 @@ static int pg_euctw2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 4) + if (*from == SS2 && len >= 4) /* code set 2 */ { from++; - *to = *from++ << 16; + *to = (SS2 << 24) | (*from++ << 16) ; *to |= *from++ << 8; *to |= *from++; len -= 4; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* code set 1 */ { *to = *from++ << 8; *to |= *from++;