mirror of
https://github.com/postgres/postgres.git
synced 2025-07-08 11:42:09 +03:00
Add GB18030 support. Contributed by Bill Huang <bill_huanghb@ybb.ne.jp>
(ODBC support has not been committed yet. left for Hiroshi...)
This commit is contained in:
63489
src/backend/utils/mb/Unicode/ISO10646-GB18030.TXT
Normal file
63489
src/backend/utils/mb/Unicode/ISO10646-GB18030.TXT
Normal file
File diff suppressed because it is too large
Load Diff
95
src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
Executable file
95
src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
Executable file
@ -0,0 +1,95 @@
|
|||||||
|
#! /usr/bin/perl
#
# Copyright 2002 by Bill Huang
#
# $Id: UCS_to_GB18030.pl,v 1.1 2002/06/13 08:28:55 ishii Exp $
#
# Generate UTF-8 <--> GB18030 code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain ISO10646-GB18030.TXT from
# the organization's ftp site.
#
# ISO10646-GB18030.TXT format:
#		 GB18030 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# NOTE(review): the parser below treats the FIRST column as the Unicode-side
# value and the SECOND column as the GB18030 code, which is the reverse of
# the column order listed above.  Confirm against the real input file.
# The first column is emitted exactly as read; no ucs2utf() conversion is
# applied to it.
#
# Output files (written to the current directory):
#		 utf8_to_gb18030.map	-- pg_utf_to_local ULmapGB18030[]
#		 gb18030_to_utf8.map	-- pg_local_to_utf LUmapGB18030[]

use strict;
use warnings;

# Kept from the original script; no routine from it is called here.
require "ucs2utf.pl";

my $in_file = "ISO10646-GB18030.TXT";

# Write one C table initializer to $file.
#
#   $file  output path
#   $type  C element type name
#   $name  C array name
#   $map   hash ref: numeric key => numeric value
#
# Entries are emitted in ascending key order.  The declared array size is
# the number of distinct keys, so it always matches the number of
# initializers even if the input contained duplicate keys.
sub write_map
{
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;
	my $count = scalar @keys;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, $count;

	foreach my $i ( 0 .. $#keys ) {
		# every entry but the last is followed by a comma
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n",
			$keys[$i], $map->{ $keys[$i] }, $sep;
	}

	print $out "};\n";
	close( $out ) || die( "cannot close $file" );
}

#
# read the map file once, building both directions
#
my ( %utf_to_gb, %gb_to_utf );

open( my $in, '<', $in_file ) || die( "cannot open $in_file" );

while ( my $line = <$in> ) {
	chomp $line;			# strip only a trailing newline
	next if $line =~ /^#/;		# comment line

	my ( $u, $c ) = split ' ', $line;
	next unless defined $c;		# blank or incomplete line

	my $utf  = hex($u);
	my $code = hex($c);

	# on duplicate keys the later line wins, as in the original script
	$utf_to_gb{ $utf }  = $code;
	$gb_to_utf{ $code } = $utf;
}
close( $in );

#
# first, generate UTF8 --> GB18030 table
#
write_map( "utf8_to_gb18030.map", "pg_utf_to_local", "ULmapGB18030",
		   \%utf_to_gb );

#
# then generate GB18030 --> UTF8 table
#
write_map( "gb18030_to_utf8.map", "pg_local_to_utf", "LUmapGB18030",
		   \%gb_to_utf );
|
63490
src/backend/utils/mb/Unicode/gb18030_to_utf8.map
Normal file
63490
src/backend/utils/mb/Unicode/gb18030_to_utf8.map
Normal file
File diff suppressed because it is too large
Load Diff
63490
src/backend/utils/mb/Unicode/utf8_to_gb18030.map
Normal file
63490
src/backend/utils/mb/Unicode/utf8_to_gb18030.map
Normal file
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
|||||||
* WIN1250 client encoding support contributed by Pavel Behal
|
* WIN1250 client encoding support contributed by Pavel Behal
|
||||||
* SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
|
* SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
|
||||||
*
|
*
|
||||||
* $Id: conv.c,v 1.37 2002/03/06 06:10:26 momjian Exp $
|
* $Id: conv.c,v 1.38 2002/06/13 08:28:54 ishii Exp $
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -48,6 +48,8 @@
|
|||||||
#include "Unicode/euc_jp_to_utf8.map"
|
#include "Unicode/euc_jp_to_utf8.map"
|
||||||
#include "Unicode/utf8_to_euc_cn.map"
|
#include "Unicode/utf8_to_euc_cn.map"
|
||||||
#include "Unicode/euc_cn_to_utf8.map"
|
#include "Unicode/euc_cn_to_utf8.map"
|
||||||
|
#include "Unicode/utf8_to_gb18030.map"
|
||||||
|
#include "Unicode/gb18030_to_utf8.map"
|
||||||
#include "Unicode/utf8_to_euc_kr.map"
|
#include "Unicode/utf8_to_euc_kr.map"
|
||||||
#include "Unicode/euc_kr_to_utf8.map"
|
#include "Unicode/euc_kr_to_utf8.map"
|
||||||
#include "Unicode/utf8_to_euc_tw.map"
|
#include "Unicode/utf8_to_euc_tw.map"
|
||||||
@ -515,6 +517,96 @@ mic2euc_cn(unsigned char *mic, unsigned char *p, int len)
|
|||||||
*p = '\0';
|
*p = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * GB18030 ---> MIC
 * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
 *
 * Copy a GB18030 string to "p", passing well-formed characters through
 * byte-for-byte and dropping bytes that cannot start a character.
 *
 *	gb18030		NUL-terminated input string
 *	p			output buffer; always NUL-terminated on return
 *	len			number of input bytes to examine at most
 *
 * Recognized forms:
 *	1 byte:		0x00-0x7f
 *	2 bytes:	0x81-0xfe, then 0x40-0x7e or 0x80-0xfe
 *	4 bytes:	0x81-0xfe, then 0x30-0x39, then two more bytes
 *
 * Conversion stops at the first NUL byte or after "len" bytes, whichever
 * comes first.  A four-byte character that is cut short by either limit
 * ends the conversion.
 */
static void
gb180302mic(unsigned char *gb18030, unsigned char *p, int len)
{
	while (len > 0 && *gb18030)
	{
		int			c1 = gb18030[0];
		int			c2;

		if (c1 < 0x80)
		{
			/* should be ASCII */
			*p++ = c1;
			gb18030++;
			len--;
			continue;
		}

		if (c1 < 0x81 || c1 > 0xfe)
		{
			/* 0x80 and 0xff are not valid lead bytes: drop them */
			gb18030++;
			len--;
			continue;
		}

		/* only look at the second byte if it lies inside the input */
		c2 = (len > 1) ? gb18030[1] : 0;

		if (c2 >= 0x30 && c2 <= 0x39)
		{
			/* four-byte form; make sure all four bytes are present */
			if (len < 4 || gb18030[2] == 0 || gb18030[3] == 0)
				break;
			*p++ = gb18030[0];
			*p++ = gb18030[1];
			*p++ = gb18030[2];
			*p++ = gb18030[3];
			gb18030 += 4;
			len -= 4;
		}
		else if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfe))
		{
			/* two-byte form */
			*p++ = c1;
			*p++ = c2;
			gb18030 += 2;
			len -= 2;
		}
		else
		{
			/*
			 * Throw the strange code.  Only the lead byte is consumed, so
			 * a NUL in second position is still seen by the loop test.
			 */
			gb18030++;
			len--;
		}
	}
	*p = '\0';
}
|
||||||
|
|
||||||
|
/*
 * MIC ---> GB18030
 * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
 *
 * Walk the MIC string "mic" and write the result to "p", which is
 * NUL-terminated on return.  The loop ends at a NUL byte or once "len",
 * reduced by pg_mic_mblen() for each character, is no longer positive.
 *
 * Lead bytes up to 0x7f are copied alone.  A lead byte in 0x81-0xfe is
 * copied together with its second byte when that byte is in 0x40-0x7e or
 * 0x80-0xfe, and together with three following bytes when the second byte
 * is in 0x30-0x39.  Everything else is handed to printBogusChar().
 */
static void
mic2gb18030(unsigned char *mic, unsigned char *p, int len)
{
	int			lead;
	int			second;

	while (len > 0)
	{
		lead = *mic;
		if (lead == 0)
			break;

		/* account for the whole character, then step past its first byte */
		len -= pg_mic_mblen(mic);
		mic++;

		if (lead < 0x80)
		{
			/* ASCII */
			*p++ = lead;
			continue;
		}

		if (lead < 0x81 || lead > 0xfe)
		{
			/* not a usable lead byte: back up and report it */
			mic--;
			printBogusChar(&mic, &p);
			continue;
		}

		second = *mic++;

		if (second >= 0x30 && second <= 0x39)
		{
			/* four-byte form */
			*p++ = lead;
			*p++ = second;
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if ((second >= 0x40 && second <= 0x7e) ||
				 (second >= 0x80 && second <= 0xfe))
		{
			/* two-byte form */
			*p++ = lead;
			*p++ = second;
		}
		else
		{
			/*
			 * NOTE(review): two back-up-and-report steps in a row, kept
			 * exactly as written; the net pointer movement depends on
			 * printBogusChar(), which is defined elsewhere -- confirm.
			 */
			mic--;
			printBogusChar(&mic, &p);
			mic--;
			printBogusChar(&mic, &p);
		}
	}
	*p = '\0';
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* EUC_TW ---> MIC
|
* EUC_TW ---> MIC
|
||||||
*/
|
*/
|
||||||
@ -1596,6 +1688,26 @@ euc_cn_to_utf(unsigned char *euc, unsigned char *utf, int len)
|
|||||||
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len);
|
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UTF-8 ---> GB18030
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
utf_to_gb18030(unsigned char *utf, unsigned char *euc, int len)
|
||||||
|
|
||||||
|
{
|
||||||
|
utf_to_local(utf, euc, ULmapGB18030,
|
||||||
|
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GB18030 ---> UTF-8
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
gb18030_to_utf(unsigned char *euc, unsigned char *utf, int len)
|
||||||
|
{
|
||||||
|
local_to_utf(euc, utf, LUmapGB18030,
|
||||||
|
sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), PG_GB18030, len);
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* UTF-8 ---> EUC_KR
|
* UTF-8 ---> EUC_KR
|
||||||
*/
|
*/
|
||||||
@ -1935,6 +2047,9 @@ pg_enconv pg_enconv_tbl[] =
|
|||||||
{
|
{
|
||||||
PG_WIN1250, win12502mic, mic2win1250, win1250_to_utf, utf_to_win1250
|
PG_WIN1250, win12502mic, mic2win1250, win1250_to_utf, utf_to_win1250
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
PG_GB18030, gb180302mic, mic2gb18030, gb18030_to_utf, utf_to_gb18030
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
#else
|
#else
|
||||||
@ -2019,9 +2134,18 @@ pg_enconv pg_enconv_tbl[] =
|
|||||||
{
|
{
|
||||||
PG_BIG5, big52mic, mic2big5, 0, 0
|
PG_BIG5, big52mic, mic2big5, 0, 0
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
PG_GBK, 0, 0, 0, 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
PG_UHC, 0, 0, 0, 0
|
||||||
|
},
|
||||||
{
|
{
|
||||||
PG_WIN1250, win12502mic, mic2win1250, 0, 0
|
PG_WIN1250, win12502mic, mic2win1250, 0, 0
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
PG_GB18030, gb180302mic, mic2gb18030, 0, 0
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* UNICODE_CONVERSION */
|
#endif /* UNICODE_CONVERSION */
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
* Encoding names and routines for work with it. All
|
* Encoding names and routines for work with it. All
|
||||||
* in this file is shared between FE and BE.
|
* in this file is shared between FE and BE.
|
||||||
*
|
*
|
||||||
* $Id: encnames.c,v 1.7 2002/03/05 05:52:44 momjian Exp $
|
* $Id: encnames.c,v 1.8 2002/06/13 08:28:54 ishii Exp $
|
||||||
*/
|
*/
|
||||||
#ifdef FRONTEND
|
#ifdef FRONTEND
|
||||||
#include "postgres_fe.h"
|
#include "postgres_fe.h"
|
||||||
@ -60,7 +60,11 @@ pg_encname pg_encname_tbl[] =
|
|||||||
{
|
{
|
||||||
"euctw", PG_EUC_TW
|
"euctw", PG_EUC_TW
|
||||||
}, /* EUC-TW; Extended Unix Code for
|
}, /* EUC-TW; Extended Unix Code for
|
||||||
|
|
||||||
* traditional Chinese */
|
* traditional Chinese */
|
||||||
|
{
|
||||||
|
"gb18030", PG_GB18030
|
||||||
|
}, /* GB18030;GB18030 */
|
||||||
{
|
{
|
||||||
"gbk", PG_GBK
|
"gbk", PG_GBK
|
||||||
}, /* GBK; Chinese Windows CodePage 936
|
}, /* GBK; Chinese Windows CodePage 936
|
||||||
@ -239,7 +243,6 @@ pg_encname pg_encname_tbl[] =
|
|||||||
{
|
{
|
||||||
"windows950", PG_BIG5
|
"windows950", PG_BIG5
|
||||||
}, /* alias for BIG5 */
|
}, /* alias for BIG5 */
|
||||||
|
|
||||||
{
|
{
|
||||||
NULL, 0
|
NULL, 0
|
||||||
} /* last */
|
} /* last */
|
||||||
@ -353,6 +356,9 @@ pg_enc2name pg_enc2name_tbl[] =
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"WIN1250", PG_WIN1250
|
"WIN1250", PG_WIN1250
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"GB18030", PG_GB18030
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* conversion functions between pg_wchar and multi-byte streams.
|
* conversion functions between pg_wchar and multi-byte streams.
|
||||||
* Tatsuo Ishii
|
* Tatsuo Ishii
|
||||||
* $Id: wchar.c,v 1.27 2002/03/05 05:52:44 momjian Exp $
|
* $Id: wchar.c,v 1.28 2002/06/13 08:28:54 ishii Exp $
|
||||||
*
|
*
|
||||||
* WIN1250 client encoding updated by Pavel Behal
|
* WIN1250 client encoding updated by Pavel Behal
|
||||||
*
|
*
|
||||||
@ -510,6 +510,31 @@ pg_uhc_mblen(const unsigned char *s)
|
|||||||
return (len);
|
return (len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * GB18030
 * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
 *
 * Return the byte length of the character that starts at "s":
 *	1	when the first byte is 0x7f or below (ASCII)
 *	4	when the second byte is in 0x30-0x39
 *	2	in every other case
 *
 * For a non-ASCII first byte the second byte is always read.
 */
static int
pg_gb18030_mblen(const unsigned char *s)
{
	unsigned char second;

	if (s[0] < 0x80)
		return 1;				/* ASCII */

	second = s[1];
	return (second >= 0x30 && second <= 0x39) ? 4 : 2;
}
|
||||||
|
|
||||||
|
|
||||||
pg_wchar_tbl pg_wchar_table[] = {
|
pg_wchar_tbl pg_wchar_table[] = {
|
||||||
{pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII */
|
{pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII */
|
||||||
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */
|
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */
|
||||||
@ -544,6 +569,7 @@ pg_wchar_tbl pg_wchar_table[] = {
|
|||||||
{0, pg_gbk_mblen, 2}, /* 30; PG_GBK */
|
{0, pg_gbk_mblen, 2}, /* 30; PG_GBK */
|
||||||
{0, pg_uhc_mblen, 2}, /* 31; PG_UHC */
|
{0, pg_uhc_mblen, 2}, /* 31; PG_UHC */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 32; PG_WIN1250 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 32; PG_WIN1250 */
|
||||||
|
{0, pg_gb18030_mblen, 2} /* 33; PG_GB18030 */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* returns the byte length of a word for mule internal code */
|
/* returns the byte length of a word for mule internal code */
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* $Id: pg_wchar.h,v 1.38 2002/03/05 05:52:50 momjian Exp $ */
|
/* $Id: pg_wchar.h,v 1.39 2002/06/13 08:30:22 ishii Exp $ */
|
||||||
|
|
||||||
#ifndef PG_WCHAR_H
|
#ifndef PG_WCHAR_H
|
||||||
#define PG_WCHAR_H
|
#define PG_WCHAR_H
|
||||||
@ -189,7 +189,7 @@ typedef enum pg_enc
|
|||||||
PG_GBK, /* GBK (Windows-936) */
|
PG_GBK, /* GBK (Windows-936) */
|
||||||
PG_UHC, /* UHC (Windows-949) */
|
PG_UHC, /* UHC (Windows-949) */
|
||||||
PG_WIN1250, /* windows-1250 */
|
PG_WIN1250, /* windows-1250 */
|
||||||
|
PG_GB18030, /* GB18030 */
|
||||||
_PG_LAST_ENCODING_ /* mark only */
|
_PG_LAST_ENCODING_ /* mark only */
|
||||||
|
|
||||||
} pg_enc;
|
} pg_enc;
|
||||||
|
Reference in New Issue
Block a user