1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-07 00:36:50 +03:00

Add support for code conversion between Unicode and other encodings.

Supported encodings are: EUC_JP, EUC_CN, EUC_KR, EUC_TW, Shift JIS,
Big5, ISO8859-[1-5].
TODO: testing and documentation.
This commit is contained in:
Tatsuo Ishii
2000-10-30 10:41:05 +00:00
parent 0b10d35e2b
commit 1acf6f9c8e
39 changed files with 141346 additions and 26764 deletions

View File

@ -0,0 +1,61 @@
#-------------------------------------------------------------------------
#
# Makefile for src/backend/utils/mb/Unicode
#
# Regenerates the UTF-8 <--> local encoding conversion tables (*.map)
# from the Unicode mapping files (*.TXT) with the UCS_to_*.pl scripts.
# The *.TXT inputs are not produced by any rule here; they have to be
# present in this directory before the maps can be rebuilt.
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Header: /cvsroot/pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
#-------------------------------------------------------------------------

subdir = src/backend/utils/mb/Unicode
top_builddir = ../../../../..
include $(top_builddir)/src/Makefile.global

ISO8859MAPS=iso8859_2_to_utf8.map iso8859_3_to_utf8.map \
	iso8859_4_to_utf8.map iso8859_5_to_utf8.map \
	utf8_to_iso8859_2.map utf8_to_iso8859_3.map \
	utf8_to_iso8859_4.map utf8_to_iso8859_5.map

# utf8_to_iso8859_2.map is already part of ISO8859MAPS, so it is not
# repeated here.
MAPS= $(ISO8859MAPS) \
	big5_to_utf8.map euc_cn_to_utf8.map euc_jp_to_utf8.map \
	euc_kr_to_utf8.map euc_tw_to_utf8.map sjis_to_utf8.map \
	utf8_to_big5.map utf8_to_euc_cn.map utf8_to_euc_jp.map \
	utf8_to_euc_kr.map utf8_to_euc_tw.map \
	utf8_to_sjis.map

ISO8859TEXTS= 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT

TEXTS=$(ISO8859TEXTS) \
	BIG5.TXT CNS11643.TXT GB2312.TXT \
	JIS0201.TXT JIS0208.TXT JIS0212.TXT \
	OLD5601.TXT SHIFTJIS.TXT

# These names are commands, not files.
.PHONY: all clean distclean

# Each script writes every map named on the left-hand side of its rule in
# one run.  Make treats "a b: c" as independent rules, so under -j it would
# start the same script once per target at the same time; keep this
# directory serial instead.
.NOTPARALLEL:

all: $(MAPS)

# Every map also depends on its generator and on the shared ucs2utf.pl
# helper, so a change to either regenerates the table.
$(ISO8859MAPS) : $(ISO8859TEXTS) UCS_to_8859.pl ucs2utf.pl
	./UCS_to_8859.pl

euc_jp_to_utf8.map utf8_to_euc_jp.map : JIS0201.TXT JIS0208.TXT JIS0212.TXT UCS_to_EUC_JP.pl ucs2utf.pl
	./UCS_to_EUC_JP.pl

euc_cn_to_utf8.map utf8_to_euc_cn.map : GB2312.TXT UCS_to_EUC_CN.pl ucs2utf.pl
	./UCS_to_EUC_CN.pl

euc_kr_to_utf8.map utf8_to_euc_kr.map : OLD5601.TXT UCS_to_EUC_KR.pl ucs2utf.pl
	./UCS_to_EUC_KR.pl

euc_tw_to_utf8.map utf8_to_euc_tw.map : CNS11643.TXT UCS_to_EUC_TW.pl ucs2utf.pl
	./UCS_to_EUC_TW.pl

sjis_to_utf8.map utf8_to_sjis.map : SHIFTJIS.TXT UCS_to_SJIS.pl ucs2utf.pl
	./UCS_to_SJIS.pl

big5_to_utf8.map utf8_to_big5.map : BIG5.TXT UCS_to_BIG5.pl ucs2utf.pl
	./UCS_to_BIG5.pl

# Removes the generated maps; rebuilding them needs the *.TXT inputs.
clean:
	rm -f $(MAPS)

# Additionally removes the *.TXT mapping files themselves.
distclean: clean
	rm -f $(TEXTS)

View File

@ -0,0 +1,110 @@
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_8859.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> ISO8859 code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain "8859-[2-5].TXT" from the organization's ftp site.
# We assume the files include three tab-separated columns:
#		 ISO/IEC 8859 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# For each charset N in 2..5 this writes utf8_to_iso8859_N.map and
# iso8859_N_to_utf8.map into the current directory.  Only lines whose
# code is >= 0x80 and whose UCS-2 value is >= 0x100 produce an entry.

use strict;

# "." is no longer searched by require on recent Perl versions, so name
# the helper relative to the current directory explicitly.
require "./ucs2utf.pl";

# Parse one mapping file and return its usable lines, in file order, as a
# list of [ code, ucs ] pairs.  Comment lines and lines outside the
# code >= 0x80 / ucs >= 0x100 range are dropped.
sub read_pairs {
	my ($in_file) = @_;
	my @pairs;

	open( my $in, '<', $in_file ) || die( "cannot open $in_file" );
	while ( my $line = <$in> ) {
		# chomp, not chop: the last line may lack a newline
		chomp $line;
		next if $line =~ /^#/;
		my ( $c, $u ) = split ' ', $line;
		next unless defined $u;
		my $code = hex($c);
		my $ucs  = hex($u);
		push @pairs, [ $code, $ucs ] if $code >= 0x80 && $ucs >= 0x100;
	}
	close($in);
	return @pairs;
}

# Write %$map to $file as the C array "static $type $name[ N ]", one
# {key, value} row per entry in ascending numeric key order.  N is taken
# from the map itself so it always matches the number of rows.
sub write_table {
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, scalar(@keys);
	foreach my $i ( 0 .. $#keys ) {
		# no comma after the final row
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] }, $sep;
	}
	print $out "};\n";
	close($out);
}

foreach my $charset ( 2, 3, 4, 5 ) {
	my @pairs = read_pairs("8859-${charset}.TXT");

	#
	# first, generate UTF8->ISO8859 table
	#
	my %utf_to_local;
	foreach my $pair (@pairs) {
		my ( $code, $ucs ) = @$pair;
		my $utf = ucs2utf($ucs);
		if ( exists $utf_to_local{$utf} ) {
			printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
			next;
		}
		$utf_to_local{$utf} = $code;
	}
	write_table( "utf8_to_iso8859_${charset}.map", "pg_utf_to_local",
		"ULmapISO8859_${charset}", \%utf_to_local );

	#
	# then generate ISO8859->UTF8 table
	#
	my %local_to_utf;
	foreach my $pair (@pairs) {
		my ( $code, $ucs ) = @$pair;
		# this table is keyed by the ISO8859 code, so that is what has
		# to be checked for duplicates
		if ( exists $local_to_utf{$code} ) {
			printf STDERR "Warning: duplicate code: %04x\n", $code;
			next;
		}
		$local_to_utf{$code} = ucs2utf($ucs);
	}
	write_table( "iso8859_${charset}_to_utf8.map", "pg_local_to_utf",
		"LUmapISO8859_${charset}", \%local_to_utf );
}

View File

@ -0,0 +1,111 @@
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_BIG5.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> BIG5 code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain BIG5.TXT from
# the organization's ftp site.
#
# BIG5.TXT format (as read by this script):
#		 BIG5 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# Writes utf8_to_big5.map and big5_to_utf8.map into the current directory.
# Only lines whose code is >= 0x80 and whose UCS-2 value is >= 0x100
# produce an entry.

use strict;

# "." is no longer searched by require on recent Perl versions, so name
# the helper relative to the current directory explicitly.
require "./ucs2utf.pl";

# Write %$map to $file as the C array "static $type $name[ N ]", one
# {key, value} row per entry in ascending numeric key order.  N is taken
# from the map itself so it always matches the number of rows.
sub write_table {
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, scalar(@keys);
	foreach my $i ( 0 .. $#keys ) {
		# no comma after the final row
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] }, $sep;
	}
	print $out "};\n";
	close($out);
}

my $in_file = "BIG5.TXT";
my %utf_to_local;    # UTF-8 value -> BIG5 code
my %local_to_utf;    # BIG5 code   -> UTF-8 value

open( my $in, '<', $in_file ) || die( "cannot open $in_file" );
while ( my $line = <$in> ) {
	# chomp, not chop: the last line may lack a newline
	chomp $line;
	next if $line =~ /^#/;
	my ( $c, $u ) = split ' ', $line;
	next unless defined $u;
	my $ucs  = hex($u);
	my $code = hex($c);
	next unless $code >= 0x80 && $ucs >= 0x100;
	my $utf = ucs2utf($ucs);

	# UTF8 --> BIG5: the first code seen for a UTF-8 value wins
	if ( exists $utf_to_local{$utf} ) {
		printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
	}
	else {
		$utf_to_local{$utf} = $code;
	}

	# BIG5 --> UTF8: the first UTF-8 value seen for a code wins
	if ( exists $local_to_utf{$code} ) {
		printf STDERR "Warning: duplicate code: %04x\n", $code;
	}
	else {
		$local_to_utf{$code} = $utf;
	}
}
close($in);

#
# first, generate UTF8 --> BIG5 table
#
write_table( "utf8_to_big5.map", "pg_utf_to_local", "ULmapBIG5",
	\%utf_to_local );

#
# then generate BIG5 --> UTF8 table
#
write_table( "big5_to_utf8.map", "pg_local_to_utf", "LUmapBIG5",
	\%local_to_utf );

View File

@ -0,0 +1,112 @@
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_CN.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_CN code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain GB2312.TXT from
# the organization's ftp site.
#
# GB2312.TXT format:
#		 GB2312 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# Writes utf8_to_euc_cn.map and euc_cn_to_utf8.map into the current
# directory.  The EUC_CN code is the GB2312 code with 0x8080 OR-ed in.
# Only lines whose code is >= 0x80 and whose UCS-2 value is >= 0x100
# produce an entry.

use strict;

# "." is no longer searched by require on recent Perl versions, so name
# the helper relative to the current directory explicitly.
require "./ucs2utf.pl";

# Write %$map to $file as the C array "static $type $name[ N ]", one
# {key, value} row per entry in ascending numeric key order.  N is taken
# from the map itself so it always matches the number of rows.
sub write_table {
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, scalar(@keys);
	foreach my $i ( 0 .. $#keys ) {
		# no comma after the final row
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] }, $sep;
	}
	print $out "};\n";
	close($out);
}

my $in_file = "GB2312.TXT";
my %utf_to_local;    # UTF-8 value -> EUC_CN code
my %local_to_utf;    # EUC_CN code -> UTF-8 value

open( my $in, '<', $in_file ) || die( "cannot open $in_file" );
while ( my $line = <$in> ) {
	# chomp, not chop: the last line may lack a newline
	chomp $line;
	next if $line =~ /^#/;
	my ( $c, $u ) = split ' ', $line;
	next unless defined $u;
	my $ucs  = hex($u);
	my $code = hex($c);
	next unless $code >= 0x80 && $ucs >= 0x100;
	my $utf = ucs2utf($ucs);
	my $euc = $code | 0x8080;

	# UTF8 --> EUC_CN: the first code seen for a UTF-8 value wins
	if ( exists $utf_to_local{$utf} ) {
		printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
	}
	else {
		$utf_to_local{$utf} = $euc;
	}

	# EUC_CN --> UTF8: check the key that is actually stored, i.e. the
	# code after 0x8080 has been OR-ed in
	if ( exists $local_to_utf{$euc} ) {
		printf STDERR "Warning: duplicate code: %04x\n", $euc;
	}
	else {
		$local_to_utf{$euc} = $utf;
	}
}
close($in);

#
# first, generate UTF8 --> EUC_CN table
#
write_table( "utf8_to_euc_cn.map", "pg_utf_to_local", "ULmapEUC_CN",
	\%utf_to_local );

#
# then generate EUC_CN --> UTF8 table
#
write_table( "euc_cn_to_utf8.map", "pg_local_to_utf", "LUmapEUC_CN",
	\%local_to_utf );

View File

@ -0,0 +1,250 @@
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_JP.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_JP code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain JIS0201.TXT, JIS0208.TXT, JIS0212.TXT from
# the organization's ftp site.
#
# JIS0201.TXT format:
#		 JIS0201 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# JIS0208.TXT format:
#		 JIS0208 shift-JIS code in hex
#		 JIS0208 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# JIS0212.TXT format:
#		 JIS0212 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# Writes utf8_to_euc_jp.map and euc_jp_to_utf8.map into the current
# directory.  Only lines whose code is >= 0x80 and whose UCS-2 value is
# >= 0x100 produce an entry.

use strict;

# "." is no longer searched by require on recent Perl versions, so name
# the helper relative to the current directory explicitly.
require "./ucs2utf.pl";

# Write %$map to $file as the C array "static $type $name[ N ]", one
# {key, value} row per entry in ascending numeric key order.  N is taken
# from the map itself so it always matches the number of rows.
sub write_table {
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, scalar(@keys);
	foreach my $i ( 0 .. $#keys ) {
		# no comma after the final row
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] }, $sep;
	}
	print $out "};\n";
	close($out);
}

# Input files in the order they are merged; on a duplicate the entry read
# first is kept.  Each row is
#   [ file name, index of the code column, bits OR-ed into the code ]
# and the UCS-2 column is always the one right after the code column.
my @sources = (
	[ "JIS0201.TXT", 0, 0x8e00 ],      # add single shift 2
	[ "JIS0208.TXT", 1, 0x8080 ],      # column 0 is the shift-JIS code
	[ "JIS0212.TXT", 0, 0x8f8080 ],    # 0x8f lead byte plus 0x8080
);

my %utf_to_local;    # UTF-8 value -> EUC_JP code
my %local_to_utf;    # EUC_JP code -> UTF-8 value

foreach my $source (@sources) {
	my ( $in_file, $col, $bits ) = @$source;

	open( my $in, '<', $in_file ) || die( "cannot open $in_file" );
	while ( my $line = <$in> ) {
		# chomp, not chop: the last line may lack a newline
		chomp $line;
		next if $line =~ /^#/;
		my @field = split ' ', $line;
		next unless defined $field[ $col + 1 ];
		my $code = hex( $field[$col] );
		my $ucs  = hex( $field[ $col + 1 ] );
		next unless $code >= 0x80 && $ucs >= 0x100;
		my $utf = ucs2utf($ucs);
		my $euc = $code | $bits;

		# UTF8 --> EUC_JP: the first code seen for a UTF-8 value wins
		if ( exists $utf_to_local{$utf} ) {
			printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
		}
		else {
			$utf_to_local{$utf} = $euc;
		}

		# EUC_JP --> UTF8: check the key that is actually stored, i.e.
		# the code after the EUC bits have been OR-ed in
		if ( exists $local_to_utf{$euc} ) {
			printf STDERR "Warning: duplicate code: %04x\n", $euc;
		}
		else {
			$local_to_utf{$euc} = $utf;
		}
	}
	close($in);
}

#
# first, generate UTF8 --> EUC_JP table
#
write_table( "utf8_to_euc_jp.map", "pg_utf_to_local", "ULmapEUC_JP",
	\%utf_to_local );

#
# then generate EUC_JP --> UTF8 table
#
write_table( "euc_jp_to_utf8.map", "pg_local_to_utf", "LUmapEUC_JP",
	\%local_to_utf );

View File

@ -0,0 +1,112 @@
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_KR.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_KR code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain OLD5601.TXT from
# the organization's ftp site.
#
# OLD5601.TXT format:
#		 KSC5601 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# Writes utf8_to_euc_kr.map and euc_kr_to_utf8.map into the current
# directory.  The EUC_KR code is the KSC5601 code with 0x8080 OR-ed in.
# Only lines whose code is >= 0x80 and whose UCS-2 value is >= 0x100
# produce an entry.

use strict;

# "." is no longer searched by require on recent Perl versions, so name
# the helper relative to the current directory explicitly.
require "./ucs2utf.pl";

# Write %$map to $file as the C array "static $type $name[ N ]", one
# {key, value} row per entry in ascending numeric key order.  N is taken
# from the map itself so it always matches the number of rows.
sub write_table {
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, scalar(@keys);
	foreach my $i ( 0 .. $#keys ) {
		# no comma after the final row
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] }, $sep;
	}
	print $out "};\n";
	close($out);
}

my $in_file = "OLD5601.TXT";
my %utf_to_local;    # UTF-8 value -> EUC_KR code
my %local_to_utf;    # EUC_KR code -> UTF-8 value

open( my $in, '<', $in_file ) || die( "cannot open $in_file" );
while ( my $line = <$in> ) {
	# chomp, not chop: the last line may lack a newline
	chomp $line;
	next if $line =~ /^#/;
	my ( $c, $u ) = split ' ', $line;
	next unless defined $u;
	my $ucs  = hex($u);
	my $code = hex($c);
	next unless $code >= 0x80 && $ucs >= 0x100;
	my $utf = ucs2utf($ucs);
	my $euc = $code | 0x8080;

	# UTF8 --> EUC_KR: the first code seen for a UTF-8 value wins
	if ( exists $utf_to_local{$utf} ) {
		printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
	}
	else {
		$utf_to_local{$utf} = $euc;
	}

	# EUC_KR --> UTF8: check the key that is actually stored, i.e. the
	# code after 0x8080 has been OR-ed in
	if ( exists $local_to_utf{$euc} ) {
		printf STDERR "Warning: duplicate code: %04x\n", $euc;
	}
	else {
		$local_to_utf{$euc} = $utf;
	}
}
close($in);

#
# first, generate UTF8 --> EUC_KR table
#
write_table( "utf8_to_euc_kr.map", "pg_utf_to_local", "ULmapEUC_KR",
	\%utf_to_local );

#
# then generate EUC_KR --> UTF8 table
#
write_table( "euc_kr_to_utf8.map", "pg_local_to_utf", "LUmapEUC_KR",
	\%local_to_utf );

View File

@ -0,0 +1,134 @@
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_TW.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_TW code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain CNS11643.TXT from
# the organization's ftp site.
#
# CNS11643.TXT format:
#		 CNS11643 code in hex (3 bytes)
#		 (I guess the first byte means the plane No.)
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# Writes utf8_to_euc_tw.map and euc_tw_to_utf8.map into the current
# directory.  Only lines whose code is >= 0x80 and whose UCS-2 value is
# >= 0x100 produce an entry; lines with a plane number above 16 are
# reported and ignored.

use strict;

# "." is no longer searched by require on recent Perl versions, so name
# the helper relative to the current directory explicitly.
require "./ucs2utf.pl";

# Write %$map to $file as the C array "static $type $name[ N ]", one
# {key, value} row per entry in ascending numeric key order.  N is taken
# from the map itself, so lines rejected while reading can never make the
# declared size disagree with the number of rows.
sub write_table {
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, scalar(@keys);
	foreach my $i ( 0 .. $#keys ) {
		# no comma after the final row
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] }, $sep;
	}
	print $out "};\n";
	close($out);
}

# Read the usable lines once, in file order, as [ code, ucs ] pairs.
my $in_file = "CNS11643.TXT";
my @pairs;

open( my $in, '<', $in_file ) || die( "cannot open $in_file" );
while ( my $line = <$in> ) {
	# chomp, not chop: the last line may lack a newline
	chomp $line;
	next if $line =~ /^#/;
	my ( $c, $u ) = split ' ', $line;
	next unless defined $u;
	my $ucs  = hex($u);
	my $code = hex($c);
	push @pairs, [ $code, $ucs ] if $code >= 0x80 && $ucs >= 0x100;
}
close($in);

#
# first, generate UTF8 --> EUC_TW table
#
my %utf_to_local;
foreach my $pair (@pairs) {
	my ( $code, $ucs ) = @$pair;
	my $utf = ucs2utf($ucs);
	if ( exists $utf_to_local{$utf} ) {
		printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
		next;
	}
	my $plane = ( $code & 0x1f0000 ) >> 16;
	if ( $plane > 16 ) {
		print STDERR "Warning: invalid plane No.$plane. ignored\n";
		next;
	}
	if ( $plane == 1 ) {
		# plane 1 uses the short two-byte form
		$utf_to_local{$utf} = ( ( $code & 0xffff ) | 0x8080 );
	}
	else {
		# other planes: 0x8e lead byte, 0xa0 + plane, then the code
		$utf_to_local{$utf} =
		  ( 0x8ea00000 + ( $plane << 16 ) ) | ( ( $code & 0xffff ) | 0x8080 );
	}
}
write_table( "utf8_to_euc_tw.map", "pg_utf_to_local", "ULmapEUC_TW",
	\%utf_to_local );

#
# then generate EUC_TW --> UTF8 table
#
my %local_to_utf;
foreach my $pair (@pairs) {
	my ( $code, $ucs ) = @$pair;
	my $utf   = ucs2utf($ucs);
	my $plane = ( $code & 0x1f0000 ) >> 16;
	if ( $plane > 16 ) {
		print STDERR "Warning: invalid plane No.$plane. ignored\n";
		next;
	}

	# A plane 1 character is entered under both its two-byte and its
	# four-byte form; every other plane only has the four-byte form.
	my @euc;
	push @euc, ( ( $code & 0xffff ) | 0x8080 ) if $plane == 1;
	push @euc,
	  ( 0x8ea00000 + ( $plane << 16 ) ) | ( ( $code & 0xffff ) | 0x8080 );

	foreach my $key (@euc) {
		# check the key that is actually stored, not the raw CNS code
		if ( exists $local_to_utf{$key} ) {
			printf STDERR "Warning: duplicate code: %04x\n", $key;
			next;
		}
		$local_to_utf{$key} = $utf;
	}
}
write_table( "euc_tw_to_utf8.map", "pg_local_to_utf", "LUmapEUC_TW",
	\%local_to_utf );

View File

@ -0,0 +1,113 @@
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_SJIS.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> SJIS code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain SHIFTJIS.TXT from
# the organization's ftp site.
#
# SHIFTJIS.TXT format:
#		 SHIFTJIS code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
# Warning: SHIFTJIS.TXT contains only JIS0201 and JIS0208. no JIS0212.
#
# Writes utf8_to_sjis.map and sjis_to_utf8.map into the current directory.
# Only lines whose code is >= 0x80 and whose UCS-2 value is >= 0x100
# produce an entry.

use strict;

# "." is no longer searched by require on recent Perl versions, so name
# the helper relative to the current directory explicitly.
require "./ucs2utf.pl";

# Write %$map to $file as the C array "static $type $name[ N ]", one
# {key, value} row per entry in ascending numeric key order.  N is taken
# from the map itself so it always matches the number of rows.
sub write_table {
	my ( $file, $type, $name, $map ) = @_;
	my @keys = sort { $a <=> $b } keys %$map;

	open( my $out, '>', $file ) || die( "cannot open $file" );
	printf $out "static %s %s[ %d ] = {\n", $type, $name, scalar(@keys);
	foreach my $i ( 0 .. $#keys ) {
		# no comma after the final row
		my $sep = ( $i == $#keys ) ? "" : ",";
		printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] }, $sep;
	}
	print $out "};\n";
	close($out);
}

my $in_file = "SHIFTJIS.TXT";
my %utf_to_local;    # UTF-8 value -> SJIS code
my %local_to_utf;    # SJIS code   -> UTF-8 value

open( my $in, '<', $in_file ) || die( "cannot open $in_file" );
while ( my $line = <$in> ) {
	# chomp, not chop: the last line may lack a newline
	chomp $line;
	next if $line =~ /^#/;
	my ( $c, $u ) = split ' ', $line;
	next unless defined $u;
	my $ucs  = hex($u);
	my $code = hex($c);
	next unless $code >= 0x80 && $ucs >= 0x100;
	my $utf = ucs2utf($ucs);

	# UTF8 --> SJIS: the first code seen for a UTF-8 value wins
	if ( exists $utf_to_local{$utf} ) {
		printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
	}
	else {
		$utf_to_local{$utf} = $code;
	}

	# SJIS --> UTF8: the first UTF-8 value seen for a code wins
	if ( exists $local_to_utf{$code} ) {
		printf STDERR "Warning: duplicate code: %04x\n", $code;
	}
	else {
		$local_to_utf{$code} = $utf;
	}
}
close($in);

#
# first, generate UTF8 --> SJIS table
#
write_table( "utf8_to_sjis.map", "pg_utf_to_local", "ULmapSJIS",
	\%utf_to_local );

#
# then generate SJIS --> UTF8 table
#
write_table( "sjis_to_utf8.map", "pg_local_to_utf", "LUmapSJIS",
	\%local_to_utf );

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,59 @@
/*
 * ISO 8859-2 --> UTF-8 lookup table.
 *
 * Each entry pairs an ISO 8859-2 code (first value) with the UTF-8 byte
 * sequence of the same character packed into one integer (second value).
 * Entries are in ascending order of the first value.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_local_to_utf LUmapISO8859_2[ 57 ] = {
{0x00a1, 0xc484},
{0x00a2, 0xcb98},
{0x00a3, 0xc581},
{0x00a5, 0xc4bd},
{0x00a6, 0xc59a},
{0x00a9, 0xc5a0},
{0x00aa, 0xc59e},
{0x00ab, 0xc5a4},
{0x00ac, 0xc5b9},
{0x00ae, 0xc5bd},
{0x00af, 0xc5bb},
{0x00b1, 0xc485},
{0x00b2, 0xcb9b},
{0x00b3, 0xc582},
{0x00b5, 0xc4be},
{0x00b6, 0xc59b},
{0x00b7, 0xcb87},
{0x00b9, 0xc5a1},
{0x00ba, 0xc59f},
{0x00bb, 0xc5a5},
{0x00bc, 0xc5ba},
{0x00bd, 0xcb9d},
{0x00be, 0xc5be},
{0x00bf, 0xc5bc},
{0x00c0, 0xc594},
{0x00c3, 0xc482},
{0x00c5, 0xc4b9},
{0x00c6, 0xc486},
{0x00c8, 0xc48c},
{0x00ca, 0xc498},
{0x00cc, 0xc49a},
{0x00cf, 0xc48e},
{0x00d0, 0xc490},
{0x00d1, 0xc583},
{0x00d2, 0xc587},
{0x00d5, 0xc590},
{0x00d8, 0xc598},
{0x00d9, 0xc5ae},
{0x00db, 0xc5b0},
{0x00de, 0xc5a2},
{0x00e0, 0xc595},
{0x00e3, 0xc483},
{0x00e5, 0xc4ba},
{0x00e6, 0xc487},
{0x00e8, 0xc48d},
{0x00ea, 0xc499},
{0x00ec, 0xc49b},
{0x00ef, 0xc48f},
{0x00f0, 0xc491},
{0x00f1, 0xc584},
{0x00f2, 0xc588},
{0x00f5, 0xc591},
{0x00f8, 0xc599},
{0x00f9, 0xc5af},
{0x00fb, 0xc5b1},
{0x00fe, 0xc5a3},
{0x00ff, 0xcb99}
};

View File

@ -0,0 +1,30 @@
/*
 * ISO 8859-3 --> UTF-8 lookup table.
 *
 * Each entry pairs an ISO 8859-3 code (first value) with the UTF-8 byte
 * sequence of the same character packed into one integer (second value).
 * Entries are in ascending order of the first value.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_local_to_utf LUmapISO8859_3[ 28 ] = {
{0x00a1, 0xc4a6},
{0x00a2, 0xcb98},
{0x00a6, 0xc4a4},
{0x00a9, 0xc4b0},
{0x00aa, 0xc59e},
{0x00ab, 0xc49e},
{0x00ac, 0xc4b4},
{0x00af, 0xc5bb},
{0x00b1, 0xc4a7},
{0x00b6, 0xc4a5},
{0x00b9, 0xc4b1},
{0x00ba, 0xc59f},
{0x00bb, 0xc49f},
{0x00bc, 0xc4b5},
{0x00bf, 0xc5bc},
{0x00c5, 0xc48a},
{0x00c6, 0xc488},
{0x00d5, 0xc4a0},
{0x00d8, 0xc49c},
{0x00dd, 0xc5ac},
{0x00de, 0xc59c},
{0x00e5, 0xc48b},
{0x00e6, 0xc489},
{0x00f5, 0xc4a1},
{0x00f8, 0xc49d},
{0x00fd, 0xc5ad},
{0x00fe, 0xc59d},
{0x00ff, 0xcb99}
};

View File

@ -0,0 +1,52 @@
/*
 * ISO 8859-4 --> UTF-8 lookup table.
 *
 * Each entry pairs an ISO 8859-4 code (first value) with the UTF-8 byte
 * sequence of the same character packed into one integer (second value).
 * Entries are in ascending order of the first value.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_local_to_utf LUmapISO8859_4[ 50 ] = {
{0x00a1, 0xc484},
{0x00a2, 0xc4b8},
{0x00a3, 0xc596},
{0x00a5, 0xc4a8},
{0x00a6, 0xc4bb},
{0x00a9, 0xc5a0},
{0x00aa, 0xc492},
{0x00ab, 0xc4a2},
{0x00ac, 0xc5a6},
{0x00ae, 0xc5bd},
{0x00b1, 0xc485},
{0x00b2, 0xcb9b},
{0x00b3, 0xc597},
{0x00b5, 0xc4a9},
{0x00b6, 0xc4bc},
{0x00b7, 0xcb87},
{0x00b9, 0xc5a1},
{0x00ba, 0xc493},
{0x00bb, 0xc4a3},
{0x00bc, 0xc5a7},
{0x00bd, 0xc58a},
{0x00be, 0xc5be},
{0x00bf, 0xc58b},
{0x00c0, 0xc480},
{0x00c7, 0xc4ae},
{0x00c8, 0xc48c},
{0x00ca, 0xc498},
{0x00cc, 0xc496},
{0x00cf, 0xc4aa},
{0x00d0, 0xc490},
{0x00d1, 0xc585},
{0x00d2, 0xc58c},
{0x00d3, 0xc4b6},
{0x00d9, 0xc5b2},
{0x00dd, 0xc5a8},
{0x00de, 0xc5aa},
{0x00e0, 0xc481},
{0x00e7, 0xc4af},
{0x00e8, 0xc48d},
{0x00ea, 0xc499},
{0x00ec, 0xc497},
{0x00ef, 0xc4ab},
{0x00f0, 0xc491},
{0x00f1, 0xc586},
{0x00f2, 0xc58d},
{0x00f3, 0xc4b7},
{0x00f9, 0xc5b3},
{0x00fd, 0xc5a9},
{0x00fe, 0xc5ab},
{0x00ff, 0xcb99}
};

View File

@ -0,0 +1,95 @@
/*
 * ISO 8859-5 --> UTF-8 lookup table.
 *
 * Each entry pairs an ISO 8859-5 code (first value) with the UTF-8 byte
 * sequence of the same character packed into one integer (second value).
 * Entries are in ascending order of the first value; note that 0x00f0 maps
 * to a three-byte sequence while every other entry is two bytes.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_local_to_utf LUmapISO8859_5[ 93 ] = {
{0x00a1, 0xd081},
{0x00a2, 0xd082},
{0x00a3, 0xd083},
{0x00a4, 0xd084},
{0x00a5, 0xd085},
{0x00a6, 0xd086},
{0x00a7, 0xd087},
{0x00a8, 0xd088},
{0x00a9, 0xd089},
{0x00aa, 0xd08a},
{0x00ab, 0xd08b},
{0x00ac, 0xd08c},
{0x00ae, 0xd08e},
{0x00af, 0xd08f},
{0x00b0, 0xd090},
{0x00b1, 0xd091},
{0x00b2, 0xd092},
{0x00b3, 0xd093},
{0x00b4, 0xd094},
{0x00b5, 0xd095},
{0x00b6, 0xd096},
{0x00b7, 0xd097},
{0x00b8, 0xd098},
{0x00b9, 0xd099},
{0x00ba, 0xd09a},
{0x00bb, 0xd09b},
{0x00bc, 0xd09c},
{0x00bd, 0xd09d},
{0x00be, 0xd09e},
{0x00bf, 0xd09f},
{0x00c0, 0xd0a0},
{0x00c1, 0xd0a1},
{0x00c2, 0xd0a2},
{0x00c3, 0xd0a3},
{0x00c4, 0xd0a4},
{0x00c5, 0xd0a5},
{0x00c6, 0xd0a6},
{0x00c7, 0xd0a7},
{0x00c8, 0xd0a8},
{0x00c9, 0xd0a9},
{0x00ca, 0xd0aa},
{0x00cb, 0xd0ab},
{0x00cc, 0xd0ac},
{0x00cd, 0xd0ad},
{0x00ce, 0xd0ae},
{0x00cf, 0xd0af},
{0x00d0, 0xd0b0},
{0x00d1, 0xd0b1},
{0x00d2, 0xd0b2},
{0x00d3, 0xd0b3},
{0x00d4, 0xd0b4},
{0x00d5, 0xd0b5},
{0x00d6, 0xd0b6},
{0x00d7, 0xd0b7},
{0x00d8, 0xd0b8},
{0x00d9, 0xd0b9},
{0x00da, 0xd0ba},
{0x00db, 0xd0bb},
{0x00dc, 0xd0bc},
{0x00dd, 0xd0bd},
{0x00de, 0xd0be},
{0x00df, 0xd0bf},
{0x00e0, 0xd180},
{0x00e1, 0xd181},
{0x00e2, 0xd182},
{0x00e3, 0xd183},
{0x00e4, 0xd184},
{0x00e5, 0xd185},
{0x00e6, 0xd186},
{0x00e7, 0xd187},
{0x00e8, 0xd188},
{0x00e9, 0xd189},
{0x00ea, 0xd18a},
{0x00eb, 0xd18b},
{0x00ec, 0xd18c},
{0x00ed, 0xd18d},
{0x00ee, 0xd18e},
{0x00ef, 0xd18f},
{0x00f0, 0xe28496},
{0x00f1, 0xd191},
{0x00f2, 0xd192},
{0x00f3, 0xd193},
{0x00f4, 0xd194},
{0x00f5, 0xd195},
{0x00f6, 0xd196},
{0x00f7, 0xd197},
{0x00f8, 0xd198},
{0x00f9, 0xd199},
{0x00fa, 0xd19a},
{0x00fb, 0xd19b},
{0x00fc, 0xd19c},
{0x00fe, 0xd19e},
{0x00ff, 0xd19f}
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
#
# $Id: ucs2utf.pl,v 1.1 2000/10/30 10:40:30 ishii Exp $
# convert UCS-2 to UTF-8
#
# Takes one UCS-2 code as a number and returns the bytes of its UTF-8
# encoding packed into a single integer, first byte in the highest position.
#
sub ucs2utf {
	my ($point) = @_;

	# values up to 0x7f are returned as they are
	return $point if $point <= 0x007f;

	# the final byte is built the same way in both remaining forms
	my $last = ($point & 0x003f) | 0x80;

	# two-byte form for values up to 0x07ff
	if ($point <= 0x07ff) {
		my $first = ($point >> 6) | 0xc0;
		return ($first << 8) | $last;
	}

	# three-byte form for everything else
	my $first  = ($point >> 12) | 0xe0;
	my $second = (($point & 0x0fc0) >> 6) | 0x80;
	return ($first << 16) | ($second << 8) | $last;
}
1;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,59 @@
/*
 * UTF-8 --> ISO 8859-2 lookup table.
 *
 * Each entry pairs the UTF-8 byte sequence of a character packed into one
 * integer (first value) with its ISO 8859-2 code (second value).
 * Entries are in ascending order of the first value.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_utf_to_local ULmapISO8859_2[ 57 ] = {
{0xc482, 0x00c3},
{0xc483, 0x00e3},
{0xc484, 0x00a1},
{0xc485, 0x00b1},
{0xc486, 0x00c6},
{0xc487, 0x00e6},
{0xc48c, 0x00c8},
{0xc48d, 0x00e8},
{0xc48e, 0x00cf},
{0xc48f, 0x00ef},
{0xc490, 0x00d0},
{0xc491, 0x00f0},
{0xc498, 0x00ca},
{0xc499, 0x00ea},
{0xc49a, 0x00cc},
{0xc49b, 0x00ec},
{0xc4b9, 0x00c5},
{0xc4ba, 0x00e5},
{0xc4bd, 0x00a5},
{0xc4be, 0x00b5},
{0xc581, 0x00a3},
{0xc582, 0x00b3},
{0xc583, 0x00d1},
{0xc584, 0x00f1},
{0xc587, 0x00d2},
{0xc588, 0x00f2},
{0xc590, 0x00d5},
{0xc591, 0x00f5},
{0xc594, 0x00c0},
{0xc595, 0x00e0},
{0xc598, 0x00d8},
{0xc599, 0x00f8},
{0xc59a, 0x00a6},
{0xc59b, 0x00b6},
{0xc59e, 0x00aa},
{0xc59f, 0x00ba},
{0xc5a0, 0x00a9},
{0xc5a1, 0x00b9},
{0xc5a2, 0x00de},
{0xc5a3, 0x00fe},
{0xc5a4, 0x00ab},
{0xc5a5, 0x00bb},
{0xc5ae, 0x00d9},
{0xc5af, 0x00f9},
{0xc5b0, 0x00db},
{0xc5b1, 0x00fb},
{0xc5b9, 0x00ac},
{0xc5ba, 0x00bc},
{0xc5bb, 0x00af},
{0xc5bc, 0x00bf},
{0xc5bd, 0x00ae},
{0xc5be, 0x00be},
{0xcb87, 0x00b7},
{0xcb98, 0x00a2},
{0xcb99, 0x00ff},
{0xcb9b, 0x00b2},
{0xcb9d, 0x00bd}
};

View File

@ -0,0 +1,30 @@
/*
 * UTF-8 --> ISO 8859-3 lookup table.
 *
 * Each entry pairs the UTF-8 byte sequence of a character packed into one
 * integer (first value) with its ISO 8859-3 code (second value).
 * Entries are in ascending order of the first value.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_utf_to_local ULmapISO8859_3[ 28 ] = {
{0xc488, 0x00c6},
{0xc489, 0x00e6},
{0xc48a, 0x00c5},
{0xc48b, 0x00e5},
{0xc49c, 0x00d8},
{0xc49d, 0x00f8},
{0xc49e, 0x00ab},
{0xc49f, 0x00bb},
{0xc4a0, 0x00d5},
{0xc4a1, 0x00f5},
{0xc4a4, 0x00a6},
{0xc4a5, 0x00b6},
{0xc4a6, 0x00a1},
{0xc4a7, 0x00b1},
{0xc4b0, 0x00a9},
{0xc4b1, 0x00b9},
{0xc4b4, 0x00ac},
{0xc4b5, 0x00bc},
{0xc59c, 0x00de},
{0xc59d, 0x00fe},
{0xc59e, 0x00aa},
{0xc59f, 0x00ba},
{0xc5ac, 0x00dd},
{0xc5ad, 0x00fd},
{0xc5bb, 0x00af},
{0xc5bc, 0x00bf},
{0xcb98, 0x00a2},
{0xcb99, 0x00ff}
};

View File

@ -0,0 +1,52 @@
/*
 * UTF-8 --> ISO 8859-4 lookup table.
 *
 * Each entry pairs the UTF-8 byte sequence of a character packed into one
 * integer (first value) with its ISO 8859-4 code (second value).
 * Entries are in ascending order of the first value.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_utf_to_local ULmapISO8859_4[ 50 ] = {
{0xc480, 0x00c0},
{0xc481, 0x00e0},
{0xc484, 0x00a1},
{0xc485, 0x00b1},
{0xc48c, 0x00c8},
{0xc48d, 0x00e8},
{0xc490, 0x00d0},
{0xc491, 0x00f0},
{0xc492, 0x00aa},
{0xc493, 0x00ba},
{0xc496, 0x00cc},
{0xc497, 0x00ec},
{0xc498, 0x00ca},
{0xc499, 0x00ea},
{0xc4a2, 0x00ab},
{0xc4a3, 0x00bb},
{0xc4a8, 0x00a5},
{0xc4a9, 0x00b5},
{0xc4aa, 0x00cf},
{0xc4ab, 0x00ef},
{0xc4ae, 0x00c7},
{0xc4af, 0x00e7},
{0xc4b6, 0x00d3},
{0xc4b7, 0x00f3},
{0xc4b8, 0x00a2},
{0xc4bb, 0x00a6},
{0xc4bc, 0x00b6},
{0xc585, 0x00d1},
{0xc586, 0x00f1},
{0xc58a, 0x00bd},
{0xc58b, 0x00bf},
{0xc58c, 0x00d2},
{0xc58d, 0x00f2},
{0xc596, 0x00a3},
{0xc597, 0x00b3},
{0xc5a0, 0x00a9},
{0xc5a1, 0x00b9},
{0xc5a6, 0x00ac},
{0xc5a7, 0x00bc},
{0xc5a8, 0x00dd},
{0xc5a9, 0x00fd},
{0xc5aa, 0x00de},
{0xc5ab, 0x00fe},
{0xc5b2, 0x00d9},
{0xc5b3, 0x00f9},
{0xc5bd, 0x00ae},
{0xc5be, 0x00be},
{0xcb87, 0x00b7},
{0xcb99, 0x00ff},
{0xcb9b, 0x00b2}
};

View File

@ -0,0 +1,95 @@
/*
 * UTF-8 --> ISO 8859-5 lookup table.
 *
 * Each entry pairs the UTF-8 byte sequence of a character packed into one
 * integer (first value) with its ISO 8859-5 code (second value).
 * Entries are in ascending order of the first value; the last entry is the
 * only three-byte sequence in the table.
 * NOTE(review): this matches the output format of UCS_to_8859.pl, so it is
 * presumably generated; regenerate rather than editing by hand.
 */
static pg_utf_to_local ULmapISO8859_5[ 93 ] = {
{0xd081, 0x00a1},
{0xd082, 0x00a2},
{0xd083, 0x00a3},
{0xd084, 0x00a4},
{0xd085, 0x00a5},
{0xd086, 0x00a6},
{0xd087, 0x00a7},
{0xd088, 0x00a8},
{0xd089, 0x00a9},
{0xd08a, 0x00aa},
{0xd08b, 0x00ab},
{0xd08c, 0x00ac},
{0xd08e, 0x00ae},
{0xd08f, 0x00af},
{0xd090, 0x00b0},
{0xd091, 0x00b1},
{0xd092, 0x00b2},
{0xd093, 0x00b3},
{0xd094, 0x00b4},
{0xd095, 0x00b5},
{0xd096, 0x00b6},
{0xd097, 0x00b7},
{0xd098, 0x00b8},
{0xd099, 0x00b9},
{0xd09a, 0x00ba},
{0xd09b, 0x00bb},
{0xd09c, 0x00bc},
{0xd09d, 0x00bd},
{0xd09e, 0x00be},
{0xd09f, 0x00bf},
{0xd0a0, 0x00c0},
{0xd0a1, 0x00c1},
{0xd0a2, 0x00c2},
{0xd0a3, 0x00c3},
{0xd0a4, 0x00c4},
{0xd0a5, 0x00c5},
{0xd0a6, 0x00c6},
{0xd0a7, 0x00c7},
{0xd0a8, 0x00c8},
{0xd0a9, 0x00c9},
{0xd0aa, 0x00ca},
{0xd0ab, 0x00cb},
{0xd0ac, 0x00cc},
{0xd0ad, 0x00cd},
{0xd0ae, 0x00ce},
{0xd0af, 0x00cf},
{0xd0b0, 0x00d0},
{0xd0b1, 0x00d1},
{0xd0b2, 0x00d2},
{0xd0b3, 0x00d3},
{0xd0b4, 0x00d4},
{0xd0b5, 0x00d5},
{0xd0b6, 0x00d6},
{0xd0b7, 0x00d7},
{0xd0b8, 0x00d8},
{0xd0b9, 0x00d9},
{0xd0ba, 0x00da},
{0xd0bb, 0x00db},
{0xd0bc, 0x00dc},
{0xd0bd, 0x00dd},
{0xd0be, 0x00de},
{0xd0bf, 0x00df},
{0xd180, 0x00e0},
{0xd181, 0x00e1},
{0xd182, 0x00e2},
{0xd183, 0x00e3},
{0xd184, 0x00e4},
{0xd185, 0x00e5},
{0xd186, 0x00e6},
{0xd187, 0x00e7},
{0xd188, 0x00e8},
{0xd189, 0x00e9},
{0xd18a, 0x00ea},
{0xd18b, 0x00eb},
{0xd18c, 0x00ec},
{0xd18d, 0x00ed},
{0xd18e, 0x00ee},
{0xd18f, 0x00ef},
{0xd191, 0x00f1},
{0xd192, 0x00f2},
{0xd193, 0x00f3},
{0xd194, 0x00f4},
{0xd195, 0x00f5},
{0xd196, 0x00f6},
{0xd197, 0x00f7},
{0xd198, 0x00f8},
{0xd199, 0x00f9},
{0xd19a, 0x00fa},
{0xd19b, 0x00fb},
{0xd19c, 0x00fc},
{0xd19e, 0x00fe},
{0xd19f, 0x00ff},
{0xe28496, 0x00f0}
};

File diff suppressed because it is too large Load Diff