1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-25 13:17:41 +03:00

Fix broken GB18030 <--> UTF-8 conversion map

This commit is contained in:
Tatsuo Ishii
2002-11-12 11:33:40 +00:00
parent 5eb6de5991
commit 90a06dba16
3 changed files with 126746 additions and 126986 deletions

View File

@@ -2,7 +2,7 @@
# #
# Copyright 2002 by Bill Huang # Copyright 2002 by Bill Huang
# #
# $Id: UCS_to_GB18030.pl,v 1.1 2002/06/13 08:28:55 ishii Exp $ # $Id: UCS_to_GB18030.pl,v 1.2 2002/11/12 11:33:40 ishii Exp $
# #
# Generate UTF-8 <--> GB18030 code conversion tables from # Generate UTF-8 <--> GB18030 code conversion tables from
# map files provided by Unicode organization. # map files provided by Unicode organization.
@@ -30,10 +30,18 @@ while( <FILE> ){
next; next;
} }
( $u, $c, $rest ) = split; ( $u, $c, $rest ) = split;
$utf = hex($u); $ucs = hex($u);
$code = hex($c); $code = hex($c);
if( $code >= 0x80 && $ucs >= 0x0080 ){
$utf = &ucs2utf($ucs);
if( $array{ $utf } ne "" ){
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
next;
}
$count++; $count++;
$array{ $utf } = ($code);
$array{ $utf } = $code;
}
} }
close( FILE ); close( FILE );
@@ -70,12 +78,20 @@ while( <FILE> ){
if( /^#/ ){ if( /^#/ ){
next; next;
} }
( $u, $c, $rest ) = split; ( $c, $u, $rest ) = split;
$utf = hex($u); $ucs = hex($u);
$code = hex($c); $code = hex($c);
if( $code >= 0x80 && $ucs >= 0x0080 ){
$utf = &ucs2utf($ucs);
if( $array{ $code } ne "" ){
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
next;
}
$count++; $count++;
$array{ $code } = $utf; $array{ $code } = $utf;
} }
}
close( FILE ); close( FILE );
$file = "gb18030_to_utf8.map"; $file = "gb18030_to_utf8.map";

File diff suppressed because it is too large. [Load Diff]

File diff suppressed because it is too large. [Load Diff]