mirror of
https://github.com/postgres/postgres.git
synced 2025-09-02 04:21:28 +03:00
Run newly-configured perltidy script on Perl files.
Run on HEAD and 9.2.
This commit is contained in:
@@ -19,29 +19,29 @@ use strict;
|
||||
use warnings;
|
||||
|
||||
# Collect arguments
|
||||
my $infile; # pg_proc.h
|
||||
my $infile; # pg_proc.h
|
||||
my $output_path = '';
|
||||
while (@ARGV)
|
||||
{
|
||||
my $arg = shift @ARGV;
|
||||
if ($arg !~ /^-/)
|
||||
{
|
||||
$infile = $arg;
|
||||
}
|
||||
elsif ($arg =~ /^-o/)
|
||||
{
|
||||
$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
|
||||
}
|
||||
else
|
||||
{
|
||||
usage();
|
||||
}
|
||||
my $arg = shift @ARGV;
|
||||
if ($arg !~ /^-/)
|
||||
{
|
||||
$infile = $arg;
|
||||
}
|
||||
elsif ($arg =~ /^-o/)
|
||||
{
|
||||
$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
|
||||
}
|
||||
else
|
||||
{
|
||||
usage();
|
||||
}
|
||||
}
|
||||
|
||||
# Make sure output_path ends in a slash.
|
||||
if ($output_path ne '' && substr($output_path, -1) ne '/')
|
||||
{
|
||||
$output_path .= '/';
|
||||
$output_path .= '/';
|
||||
}
|
||||
|
||||
# Read all the data from the include/catalog files.
|
||||
@@ -50,48 +50,47 @@ my $catalogs = Catalog::Catalogs($infile);
|
||||
# Collect the raw data from pg_proc.h.
|
||||
my @fmgr = ();
|
||||
my @attnames;
|
||||
foreach my $column ( @{ $catalogs->{pg_proc}->{columns} } )
|
||||
foreach my $column (@{ $catalogs->{pg_proc}->{columns} })
|
||||
{
|
||||
push @attnames, keys %$column;
|
||||
push @attnames, keys %$column;
|
||||
}
|
||||
|
||||
my $data = $catalogs->{pg_proc}->{data};
|
||||
foreach my $row (@$data)
|
||||
{
|
||||
# To construct fmgroids.h and fmgrtab.c, we need to inspect some
|
||||
# of the individual data fields. Just splitting on whitespace
|
||||
# won't work, because some quoted fields might contain internal
|
||||
# whitespace. We handle this by folding them all to a simple
|
||||
# "xxx". Fortunately, this script doesn't need to look at any
|
||||
# fields that might need quoting, so this simple hack is
|
||||
# sufficient.
|
||||
$row->{bki_values} =~ s/"[^"]*"/"xxx"/g;
|
||||
@{$row}{@attnames} = split /\s+/, $row->{bki_values};
|
||||
|
||||
# Select out just the rows for internal-language procedures.
|
||||
# Note assumption here that INTERNALlanguageId is 12.
|
||||
next if $row->{prolang} ne '12';
|
||||
# To construct fmgroids.h and fmgrtab.c, we need to inspect some
|
||||
# of the individual data fields. Just splitting on whitespace
|
||||
# won't work, because some quoted fields might contain internal
|
||||
# whitespace. We handle this by folding them all to a simple
|
||||
# "xxx". Fortunately, this script doesn't need to look at any
|
||||
# fields that might need quoting, so this simple hack is
|
||||
# sufficient.
|
||||
$row->{bki_values} =~ s/"[^"]*"/"xxx"/g;
|
||||
@{$row}{@attnames} = split /\s+/, $row->{bki_values};
|
||||
|
||||
push @fmgr,
|
||||
{
|
||||
oid => $row->{oid},
|
||||
strict => $row->{proisstrict},
|
||||
retset => $row->{proretset},
|
||||
nargs => $row->{pronargs},
|
||||
prosrc => $row->{prosrc},
|
||||
};
|
||||
# Select out just the rows for internal-language procedures.
|
||||
# Note assumption here that INTERNALlanguageId is 12.
|
||||
next if $row->{prolang} ne '12';
|
||||
|
||||
# Hack to work around memory leak in some versions of Perl
|
||||
$row = undef;
|
||||
push @fmgr,
|
||||
{ oid => $row->{oid},
|
||||
strict => $row->{proisstrict},
|
||||
retset => $row->{proretset},
|
||||
nargs => $row->{pronargs},
|
||||
prosrc => $row->{prosrc}, };
|
||||
|
||||
# Hack to work around memory leak in some versions of Perl
|
||||
$row = undef;
|
||||
}
|
||||
|
||||
# Emit headers for both files
|
||||
my $tmpext = ".tmp$$";
|
||||
my $tmpext = ".tmp$$";
|
||||
my $oidsfile = $output_path . 'fmgroids.h';
|
||||
my $tabfile = $output_path . 'fmgrtab.c';
|
||||
my $tabfile = $output_path . 'fmgrtab.c';
|
||||
|
||||
open H, '>', $oidsfile . $tmpext or die "Could not open $oidsfile$tmpext: $!";
|
||||
open T, '>', $tabfile . $tmpext or die "Could not open $tabfile$tmpext: $!";
|
||||
open T, '>', $tabfile . $tmpext or die "Could not open $tabfile$tmpext: $!";
|
||||
|
||||
print H
|
||||
qq|/*-------------------------------------------------------------------------
|
||||
@@ -160,12 +159,12 @@ qq|/*-------------------------------------------------------------------------
|
||||
|
||||
# Emit #define's and extern's -- only one per prosrc value
|
||||
my %seenit;
|
||||
foreach my $s (sort {$a->{oid} <=> $b->{oid}} @fmgr)
|
||||
foreach my $s (sort { $a->{oid} <=> $b->{oid} } @fmgr)
|
||||
{
|
||||
next if $seenit{$s->{prosrc}};
|
||||
$seenit{$s->{prosrc}} = 1;
|
||||
print H "#define F_" . uc $s->{prosrc} . " $s->{oid}\n";
|
||||
print T "extern Datum $s->{prosrc} (PG_FUNCTION_ARGS);\n";
|
||||
next if $seenit{ $s->{prosrc} };
|
||||
$seenit{ $s->{prosrc} } = 1;
|
||||
print H "#define F_" . uc $s->{prosrc} . " $s->{oid}\n";
|
||||
print T "extern Datum $s->{prosrc} (PG_FUNCTION_ARGS);\n";
|
||||
}
|
||||
|
||||
# Create the fmgr_builtins table
|
||||
@@ -173,10 +172,10 @@ print T "\nconst FmgrBuiltin fmgr_builtins[] = {\n";
|
||||
my %bmap;
|
||||
$bmap{'t'} = 'true';
|
||||
$bmap{'f'} = 'false';
|
||||
foreach my $s (sort {$a->{oid} <=> $b->{oid}} @fmgr)
|
||||
foreach my $s (sort { $a->{oid} <=> $b->{oid} } @fmgr)
|
||||
{
|
||||
print T
|
||||
" { $s->{oid}, \"$s->{prosrc}\", $s->{nargs}, $bmap{$s->{strict}}, $bmap{$s->{retset}}, $s->{prosrc} },\n";
|
||||
print T
|
||||
" { $s->{oid}, \"$s->{prosrc}\", $s->{nargs}, $bmap{$s->{strict}}, $bmap{$s->{retset}}, $s->{prosrc} },\n";
|
||||
}
|
||||
|
||||
# And add the file footers.
|
||||
@@ -198,11 +197,11 @@ close(T);
|
||||
|
||||
# Finally, rename the completed files into place.
|
||||
Catalog::RenameTempFile($oidsfile, $tmpext);
|
||||
Catalog::RenameTempFile($tabfile, $tmpext);
|
||||
Catalog::RenameTempFile($tabfile, $tmpext);
|
||||
|
||||
sub usage
|
||||
{
|
||||
die <<EOM;
|
||||
die <<EOM;
|
||||
Usage: perl -I [directory of Catalog.pm] Gen_fmgrtab.pl [path to pg_proc.h]
|
||||
|
||||
Gen_fmgrtab.pl generates fmgroids.h and fmgrtab.c from pg_proc.h
|
||||
|
@@ -6,36 +6,41 @@
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
print "/* autogenerated from src/backend/utils/errcodes.txt, do not edit */\n";
|
||||
print
|
||||
"/* autogenerated from src/backend/utils/errcodes.txt, do not edit */\n";
|
||||
print "/* there is deliberately not an #ifndef ERRCODES_H here */\n";
|
||||
|
||||
open my $errcodes, $ARGV[0] or die;
|
||||
|
||||
while (<$errcodes>) {
|
||||
chomp;
|
||||
while (<$errcodes>)
|
||||
{
|
||||
chomp;
|
||||
|
||||
# Skip comments
|
||||
next if /^#/;
|
||||
next if /^\s*$/;
|
||||
# Skip comments
|
||||
next if /^#/;
|
||||
next if /^\s*$/;
|
||||
|
||||
# Emit a comment for each section header
|
||||
if (/^Section:(.*)/) {
|
||||
# Emit a comment for each section header
|
||||
if (/^Section:(.*)/)
|
||||
{
|
||||
my $header = $1;
|
||||
$header =~ s/^\s+//;
|
||||
print "\n/* $header */\n";
|
||||
next;
|
||||
}
|
||||
|
||||
die "unable to parse errcodes.txt" unless /^([^\s]{5})\s+[EWS]\s+([^\s]+)/;
|
||||
die "unable to parse errcodes.txt"
|
||||
unless /^([^\s]{5})\s+[EWS]\s+([^\s]+)/;
|
||||
|
||||
(my $sqlstate, my $errcode_macro) = ($1, $2);
|
||||
(my $sqlstate, my $errcode_macro) = ($1, $2);
|
||||
|
||||
# Split the sqlstate letters
|
||||
$sqlstate = join ",", split "", $sqlstate;
|
||||
# And quote them
|
||||
$sqlstate =~ s/([^,])/'$1'/g;
|
||||
# Split the sqlstate letters
|
||||
$sqlstate = join ",", split "", $sqlstate;
|
||||
|
||||
print "#define $errcode_macro MAKE_SQLSTATE($sqlstate)\n";
|
||||
# And quote them
|
||||
$sqlstate =~ s/([^,])/'$1'/g;
|
||||
|
||||
print "#define $errcode_macro MAKE_SQLSTATE($sqlstate)\n";
|
||||
}
|
||||
|
||||
close $errcodes;
|
||||
|
@@ -33,68 +33,82 @@ require "ucs2utf.pl";
|
||||
#
|
||||
$in_file = "BIG5.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{ $utf } = $code;
|
||||
$array{$utf} = $code;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$in_file = "CP950.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
|
||||
# Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
|
||||
# from CP950.TXT
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 &&
|
||||
$code >= 0xf9d6 && $code <= 0xf9dc ){
|
||||
if ( $code >= 0x80
|
||||
&& $ucs >= 0x0080
|
||||
&& $code >= 0xf9d6
|
||||
&& $code <= 0xf9dc)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{ $utf } = $code;
|
||||
$array{$utf} = $code;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = lc("utf8_to_big5.map");
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmapBIG5[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -107,67 +121,81 @@ close(FILE);
|
||||
#
|
||||
$in_file = "BIG5.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$in_file = "CP950.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
|
||||
# Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
|
||||
# from CP950.TXT
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 &&
|
||||
$code >= 0xf9d6 && $code <= 0xf9dc ){
|
||||
if ( $code >= 0x80
|
||||
&& $ucs >= 0x0080
|
||||
&& $code >= 0xf9d6
|
||||
&& $code <= 0xf9dc)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = lc("big5_to_utf8.map");
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmapBIG5[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -22,43 +22,51 @@ require "ucs2utf.pl";
|
||||
|
||||
$in_file = "GB2312.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $utf } = ($code | 0x8080);
|
||||
$array{$utf} = ($code | 0x8080);
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# first, generate UTF8 --> EUC_CN table
|
||||
#
|
||||
|
||||
$file = "utf8_to_euc_cn.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -71,39 +79,47 @@ close(FILE);
|
||||
#
|
||||
reset 'array';
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate code: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$code |= 0x8080;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "euc_cn_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -15,89 +15,110 @@ $TEST = 1;
|
||||
|
||||
$in_file = "euc-jis-2004-std.txt";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
reset 'array1';
|
||||
reset 'comment';
|
||||
reset 'comment1';
|
||||
|
||||
while($line = <FILE> ){
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{ $str } = $code;
|
||||
$comment1{ $str } = $rest;
|
||||
while ($line = <FILE>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{$str} = $code;
|
||||
$comment1{$str} = $rest;
|
||||
$count1++;
|
||||
next;
|
||||
} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
$rest = "U+" . $u . $3;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
next;
|
||||
}
|
||||
|
||||
$ucs = hex($u);
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
$utf = &ucs2utf($ucs);
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $utf } = $code;
|
||||
$comment{ $code } = $rest;
|
||||
$array{$utf} = $code;
|
||||
$comment{$code} = $rest;
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "utf8_to_euc_jis_2004.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, $comment{ $code };
|
||||
} else {
|
||||
printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, $comment{ $code };
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
}
|
||||
|
||||
print FILE "};\n";
|
||||
close(FILE);
|
||||
|
||||
if ($TEST == 1) {
|
||||
if ($TEST == 1)
|
||||
{
|
||||
$file1 = "utf8.data";
|
||||
$file2 = "euc_jis_2004.data";
|
||||
open( FILE1, "> $file1" ) || die( "cannot open $file1" );
|
||||
open( FILE2, "> $file2" ) || die( "cannot open $file2" );
|
||||
open(FILE1, "> $file1") || die("cannot open $file1");
|
||||
open(FILE2, "> $file2") || die("cannot open $file2");
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d &&
|
||||
$code != 0x5c &&
|
||||
($code < 0x80 ||
|
||||
($code >= 0x8ea1 && $code <= 0x8efe) ||
|
||||
($code >= 0x8fa1a1 && $code <= 0x8ffefe) ||
|
||||
($code >= 0xa1a1 && $code <= 0x8fefe))) {
|
||||
for ($i = 3; $i >= 0; $i--) {
|
||||
$s = $i * 8;
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
if ( $code > 0x00
|
||||
&& $code != 0x09
|
||||
&& $code != 0x0a
|
||||
&& $code != 0x0d
|
||||
&& $code != 0x5c
|
||||
&& ( $code < 0x80
|
||||
|| ($code >= 0x8ea1 && $code <= 0x8efe)
|
||||
|| ($code >= 0x8fa1a1 && $code <= 0x8ffefe)
|
||||
|| ($code >= 0xa1a1 && $code <= 0x8fefe)))
|
||||
{
|
||||
for ($i = 3; $i >= 0; $i--)
|
||||
{
|
||||
$s = $i * 8;
|
||||
$mask = 0xff << $s;
|
||||
print FILE1 pack("C", ($index & $mask) >> $s) if $index & $mask;
|
||||
print FILE1 pack("C", ($index & $mask) >> $s)
|
||||
if $index & $mask;
|
||||
print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
|
||||
}
|
||||
print FILE1 "\n";
|
||||
@@ -107,46 +128,62 @@ if ($TEST == 1) {
|
||||
}
|
||||
|
||||
$file = "utf8_to_euc_jis_2004_combined.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
|
||||
print FILE
|
||||
"static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
|
||||
|
||||
for $index ( sort {$a cmp $b} keys( %array1 ) ){
|
||||
$code = $array1{ $index };
|
||||
for $index (sort { $a cmp $b } keys(%array1))
|
||||
{
|
||||
$code = $array1{$index};
|
||||
$count1--;
|
||||
if( $count1 == 0 ){
|
||||
printf FILE " {0x%s, 0x%s, 0x%06x} /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
|
||||
} else {
|
||||
printf FILE " {0x%s, 0x%s, 0x%06x}, /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
|
||||
if ($count1 == 0)
|
||||
{
|
||||
printf FILE " {0x%s, 0x%s, 0x%06x} /* %s */\n", substr($index, 0, 8),
|
||||
substr($index, 8, 8), $code, $comment1{$index};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%s, 0x%s, 0x%06x}, /* %s */\n",
|
||||
substr($index, 0, 8), substr($index, 8, 8), $code,
|
||||
$comment1{$index};
|
||||
}
|
||||
}
|
||||
|
||||
print FILE "};\n";
|
||||
close(FILE);
|
||||
|
||||
if ($TEST == 1) {
|
||||
for $index ( sort {$a cmp $b} keys( %array1 ) ){
|
||||
$code = $array1{ $index };
|
||||
if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d &&
|
||||
$code != 0x5c &&
|
||||
($code < 0x80 ||
|
||||
($code >= 0x8ea1 && $code <= 0x8efe) ||
|
||||
($code >= 0x8fa1a1 && $code <= 0x8ffefe) ||
|
||||
($code >= 0xa1a1 && $code <= 0x8fefe))) {
|
||||
if ($TEST == 1)
|
||||
{
|
||||
for $index (sort { $a cmp $b } keys(%array1))
|
||||
{
|
||||
$code = $array1{$index};
|
||||
if ( $code > 0x00
|
||||
&& $code != 0x09
|
||||
&& $code != 0x0a
|
||||
&& $code != 0x0d
|
||||
&& $code != 0x5c
|
||||
&& ( $code < 0x80
|
||||
|| ($code >= 0x8ea1 && $code <= 0x8efe)
|
||||
|| ($code >= 0x8fa1a1 && $code <= 0x8ffefe)
|
||||
|| ($code >= 0xa1a1 && $code <= 0x8fefe)))
|
||||
{
|
||||
|
||||
$v1 = hex(substr($index, 0, 8));
|
||||
$v2 = hex(substr($index, 8, 8));
|
||||
|
||||
for ($i = 3; $i >= 0; $i--) {
|
||||
$s = $i * 8;
|
||||
for ($i = 3; $i >= 0; $i--)
|
||||
{
|
||||
$s = $i * 8;
|
||||
$mask = 0xff << $s;
|
||||
print FILE1 pack("C", ($v1 & $mask) >> $s) if $v1 & $mask;
|
||||
print FILE1 pack("C", ($v1 & $mask) >> $s) if $v1 & $mask;
|
||||
print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
|
||||
}
|
||||
for ($i = 3; $i >= 0; $i--) {
|
||||
$s = $i * 8;
|
||||
for ($i = 3; $i >= 0; $i--)
|
||||
{
|
||||
$s = $i * 8;
|
||||
$mask = 0xff << $s;
|
||||
print FILE1 pack("C", ($v2 & $mask) >> $s) if $v2 & $mask;
|
||||
}
|
||||
@@ -162,65 +199,78 @@ if ($TEST == 1) {
|
||||
|
||||
$in_file = "euc-jis-2004-std.txt";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
reset 'array1';
|
||||
reset 'comment';
|
||||
reset 'comment1';
|
||||
|
||||
while($line = <FILE> ){
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{ $code } = $str;
|
||||
$comment1{ $code } = $rest;
|
||||
while ($line = <FILE>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{$code} = $str;
|
||||
$comment1{$code} = $rest;
|
||||
$count1++;
|
||||
next;
|
||||
} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
$rest = "U+" . $u . $3;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
next;
|
||||
}
|
||||
|
||||
$ucs = hex($u);
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
$utf = &ucs2utf($ucs);
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $code } = $utf;
|
||||
$comment{ $utf } = $rest;
|
||||
$array{$code} = $utf;
|
||||
$comment{$utf} = $rest;
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "euc_jis_2004_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
printf FILE " {0x%06x, 0x%08x} /* %s */\n", $index, $code, $comment{ $code };
|
||||
} else {
|
||||
printf FILE " {0x%06x, 0x%08x}, /* %s */\n", $index, $code, $comment{ $code };
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%06x, 0x%08x} /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%06x, 0x%08x}, /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,19 +278,26 @@ print FILE "};\n";
|
||||
close(FILE);
|
||||
|
||||
$file = "euc_jis_2004_to_utf8_combined.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
|
||||
print FILE
|
||||
"static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array1 ) ){
|
||||
$code = $array1{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array1))
|
||||
{
|
||||
$code = $array1{$index};
|
||||
$count1--;
|
||||
if( $count1 == 0 ){
|
||||
printf FILE " {0x%06x, 0x%s, 0x%s} /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
|
||||
} else {
|
||||
printf FILE " {0x%06x, 0x%s, 0x%s}, /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
|
||||
if ($count1 == 0)
|
||||
{
|
||||
printf FILE " {0x%06x, 0x%s, 0x%s} /* %s */\n", $index,
|
||||
substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%06x, 0x%s, 0x%s}, /* %s */\n", $index,
|
||||
substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -36,102 +36,118 @@ require "ucs2utf.pl";
|
||||
#
|
||||
$in_file = "JIS0201.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
# add single shift 2
|
||||
$array{ $utf } = ($code | 0x8e00);
|
||||
$array{$utf} = ($code | 0x8e00);
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# JIS0208
|
||||
#
|
||||
$in_file = "JIS0208.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $s, $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($s, $c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $utf } = ($code | 0x8080);
|
||||
$array{$utf} = ($code | 0x8080);
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# JIS0212
|
||||
#
|
||||
$in_file = "JIS0212.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $utf } = ($code | 0x8f8080);
|
||||
$array{$utf} = ($code | 0x8f8080);
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# first, generate UTF8 --> EUC_JP table
|
||||
#
|
||||
|
||||
$file = "utf8_to_euc_jp.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -148,100 +164,116 @@ close(FILE);
|
||||
#
|
||||
$in_file = "JIS0201.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate code: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
# add single shift 2
|
||||
$code |= 0x8e00;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# JIS0208
|
||||
#
|
||||
$in_file = "JIS0208.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $s, $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($s, $c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate code: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$code |= 0x8080;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# JIS0212
|
||||
#
|
||||
$in_file = "JIS0212.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate code: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$code |= 0x8f8080;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "euc_jp_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -22,43 +22,51 @@ require "ucs2utf.pl";
|
||||
|
||||
$in_file = "KSX1001.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $utf } = ($code | 0x8080);
|
||||
$array{$utf} = ($code | 0x8080);
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# first, generate UTF8 --> EUC_KR table
|
||||
#
|
||||
|
||||
$file = "utf8_to_euc_kr.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -71,39 +79,47 @@ close(FILE);
|
||||
#
|
||||
reset 'array';
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate code: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$code |= 0x8080;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "euc_kr_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -23,53 +23,66 @@ require "ucs2utf.pl";
|
||||
|
||||
$in_file = "CNS11643.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$plane = ($code & 0x1f0000) >> 16;
|
||||
if ($plane > 16) {
|
||||
if ($plane > 16)
|
||||
{
|
||||
printf STDERR "Warning: invalid plane No.$plane. ignored\n";
|
||||
next;
|
||||
}
|
||||
|
||||
if ($plane == 1) {
|
||||
$array{ $utf } = (($code & 0xffff) | 0x8080);
|
||||
} else {
|
||||
$array{ $utf } = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
|
||||
if ($plane == 1)
|
||||
{
|
||||
$array{$utf} = (($code & 0xffff) | 0x8080);
|
||||
}
|
||||
else
|
||||
{
|
||||
$array{$utf} =
|
||||
(0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
|
||||
}
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# first, generate UTF8 --> EUC_TW table
|
||||
#
|
||||
|
||||
$file = "utf8_to_euc_tw.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -82,50 +95,60 @@ close(FILE);
|
||||
#
|
||||
reset 'array';
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate code: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$plane = ($code & 0x1f0000) >> 16;
|
||||
if ($plane > 16) {
|
||||
if ($plane > 16)
|
||||
{
|
||||
printf STDERR "Warning: invalid plane No.$plane. ignored\n";
|
||||
next;
|
||||
}
|
||||
|
||||
if ($plane == 1) {
|
||||
if ($plane == 1)
|
||||
{
|
||||
$c = (($code & 0xffff) | 0x8080);
|
||||
$array{ $c } = $utf;
|
||||
$array{$c} = $utf;
|
||||
$count++;
|
||||
}
|
||||
$c = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
|
||||
$array{ $c } = $utf;
|
||||
$array{$c} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "euc_tw_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -18,28 +18,32 @@ require "ucs2utf.pl";
|
||||
|
||||
$in_file = "ISO10646-GB18030.TXT";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $u, $c, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($u, $c, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $utf } = $code;
|
||||
$array{$utf} = $code;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
|
||||
#
|
||||
@@ -47,15 +51,19 @@ close( FILE );
|
||||
#
|
||||
|
||||
$file = "utf8_to_gb18030.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmapGB18030[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -69,38 +77,46 @@ close(FILE);
|
||||
#
|
||||
reset 'array';
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $u, $c, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($u, $c, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate code: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "gb18030_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmapGB18030[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -13,65 +13,80 @@ require "ucs2utf.pl";
|
||||
|
||||
$in_file = "sjis-0213-2004-std.txt";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
reset 'array1';
|
||||
reset 'comment';
|
||||
reset 'comment1';
|
||||
|
||||
while($line = <FILE> ){
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{ $str } = $code;
|
||||
$comment1{ $str } = $rest;
|
||||
while ($line = <FILE>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{$str} = $code;
|
||||
$comment1{$str} = $rest;
|
||||
$count1++;
|
||||
next;
|
||||
} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
$rest = "U+" . $u . $3;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
next;
|
||||
}
|
||||
|
||||
$ucs = hex($u);
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n",$utf, $ucs, $code;
|
||||
$utf = &ucs2utf($ucs);
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR
|
||||
"Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
|
||||
$ucs, $code;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $utf } = $code;
|
||||
$comment{ $code } = $rest;
|
||||
$array{$utf} = $code;
|
||||
$comment{$code} = $rest;
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "utf8_to_shift_jis_2004.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, $comment{ $code };
|
||||
} else {
|
||||
printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, $comment{ $code };
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,19 +94,27 @@ print FILE "};\n";
|
||||
close(FILE);
|
||||
|
||||
$file = "utf8_to_shift_jis_2004_combined.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
|
||||
print FILE
|
||||
"static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
|
||||
|
||||
for $index ( sort {$a cmp $b} keys( %array1 ) ){
|
||||
$code = $array1{ $index };
|
||||
for $index (sort { $a cmp $b } keys(%array1))
|
||||
{
|
||||
$code = $array1{$index};
|
||||
$count1--;
|
||||
if( $count1 == 0 ){
|
||||
printf FILE " {0x%s, 0x%s, 0x%04x} /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
|
||||
} else {
|
||||
printf FILE " {0x%s, 0x%s, 0x%04x}, /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
|
||||
if ($count1 == 0)
|
||||
{
|
||||
printf FILE " {0x%s, 0x%s, 0x%04x} /* %s */\n", substr($index, 0, 8),
|
||||
substr($index, 8, 8), $code, $comment1{$index};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%s, 0x%s, 0x%04x}, /* %s */\n",
|
||||
substr($index, 0, 8), substr($index, 8, 8), $code,
|
||||
$comment1{$index};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,66 +125,81 @@ close(FILE);
|
||||
|
||||
$in_file = "sjis-0213-2004-std.txt";
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
reset 'array1';
|
||||
reset 'comment';
|
||||
reset 'comment1';
|
||||
|
||||
while($line = <FILE> ){
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{ $code } = $str;
|
||||
$comment1{ $code } = $rest;
|
||||
while ($line = <FILE>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs = hex($u1);
|
||||
$utf1 = &ucs2utf($ucs);
|
||||
$ucs = hex($u2);
|
||||
$utf2 = &ucs2utf($ucs);
|
||||
$str = sprintf "%08x%08x", $utf1, $utf2;
|
||||
$array1{$code} = $str;
|
||||
$comment1{$code} = $rest;
|
||||
$count1++;
|
||||
next;
|
||||
} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
$rest = "U+" . $u . $3;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
next;
|
||||
}
|
||||
|
||||
$ucs = hex($u);
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF-8: %08x UCS: %04x Shift JIS: %04x\n",$utf, $ucs, $code;
|
||||
printf STDERR "Previous value: UTF-8: %08x\n", $array{ $utf };
|
||||
$utf = &ucs2utf($ucs);
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR
|
||||
"Warning: duplicate UTF-8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
|
||||
$ucs, $code;
|
||||
printf STDERR "Previous value: UTF-8: %08x\n", $array{$utf};
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
|
||||
$array{ $code } = $utf;
|
||||
$comment{ $utf } = $rest;
|
||||
$array{$code} = $utf;
|
||||
$comment{$utf} = $rest;
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "shift_jis_2004_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_SHIFTJIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
printf FILE " {0x%04x, 0x%08x} /* %s */\n", $index, $code, $comment{ $code };
|
||||
} else {
|
||||
printf FILE " {0x%04x, 0x%08x}, /* %s */\n", $index, $code, $comment{ $code };
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%08x} /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%08x}, /* %s */\n", $index, $code,
|
||||
$comment{$code};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,19 +207,26 @@ print FILE "};\n";
|
||||
close(FILE);
|
||||
|
||||
$file = "shift_jis_2004_to_utf8_combined.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "/*\n";
|
||||
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
|
||||
print FILE " */\n";
|
||||
print FILE "static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
|
||||
print FILE
|
||||
"static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array1 ) ){
|
||||
$code = $array1{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array1))
|
||||
{
|
||||
$code = $array1{$index};
|
||||
$count1--;
|
||||
if( $count1 == 0 ){
|
||||
printf FILE " {0x%04x, 0x%s, 0x%s} /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
|
||||
} else {
|
||||
printf FILE " {0x%04x, 0x%s, 0x%s}, /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
|
||||
if ($count1 == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%s, 0x%s} /* %s */\n", $index,
|
||||
substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%s, 0x%s}, /* %s */\n", $index,
|
||||
substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -22,60 +22,68 @@ require "ucs2utf.pl";
|
||||
# first generate UTF-8 --> SJIS table
|
||||
|
||||
$in_file = "CP932.TXT";
|
||||
$count = 0;
|
||||
$count = 0;
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
while( <FILE> ){
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
$utf = &ucs2utf($ucs);
|
||||
if((( $code >= 0xed40 )
|
||||
&& ( $code <= 0xeefc ))
|
||||
|| (( $code >= 0x8754 )
|
||||
&&( $code <= 0x875d ))
|
||||
|| ( $code == 0x878a )
|
||||
|| ( $code == 0x8782 )
|
||||
|| ( $code == 0x8784 )
|
||||
|| ( $code == 0xfa5b )
|
||||
|| ( $code == 0xfa54 )
|
||||
|| (( $code >= 0x8790 )
|
||||
&& ( $code <= 0x8792 ))
|
||||
|| (( $code >= 0x8795 )
|
||||
&& ( $code <= 0x8797 ))
|
||||
|| (( $code >= 0x879a )
|
||||
&& ( $code <= 0x879c )))
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8 : UCS=0x%04x SJIS=0x%04x\n",$ucs,$code;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{ $utf } = $code;
|
||||
}
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if ((($code >= 0xed40) && ($code <= 0xeefc))
|
||||
|| ( ($code >= 0x8754)
|
||||
&& ($code <= 0x875d))
|
||||
|| ($code == 0x878a)
|
||||
|| ($code == 0x8782)
|
||||
|| ($code == 0x8784)
|
||||
|| ($code == 0xfa5b)
|
||||
|| ($code == 0xfa54)
|
||||
|| ( ($code >= 0x8790)
|
||||
&& ($code <= 0x8792))
|
||||
|| ( ($code >= 0x8795)
|
||||
&& ($code <= 0x8797))
|
||||
|| ( ($code >= 0x879a)
|
||||
&& ($code <= 0x879c)))
|
||||
{
|
||||
printf STDERR
|
||||
"Warning: duplicate UTF8 : UCS=0x%04x SJIS=0x%04x\n", $ucs,
|
||||
$code;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{$utf} = $code;
|
||||
}
|
||||
}
|
||||
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# first, generate UTF8 --> SJIS table
|
||||
#
|
||||
|
||||
$file = "utf8_to_sjis.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmapSJIS[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -87,37 +95,44 @@ close(FILE);
|
||||
# then generate SJIS --> UTF8 table
|
||||
#
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
$count = 0;
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080 ){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
$count++;
|
||||
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = "sjis_to_utf8.map";
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmapSJIS[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -18,80 +18,88 @@
|
||||
require "ucs2utf.pl";
|
||||
|
||||
%filename = (
|
||||
'WIN866' => 'CP866.TXT',
|
||||
'WIN874' => 'CP874.TXT',
|
||||
'WIN1250' => 'CP1250.TXT',
|
||||
'WIN1251' => 'CP1251.TXT',
|
||||
'WIN1252' => 'CP1252.TXT',
|
||||
'WIN1253' => 'CP1253.TXT',
|
||||
'WIN1254' => 'CP1254.TXT',
|
||||
'WIN1255' => 'CP1255.TXT',
|
||||
'WIN1256' => 'CP1256.TXT',
|
||||
'WIN1257' => 'CP1257.TXT',
|
||||
'WIN1258' => 'CP1258.TXT',
|
||||
'ISO8859_2' => '8859-2.TXT',
|
||||
'ISO8859_3' => '8859-3.TXT',
|
||||
'ISO8859_4' => '8859-4.TXT',
|
||||
'ISO8859_5' => '8859-5.TXT',
|
||||
'ISO8859_6' => '8859-6.TXT',
|
||||
'ISO8859_7' => '8859-7.TXT',
|
||||
'ISO8859_8' => '8859-8.TXT',
|
||||
'ISO8859_9' => '8859-9.TXT',
|
||||
'WIN866' => 'CP866.TXT',
|
||||
'WIN874' => 'CP874.TXT',
|
||||
'WIN1250' => 'CP1250.TXT',
|
||||
'WIN1251' => 'CP1251.TXT',
|
||||
'WIN1252' => 'CP1252.TXT',
|
||||
'WIN1253' => 'CP1253.TXT',
|
||||
'WIN1254' => 'CP1254.TXT',
|
||||
'WIN1255' => 'CP1255.TXT',
|
||||
'WIN1256' => 'CP1256.TXT',
|
||||
'WIN1257' => 'CP1257.TXT',
|
||||
'WIN1258' => 'CP1258.TXT',
|
||||
'ISO8859_2' => '8859-2.TXT',
|
||||
'ISO8859_3' => '8859-3.TXT',
|
||||
'ISO8859_4' => '8859-4.TXT',
|
||||
'ISO8859_5' => '8859-5.TXT',
|
||||
'ISO8859_6' => '8859-6.TXT',
|
||||
'ISO8859_7' => '8859-7.TXT',
|
||||
'ISO8859_8' => '8859-8.TXT',
|
||||
'ISO8859_9' => '8859-9.TXT',
|
||||
'ISO8859_10' => '8859-10.TXT',
|
||||
'ISO8859_13' => '8859-13.TXT',
|
||||
'ISO8859_14' => '8859-14.TXT',
|
||||
'ISO8859_15' => '8859-15.TXT',
|
||||
'ISO8859_16' => '8859-16.TXT',
|
||||
'KOI8R' => 'KOI8-R.TXT',
|
||||
'KOI8U' => 'KOI8-U.TXT',
|
||||
'GBK' => 'CP936.TXT',
|
||||
'UHC' => 'CP949.TXT',
|
||||
'JOHAB' => 'JOHAB.TXT',
|
||||
);
|
||||
'KOI8R' => 'KOI8-R.TXT',
|
||||
'KOI8U' => 'KOI8-U.TXT',
|
||||
'GBK' => 'CP936.TXT',
|
||||
'UHC' => 'CP949.TXT',
|
||||
'JOHAB' => 'JOHAB.TXT',);
|
||||
|
||||
@charsets = keys(filename);
|
||||
@charsets = @ARGV if scalar(@ARGV);
|
||||
foreach $charset (@charsets) {
|
||||
foreach $charset (@charsets)
|
||||
{
|
||||
|
||||
#
|
||||
# first, generate UTF8-> charset table
|
||||
#
|
||||
$in_file = $filename{$charset};
|
||||
#
|
||||
# first, generate UTF8-> charset table
|
||||
#
|
||||
$in_file = $filename{$charset};
|
||||
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if( $code >= 0x80 && $ucs >= 0x0080){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $utf } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$utf} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{ $utf } = $code;
|
||||
$array{$utf} = $code;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = lc("utf8_to_${charset}.map");
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_utf_to_local ULmap${charset}[ $count ] = {\n";
|
||||
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$code = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$code = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
||||
}
|
||||
}
|
||||
@@ -99,42 +107,50 @@ foreach $charset (@charsets) {
|
||||
print FILE "};\n";
|
||||
close(FILE);
|
||||
|
||||
#
|
||||
# then generate character set code ->UTF8 table
|
||||
#
|
||||
open( FILE, $in_file ) || die( "cannot open $in_file" );
|
||||
#
|
||||
# then generate character set code ->UTF8 table
|
||||
#
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
|
||||
reset 'array';
|
||||
|
||||
while( <FILE> ){
|
||||
while (<FILE>)
|
||||
{
|
||||
chop;
|
||||
if( /^#/ ){
|
||||
if (/^#/)
|
||||
{
|
||||
next;
|
||||
}
|
||||
( $c, $u, $rest ) = split;
|
||||
$ucs = hex($u);
|
||||
($c, $u, $rest) = split;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if($code >= 0x80 && $ucs >= 0x0080){
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
$utf = &ucs2utf($ucs);
|
||||
if( $array{ $code } ne "" ){
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
|
||||
if ($array{$code} ne "")
|
||||
{
|
||||
printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
|
||||
next;
|
||||
}
|
||||
$count++;
|
||||
$array{ $code } = $utf;
|
||||
$array{$code} = $utf;
|
||||
}
|
||||
}
|
||||
close( FILE );
|
||||
close(FILE);
|
||||
|
||||
$file = lc("${charset}_to_utf8.map");
|
||||
open( FILE, "> $file" ) || die( "cannot open $file" );
|
||||
open(FILE, "> $file") || die("cannot open $file");
|
||||
print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n";
|
||||
for $index ( sort {$a <=> $b} keys( %array ) ){
|
||||
$utf = $array{ $index };
|
||||
for $index (sort { $a <=> $b } keys(%array))
|
||||
{
|
||||
$utf = $array{$index};
|
||||
$count--;
|
||||
if( $count == 0 ){
|
||||
if ($count == 0)
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
|
||||
}
|
||||
}
|
||||
|
@@ -4,24 +4,32 @@
|
||||
# src/backend/utils/mb/Unicode/ucs2utf.pl
|
||||
# convert UCS-4 to UTF-8
|
||||
#
|
||||
sub ucs2utf {
|
||||
local($ucs) = @_;
|
||||
sub ucs2utf
|
||||
{
|
||||
local ($ucs) = @_;
|
||||
local $utf;
|
||||
|
||||
if ($ucs <= 0x007f) {
|
||||
if ($ucs <= 0x007f)
|
||||
{
|
||||
$utf = $ucs;
|
||||
} elsif ($ucs > 0x007f && $ucs <= 0x07ff) {
|
||||
}
|
||||
elsif ($ucs > 0x007f && $ucs <= 0x07ff)
|
||||
{
|
||||
$utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8);
|
||||
} elsif ($ucs > 0x07ff && $ucs <= 0xffff) {
|
||||
$utf = ((($ucs >> 12) | 0xe0) << 16) |
|
||||
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) |
|
||||
(($ucs & 0x003f) | 0x80);
|
||||
} else {
|
||||
$utf = ((($ucs >> 18) | 0xf0) << 24) |
|
||||
(((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
|
||||
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) |
|
||||
(($ucs & 0x003f) | 0x80);
|
||||
}
|
||||
return($utf);
|
||||
}
|
||||
elsif ($ucs > 0x07ff && $ucs <= 0xffff)
|
||||
{
|
||||
$utf =
|
||||
((($ucs >> 12) | 0xe0) << 16) |
|
||||
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
|
||||
}
|
||||
else
|
||||
{
|
||||
$utf =
|
||||
((($ucs >> 18) | 0xf0) << 24) |
|
||||
(((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
|
||||
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
|
||||
}
|
||||
return ($utf);
|
||||
}
|
||||
1;
|
||||
|
@@ -32,16 +32,16 @@ my $CMPPARAMS;
|
||||
|
||||
emit_qsort_boilerplate();
|
||||
|
||||
$SUFFIX = 'tuple';
|
||||
$EXTRAARGS = ', SortTupleComparator cmp_tuple, Tuplesortstate *state';
|
||||
$SUFFIX = 'tuple';
|
||||
$EXTRAARGS = ', SortTupleComparator cmp_tuple, Tuplesortstate *state';
|
||||
$EXTRAPARAMS = ', cmp_tuple, state';
|
||||
$CMPPARAMS = ', state';
|
||||
$CMPPARAMS = ', state';
|
||||
emit_qsort_implementation();
|
||||
|
||||
$SUFFIX = 'ssup';
|
||||
$EXTRAARGS = ', SortSupport ssup';
|
||||
$SUFFIX = 'ssup';
|
||||
$EXTRAARGS = ', SortSupport ssup';
|
||||
$EXTRAPARAMS = ', ssup';
|
||||
$CMPPARAMS = ', ssup';
|
||||
$CMPPARAMS = ', ssup';
|
||||
print <<'EOM';
|
||||
#define cmp_ssup(a, b, ssup) \
|
||||
ApplySortComparator((a)->datum1, (a)->isnull1, \
|
||||
|
Reference in New Issue
Block a user