1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-09 06:21:09 +03:00

Post-PG 10 beta1 pgperltidy run

This commit is contained in:
Bruce Momjian
2017-05-17 19:01:23 -04:00
parent a6fd7b7a5f
commit ce55481032
66 changed files with 1869 additions and 1184 deletions

View File

@@ -58,6 +58,7 @@ foreach my $column (@{ $catalogs->{pg_proc}->{columns} })
my $data = $catalogs->{pg_proc}->{data};
foreach my $row (@$data)
{
# Split line into tokens without interpreting their meaning.
my %bki_values;
@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
@@ -75,14 +76,17 @@ foreach my $row (@$data)
}
# Emit headers for both files
my $tmpext = ".tmp$$";
my $oidsfile = $output_path . 'fmgroids.h';
my $tmpext = ".tmp$$";
my $oidsfile = $output_path . 'fmgroids.h';
my $protosfile = $output_path . 'fmgrprotos.h';
my $tabfile = $output_path . 'fmgrtab.c';
my $tabfile = $output_path . 'fmgrtab.c';
open my $ofh, '>', $oidsfile . $tmpext or die "Could not open $oidsfile$tmpext: $!";
open my $pfh, '>', $protosfile . $tmpext or die "Could not open $protosfile$tmpext: $!";
open my $tfh, '>', $tabfile . $tmpext or die "Could not open $tabfile$tmpext: $!";
open my $ofh, '>', $oidsfile . $tmpext
or die "Could not open $oidsfile$tmpext: $!";
open my $pfh, '>', $protosfile . $tmpext
or die "Could not open $protosfile$tmpext: $!";
open my $tfh, '>', $tabfile . $tmpext
or die "Could not open $tabfile$tmpext: $!";
print $ofh
qq|/*-------------------------------------------------------------------------
@@ -218,9 +222,9 @@ close($pfh);
close($tfh);
# Finally, rename the completed files into place.
Catalog::RenameTempFile($oidsfile, $tmpext);
Catalog::RenameTempFile($oidsfile, $tmpext);
Catalog::RenameTempFile($protosfile, $tmpext);
Catalog::RenameTempFile($tabfile, $tmpext);
Catalog::RenameTempFile($tabfile, $tmpext);
sub usage
{

View File

@@ -35,9 +35,10 @@ my $all = &read_source("BIG5.TXT");
# Load CP950.TXT
my $cp950txt = &read_source("CP950.TXT");
foreach my $i (@$cp950txt) {
foreach my $i (@$cp950txt)
{
my $code = $i->{code};
my $ucs = $i->{ucs};
my $ucs = $i->{ucs};
# Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
# from CP950.TXT
@@ -46,22 +47,24 @@ foreach my $i (@$cp950txt) {
&& $code >= 0xf9d6
&& $code <= 0xf9dc)
{
push @$all, {code => $code,
ucs => $ucs,
comment => $i->{comment},
direction => BOTH,
f => $i->{f},
l => $i->{l} };
push @$all,
{ code => $code,
ucs => $ucs,
comment => $i->{comment},
direction => BOTH,
f => $i->{f},
l => $i->{l} };
}
}
foreach my $i (@$all) {
foreach my $i (@$all)
{
my $code = $i->{code};
my $ucs = $i->{ucs};
my $ucs = $i->{ucs};
# BIG5.TXT maps several BIG5 characters to U+FFFD. The UTF-8 to BIG5 mapping can
# contain only one of them. XXX: Doesn't really make sense to include any of them,
# but for historical reasons, we map the first one of them.
# BIG5.TXT maps several BIG5 characters to U+FFFD. The UTF-8 to BIG5 mapping can
# contain only one of them. XXX: Doesn't really make sense to include any of them,
# but for historical reasons, we map the first one of them.
if ($i->{ucs} == 0xFFFD && $i->{code} != 0xA15A)
{
$i->{direction} = TO_UNICODE;

View File

@@ -38,8 +38,10 @@ while (<$in>)
# a lot of extra characters on top of the GB2312 character set that
# EUC_CN encodes. Filter out those extra characters.
next if (($code & 0xFF) < 0xA1);
next if (!($code >= 0xA100 && $code <= 0xA9FF ||
$code >= 0xB000 && $code <= 0xF7FF));
next
if (
!( $code >= 0xA100 && $code <= 0xA9FF
|| $code >= 0xB000 && $code <= 0xF7FF));
next if ($code >= 0xA2A1 && $code <= 0xA2B0);
next if ($code >= 0xA2E3 && $code <= 0xA2E4);
@@ -67,13 +69,12 @@ while (<$in>)
$ucs = 0x2015;
}
push @mapping, {
ucs => $ucs,
code => $code,
push @mapping,
{ ucs => $ucs,
code => $code,
direction => BOTH,
f => $in_file,
l => $.
};
f => $in_file,
l => $. };
}
close($in);

View File

@@ -24,6 +24,7 @@ while (my $line = <$in>)
{
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
{
# combined characters
my ($c, $u1, $u2) = ($1, $2, $3);
my $rest = "U+" . $u1 . "+" . $u2 . $4;
@@ -31,17 +32,18 @@ while (my $line = <$in>)
my $ucs1 = hex($u1);
my $ucs2 = hex($u2);
push @all, { direction => BOTH,
ucs => $ucs1,
ucs_second => $ucs2,
code => $code,
comment => $rest,
f => $in_file,
l => $.
};
push @all,
{ direction => BOTH,
ucs => $ucs1,
ucs_second => $ucs2,
code => $code,
comment => $rest,
f => $in_file,
l => $. };
}
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
{
# non-combined characters
my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
my $ucs = hex($u);
@@ -49,13 +51,13 @@ while (my $line = <$in>)
next if ($code < 0x80 && $ucs < 0x80);
push @all, { direction => BOTH,
ucs => $ucs,
code => $code,
comment => $rest,
f => $in_file,
l => $.
};
push @all,
{ direction => BOTH,
ucs => $ucs,
code => $code,
comment => $rest,
f => $in_file,
l => $. };
}
}
close($in);

View File

@@ -21,7 +21,9 @@ my $jis0212 = &read_source("JIS0212.TXT");
my @mapping;
foreach my $i (@$jis0212) {
foreach my $i (@$jis0212)
{
# We have a different mapping for this in the EUC_JP to UTF-8 direction.
if ($i->{code} == 0x2243)
{
@@ -48,13 +50,14 @@ foreach my $i (@$jis0212) {
# Load CP932.TXT.
my $ct932 = &read_source("CP932.TXT");
foreach my $i (@$ct932) {
foreach my $i (@$ct932)
{
my $sjis = $i->{code};
# We have a different mapping for this in the EUC_JP to UTF-8 direction.
if ($sjis == 0xeefa ||
$sjis == 0xeefb ||
$sjis == 0xeefc)
if ( $sjis == 0xeefa
|| $sjis == 0xeefb
|| $sjis == 0xeefc)
{
next;
}
@@ -63,8 +66,10 @@ foreach my $i (@$ct932) {
{
my $jis = &sjis2jis($sjis);
$i->{code} = $jis | ($jis < 0x100 ? 0x8e00 :
($sjis >= 0xeffd ? 0x8f8080 : 0x8080));
$i->{code} = $jis | (
$jis < 0x100
? 0x8e00
: ($sjis >= 0xeffd ? 0x8f8080 : 0x8080));
# Remember the SJIS code for later.
$i->{sjis} = $sjis;
@@ -73,13 +78,14 @@ foreach my $i (@$ct932) {
}
}
foreach my $i (@mapping) {
foreach my $i (@mapping)
{
my $sjis = $i->{sjis};
# These SJIS characters are excluded completely.
if ($sjis >= 0xed00 && $sjis <= 0xeef9 ||
$sjis >= 0xfa54 && $sjis <= 0xfa56 ||
$sjis >= 0xfa58 && $sjis <= 0xfc4b)
if ( $sjis >= 0xed00 && $sjis <= 0xeef9
|| $sjis >= 0xfa54 && $sjis <= 0xfa56
|| $sjis >= 0xfa58 && $sjis <= 0xfc4b)
{
$i->{direction} = NONE;
next;
@@ -92,10 +98,16 @@ foreach my $i (@mapping) {
next;
}
if ($sjis == 0x8790 || $sjis == 0x8791 || $sjis == 0x8792 ||
$sjis == 0x8795 || $sjis == 0x8796 || $sjis == 0x8797 ||
$sjis == 0x879a || $sjis == 0x879b || $sjis == 0x879c ||
($sjis >= 0xfa4a && $sjis <= 0xfa53))
if ( $sjis == 0x8790
|| $sjis == 0x8791
|| $sjis == 0x8792
|| $sjis == 0x8795
|| $sjis == 0x8796
|| $sjis == 0x8797
|| $sjis == 0x879a
|| $sjis == 0x879b
|| $sjis == 0x879c
|| ($sjis >= 0xfa4a && $sjis <= 0xfa53))
{
$i->{direction} = TO_UNICODE;
next;
@@ -103,95 +115,352 @@ foreach my $i (@mapping) {
}
push @mapping, (
{direction => BOTH, ucs => 0x4efc, code => 0x8ff4af, comment => '# CJK(4EFC)'},
{direction => BOTH, ucs => 0x50f4, code => 0x8ff4b0, comment => '# CJK(50F4)'},
{direction => BOTH, ucs => 0x51EC, code => 0x8ff4b1, comment => '# CJK(51EC)'},
{direction => BOTH, ucs => 0x5307, code => 0x8ff4b2, comment => '# CJK(5307)'},
{direction => BOTH, ucs => 0x5324, code => 0x8ff4b3, comment => '# CJK(5324)'},
{direction => BOTH, ucs => 0x548A, code => 0x8ff4b5, comment => '# CJK(548A)'},
{direction => BOTH, ucs => 0x5759, code => 0x8ff4b6, comment => '# CJK(5759)'},
{direction => BOTH, ucs => 0x589E, code => 0x8ff4b9, comment => '# CJK(589E)'},
{direction => BOTH, ucs => 0x5BEC, code => 0x8ff4ba, comment => '# CJK(5BEC)'},
{direction => BOTH, ucs => 0x5CF5, code => 0x8ff4bb, comment => '# CJK(5CF5)'},
{direction => BOTH, ucs => 0x5D53, code => 0x8ff4bc, comment => '# CJK(5D53)'},
{direction => BOTH, ucs => 0x5FB7, code => 0x8ff4be, comment => '# CJK(5FB7)'},
{direction => BOTH, ucs => 0x6085, code => 0x8ff4bf, comment => '# CJK(6085)'},
{direction => BOTH, ucs => 0x6120, code => 0x8ff4c0, comment => '# CJK(6120)'},
{direction => BOTH, ucs => 0x654E, code => 0x8ff4c1, comment => '# CJK(654E)'},
{direction => BOTH, ucs => 0x663B, code => 0x8ff4c2, comment => '# CJK(663B)'},
{direction => BOTH, ucs => 0x6665, code => 0x8ff4c3, comment => '# CJK(6665)'},
{direction => BOTH, ucs => 0x6801, code => 0x8ff4c6, comment => '# CJK(6801)'},
{direction => BOTH, ucs => 0x6A6B, code => 0x8ff4c9, comment => '# CJK(6A6B)'},
{direction => BOTH, ucs => 0x6AE2, code => 0x8ff4ca, comment => '# CJK(6AE2)'},
{direction => BOTH, ucs => 0x6DF2, code => 0x8ff4cc, comment => '# CJK(6DF2)'},
{direction => BOTH, ucs => 0x6DF8, code => 0x8ff4cb, comment => '# CJK(6DF8)'},
{direction => BOTH, ucs => 0x7028, code => 0x8ff4cd, comment => '# CJK(7028)'},
{direction => BOTH, ucs => 0x70BB, code => 0x8ff4ae, comment => '# CJK(70BB)'},
{direction => BOTH, ucs => 0x7501, code => 0x8ff4d0, comment => '# CJK(7501)'},
{direction => BOTH, ucs => 0x7682, code => 0x8ff4d1, comment => '# CJK(7682)'},
{direction => BOTH, ucs => 0x769E, code => 0x8ff4d2, comment => '# CJK(769E)'},
{direction => BOTH, ucs => 0x7930, code => 0x8ff4d4, comment => '# CJK(7930)'},
{direction => BOTH, ucs => 0x7AE7, code => 0x8ff4d9, comment => '# CJK(7AE7)'},
{direction => BOTH, ucs => 0x7DA0, code => 0x8ff4dc, comment => '# CJK(7DA0)'},
{direction => BOTH, ucs => 0x7DD6, code => 0x8ff4dd, comment => '# CJK(7DD6)'},
{direction => BOTH, ucs => 0x8362, code => 0x8ff4df, comment => '# CJK(8362)'},
{direction => BOTH, ucs => 0x85B0, code => 0x8ff4e1, comment => '# CJK(85B0)'},
{direction => BOTH, ucs => 0x8807, code => 0x8ff4e4, comment => '# CJK(8807)'},
{direction => BOTH, ucs => 0x8B7F, code => 0x8ff4e6, comment => '# CJK(8B7F)'},
{direction => BOTH, ucs => 0x8CF4, code => 0x8ff4e7, comment => '# CJK(8CF4)'},
{direction => BOTH, ucs => 0x8D76, code => 0x8ff4e8, comment => '# CJK(8D76)'},
{direction => BOTH, ucs => 0x90DE, code => 0x8ff4ec, comment => '# CJK(90DE)'},
{direction => BOTH, ucs => 0x9115, code => 0x8ff4ee, comment => '# CJK(9115)'},
{direction => BOTH, ucs => 0x9592, code => 0x8ff4f1, comment => '# CJK(9592)'},
{direction => BOTH, ucs => 0x973B, code => 0x8ff4f4, comment => '# CJK(973B)'},
{direction => BOTH, ucs => 0x974D, code => 0x8ff4f5, comment => '# CJK(974D)'},
{direction => BOTH, ucs => 0x9751, code => 0x8ff4f6, comment => '# CJK(9751)'},
{direction => BOTH, ucs => 0x999E, code => 0x8ff4fa, comment => '# CJK(999E)'},
{direction => BOTH, ucs => 0x9AD9, code => 0x8ff4fb, comment => '# CJK(9AD9)'},
{direction => BOTH, ucs => 0x9B72, code => 0x8ff4fc, comment => '# CJK(9B72)'},
{direction => BOTH, ucs => 0x9ED1, code => 0x8ff4fe, comment => '# CJK(9ED1)'},
{direction => BOTH, ucs => 0xF929, code => 0x8ff4c5, comment => '# CJK COMPATIBILITY IDEOGRAPH-F929'},
{direction => BOTH, ucs => 0xF9DC, code => 0x8ff4f2, comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC'},
{direction => BOTH, ucs => 0xFA0E, code => 0x8ff4b4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E'},
{direction => BOTH, ucs => 0xFA0F, code => 0x8ff4b7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F'},
{direction => BOTH, ucs => 0xFA10, code => 0x8ff4b8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10'},
{direction => BOTH, ucs => 0xFA11, code => 0x8ff4bd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11'},
{direction => BOTH, ucs => 0xFA12, code => 0x8ff4c4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12'},
{direction => BOTH, ucs => 0xFA13, code => 0x8ff4c7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13'},
{direction => BOTH, ucs => 0xFA14, code => 0x8ff4c8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14'},
{direction => BOTH, ucs => 0xFA15, code => 0x8ff4ce, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15'},
{direction => BOTH, ucs => 0xFA16, code => 0x8ff4cf, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16'},
{direction => BOTH, ucs => 0xFA17, code => 0x8ff4d3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17'},
{direction => BOTH, ucs => 0xFA18, code => 0x8ff4d5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18'},
{direction => BOTH, ucs => 0xFA19, code => 0x8ff4d6, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19'},
{direction => BOTH, ucs => 0xFA1A, code => 0x8ff4d7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A'},
{direction => BOTH, ucs => 0xFA1B, code => 0x8ff4d8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B'},
{direction => BOTH, ucs => 0xFA1C, code => 0x8ff4da, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C'},
{direction => BOTH, ucs => 0xFA1D, code => 0x8ff4db, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D'},
{direction => BOTH, ucs => 0xFA1E, code => 0x8ff4de, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E'},
{direction => BOTH, ucs => 0xFA1F, code => 0x8ff4e0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F'},
{direction => BOTH, ucs => 0xFA20, code => 0x8ff4e2, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20'},
{direction => BOTH, ucs => 0xFA21, code => 0x8ff4e3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21'},
{direction => BOTH, ucs => 0xFA22, code => 0x8ff4e5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22'},
{direction => BOTH, ucs => 0xFA23, code => 0x8ff4e9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23'},
{direction => BOTH, ucs => 0xFA24, code => 0x8ff4ea, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24'},
{direction => BOTH, ucs => 0xFA25, code => 0x8ff4eb, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25'},
{direction => BOTH, ucs => 0xFA26, code => 0x8ff4ed, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26'},
{direction => BOTH, ucs => 0xFA27, code => 0x8ff4ef, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27'},
{direction => BOTH, ucs => 0xFA28, code => 0x8ff4f0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28'},
{direction => BOTH, ucs => 0xFA29, code => 0x8ff4f3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29'},
{direction => BOTH, ucs => 0xFA2A, code => 0x8ff4f7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A'},
{direction => BOTH, ucs => 0xFA2B, code => 0x8ff4f8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B'},
{direction => BOTH, ucs => 0xFA2C, code => 0x8ff4f9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C'},
{direction => BOTH, ucs => 0xFA2D, code => 0x8ff4fd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D'},
{direction => BOTH, ucs => 0xFF07, code => 0x8ff4a9, comment => '# FULLWIDTH APOSTROPHE'},
{direction => BOTH, ucs => 0xFFE4, code => 0x8fa2c3, comment => '# FULLWIDTH BROKEN BAR'},
{ direction => BOTH,
ucs => 0x4efc,
code => 0x8ff4af,
comment => '# CJK(4EFC)' },
{ direction => BOTH,
ucs => 0x50f4,
code => 0x8ff4b0,
comment => '# CJK(50F4)' },
{ direction => BOTH,
ucs => 0x51EC,
code => 0x8ff4b1,
comment => '# CJK(51EC)' },
{ direction => BOTH,
ucs => 0x5307,
code => 0x8ff4b2,
comment => '# CJK(5307)' },
{ direction => BOTH,
ucs => 0x5324,
code => 0x8ff4b3,
comment => '# CJK(5324)' },
{ direction => BOTH,
ucs => 0x548A,
code => 0x8ff4b5,
comment => '# CJK(548A)' },
{ direction => BOTH,
ucs => 0x5759,
code => 0x8ff4b6,
comment => '# CJK(5759)' },
{ direction => BOTH,
ucs => 0x589E,
code => 0x8ff4b9,
comment => '# CJK(589E)' },
{ direction => BOTH,
ucs => 0x5BEC,
code => 0x8ff4ba,
comment => '# CJK(5BEC)' },
{ direction => BOTH,
ucs => 0x5CF5,
code => 0x8ff4bb,
comment => '# CJK(5CF5)' },
{ direction => BOTH,
ucs => 0x5D53,
code => 0x8ff4bc,
comment => '# CJK(5D53)' },
{ direction => BOTH,
ucs => 0x5FB7,
code => 0x8ff4be,
comment => '# CJK(5FB7)' },
{ direction => BOTH,
ucs => 0x6085,
code => 0x8ff4bf,
comment => '# CJK(6085)' },
{ direction => BOTH,
ucs => 0x6120,
code => 0x8ff4c0,
comment => '# CJK(6120)' },
{ direction => BOTH,
ucs => 0x654E,
code => 0x8ff4c1,
comment => '# CJK(654E)' },
{ direction => BOTH,
ucs => 0x663B,
code => 0x8ff4c2,
comment => '# CJK(663B)' },
{ direction => BOTH,
ucs => 0x6665,
code => 0x8ff4c3,
comment => '# CJK(6665)' },
{ direction => BOTH,
ucs => 0x6801,
code => 0x8ff4c6,
comment => '# CJK(6801)' },
{ direction => BOTH,
ucs => 0x6A6B,
code => 0x8ff4c9,
comment => '# CJK(6A6B)' },
{ direction => BOTH,
ucs => 0x6AE2,
code => 0x8ff4ca,
comment => '# CJK(6AE2)' },
{ direction => BOTH,
ucs => 0x6DF2,
code => 0x8ff4cc,
comment => '# CJK(6DF2)' },
{ direction => BOTH,
ucs => 0x6DF8,
code => 0x8ff4cb,
comment => '# CJK(6DF8)' },
{ direction => BOTH,
ucs => 0x7028,
code => 0x8ff4cd,
comment => '# CJK(7028)' },
{ direction => BOTH,
ucs => 0x70BB,
code => 0x8ff4ae,
comment => '# CJK(70BB)' },
{ direction => BOTH,
ucs => 0x7501,
code => 0x8ff4d0,
comment => '# CJK(7501)' },
{ direction => BOTH,
ucs => 0x7682,
code => 0x8ff4d1,
comment => '# CJK(7682)' },
{ direction => BOTH,
ucs => 0x769E,
code => 0x8ff4d2,
comment => '# CJK(769E)' },
{ direction => BOTH,
ucs => 0x7930,
code => 0x8ff4d4,
comment => '# CJK(7930)' },
{ direction => BOTH,
ucs => 0x7AE7,
code => 0x8ff4d9,
comment => '# CJK(7AE7)' },
{ direction => BOTH,
ucs => 0x7DA0,
code => 0x8ff4dc,
comment => '# CJK(7DA0)' },
{ direction => BOTH,
ucs => 0x7DD6,
code => 0x8ff4dd,
comment => '# CJK(7DD6)' },
{ direction => BOTH,
ucs => 0x8362,
code => 0x8ff4df,
comment => '# CJK(8362)' },
{ direction => BOTH,
ucs => 0x85B0,
code => 0x8ff4e1,
comment => '# CJK(85B0)' },
{ direction => BOTH,
ucs => 0x8807,
code => 0x8ff4e4,
comment => '# CJK(8807)' },
{ direction => BOTH,
ucs => 0x8B7F,
code => 0x8ff4e6,
comment => '# CJK(8B7F)' },
{ direction => BOTH,
ucs => 0x8CF4,
code => 0x8ff4e7,
comment => '# CJK(8CF4)' },
{ direction => BOTH,
ucs => 0x8D76,
code => 0x8ff4e8,
comment => '# CJK(8D76)' },
{ direction => BOTH,
ucs => 0x90DE,
code => 0x8ff4ec,
comment => '# CJK(90DE)' },
{ direction => BOTH,
ucs => 0x9115,
code => 0x8ff4ee,
comment => '# CJK(9115)' },
{ direction => BOTH,
ucs => 0x9592,
code => 0x8ff4f1,
comment => '# CJK(9592)' },
{ direction => BOTH,
ucs => 0x973B,
code => 0x8ff4f4,
comment => '# CJK(973B)' },
{ direction => BOTH,
ucs => 0x974D,
code => 0x8ff4f5,
comment => '# CJK(974D)' },
{ direction => BOTH,
ucs => 0x9751,
code => 0x8ff4f6,
comment => '# CJK(9751)' },
{ direction => BOTH,
ucs => 0x999E,
code => 0x8ff4fa,
comment => '# CJK(999E)' },
{ direction => BOTH,
ucs => 0x9AD9,
code => 0x8ff4fb,
comment => '# CJK(9AD9)' },
{ direction => BOTH,
ucs => 0x9B72,
code => 0x8ff4fc,
comment => '# CJK(9B72)' },
{ direction => BOTH,
ucs => 0x9ED1,
code => 0x8ff4fe,
comment => '# CJK(9ED1)' },
{ direction => BOTH,
ucs => 0xF929,
code => 0x8ff4c5,
comment => '# CJK COMPATIBILITY IDEOGRAPH-F929' },
{ direction => BOTH,
ucs => 0xF9DC,
code => 0x8ff4f2,
comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC' },
{ direction => BOTH,
ucs => 0xFA0E,
code => 0x8ff4b4,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E' },
{ direction => BOTH,
ucs => 0xFA0F,
code => 0x8ff4b7,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F' },
{ direction => BOTH,
ucs => 0xFA10,
code => 0x8ff4b8,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10' },
{ direction => BOTH,
ucs => 0xFA11,
code => 0x8ff4bd,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11' },
{ direction => BOTH,
ucs => 0xFA12,
code => 0x8ff4c4,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12' },
{ direction => BOTH,
ucs => 0xFA13,
code => 0x8ff4c7,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13' },
{ direction => BOTH,
ucs => 0xFA14,
code => 0x8ff4c8,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14' },
{ direction => BOTH,
ucs => 0xFA15,
code => 0x8ff4ce,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15' },
{ direction => BOTH,
ucs => 0xFA16,
code => 0x8ff4cf,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16' },
{ direction => BOTH,
ucs => 0xFA17,
code => 0x8ff4d3,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17' },
{ direction => BOTH,
ucs => 0xFA18,
code => 0x8ff4d5,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18' },
{ direction => BOTH,
ucs => 0xFA19,
code => 0x8ff4d6,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19' },
{ direction => BOTH,
ucs => 0xFA1A,
code => 0x8ff4d7,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A' },
{ direction => BOTH,
ucs => 0xFA1B,
code => 0x8ff4d8,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B' },
{ direction => BOTH,
ucs => 0xFA1C,
code => 0x8ff4da,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C' },
{ direction => BOTH,
ucs => 0xFA1D,
code => 0x8ff4db,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D' },
{ direction => BOTH,
ucs => 0xFA1E,
code => 0x8ff4de,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E' },
{ direction => BOTH,
ucs => 0xFA1F,
code => 0x8ff4e0,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F' },
{ direction => BOTH,
ucs => 0xFA20,
code => 0x8ff4e2,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20' },
{ direction => BOTH,
ucs => 0xFA21,
code => 0x8ff4e3,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21' },
{ direction => BOTH,
ucs => 0xFA22,
code => 0x8ff4e5,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22' },
{ direction => BOTH,
ucs => 0xFA23,
code => 0x8ff4e9,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23' },
{ direction => BOTH,
ucs => 0xFA24,
code => 0x8ff4ea,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24' },
{ direction => BOTH,
ucs => 0xFA25,
code => 0x8ff4eb,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25' },
{ direction => BOTH,
ucs => 0xFA26,
code => 0x8ff4ed,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26' },
{ direction => BOTH,
ucs => 0xFA27,
code => 0x8ff4ef,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27' },
{ direction => BOTH,
ucs => 0xFA28,
code => 0x8ff4f0,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28' },
{ direction => BOTH,
ucs => 0xFA29,
code => 0x8ff4f3,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29' },
{ direction => BOTH,
ucs => 0xFA2A,
code => 0x8ff4f7,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A' },
{ direction => BOTH,
ucs => 0xFA2B,
code => 0x8ff4f8,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B' },
{ direction => BOTH,
ucs => 0xFA2C,
code => 0x8ff4f9,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C' },
{ direction => BOTH,
ucs => 0xFA2D,
code => 0x8ff4fd,
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D' },
{ direction => BOTH,
ucs => 0xFF07,
code => 0x8ff4a9,
comment => '# FULLWIDTH APOSTROPHE' },
{ direction => BOTH,
ucs => 0xFFE4,
code => 0x8fa2c3,
comment => '# FULLWIDTH BROKEN BAR' },
# additional conversions for EUC_JP -> UTF-8 conversion
{direction => TO_UNICODE, ucs => 0x2116, code => 0x8ff4ac, comment => '# NUMERO SIGN'},
{direction => TO_UNICODE, ucs => 0x2121, code => 0x8ff4ad, comment => '# TELEPHONE SIGN'},
{direction => TO_UNICODE, ucs => 0x3231, code => 0x8ff4ab, comment => '# PARENTHESIZED IDEOGRAPH STOCK'}
);
# additional conversions for EUC_JP -> UTF-8 conversion
{ direction => TO_UNICODE,
ucs => 0x2116,
code => 0x8ff4ac,
comment => '# NUMERO SIGN' },
{ direction => TO_UNICODE,
ucs => 0x2121,
code => 0x8ff4ad,
comment => '# TELEPHONE SIGN' },
{ direction => TO_UNICODE,
ucs => 0x3231,
code => 0x8ff4ab,
comment => '# PARENTHESIZED IDEOGRAPH STOCK' });
print_conversion_tables($this_script, "EUC_JP", \@mapping);
@@ -215,6 +484,7 @@ sub sjis2jis
if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
{
# This region (115-ku) is out of the range of JIS codes, but for
# convenience in generating code in EUC CODESET 3, move it to the
# seemingly duplicate region (83-84-ku).

View File

@@ -31,10 +31,24 @@ foreach my $i (@$mapping)
}
# Some extra characters that are not in KSX1001.TXT
push @$mapping,(
{direction => BOTH, ucs => 0x20AC, code => 0xa2e6, comment => '# EURO SIGN', f => $this_script, l => __LINE__},
{direction => BOTH, ucs => 0x00AE, code => 0xa2e7, comment => '# REGISTERED SIGN', f => $this_script, l => __LINE__ },
{direction => BOTH, ucs => 0x327E, code => 0xa2e8, comment => '# CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ }
);
push @$mapping,
( { direction => BOTH,
ucs => 0x20AC,
code => 0xa2e6,
comment => '# EURO SIGN',
f => $this_script,
l => __LINE__ },
{ direction => BOTH,
ucs => 0x00AE,
code => 0xa2e7,
comment => '# REGISTERED SIGN',
f => $this_script,
l => __LINE__ },
{ direction => BOTH,
ucs => 0x327E,
code => 0xa2e8,
comment => '# CIRCLED HANGUL IEUNG U',
f => $this_script,
l => __LINE__ });
print_conversion_tables($this_script, "EUC_KR", $mapping);

View File

@@ -28,8 +28,8 @@ my @extras;
foreach my $i (@$mapping)
{
my $ucs = $i->{ucs};
my $code = $i->{code};
my $ucs = $i->{ucs};
my $code = $i->{code};
my $origcode = $i->{code};
my $plane = ($code & 0x1f0000) >> 16;
@@ -52,14 +52,13 @@ foreach my $i (@$mapping)
# Some codes are mapped twice in the EUC_TW to UTF-8 table.
if ($origcode >= 0x12121 && $origcode <= 0x20000)
{
push @extras, {
ucs => $i->{ucs},
code => ($i->{code} + 0x8ea10000),
rest => $i->{rest},
push @extras,
{ ucs => $i->{ucs},
code => ($i->{code} + 0x8ea10000),
rest => $i->{rest},
direction => TO_UNICODE,
f => $i->{f},
l => $i->{l}
};
f => $i->{f},
l => $i->{l} };
}
}

View File

@@ -35,13 +35,12 @@ while (<$in>)
my $code = hex($c);
if ($code >= 0x80 && $ucs >= 0x0080)
{
push @mapping, {
ucs => $ucs,
code => $code,
push @mapping,
{ ucs => $ucs,
code => $code,
direction => BOTH,
f => $in_file,
l => $.
};
f => $in_file,
l => $. };
}
}
close($in);

View File

@@ -25,10 +25,24 @@ my $this_script = $0;
my $mapping = &read_source("JOHAB.TXT");
# Some extra characters that are not in JOHAB.TXT
push @$mapping, (
{direction => BOTH, ucs => 0x20AC, code => 0xd9e6, comment => '# EURO SIGN', f => $this_script, l => __LINE__ },
{direction => BOTH, ucs => 0x00AE, code => 0xd9e7, comment => '# REGISTERED SIGN', f => $this_script, l => __LINE__ },
{direction => BOTH, ucs => 0x327E, code => 0xd9e8, comment => '# CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ }
);
push @$mapping,
( { direction => BOTH,
ucs => 0x20AC,
code => 0xd9e6,
comment => '# EURO SIGN',
f => $this_script,
l => __LINE__ },
{ direction => BOTH,
ucs => 0x00AE,
code => 0xd9e7,
comment => '# REGISTERED SIGN',
f => $this_script,
l => __LINE__ },
{ direction => BOTH,
ucs => 0x327E,
code => 0xd9e8,
comment => '# CIRCLED HANGUL IEUNG U',
f => $this_script,
l => __LINE__ });
print_conversion_tables($this_script, "JOHAB", $mapping);

View File

@@ -24,6 +24,7 @@ while (my $line = <$in>)
{
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
{
# combined characters
my ($c, $u1, $u2) = ($1, $2, $3);
my $rest = "U+" . $u1 . "+" . $u2 . $4;
@@ -31,18 +32,18 @@ while (my $line = <$in>)
my $ucs1 = hex($u1);
my $ucs2 = hex($u2);
push @mapping, {
code => $code,
ucs => $ucs1,
push @mapping,
{ code => $code,
ucs => $ucs1,
ucs_second => $ucs2,
comment => $rest,
direction => BOTH,
f => $in_file,
l => $.
};
comment => $rest,
direction => BOTH,
f => $in_file,
l => $. };
}
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
{
# non-combined characters
my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
my $ucs = hex($u);
@@ -66,14 +67,13 @@ while (my $line = <$in>)
$direction = BOTH;
}
push @mapping, {
code => $code,
ucs => $ucs,
comment => $rest,
push @mapping,
{ code => $code,
ucs => $ucs,
comment => $rest,
direction => $direction,
f => $in_file,
l => $.
};
f => $in_file,
l => $. };
}
}
close($in);

View File

@@ -18,33 +18,71 @@ my $this_script = $0;
my $mapping = read_source("CP932.TXT");
# Drop these SJIS codes from the source for UTF8=>SJIS conversion
my @reject_sjis =(
0xed40..0xeefc, 0x8754..0x875d, 0x878a, 0x8782,
0x8784, 0xfa5b, 0xfa54, 0x8790..0x8792, 0x8795..0x8797,
0x879a..0x879c
);
my @reject_sjis = (
0xed40 .. 0xeefc, 0x8754 .. 0x875d, 0x878a, 0x8782,
0x8784, 0xfa5b, 0xfa54, 0x8790 .. 0x8792,
0x8795 .. 0x8797, 0x879a .. 0x879c);
foreach my $i (@$mapping)
{
my $code = $i->{code};
my $ucs = $i->{ucs};
my $ucs = $i->{ucs};
if (grep {$code == $_} @reject_sjis)
if (grep { $code == $_ } @reject_sjis)
{
$i->{direction} = TO_UNICODE;
}
}
# Add these UTF8->SJIS pairs to the table.
push @$mapping, (
{direction => FROM_UNICODE, ucs => 0x00a2, code => 0x8191, comment => '# CENT SIGN', f => $this_script, l => __LINE__ },
{direction => FROM_UNICODE, ucs => 0x00a3, code => 0x8192, comment => '# POUND SIGN', f => $this_script, l => __LINE__ },
{direction => FROM_UNICODE, ucs => 0x00a5, code => 0x5c, comment => '# YEN SIGN', f => $this_script, l => __LINE__ },
{direction => FROM_UNICODE, ucs => 0x00ac, code => 0x81ca, comment => '# NOT SIGN', f => $this_script, l => __LINE__ },
{direction => FROM_UNICODE, ucs => 0x2016, code => 0x8161, comment => '# DOUBLE VERTICAL LINE', f => $this_script, l => __LINE__ },
{direction => FROM_UNICODE, ucs => 0x203e, code => 0x7e, comment => '# OVERLINE', f => $this_script, l => __LINE__ },
{direction => FROM_UNICODE, ucs => 0x2212, code => 0x817c, comment => '# MINUS SIGN', f => $this_script, l => __LINE__ },
{direction => FROM_UNICODE, ucs => 0x301c, code => 0x8160, comment => '# WAVE DASH', f => $this_script, l => __LINE__ }
);
push @$mapping,
( { direction => FROM_UNICODE,
ucs => 0x00a2,
code => 0x8191,
comment => '# CENT SIGN',
f => $this_script,
l => __LINE__ },
{ direction => FROM_UNICODE,
ucs => 0x00a3,
code => 0x8192,
comment => '# POUND SIGN',
f => $this_script,
l => __LINE__ },
{ direction => FROM_UNICODE,
ucs => 0x00a5,
code => 0x5c,
comment => '# YEN SIGN',
f => $this_script,
l => __LINE__ },
{ direction => FROM_UNICODE,
ucs => 0x00ac,
code => 0x81ca,
comment => '# NOT SIGN',
f => $this_script,
l => __LINE__ },
{ direction => FROM_UNICODE,
ucs => 0x2016,
code => 0x8161,
comment => '# DOUBLE VERTICAL LINE',
f => $this_script,
l => __LINE__ },
{ direction => FROM_UNICODE,
ucs => 0x203e,
code => 0x7e,
comment => '# OVERLINE',
f => $this_script,
l => __LINE__ },
{ direction => FROM_UNICODE,
ucs => 0x2212,
code => 0x817c,
comment => '# MINUS SIGN',
f => $this_script,
l => __LINE__ },
{ direction => FROM_UNICODE,
ucs => 0x301c,
code => 0x8160,
comment => '# WAVE DASH',
f => $this_script,
l => __LINE__ });
print_conversion_tables($this_script, "SJIS", $mapping);

View File

@@ -38,18 +38,23 @@ while (<$in>)
if ($code >= 0x80 && $ucs >= 0x0080)
{
push @mapping, {
ucs => $ucs,
code => $code,
push @mapping,
{ ucs => $ucs,
code => $code,
direction => BOTH,
f => $in_file,
l => $.
};
f => $in_file,
l => $. };
}
}
close($in);
# One extra character that's not in the source file.
push @mapping, { direction => BOTH, code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ };
push @mapping,
{ direction => BOTH,
code => 0xa2e8,
ucs => 0x327e,
comment => 'CIRCLED HANGUL IEUNG U',
f => $this_script,
l => __LINE__ };
print_conversion_tables($this_script, "UHC", \@mapping);

View File

@@ -9,15 +9,15 @@ use strict;
use Exporter 'import';
our @EXPORT = qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables);
our @EXPORT =
qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables);
# Constants used in the 'direction' field of the character maps
use constant {
NONE => 0,
TO_UNICODE => 1,
FROM_UNICODE => 2,
BOTH => 3
};
BOTH => 3 };
#######################################################################
# read_source - common routine to read source file
@@ -36,7 +36,7 @@ sub read_source
next if (/^#/);
chop;
next if (/^$/); # Ignore empty lines
next if (/^$/); # Ignore empty lines
next if (/^0x([0-9A-F]+)\s+(#.*)$/);
@@ -49,13 +49,13 @@ sub read_source
print STDERR "READ ERROR at line $. in $fname: $_\n";
exit;
}
my $out = {code => hex($1),
ucs => hex($2),
comment => $4,
direction => BOTH,
f => $fname,
l => $.
};
my $out = {
code => hex($1),
ucs => hex($2),
comment => $4,
direction => BOTH,
f => $fname,
l => $. };
# Ignore pure ASCII mappings. PostgreSQL character conversion code
# never even passes these to the conversion code.
@@ -92,8 +92,10 @@ sub print_conversion_tables
{
my ($this_script, $csname, $charset) = @_;
print_conversion_tables_direction($this_script, $csname, FROM_UNICODE, $charset);
print_conversion_tables_direction($this_script, $csname, TO_UNICODE, $charset);
print_conversion_tables_direction($this_script, $csname, FROM_UNICODE,
$charset);
print_conversion_tables_direction($this_script, $csname, TO_UNICODE,
$charset);
}
#############################################################################
@@ -117,14 +119,14 @@ sub print_conversion_tables_direction
my $tblname;
if ($direction == TO_UNICODE)
{
$fname = lc("${csname}_to_utf8.map");
$fname = lc("${csname}_to_utf8.map");
$tblname = lc("${csname}_to_unicode_tree");
print "- Writing ${csname}=>UTF8 conversion table: $fname\n";
}
else
{
$fname = lc("utf8_to_${csname}.map");
$fname = lc("utf8_to_${csname}.map");
$tblname = lc("${csname}_from_unicode_tree");
print "- Writing UTF8=>${csname} conversion table: $fname\n";
@@ -135,24 +137,22 @@ sub print_conversion_tables_direction
print $out "/* src/backend/utils/mb/Unicode/$fname */\n";
print $out "/* This file is generated by $this_script */\n\n";
# Collect regular, non-combined, mappings, and create the radix tree from them.
# Collect regular, non-combined, mappings, and create the radix tree from them.
my $charmap = &make_charmap($out, $charset, $direction, 0);
print_radix_table($out, $tblname, $charmap);
# Collect combined characters, and create combined character table (if any)
# Collect combined characters, and create combined character table (if any)
my $charmap_combined = &make_charmap_combined($charset, $direction);
if (scalar @{$charmap_combined} > 0)
{
if ($direction == TO_UNICODE)
{
print_to_utf8_combined_map($out, $csname,
$charmap_combined, 1);
print_to_utf8_combined_map($out, $csname, $charmap_combined, 1);
}
else
{
print_from_utf8_combined_map($out, $csname,
$charmap_combined, 1);
print_from_utf8_combined_map($out, $csname, $charmap_combined, 1);
}
}
@@ -166,14 +166,16 @@ sub print_from_utf8_combined_map
my $last_comment = "";
printf $out "\n/* Combined character map */\n";
printf $out "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
printf $out
"static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
scalar(@$table);
my $first = 1;
foreach my $i (sort {$a->{utf8} <=> $b->{utf8}} @$table)
{
foreach my $i (sort { $a->{utf8} <=> $b->{utf8} } @$table)
{
print($out ",") if (!$first);
$first = 0;
print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
print $out "\t/* $last_comment */"
if ($verbose && $last_comment ne "");
printf $out "\n {0x%08x, 0x%08x, 0x%04x}",
$i->{utf8}, $i->{utf8_second}, $i->{code};
@@ -198,15 +200,17 @@ sub print_to_utf8_combined_map
my $last_comment = "";
printf $out "\n/* Combined character map */\n";
printf $out "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
printf $out
"static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
scalar(@$table);
my $first = 1;
foreach my $i (sort {$a->{code} <=> $b->{code}} @$table)
{
foreach my $i (sort { $a->{code} <=> $b->{code} } @$table)
{
print($out ",") if (!$first);
$first = 0;
print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
print $out "\t/* $last_comment */"
if ($verbose && $last_comment ne "");
printf $out "\n {0x%04x, 0x%08x, 0x%08x}",
$i->{code}, $i->{utf8}, $i->{utf8_second};
@@ -214,7 +218,7 @@ sub print_to_utf8_combined_map
if ($verbose >= 2)
{
$last_comment =
sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
}
elsif ($verbose >= 1)
{
@@ -255,25 +259,25 @@ sub print_radix_table
}
elsif ($in < 0x10000)
{
my $b1 = $in >> 8;
my $b2 = $in & 0xff;
my $b1 = $in >> 8;
my $b2 = $in & 0xff;
$b2map{$b1}{$b2} = $out;
}
elsif ($in < 0x1000000)
{
my $b1 = $in >> 16;
my $b2 = ($in >> 8) & 0xff;
my $b3 = $in & 0xff;
my $b1 = $in >> 16;
my $b2 = ($in >> 8) & 0xff;
my $b3 = $in & 0xff;
$b3map{$b1}{$b2}{$b3} = $out;
}
elsif ($in < 0x100000000)
{
my $b1 = $in >> 24;
my $b2 = ($in >> 16) & 0xff;
my $b3 = ($in >> 8) & 0xff;
my $b4 = $in & 0xff;
my $b1 = $in >> 24;
my $b2 = ($in >> 16) & 0xff;
my $b3 = ($in >> 8) & 0xff;
my $b4 = $in & 0xff;
$b4map{$b1}{$b2}{$b3}{$b4} = $out;
}
@@ -309,10 +313,14 @@ sub print_radix_table
###
# Add the segments for the radix trees themselves.
push @segments, build_segments_from_tree("Single byte table", "1-byte", 1, \%b1map);
push @segments, build_segments_from_tree("Two byte table", "2-byte", 2, \%b2map);
push @segments, build_segments_from_tree("Three byte table", "3-byte", 3, \%b3map);
push @segments, build_segments_from_tree("Four byte table", "4-byte", 4, \%b4map);
push @segments,
build_segments_from_tree("Single byte table", "1-byte", 1, \%b1map);
push @segments,
build_segments_from_tree("Two byte table", "2-byte", 2, \%b2map);
push @segments,
build_segments_from_tree("Three byte table", "3-byte", 3, \%b3map);
push @segments,
build_segments_from_tree("Four byte table", "4-byte", 4, \%b4map);
###
### Find min and max index used in each level of each tree.
@@ -325,23 +333,24 @@ sub print_radix_table
my %max_idx;
foreach my $seg (@segments)
{
my $this_min = $min_idx{$seg->{depth}}->{$seg->{level}};
my $this_max = $max_idx{$seg->{depth}}->{$seg->{level}};
my $this_min = $min_idx{ $seg->{depth} }->{ $seg->{level} };
my $this_max = $max_idx{ $seg->{depth} }->{ $seg->{level} };
foreach my $i (keys %{$seg->{values}})
foreach my $i (keys %{ $seg->{values} })
{
$this_min = $i if (!defined $this_min || $i < $this_min);
$this_max = $i if (!defined $this_max || $i > $this_max);
}
$min_idx{$seg->{depth}}{$seg->{level}} = $this_min;
$max_idx{$seg->{depth}}{$seg->{level}} = $this_max;
$min_idx{ $seg->{depth} }{ $seg->{level} } = $this_min;
$max_idx{ $seg->{depth} }{ $seg->{level} } = $this_max;
}
# Copy the mins and maxes back to every segment, for convenience.
foreach my $seg (@segments)
{
$seg->{min_idx} = $min_idx{$seg->{depth}}{$seg->{level}};
$seg->{max_idx} = $max_idx{$seg->{depth}}{$seg->{level}};
$seg->{min_idx} = $min_idx{ $seg->{depth} }{ $seg->{level} };
$seg->{max_idx} = $max_idx{ $seg->{depth} }{ $seg->{level} };
}
###
@@ -359,11 +368,10 @@ sub print_radix_table
$widest_range = $this_range if ($this_range > $widest_range);
}
unshift @segments, {
header => "Dummy map, for invalid values",
unshift @segments,
{ header => "Dummy map, for invalid values",
min_idx => 0,
max_idx => $widest_range
};
max_idx => $widest_range };
###
### Eliminate overlapping zeros
@@ -378,26 +386,34 @@ sub print_radix_table
###
for (my $j = 0; $j < $#segments - 1; $j++)
{
my $seg = $segments[$j];
my $nextseg = $segments[$j + 1];
my $seg = $segments[$j];
my $nextseg = $segments[ $j + 1 ];
# Count the number of zero values at the end of this segment.
my $this_trail_zeros = 0;
for (my $i = $seg->{max_idx}; $i >= $seg->{min_idx} && !$seg->{values}->{$i}; $i--)
for (
my $i = $seg->{max_idx};
$i >= $seg->{min_idx} && !$seg->{values}->{$i};
$i--)
{
$this_trail_zeros++;
}
# Count the number of zeros at the beginning of next segment.
my $next_lead_zeros = 0;
for (my $i = $nextseg->{min_idx}; $i <= $nextseg->{max_idx} && !$nextseg->{values}->{$i}; $i++)
for (
my $i = $nextseg->{min_idx};
$i <= $nextseg->{max_idx} && !$nextseg->{values}->{$i};
$i++)
{
$next_lead_zeros++;
}
# How many zeros in common?
my $overlaid_trail_zeros =
($this_trail_zeros > $next_lead_zeros) ? $next_lead_zeros : $this_trail_zeros;
($this_trail_zeros > $next_lead_zeros)
? $next_lead_zeros
: $this_trail_zeros;
$seg->{overlaid_trail_zeros} = $overlaid_trail_zeros;
$seg->{max_idx} = $seg->{max_idx} - $overlaid_trail_zeros;
@@ -419,7 +435,7 @@ sub print_radix_table
foreach my $seg (@segments)
{
$seg->{offset} = $flatoff;
$segmap{$seg->{label}} = $flatoff;
$segmap{ $seg->{label} } = $flatoff;
$flatoff += $seg->{max_idx} - $seg->{min_idx} + 1;
}
my $tblsize = $flatoff;
@@ -427,9 +443,9 @@ sub print_radix_table
# Second pass: look up the offset of each label reference in the hash.
foreach my $seg (@segments)
{
while (my ($i, $val) = each %{$seg->{values}})
while (my ($i, $val) = each %{ $seg->{values} })
{
if (!($val =~ /^[0-9,.E]+$/ ))
if (!($val =~ /^[0-9,.E]+$/))
{
my $segoff = $segmap{$val};
if ($segoff)
@@ -482,7 +498,7 @@ sub print_radix_table
my $max_val = 0;
foreach my $seg (@segments)
{
foreach my $val (values %{$seg->{values}})
foreach my $val (values %{ $seg->{values} })
{
$max_val = $val if ($val > $max_val);
}
@@ -498,17 +514,17 @@ sub print_radix_table
if ($max_val <= 0xffff)
{
$vals_per_line = 8;
$colwidth = 4;
$colwidth = 4;
}
elsif ($max_val <= 0xffffff)
{
$vals_per_line = 4;
$colwidth = 6;
$colwidth = 6;
}
else
{
$vals_per_line = 4;
$colwidth = 8;
$colwidth = 8;
}
###
@@ -529,17 +545,20 @@ sub print_radix_table
print $out " ${tblname}_table,\n";
}
printf $out "\n";
printf $out " 0x%04x, /* offset of table for 1-byte inputs */\n", $b1root;
printf $out " 0x%04x, /* offset of table for 1-byte inputs */\n",
$b1root;
printf $out " 0x%02x, /* b1_lower */\n", $b1_lower;
printf $out " 0x%02x, /* b1_upper */\n", $b1_upper;
printf $out "\n";
printf $out " 0x%04x, /* offset of table for 2-byte inputs */\n", $b2root;
printf $out " 0x%04x, /* offset of table for 2-byte inputs */\n",
$b2root;
printf $out " 0x%02x, /* b2_1_lower */\n", $b2_1_lower;
printf $out " 0x%02x, /* b2_1_upper */\n", $b2_1_upper;
printf $out " 0x%02x, /* b2_2_lower */\n", $b2_2_lower;
printf $out " 0x%02x, /* b2_2_upper */\n", $b2_2_upper;
printf $out "\n";
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n", $b3root;
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n",
$b3root;
printf $out " 0x%02x, /* b3_1_lower */\n", $b3_1_lower;
printf $out " 0x%02x, /* b3_1_upper */\n", $b3_1_upper;
printf $out " 0x%02x, /* b3_2_lower */\n", $b3_2_lower;
@@ -547,7 +566,8 @@ sub print_radix_table
printf $out " 0x%02x, /* b3_3_lower */\n", $b3_3_lower;
printf $out " 0x%02x, /* b3_3_upper */\n", $b3_3_upper;
printf $out "\n";
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n", $b4root;
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n",
$b4root;
printf $out " 0x%02x, /* b4_1_lower */\n", $b4_1_lower;
printf $out " 0x%02x, /* b4_1_upper */\n", $b4_1_upper;
printf $out " 0x%02x, /* b4_2_lower */\n", $b4_2_lower;
@@ -561,18 +581,21 @@ sub print_radix_table
print $out "static const $datatype ${tblname}_table[$tblsize] =\n";
print $out "{";
my $off = 0;
foreach my $seg (@segments)
{
printf $out "\n";
printf $out " /*** %s - offset 0x%05x ***/\n", $seg->{header}, $off;
printf $out "\n";
for (my $i=$seg->{min_idx}; $i <= $seg->{max_idx};)
for (my $i = $seg->{min_idx}; $i <= $seg->{max_idx};)
{
# Print the next line's worth of values.
# XXX pad to begin at a nice boundary
printf $out " /* %02x */ ", $i;
for (my $j = 0; $j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
for (my $j = 0;
$j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
{
my $val = $seg->{values}->{$i};
@@ -588,7 +611,8 @@ sub print_radix_table
}
if ($seg->{overlaid_trail_zeros})
{
printf $out " /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n";
printf $out
" /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n";
}
}
@@ -607,13 +631,14 @@ sub build_segments_from_tree
if (%{$map})
{
@segments = build_segments_recurse($header, $rootlabel, "", 1, $depth, $map);
@segments =
build_segments_recurse($header, $rootlabel, "", 1, $depth, $map);
# Sort the segments into "breadth-first" order. Not strictly required,
# but makes the maps nicer to read.
@segments = sort { $a->{level} cmp $b->{level} or
$a->{path} cmp $b->{path}}
@segments;
@segments =
sort { $a->{level} cmp $b->{level} or $a->{path} cmp $b->{path} }
@segments;
}
return @segments;
@@ -628,14 +653,13 @@ sub build_segments_recurse
if ($level == $depth)
{
push @segments, {
header => $header . ", leaf: ${path}xx",
label => $label,
level => $level,
depth => $depth,
path => $path,
values => $map
};
push @segments,
{ header => $header . ", leaf: ${path}xx",
label => $label,
level => $level,
depth => $depth,
path => $path,
values => $map };
}
else
{
@@ -646,19 +670,19 @@ sub build_segments_recurse
my $childpath = $path . sprintf("%02x", $i);
my $childlabel = "$depth-level-$level-$childpath";
push @segments, build_segments_recurse($header, $childlabel, $childpath,
$level + 1, $depth, $val);
push @segments,
build_segments_recurse($header, $childlabel, $childpath,
$level + 1, $depth, $val);
$children{$i} = $childlabel;
}
push @segments, {
header => $header . ", byte #$level: ${path}xx",
label => $label,
level => $level,
depth => $depth,
path => $path,
values => \%children
};
push @segments,
{ header => $header . ", byte #$level: ${path}xx",
label => $label,
level => $level,
depth => $depth,
path => $path,
values => \%children };
}
return @segments;
}
@@ -688,29 +712,31 @@ sub make_charmap
my %charmap;
foreach my $c (@$charset)
{
# combined characters are handled elsewhere
next if (defined $c->{ucs_second});
next if ($c->{direction} != $direction && $c->{direction} != BOTH);
my ($src, $dst) =
$direction == TO_UNICODE
? ($c->{code}, ucs2utf($c->{ucs}))
: (ucs2utf($c->{ucs}), $c->{code});
$direction == TO_UNICODE
? ($c->{code}, ucs2utf($c->{ucs}))
: (ucs2utf($c->{ucs}), $c->{code});
# check for duplicate source codes
if (defined $charmap{$src})
{
printf STDERR
"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n",
$c->{f}, $c->{l}, $src, $charmap{$src}, $dst;
"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n",
$c->{f}, $c->{l}, $src, $charmap{$src}, $dst;
exit;
}
$charmap{$src} = $dst;
if ($verbose)
{
printf $out "0x%04x 0x%04x %s:%d %s\n", $src, $dst, $c->{f}, $c->{l}, $c->{comment};
printf $out "0x%04x 0x%04x %s:%d %s\n", $src, $dst, $c->{f},
$c->{l}, $c->{comment};
}
}
if ($verbose)
@@ -743,11 +769,13 @@ sub make_charmap_combined
if (defined $c->{ucs_second})
{
my $entry = {utf8 => ucs2utf($c->{ucs}),
utf8_second => ucs2utf($c->{ucs_second}),
code => $c->{code},
comment => $c->{comment},
f => $c->{f}, l => $c->{l}};
my $entry = {
utf8 => ucs2utf($c->{ucs}),
utf8_second => ucs2utf($c->{ucs_second}),
code => $c->{code},
comment => $c->{comment},
f => $c->{f},
l => $c->{l} };
push @combined, $entry;
}
}