mirror of
https://github.com/postgres/postgres.git
synced 2025-11-10 17:42:29 +03:00
Post-PG 10 beta1 pgperltidy run
This commit is contained in:
@@ -35,9 +35,10 @@ my $all = &read_source("BIG5.TXT");
|
||||
# Load CP950.TXT
|
||||
my $cp950txt = &read_source("CP950.TXT");
|
||||
|
||||
foreach my $i (@$cp950txt) {
|
||||
foreach my $i (@$cp950txt)
|
||||
{
|
||||
my $code = $i->{code};
|
||||
my $ucs = $i->{ucs};
|
||||
my $ucs = $i->{ucs};
|
||||
|
||||
# Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
|
||||
# from CP950.TXT
|
||||
@@ -46,22 +47,24 @@ foreach my $i (@$cp950txt) {
|
||||
&& $code >= 0xf9d6
|
||||
&& $code <= 0xf9dc)
|
||||
{
|
||||
push @$all, {code => $code,
|
||||
ucs => $ucs,
|
||||
comment => $i->{comment},
|
||||
direction => BOTH,
|
||||
f => $i->{f},
|
||||
l => $i->{l} };
|
||||
push @$all,
|
||||
{ code => $code,
|
||||
ucs => $ucs,
|
||||
comment => $i->{comment},
|
||||
direction => BOTH,
|
||||
f => $i->{f},
|
||||
l => $i->{l} };
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $i (@$all) {
|
||||
foreach my $i (@$all)
|
||||
{
|
||||
my $code = $i->{code};
|
||||
my $ucs = $i->{ucs};
|
||||
my $ucs = $i->{ucs};
|
||||
|
||||
# BIG5.TXT maps several BIG5 characters to U+FFFD. The UTF-8 to BIG5 mapping can
|
||||
# contain only one of them. XXX: Doesn't really make sense to include any of them,
|
||||
# but for historical reasons, we map the first one of them.
|
||||
# BIG5.TXT maps several BIG5 characters to U+FFFD. The UTF-8 to BIG5 mapping can
|
||||
# contain only one of them. XXX: Doesn't really make sense to include any of them,
|
||||
# but for historical reasons, we map the first one of them.
|
||||
if ($i->{ucs} == 0xFFFD && $i->{code} != 0xA15A)
|
||||
{
|
||||
$i->{direction} = TO_UNICODE;
|
||||
|
||||
@@ -38,8 +38,10 @@ while (<$in>)
|
||||
# a lot of extra characters on top of the GB2312 character set that
|
||||
# EUC_CN encodes. Filter out those extra characters.
|
||||
next if (($code & 0xFF) < 0xA1);
|
||||
next if (!($code >= 0xA100 && $code <= 0xA9FF ||
|
||||
$code >= 0xB000 && $code <= 0xF7FF));
|
||||
next
|
||||
if (
|
||||
!( $code >= 0xA100 && $code <= 0xA9FF
|
||||
|| $code >= 0xB000 && $code <= 0xF7FF));
|
||||
|
||||
next if ($code >= 0xA2A1 && $code <= 0xA2B0);
|
||||
next if ($code >= 0xA2E3 && $code <= 0xA2E4);
|
||||
@@ -67,13 +69,12 @@ while (<$in>)
|
||||
$ucs = 0x2015;
|
||||
}
|
||||
|
||||
push @mapping, {
|
||||
ucs => $ucs,
|
||||
code => $code,
|
||||
push @mapping,
|
||||
{ ucs => $ucs,
|
||||
code => $code,
|
||||
direction => BOTH,
|
||||
f => $in_file,
|
||||
l => $.
|
||||
};
|
||||
f => $in_file,
|
||||
l => $. };
|
||||
}
|
||||
close($in);
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ while (my $line = <$in>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
|
||||
# combined characters
|
||||
my ($c, $u1, $u2) = ($1, $2, $3);
|
||||
my $rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
@@ -31,17 +32,18 @@ while (my $line = <$in>)
|
||||
my $ucs1 = hex($u1);
|
||||
my $ucs2 = hex($u2);
|
||||
|
||||
push @all, { direction => BOTH,
|
||||
ucs => $ucs1,
|
||||
ucs_second => $ucs2,
|
||||
code => $code,
|
||||
comment => $rest,
|
||||
f => $in_file,
|
||||
l => $.
|
||||
};
|
||||
push @all,
|
||||
{ direction => BOTH,
|
||||
ucs => $ucs1,
|
||||
ucs_second => $ucs2,
|
||||
code => $code,
|
||||
comment => $rest,
|
||||
f => $in_file,
|
||||
l => $. };
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
|
||||
# non-combined characters
|
||||
my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
|
||||
my $ucs = hex($u);
|
||||
@@ -49,13 +51,13 @@ while (my $line = <$in>)
|
||||
|
||||
next if ($code < 0x80 && $ucs < 0x80);
|
||||
|
||||
push @all, { direction => BOTH,
|
||||
ucs => $ucs,
|
||||
code => $code,
|
||||
comment => $rest,
|
||||
f => $in_file,
|
||||
l => $.
|
||||
};
|
||||
push @all,
|
||||
{ direction => BOTH,
|
||||
ucs => $ucs,
|
||||
code => $code,
|
||||
comment => $rest,
|
||||
f => $in_file,
|
||||
l => $. };
|
||||
}
|
||||
}
|
||||
close($in);
|
||||
|
||||
@@ -21,7 +21,9 @@ my $jis0212 = &read_source("JIS0212.TXT");
|
||||
|
||||
my @mapping;
|
||||
|
||||
foreach my $i (@$jis0212) {
|
||||
foreach my $i (@$jis0212)
|
||||
{
|
||||
|
||||
# We have a different mapping for this in the EUC_JP to UTF-8 direction.
|
||||
if ($i->{code} == 0x2243)
|
||||
{
|
||||
@@ -48,13 +50,14 @@ foreach my $i (@$jis0212) {
|
||||
# Load CP932.TXT.
|
||||
my $ct932 = &read_source("CP932.TXT");
|
||||
|
||||
foreach my $i (@$ct932) {
|
||||
foreach my $i (@$ct932)
|
||||
{
|
||||
my $sjis = $i->{code};
|
||||
|
||||
# We have a different mapping for this in the EUC_JP to UTF-8 direction.
|
||||
if ($sjis == 0xeefa ||
|
||||
$sjis == 0xeefb ||
|
||||
$sjis == 0xeefc)
|
||||
if ( $sjis == 0xeefa
|
||||
|| $sjis == 0xeefb
|
||||
|| $sjis == 0xeefc)
|
||||
{
|
||||
next;
|
||||
}
|
||||
@@ -63,8 +66,10 @@ foreach my $i (@$ct932) {
|
||||
{
|
||||
my $jis = &sjis2jis($sjis);
|
||||
|
||||
$i->{code} = $jis | ($jis < 0x100 ? 0x8e00 :
|
||||
($sjis >= 0xeffd ? 0x8f8080 : 0x8080));
|
||||
$i->{code} = $jis | (
|
||||
$jis < 0x100
|
||||
? 0x8e00
|
||||
: ($sjis >= 0xeffd ? 0x8f8080 : 0x8080));
|
||||
|
||||
# Remember the SJIS code for later.
|
||||
$i->{sjis} = $sjis;
|
||||
@@ -73,13 +78,14 @@ foreach my $i (@$ct932) {
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $i (@mapping) {
|
||||
foreach my $i (@mapping)
|
||||
{
|
||||
my $sjis = $i->{sjis};
|
||||
|
||||
# These SJIS characters are excluded completely.
|
||||
if ($sjis >= 0xed00 && $sjis <= 0xeef9 ||
|
||||
$sjis >= 0xfa54 && $sjis <= 0xfa56 ||
|
||||
$sjis >= 0xfa58 && $sjis <= 0xfc4b)
|
||||
if ( $sjis >= 0xed00 && $sjis <= 0xeef9
|
||||
|| $sjis >= 0xfa54 && $sjis <= 0xfa56
|
||||
|| $sjis >= 0xfa58 && $sjis <= 0xfc4b)
|
||||
{
|
||||
$i->{direction} = NONE;
|
||||
next;
|
||||
@@ -92,10 +98,16 @@ foreach my $i (@mapping) {
|
||||
next;
|
||||
}
|
||||
|
||||
if ($sjis == 0x8790 || $sjis == 0x8791 || $sjis == 0x8792 ||
|
||||
$sjis == 0x8795 || $sjis == 0x8796 || $sjis == 0x8797 ||
|
||||
$sjis == 0x879a || $sjis == 0x879b || $sjis == 0x879c ||
|
||||
($sjis >= 0xfa4a && $sjis <= 0xfa53))
|
||||
if ( $sjis == 0x8790
|
||||
|| $sjis == 0x8791
|
||||
|| $sjis == 0x8792
|
||||
|| $sjis == 0x8795
|
||||
|| $sjis == 0x8796
|
||||
|| $sjis == 0x8797
|
||||
|| $sjis == 0x879a
|
||||
|| $sjis == 0x879b
|
||||
|| $sjis == 0x879c
|
||||
|| ($sjis >= 0xfa4a && $sjis <= 0xfa53))
|
||||
{
|
||||
$i->{direction} = TO_UNICODE;
|
||||
next;
|
||||
@@ -103,95 +115,352 @@ foreach my $i (@mapping) {
|
||||
}
|
||||
|
||||
push @mapping, (
|
||||
{direction => BOTH, ucs => 0x4efc, code => 0x8ff4af, comment => '# CJK(4EFC)'},
|
||||
{direction => BOTH, ucs => 0x50f4, code => 0x8ff4b0, comment => '# CJK(50F4)'},
|
||||
{direction => BOTH, ucs => 0x51EC, code => 0x8ff4b1, comment => '# CJK(51EC)'},
|
||||
{direction => BOTH, ucs => 0x5307, code => 0x8ff4b2, comment => '# CJK(5307)'},
|
||||
{direction => BOTH, ucs => 0x5324, code => 0x8ff4b3, comment => '# CJK(5324)'},
|
||||
{direction => BOTH, ucs => 0x548A, code => 0x8ff4b5, comment => '# CJK(548A)'},
|
||||
{direction => BOTH, ucs => 0x5759, code => 0x8ff4b6, comment => '# CJK(5759)'},
|
||||
{direction => BOTH, ucs => 0x589E, code => 0x8ff4b9, comment => '# CJK(589E)'},
|
||||
{direction => BOTH, ucs => 0x5BEC, code => 0x8ff4ba, comment => '# CJK(5BEC)'},
|
||||
{direction => BOTH, ucs => 0x5CF5, code => 0x8ff4bb, comment => '# CJK(5CF5)'},
|
||||
{direction => BOTH, ucs => 0x5D53, code => 0x8ff4bc, comment => '# CJK(5D53)'},
|
||||
{direction => BOTH, ucs => 0x5FB7, code => 0x8ff4be, comment => '# CJK(5FB7)'},
|
||||
{direction => BOTH, ucs => 0x6085, code => 0x8ff4bf, comment => '# CJK(6085)'},
|
||||
{direction => BOTH, ucs => 0x6120, code => 0x8ff4c0, comment => '# CJK(6120)'},
|
||||
{direction => BOTH, ucs => 0x654E, code => 0x8ff4c1, comment => '# CJK(654E)'},
|
||||
{direction => BOTH, ucs => 0x663B, code => 0x8ff4c2, comment => '# CJK(663B)'},
|
||||
{direction => BOTH, ucs => 0x6665, code => 0x8ff4c3, comment => '# CJK(6665)'},
|
||||
{direction => BOTH, ucs => 0x6801, code => 0x8ff4c6, comment => '# CJK(6801)'},
|
||||
{direction => BOTH, ucs => 0x6A6B, code => 0x8ff4c9, comment => '# CJK(6A6B)'},
|
||||
{direction => BOTH, ucs => 0x6AE2, code => 0x8ff4ca, comment => '# CJK(6AE2)'},
|
||||
{direction => BOTH, ucs => 0x6DF2, code => 0x8ff4cc, comment => '# CJK(6DF2)'},
|
||||
{direction => BOTH, ucs => 0x6DF8, code => 0x8ff4cb, comment => '# CJK(6DF8)'},
|
||||
{direction => BOTH, ucs => 0x7028, code => 0x8ff4cd, comment => '# CJK(7028)'},
|
||||
{direction => BOTH, ucs => 0x70BB, code => 0x8ff4ae, comment => '# CJK(70BB)'},
|
||||
{direction => BOTH, ucs => 0x7501, code => 0x8ff4d0, comment => '# CJK(7501)'},
|
||||
{direction => BOTH, ucs => 0x7682, code => 0x8ff4d1, comment => '# CJK(7682)'},
|
||||
{direction => BOTH, ucs => 0x769E, code => 0x8ff4d2, comment => '# CJK(769E)'},
|
||||
{direction => BOTH, ucs => 0x7930, code => 0x8ff4d4, comment => '# CJK(7930)'},
|
||||
{direction => BOTH, ucs => 0x7AE7, code => 0x8ff4d9, comment => '# CJK(7AE7)'},
|
||||
{direction => BOTH, ucs => 0x7DA0, code => 0x8ff4dc, comment => '# CJK(7DA0)'},
|
||||
{direction => BOTH, ucs => 0x7DD6, code => 0x8ff4dd, comment => '# CJK(7DD6)'},
|
||||
{direction => BOTH, ucs => 0x8362, code => 0x8ff4df, comment => '# CJK(8362)'},
|
||||
{direction => BOTH, ucs => 0x85B0, code => 0x8ff4e1, comment => '# CJK(85B0)'},
|
||||
{direction => BOTH, ucs => 0x8807, code => 0x8ff4e4, comment => '# CJK(8807)'},
|
||||
{direction => BOTH, ucs => 0x8B7F, code => 0x8ff4e6, comment => '# CJK(8B7F)'},
|
||||
{direction => BOTH, ucs => 0x8CF4, code => 0x8ff4e7, comment => '# CJK(8CF4)'},
|
||||
{direction => BOTH, ucs => 0x8D76, code => 0x8ff4e8, comment => '# CJK(8D76)'},
|
||||
{direction => BOTH, ucs => 0x90DE, code => 0x8ff4ec, comment => '# CJK(90DE)'},
|
||||
{direction => BOTH, ucs => 0x9115, code => 0x8ff4ee, comment => '# CJK(9115)'},
|
||||
{direction => BOTH, ucs => 0x9592, code => 0x8ff4f1, comment => '# CJK(9592)'},
|
||||
{direction => BOTH, ucs => 0x973B, code => 0x8ff4f4, comment => '# CJK(973B)'},
|
||||
{direction => BOTH, ucs => 0x974D, code => 0x8ff4f5, comment => '# CJK(974D)'},
|
||||
{direction => BOTH, ucs => 0x9751, code => 0x8ff4f6, comment => '# CJK(9751)'},
|
||||
{direction => BOTH, ucs => 0x999E, code => 0x8ff4fa, comment => '# CJK(999E)'},
|
||||
{direction => BOTH, ucs => 0x9AD9, code => 0x8ff4fb, comment => '# CJK(9AD9)'},
|
||||
{direction => BOTH, ucs => 0x9B72, code => 0x8ff4fc, comment => '# CJK(9B72)'},
|
||||
{direction => BOTH, ucs => 0x9ED1, code => 0x8ff4fe, comment => '# CJK(9ED1)'},
|
||||
{direction => BOTH, ucs => 0xF929, code => 0x8ff4c5, comment => '# CJK COMPATIBILITY IDEOGRAPH-F929'},
|
||||
{direction => BOTH, ucs => 0xF9DC, code => 0x8ff4f2, comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC'},
|
||||
{direction => BOTH, ucs => 0xFA0E, code => 0x8ff4b4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E'},
|
||||
{direction => BOTH, ucs => 0xFA0F, code => 0x8ff4b7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F'},
|
||||
{direction => BOTH, ucs => 0xFA10, code => 0x8ff4b8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10'},
|
||||
{direction => BOTH, ucs => 0xFA11, code => 0x8ff4bd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11'},
|
||||
{direction => BOTH, ucs => 0xFA12, code => 0x8ff4c4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12'},
|
||||
{direction => BOTH, ucs => 0xFA13, code => 0x8ff4c7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13'},
|
||||
{direction => BOTH, ucs => 0xFA14, code => 0x8ff4c8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14'},
|
||||
{direction => BOTH, ucs => 0xFA15, code => 0x8ff4ce, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15'},
|
||||
{direction => BOTH, ucs => 0xFA16, code => 0x8ff4cf, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16'},
|
||||
{direction => BOTH, ucs => 0xFA17, code => 0x8ff4d3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17'},
|
||||
{direction => BOTH, ucs => 0xFA18, code => 0x8ff4d5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18'},
|
||||
{direction => BOTH, ucs => 0xFA19, code => 0x8ff4d6, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19'},
|
||||
{direction => BOTH, ucs => 0xFA1A, code => 0x8ff4d7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A'},
|
||||
{direction => BOTH, ucs => 0xFA1B, code => 0x8ff4d8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B'},
|
||||
{direction => BOTH, ucs => 0xFA1C, code => 0x8ff4da, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C'},
|
||||
{direction => BOTH, ucs => 0xFA1D, code => 0x8ff4db, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D'},
|
||||
{direction => BOTH, ucs => 0xFA1E, code => 0x8ff4de, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E'},
|
||||
{direction => BOTH, ucs => 0xFA1F, code => 0x8ff4e0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F'},
|
||||
{direction => BOTH, ucs => 0xFA20, code => 0x8ff4e2, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20'},
|
||||
{direction => BOTH, ucs => 0xFA21, code => 0x8ff4e3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21'},
|
||||
{direction => BOTH, ucs => 0xFA22, code => 0x8ff4e5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22'},
|
||||
{direction => BOTH, ucs => 0xFA23, code => 0x8ff4e9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23'},
|
||||
{direction => BOTH, ucs => 0xFA24, code => 0x8ff4ea, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24'},
|
||||
{direction => BOTH, ucs => 0xFA25, code => 0x8ff4eb, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25'},
|
||||
{direction => BOTH, ucs => 0xFA26, code => 0x8ff4ed, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26'},
|
||||
{direction => BOTH, ucs => 0xFA27, code => 0x8ff4ef, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27'},
|
||||
{direction => BOTH, ucs => 0xFA28, code => 0x8ff4f0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28'},
|
||||
{direction => BOTH, ucs => 0xFA29, code => 0x8ff4f3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29'},
|
||||
{direction => BOTH, ucs => 0xFA2A, code => 0x8ff4f7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A'},
|
||||
{direction => BOTH, ucs => 0xFA2B, code => 0x8ff4f8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B'},
|
||||
{direction => BOTH, ucs => 0xFA2C, code => 0x8ff4f9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C'},
|
||||
{direction => BOTH, ucs => 0xFA2D, code => 0x8ff4fd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D'},
|
||||
{direction => BOTH, ucs => 0xFF07, code => 0x8ff4a9, comment => '# FULLWIDTH APOSTROPHE'},
|
||||
{direction => BOTH, ucs => 0xFFE4, code => 0x8fa2c3, comment => '# FULLWIDTH BROKEN BAR'},
|
||||
{ direction => BOTH,
|
||||
ucs => 0x4efc,
|
||||
code => 0x8ff4af,
|
||||
comment => '# CJK(4EFC)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x50f4,
|
||||
code => 0x8ff4b0,
|
||||
comment => '# CJK(50F4)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x51EC,
|
||||
code => 0x8ff4b1,
|
||||
comment => '# CJK(51EC)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x5307,
|
||||
code => 0x8ff4b2,
|
||||
comment => '# CJK(5307)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x5324,
|
||||
code => 0x8ff4b3,
|
||||
comment => '# CJK(5324)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x548A,
|
||||
code => 0x8ff4b5,
|
||||
comment => '# CJK(548A)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x5759,
|
||||
code => 0x8ff4b6,
|
||||
comment => '# CJK(5759)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x589E,
|
||||
code => 0x8ff4b9,
|
||||
comment => '# CJK(589E)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x5BEC,
|
||||
code => 0x8ff4ba,
|
||||
comment => '# CJK(5BEC)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x5CF5,
|
||||
code => 0x8ff4bb,
|
||||
comment => '# CJK(5CF5)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x5D53,
|
||||
code => 0x8ff4bc,
|
||||
comment => '# CJK(5D53)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x5FB7,
|
||||
code => 0x8ff4be,
|
||||
comment => '# CJK(5FB7)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6085,
|
||||
code => 0x8ff4bf,
|
||||
comment => '# CJK(6085)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6120,
|
||||
code => 0x8ff4c0,
|
||||
comment => '# CJK(6120)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x654E,
|
||||
code => 0x8ff4c1,
|
||||
comment => '# CJK(654E)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x663B,
|
||||
code => 0x8ff4c2,
|
||||
comment => '# CJK(663B)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6665,
|
||||
code => 0x8ff4c3,
|
||||
comment => '# CJK(6665)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6801,
|
||||
code => 0x8ff4c6,
|
||||
comment => '# CJK(6801)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6A6B,
|
||||
code => 0x8ff4c9,
|
||||
comment => '# CJK(6A6B)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6AE2,
|
||||
code => 0x8ff4ca,
|
||||
comment => '# CJK(6AE2)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6DF2,
|
||||
code => 0x8ff4cc,
|
||||
comment => '# CJK(6DF2)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x6DF8,
|
||||
code => 0x8ff4cb,
|
||||
comment => '# CJK(6DF8)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x7028,
|
||||
code => 0x8ff4cd,
|
||||
comment => '# CJK(7028)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x70BB,
|
||||
code => 0x8ff4ae,
|
||||
comment => '# CJK(70BB)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x7501,
|
||||
code => 0x8ff4d0,
|
||||
comment => '# CJK(7501)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x7682,
|
||||
code => 0x8ff4d1,
|
||||
comment => '# CJK(7682)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x769E,
|
||||
code => 0x8ff4d2,
|
||||
comment => '# CJK(769E)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x7930,
|
||||
code => 0x8ff4d4,
|
||||
comment => '# CJK(7930)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x7AE7,
|
||||
code => 0x8ff4d9,
|
||||
comment => '# CJK(7AE7)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x7DA0,
|
||||
code => 0x8ff4dc,
|
||||
comment => '# CJK(7DA0)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x7DD6,
|
||||
code => 0x8ff4dd,
|
||||
comment => '# CJK(7DD6)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x8362,
|
||||
code => 0x8ff4df,
|
||||
comment => '# CJK(8362)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x85B0,
|
||||
code => 0x8ff4e1,
|
||||
comment => '# CJK(85B0)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x8807,
|
||||
code => 0x8ff4e4,
|
||||
comment => '# CJK(8807)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x8B7F,
|
||||
code => 0x8ff4e6,
|
||||
comment => '# CJK(8B7F)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x8CF4,
|
||||
code => 0x8ff4e7,
|
||||
comment => '# CJK(8CF4)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x8D76,
|
||||
code => 0x8ff4e8,
|
||||
comment => '# CJK(8D76)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x90DE,
|
||||
code => 0x8ff4ec,
|
||||
comment => '# CJK(90DE)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x9115,
|
||||
code => 0x8ff4ee,
|
||||
comment => '# CJK(9115)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x9592,
|
||||
code => 0x8ff4f1,
|
||||
comment => '# CJK(9592)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x973B,
|
||||
code => 0x8ff4f4,
|
||||
comment => '# CJK(973B)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x974D,
|
||||
code => 0x8ff4f5,
|
||||
comment => '# CJK(974D)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x9751,
|
||||
code => 0x8ff4f6,
|
||||
comment => '# CJK(9751)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x999E,
|
||||
code => 0x8ff4fa,
|
||||
comment => '# CJK(999E)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x9AD9,
|
||||
code => 0x8ff4fb,
|
||||
comment => '# CJK(9AD9)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x9B72,
|
||||
code => 0x8ff4fc,
|
||||
comment => '# CJK(9B72)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x9ED1,
|
||||
code => 0x8ff4fe,
|
||||
comment => '# CJK(9ED1)' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xF929,
|
||||
code => 0x8ff4c5,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-F929' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xF9DC,
|
||||
code => 0x8ff4f2,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA0E,
|
||||
code => 0x8ff4b4,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA0F,
|
||||
code => 0x8ff4b7,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA10,
|
||||
code => 0x8ff4b8,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA11,
|
||||
code => 0x8ff4bd,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA12,
|
||||
code => 0x8ff4c4,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA13,
|
||||
code => 0x8ff4c7,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA14,
|
||||
code => 0x8ff4c8,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA15,
|
||||
code => 0x8ff4ce,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA16,
|
||||
code => 0x8ff4cf,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA17,
|
||||
code => 0x8ff4d3,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA18,
|
||||
code => 0x8ff4d5,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA19,
|
||||
code => 0x8ff4d6,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA1A,
|
||||
code => 0x8ff4d7,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA1B,
|
||||
code => 0x8ff4d8,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA1C,
|
||||
code => 0x8ff4da,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA1D,
|
||||
code => 0x8ff4db,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA1E,
|
||||
code => 0x8ff4de,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA1F,
|
||||
code => 0x8ff4e0,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA20,
|
||||
code => 0x8ff4e2,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA21,
|
||||
code => 0x8ff4e3,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA22,
|
||||
code => 0x8ff4e5,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA23,
|
||||
code => 0x8ff4e9,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA24,
|
||||
code => 0x8ff4ea,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA25,
|
||||
code => 0x8ff4eb,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA26,
|
||||
code => 0x8ff4ed,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA27,
|
||||
code => 0x8ff4ef,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA28,
|
||||
code => 0x8ff4f0,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA29,
|
||||
code => 0x8ff4f3,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA2A,
|
||||
code => 0x8ff4f7,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA2B,
|
||||
code => 0x8ff4f8,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA2C,
|
||||
code => 0x8ff4f9,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFA2D,
|
||||
code => 0x8ff4fd,
|
||||
comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFF07,
|
||||
code => 0x8ff4a9,
|
||||
comment => '# FULLWIDTH APOSTROPHE' },
|
||||
{ direction => BOTH,
|
||||
ucs => 0xFFE4,
|
||||
code => 0x8fa2c3,
|
||||
comment => '# FULLWIDTH BROKEN BAR' },
|
||||
|
||||
# additional conversions for EUC_JP -> UTF-8 conversion
|
||||
{direction => TO_UNICODE, ucs => 0x2116, code => 0x8ff4ac, comment => '# NUMERO SIGN'},
|
||||
{direction => TO_UNICODE, ucs => 0x2121, code => 0x8ff4ad, comment => '# TELEPHONE SIGN'},
|
||||
{direction => TO_UNICODE, ucs => 0x3231, code => 0x8ff4ab, comment => '# PARENTHESIZED IDEOGRAPH STOCK'}
|
||||
);
|
||||
# additional conversions for EUC_JP -> UTF-8 conversion
|
||||
{ direction => TO_UNICODE,
|
||||
ucs => 0x2116,
|
||||
code => 0x8ff4ac,
|
||||
comment => '# NUMERO SIGN' },
|
||||
{ direction => TO_UNICODE,
|
||||
ucs => 0x2121,
|
||||
code => 0x8ff4ad,
|
||||
comment => '# TELEPHONE SIGN' },
|
||||
{ direction => TO_UNICODE,
|
||||
ucs => 0x3231,
|
||||
code => 0x8ff4ab,
|
||||
comment => '# PARENTHESIZED IDEOGRAPH STOCK' });
|
||||
|
||||
print_conversion_tables($this_script, "EUC_JP", \@mapping);
|
||||
|
||||
@@ -215,6 +484,7 @@ sub sjis2jis
|
||||
|
||||
if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
|
||||
{
|
||||
|
||||
# This region (115-ku) is outside the range of JIS code, but for
|
||||
# convenience in generating code in EUC CODESET 3, move it to the
|
||||
# seemingly duplicate region (83-84-ku).
|
||||
|
||||
@@ -31,10 +31,24 @@ foreach my $i (@$mapping)
|
||||
}
|
||||
|
||||
# Some extra characters that are not in KSX1001.TXT
|
||||
push @$mapping,(
|
||||
{direction => BOTH, ucs => 0x20AC, code => 0xa2e6, comment => '# EURO SIGN', f => $this_script, l => __LINE__},
|
||||
{direction => BOTH, ucs => 0x00AE, code => 0xa2e7, comment => '# REGISTERED SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => BOTH, ucs => 0x327E, code => 0xa2e8, comment => '# CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ }
|
||||
);
|
||||
push @$mapping,
|
||||
( { direction => BOTH,
|
||||
ucs => 0x20AC,
|
||||
code => 0xa2e6,
|
||||
comment => '# EURO SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x00AE,
|
||||
code => 0xa2e7,
|
||||
comment => '# REGISTERED SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x327E,
|
||||
code => 0xa2e8,
|
||||
comment => '# CIRCLED HANGUL IEUNG U',
|
||||
f => $this_script,
|
||||
l => __LINE__ });
|
||||
|
||||
print_conversion_tables($this_script, "EUC_KR", $mapping);
|
||||
|
||||
@@ -28,8 +28,8 @@ my @extras;
|
||||
|
||||
foreach my $i (@$mapping)
|
||||
{
|
||||
my $ucs = $i->{ucs};
|
||||
my $code = $i->{code};
|
||||
my $ucs = $i->{ucs};
|
||||
my $code = $i->{code};
|
||||
my $origcode = $i->{code};
|
||||
|
||||
my $plane = ($code & 0x1f0000) >> 16;
|
||||
@@ -52,14 +52,13 @@ foreach my $i (@$mapping)
|
||||
# Some codes are mapped twice in the EUC_TW to UTF-8 table.
|
||||
if ($origcode >= 0x12121 && $origcode <= 0x20000)
|
||||
{
|
||||
push @extras, {
|
||||
ucs => $i->{ucs},
|
||||
code => ($i->{code} + 0x8ea10000),
|
||||
rest => $i->{rest},
|
||||
push @extras,
|
||||
{ ucs => $i->{ucs},
|
||||
code => ($i->{code} + 0x8ea10000),
|
||||
rest => $i->{rest},
|
||||
direction => TO_UNICODE,
|
||||
f => $i->{f},
|
||||
l => $i->{l}
|
||||
};
|
||||
f => $i->{f},
|
||||
l => $i->{l} };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,13 +35,12 @@ while (<$in>)
|
||||
my $code = hex($c);
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
push @mapping, {
|
||||
ucs => $ucs,
|
||||
code => $code,
|
||||
push @mapping,
|
||||
{ ucs => $ucs,
|
||||
code => $code,
|
||||
direction => BOTH,
|
||||
f => $in_file,
|
||||
l => $.
|
||||
};
|
||||
f => $in_file,
|
||||
l => $. };
|
||||
}
|
||||
}
|
||||
close($in);
|
||||
|
||||
@@ -25,10 +25,24 @@ my $this_script = $0;
|
||||
my $mapping = &read_source("JOHAB.TXT");
|
||||
|
||||
# Some extra characters that are not in JOHAB.TXT
|
||||
push @$mapping, (
|
||||
{direction => BOTH, ucs => 0x20AC, code => 0xd9e6, comment => '# EURO SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => BOTH, ucs => 0x00AE, code => 0xd9e7, comment => '# REGISTERED SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => BOTH, ucs => 0x327E, code => 0xd9e8, comment => '# CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ }
|
||||
);
|
||||
push @$mapping,
|
||||
( { direction => BOTH,
|
||||
ucs => 0x20AC,
|
||||
code => 0xd9e6,
|
||||
comment => '# EURO SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x00AE,
|
||||
code => 0xd9e7,
|
||||
comment => '# REGISTERED SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => BOTH,
|
||||
ucs => 0x327E,
|
||||
code => 0xd9e8,
|
||||
comment => '# CIRCLED HANGUL IEUNG U',
|
||||
f => $this_script,
|
||||
l => __LINE__ });
|
||||
|
||||
print_conversion_tables($this_script, "JOHAB", $mapping);
|
||||
|
||||
@@ -24,6 +24,7 @@ while (my $line = <$in>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
|
||||
# combined characters
|
||||
my ($c, $u1, $u2) = ($1, $2, $3);
|
||||
my $rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
@@ -31,18 +32,18 @@ while (my $line = <$in>)
|
||||
my $ucs1 = hex($u1);
|
||||
my $ucs2 = hex($u2);
|
||||
|
||||
push @mapping, {
|
||||
code => $code,
|
||||
ucs => $ucs1,
|
||||
push @mapping,
|
||||
{ code => $code,
|
||||
ucs => $ucs1,
|
||||
ucs_second => $ucs2,
|
||||
comment => $rest,
|
||||
direction => BOTH,
|
||||
f => $in_file,
|
||||
l => $.
|
||||
};
|
||||
comment => $rest,
|
||||
direction => BOTH,
|
||||
f => $in_file,
|
||||
l => $. };
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
|
||||
# non-combined characters
|
||||
my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
|
||||
my $ucs = hex($u);
|
||||
@@ -66,14 +67,13 @@ while (my $line = <$in>)
|
||||
$direction = BOTH;
|
||||
}
|
||||
|
||||
push @mapping, {
|
||||
code => $code,
|
||||
ucs => $ucs,
|
||||
comment => $rest,
|
||||
push @mapping,
|
||||
{ code => $code,
|
||||
ucs => $ucs,
|
||||
comment => $rest,
|
||||
direction => $direction,
|
||||
f => $in_file,
|
||||
l => $.
|
||||
};
|
||||
f => $in_file,
|
||||
l => $. };
|
||||
}
|
||||
}
|
||||
close($in);
|
||||
|
||||
@@ -18,33 +18,71 @@ my $this_script = $0;
|
||||
my $mapping = read_source("CP932.TXT");
|
||||
|
||||
# Drop these SJIS codes from the source for UTF8=>SJIS conversion
|
||||
my @reject_sjis =(
|
||||
0xed40..0xeefc, 0x8754..0x875d, 0x878a, 0x8782,
|
||||
0x8784, 0xfa5b, 0xfa54, 0x8790..0x8792, 0x8795..0x8797,
|
||||
0x879a..0x879c
|
||||
);
|
||||
my @reject_sjis = (
|
||||
0xed40 .. 0xeefc, 0x8754 .. 0x875d, 0x878a, 0x8782,
|
||||
0x8784, 0xfa5b, 0xfa54, 0x8790 .. 0x8792,
|
||||
0x8795 .. 0x8797, 0x879a .. 0x879c);
|
||||
|
||||
foreach my $i (@$mapping)
|
||||
{
|
||||
my $code = $i->{code};
|
||||
my $ucs = $i->{ucs};
|
||||
my $ucs = $i->{ucs};
|
||||
|
||||
if (grep {$code == $_} @reject_sjis)
|
||||
if (grep { $code == $_ } @reject_sjis)
|
||||
{
|
||||
$i->{direction} = TO_UNICODE;
|
||||
}
|
||||
}
|
||||
|
||||
# Add these UTF8->SJIS pairs to the table.
|
||||
push @$mapping, (
|
||||
{direction => FROM_UNICODE, ucs => 0x00a2, code => 0x8191, comment => '# CENT SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => FROM_UNICODE, ucs => 0x00a3, code => 0x8192, comment => '# POUND SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => FROM_UNICODE, ucs => 0x00a5, code => 0x5c, comment => '# YEN SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => FROM_UNICODE, ucs => 0x00ac, code => 0x81ca, comment => '# NOT SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => FROM_UNICODE, ucs => 0x2016, code => 0x8161, comment => '# DOUBLE VERTICAL LINE', f => $this_script, l => __LINE__ },
|
||||
{direction => FROM_UNICODE, ucs => 0x203e, code => 0x7e, comment => '# OVERLINE', f => $this_script, l => __LINE__ },
|
||||
{direction => FROM_UNICODE, ucs => 0x2212, code => 0x817c, comment => '# MINUS SIGN', f => $this_script, l => __LINE__ },
|
||||
{direction => FROM_UNICODE, ucs => 0x301c, code => 0x8160, comment => '# WAVE DASH', f => $this_script, l => __LINE__ }
|
||||
);
|
||||
push @$mapping,
|
||||
( { direction => FROM_UNICODE,
|
||||
ucs => 0x00a2,
|
||||
code => 0x8191,
|
||||
comment => '# CENT SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => FROM_UNICODE,
|
||||
ucs => 0x00a3,
|
||||
code => 0x8192,
|
||||
comment => '# POUND SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => FROM_UNICODE,
|
||||
ucs => 0x00a5,
|
||||
code => 0x5c,
|
||||
comment => '# YEN SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => FROM_UNICODE,
|
||||
ucs => 0x00ac,
|
||||
code => 0x81ca,
|
||||
comment => '# NOT SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => FROM_UNICODE,
|
||||
ucs => 0x2016,
|
||||
code => 0x8161,
|
||||
comment => '# DOUBLE VERTICAL LINE',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => FROM_UNICODE,
|
||||
ucs => 0x203e,
|
||||
code => 0x7e,
|
||||
comment => '# OVERLINE',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => FROM_UNICODE,
|
||||
ucs => 0x2212,
|
||||
code => 0x817c,
|
||||
comment => '# MINUS SIGN',
|
||||
f => $this_script,
|
||||
l => __LINE__ },
|
||||
{ direction => FROM_UNICODE,
|
||||
ucs => 0x301c,
|
||||
code => 0x8160,
|
||||
comment => '# WAVE DASH',
|
||||
f => $this_script,
|
||||
l => __LINE__ });
|
||||
|
||||
print_conversion_tables($this_script, "SJIS", $mapping);
|
||||
|
||||
@@ -38,18 +38,23 @@ while (<$in>)
|
||||
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
push @mapping, {
|
||||
ucs => $ucs,
|
||||
code => $code,
|
||||
push @mapping,
|
||||
{ ucs => $ucs,
|
||||
code => $code,
|
||||
direction => BOTH,
|
||||
f => $in_file,
|
||||
l => $.
|
||||
};
|
||||
f => $in_file,
|
||||
l => $. };
|
||||
}
|
||||
}
|
||||
close($in);
|
||||
|
||||
# One extra character that's not in the source file.
|
||||
push @mapping, { direction => BOTH, code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ };
|
||||
push @mapping,
|
||||
{ direction => BOTH,
|
||||
code => 0xa2e8,
|
||||
ucs => 0x327e,
|
||||
comment => 'CIRCLED HANGUL IEUNG U',
|
||||
f => $this_script,
|
||||
l => __LINE__ };
|
||||
|
||||
print_conversion_tables($this_script, "UHC", \@mapping);
|
||||
|
||||
@@ -9,15 +9,15 @@ use strict;
|
||||
|
||||
use Exporter 'import';
|
||||
|
||||
our @EXPORT = qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables);
|
||||
our @EXPORT =
|
||||
qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables);
|
||||
|
||||
# Constants used in the 'direction' field of the character maps
|
||||
use constant {
|
||||
NONE => 0,
|
||||
TO_UNICODE => 1,
|
||||
FROM_UNICODE => 2,
|
||||
BOTH => 3
|
||||
};
|
||||
BOTH => 3 };
|
||||
|
||||
#######################################################################
|
||||
# read_source - common routine to read source file
|
||||
@@ -36,7 +36,7 @@ sub read_source
|
||||
next if (/^#/);
|
||||
chop;
|
||||
|
||||
next if (/^$/); # Ignore empty lines
|
||||
next if (/^$/); # Ignore empty lines
|
||||
|
||||
next if (/^0x([0-9A-F]+)\s+(#.*)$/);
|
||||
|
||||
@@ -49,13 +49,13 @@ sub read_source
|
||||
print STDERR "READ ERROR at line $. in $fname: $_\n";
|
||||
exit;
|
||||
}
|
||||
my $out = {code => hex($1),
|
||||
ucs => hex($2),
|
||||
comment => $4,
|
||||
direction => BOTH,
|
||||
f => $fname,
|
||||
l => $.
|
||||
};
|
||||
my $out = {
|
||||
code => hex($1),
|
||||
ucs => hex($2),
|
||||
comment => $4,
|
||||
direction => BOTH,
|
||||
f => $fname,
|
||||
l => $. };
|
||||
|
||||
# Ignore pure ASCII mappings. PostgreSQL character conversion code
|
||||
# never even passes these to the conversion code.
|
||||
@@ -92,8 +92,10 @@ sub print_conversion_tables
|
||||
{
|
||||
my ($this_script, $csname, $charset) = @_;
|
||||
|
||||
print_conversion_tables_direction($this_script, $csname, FROM_UNICODE, $charset);
|
||||
print_conversion_tables_direction($this_script, $csname, TO_UNICODE, $charset);
|
||||
print_conversion_tables_direction($this_script, $csname, FROM_UNICODE,
|
||||
$charset);
|
||||
print_conversion_tables_direction($this_script, $csname, TO_UNICODE,
|
||||
$charset);
|
||||
}
|
||||
|
||||
#############################################################################
|
||||
@@ -117,14 +119,14 @@ sub print_conversion_tables_direction
|
||||
my $tblname;
|
||||
if ($direction == TO_UNICODE)
|
||||
{
|
||||
$fname = lc("${csname}_to_utf8.map");
|
||||
$fname = lc("${csname}_to_utf8.map");
|
||||
$tblname = lc("${csname}_to_unicode_tree");
|
||||
|
||||
print "- Writing ${csname}=>UTF8 conversion table: $fname\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
$fname = lc("utf8_to_${csname}.map");
|
||||
$fname = lc("utf8_to_${csname}.map");
|
||||
$tblname = lc("${csname}_from_unicode_tree");
|
||||
|
||||
print "- Writing UTF8=>${csname} conversion table: $fname\n";
|
||||
@@ -135,24 +137,22 @@ sub print_conversion_tables_direction
|
||||
print $out "/* src/backend/utils/mb/Unicode/$fname */\n";
|
||||
print $out "/* This file is generated by $this_script */\n\n";
|
||||
|
||||
# Collect regular, non-combined, mappings, and create the radix tree from them.
|
||||
# Collect regular, non-combined, mappings, and create the radix tree from them.
|
||||
my $charmap = &make_charmap($out, $charset, $direction, 0);
|
||||
print_radix_table($out, $tblname, $charmap);
|
||||
|
||||
# Collect combined characters, and create combined character table (if any)
|
||||
# Collect combined characters, and create combined character table (if any)
|
||||
my $charmap_combined = &make_charmap_combined($charset, $direction);
|
||||
|
||||
if (scalar @{$charmap_combined} > 0)
|
||||
{
|
||||
if ($direction == TO_UNICODE)
|
||||
{
|
||||
print_to_utf8_combined_map($out, $csname,
|
||||
$charmap_combined, 1);
|
||||
print_to_utf8_combined_map($out, $csname, $charmap_combined, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
print_from_utf8_combined_map($out, $csname,
|
||||
$charmap_combined, 1);
|
||||
print_from_utf8_combined_map($out, $csname, $charmap_combined, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,14 +166,16 @@ sub print_from_utf8_combined_map
|
||||
my $last_comment = "";
|
||||
|
||||
printf $out "\n/* Combined character map */\n";
|
||||
printf $out "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
|
||||
printf $out
|
||||
"static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
|
||||
scalar(@$table);
|
||||
my $first = 1;
|
||||
foreach my $i (sort {$a->{utf8} <=> $b->{utf8}} @$table)
|
||||
{
|
||||
foreach my $i (sort { $a->{utf8} <=> $b->{utf8} } @$table)
|
||||
{
|
||||
print($out ",") if (!$first);
|
||||
$first = 0;
|
||||
print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
|
||||
print $out "\t/* $last_comment */"
|
||||
if ($verbose && $last_comment ne "");
|
||||
|
||||
printf $out "\n {0x%08x, 0x%08x, 0x%04x}",
|
||||
$i->{utf8}, $i->{utf8_second}, $i->{code};
|
||||
@@ -198,15 +200,17 @@ sub print_to_utf8_combined_map
|
||||
my $last_comment = "";
|
||||
|
||||
printf $out "\n/* Combined character map */\n";
|
||||
printf $out "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
|
||||
printf $out
|
||||
"static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
|
||||
scalar(@$table);
|
||||
|
||||
my $first = 1;
|
||||
foreach my $i (sort {$a->{code} <=> $b->{code}} @$table)
|
||||
{
|
||||
foreach my $i (sort { $a->{code} <=> $b->{code} } @$table)
|
||||
{
|
||||
print($out ",") if (!$first);
|
||||
$first = 0;
|
||||
print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
|
||||
print $out "\t/* $last_comment */"
|
||||
if ($verbose && $last_comment ne "");
|
||||
|
||||
printf $out "\n {0x%04x, 0x%08x, 0x%08x}",
|
||||
$i->{code}, $i->{utf8}, $i->{utf8_second};
|
||||
@@ -214,7 +218,7 @@ sub print_to_utf8_combined_map
|
||||
if ($verbose >= 2)
|
||||
{
|
||||
$last_comment =
|
||||
sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
|
||||
sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
|
||||
}
|
||||
elsif ($verbose >= 1)
|
||||
{
|
||||
@@ -255,25 +259,25 @@ sub print_radix_table
|
||||
}
|
||||
elsif ($in < 0x10000)
|
||||
{
|
||||
my $b1 = $in >> 8;
|
||||
my $b2 = $in & 0xff;
|
||||
my $b1 = $in >> 8;
|
||||
my $b2 = $in & 0xff;
|
||||
|
||||
$b2map{$b1}{$b2} = $out;
|
||||
}
|
||||
elsif ($in < 0x1000000)
|
||||
{
|
||||
my $b1 = $in >> 16;
|
||||
my $b2 = ($in >> 8) & 0xff;
|
||||
my $b3 = $in & 0xff;
|
||||
my $b1 = $in >> 16;
|
||||
my $b2 = ($in >> 8) & 0xff;
|
||||
my $b3 = $in & 0xff;
|
||||
|
||||
$b3map{$b1}{$b2}{$b3} = $out;
|
||||
}
|
||||
elsif ($in < 0x100000000)
|
||||
{
|
||||
my $b1 = $in >> 24;
|
||||
my $b2 = ($in >> 16) & 0xff;
|
||||
my $b3 = ($in >> 8) & 0xff;
|
||||
my $b4 = $in & 0xff;
|
||||
my $b1 = $in >> 24;
|
||||
my $b2 = ($in >> 16) & 0xff;
|
||||
my $b3 = ($in >> 8) & 0xff;
|
||||
my $b4 = $in & 0xff;
|
||||
|
||||
$b4map{$b1}{$b2}{$b3}{$b4} = $out;
|
||||
}
|
||||
@@ -309,10 +313,14 @@ sub print_radix_table
|
||||
###
|
||||
|
||||
# Add the segments for the radix trees themselves.
|
||||
push @segments, build_segments_from_tree("Single byte table", "1-byte", 1, \%b1map);
|
||||
push @segments, build_segments_from_tree("Two byte table", "2-byte", 2, \%b2map);
|
||||
push @segments, build_segments_from_tree("Three byte table", "3-byte", 3, \%b3map);
|
||||
push @segments, build_segments_from_tree("Four byte table", "4-byte", 4, \%b4map);
|
||||
push @segments,
|
||||
build_segments_from_tree("Single byte table", "1-byte", 1, \%b1map);
|
||||
push @segments,
|
||||
build_segments_from_tree("Two byte table", "2-byte", 2, \%b2map);
|
||||
push @segments,
|
||||
build_segments_from_tree("Three byte table", "3-byte", 3, \%b3map);
|
||||
push @segments,
|
||||
build_segments_from_tree("Four byte table", "4-byte", 4, \%b4map);
|
||||
|
||||
###
|
||||
### Find min and max index used in each level of each tree.
|
||||
@@ -325,23 +333,24 @@ sub print_radix_table
|
||||
my %max_idx;
|
||||
foreach my $seg (@segments)
|
||||
{
|
||||
my $this_min = $min_idx{$seg->{depth}}->{$seg->{level}};
|
||||
my $this_max = $max_idx{$seg->{depth}}->{$seg->{level}};
|
||||
my $this_min = $min_idx{ $seg->{depth} }->{ $seg->{level} };
|
||||
my $this_max = $max_idx{ $seg->{depth} }->{ $seg->{level} };
|
||||
|
||||
foreach my $i (keys %{$seg->{values}})
|
||||
foreach my $i (keys %{ $seg->{values} })
|
||||
{
|
||||
$this_min = $i if (!defined $this_min || $i < $this_min);
|
||||
$this_max = $i if (!defined $this_max || $i > $this_max);
|
||||
}
|
||||
|
||||
$min_idx{$seg->{depth}}{$seg->{level}} = $this_min;
|
||||
$max_idx{$seg->{depth}}{$seg->{level}} = $this_max;
|
||||
$min_idx{ $seg->{depth} }{ $seg->{level} } = $this_min;
|
||||
$max_idx{ $seg->{depth} }{ $seg->{level} } = $this_max;
|
||||
}
|
||||
|
||||
# Copy the mins and max's back to every segment, for convenience.
|
||||
foreach my $seg (@segments)
|
||||
{
|
||||
$seg->{min_idx} = $min_idx{$seg->{depth}}{$seg->{level}};
|
||||
$seg->{max_idx} = $max_idx{$seg->{depth}}{$seg->{level}};
|
||||
$seg->{min_idx} = $min_idx{ $seg->{depth} }{ $seg->{level} };
|
||||
$seg->{max_idx} = $max_idx{ $seg->{depth} }{ $seg->{level} };
|
||||
}
|
||||
|
||||
###
|
||||
@@ -359,11 +368,10 @@ sub print_radix_table
|
||||
$widest_range = $this_range if ($this_range > $widest_range);
|
||||
}
|
||||
|
||||
unshift @segments, {
|
||||
header => "Dummy map, for invalid values",
|
||||
unshift @segments,
|
||||
{ header => "Dummy map, for invalid values",
|
||||
min_idx => 0,
|
||||
max_idx => $widest_range
|
||||
};
|
||||
max_idx => $widest_range };
|
||||
|
||||
###
|
||||
### Eliminate overlapping zeros
|
||||
@@ -378,26 +386,34 @@ sub print_radix_table
|
||||
###
|
||||
for (my $j = 0; $j < $#segments - 1; $j++)
|
||||
{
|
||||
my $seg = $segments[$j];
|
||||
my $nextseg = $segments[$j + 1];
|
||||
my $seg = $segments[$j];
|
||||
my $nextseg = $segments[ $j + 1 ];
|
||||
|
||||
# Count the number of zero values at the end of this segment.
|
||||
my $this_trail_zeros = 0;
|
||||
for (my $i = $seg->{max_idx}; $i >= $seg->{min_idx} && !$seg->{values}->{$i}; $i--)
|
||||
for (
|
||||
my $i = $seg->{max_idx};
|
||||
$i >= $seg->{min_idx} && !$seg->{values}->{$i};
|
||||
$i--)
|
||||
{
|
||||
$this_trail_zeros++;
|
||||
}
|
||||
|
||||
# Count the number of zeros at the beginning of next segment.
|
||||
my $next_lead_zeros = 0;
|
||||
for (my $i = $nextseg->{min_idx}; $i <= $nextseg->{max_idx} && !$nextseg->{values}->{$i}; $i++)
|
||||
for (
|
||||
my $i = $nextseg->{min_idx};
|
||||
$i <= $nextseg->{max_idx} && !$nextseg->{values}->{$i};
|
||||
$i++)
|
||||
{
|
||||
$next_lead_zeros++;
|
||||
}
|
||||
|
||||
# How many zeros in common?
|
||||
my $overlaid_trail_zeros =
|
||||
($this_trail_zeros > $next_lead_zeros) ? $next_lead_zeros : $this_trail_zeros;
|
||||
($this_trail_zeros > $next_lead_zeros)
|
||||
? $next_lead_zeros
|
||||
: $this_trail_zeros;
|
||||
|
||||
$seg->{overlaid_trail_zeros} = $overlaid_trail_zeros;
|
||||
$seg->{max_idx} = $seg->{max_idx} - $overlaid_trail_zeros;
|
||||
@@ -419,7 +435,7 @@ sub print_radix_table
|
||||
foreach my $seg (@segments)
|
||||
{
|
||||
$seg->{offset} = $flatoff;
|
||||
$segmap{$seg->{label}} = $flatoff;
|
||||
$segmap{ $seg->{label} } = $flatoff;
|
||||
$flatoff += $seg->{max_idx} - $seg->{min_idx} + 1;
|
||||
}
|
||||
my $tblsize = $flatoff;
|
||||
@@ -427,9 +443,9 @@ sub print_radix_table
|
||||
# Second pass: look up the offset of each label reference in the hash.
|
||||
foreach my $seg (@segments)
|
||||
{
|
||||
while (my ($i, $val) = each %{$seg->{values}})
|
||||
while (my ($i, $val) = each %{ $seg->{values} })
|
||||
{
|
||||
if (!($val =~ /^[0-9,.E]+$/ ))
|
||||
if (!($val =~ /^[0-9,.E]+$/))
|
||||
{
|
||||
my $segoff = $segmap{$val};
|
||||
if ($segoff)
|
||||
@@ -482,7 +498,7 @@ sub print_radix_table
|
||||
my $max_val = 0;
|
||||
foreach my $seg (@segments)
|
||||
{
|
||||
foreach my $val (values %{$seg->{values}})
|
||||
foreach my $val (values %{ $seg->{values} })
|
||||
{
|
||||
$max_val = $val if ($val > $max_val);
|
||||
}
|
||||
@@ -498,17 +514,17 @@ sub print_radix_table
|
||||
if ($max_val <= 0xffff)
|
||||
{
|
||||
$vals_per_line = 8;
|
||||
$colwidth = 4;
|
||||
$colwidth = 4;
|
||||
}
|
||||
elsif ($max_val <= 0xffffff)
|
||||
{
|
||||
$vals_per_line = 4;
|
||||
$colwidth = 6;
|
||||
$colwidth = 6;
|
||||
}
|
||||
else
|
||||
{
|
||||
$vals_per_line = 4;
|
||||
$colwidth = 8;
|
||||
$colwidth = 8;
|
||||
}
|
||||
|
||||
###
|
||||
@@ -529,17 +545,20 @@ sub print_radix_table
|
||||
print $out " ${tblname}_table,\n";
|
||||
}
|
||||
printf $out "\n";
|
||||
printf $out " 0x%04x, /* offset of table for 1-byte inputs */\n", $b1root;
|
||||
printf $out " 0x%04x, /* offset of table for 1-byte inputs */\n",
|
||||
$b1root;
|
||||
printf $out " 0x%02x, /* b1_lower */\n", $b1_lower;
|
||||
printf $out " 0x%02x, /* b1_upper */\n", $b1_upper;
|
||||
printf $out "\n";
|
||||
printf $out " 0x%04x, /* offset of table for 2-byte inputs */\n", $b2root;
|
||||
printf $out " 0x%04x, /* offset of table for 2-byte inputs */\n",
|
||||
$b2root;
|
||||
printf $out " 0x%02x, /* b2_1_lower */\n", $b2_1_lower;
|
||||
printf $out " 0x%02x, /* b2_1_upper */\n", $b2_1_upper;
|
||||
printf $out " 0x%02x, /* b2_2_lower */\n", $b2_2_lower;
|
||||
printf $out " 0x%02x, /* b2_2_upper */\n", $b2_2_upper;
|
||||
printf $out "\n";
|
||||
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n", $b3root;
|
||||
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n",
|
||||
$b3root;
|
||||
printf $out " 0x%02x, /* b3_1_lower */\n", $b3_1_lower;
|
||||
printf $out " 0x%02x, /* b3_1_upper */\n", $b3_1_upper;
|
||||
printf $out " 0x%02x, /* b3_2_lower */\n", $b3_2_lower;
|
||||
@@ -547,7 +566,8 @@ sub print_radix_table
|
||||
printf $out " 0x%02x, /* b3_3_lower */\n", $b3_3_lower;
|
||||
printf $out " 0x%02x, /* b3_3_upper */\n", $b3_3_upper;
|
||||
printf $out "\n";
|
||||
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n", $b4root;
|
||||
printf $out " 0x%04x, /* offset of table for 3-byte inputs */\n",
|
||||
$b4root;
|
||||
printf $out " 0x%02x, /* b4_1_lower */\n", $b4_1_lower;
|
||||
printf $out " 0x%02x, /* b4_1_upper */\n", $b4_1_upper;
|
||||
printf $out " 0x%02x, /* b4_2_lower */\n", $b4_2_lower;
|
||||
@@ -561,18 +581,21 @@ sub print_radix_table
|
||||
print $out "static const $datatype ${tblname}_table[$tblsize] =\n";
|
||||
print $out "{";
|
||||
my $off = 0;
|
||||
|
||||
foreach my $seg (@segments)
|
||||
{
|
||||
printf $out "\n";
|
||||
printf $out " /*** %s - offset 0x%05x ***/\n", $seg->{header}, $off;
|
||||
printf $out "\n";
|
||||
|
||||
for (my $i=$seg->{min_idx}; $i <= $seg->{max_idx};)
|
||||
for (my $i = $seg->{min_idx}; $i <= $seg->{max_idx};)
|
||||
{
|
||||
|
||||
# Print the next line's worth of values.
|
||||
# XXX pad to begin at a nice boundary
|
||||
printf $out " /* %02x */ ", $i;
|
||||
for (my $j = 0; $j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
|
||||
for (my $j = 0;
|
||||
$j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
|
||||
{
|
||||
my $val = $seg->{values}->{$i};
|
||||
|
||||
@@ -588,7 +611,8 @@ sub print_radix_table
|
||||
}
|
||||
if ($seg->{overlaid_trail_zeros})
|
||||
{
|
||||
printf $out " /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n";
|
||||
printf $out
|
||||
" /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -607,13 +631,14 @@ sub build_segments_from_tree
|
||||
|
||||
if (%{$map})
|
||||
{
|
||||
@segments = build_segments_recurse($header, $rootlabel, "", 1, $depth, $map);
|
||||
@segments =
|
||||
build_segments_recurse($header, $rootlabel, "", 1, $depth, $map);
|
||||
|
||||
# Sort the segments into "breadth-first" order. Not strictly required,
|
||||
# but makes the maps nicer to read.
|
||||
@segments = sort { $a->{level} cmp $b->{level} or
|
||||
$a->{path} cmp $b->{path}}
|
||||
@segments;
|
||||
@segments =
|
||||
sort { $a->{level} cmp $b->{level} or $a->{path} cmp $b->{path} }
|
||||
@segments;
|
||||
}
|
||||
|
||||
return @segments;
|
||||
@@ -628,14 +653,13 @@ sub build_segments_recurse
|
||||
|
||||
if ($level == $depth)
|
||||
{
|
||||
push @segments, {
|
||||
header => $header . ", leaf: ${path}xx",
|
||||
label => $label,
|
||||
level => $level,
|
||||
depth => $depth,
|
||||
path => $path,
|
||||
values => $map
|
||||
};
|
||||
push @segments,
|
||||
{ header => $header . ", leaf: ${path}xx",
|
||||
label => $label,
|
||||
level => $level,
|
||||
depth => $depth,
|
||||
path => $path,
|
||||
values => $map };
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -646,19 +670,19 @@ sub build_segments_recurse
|
||||
my $childpath = $path . sprintf("%02x", $i);
|
||||
my $childlabel = "$depth-level-$level-$childpath";
|
||||
|
||||
push @segments, build_segments_recurse($header, $childlabel, $childpath,
|
||||
$level + 1, $depth, $val);
|
||||
push @segments,
|
||||
build_segments_recurse($header, $childlabel, $childpath,
|
||||
$level + 1, $depth, $val);
|
||||
$children{$i} = $childlabel;
|
||||
}
|
||||
|
||||
push @segments, {
|
||||
header => $header . ", byte #$level: ${path}xx",
|
||||
label => $label,
|
||||
level => $level,
|
||||
depth => $depth,
|
||||
path => $path,
|
||||
values => \%children
|
||||
};
|
||||
push @segments,
|
||||
{ header => $header . ", byte #$level: ${path}xx",
|
||||
label => $label,
|
||||
level => $level,
|
||||
depth => $depth,
|
||||
path => $path,
|
||||
values => \%children };
|
||||
}
|
||||
return @segments;
|
||||
}
|
||||
@@ -688,29 +712,31 @@ sub make_charmap
|
||||
my %charmap;
|
||||
foreach my $c (@$charset)
|
||||
{
|
||||
|
||||
# combined characters are handled elsewhere
|
||||
next if (defined $c->{ucs_second});
|
||||
|
||||
next if ($c->{direction} != $direction && $c->{direction} != BOTH);
|
||||
|
||||
my ($src, $dst) =
|
||||
$direction == TO_UNICODE
|
||||
? ($c->{code}, ucs2utf($c->{ucs}))
|
||||
: (ucs2utf($c->{ucs}), $c->{code});
|
||||
$direction == TO_UNICODE
|
||||
? ($c->{code}, ucs2utf($c->{ucs}))
|
||||
: (ucs2utf($c->{ucs}), $c->{code});
|
||||
|
||||
# check for duplicate source codes
|
||||
if (defined $charmap{$src})
|
||||
{
|
||||
printf STDERR
|
||||
"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n",
|
||||
$c->{f}, $c->{l}, $src, $charmap{$src}, $dst;
|
||||
"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n",
|
||||
$c->{f}, $c->{l}, $src, $charmap{$src}, $dst;
|
||||
exit;
|
||||
}
|
||||
$charmap{$src} = $dst;
|
||||
|
||||
if ($verbose)
|
||||
{
|
||||
printf $out "0x%04x 0x%04x %s:%d %s\n", $src, $dst, $c->{f}, $c->{l}, $c->{comment};
|
||||
printf $out "0x%04x 0x%04x %s:%d %s\n", $src, $dst, $c->{f},
|
||||
$c->{l}, $c->{comment};
|
||||
}
|
||||
}
|
||||
if ($verbose)
|
||||
@@ -743,11 +769,13 @@ sub make_charmap_combined
|
||||
|
||||
if (defined $c->{ucs_second})
|
||||
{
|
||||
my $entry = {utf8 => ucs2utf($c->{ucs}),
|
||||
utf8_second => ucs2utf($c->{ucs_second}),
|
||||
code => $c->{code},
|
||||
comment => $c->{comment},
|
||||
f => $c->{f}, l => $c->{l}};
|
||||
my $entry = {
|
||||
utf8 => ucs2utf($c->{ucs}),
|
||||
utf8_second => ucs2utf($c->{ucs_second}),
|
||||
code => $c->{code},
|
||||
comment => $c->{comment},
|
||||
f => $c->{f},
|
||||
l => $c->{l} };
|
||||
push @combined, $entry;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user