diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl index 6a1321bab84..bfd4511d724 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl @@ -24,8 +24,8 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) - -require "convutils.pm"; +use strict; +require convutils; # Load BIG5.TXT my $all = &read_source("BIG5.TXT"); diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl index 8df23f8be65..6b65c11a654 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl @@ -13,24 +13,24 @@ # where the "u" field is the Unicode code point in hex, # and the "b" field is the hex byte sequence for GB18030 -require "convutils.pm"; +use strict; +require convutils; # Read the input -$in_file = "gb-18030-2000.xml"; +my $in_file = "gb-18030-2000.xml"; -open(FILE, $in_file) || die("cannot open $in_file"); +open(my $in, '<', $in_file) || die("cannot open $in_file"); my @mapping; -while () +while (<$in>) { next if (!m/) direction => 'both' } } -close(FILE); +close($in); print_tables("EUC_CN", \@mapping); diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl index b4e140b657c..b1ad19a69d8 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl @@ -7,27 +7,27 @@ # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from # "euc-jis-2004-std.txt" (http://x0213.org) -require "convutils.pm"; +use strict; +require convutils; # first generate UTF-8 --> EUC_JIS_2004 table -$in_file = "euc-jis-2004-std.txt"; +my $in_file = "euc-jis-2004-std.txt"; -open(FILE, $in_file) || die("cannot open $in_file"); +open(my $in, '<', $in_file) || die("cannot open $in_file"); my @all; -while ($line = ) +while (my $line = <$in>) { if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs1 = hex($u1); - $ucs2 = hex($u2); + # combined characters + my ($c, $u1, $u2) = ($1, $2, $3); + my $rest = "U+" . $u1 . "+" . $u2 . $4; + my $code = hex($c); + my $ucs1 = hex($u1); + my $ucs2 = hex($u2); push @all, { direction => 'both', ucs => $ucs1, @@ -38,22 +38,16 @@ while ($line = ) } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; - $rest = "U+" . $u . $3; + # non-combined characters + my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3); + my $ucs = hex($u); + my $code = hex($c); + + next if ($code < 0x80 && $ucs < 0x80); + + push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest }; } - else - { - next; - } - - $ucs = hex($u); - $code = hex($c); - - next if ($code < 0x80 && $ucs < 0x80); - - push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest }; } -close(FILE); +close($in); print_tables("EUC_JIS_2004", \@all, 1); diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl index 0e9dd292bff..1bfd3b850e0 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl @@ -12,7 +12,7 @@ # organization's ftp site. use strict; -require "convutils.pm"; +require convutils; # Load JIS0212.TXT my $jis0212 = &read_source("JIS0212.TXT"); diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl index a917d067172..dffcdc40ded 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl @@ -16,7 +16,8 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) -require "convutils.pm"; +use strict; +require convutils; # Load the source file. diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl index aceef5433c2..cb81c4650d5 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl @@ -17,7 +17,8 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) -require "convutils.pm"; +use strict; +require convutils; my $mapping = &read_source("CNS11643.TXT"); diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl index f58361024e4..7be4b7b0abe 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl @@ -13,24 +13,24 @@ # where the "u" field is the Unicode code point in hex, # and the "b" field is the hex byte sequence for GB18030 -require "convutils.pm"; +use strict; +require convutils; # Read the input -$in_file = "gb-18030-2000.xml"; +my $in_file = "gb-18030-2000.xml"; -open(FILE, $in_file) || die("cannot open $in_file"); +open(my $in, '<', $in_file) || die("cannot open $in_file"); my @mapping; -while () +while (<$in>) { next if (!m/= 0x80 && $ucs >= 0x0080) { push @mapping, { @@ -40,6 +40,6 @@ while () } } } -close(FILE); +close($in); print_tables("GB18030", \@mapping); diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl index b98f9a7bf55..b249b81096c 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl @@ -15,7 +15,8 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) -require "convutils.pm"; +use strict; +require convutils; # Load the source file. diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl index 16a53ad1d9f..6be56b5b526 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl @@ -7,27 +7,27 @@ # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from # "sjis-0213-2004-std.txt" (http://x0213.org) -require "convutils.pm"; +use strict; +require convutils; # first generate UTF-8 --> SHIFT_JIS_2004 table -$in_file = "sjis-0213-2004-std.txt"; +my $in_file = "sjis-0213-2004-std.txt"; -open(FILE, $in_file) || die("cannot open $in_file"); +open(my $in, '<', $in_file) || die("cannot open $in_file"); my @mapping; -while ($line = ) +while (my $line = <$in>) { if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs1 = hex($u1); - $ucs2 = hex($u2); + # combined characters + my ($c, $u1, $u2) = ($1, $2, $3); + my $rest = "U+" . $u1 . "+" . $u2 . $4; + my $code = hex($c); + my $ucs1 = hex($u1); + my $ucs2 = hex($u2); push @mapping, { code => $code, @@ -40,42 +40,37 @@ while ($line = ) } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; - $rest = "U+" . $u . $3; - } - else - { - next; - } + # non-combined characters + my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3); + my $ucs = hex($u); + my $code = hex($c); + my $direction; - $ucs = hex($u); - $code = hex($c); + if ($code < 0x80 && $ucs < 0x80) + { + next; + } + elsif ($code < 0x80) + { + $direction = 'from_unicode'; + } + elsif ($ucs < 0x80) + { + $direction = 'to_unicode'; + } + else + { + $direction = 'both'; + } - if ($code < 0x80 && $ucs < 0x80) - { - next; + push @mapping, { + code => $code, + ucs => $ucs, + comment => $rest, + direction => $direction + }; } - elsif ($code < 0x80) - { - $direction = 'from_unicode'; - } - elsif ($ucs < 0x80) - { - $direction = 'to_unicode'; - } - else - { - $direction = 'both'; - } - - push @mapping, { - code => $code, - ucs => $ucs, - comment => $rest, - direction => $direction - }; } -close(FILE); +close($in); print_tables("SHIFT_JIS_2004", \@mapping, 1); diff --git a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl index c8ff712af8f..17289fc5e3e 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl @@ -11,7 +11,7 @@ # ftp site. use strict; -require "convutils.pm"; +require convutils; my $charset = read_source("CP932.TXT"); diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl index b6bf3bd8f27..667f6c177c7 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl @@ -13,24 +13,24 @@ # where the "u" field is the Unicode code point in hex, # and the "b" field is the hex byte sequence for UHC -require "convutils.pm"; +use strict; +require convutils; # Read the input -$in_file = "windows-949-2000.xml"; +my $in_file = "windows-949-2000.xml"; -open(FILE, $in_file) || die("cannot open $in_file"); +open(my $in, '<', $in_file) || die("cannot open $in_file"); my @mapping; -while () +while (<$in>) { next if (!m/) } } } -close(FILE); +close($in); # One extra character that's not in the source file. push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' }; diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl index a3cf436eefd..b3188f3709a 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_most.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl @@ -15,9 +15,10 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) -require "convutils.pm"; +use strict; +require convutils; -%filename = ( +my %filename = ( 'WIN866' => 'CP866.TXT', 'WIN874' => 'CP874.TXT', 'WIN1250' => 'CP1250.TXT', @@ -46,9 +47,10 @@ require "convutils.pm"; 'KOI8U' => 'KOI8-U.TXT', 'GBK' => 'CP936.TXT'); -@charsets = keys(%filename); -@charsets = @ARGV if scalar(@ARGV); -foreach $charset (@charsets) +# make maps for all encodings if not specified +my @charsets = (scalar(@ARGV) > 0) ? @ARGV : keys(%filename); + +foreach my $charset (@charsets) { my $mapping = &read_source($filename{$charset});