1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-08 11:42:09 +03:00

Teach UtfToLocal/LocalToUtf to support algorithmic encoding conversions.

Until now, these functions have only supported encoding conversions using
lookup tables, which is fine as long as there aren't too many code points
to convert.  However, GB18030 expects all 1.1 million Unicode code points
to be convertible, which would require a ridiculously-sized lookup table.
Fortunately, a large fraction of those conversions can be expressed through
arithmetic, i.e., the conversions are one-to-one in certain defined ranges.
To support that, provide a callback function that is used after consulting
the lookup tables.  (This patch doesn't actually change anything about the
GB18030 conversion behavior; it just provides infrastructure for fixing it.)

Since this requires changing the APIs of UtfToLocal/LocalToUtf anyway,
take the opportunity to rearrange their argument lists into what seems
to me a saner order.  And beautify the call sites by using lengthof()
instead of error-prone sizeof() arithmetic.

In passing, also mark all the lookup tables used by these calls "const".
This moves an impressive amount of stuff into the text segment, at least
on my machine, and is safer anyhow.
This commit is contained in:
Tom Lane
2015-05-14 22:27:07 -04:00
parent 83e176ec18
commit 7730f48ede
108 changed files with 541 additions and 411 deletions

View File

@ -97,7 +97,7 @@ close(FILE);
$file = lc("utf8_to_big5.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapBIG5[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapBIG5[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -185,7 +185,7 @@ close(FILE);
$file = lc("big5_to_utf8.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapBIG5[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapBIG5[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -55,7 +55,7 @@ close(FILE);
$file = "utf8_to_euc_cn.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -109,7 +109,7 @@ close(FILE);
$file = "euc_cn_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -72,7 +72,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -133,7 +133,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
"static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
for $index (sort { $a cmp $b } keys(%array1))
{
@ -256,7 +256,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -283,7 +283,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
"static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
for $index (sort { $a <=> $b } keys(%array1))
{

View File

@ -136,7 +136,7 @@ close(FILE);
$file = "utf8_to_euc_jp.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -263,7 +263,7 @@ close(FILE);
$file = "euc_jp_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -55,7 +55,7 @@ close(FILE);
$file = "utf8_to_euc_kr.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -109,7 +109,7 @@ close(FILE);
$file = "euc_kr_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -71,7 +71,7 @@ close(FILE);
$file = "utf8_to_euc_tw.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -138,7 +138,7 @@ close(FILE);
$file = "euc_tw_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -52,7 +52,7 @@ close(FILE);
$file = "utf8_to_gb18030.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapGB18030[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapGB18030[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -106,7 +106,7 @@ close(FILE);
$file = "gb18030_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapGB18030[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapGB18030[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -72,7 +72,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
print FILE "static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -99,7 +99,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
"static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
for $index (sort { $a cmp $b } keys(%array1))
{
@ -185,7 +185,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFTJIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
print FILE "static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -212,7 +212,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
"static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
for $index (sort { $a <=> $b } keys(%array1))
{

View File

@ -72,7 +72,7 @@ close(FILE);
$file = "utf8_to_sjis.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapSJIS[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapSJIS[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -122,7 +122,7 @@ close(FILE);
$file = "sjis_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapSJIS[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapSJIS[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -88,7 +88,7 @@ foreach $charset (@charsets)
$file = lc("utf8_to_${charset}.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmap${charset}[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmap${charset}[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
@ -140,7 +140,7 @@ foreach $charset (@charsets)
$file = lc("${charset}_to_utf8.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmap${charset}[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapBIG5[ 13717 ] = {
static const pg_local_to_utf LUmapBIG5[ 13717 ] = {
{0xa140, 0xe38080},
{0xa141, 0xefbc8c},
{0xa142, 0xe38081},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/euc_cn_to_utf8.map */
static pg_local_to_utf LUmapEUC_CN[ 7445 ] = {
static const pg_local_to_utf LUmapEUC_CN[ 7445 ] = {
{0xa1a1, 0xe38080},
{0xa1a2, 0xe38081},
{0xa1a3, 0xe38082},

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_local_to_utf LUmapEUC_JIS_2004[] = {
static const pg_local_to_utf LUmapEUC_JIS_2004[] = {
{0x000000, 0x00000000}, /* U+0000 <control> */
{0x000001, 0x00000001}, /* U+0001 <control> */
{0x000002, 0x00000002}, /* U+0002 <control> */

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
{0x00a4f7, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x00a4f8, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x00a4f9, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/euc_jp_to_utf8.map */
static pg_local_to_utf LUmapEUC_JP[] = {
static const pg_local_to_utf LUmapEUC_JP[] = {
{0x8ea1, 0xefbda1},
{0x8ea2, 0xefbda2},
{0x8ea3, 0xefbda3},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
static const pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
{0xa1a1, 0xe38080},
{0xa1a2, 0xe38081},
{0xa1a3, 0xe38082},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/euc_tw_to_utf8.map */
static pg_local_to_utf LUmapEUC_TW[ 23575 ] = {
static const pg_local_to_utf LUmapEUC_TW[ 23575 ] = {
{0xa1a1, 0xe38080},
{0xa1a2, 0xefbc8c},
{0xa1a3, 0xe38081},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/gb18030_to_utf8.map */
static pg_local_to_utf LUmapGB18030[ 63360 ] = {
static const pg_local_to_utf LUmapGB18030[ 63360 ] = {
{0x8140, 0xe4b882},
{0x8141, 0xe4b884},
{0x8142, 0xe4b885},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/gbk_to_utf8.map */
static pg_local_to_utf LUmapGBK[ 21792 ] = {
static const pg_local_to_utf LUmapGBK[ 21792 ] = {
{0x0080, 0xe282ac},
{0x8140, 0xe4b882},
{0x8141, 0xe4b884},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_10_to_utf8.map */
static pg_local_to_utf LUmapISO8859_10[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_10[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_13_to_utf8.map */
static pg_local_to_utf LUmapISO8859_13[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_13[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_14_to_utf8.map */
static pg_local_to_utf LUmapISO8859_14[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_14[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_15_to_utf8.map */
static pg_local_to_utf LUmapISO8859_15[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_15[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_16_to_utf8.map */
static pg_local_to_utf LUmapISO8859_16[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_16[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_2_to_utf8.map */
static pg_local_to_utf LUmapISO8859_2[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_2[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_3_to_utf8.map */
static pg_local_to_utf LUmapISO8859_3[ 121 ] = {
static const pg_local_to_utf LUmapISO8859_3[ 121 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_4_to_utf8.map */
static pg_local_to_utf LUmapISO8859_4[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_4[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_5_to_utf8.map */
static pg_local_to_utf LUmapISO8859_5[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_5[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_6_to_utf8.map */
static pg_local_to_utf LUmapISO8859_6[ 83 ] = {
static const pg_local_to_utf LUmapISO8859_6[ 83 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map */
static pg_local_to_utf LUmapISO8859_7[ 125 ] = {
static const pg_local_to_utf LUmapISO8859_7[ 125 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_8_to_utf8.map */
static pg_local_to_utf LUmapISO8859_8[ 92 ] = {
static const pg_local_to_utf LUmapISO8859_8[ 92 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/iso8859_9_to_utf8.map */
static pg_local_to_utf LUmapISO8859_9[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_9[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapJOHAB[ 17049 ] = {
static const pg_local_to_utf LUmapJOHAB[ 17049 ] = {
{0x8444, 0xe384b3},
{0x8446, 0xe384b5},
{0x8447, 0xe384b6},

View File

@ -1,6 +1,6 @@
/* src/backend/utils/mb/Unicode/koi8r_to_utf8.map */
static pg_local_to_utf LUmapKOI8R[ 128 ] = {
static const pg_local_to_utf LUmapKOI8R[ 128 ] = {
{0x0080, 0xe29480},
{0x0081, 0xe29482},
{0x0082, 0xe2948c},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapKOI8U[ 128 ] = {
static const pg_local_to_utf LUmapKOI8U[ 128 ] = {
{0x0080, 0xe29480},
{0x0081, 0xe29482},
{0x0082, 0xe2948c},

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_SHIFTJIS_2004.pl
*/
static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
{0x0000, 0x00000000}, /* U+0000 <control> */
{0x0001, 0x00000001}, /* U+0001 <control> */
{0x0002, 0x00000002}, /* U+0002 <control> */

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
{0x82f5, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x82f6, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x82f7, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapSJIS[ 7787 ] = {
static const pg_local_to_utf LUmapSJIS[ 7787 ] = {
{0x00a1, 0xefbda1},
{0x00a2, 0xefbda2},
{0x00a3, 0xefbda3},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapUHC[ 17237 ] = {
static const pg_local_to_utf LUmapUHC[ 17237 ] = {
{0x8141, 0xeab082},
{0x8142, 0xeab083},
{0x8143, 0xeab085},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapBIG5[ 13711 ] = {
static const pg_utf_to_local ULmapBIG5[ 13711 ] = {
{0xc2a2, 0xa246},
{0xc2a3, 0xa247},
{0xc2a5, 0xa244},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
static const pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
{0xc2a4, 0xa1e8},
{0xc2a7, 0xa1ec},
{0xc2a8, 0xa1a7},

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_utf_to_local ULmapEUC_JIS_2004[] = {
static const pg_utf_to_local ULmapEUC_JIS_2004[] = {
{0x00000000, 0x000000}, /* U+0000 <control> */
{0x00000001, 0x000001}, /* U+0001 <control> */
{0x00000002, 0x000002}, /* U+0002 <control> */

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x00abc4}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x00abc8}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x00abc9}, /* U+0254+0301 [2000] */

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
static const pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
{0xc2a1, 0x8fa2c2},
{0xc2a4, 0x8fa2f0},
{0xc2a6, 0x8fa2c3},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
static const pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
{0xc2a1, 0xa2ae},
{0xc2a4, 0xa2b4},
{0xc2a7, 0xa1d7},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapEUC_TW[ 17711 ] = {
static const pg_utf_to_local ULmapEUC_TW[ 17711 ] = {
{0xc2a7, 0xa1f0},
{0xc2b0, 0xa2f8},
{0xc2b1, 0xa2b4},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapGB18030[ 63360 ] = {
static const pg_utf_to_local ULmapGB18030[ 63360 ] = {
{0xc280, 0x81308130},
{0xc281, 0x81308131},
{0xc282, 0x81308132},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapGBK[ 21792 ] = {
static const pg_utf_to_local ULmapGBK[ 21792 ] = {
{0xc2a4, 0xa1e8},
{0xc2a7, 0xa1ec},
{0xc2a8, 0xa1a7},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_10[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_10[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_13[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_13[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_14[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_14[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_15[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_15[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_16[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_16[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_2[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_2[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_3[ 121 ] = {
static const pg_utf_to_local ULmapISO8859_3[ 121 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_4[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_4[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_5[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_5[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_6[ 83 ] = {
static const pg_utf_to_local ULmapISO8859_6[ 83 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_7[ 125 ] = {
static const pg_utf_to_local ULmapISO8859_7[ 125 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_8[ 92 ] = {
static const pg_utf_to_local ULmapISO8859_8[ 92 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapISO8859_9[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_9[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapJOHAB[ 17049 ] = {
static const pg_utf_to_local ULmapJOHAB[ 17049 ] = {
{0xc2a1, 0xd9ae},
{0xc2a4, 0xd9b4},
{0xc2a7, 0xd967},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapKOI8R[ 128 ] = {
static const pg_utf_to_local ULmapKOI8R[ 128 ] = {
{0xc2a0, 0x009a},
{0xc2a9, 0x00bf},
{0xc2b0, 0x009c},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapKOI8U[ 128 ] = {
static const pg_utf_to_local ULmapKOI8U[ 128 ] = {
{0xc2a0, 0x009a},
{0xc2a9, 0x00bf},
{0xc2b0, 0x009c},

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
{0x00000000, 0x000000}, /* U+0000 <control> */
{0x00000001, 0x000001}, /* U+0001 <control> */
{0x00000002, 0x000002}, /* U+0002 <control> */

View File

@ -1,7 +1,7 @@
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x8663}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x8667}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x8668}, /* U+0254+0301 [2000] */

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapSJIS[ 7398 ] = {
static const pg_utf_to_local ULmapSJIS[ 7398 ] = {
{0xc19c, 0x815f},
{0xc2a2, 0x8191},
{0xc2a3, 0x8192},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapUHC[ 17237 ] = {
static const pg_utf_to_local ULmapUHC[ 17237 ] = {
{0xc2a1, 0xa2ae},
{0xc2a4, 0xa2b4},
{0xc2a7, 0xa1d7},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1250[ 123 ] = {
static const pg_utf_to_local ULmapWIN1250[ 123 ] = {
{0xc2a0, 0x00a0},
{0xc2a4, 0x00a4},
{0xc2a6, 0x00a6},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1251[ 127 ] = {
static const pg_utf_to_local ULmapWIN1251[ 127 ] = {
{0xc2a0, 0x00a0},
{0xc2a4, 0x00a4},
{0xc2a6, 0x00a6},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1252[ 123 ] = {
static const pg_utf_to_local ULmapWIN1252[ 123 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1253[ 111 ] = {
static const pg_utf_to_local ULmapWIN1253[ 111 ] = {
{0xc2a0, 0x00a0},
{0xc2a3, 0x00a3},
{0xc2a4, 0x00a4},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1254[ 121 ] = {
static const pg_utf_to_local ULmapWIN1254[ 121 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1255[ 105 ] = {
static const pg_utf_to_local ULmapWIN1255[ 105 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1256[ 128 ] = {
static const pg_utf_to_local ULmapWIN1256[ 128 ] = {
{0xc2a0, 0x00a0},
{0xc2a2, 0x00a2},
{0xc2a3, 0x00a3},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1257[ 116 ] = {
static const pg_utf_to_local ULmapWIN1257[ 116 ] = {
{0xc2a0, 0x00a0},
{0xc2a2, 0x00a2},
{0xc2a3, 0x00a3},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN1258[ 119 ] = {
static const pg_utf_to_local ULmapWIN1258[ 119 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN866[ 128 ] = {
static const pg_utf_to_local ULmapWIN866[ 128 ] = {
{0xc2a0, 0x00ff},
{0xc2a4, 0x00fd},
{0xc2b0, 0x00f8},

View File

@ -1,4 +1,4 @@
static pg_utf_to_local ULmapWIN874[ 97 ] = {
static const pg_utf_to_local ULmapWIN874[ 97 ] = {
{0xc2a0, 0x00a0},
{0xe0b881, 0x00a1},
{0xe0b882, 0x00a2},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1250[ 123 ] = {
static const pg_local_to_utf LUmapWIN1250[ 123 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0084, 0xe2809e},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1251[ 127 ] = {
static const pg_local_to_utf LUmapWIN1251[ 127 ] = {
{0x0080, 0xd082},
{0x0081, 0xd083},
{0x0082, 0xe2809a},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1252[ 123 ] = {
static const pg_local_to_utf LUmapWIN1252[ 123 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1253[ 111 ] = {
static const pg_local_to_utf LUmapWIN1253[ 111 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1254[ 121 ] = {
static const pg_local_to_utf LUmapWIN1254[ 121 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1255[ 105 ] = {
static const pg_local_to_utf LUmapWIN1255[ 105 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1256[ 128 ] = {
static const pg_local_to_utf LUmapWIN1256[ 128 ] = {
{0x0080, 0xe282ac},
{0x0081, 0xd9be},
{0x0082, 0xe2809a},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1257[ 116 ] = {
static const pg_local_to_utf LUmapWIN1257[ 116 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0084, 0xe2809e},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN1258[ 119 ] = {
static const pg_local_to_utf LUmapWIN1258[ 119 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN866[ 128 ] = {
static const pg_local_to_utf LUmapWIN866[ 128 ] = {
{0x0080, 0xd090},
{0x0081, 0xd091},
{0x0082, 0xd092},

View File

@ -1,4 +1,4 @@
static pg_local_to_utf LUmapWIN874[ 97 ] = {
static const pg_local_to_utf LUmapWIN874[ 97 ] = {
{0x0080, 0xe282ac},
{0x0085, 0xe280a6},
{0x0091, 0xe28098},

View File

@ -302,47 +302,62 @@ compare4(const void *p1, const void *p2)
}
/*
* convert 32bit wide character to mutibye stream pointed to by iso
* store 32bit character representation into multibyte stream
*/
static unsigned char *
set_iso_code(unsigned char *iso, uint32 code)
static inline unsigned char *
store_coded_char(unsigned char *dest, uint32 code)
{
if (code & 0xff000000)
*iso++ = code >> 24;
*dest++ = code >> 24;
if (code & 0x00ff0000)
*iso++ = (code & 0x00ff0000) >> 16;
*dest++ = code >> 16;
if (code & 0x0000ff00)
*iso++ = (code & 0x0000ff00) >> 8;
*dest++ = code >> 8;
if (code & 0x000000ff)
*iso++ = code & 0x000000ff;
return iso;
*dest++ = code;
return dest;
}
/*
* UTF8 ---> local code
*
* utf: input UTF8 string (need not be null-terminated).
* utf: input string in UTF8 encoding (need not be null-terminated)
* len: length of input string (in bytes)
* iso: pointer to the output area (must be large enough!)
* map: the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
(output string will be null-terminated)
* map: conversion map for single characters
* mapsize: number of entries in the conversion map
* cmap: conversion map for combined characters
* (optional, pass NULL if none)
* cmapsize: number of entries in the conversion map for combined characters
* (optional, pass 0 if none)
* conv_func: algorithmic encoding conversion function
* (optional, pass NULL if none)
* encoding: PG identifier for the local encoding
*
* For each character, the cmap (if provided) is consulted first; if no match,
* the map is consulted next; if still no match, the conv_func (if provided)
* is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
*/
void
UtfToLocal(const unsigned char *utf, unsigned char *iso,
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
int size1, int size2, int encoding, int len)
UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_utf_to_local *map, int mapsize,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding)
{
uint32 iutf;
uint32 cutf[2];
uint32 code;
pg_utf_to_local *p;
pg_utf_to_local_combined *cp;
int l;
const pg_utf_to_local *p;
const pg_utf_to_local_combined *cp;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid encoding number: %d", encoding)));
for (; len > 0; len -= l)
{
@ -351,7 +366,6 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
break;
l = pg_utf_mblen(utf);
if (len < l)
break;
@ -360,11 +374,13 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
if (l == 1)
{
/* ASCII case is easy */
/* ASCII case is easy, assume it's one-to-one conversion */
*iso++ = *utf++;
continue;
}
else if (l == 2)
/* collect coded char of length l */
if (l == 2)
{
iutf = *utf++ << 8;
iutf |= *utf++;
@ -388,15 +404,14 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
iutf = 0; /* keep compiler quiet */
}
/*
* first, try with combined map if possible
*/
/* First, try with combined map if possible */
if (cmap && len > l)
{
const unsigned char *utf_save = utf;
int len_save = len;
int l_save = l;
/* collect next character, same as above */
len -= l;
l = pg_utf_mblen(utf);
@ -406,83 +421,83 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
if (!pg_utf8_islegal(utf, l))
break;
cutf[0] = iutf;
if (l == 1)
/* We assume ASCII character cannot be in combined map */
if (l > 1)
{
if (len_save > 1)
uint32 iutf2;
uint32 cutf[2];
if (l == 2)
{
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
iutf2 = *utf++ << 8;
iutf2 |= *utf++;
}
else if (l == 3)
{
iutf2 = *utf++ << 16;
iutf2 |= *utf++ << 8;
iutf2 |= *utf++;
}
else if (l == 4)
{
iutf2 = *utf++ << 24;
iutf2 |= *utf++ << 16;
iutf2 |= *utf++ << 8;
iutf2 |= *utf++;
}
else
{
elog(ERROR, "unsupported character length %d", l);
iutf2 = 0; /* keep compiler quiet */
}
/* ASCII case is easy */
*iso++ = *utf++;
cutf[0] = iutf;
cutf[1] = iutf2;
cp = bsearch(cutf, cmap, cmapsize,
sizeof(pg_utf_to_local_combined), compare3);
if (cp)
{
iso = store_coded_char(iso, cp->code);
continue;
}
}
/* fail, so back up to reprocess second character next time */
utf = utf_save;
len = len_save;
l = l_save;
}
/* Now check ordinary map */
p = bsearch(&iutf, map, mapsize,
sizeof(pg_utf_to_local), compare1);
if (p)
{
iso = store_coded_char(iso, p->code);
continue;
}
/* if there's a conversion function, try that */
if (conv_func)
{
uint32 converted = (*conv_func) (iutf);
if (converted)
{
iso = store_coded_char(iso, converted);
continue;
}
else if (l == 2)
{
iutf = *utf++ << 8;
iutf |= *utf++;
}
else if (l == 3)
{
iutf = *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
}
else if (l == 4)
{
iutf = *utf++ << 24;
iutf |= *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
}
else
{
elog(ERROR, "unsupported character length %d", l);
iutf = 0; /* keep compiler quiet */
}
cutf[1] = iutf;
cp = bsearch(cutf, cmap, size2,
sizeof(pg_utf_to_local_combined), compare3);
if (cp)
code = cp->code;
else
{
/* not found in combined map. try with ordinary map */
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
p = bsearch(&cutf[1], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
}
}
else /* no cmap or no remaining data */
{
p = bsearch(&iutf, map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
}
iso = set_iso_code(iso, code);
/* failed to translate this character */
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
}
/* if we broke out of loop early, must be invalid input */
if (len > 0)
report_invalid_encoding(PG_UTF8, (const char *) utf, len);
@ -492,26 +507,38 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
/*
* local code ---> UTF8
*
* iso: input local string (need not be null-terminated).
* iso: input string in local encoding (need not be null-terminated)
* len: length of input string (in bytes)
* utf: pointer to the output area (must be large enough!)
* map: the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
* (output string will be null-terminated)
* map: conversion map for single characters
* mapsize: number of entries in the conversion map
* cmap: conversion map for combined characters
* (optional, pass NULL if none)
* cmapsize: number of entries in the conversion map for combined characters
* (optional, pass 0 if none)
* conv_func: algorithmic encoding conversion function
* (optional, pass NULL if none)
* encoding: PG identifier for the local encoding
*
* For each character, the map is consulted first; if no match, the cmap
* (if provided) is consulted next; if still no match, the conv_func
* (if provided) is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
*/
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
int size1, int size2, int encoding, int len)
LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_local_to_utf *map, int mapsize,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding)
{
unsigned int iiso;
uint32 iiso;
int l;
pg_local_to_utf *p;
pg_local_to_utf_combined *cp;
const pg_local_to_utf *p;
const pg_local_to_utf_combined *cp;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
@ -526,7 +553,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
if (!IS_HIGHBIT_SET(*iso))
{
/* ASCII case is easy */
/* ASCII case is easy, assume it's one-to-one conversion */
*utf++ = *iso++;
l = 1;
continue;
@ -536,6 +563,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
if (l < 0)
break;
/* collect coded char of length l */
if (l == 1)
iiso = *iso++;
else if (l == 2)
@ -562,61 +590,48 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
iiso = 0; /* keep compiler quiet */
}
p = bsearch(&iiso, map, size1,
/* First check ordinary map */
p = bsearch(&iiso, map, mapsize,
sizeof(pg_local_to_utf), compare2);
if (p == NULL)
if (p)
{
/*
* not found in the ordinary map. if there's a combined character
* map, try with it
*/
if (cmap)
utf = store_coded_char(utf, p->utf);
continue;
}
/* If there's a combined character map, try that */
if (cmap)
{
cp = bsearch(&iiso, cmap, cmapsize,
sizeof(pg_local_to_utf_combined), compare4);
if (cp)
{
cp = bsearch(&iiso, cmap, size2,
sizeof(pg_local_to_utf_combined), compare4);
if (cp)
{
if (cp->utf1 & 0xff000000)
*utf++ = cp->utf1 >> 24;
if (cp->utf1 & 0x00ff0000)
*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
if (cp->utf1 & 0x0000ff00)
*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
if (cp->utf1 & 0x000000ff)
*utf++ = cp->utf1 & 0x000000ff;
if (cp->utf2 & 0xff000000)
*utf++ = cp->utf2 >> 24;
if (cp->utf2 & 0x00ff0000)
*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
if (cp->utf2 & 0x0000ff00)
*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
if (cp->utf2 & 0x000000ff)
*utf++ = cp->utf2 & 0x000000ff;
continue;
}
utf = store_coded_char(utf, cp->utf1);
utf = store_coded_char(utf, cp->utf2);
continue;
}
report_untranslatable_char(encoding, PG_UTF8,
(const char *) (iso - l), len);
}
else
/* if there's a conversion function, try that */
if (conv_func)
{
if (p->utf & 0xff000000)
*utf++ = p->utf >> 24;
if (p->utf & 0x00ff0000)
*utf++ = (p->utf & 0x00ff0000) >> 16;
if (p->utf & 0x0000ff00)
*utf++ = (p->utf & 0x0000ff00) >> 8;
if (p->utf & 0x000000ff)
*utf++ = p->utf & 0x000000ff;
uint32 converted = (*conv_func) (iiso);
if (converted)
{
utf = store_coded_char(utf, converted);
continue;
}
}
/* failed to translate this character */
report_untranslatable_char(encoding, PG_UTF8,
(const char *) (iso - l), len);
}
/* if we broke out of loop early, must be invalid input */
if (len > 0)
report_invalid_encoding(encoding, (const char *) iso, len);

View File

@ -22,7 +22,7 @@ typedef struct
} codes_t;
/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
static codes_t big5Level1ToCnsPlane1[25] = { /* range */
static const codes_t big5Level1ToCnsPlane1[25] = { /* range */
{0xA140, 0x2121},
{0xA1F6, 0x2258},
{0xA1F7, 0x2257},
@ -51,7 +51,7 @@ static codes_t big5Level1ToCnsPlane1[25] = { /* range */
};
/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
static codes_t cnsPlane1ToBig5Level1[26] = { /* range */
static const codes_t cnsPlane1ToBig5Level1[26] = { /* range */
{0x2121, 0xA140},
{0x2257, 0xA1F7},
{0x2258, 0xA1F6},
@ -81,7 +81,7 @@ static codes_t cnsPlane1ToBig5Level1[26] = { /* range */
};
/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
static codes_t big5Level2ToCnsPlane2[48] = { /* range */
static const codes_t big5Level2ToCnsPlane2[48] = { /* range */
{0xC940, 0x2121},
{0xc94a, 0x0000},
{0xC94B, 0x212B},
@ -133,7 +133,7 @@ static codes_t big5Level2ToCnsPlane2[48] = { /* range */
};
/* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
static codes_t cnsPlane2ToBig5Level2[49] = { /* range */
static const codes_t cnsPlane2ToBig5Level2[49] = { /* range */
{0x2121, 0xC940},
{0x212B, 0xC94B},
{0x214C, 0xC9BE},
@ -186,7 +186,7 @@ static codes_t cnsPlane2ToBig5Level2[49] = { /* range */
};
/* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4 */
static unsigned short b1c4[][2] = {
static const unsigned short b1c4[][2] = {
{0xC879, 0x2123},
{0xC87B, 0x2124},
{0xC87D, 0x212A},
@ -194,7 +194,7 @@ static unsigned short b1c4[][2] = {
};
/* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3 */
static unsigned short b2c3[][2] = {
static const unsigned short b2c3[][2] = {
{0xF9D6, 0x4337},
{0xF9D7, 0x4F50},
{0xF9D8, 0x444E},
@ -205,7 +205,7 @@ static unsigned short b2c3[][2] = {
};
static unsigned short BinarySearchRange
(codes_t *array, int high, unsigned short code)
(const codes_t *array, int high, unsigned short code)
{
int low,
mid,

View File

@ -44,8 +44,11 @@ big5_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
LocalToUtf(src, dest, LUmapBIG5, NULL,
sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), 0, PG_BIG5, len);
LocalToUtf(src, len, dest,
LUmapBIG5, lengthof(LUmapBIG5),
NULL, 0,
NULL,
PG_BIG5);
PG_RETURN_VOID();
}
@ -59,8 +62,11 @@ utf8_to_big5(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
UtfToLocal(src, dest, ULmapBIG5, NULL,
sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), 0, PG_BIG5, len);
UtfToLocal(src, len, dest,
ULmapBIG5, lengthof(ULmapBIG5),
NULL, 0,
NULL,
PG_BIG5);
PG_RETURN_VOID();
}

View File

@ -53,8 +53,11 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
UtfToLocal(src, dest, ULmapKOI8R, NULL,
sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), 0, PG_KOI8R, len);
UtfToLocal(src, len, dest,
ULmapKOI8R, lengthof(ULmapKOI8R),
NULL, 0,
NULL,
PG_KOI8R);
PG_RETURN_VOID();
}
@ -68,8 +71,11 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
LocalToUtf(src, dest, LUmapKOI8R, NULL,
sizeof(LUmapKOI8R) / sizeof(pg_local_to_utf), 0, PG_KOI8R, len);
LocalToUtf(src, len, dest,
LUmapKOI8R, lengthof(LUmapKOI8R),
NULL, 0,
NULL,
PG_KOI8R);
PG_RETURN_VOID();
}
@ -83,8 +89,11 @@ utf8_to_koi8u(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
UtfToLocal(src, dest, ULmapKOI8U, NULL,
sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len);
UtfToLocal(src, len, dest,
ULmapKOI8U, lengthof(ULmapKOI8U),
NULL, 0,
NULL,
PG_KOI8U);
PG_RETURN_VOID();
}
@ -98,8 +107,11 @@ koi8u_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
LocalToUtf(src, dest, LUmapKOI8U, NULL,
sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len);
LocalToUtf(src, len, dest,
LUmapKOI8U, lengthof(LUmapKOI8U),
NULL, 0,
NULL,
PG_KOI8U);
PG_RETURN_VOID();
}

View File

@ -46,10 +46,11 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_JIS_2004, LUmapEUC_JIS_2004_combined,
sizeof(LUmapEUC_JIS_2004) / sizeof(pg_local_to_utf),
sizeof(LUmapEUC_JIS_2004_combined) / sizeof(pg_local_to_utf_combined),
PG_EUC_JIS_2004, len);
LocalToUtf(src, len, dest,
LUmapEUC_JIS_2004, lengthof(LUmapEUC_JIS_2004),
LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
NULL,
PG_EUC_JIS_2004);
PG_RETURN_VOID();
}
@ -63,10 +64,11 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
UtfToLocal(src, dest, ULmapEUC_JIS_2004, ULmapEUC_JIS_2004_combined,
sizeof(ULmapEUC_JIS_2004) / sizeof(pg_utf_to_local),
sizeof(ULmapEUC_JIS_2004_combined) / sizeof(pg_utf_to_local_combined),
PG_EUC_JIS_2004, len);
UtfToLocal(src, len, dest,
ULmapEUC_JIS_2004, lengthof(ULmapEUC_JIS_2004),
ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
NULL,
PG_EUC_JIS_2004);
PG_RETURN_VOID();
}

View File

@ -44,8 +44,11 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_CN, NULL,
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), 0, PG_EUC_CN, len);
LocalToUtf(src, len, dest,
LUmapEUC_CN, lengthof(LUmapEUC_CN),
NULL, 0,
NULL,
PG_EUC_CN);
PG_RETURN_VOID();
}
@ -59,8 +62,11 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
UtfToLocal(src, dest, ULmapEUC_CN, NULL,
sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), 0, PG_EUC_CN, len);
UtfToLocal(src, len, dest,
ULmapEUC_CN, lengthof(ULmapEUC_CN),
NULL, 0,
NULL,
PG_EUC_CN);
PG_RETURN_VOID();
}

View File

@ -44,8 +44,11 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_JP, NULL,
sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), 0, PG_EUC_JP, len);
LocalToUtf(src, len, dest,
LUmapEUC_JP, lengthof(LUmapEUC_JP),
NULL, 0,
NULL,
PG_EUC_JP);
PG_RETURN_VOID();
}
@ -59,8 +62,11 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
UtfToLocal(src, dest, ULmapEUC_JP, NULL,
sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), 0, PG_EUC_JP, len);
UtfToLocal(src, len, dest,
ULmapEUC_JP, lengthof(ULmapEUC_JP),
NULL, 0,
NULL,
PG_EUC_JP);
PG_RETURN_VOID();
}

View File

@ -44,8 +44,11 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_KR, NULL,
sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), 0, PG_EUC_KR, len);
LocalToUtf(src, len, dest,
LUmapEUC_KR, lengthof(LUmapEUC_KR),
NULL, 0,
NULL,
PG_EUC_KR);
PG_RETURN_VOID();
}
@ -59,8 +62,11 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
UtfToLocal(src, dest, ULmapEUC_KR, NULL,
sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), 0, PG_EUC_KR, len);
UtfToLocal(src, len, dest,
ULmapEUC_KR, lengthof(ULmapEUC_KR),
NULL, 0,
NULL,
PG_EUC_KR);
PG_RETURN_VOID();
}

View File

@ -44,8 +44,11 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_TW, NULL,
sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), 0, PG_EUC_TW, len);
LocalToUtf(src, len, dest,
LUmapEUC_TW, lengthof(LUmapEUC_TW),
NULL, 0,
NULL,
PG_EUC_TW);
PG_RETURN_VOID();
}
@ -59,8 +62,11 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
UtfToLocal(src, dest, ULmapEUC_TW, NULL,
sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), 0, PG_EUC_TW, len);
UtfToLocal(src, len, dest,
ULmapEUC_TW, lengthof(ULmapEUC_TW),
NULL, 0,
NULL,
PG_EUC_TW);
PG_RETURN_VOID();
}

View File

@ -44,8 +44,11 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
LocalToUtf(src, dest, LUmapGB18030, NULL,
sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), 0, PG_GB18030, len);
LocalToUtf(src, len, dest,
LUmapGB18030, lengthof(LUmapGB18030),
NULL, 0,
NULL,
PG_GB18030);
PG_RETURN_VOID();
}
@ -59,8 +62,11 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
UtfToLocal(src, dest, ULmapGB18030, NULL,
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), 0, PG_GB18030, len);
UtfToLocal(src, len, dest,
ULmapGB18030, lengthof(ULmapGB18030),
NULL, 0,
NULL,
PG_GB18030);
PG_RETURN_VOID();
}

Some files were not shown because too many files have changed in this diff Show More