mirror of
https://github.com/postgres/postgres.git
synced 2025-11-21 00:42:43 +03:00
Add new encoding EUC_JIS_2004 and SHIFT_JIS_2004,
along with new conversions among EUC_JIS_2004, SHIFT_JIS_2004 and UTF-8. catalog version has been bump up.
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.62 2007/01/05 22:19:44 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.63 2007/03/25 11:56:02 ishii Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -244,10 +244,10 @@ mic2latin_with_table(const unsigned char *mic,
|
||||
static int
|
||||
compare1(const void *p1, const void *p2)
|
||||
{
|
||||
unsigned int v1,
|
||||
uint32 v1,
|
||||
v2;
|
||||
|
||||
v1 = *(unsigned int *) p1;
|
||||
v1 = *(uint32 *) p1;
|
||||
v2 = ((pg_utf_to_local *) p2)->utf;
|
||||
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
|
||||
}
|
||||
@@ -259,31 +259,86 @@ compare1(const void *p1, const void *p2)
|
||||
static int
|
||||
compare2(const void *p1, const void *p2)
|
||||
{
|
||||
unsigned int v1,
|
||||
uint32 v1,
|
||||
v2;
|
||||
|
||||
v1 = *(unsigned int *) p1;
|
||||
v1 = *(uint32 *) p1;
|
||||
v2 = ((pg_local_to_utf *) p2)->code;
|
||||
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* comparison routine for bsearch()
|
||||
* this routine is intended for combined UTF8 -> local code
|
||||
*/
|
||||
static int
|
||||
compare3(const void *p1, const void *p2)
|
||||
{
|
||||
uint32 s1, s2, d1, d2;
|
||||
|
||||
s1 = *(uint32 *)p1;
|
||||
s2 = *((uint32 *)p1 + 1);
|
||||
d1 = ((pg_utf_to_local_combined *) p2)->utf1;
|
||||
d2 = ((pg_utf_to_local_combined *) p2)->utf2;
|
||||
return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* comparison routine for bsearch()
|
||||
* this routine is intended for local code -> combined UTF8
|
||||
*/
|
||||
static int
|
||||
compare4(const void *p1, const void *p2)
|
||||
{
|
||||
uint32 v1,
|
||||
v2;
|
||||
|
||||
v1 = *(uint32 *) p1;
|
||||
v2 = ((pg_local_to_utf_combined *) p2)->code;
|
||||
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* convert 32bit wide character to mutibye stream pointed to by iso
|
||||
*/
|
||||
static unsigned char *set_iso_code(unsigned char *iso, uint32 code)
|
||||
{
|
||||
if (code & 0xff000000)
|
||||
*iso++ = code >> 24;
|
||||
if (code & 0x00ff0000)
|
||||
*iso++ = (code & 0x00ff0000) >> 16;
|
||||
if (code & 0x0000ff00)
|
||||
*iso++ = (code & 0x0000ff00) >> 8;
|
||||
if (code & 0x000000ff)
|
||||
*iso++ = code & 0x000000ff;
|
||||
return iso;
|
||||
}
|
||||
|
||||
/*
|
||||
* UTF8 ---> local code
|
||||
*
|
||||
* utf: input UTF8 string (need not be null-terminated).
|
||||
* iso: pointer to the output area (must be large enough!)
|
||||
* map: the conversion map.
|
||||
* size: the size of the conversion map.
|
||||
* cmap: the conversion map for combined characters.
|
||||
* (optional)
|
||||
* size1: the size of the conversion map.
|
||||
* size2: the size of the conversion map for combined characters
|
||||
* (optional)
|
||||
* encoding: the PG identifier for the local encoding.
|
||||
* len: length of input string.
|
||||
*/
|
||||
void
|
||||
UtfToLocal(const unsigned char *utf, unsigned char *iso,
|
||||
const pg_utf_to_local *map, int size, int encoding, int len)
|
||||
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
|
||||
int size1, int size2, int encoding, int len)
|
||||
{
|
||||
unsigned int iutf;
|
||||
int l;
|
||||
uint32 iutf;
|
||||
uint32 cutf[2];
|
||||
uint32 code;
|
||||
pg_utf_to_local *p;
|
||||
pg_utf_to_local_combined *cp;
|
||||
int l;
|
||||
|
||||
for (; len > 0; len -= l)
|
||||
{
|
||||
@@ -324,21 +379,94 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
|
||||
iutf |= *utf++;
|
||||
}
|
||||
|
||||
p = bsearch(&iutf, map, size,
|
||||
sizeof(pg_utf_to_local), compare1);
|
||||
/*
|
||||
* first, try with combined map if possible
|
||||
*/
|
||||
if (cmap && len > l)
|
||||
{
|
||||
const unsigned char *utf_save = utf;
|
||||
int len_save = len;
|
||||
int l_save = l;
|
||||
|
||||
len -= l;
|
||||
|
||||
if (p == NULL)
|
||||
report_untranslatable_char(PG_UTF8, encoding,
|
||||
(const char *) (utf - l), len);
|
||||
l = pg_utf_mblen(utf);
|
||||
if (len < l)
|
||||
break;
|
||||
|
||||
if (p->code & 0xff000000)
|
||||
*iso++ = p->code >> 24;
|
||||
if (p->code & 0x00ff0000)
|
||||
*iso++ = (p->code & 0x00ff0000) >> 16;
|
||||
if (p->code & 0x0000ff00)
|
||||
*iso++ = (p->code & 0x0000ff00) >> 8;
|
||||
if (p->code & 0x000000ff)
|
||||
*iso++ = p->code & 0x000000ff;
|
||||
if (!pg_utf8_islegal(utf, l))
|
||||
break;
|
||||
|
||||
cutf[0] = iutf;
|
||||
|
||||
if (l == 1)
|
||||
{
|
||||
if (len_save > 1)
|
||||
{
|
||||
p = bsearch(&cutf[0], map, size1,
|
||||
sizeof(pg_utf_to_local), compare1);
|
||||
if (p == NULL)
|
||||
report_untranslatable_char(PG_UTF8, encoding,
|
||||
(const char *) (utf_save - l_save), len_save);
|
||||
iso = set_iso_code(iso, p->code);
|
||||
}
|
||||
|
||||
/* ASCII case is easy */
|
||||
*iso++ = *utf++;
|
||||
continue;
|
||||
}
|
||||
else if (l == 2)
|
||||
{
|
||||
iutf = *utf++ << 8;
|
||||
iutf |= *utf++;
|
||||
}
|
||||
else if (l == 3)
|
||||
{
|
||||
iutf = *utf++ << 16;
|
||||
iutf |= *utf++ << 8;
|
||||
iutf |= *utf++;
|
||||
}
|
||||
else if (l == 4)
|
||||
{
|
||||
iutf = *utf++ << 24;
|
||||
iutf |= *utf++ << 16;
|
||||
iutf |= *utf++ << 8;
|
||||
iutf |= *utf++;
|
||||
}
|
||||
|
||||
cutf[1] = iutf;
|
||||
cp = bsearch(cutf, cmap, size2,
|
||||
sizeof(pg_utf_to_local_combined), compare3);
|
||||
if (cp)
|
||||
code = cp->code;
|
||||
else
|
||||
{
|
||||
/* not found in combined map. try with ordinary map */
|
||||
p = bsearch(&cutf[0], map, size1,
|
||||
sizeof(pg_utf_to_local), compare1);
|
||||
if (p == NULL)
|
||||
report_untranslatable_char(PG_UTF8, encoding,
|
||||
(const char *) (utf_save - l_save), len_save);
|
||||
iso = set_iso_code(iso, p->code);
|
||||
|
||||
p = bsearch(&cutf[1], map, size1,
|
||||
sizeof(pg_utf_to_local), compare1);
|
||||
if (p == NULL)
|
||||
report_untranslatable_char(PG_UTF8, encoding,
|
||||
(const char *) (utf - l), len);
|
||||
code = p->code;
|
||||
}
|
||||
}
|
||||
else /* no cmap or no remaining data */
|
||||
{
|
||||
p = bsearch(&iutf, map, size1,
|
||||
sizeof(pg_utf_to_local), compare1);
|
||||
if (p == NULL)
|
||||
report_untranslatable_char(PG_UTF8, encoding,
|
||||
(const char *) (utf - l), len);
|
||||
code = p->code;
|
||||
}
|
||||
iso = set_iso_code(iso, code);
|
||||
}
|
||||
|
||||
if (len > 0)
|
||||
@@ -353,17 +481,23 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
|
||||
* iso: input local string (need not be null-terminated).
|
||||
* utf: pointer to the output area (must be large enough!)
|
||||
* map: the conversion map.
|
||||
* size: the size of the conversion map.
|
||||
* cmap: the conversion map for combined characters.
|
||||
* (optional)
|
||||
* size1: the size of the conversion map.
|
||||
* size2: the size of the conversion map for combined characters
|
||||
* (optional)
|
||||
* encoding: the PG identifier for the local encoding.
|
||||
* len: length of input string.
|
||||
*/
|
||||
void
|
||||
LocalToUtf(const unsigned char *iso, unsigned char *utf,
|
||||
const pg_local_to_utf *map, int size, int encoding, int len)
|
||||
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
|
||||
int size1, int size2, int encoding, int len)
|
||||
{
|
||||
unsigned int iiso;
|
||||
int l;
|
||||
pg_local_to_utf *p;
|
||||
pg_local_to_utf_combined *cp;
|
||||
|
||||
if (!PG_VALID_ENCODING(encoding))
|
||||
ereport(ERROR,
|
||||
@@ -409,20 +543,59 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
|
||||
iiso |= *iso++;
|
||||
}
|
||||
|
||||
p = bsearch(&iiso, map, size,
|
||||
p = bsearch(&iiso, map, size1,
|
||||
sizeof(pg_local_to_utf), compare2);
|
||||
|
||||
if (p == NULL)
|
||||
{
|
||||
/*
|
||||
* not found in the ordinary map. if there's a combined
|
||||
* character map, try with it
|
||||
*/
|
||||
if (cmap)
|
||||
{
|
||||
cp = bsearch(&iiso, cmap, size2,
|
||||
sizeof(pg_local_to_utf_combined), compare4);
|
||||
|
||||
if (cp)
|
||||
{
|
||||
if (cp->utf1 & 0xff000000)
|
||||
*utf++ = cp->utf1 >> 24;
|
||||
if (cp->utf1 & 0x00ff0000)
|
||||
*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
|
||||
if (cp->utf1 & 0x0000ff00)
|
||||
*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
|
||||
if (cp->utf1 & 0x000000ff)
|
||||
*utf++ = cp->utf1 & 0x000000ff;
|
||||
|
||||
if (cp->utf2 & 0xff000000)
|
||||
*utf++ = cp->utf2 >> 24;
|
||||
if (cp->utf2 & 0x00ff0000)
|
||||
*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
|
||||
if (cp->utf2 & 0x0000ff00)
|
||||
*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
|
||||
if (cp->utf2 & 0x000000ff)
|
||||
*utf++ = cp->utf2 & 0x000000ff;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
report_untranslatable_char(encoding, PG_UTF8,
|
||||
(const char *) (iso - l), len);
|
||||
|
||||
if (p->utf & 0xff000000)
|
||||
*utf++ = p->utf >> 24;
|
||||
if (p->utf & 0x00ff0000)
|
||||
*utf++ = (p->utf & 0x00ff0000) >> 16;
|
||||
if (p->utf & 0x0000ff00)
|
||||
*utf++ = (p->utf & 0x0000ff00) >> 8;
|
||||
if (p->utf & 0x000000ff)
|
||||
*utf++ = p->utf & 0x000000ff;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (p->utf & 0xff000000)
|
||||
*utf++ = p->utf >> 24;
|
||||
if (p->utf & 0x00ff0000)
|
||||
*utf++ = (p->utf & 0x00ff0000) >> 16;
|
||||
if (p->utf & 0x0000ff00)
|
||||
*utf++ = (p->utf & 0x0000ff00) >> 8;
|
||||
if (p->utf & 0x000000ff)
|
||||
*utf++ = p->utf & 0x000000ff;
|
||||
}
|
||||
}
|
||||
|
||||
if (len > 0)
|
||||
|
||||
Reference in New Issue
Block a user