mirror of
https://github.com/postgres/postgres.git
synced 2025-06-29 10:41:53 +03:00
Back out fix for Unicode characters above 0x10000
This commit is contained in:
@ -1,5 +1,5 @@
|
|||||||
<!--
|
<!--
|
||||||
$PostgreSQL: pgsql/doc/src/sgml/postgres.sgml,v 1.65 2004/11/12 21:50:53 tgl Exp $
|
$PostgreSQL: pgsql/doc/src/sgml/postgres.sgml,v 1.66 2004/12/03 01:20:14 momjian Exp $
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.2//EN" [
|
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.2//EN" [
|
||||||
@ -179,6 +179,7 @@ $PostgreSQL: pgsql/doc/src/sgml/postgres.sgml,v 1.65 2004/11/12 21:50:53 tgl Exp
|
|||||||
&lobj;
|
&lobj;
|
||||||
&ecpg;
|
&ecpg;
|
||||||
&infoschema;
|
&infoschema;
|
||||||
|
&external_projects;
|
||||||
|
|
||||||
</part>
|
</part>
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* conversion functions between pg_wchar and multibyte streams.
|
* conversion functions between pg_wchar and multibyte streams.
|
||||||
* Tatsuo Ishii
|
* Tatsuo Ishii
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.39 2004/12/02 22:37:13 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.40 2004/12/03 01:20:20 momjian Exp $
|
||||||
*
|
*
|
||||||
* WIN1250 client encoding updated by Pavel Behal
|
* WIN1250 client encoding updated by Pavel Behal
|
||||||
*
|
*
|
||||||
@ -343,31 +343,6 @@ pg_johab_dsplen(const unsigned char *s)
|
|||||||
return (pg_euc_dsplen(s));
|
return (pg_euc_dsplen(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isLegalUTF8(const UTF8 *source, int len) {
|
|
||||||
UTF8 a;
|
|
||||||
const UTF8 *srcptr = source+len;
|
|
||||||
if(!source || (pg_utf_mblen(source) != len)) return false;
|
|
||||||
switch (len) {
|
|
||||||
default: return false;
|
|
||||||
/* Everything else falls through when "true"... */
|
|
||||||
case 6: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
|
||||||
case 5: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
|
||||||
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
|
||||||
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
|
||||||
case 2: if ((a = (*--srcptr)) > 0xBF) return false;
|
|
||||||
switch (*source) {
|
|
||||||
/* no fall-through in this inner switch */
|
|
||||||
case 0xE0: if (a < 0xA0) return false; break;
|
|
||||||
case 0xF0: if (a < 0x90) return false; break;
|
|
||||||
case 0xF4: if (a > 0x8F) return false; break;
|
|
||||||
default: if (a < 0x80) return false;
|
|
||||||
}
|
|
||||||
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
|
|
||||||
if (*source > 0xFD) return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* convert UTF-8 string to pg_wchar (UCS-2)
|
* convert UTF-8 string to pg_wchar (UCS-2)
|
||||||
* caller should allocate enough space for "to"
|
* caller should allocate enough space for "to"
|
||||||
@ -423,7 +398,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
|||||||
* returns the byte length of a UTF-8 word pointed to by s
|
* returns the byte length of a UTF-8 word pointed to by s
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
pg_utf_mblen(const UTF8 *s)
|
pg_utf_mblen(const unsigned char *s)
|
||||||
{
|
{
|
||||||
int len = 1;
|
int len = 1;
|
||||||
|
|
||||||
@ -431,19 +406,13 @@ pg_utf_mblen(const UTF8 *s)
|
|||||||
len = 1;
|
len = 1;
|
||||||
else if ((*s & 0xe0) == 0xc0)
|
else if ((*s & 0xe0) == 0xc0)
|
||||||
len = 2;
|
len = 2;
|
||||||
else if ((*s & 0xf0) == 0xe0)
|
else if ((*s & 0xe0) == 0xe0)
|
||||||
len = 3;
|
len = 3;
|
||||||
else if ((*s & 0xf8) == 0xf0)
|
|
||||||
len = 4;
|
|
||||||
else if ((*s & 0xfc) == 0xf8)
|
|
||||||
len = 5;
|
|
||||||
else if ((*s & 0xfe) == 0xfc)
|
|
||||||
len = 6;
|
|
||||||
return (len);
|
return (len);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_utf_dsplen(const UTF8 *s)
|
pg_utf_dsplen(const unsigned char *s)
|
||||||
{
|
{
|
||||||
return 1; /* XXX fix me! */
|
return 1; /* XXX fix me! */
|
||||||
}
|
}
|
||||||
@ -752,7 +721,7 @@ pg_wchar_tbl pg_wchar_table[] = {
|
|||||||
{pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, 3}, /* 3; PG_EUC_KR */
|
{pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, 3}, /* 3; PG_EUC_KR */
|
||||||
{pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, 3}, /* 4; PG_EUC_TW */
|
{pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, 3}, /* 4; PG_EUC_TW */
|
||||||
{pg_johab2wchar_with_len, pg_johab_mblen, pg_johab_dsplen, 3}, /* 5; PG_JOHAB */
|
{pg_johab2wchar_with_len, pg_johab_mblen, pg_johab_dsplen, 3}, /* 5; PG_JOHAB */
|
||||||
{pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, 6}, /* 6; PG_UNICODE */
|
{pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, 3}, /* 6; PG_UNICODE */
|
||||||
{pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, 3}, /* 7; PG_MULE_INTERNAL */
|
{pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, 3}, /* 7; PG_MULE_INTERNAL */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 8; PG_LATIN1 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 8; PG_LATIN1 */
|
||||||
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 9; PG_LATIN2 */
|
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 9; PG_LATIN2 */
|
||||||
@ -853,15 +822,18 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
|
|||||||
|
|
||||||
while (len > 0 && *mbstr)
|
while (len > 0 && *mbstr)
|
||||||
{
|
{
|
||||||
|
/* special UTF-8 check */
|
||||||
|
if (encoding == PG_UTF8 && (*mbstr & 0xf8) == 0xf0)
|
||||||
|
{
|
||||||
|
if (noError)
|
||||||
|
return false;
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||||
|
errmsg("Unicode characters greater than or equal to 0x10000 are not supported")));
|
||||||
|
}
|
||||||
|
|
||||||
l = pg_mblen(mbstr);
|
l = pg_mblen(mbstr);
|
||||||
|
|
||||||
/* special UTF-8 check */
|
|
||||||
if (encoding == PG_UTF8) {
|
|
||||||
if(!isLegalUTF8(mbstr,l)) {
|
|
||||||
if (noError) return false;
|
|
||||||
ereport(ERROR,(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),errmsg("Invalid UNICODE byte sequence detected near character %c",*mbstr)));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (i = 1; i < l; i++)
|
for (i = 1; i < l; i++)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -891,10 +863,10 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
len -= l;
|
len -= l;
|
||||||
mbstr += l;
|
mbstr += l;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.54 2004/12/02 22:37:14 momjian Exp $ */
|
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.55 2004/12/03 01:20:33 momjian Exp $ */
|
||||||
|
|
||||||
#ifndef PG_WCHAR_H
|
#ifndef PG_WCHAR_H
|
||||||
#define PG_WCHAR_H
|
#define PG_WCHAR_H
|
||||||
@ -17,14 +17,6 @@
|
|||||||
*/
|
*/
|
||||||
typedef unsigned int pg_wchar;
|
typedef unsigned int pg_wchar;
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The UTF types
|
|
||||||
*/
|
|
||||||
typedef unsigned int UTF32; /* at least 32 bits */
|
|
||||||
typedef unsigned short UTF16; /* at least 16 bits */
|
|
||||||
typedef unsigned char UTF8; /* typically 8 bits */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* various definitions for EUC
|
* various definitions for EUC
|
||||||
*/
|
*/
|
||||||
@ -348,6 +340,4 @@ extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc);
|
|||||||
extern void latin2mic_with_table(unsigned char *l, unsigned char *p, int len, int lc, unsigned char *tab);
|
extern void latin2mic_with_table(unsigned char *l, unsigned char *p, int len, int lc, unsigned char *tab);
|
||||||
extern void mic2latin_with_table(unsigned char *mic, unsigned char *p, int len, int lc, unsigned char *tab);
|
extern void mic2latin_with_table(unsigned char *mic, unsigned char *p, int len, int lc, unsigned char *tab);
|
||||||
|
|
||||||
extern bool isLegalUTF8(const UTF8 *source, int len);
|
|
||||||
|
|
||||||
#endif /* PG_WCHAR_H */
|
#endif /* PG_WCHAR_H */
|
||||||
|
Reference in New Issue
Block a user