mirror of
https://github.com/postgres/postgres.git
synced 2025-07-08 11:42:09 +03:00
Add mbverifystr() functions specific to each encoding.
This makes the pg_verify_mbstr() function faster, by allowing more efficient encoding-specific implementations. All the implementations included in this commit are pretty naive: they just call the same encoding-specific verifychar functions that were used previously, but that already gives a performance boost because the tight character-at-a-time loop is simpler.

Reviewed-by: John Naylor
Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01@iki.fi
This commit is contained in:
@ -682,7 +682,7 @@ read_extension_script_file(const ExtensionControlFile *control,
|
|||||||
src_encoding = control->encoding;
|
src_encoding = control->encoding;
|
||||||
|
|
||||||
/* make sure that source string is valid in the expected encoding */
|
/* make sure that source string is valid in the expected encoding */
|
||||||
pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
(void) pg_verify_mbstr(src_encoding, src_str, len, false);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convert the encoding to the database encoding. read_whole_file
|
* Convert the encoding to the database encoding. read_whole_file
|
||||||
|
@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
l = pg_encoding_verifymb(encoding, (const char *) iso, len);
|
l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
|
l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
|
||||||
|
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_EUC_JIS_2004,
|
report_invalid_encoding(PG_EUC_JIS_2004,
|
||||||
@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
|
l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
|
||||||
|
|
||||||
if (l < 0 || l > len)
|
if (l < 0 || l > len)
|
||||||
report_invalid_encoding(PG_SHIFT_JIS_2004,
|
report_invalid_encoding(PG_SHIFT_JIS_2004,
|
||||||
|
@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||||
(const char *) mic, len);
|
(const char *) mic, len);
|
||||||
@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
|
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_EUC_JP,
|
report_invalid_encoding(PG_EUC_JP,
|
||||||
(const char *) euc, len);
|
(const char *) euc, len);
|
||||||
@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||||
(const char *) mic, len);
|
(const char *) mic, len);
|
||||||
@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
|
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_EUC_JP,
|
report_invalid_encoding(PG_EUC_JP,
|
||||||
(const char *) euc, len);
|
(const char *) euc, len);
|
||||||
@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_SJIS, (const char *) sjis, len);
|
l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_SJIS,
|
report_invalid_encoding(PG_SJIS,
|
||||||
(const char *) sjis, len);
|
(const char *) sjis, len);
|
||||||
|
@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
|
|||||||
c1 = *euc;
|
c1 = *euc;
|
||||||
if (IS_HIGHBIT_SET(c1))
|
if (IS_HIGHBIT_SET(c1))
|
||||||
{
|
{
|
||||||
l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len);
|
l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
|
||||||
if (l != 2)
|
if (l != 2)
|
||||||
report_invalid_encoding(PG_EUC_KR,
|
report_invalid_encoding(PG_EUC_KR,
|
||||||
(const char *) euc, len);
|
(const char *) euc, len);
|
||||||
@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||||
(const char *) mic, len);
|
(const char *) mic, len);
|
||||||
|
@ -148,7 +148,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
|
|||||||
c1 = *euc;
|
c1 = *euc;
|
||||||
if (IS_HIGHBIT_SET(c1))
|
if (IS_HIGHBIT_SET(c1))
|
||||||
{
|
{
|
||||||
l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
|
l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_EUC_TW,
|
report_invalid_encoding(PG_EUC_TW,
|
||||||
(const char *) euc, len);
|
(const char *) euc, len);
|
||||||
@ -213,7 +213,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||||
(const char *) mic, len);
|
(const char *) mic, len);
|
||||||
@ -272,7 +272,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
|
l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_BIG5,
|
report_invalid_encoding(PG_BIG5,
|
||||||
(const char *) big5, len);
|
(const char *) big5, len);
|
||||||
@ -321,7 +321,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
|
|||||||
len--;
|
len--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||||
(const char *) mic, len);
|
(const char *) mic, len);
|
||||||
|
@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS)
|
|||||||
/* make sure that source string is valid */
|
/* make sure that source string is valid */
|
||||||
len = VARSIZE_ANY_EXHDR(string);
|
len = VARSIZE_ANY_EXHDR(string);
|
||||||
src_str = VARDATA_ANY(string);
|
src_str = VARDATA_ANY(string);
|
||||||
pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
(void) pg_verify_mbstr(src_encoding, src_str, len, false);
|
||||||
|
|
||||||
/* perform conversion */
|
/* perform conversion */
|
||||||
dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
|
dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
|
||||||
@ -1215,10 +1215,10 @@ static bool
|
|||||||
pg_generic_charinc(unsigned char *charptr, int len)
|
pg_generic_charinc(unsigned char *charptr, int len)
|
||||||
{
|
{
|
||||||
unsigned char *lastbyte = charptr + len - 1;
|
unsigned char *lastbyte = charptr + len - 1;
|
||||||
mbverifier mbverify;
|
mbchar_verifier mbverify;
|
||||||
|
|
||||||
/* We can just invoke the character verifier directly. */
|
/* We can just invoke the character verifier directly. */
|
||||||
mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
|
mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
|
||||||
|
|
||||||
while (*lastbyte < (unsigned char) 255)
|
while (*lastbyte < (unsigned char) 255)
|
||||||
{
|
{
|
||||||
@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void)
|
|||||||
bool
|
bool
|
||||||
pg_verifymbstr(const char *mbstr, int len, bool noError)
|
pg_verifymbstr(const char *mbstr, int len, bool noError)
|
||||||
{
|
{
|
||||||
return
|
return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
|
||||||
pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError)
|
|||||||
bool
|
bool
|
||||||
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
|
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
|
||||||
{
|
{
|
||||||
return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
|
int oklen;
|
||||||
|
|
||||||
|
Assert(PG_VALID_ENCODING(encoding));
|
||||||
|
|
||||||
|
oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
|
||||||
|
if (oklen != len)
|
||||||
|
{
|
||||||
|
if (noError)
|
||||||
|
return false;
|
||||||
|
report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
|
|||||||
* If OK, return length of string in the encoding.
|
* If OK, return length of string in the encoding.
|
||||||
* If a problem is found, return -1 when noError is
|
* If a problem is found, return -1 when noError is
|
||||||
* true; when noError is false, ereport() a descriptive message.
|
* true; when noError is false, ereport() a descriptive message.
|
||||||
|
*
|
||||||
|
* Note: We cannot use the faster encoding-specific mbverifystr() function
|
||||||
|
* here, because we need to count the number of characters in the string.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
|
pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
|
||||||
{
|
{
|
||||||
mbverifier mbverify;
|
mbchar_verifier mbverifychar;
|
||||||
int mb_len;
|
int mb_len;
|
||||||
|
|
||||||
Assert(PG_VALID_ENCODING(encoding));
|
Assert(PG_VALID_ENCODING(encoding));
|
||||||
@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* fetch function pointer just once */
|
/* fetch function pointer just once */
|
||||||
mbverify = pg_wchar_table[encoding].mbverify;
|
mbverifychar = pg_wchar_table[encoding].mbverifychar;
|
||||||
|
|
||||||
mb_len = 0;
|
mb_len = 0;
|
||||||
|
|
||||||
@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
|
|||||||
report_invalid_encoding(encoding, mbstr, len);
|
report_invalid_encoding(encoding, mbstr, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
l = (*mbverify) ((const unsigned char *) mbstr, len);
|
l = (*mbverifychar) ((const unsigned char *) mbstr, len);
|
||||||
|
|
||||||
if (l < 0)
|
if (l < 0)
|
||||||
{
|
{
|
||||||
|
@ -19,9 +19,9 @@
|
|||||||
* Operations on multi-byte encodings are driven by a table of helper
|
* Operations on multi-byte encodings are driven by a table of helper
|
||||||
* functions.
|
* functions.
|
||||||
*
|
*
|
||||||
* To add an encoding support, define mblen(), dsplen() and verifier() for
|
* To add an encoding support, define mblen(), dsplen(), verifychar() and
|
||||||
* the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
|
* verifystr() for the encoding. For server-encodings, also define mb2wchar()
|
||||||
* conversion functions.
|
* and wchar2mb() conversion functions.
|
||||||
*
|
*
|
||||||
* These functions generally assume that their input is validly formed.
|
* These functions generally assume that their input is validly formed.
|
||||||
* The "verifier" functions, further down in the file, have to be more
|
* The "verifier" functions, further down in the file, have to be more
|
||||||
@ -1087,29 +1087,45 @@ pg_gb18030_dsplen(const unsigned char *s)
|
|||||||
*-------------------------------------------------------------------
|
*-------------------------------------------------------------------
|
||||||
* multibyte sequence validators
|
* multibyte sequence validators
|
||||||
*
|
*
|
||||||
* These functions accept "s", a pointer to the first byte of a string,
|
* The verifychar functions accept "s", a pointer to the first byte of a
|
||||||
* and "len", the remaining length of the string. If there is a validly
|
* string, and "len", the remaining length of the string. If there is a
|
||||||
* encoded character beginning at *s, return its length in bytes; else
|
* validly encoded character beginning at *s, return its length in bytes;
|
||||||
* return -1.
|
* else return -1.
|
||||||
*
|
*
|
||||||
* The functions can assume that len > 0 and that *s != '\0', but they must
|
* The verifystr functions also accept "s", a pointer to a string and "len",
|
||||||
* test for and reject zeroes in any additional bytes of a multibyte character.
|
* the length of the string. They verify the whole string, and return the
|
||||||
|
* number of input bytes (<= len) that are valid. In other words, if the
|
||||||
|
* whole string is valid, verifystr returns "len", otherwise it returns the
|
||||||
|
* byte offset of the first invalid character. The verifystr functions must
|
||||||
|
* test for and reject zeroes in the input.
|
||||||
*
|
*
|
||||||
* Note that this definition allows the function for a single-byte
|
* The verifychar functions can assume that len > 0 and that *s != '\0', but
|
||||||
* encoding to be just "return 1".
|
* they must test for and reject zeroes in any additional bytes of a
|
||||||
|
* multibyte character. Note that this definition allows the function for a
|
||||||
|
* single-byte encoding to be just "return 1".
|
||||||
*-------------------------------------------------------------------
|
*-------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_ascii_verifier(const unsigned char *s, int len)
|
pg_ascii_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_ascii_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *nullpos = memchr(s, 0, len);
|
||||||
|
|
||||||
|
if (nullpos == NULL)
|
||||||
|
return len;
|
||||||
|
else
|
||||||
|
return nullpos - s;
|
||||||
|
}
|
||||||
|
|
||||||
#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
|
#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_eucjp_verifier(const unsigned char *s, int len)
|
pg_eucjp_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l;
|
int l;
|
||||||
unsigned char c1,
|
unsigned char c1,
|
||||||
@ -1164,7 +1180,36 @@ pg_eucjp_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_euckr_verifier(const unsigned char *s, int len)
|
pg_eucjp_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_eucjp_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_euckr_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l;
|
int l;
|
||||||
unsigned char c1,
|
unsigned char c1,
|
||||||
@ -1192,11 +1237,41 @@ pg_euckr_verifier(const unsigned char *s, int len)
|
|||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_euckr_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_euckr_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
/* EUC-CN byte sequences are exactly same as EUC-KR */
|
/* EUC-CN byte sequences are exactly same as EUC-KR */
|
||||||
#define pg_euccn_verifier pg_euckr_verifier
|
#define pg_euccn_verifychar pg_euckr_verifychar
|
||||||
|
#define pg_euccn_verifystr pg_euckr_verifystr
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_euctw_verifier(const unsigned char *s, int len)
|
pg_euctw_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l;
|
int l;
|
||||||
unsigned char c1,
|
unsigned char c1,
|
||||||
@ -1246,7 +1321,36 @@ pg_euctw_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_johab_verifier(const unsigned char *s, int len)
|
pg_euctw_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_euctw_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_johab_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l,
|
int l,
|
||||||
mbl;
|
mbl;
|
||||||
@ -1270,7 +1374,36 @@ pg_johab_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_mule_verifier(const unsigned char *s, int len)
|
pg_johab_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_johab_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_mule_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l,
|
int l,
|
||||||
mbl;
|
mbl;
|
||||||
@ -1291,13 +1424,53 @@ pg_mule_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_latin1_verifier(const unsigned char *s, int len)
|
pg_mule_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_mule_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_latin1_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_sjis_verifier(const unsigned char *s, int len)
|
pg_latin1_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *nullpos = memchr(s, 0, len);
|
||||||
|
|
||||||
|
if (nullpos == NULL)
|
||||||
|
return len;
|
||||||
|
else
|
||||||
|
return nullpos - s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_sjis_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l,
|
int l,
|
||||||
mbl;
|
mbl;
|
||||||
@ -1320,7 +1493,36 @@ pg_sjis_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_big5_verifier(const unsigned char *s, int len)
|
pg_sjis_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_sjis_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_big5_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l,
|
int l,
|
||||||
mbl;
|
mbl;
|
||||||
@ -1340,7 +1542,36 @@ pg_big5_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_gbk_verifier(const unsigned char *s, int len)
|
pg_big5_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_big5_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_gbk_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l,
|
int l,
|
||||||
mbl;
|
mbl;
|
||||||
@ -1360,7 +1591,36 @@ pg_gbk_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_uhc_verifier(const unsigned char *s, int len)
|
pg_gbk_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_gbk_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_uhc_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l,
|
int l,
|
||||||
mbl;
|
mbl;
|
||||||
@ -1380,7 +1640,36 @@ pg_uhc_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_gb18030_verifier(const unsigned char *s, int len)
|
pg_uhc_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_uhc_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_gb18030_verifychar(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l;
|
int l;
|
||||||
|
|
||||||
@ -1411,11 +1700,55 @@ pg_gb18030_verifier(const unsigned char *s, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_utf8_verifier(const unsigned char *s, int len)
|
pg_gb18030_verifystr(const unsigned char *s, int len)
|
||||||
{
|
{
|
||||||
int l = pg_utf_mblen(s);
|
const unsigned char *start = s;
|
||||||
|
|
||||||
if (len < l)
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_gb18030_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_utf8_verifychar(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
if ((*s & 0x80) == 0)
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
return -1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
else if ((*s & 0xe0) == 0xc0)
|
||||||
|
l = 2;
|
||||||
|
else if ((*s & 0xf0) == 0xe0)
|
||||||
|
l = 3;
|
||||||
|
else if ((*s & 0xf8) == 0xf0)
|
||||||
|
l = 4;
|
||||||
|
else
|
||||||
|
l = 1;
|
||||||
|
|
||||||
|
if (l > len)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (!pg_utf8_islegal(s, l))
|
if (!pg_utf8_islegal(s, l))
|
||||||
@ -1424,6 +1757,35 @@ pg_utf8_verifier(const unsigned char *s, int len)
|
|||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_utf8_verifystr(const unsigned char *s, int len)
|
||||||
|
{
|
||||||
|
const unsigned char *start = s;
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int l;
|
||||||
|
|
||||||
|
/* fast path for ASCII-subset characters */
|
||||||
|
if (!IS_HIGHBIT_SET(*s))
|
||||||
|
{
|
||||||
|
if (*s == '\0')
|
||||||
|
break;
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
l = pg_utf8_verifychar(s, len);
|
||||||
|
if (l == -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
|
||||||
|
return s - start;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for validity of a single UTF-8 encoded character
|
* Check for validity of a single UTF-8 encoded character
|
||||||
*
|
*
|
||||||
@ -1503,48 +1865,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
|
|||||||
*-------------------------------------------------------------------
|
*-------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
const pg_wchar_tbl pg_wchar_table[] = {
|
const pg_wchar_tbl pg_wchar_table[] = {
|
||||||
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
|
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */
|
||||||
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
|
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */
|
||||||
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
|
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */
|
||||||
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
|
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */
|
||||||
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
|
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */
|
||||||
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
|
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */
|
||||||
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
|
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4}, /* PG_UTF8 */
|
||||||
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
|
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4}, /* PG_MULE_INTERNAL */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN1 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN2 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN3 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN4 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN5 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN6 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN7 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN8 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN9 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN10 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1256 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1258 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN866 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN874 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8R */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1251 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1252 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-5 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-6 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-7 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-8 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1250 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1253 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1254 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1255 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1257 */
|
||||||
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
|
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8U */
|
||||||
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
|
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2}, /* PG_SJIS */
|
||||||
{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
|
{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2}, /* PG_BIG5 */
|
||||||
{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
|
{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2}, /* PG_GBK */
|
||||||
{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
|
{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2}, /* PG_UHC */
|
||||||
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
|
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4}, /* PG_GB18030 */
|
||||||
{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
|
{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3}, /* PG_JOHAB */
|
||||||
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
|
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1572,14 +1934,27 @@ pg_encoding_dsplen(int encoding, const char *mbstr)
|
|||||||
/*
|
/*
|
||||||
* Verify the first multibyte character of the given string.
|
* Verify the first multibyte character of the given string.
|
||||||
* Return its byte length if good, -1 if bad. (See comments above for
|
* Return its byte length if good, -1 if bad. (See comments above for
|
||||||
* full details of the mbverify API.)
|
* full details of the mbverifychar API.)
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
pg_encoding_verifymb(int encoding, const char *mbstr, int len)
|
pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
|
||||||
{
|
{
|
||||||
return (PG_VALID_ENCODING(encoding) ?
|
return (PG_VALID_ENCODING(encoding) ?
|
||||||
pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
|
pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) :
|
||||||
pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
|
pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Verify that a string is valid for the given encoding.
|
||||||
|
* Returns the number of input bytes (<= len) that form a valid string.
|
||||||
|
* (See comments above for full details of the mbverifystr API.)
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
|
||||||
|
{
|
||||||
|
return (PG_VALID_ENCODING(encoding) ?
|
||||||
|
pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) :
|
||||||
|
pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -371,7 +371,9 @@ typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
|
|||||||
|
|
||||||
typedef bool (*mbcharacter_incrementer) (unsigned char *mbstr, int len);
|
typedef bool (*mbcharacter_incrementer) (unsigned char *mbstr, int len);
|
||||||
|
|
||||||
typedef int (*mbverifier) (const unsigned char *mbstr, int len);
|
typedef int (*mbchar_verifier) (const unsigned char *mbstr, int len);
|
||||||
|
|
||||||
|
typedef int (*mbstr_verifier) (const unsigned char *mbstr, int len);
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
@ -381,7 +383,8 @@ typedef struct
|
|||||||
* to a multibyte */
|
* to a multibyte */
|
||||||
mblen_converter mblen; /* get byte length of a char */
|
mblen_converter mblen; /* get byte length of a char */
|
||||||
mbdisplaylen_converter dsplen; /* get display width of a char */
|
mbdisplaylen_converter dsplen; /* get display width of a char */
|
||||||
mbverifier mbverify; /* verify multibyte sequence */
|
mbchar_verifier mbverifychar; /* verify multibyte character */
|
||||||
|
mbstr_verifier mbverifystr; /* verify multibyte string */
|
||||||
int maxmblen; /* max bytes for a char in this encoding */
|
int maxmblen; /* max bytes for a char in this encoding */
|
||||||
} pg_wchar_tbl;
|
} pg_wchar_tbl;
|
||||||
|
|
||||||
@ -554,7 +557,8 @@ extern int pg_valid_server_encoding_id(int encoding);
|
|||||||
*/
|
*/
|
||||||
extern int pg_encoding_mblen(int encoding, const char *mbstr);
|
extern int pg_encoding_mblen(int encoding, const char *mbstr);
|
||||||
extern int pg_encoding_dsplen(int encoding, const char *mbstr);
|
extern int pg_encoding_dsplen(int encoding, const char *mbstr);
|
||||||
extern int pg_encoding_verifymb(int encoding, const char *mbstr, int len);
|
extern int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len);
|
||||||
|
extern int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len);
|
||||||
extern int pg_encoding_max_length(int encoding);
|
extern int pg_encoding_max_length(int encoding);
|
||||||
extern int pg_valid_client_encoding(const char *name);
|
extern int pg_valid_client_encoding(const char *name);
|
||||||
extern int pg_valid_server_encoding(const char *name);
|
extern int pg_valid_server_encoding(const char *name);
|
||||||
|
Reference in New Issue
Block a user