mirror of
https://github.com/postgres/postgres.git
synced 2025-11-24 00:23:06 +03:00
Add mbverifystr() functions specific to each encoding.
This makes the pg_verify_mbstr() function faster by allowing more efficient encoding-specific implementations. All the implementations included in this commit are pretty naive: they just call the same encoding-specific verifychar functions that were used previously. Even so, that already gives a performance boost, because the tight character-at-a-time loop is simpler.
Reviewed-by: John Naylor
Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01@iki.fi
This commit is contained in:
@@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len,
|
||||
continue;
|
||||
}
|
||||
|
||||
l = pg_encoding_verifymb(encoding, (const char *) iso, len);
|
||||
l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
|
||||
if (l < 0)
|
||||
break;
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
|
||||
continue;
|
||||
}
|
||||
|
||||
l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
|
||||
l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
|
||||
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_EUC_JIS_2004,
|
||||
@@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
|
||||
continue;
|
||||
}
|
||||
|
||||
l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
|
||||
l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
|
||||
|
||||
if (l < 0 || l > len)
|
||||
report_invalid_encoding(PG_SHIFT_JIS_2004,
|
||||
|
||||
@@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||
(const char *) mic, len);
|
||||
@@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
|
||||
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_EUC_JP,
|
||||
(const char *) euc, len);
|
||||
@@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||
(const char *) mic, len);
|
||||
@@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
|
||||
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_EUC_JP,
|
||||
(const char *) euc, len);
|
||||
@@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_SJIS, (const char *) sjis, len);
|
||||
l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_SJIS,
|
||||
(const char *) sjis, len);
|
||||
|
||||
@@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
|
||||
c1 = *euc;
|
||||
if (IS_HIGHBIT_SET(c1))
|
||||
{
|
||||
l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len);
|
||||
l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
|
||||
if (l != 2)
|
||||
report_invalid_encoding(PG_EUC_KR,
|
||||
(const char *) euc, len);
|
||||
@@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||
(const char *) mic, len);
|
||||
|
||||
@@ -148,7 +148,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
|
||||
c1 = *euc;
|
||||
if (IS_HIGHBIT_SET(c1))
|
||||
{
|
||||
l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
|
||||
l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_EUC_TW,
|
||||
(const char *) euc, len);
|
||||
@@ -213,7 +213,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||
(const char *) mic, len);
|
||||
@@ -272,7 +272,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
|
||||
l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_BIG5,
|
||||
(const char *) big5, len);
|
||||
@@ -321,7 +321,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
|
||||
len--;
|
||||
continue;
|
||||
}
|
||||
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
|
||||
if (l < 0)
|
||||
report_invalid_encoding(PG_MULE_INTERNAL,
|
||||
(const char *) mic, len);
|
||||
|
||||
@@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS)
|
||||
/* make sure that source string is valid */
|
||||
len = VARSIZE_ANY_EXHDR(string);
|
||||
src_str = VARDATA_ANY(string);
|
||||
pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
||||
(void) pg_verify_mbstr(src_encoding, src_str, len, false);
|
||||
|
||||
/* perform conversion */
|
||||
dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
|
||||
@@ -1215,10 +1215,10 @@ static bool
|
||||
pg_generic_charinc(unsigned char *charptr, int len)
|
||||
{
|
||||
unsigned char *lastbyte = charptr + len - 1;
|
||||
mbverifier mbverify;
|
||||
mbchar_verifier mbverify;
|
||||
|
||||
/* We can just invoke the character verifier directly. */
|
||||
mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
|
||||
mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
|
||||
|
||||
while (*lastbyte < (unsigned char) 255)
|
||||
{
|
||||
@@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void)
|
||||
bool
|
||||
pg_verifymbstr(const char *mbstr, int len, bool noError)
|
||||
{
|
||||
return
|
||||
pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
|
||||
return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError)
|
||||
bool
|
||||
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
|
||||
{
|
||||
return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
|
||||
int oklen;
|
||||
|
||||
Assert(PG_VALID_ENCODING(encoding));
|
||||
|
||||
oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
|
||||
if (oklen != len)
|
||||
{
|
||||
if (noError)
|
||||
return false;
|
||||
report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
|
||||
* If OK, return length of string in the encoding.
|
||||
* If a problem is found, return -1 when noError is
|
||||
* true; when noError is false, ereport() a descriptive message.
|
||||
*
|
||||
* Note: We cannot use the faster encoding-specific mbverifystr() function
|
||||
* here, because we need to count the number of characters in the string.
|
||||
*/
|
||||
int
|
||||
pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
|
||||
{
|
||||
mbverifier mbverify;
|
||||
mbchar_verifier mbverifychar;
|
||||
int mb_len;
|
||||
|
||||
Assert(PG_VALID_ENCODING(encoding));
|
||||
@@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
|
||||
}
|
||||
|
||||
/* fetch function pointer just once */
|
||||
mbverify = pg_wchar_table[encoding].mbverify;
|
||||
mbverifychar = pg_wchar_table[encoding].mbverifychar;
|
||||
|
||||
mb_len = 0;
|
||||
|
||||
@@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
|
||||
report_invalid_encoding(encoding, mbstr, len);
|
||||
}
|
||||
|
||||
l = (*mbverify) ((const unsigned char *) mbstr, len);
|
||||
l = (*mbverifychar) ((const unsigned char *) mbstr, len);
|
||||
|
||||
if (l < 0)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user