1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-15 19:21:59 +03:00

Change the backend to reject strings containing invalidly-encoded multibyte
characters in all cases.  Formerly we mostly just threw warnings for invalid
input, and failed to detect it at all if no encoding conversion was required.
The tighter check is needed to defend against SQL-injection attacks as per
CVE-2006-2313 (further details will be published after release).  Embedded
zero (null) bytes will be rejected as well.  The checks are applied during
input to the backend (receipt from client or COPY IN), so it no longer seems
necessary to check in textin() and related routines; any string arriving at
those functions will already have been validated.  Conversion failure
reporting (for characters with no equivalent in the destination encoding)
has been cleaned up and made consistent while at it.

Also, fix a few longstanding errors in little-used encoding conversion
routines: win1251_to_iso, win866_to_iso, euc_tw_to_big5, euc_tw_to_mic,
mic_to_euc_tw were all broken to varying extents.

Patches by Tatsuo Ishii and Tom Lane.  Thanks to Akio Ishida and Yasuo Ohgaki
for identifying the security issues.
This commit is contained in:
Tom Lane
2006-05-21 20:05:21 +00:00
parent 1f219cf433
commit c61a2f5841
31 changed files with 1561 additions and 966 deletions

View File

@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.55 2006/01/12 22:04:02 neilc Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.56 2006/05/21 20:05:19 tgl Exp $
*/
#include "postgres.h"
@ -362,9 +362,50 @@ pg_client_to_server(const char *s, int len)
Assert(DatabaseEncoding);
Assert(ClientEncoding);
if (ClientEncoding->encoding == DatabaseEncoding->encoding)
if (len <= 0)
return (char *) s;
if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
ClientEncoding->encoding == PG_SQL_ASCII)
{
/*
* No conversion is needed, but we must still validate the data.
*/
(void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
return (char *) s;
}
if (DatabaseEncoding->encoding == PG_SQL_ASCII)
{
/*
* No conversion is possible, but we must still validate the data,
* because the client-side code might have done string escaping
* using the selected client_encoding. If the client encoding is
* ASCII-safe then we just do a straight validation under that
* encoding. For an ASCII-unsafe encoding we have a problem:
* we dare not pass such data to the parser but we have no way
* to convert it. We compromise by rejecting the data if it
* contains any non-ASCII characters.
*/
if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
(void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
else
{
int i;
for (i = 0; i < len; i++)
{
if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid byte value for encoding \"%s\": 0x%02x",
pg_enc2name_tbl[PG_SQL_ASCII].name,
(unsigned char) s[i])));
}
}
return (char *) s;
}
return perform_default_encoding_conversion(s, len, true);
}
@ -377,9 +418,14 @@ pg_server_to_client(const char *s, int len)
Assert(DatabaseEncoding);
Assert(ClientEncoding);
if (ClientEncoding->encoding == DatabaseEncoding->encoding)
if (len <= 0)
return (char *) s;
if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
ClientEncoding->encoding == PG_SQL_ASCII ||
DatabaseEncoding->encoding == PG_SQL_ASCII)
return (char *) s; /* assume data is valid */
return perform_default_encoding_conversion(s, len, false);
}
@ -398,9 +444,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
dest_encoding;
FmgrInfo *flinfo;
if (len <= 0)
return (char *) src;
if (is_client_to_server)
{
src_encoding = ClientEncoding->encoding;
@ -417,12 +460,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
if (flinfo == NULL)
return (char *) src;
if (src_encoding == dest_encoding)
return (char *) src;
if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
return (char *) src;
result = palloc(len * 4 + 1);
FunctionCall5(flinfo,