mirror of
https://github.com/postgres/postgres.git
synced 2025-04-27 22:56:53 +03:00
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte character with bytes that will trigger an error when getting validated as part of a larger string. Unfortunately, until now, for some encodings no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings; we just previously failed to detect that. As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function. Author: Noah Misch <noah@leadboat.com> Reviewed-by: Andres Freund <andres@anarazel.de> Backpatch-through: 13 Security: CVE-2025-1094
This commit is contained in:
parent
00f1a1f665
commit
db3eb0e825
@ -15,6 +15,25 @@
|
|||||||
#include "mb/pg_wchar.h"
|
#include "mb/pg_wchar.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In today's multibyte encodings other than UTF8, this two-byte sequence
|
||||||
|
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
|
||||||
|
*
|
||||||
|
* For historical reasons, several verifychar implementations opt to reject
|
||||||
|
* this pair specifically. Byte pair range constraints, in encoding
|
||||||
|
* originator documentation, always excluded this pair. No core conversion
|
||||||
|
* could translate it. However, longstanding verifychar implementations
|
||||||
|
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
|
||||||
|
* pairs not valid per encoding originator documentation. To avoid tightening
|
||||||
|
* core or non-core conversions in a security patch, we sought this one pair.
|
||||||
|
*
|
||||||
|
* PQescapeString() historically used spaces for BYTE1; many other values
|
||||||
|
* could suffice for BYTE1.
|
||||||
|
*/
|
||||||
|
#define NONUTF8_INVALID_BYTE0 (0x8d)
|
||||||
|
#define NONUTF8_INVALID_BYTE1 (' ')
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Operations on multi-byte encodings are driven by a table of helper
|
* Operations on multi-byte encodings are driven by a table of helper
|
||||||
* functions.
|
* functions.
|
||||||
@ -1330,6 +1349,11 @@ pg_big5_verifier(const unsigned char *s, int len)
|
|||||||
if (len < l)
|
if (len < l)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (l == 2 &&
|
||||||
|
s[0] == NONUTF8_INVALID_BYTE0 &&
|
||||||
|
s[1] == NONUTF8_INVALID_BYTE1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
while (--l > 0)
|
while (--l > 0)
|
||||||
{
|
{
|
||||||
if (*++s == '\0')
|
if (*++s == '\0')
|
||||||
@ -1350,6 +1374,11 @@ pg_gbk_verifier(const unsigned char *s, int len)
|
|||||||
if (len < l)
|
if (len < l)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (l == 2 &&
|
||||||
|
s[0] == NONUTF8_INVALID_BYTE0 &&
|
||||||
|
s[1] == NONUTF8_INVALID_BYTE1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
while (--l > 0)
|
while (--l > 0)
|
||||||
{
|
{
|
||||||
if (*++s == '\0')
|
if (*++s == '\0')
|
||||||
@ -1370,6 +1399,11 @@ pg_uhc_verifier(const unsigned char *s, int len)
|
|||||||
if (len < l)
|
if (len < l)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (l == 2 &&
|
||||||
|
s[0] == NONUTF8_INVALID_BYTE0 &&
|
||||||
|
s[1] == NONUTF8_INVALID_BYTE1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
while (--l > 0)
|
while (--l > 0)
|
||||||
{
|
{
|
||||||
if (*++s == '\0')
|
if (*++s == '\0')
|
||||||
@ -1496,6 +1530,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fills the provided buffer with two bytes such that:
|
||||||
|
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
pg_encoding_set_invalid(int encoding, char *dst)
|
||||||
|
{
|
||||||
|
Assert(pg_encoding_max_length(encoding) > 1);
|
||||||
|
|
||||||
|
dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
|
||||||
|
dst[1] = NONUTF8_INVALID_BYTE1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*-------------------------------------------------------------------
|
*-------------------------------------------------------------------
|
||||||
* encoding info table
|
* encoding info table
|
||||||
@ -1671,5 +1718,11 @@ pg_encoding_max_length(int encoding)
|
|||||||
{
|
{
|
||||||
Assert(PG_VALID_ENCODING(encoding));
|
Assert(PG_VALID_ENCODING(encoding));
|
||||||
|
|
||||||
return pg_wchar_table[encoding].maxmblen;
|
/*
|
||||||
|
* Check for the encoding despite the assert, due to some mingw versions
|
||||||
|
* otherwise issuing bogus warnings.
|
||||||
|
*/
|
||||||
|
return PG_VALID_ENCODING(encoding) ?
|
||||||
|
pg_wchar_table[encoding].maxmblen :
|
||||||
|
pg_wchar_table[PG_SQL_ASCII].maxmblen;
|
||||||
}
|
}
|
||||||
|
@ -341,7 +341,7 @@ typedef struct pg_enc2name
|
|||||||
#endif
|
#endif
|
||||||
} pg_enc2name;
|
} pg_enc2name;
|
||||||
|
|
||||||
extern const pg_enc2name pg_enc2name_tbl[];
|
extern PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Encoding names for gettext
|
* Encoding names for gettext
|
||||||
@ -552,6 +552,7 @@ extern int pg_valid_server_encoding_id(int encoding);
|
|||||||
* (in addition to the ones just above). The constant tables declared
|
* (in addition to the ones just above). The constant tables declared
|
||||||
* earlier in this file are also available from libpgcommon.
|
* earlier in this file are also available from libpgcommon.
|
||||||
*/
|
*/
|
||||||
|
extern void pg_encoding_set_invalid(int encoding, char *dst);
|
||||||
extern int pg_encoding_mblen(int encoding, const char *mbstr);
|
extern int pg_encoding_mblen(int encoding, const char *mbstr);
|
||||||
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
|
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
|
||||||
extern int pg_encoding_dsplen(int encoding, const char *mbstr);
|
extern int pg_encoding_dsplen(int encoding, const char *mbstr);
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
--
|
--
|
||||||
-- create user defined conversion
|
-- create user defined conversion
|
||||||
--
|
--
|
||||||
|
SELECT FROM test_enc_setup();
|
||||||
|
--
|
||||||
|
(1 row)
|
||||||
|
|
||||||
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
|
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
|
||||||
SET SESSION AUTHORIZATION regress_conversion_user;
|
SET SESSION AUTHORIZATION regress_conversion_user;
|
||||||
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
|
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
|
||||||
|
@ -62,6 +62,10 @@ CREATE FUNCTION test_atomic_ops()
|
|||||||
AS '@libdir@/regress@DLSUFFIX@'
|
AS '@libdir@/regress@DLSUFFIX@'
|
||||||
LANGUAGE C;
|
LANGUAGE C;
|
||||||
|
|
||||||
|
CREATE FUNCTION test_enc_setup() RETURNS void
|
||||||
|
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
-- Tests creating a FDW handler
|
-- Tests creating a FDW handler
|
||||||
CREATE FUNCTION test_fdw_handler()
|
CREATE FUNCTION test_fdw_handler()
|
||||||
RETURNS fdw_handler
|
RETURNS fdw_handler
|
||||||
|
@ -55,6 +55,9 @@ CREATE FUNCTION test_atomic_ops()
|
|||||||
RETURNS bool
|
RETURNS bool
|
||||||
AS '@libdir@/regress@DLSUFFIX@'
|
AS '@libdir@/regress@DLSUFFIX@'
|
||||||
LANGUAGE C;
|
LANGUAGE C;
|
||||||
|
CREATE FUNCTION test_enc_setup() RETURNS void
|
||||||
|
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
-- Tests creating a FDW handler
|
-- Tests creating a FDW handler
|
||||||
CREATE FUNCTION test_fdw_handler()
|
CREATE FUNCTION test_fdw_handler()
|
||||||
RETURNS fdw_handler
|
RETURNS fdw_handler
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include "commands/trigger.h"
|
#include "commands/trigger.h"
|
||||||
#include "executor/executor.h"
|
#include "executor/executor.h"
|
||||||
#include "executor/spi.h"
|
#include "executor/spi.h"
|
||||||
|
#include "mb/pg_wchar.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "nodes/supportnodes.h"
|
#include "nodes/supportnodes.h"
|
||||||
#include "optimizer/optimizer.h"
|
#include "optimizer/optimizer.h"
|
||||||
@ -1088,3 +1089,53 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
PG_RETURN_NULL();
|
PG_RETURN_NULL();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* one-time tests for encoding infrastructure */
|
||||||
|
PG_FUNCTION_INFO_V1(test_enc_setup);
|
||||||
|
Datum
|
||||||
|
test_enc_setup(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
/* Test pg_encoding_set_invalid() */
|
||||||
|
for (int i = 0; i < _PG_LAST_ENCODING_; i++)
|
||||||
|
{
|
||||||
|
char buf[2],
|
||||||
|
bigbuf[16];
|
||||||
|
int len,
|
||||||
|
mblen,
|
||||||
|
valid;
|
||||||
|
|
||||||
|
if (pg_encoding_max_length(i) == 1)
|
||||||
|
continue;
|
||||||
|
pg_encoding_set_invalid(i, buf);
|
||||||
|
len = strnlen(buf, 2);
|
||||||
|
if (len != 2)
|
||||||
|
elog(WARNING,
|
||||||
|
"official invalid string for encoding \"%s\" has length %d",
|
||||||
|
pg_enc2name_tbl[i].name, len);
|
||||||
|
mblen = pg_encoding_mblen(i, buf);
|
||||||
|
if (mblen != 2)
|
||||||
|
elog(WARNING,
|
||||||
|
"official invalid string for encoding \"%s\" has mblen %d",
|
||||||
|
pg_enc2name_tbl[i].name, mblen);
|
||||||
|
valid = pg_encoding_verifymbstr(i, buf, len);
|
||||||
|
if (valid != 0)
|
||||||
|
elog(WARNING,
|
||||||
|
"official invalid string for encoding \"%s\" has valid prefix of length %d",
|
||||||
|
pg_enc2name_tbl[i].name, valid);
|
||||||
|
valid = pg_encoding_verifymbstr(i, buf, 1);
|
||||||
|
if (valid != 0)
|
||||||
|
elog(WARNING,
|
||||||
|
"first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
|
||||||
|
pg_enc2name_tbl[i].name, valid);
|
||||||
|
memset(bigbuf, ' ', sizeof(bigbuf));
|
||||||
|
bigbuf[0] = buf[0];
|
||||||
|
bigbuf[1] = buf[1];
|
||||||
|
valid = pg_encoding_verifymbstr(i, bigbuf, sizeof(bigbuf));
|
||||||
|
if (valid != 0)
|
||||||
|
elog(WARNING,
|
||||||
|
"trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
|
||||||
|
pg_enc2name_tbl[i].name, valid);
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_VOID();
|
||||||
|
}
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
--
|
--
|
||||||
-- create user defined conversion
|
-- create user defined conversion
|
||||||
--
|
--
|
||||||
|
|
||||||
|
SELECT FROM test_enc_setup();
|
||||||
|
|
||||||
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
|
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
|
||||||
SET SESSION AUTHORIZATION regress_conversion_user;
|
SET SESSION AUTHORIZATION regress_conversion_user;
|
||||||
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
|
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user