mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte character with bytes that will trigger an error when getting validated as part of a larger string. Unfortunately, until now, for some encodings no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that. As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function. Author: Noah Misch <noah@leadboat.com> Reviewed-by: Andres Freund <andres@anarazel.de> Backpatch-through: 13 Security: CVE-2025-1094
This commit is contained in:
		@@ -16,6 +16,25 @@
 | 
			
		||||
#include "utils/ascii.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * In today's multibyte encodings other than UTF8, this two-byte sequence
 | 
			
		||||
 * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
 | 
			
		||||
 *
 | 
			
		||||
 * For historical reasons, several verifychar implementations opt to reject
 | 
			
		||||
 * this pair specifically.  Byte pair range constraints, in encoding
 | 
			
		||||
 * originator documentation, always excluded this pair.  No core conversion
 | 
			
		||||
 * could translate it.  However, longstanding verifychar implementations
 | 
			
		||||
 * accepted any non-NUL byte.  big5_to_euc_tw and big5_to_mic even translate
 | 
			
		||||
 * pairs not valid per encoding originator documentation.  To avoid tightening
 | 
			
		||||
 * core or non-core conversions in a security patch, we sought this one pair.
 | 
			
		||||
 *
 | 
			
		||||
 * PQescapeString() historically used spaces for BYTE1; many other values
 | 
			
		||||
 * could suffice for BYTE1.
 | 
			
		||||
 */
 | 
			
		||||
#define NONUTF8_INVALID_BYTE0 (0x8d)	/* first byte of the invalid pair */
 | 
			
		||||
#define NONUTF8_INVALID_BYTE1 (' ')	/* second byte of the invalid pair: a space */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Operations on multi-byte encodings are driven by a table of helper
 | 
			
		||||
 * functions.
 | 
			
		||||
@@ -1526,6 +1545,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
 | 
			
		||||
	if (len < l)
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (l == 2 &&
 | 
			
		||||
		s[0] == NONUTF8_INVALID_BYTE0 &&
 | 
			
		||||
		s[1] == NONUTF8_INVALID_BYTE1)
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	while (--l > 0)
 | 
			
		||||
	{
 | 
			
		||||
		if (*++s == '\0')
 | 
			
		||||
@@ -1575,6 +1599,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
 | 
			
		||||
	if (len < l)
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (l == 2 &&
 | 
			
		||||
		s[0] == NONUTF8_INVALID_BYTE0 &&
 | 
			
		||||
		s[1] == NONUTF8_INVALID_BYTE1)
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	while (--l > 0)
 | 
			
		||||
	{
 | 
			
		||||
		if (*++s == '\0')
 | 
			
		||||
@@ -1624,6 +1653,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
 | 
			
		||||
	if (len < l)
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (l == 2 &&
 | 
			
		||||
		s[0] == NONUTF8_INVALID_BYTE0 &&
 | 
			
		||||
		s[1] == NONUTF8_INVALID_BYTE1)
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	while (--l > 0)
 | 
			
		||||
	{
 | 
			
		||||
		if (*++s == '\0')
 | 
			
		||||
@@ -2067,6 +2101,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Fills the provided buffer with two bytes such that:
 | 
			
		||||
 *   pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
 | 
			
		||||
 */
 | 
			
		||||
void
 | 
			
		||||
pg_encoding_set_invalid(int encoding, char *dst)
 | 
			
		||||
{
 | 
			
		||||
	Assert(pg_encoding_max_length(encoding) > 1);
 | 
			
		||||
 | 
			
		||||
	dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
 | 
			
		||||
	dst[1] = NONUTF8_INVALID_BYTE1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *-------------------------------------------------------------------
 | 
			
		||||
 * encoding info table
 | 
			
		||||
@@ -2189,5 +2236,11 @@ pg_encoding_max_length(int encoding)
 | 
			
		||||
{
 | 
			
		||||
	Assert(PG_VALID_ENCODING(encoding));
 | 
			
		||||
 | 
			
		||||
	return pg_wchar_table[encoding].maxmblen;
 | 
			
		||||
	/*
 | 
			
		||||
	 * Check for the encoding despite the assert, due to some mingw versions
 | 
			
		||||
	 * otherwise issuing bogus warnings.
 | 
			
		||||
	 */
 | 
			
		||||
	return PG_VALID_ENCODING(encoding) ?
 | 
			
		||||
		pg_wchar_table[encoding].maxmblen :
 | 
			
		||||
		pg_wchar_table[PG_SQL_ASCII].maxmblen;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -573,6 +573,7 @@ extern int	pg_valid_server_encoding_id(int encoding);
 | 
			
		||||
 * (in addition to the ones just above).  The constant tables declared
 | 
			
		||||
 * earlier in this file are also available from libpgcommon.
 | 
			
		||||
 */
 | 
			
		||||
extern void pg_encoding_set_invalid(int encoding, char *dst);
 | 
			
		||||
extern int	pg_encoding_mblen(int encoding, const char *mbstr);
 | 
			
		||||
extern int	pg_encoding_mblen_bounded(int encoding, const char *mbstr);
 | 
			
		||||
extern int	pg_encoding_dsplen(int encoding, const char *mbstr);
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,13 @@
 | 
			
		||||
\getenv libdir PG_LIBDIR
 | 
			
		||||
\getenv dlsuffix PG_DLSUFFIX
 | 
			
		||||
\set regresslib :libdir '/regress' :dlsuffix
 | 
			
		||||
CREATE FUNCTION test_enc_setup() RETURNS void
 | 
			
		||||
    AS :'regresslib', 'test_enc_setup'
 | 
			
		||||
    LANGUAGE C STRICT;
 | 
			
		||||
SELECT FROM test_enc_setup();
 | 
			
		||||
--
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
 | 
			
		||||
    AS :'regresslib', 'test_enc_conversion'
 | 
			
		||||
    LANGUAGE C STRICT;
 | 
			
		||||
 
 | 
			
		||||
@@ -1099,6 +1099,56 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
 | 
			
		||||
	PG_RETURN_NULL();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* one-time tests for encoding infrastructure */
 | 
			
		||||
PG_FUNCTION_INFO_V1(test_enc_setup);
 | 
			
		||||
Datum
 | 
			
		||||
test_enc_setup(PG_FUNCTION_ARGS)
 | 
			
		||||
{
 | 
			
		||||
	/* Test pg_encoding_set_invalid() */
 | 
			
		||||
	for (int i = 0; i < _PG_LAST_ENCODING_; i++)
 | 
			
		||||
	{
 | 
			
		||||
		char		buf[2],
 | 
			
		||||
					bigbuf[16];
 | 
			
		||||
		int			len,
 | 
			
		||||
					mblen,
 | 
			
		||||
					valid;
 | 
			
		||||
 | 
			
		||||
		if (pg_encoding_max_length(i) == 1)
 | 
			
		||||
			continue;
 | 
			
		||||
		pg_encoding_set_invalid(i, buf);
 | 
			
		||||
		len = strnlen(buf, 2);
 | 
			
		||||
		if (len != 2)
 | 
			
		||||
			elog(WARNING,
 | 
			
		||||
				 "official invalid string for encoding \"%s\" has length %d",
 | 
			
		||||
				 pg_enc2name_tbl[i].name, len);
 | 
			
		||||
		mblen = pg_encoding_mblen(i, buf);
 | 
			
		||||
		if (mblen != 2)
 | 
			
		||||
			elog(WARNING,
 | 
			
		||||
				 "official invalid string for encoding \"%s\" has mblen %d",
 | 
			
		||||
				 pg_enc2name_tbl[i].name, mblen);
 | 
			
		||||
		valid = pg_encoding_verifymbstr(i, buf, len);
 | 
			
		||||
		if (valid != 0)
 | 
			
		||||
			elog(WARNING,
 | 
			
		||||
				 "official invalid string for encoding \"%s\" has valid prefix of length %d",
 | 
			
		||||
				 pg_enc2name_tbl[i].name, valid);
 | 
			
		||||
		valid = pg_encoding_verifymbstr(i, buf, 1);
 | 
			
		||||
		if (valid != 0)
 | 
			
		||||
			elog(WARNING,
 | 
			
		||||
				 "first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
 | 
			
		||||
				 pg_enc2name_tbl[i].name, valid);
 | 
			
		||||
		memset(bigbuf, ' ', sizeof(bigbuf));
 | 
			
		||||
		bigbuf[0] = buf[0];
 | 
			
		||||
		bigbuf[1] = buf[1];
 | 
			
		||||
		valid = pg_encoding_verifymbstr(i, bigbuf, sizeof(bigbuf));
 | 
			
		||||
		if (valid != 0)
 | 
			
		||||
			elog(WARNING,
 | 
			
		||||
				 "trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
 | 
			
		||||
				 pg_enc2name_tbl[i].name, valid);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	PG_RETURN_VOID();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Call an encoding conversion or verification function.
 | 
			
		||||
 *
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,11 @@
 | 
			
		||||
 | 
			
		||||
\set regresslib :libdir '/regress' :dlsuffix
 | 
			
		||||
 | 
			
		||||
CREATE FUNCTION test_enc_setup() RETURNS void
 | 
			
		||||
    AS :'regresslib', 'test_enc_setup'
 | 
			
		||||
    LANGUAGE C STRICT;
 | 
			
		||||
SELECT FROM test_enc_setup();
 | 
			
		||||
 | 
			
		||||
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
 | 
			
		||||
    AS :'regresslib', 'test_enc_conversion'
 | 
			
		||||
    LANGUAGE C STRICT;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user