1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-19 23:22:23 +03:00

Add pg_encoding_set_invalid()

There are cases where we cannot / do not want to error out for invalidly
encoded input. In such cases it can be useful to replace e.g. an incomplete
multi-byte character with bytes that will trigger an error when getting
validated as part of a larger string.

Unfortunately, until now, for some encodings no such sequence existed. For
those encodings this commit removes one previously accepted input combination
- we consider that to be ok, as the chosen bytes are outside of the valid
ranges for the encodings, we just previously failed to detect that.

As we cannot add a new field to pg_wchar_table without breaking ABI, this is
implemented "in-line" in the newly added function.

Author: Noah Misch <noah@leadboat.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Backpatch-through: 13
Security: CVE-2025-1094
This commit is contained in:
Andres Freund
2025-02-10 10:03:40 -05:00
parent 00f1a1f665
commit db3eb0e825
7 changed files with 121 additions and 2 deletions

View File

@@ -15,6 +15,25 @@
#include "mb/pg_wchar.h"
/*
* In today's multibyte encodings other than UTF8, this two-byte sequence
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
*
* For historical reasons, several verifychar implementations opt to reject
* this pair specifically. Byte pair range constraints, in encoding
* originator documentation, always excluded this pair. No core conversion
* could translate it. However, longstanding verifychar implementations
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
* pairs not valid per encoding originator documentation. To avoid tightening
* core or non-core conversions in a security patch, we sought this one pair.
*
* PQescapeString() historically used spaces for BYTE1; many other values
* could suffice for BYTE1.
*/
/* First byte of the two-byte pair rejected by the non-UTF8 multibyte verifiers. */
#define NONUTF8_INVALID_BYTE0 (0x8d)
/* Second byte of that pair: a space character. */
#define NONUTF8_INVALID_BYTE1 (' ')
/*
* Operations on multi-byte encodings are driven by a table of helper
* functions.
@@ -1330,6 +1349,11 @@ pg_big5_verifier(const unsigned char *s, int len)
if (len < l)
return -1;
if (l == 2 &&
s[0] == NONUTF8_INVALID_BYTE0 &&
s[1] == NONUTF8_INVALID_BYTE1)
return -1;
while (--l > 0)
{
if (*++s == '\0')
@@ -1350,6 +1374,11 @@ pg_gbk_verifier(const unsigned char *s, int len)
if (len < l)
return -1;
if (l == 2 &&
s[0] == NONUTF8_INVALID_BYTE0 &&
s[1] == NONUTF8_INVALID_BYTE1)
return -1;
while (--l > 0)
{
if (*++s == '\0')
@@ -1370,6 +1399,11 @@ pg_uhc_verifier(const unsigned char *s, int len)
if (len < l)
return -1;
if (l == 2 &&
s[0] == NONUTF8_INVALID_BYTE0 &&
s[1] == NONUTF8_INVALID_BYTE1)
return -1;
while (--l > 0)
{
if (*++s == '\0')
@@ -1496,6 +1530,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
}
/*
 * pg_encoding_set_invalid
 *
 * Store a two-byte sequence at dst[0..1] that the given encoding treats as a
 * two-byte character which fails validation, i.e.
 *		pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
 *
 * The caller must supply room for at least two bytes; nothing else is
 * written, and in particular no terminating NUL is added.  Only meaningful
 * for multibyte encodings, which is what the assertion checks.
 */
void
pg_encoding_set_invalid(int encoding, char *dst)
{
	Assert(pg_encoding_max_length(encoding) > 1);

	/* UTF8 needs its own lead byte; every other encoding shares one. */
	if (encoding == PG_UTF8)
		dst[0] = 0xc0;
	else
		dst[0] = NONUTF8_INVALID_BYTE0;

	/* The trailing byte is the same regardless of encoding. */
	dst[1] = NONUTF8_INVALID_BYTE1;
}
/*
*-------------------------------------------------------------------
* encoding info table
@@ -1671,5 +1718,11 @@ pg_encoding_max_length(int encoding)
{
Assert(PG_VALID_ENCODING(encoding));
return pg_wchar_table[encoding].maxmblen;
/*
* Check for the encoding despite the assert, due to some mingw versions
* otherwise issuing bogus warnings.
*/
return PG_VALID_ENCODING(encoding) ?
pg_wchar_table[encoding].maxmblen :
pg_wchar_table[PG_SQL_ASCII].maxmblen;
}