mirror of
https://github.com/postgres/postgres.git
synced 2025-12-21 05:21:08 +03:00
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte characters with bytes that will trigger an error when getting validated as part of a larger string. Unfortunately, until now, for some encoding no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that. As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function. Author: Noah Misch <noah@leadboat.com> Reviewed-by: Andres Freund <andres@anarazel.de> Backpatch-through: 13 Security: CVE-2025-1094
This commit is contained in:
@@ -16,6 +16,25 @@
|
||||
#include "utils/ascii.h"
|
||||
|
||||
|
||||
/*
|
||||
* In today's multibyte encodings other than UTF8, this two-byte sequence
|
||||
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
|
||||
*
|
||||
* For historical reasons, several verifychar implementations opt to reject
|
||||
* this pair specifically. Byte pair range constraints, in encoding
|
||||
* originator documentation, always excluded this pair. No core conversion
|
||||
* could translate it. However, longstanding verifychar implementations
|
||||
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
|
||||
* pairs not valid per encoding originator documentation. To avoid tightening
|
||||
* core or non-core conversions in a security patch, we sought this one pair.
|
||||
*
|
||||
* PQescapeString() historically used spaces for BYTE1; many other values
|
||||
* could suffice for BYTE1.
|
||||
*/
|
||||
#define NONUTF8_INVALID_BYTE0 (0x8d)
|
||||
#define NONUTF8_INVALID_BYTE1 (' ')
|
||||
|
||||
|
||||
/*
|
||||
* Operations on multi-byte encodings are driven by a table of helper
|
||||
* functions.
|
||||
@@ -1465,6 +1484,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
|
||||
if (len < l)
|
||||
return -1;
|
||||
|
||||
if (l == 2 &&
|
||||
s[0] == NONUTF8_INVALID_BYTE0 &&
|
||||
s[1] == NONUTF8_INVALID_BYTE1)
|
||||
return -1;
|
||||
|
||||
while (--l > 0)
|
||||
{
|
||||
if (*++s == '\0')
|
||||
@@ -1514,6 +1538,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
|
||||
if (len < l)
|
||||
return -1;
|
||||
|
||||
if (l == 2 &&
|
||||
s[0] == NONUTF8_INVALID_BYTE0 &&
|
||||
s[1] == NONUTF8_INVALID_BYTE1)
|
||||
return -1;
|
||||
|
||||
while (--l > 0)
|
||||
{
|
||||
if (*++s == '\0')
|
||||
@@ -1563,6 +1592,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
|
||||
if (len < l)
|
||||
return -1;
|
||||
|
||||
if (l == 2 &&
|
||||
s[0] == NONUTF8_INVALID_BYTE0 &&
|
||||
s[1] == NONUTF8_INVALID_BYTE1)
|
||||
return -1;
|
||||
|
||||
while (--l > 0)
|
||||
{
|
||||
if (*++s == '\0')
|
||||
@@ -2007,6 +2041,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Fills the provided buffer with two bytes such that:
|
||||
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
|
||||
*/
|
||||
void
|
||||
pg_encoding_set_invalid(int encoding, char *dst)
|
||||
{
|
||||
Assert(pg_encoding_max_length(encoding) > 1);
|
||||
|
||||
dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
|
||||
dst[1] = NONUTF8_INVALID_BYTE1;
|
||||
}
|
||||
|
||||
/*
|
||||
*-------------------------------------------------------------------
|
||||
* encoding info table
|
||||
@@ -2128,5 +2175,11 @@ pg_encoding_max_length(int encoding)
|
||||
{
|
||||
Assert(PG_VALID_ENCODING(encoding));
|
||||
|
||||
return pg_wchar_table[encoding].maxmblen;
|
||||
/*
|
||||
* Check for the encoding despite the assert, due to some mingw versions
|
||||
* otherwise issuing bogus warnings.
|
||||
*/
|
||||
return PG_VALID_ENCODING(encoding) ?
|
||||
pg_wchar_table[encoding].maxmblen :
|
||||
pg_wchar_table[PG_SQL_ASCII].maxmblen;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user