1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-27 22:56:53 +03:00

Fix PQescapeLiteral()/PQescapeIdentifier() length handling

In 5dc1e42b4fa I fixed bugs in various escape functions, unfortunately as part
of that I introduced a new bug in PQescapeLiteral()/PQescapeIdentifier(). The
bug is that I made PQescapeInternal() just use strlen(), rather than taking
the specified input length into account.

That's bad, because it can lead to including input that wasn't intended to be
included (in case len is shorter than null termination of the string) and
because it can lead to reading invalid memory if the input string is not null
terminated.

Expand test_escape to this kind of bug:

a) for escape functions with length support, append data that should not be
   escaped and check that it is not

b) add valgrind requests to detect access of bytes that should not be touched

Author: Tom Lane <tgl@sss.pgh.pa.us>
Author: Andres Freund <andres@anarazel.de
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/Z64jD3u46gObCo1p@pryzbyj2023
Backpatch: 13
This commit is contained in:
Andres Freund 2025-02-14 17:44:28 -05:00
parent 7720082ae5
commit efdadeb223
2 changed files with 77 additions and 5 deletions

View File

@ -4224,7 +4224,7 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
char *rp;
int num_quotes = 0; /* single or double, depending on as_ident */
int num_backslashes = 0;
size_t input_len = strlen(str);
size_t input_len = strnlen(str, len);
size_t result_size;
char quote_char = as_ident ? '"' : '\'';
bool validated_mb = false;
@ -4274,7 +4274,7 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
if (!validated_mb)
{
if (pg_encoding_verifymbstr(conn->client_encoding, s, remaining)
!= strlen(s))
!= remaining)
{
libpq_append_conn_error(conn, "invalid multibyte character");
return NULL;

View File

@ -17,6 +17,7 @@
#include "getopt_long.h"
#include "libpq-fe.h"
#include "mb/pg_wchar.h"
#include "utils/memdebug.h"
typedef struct pe_test_config
@ -56,6 +57,11 @@ typedef struct pe_test_escape_func
*/
bool supports_only_ascii_overlap;
/*
* Does the escape function have a length input?
*/
bool supports_input_length;
bool (*escape) (PGconn *conn, PQExpBuffer target,
const char *unescaped, size_t unescaped_len,
PQExpBuffer escape_err);
@ -234,21 +240,25 @@ static pe_test_escape_func pe_test_escape_funcs[] =
{
.name = "PQescapeLiteral",
.reports_errors = true,
.supports_input_length = true,
.escape = escape_literal,
},
{
.name = "PQescapeIdentifier",
.reports_errors = true,
.supports_input_length = true,
.escape = escape_identifier
},
{
.name = "PQescapeStringConn",
.reports_errors = true,
.supports_input_length = true,
.escape = escape_string_conn
},
{
.name = "PQescapeString",
.reports_errors = false,
.supports_input_length = true,
.escape = escape_string
},
{
@ -256,6 +266,7 @@ static pe_test_escape_func pe_test_escape_funcs[] =
.reports_errors = false,
.supports_only_valid = true,
.supports_only_ascii_overlap = true,
.supports_input_length = true,
.escape = escape_replace
},
{
@ -272,6 +283,7 @@ static pe_test_escape_func pe_test_escape_funcs[] =
#define TV(enc, string) {.client_encoding = (enc), .escape=string, .escape_len=sizeof(string) - 1, }
#define TV_LEN(enc, string, len) {.client_encoding = (enc), .escape=string, .escape_len=len, }
static pe_test_vector pe_test_vectors[] =
{
/* expected to work sanity checks */
@ -359,6 +371,15 @@ static pe_test_vector pe_test_vectors[] =
TV("mule_internal", "\\\x9c';\0;"),
TV("sql_ascii", "1\xC0'"),
/*
* Testcases that are not null terminated for the specified input length.
* That's interesting to verify that escape functions don't read beyond
* the intended input length.
*/
TV_LEN("gbk", "\x80", 1),
TV_LEN("UTF-8", "\xC3\xb6 ", 1),
TV_LEN("UTF-8", "\xC3\xb6 ", 2),
};
@ -521,6 +542,7 @@ test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_te
{
PQExpBuffer testname;
PQExpBuffer details;
PQExpBuffer raw_buf;
PQExpBuffer escape_buf;
PQExpBuffer escape_err;
size_t input_encoding_validlen;
@ -534,6 +556,7 @@ test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_te
escape_err = createPQExpBuffer();
testname = createPQExpBuffer();
details = createPQExpBuffer();
raw_buf = createPQExpBuffer();
escape_buf = createPQExpBuffer();
if (ef->supports_only_ascii_overlap &&
@ -567,8 +590,8 @@ test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_te
input_encoding0_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
tv->escape,
strlen(tv->escape));
input_encoding0_valid = input_encoding0_validlen == strlen(tv->escape);
strnlen(tv->escape, tv->escape_len));
input_encoding0_valid = input_encoding0_validlen == strnlen(tv->escape, tv->escape_len);
appendPQExpBuffer(details, "#\t input encoding valid till 0: %d\n",
input_encoding0_valid);
@ -580,9 +603,45 @@ test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_te
goto out;
/*
* Put the to-be-escaped data into a buffer, so that we
*
* a) can mark memory beyond end of the string as inaccessible when using
* valgrind
*
* b) can append extra data beyond the length passed to the escape
* function, to verify that that data is not processed.
*
* TODO: Should we instead/additionally escape twice, once with unmodified
* and once with appended input? That way we could compare the two.
*/
appendBinaryPQExpBuffer(raw_buf, tv->escape, tv->escape_len);
#define NEVER_ACCESS_STR "\xff never-to-be-touched"
if (ef->supports_input_length)
{
/*
* Append likely invalid string that does *not* contain a null byte
* (which'd prevent some invalid accesses to later memory).
*/
appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len],
raw_buf->len - tv->escape_len);
}
else
{
/* append invalid string, after \0 */
appendPQExpBufferChar(raw_buf, 0);
appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len + 1],
raw_buf->len - tv->escape_len - 1);
}
/* call the to-be-tested escape function */
escape_success = ef->escape(tc->conn, escape_buf,
tv->escape, tv->escape_len,
raw_buf->data, tv->escape_len,
escape_err);
if (!escape_success)
{
@ -592,6 +651,8 @@ test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_te
if (escape_buf->len > 0)
{
bool contains_never;
appendPQExpBuffer(details, "#\t escaped string: %zd bytes: ", escape_buf->len);
escapify(details, escape_buf->data, escape_buf->len);
appendPQExpBufferChar(details, '\n');
@ -603,6 +664,16 @@ test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_te
appendPQExpBuffer(details, "#\t escape encoding valid: %d\n",
escape_encoding_valid);
/*
* Verify that no data beyond the end of the input is included in the
* escaped string. It'd be better to use something like memmem()
* here, but that's not available everywhere.
*/
contains_never = strstr(escape_buf->data, NEVER_ACCESS_STR) == NULL;
report_result(tc, contains_never, testname, details,
"escaped data beyond end of input",
contains_never ? "no" : "all secrets revealed");
}
else
{
@ -693,6 +764,7 @@ out:
destroyPQExpBuffer(details);
destroyPQExpBuffer(testname);
destroyPQExpBuffer(escape_buf);
destroyPQExpBuffer(raw_buf);
}
static void