From 89a5499ef9e477beb97d742d4df6fc8f601d87d5 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 7 Jun 2021 14:15:25 -0400 Subject: [PATCH] Fix incautious handling of possibly-miscoded strings in client code. An incorrectly-encoded multibyte character near the end of a string could cause various processing loops to run past the string's terminating NUL, with results ranging from no detectable issue to a program crash, depending on what happens to be in the following memory. This isn't an issue in the server, because we take care to verify the encoding of strings before doing any interesting processing on them. However, the lack of bounds-checking that this permits leaked into client-side code, which shouldn't assume that anyone has validated the encoding of its input. Although this is certainly a bug worth fixing, the PG security team elected not to regard it as a security issue, primarily because any untrusted text should be sanitized by PQescapeLiteral or the like before being incorporated into a SQL or psql command. (If an app fails to do so, the same technique can be used to cause SQL injection, with probably much more dire consequences than a mere client-program crash.) The escaping functions were already made proof against this class of problem; cf. CVE-2006-2313. To fix, invent PQmblenBounded(), which is like PQmblen() except it won't return more than the number of bytes remaining in the string. In HEAD we can make this a new libpq function, as PQmblen() is. It seems imprudent to change libpq's API in stable branches though, so in the back branches define PQmblenBounded as a macro in the files that need it. (Note that just changing PQmblen's behavior would not be a good idea; notably, it would completely break the escaping functions' defense against this exact problem. So we just want a version for those callers that don't have any better way of handling this issue.) Per private report from houjingyi. Back-patch to all supported branches. 
--- src/bin/psql/common.c | 26 ++++++++++++++------------ src/bin/psql/psqlscanslash.l | 4 +++- src/bin/psql/stringutils.c | 8 +++++--- src/bin/psql/tab-complete.c | 4 +++- src/bin/scripts/common.c | 4 +++- src/fe_utils/print.c | 3 +++ src/interfaces/libpq/fe-print.c | 3 ++- src/interfaces/libpq/fe-protocol3.c | 6 ++++-- 8 files changed, 37 insertions(+), 21 deletions(-) diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 11200cc6a6f..1b57f85fb23 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -29,6 +29,8 @@ #include "fe_utils/mbprint.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + static bool DescribeQuery(const char *query, double *elapsed_msec); static bool ExecQueryUsingCursor(const char *query, double *elapsed_msec); static bool command_no_begin(const char *query); @@ -2001,7 +2003,7 @@ skip_white_space(const char *query) while (*query) { - int mblen = PQmblen(query, pset.encoding); + int mblen = PQmblenBounded(query, pset.encoding); /* * Note: we assume the encoding is a superset of ASCII, so that for @@ -2038,7 +2040,7 @@ skip_white_space(const char *query) query++; break; } - query += PQmblen(query, pset.encoding); + query += PQmblenBounded(query, pset.encoding); } } else if (cnestlevel > 0) @@ -2073,7 +2075,7 @@ command_no_begin(const char *query) */ wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* * Transaction control commands. 
These should include every keyword that @@ -2104,7 +2106,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0) return true; @@ -2138,7 +2140,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) return true; @@ -2154,7 +2156,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); } if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0) @@ -2165,7 +2167,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) return true; @@ -2182,7 +2184,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* ALTER SYSTEM isn't allowed in xacts */ if (wordlen == 6 && pg_strncasecmp(query, "system", 6) == 0) @@ -2205,7 +2207,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) return true; @@ -2223,7 +2225,7 @@ command_no_begin(const char *query) wordlen = 0; 
while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) return true; @@ -2243,7 +2245,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 3 && pg_strncasecmp(query, "all", 3) == 0) return true; @@ -2279,7 +2281,7 @@ is_select_command(const char *query) */ wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 6 && pg_strncasecmp(query, "select", 6) == 0) return true; diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l index 2567ed80809..a7734506cf0 100644 --- a/src/bin/psql/psqlscanslash.l +++ b/src/bin/psql/psqlscanslash.l @@ -27,6 +27,8 @@ %{ #include "fe_utils/psqlscan_int.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + /* * We must have a typedef YYSTYPE for yylex's first argument, but this lexer * doesn't presently make use of that argument, so just declare it as int. @@ -752,7 +754,7 @@ dequote_downcase_identifier(char *str, bool downcase, int encoding) { if (downcase && !inquotes) *cp = pg_tolower((unsigned char) *cp); - cp += PQmblen(cp, encoding); + cp += PQmblenBounded(cp, encoding); } } } diff --git a/src/bin/psql/stringutils.c b/src/bin/psql/stringutils.c index 29b9c9c7f03..762277bf75a 100644 --- a/src/bin/psql/stringutils.c +++ b/src/bin/psql/stringutils.c @@ -12,6 +12,8 @@ #include "common.h" #include "stringutils.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + /* * Replacement for strtok() (a.k.a. 
poor man's flex) @@ -143,7 +145,7 @@ strtokx(const char *s, /* okay, we have a quoted token, now scan for the closer */ char thisquote = *p++; - for (; *p; p += PQmblen(p, encoding)) + for (; *p; p += PQmblenBounded(p, encoding)) { if (*p == escape && p[1] != '\0') p++; /* process escaped anything */ @@ -262,7 +264,7 @@ strip_quotes(char *source, char quote, char escape, int encoding) else if (c == escape && src[1] != '\0') src++; /* process escaped character */ - i = PQmblen(src, encoding); + i = PQmblenBounded(src, encoding); while (i--) *dst++ = *src++; } @@ -322,7 +324,7 @@ quote_if_needed(const char *source, const char *entails_quote, else if (strchr(entails_quote, c)) need_quotes = true; - i = PQmblen(src, encoding); + i = PQmblenBounded(src, encoding); while (i--) *dst++ = *src++; } diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 7041248bbb2..d6b4a5bc31f 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -60,6 +60,8 @@ extern char *filename_completion_function(); #define completion_matches rl_completion_matches #endif +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + /* word break characters */ #define WORD_BREAKS "\t\n@$><=;|&{() " @@ -4128,7 +4130,7 @@ _complete_from_query(const char *simple_query, while (*pstr) { char_length++; - pstr += PQmblen(pstr, pset.encoding); + pstr += PQmblenBounded(pstr, pset.encoding); } /* Free any prior result */ diff --git a/src/bin/scripts/common.c b/src/bin/scripts/common.c index f018aa87db7..223862f9442 100644 --- a/src/bin/scripts/common.c +++ b/src/bin/scripts/common.c @@ -22,6 +22,8 @@ #include "fe_utils/string_utils.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + static PGcancel *volatile cancelConn = NULL; bool CancelRequested = false; @@ -303,7 +305,7 @@ split_table_columns_spec(const char *spec, int encoding, cp++; } else - cp += PQmblen(cp, encoding); + cp += PQmblenBounded(cp, encoding); } *table = pg_strdup(spec); (*table)[cp - spec] = 
'\0'; /* no strndup */ diff --git a/src/fe_utils/print.c b/src/fe_utils/print.c index 22f67710d2a..dba19c28f59 100644 --- a/src/fe_utils/print.c +++ b/src/fe_utils/print.c @@ -3528,6 +3528,9 @@ strlen_max_width(unsigned char *str, int *target_width, int encoding) curr_width += char_width; str += PQmblen((char *) str, encoding); + + if (str > end) /* Don't overrun invalid string */ + str = end; } *target_width = curr_width; diff --git a/src/interfaces/libpq/fe-print.c b/src/interfaces/libpq/fe-print.c index 95de270b933..f07adf0b760 100644 --- a/src/interfaces/libpq/fe-print.c +++ b/src/interfaces/libpq/fe-print.c @@ -36,6 +36,7 @@ #include "libpq-fe.h" #include "libpq-int.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) static void do_field(const PQprintOpt *po, const PGresult *res, const int i, const int j, const int fs_len, @@ -365,7 +366,7 @@ do_field(const PQprintOpt *po, const PGresult *res, /* Detect whether field contains non-numeric data */ char ch = '0'; - for (p = pval; *p; p += PQmblen(p, res->client_encoding)) + for (p = pval; *p; p += PQmblenBounded(p, res->client_encoding)) { ch = *p; if (!((ch >= '0' && ch <= '9') || diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index f1d0820510b..5ede5ce3989 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -41,6 +41,8 @@ ((id) == 'T' || (id) == 'D' || (id) == 'd' || (id) == 'V' || \ (id) == 'E' || (id) == 'N' || (id) == 'A') +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + static void handleSyncLoss(PGconn *conn, char id, int msgLength); static int getRowDescriptions(PGconn *conn, int msgLength); @@ -1227,7 +1229,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) if (w <= 0) w = 1; scroffset += w; - qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]); + qoffset += PQmblenBounded(&wquery[qoffset], encoding); } else { @@ -1295,7 +1297,7 @@ reportErrorPosition(PQExpBuffer msg, 
const char *query, int loc, int encoding) * width. */ scroffset = 0; - for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i])) + for (; i < msg->len; i += PQmblenBounded(&msg->data[i], encoding)) { int w = pg_encoding_dsplen(encoding, &msg->data[i]);