From 4f4a422fbb15e49ff5e9dc410cd8713d63fdda24 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 6 Nov 2023 10:38:00 -0500 Subject: [PATCH] Compute aggregate argument types correctly in transformAggregateCall(). transformAggregateCall() captures the datatypes of the aggregate's arguments immediately to construct the Aggref.aggargtypes list. This seems reasonable because the arguments have already been transformed --- but there is an edge case where they haven't been. Specifically, if we have an unknown-type literal in an ANY argument position, nothing will have been done with it earlier. But if we also have DISTINCT, then addTargetToGroupList() converts the literal to "text" type, resulting in the aggargtypes list not matching the actual runtime type of the argument. The end result is that the aggregate tries to interpret a "text" value as being of type "unknown", that is a zero-terminated C string. If the text value contains no zero bytes, this could result in disclosure of server memory following the text literal value. To fix, move the collection of the aggargtypes list to the end of transformAggregateCall(), after DISTINCT has been handled. This requires slightly more code, but not a great deal. Our thanks to Jingzhou Fu for reporting this problem. Security: CVE-2023-5868 --- src/backend/parser/parse_agg.c | 35 +++++++++++++++++++---------- src/test/regress/expected/jsonb.out | 7 ++++++ src/test/regress/sql/jsonb.sql | 3 +++ 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index ece4dd03296..fab9d0d293d 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -110,18 +110,6 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, int save_next_resno; ListCell *lc; - /* - * Before separating the args into direct and aggregated args, make a list - * of their data type OIDs for use later. - */ - foreach(lc, args) - { - Expr *arg = (Expr *) lfirst(lc); - - argtypes = lappend_oid(argtypes, exprType((Node *) arg)); - } - agg->aggargtypes = argtypes; - if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) { /* @@ -233,6 +221,29 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, agg->aggorder = torder; agg->aggdistinct = tdistinct; + /* + * Now build the aggargtypes list with the type OIDs of the direct and + * aggregated args, ignoring any resjunk entries that might have been + * added by ORDER BY/DISTINCT processing. We can't do this earlier + * because said processing can modify some args' data types, in particular + * by resolving previously-unresolved "unknown" literals. + */ + foreach(lc, agg->aggdirectargs) + { + Expr *arg = (Expr *) lfirst(lc); + + argtypes = lappend_oid(argtypes, exprType((Node *) arg)); + } + foreach(lc, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; /* ignore junk */ + argtypes = lappend_oid(argtypes, exprType((Node *) tle->expr)); + } + agg->aggargtypes = argtypes; + check_agglevels_and_constraints(pstate, (Node *) agg); } diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index f4fe030a39a..f6530e5b0ce 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -1565,6 +1565,13 @@ SELECT jsonb_object_agg(name, type) FROM foo; INSERT INTO foo VALUES (999999, NULL, 'bar'); SELECT jsonb_object_agg(name, type) FROM foo; ERROR: field name must not be null +-- edge case for parser +SELECT jsonb_object_agg(DISTINCT 'a', 'abc'); + jsonb_object_agg +------------------ + {"a": "abc"} +(1 row) + -- jsonb_object -- empty object, one dimension SELECT jsonb_object('{}'); diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 141d4c72765..66d731a3fd3 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -407,6 +407,9 @@ SELECT jsonb_object_agg(name, type) FROM foo; INSERT INTO foo VALUES (999999, NULL, 'bar'); SELECT jsonb_object_agg(name, type) FROM foo; +-- edge case for parser +SELECT jsonb_object_agg(DISTINCT 'a', 'abc'); + -- jsonb_object -- empty object, one dimension