From d3de70fdb76520610583221c38d0c7eaa6573503 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 6 Nov 2023 10:38:00 -0500 Subject: [PATCH] Compute aggregate argument types correctly in transformAggregateCall(). transformAggregateCall() captures the datatypes of the aggregate's arguments immediately to construct the Aggref.aggargtypes list. This seems reasonable because the arguments have already been transformed --- but there is an edge case where they haven't been. Specifically, if we have an unknown-type literal in an ANY argument position, nothing will have been done with it earlier. But if we also have DISTINCT, then addTargetToGroupList() converts the literal to "text" type, resulting in the aggargtypes list not matching the actual runtime type of the argument. The end result is that the aggregate tries to interpret a "text" value as being of type "unknown", that is a zero-terminated C string. If the text value contains no zero bytes, this could result in disclosure of server memory following the text literal value. To fix, move the collection of the aggargtypes list to the end of transformAggregateCall(), after DISTINCT has been handled. This requires slightly more code, but not a great deal. Our thanks to Jingzhou Fu for reporting this problem. Security: CVE-2023-5868 --- src/backend/parser/parse_agg.c | 35 +++++++++++++++++++---------- src/test/regress/expected/jsonb.out | 7 ++++++ src/test/regress/sql/jsonb.sql | 3 +++ 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index bd98dbd7ace..558eff7f8fd 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -110,18 +110,6 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, int save_next_resno; ListCell *lc; - /* - * Before separating the args into direct and aggregated args, make a list - * of their data type OIDs for use later. - */ - foreach(lc, args) - { - Expr *arg = (Expr *) lfirst(lc); - - argtypes = lappend_oid(argtypes, exprType((Node *) arg)); - } - agg->aggargtypes = argtypes; - if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) { /* @@ -233,6 +221,29 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, agg->aggorder = torder; agg->aggdistinct = tdistinct; + /* + * Now build the aggargtypes list with the type OIDs of the direct and + * aggregated args, ignoring any resjunk entries that might have been + * added by ORDER BY/DISTINCT processing. We can't do this earlier + * because said processing can modify some args' data types, in particular + * by resolving previously-unresolved "unknown" literals. + */ + foreach(lc, agg->aggdirectargs) + { + Expr *arg = (Expr *) lfirst(lc); + + argtypes = lappend_oid(argtypes, exprType((Node *) arg)); + } + foreach(lc, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; /* ignore junk */ + argtypes = lappend_oid(argtypes, exprType((Node *) tle->expr)); + } + agg->aggargtypes = argtypes; + check_agglevels_and_constraints(pstate, (Node *) agg); } diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 1e6c6ef200a..edb520815c5 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -1527,6 +1527,13 @@ SELECT jsonb_object_agg(name, type) FROM foo; INSERT INTO foo VALUES (999999, NULL, 'bar'); SELECT jsonb_object_agg(name, type) FROM foo; ERROR: field name must not be null +-- edge case for parser +SELECT jsonb_object_agg(DISTINCT 'a', 'abc'); + jsonb_object_agg +------------------ + {"a": "abc"} +(1 row) + -- jsonb_object -- empty object, one dimension SELECT jsonb_object('{}'); diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index b6409767f6d..418589611f2 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -380,6 +380,9 @@ SELECT jsonb_object_agg(name, type) FROM foo; INSERT INTO foo VALUES (999999, NULL, 'bar'); SELECT jsonb_object_agg(name, type) FROM foo; +-- edge case for parser +SELECT jsonb_object_agg(DISTINCT 'a', 'abc'); + -- jsonb_object -- empty object, one dimension