From 9146d0d65073eeb6738db7c21626d8672fda29f9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 6 Nov 2023 10:38:00 -0500 Subject: [PATCH] Compute aggregate argument types correctly in transformAggregateCall(). transformAggregateCall() captures the datatypes of the aggregate's arguments immediately to construct the Aggref.aggargtypes list. This seems reasonable because the arguments have already been transformed --- but there is an edge case where they haven't been. Specifically, if we have an unknown-type literal in an ANY argument position, nothing will have been done with it earlier. But if we also have DISTINCT, then addTargetToGroupList() converts the literal to "text" type, resulting in the aggargtypes list not matching the actual runtime type of the argument. The end result is that the aggregate tries to interpret a "text" value as being of type "unknown", that is a zero-terminated C string. If the text value contains no zero bytes, this could result in disclosure of server memory following the text literal value. To fix, move the collection of the aggargtypes list to the end of transformAggregateCall(), after DISTINCT has been handled. This requires slightly more code, but not a great deal. Our thanks to Jingzhou Fu for reporting this problem. Security: CVE-2023-5868 --- src/backend/parser/parse_agg.c | 35 +++++++++++++++++++---------- src/test/regress/expected/jsonb.out | 7 ++++++ src/test/regress/sql/jsonb.sql | 3 +++ 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 828cd99bc1e..90cf150d5f7 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -110,18 +110,6 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, int save_next_resno; ListCell *lc; - /* - * Before separating the args into direct and aggregated args, make a list - * of their data type OIDs for use later. - */ - foreach(lc, args) - { - Expr *arg = (Expr *) lfirst(lc); - - argtypes = lappend_oid(argtypes, exprType((Node *) arg)); - } - agg->aggargtypes = argtypes; - if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) { /* @@ -233,6 +221,29 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, agg->aggorder = torder; agg->aggdistinct = tdistinct; + /* + * Now build the aggargtypes list with the type OIDs of the direct and + * aggregated args, ignoring any resjunk entries that might have been + * added by ORDER BY/DISTINCT processing. We can't do this earlier + * because said processing can modify some args' data types, in particular + * by resolving previously-unresolved "unknown" literals. + */ + foreach(lc, agg->aggdirectargs) + { + Expr *arg = (Expr *) lfirst(lc); + + argtypes = lappend_oid(argtypes, exprType((Node *) arg)); + } + foreach(lc, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; /* ignore junk */ + argtypes = lappend_oid(argtypes, exprType((Node *) tle->expr)); + } + agg->aggargtypes = argtypes; + check_agglevels_and_constraints(pstate, (Node *) agg); } diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index bec355d4391..3c5be773a1a 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -1558,6 +1558,13 @@ SELECT jsonb_object_agg(name, type) FROM foo; INSERT INTO foo VALUES (999999, NULL, 'bar'); SELECT jsonb_object_agg(name, type) FROM foo; ERROR: field name must not be null +-- edge case for parser +SELECT jsonb_object_agg(DISTINCT 'a', 'abc'); + jsonb_object_agg +------------------ + {"a": "abc"} +(1 row) + -- jsonb_object -- empty object, one dimension SELECT jsonb_object('{}'); diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index f8d596042ce..040e1bac212 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -397,6 +397,9 @@ SELECT jsonb_object_agg(name, type) FROM foo; INSERT INTO foo VALUES (999999, NULL, 'bar'); SELECT jsonb_object_agg(name, type) FROM foo; +-- edge case for parser +SELECT jsonb_object_agg(DISTINCT 'a', 'abc'); + -- jsonb_object -- empty object, one dimension