1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-22 23:02:54 +03:00

Expand virtual generated columns in the planner

Commit 83ea6c540 added support for virtual generated columns that are
computed on read.  All Var nodes in the query that reference virtual
generated columns must be replaced with the corresponding generation
expressions.  Currently, this replacement occurs in the rewriter.
However, this approach has several issues.  If a Var referencing a
virtual generated column has any varnullingrels, those varnullingrels
need to be propagated into the generation expression.  Failing to do
so can lead to "wrong varnullingrels" errors and improper outer-join
removal.

Additionally, if such a Var comes from the nullable side of an outer
join, we may need to wrap the generation expression in a
PlaceHolderVar to ensure that it is evaluated at the right place and
hence is forced to null when the outer join should do so.  In certain
cases, such as when the query uses grouping sets, we also need a
PlaceHolderVar for anything that is not a simple Var to isolate
subexpressions.  Failure to do so can result in incorrect results.

To fix these issues, this patch expands the virtual generated columns
in the planner rather than in the rewriter, and leverages the
pullup_replace_vars architecture to avoid code duplication.  The
generation expressions will be correctly marked with nullingrel bits
and wrapped in PlaceHolderVars when needed by the pullup_replace_vars
callback function.  This requires handling the OLD/NEW RETURNING list
Vars in pullup_replace_vars_callback, as it may now deal with Vars
referencing the result relation instead of a subquery.

The "wrong varnullingrels" error was reported by Alexander Lakhin.
The incorrect result issue and the improper outer-join removal issue
were reported by Richard Guo.

Author: Richard Guo <guofenglinux@gmail.com>
Author: Dean Rasheed <dean.a.rasheed@gmail.com>
Reviewed-by: Jian He <jian.universality@gmail.com>
Discussion: https://postgr.es/m/75eb1a6f-d59f-42e6-8a78-124ee808cda7@gmail.com
This commit is contained in:
Richard Guo 2025-02-25 16:10:25 +09:00
parent 560a842d63
commit 1e4351af32
10 changed files with 445 additions and 46 deletions

View File

@ -734,6 +734,14 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
*/
preprocess_function_rtes(root);
/*
* Scan the rangetable for relations with virtual generated columns, and
* replace all Var nodes in the query that reference these columns with
* the generation expressions. Recursion issues here are handled in the
* same way as for SubLinks.
*/
parse = root->parse = expand_virtual_generated_columns(root);
/*
* Check to see if any subqueries in the jointree can be merged into this
* query.

View File

@ -7,6 +7,7 @@
* replace_empty_jointree
* pull_up_sublinks
* preprocess_function_rtes
* expand_virtual_generated_columns
* pull_up_subqueries
* flatten_simple_union_all
* do expression preprocessing (including flattening JOIN alias vars)
@ -25,6 +26,7 @@
*/
#include "postgres.h"
#include "access/table.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "miscadmin.h"
@ -39,7 +41,9 @@
#include "optimizer/tlist.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteHandler.h"
#include "rewrite/rewriteManip.h"
#include "utils/rel.h"
typedef struct nullingrel_info
@ -58,6 +62,8 @@ typedef struct pullup_replace_vars_context
PlannerInfo *root;
List *targetlist; /* tlist of subquery being pulled up */
RangeTblEntry *target_rte; /* RTE of subquery */
int result_relation; /* the index of the result relation in the
* rewritten query */
Relids relids; /* relids within subquery, as numbered after
* pullup (set only if target_rte->lateral) */
nullingrel_info *nullinfo; /* per-RTE nullingrel info (set only if
@ -916,6 +922,133 @@ preprocess_function_rtes(PlannerInfo *root)
}
}
/*
 * expand_virtual_generated_columns
 *		Substitute generation expressions for virtual generated column Vars.
 *
 * Walks the query's rangetable looking for plain relations that have at
 * least one virtual generated column.  For each such relation, every Var in
 * the query that refers to one of those columns is replaced by the column's
 * generation expression, using the pullup_replace_vars machinery.
 * Subqueries are not descended into here; they receive the same treatment
 * when they are planned themselves.
 *
 * This must run after SubLinks in the query's quals have been pulled up;
 * otherwise virtual generated column references inside SubLinks that are
 * due to be converted into joins would be left unexpanded.
 *
 * The return value is the query tree to use from now on: a modified copy if
 * any relation with virtual generated columns was found, otherwise
 * root->parse itself.
 */
Query *
expand_virtual_generated_columns(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	int			rt_index = 0;
	ListCell   *lc;

	foreach(lc, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
		Relation	rel;
		TupleDesc	tupdesc;
		List	   *tlist = NIL;
		pullup_replace_vars_context rvcontext;

		/* rangetable indexes are 1-based, so count before any "continue" */
		rt_index++;

		/* Virtual generated columns exist only on normal relations. */
		if (rte->rtekind != RTE_RELATION)
			continue;

		rel = table_open(rte->relid, NoLock);
		tupdesc = RelationGetDescr(rel);

		/* Nothing to expand unless the relation has such a column. */
		if (tupdesc->constr == NULL ||
			!tupdesc->constr->has_generated_virtual)
		{
			table_close(rel, NoLock);
			continue;
		}

		/*
		 * Build a targetlist with one entry per attribute of the relation:
		 * the generation expression for each virtual generated column, and
		 * a plain Var referencing this RTE for every other column.
		 */
		for (int i = 0; i < tupdesc->natts; i++)
		{
			Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
			int			attnum = i + 1;
			Expr	   *expr;

			if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
			{
				Node	   *defexpr = build_generation_expression(rel, attnum);

				/* retarget the expression's Vars from varno 1 to this RTE */
				ChangeVarNodes(defexpr, 1, rt_index, 0);
				expr = (Expr *) defexpr;
			}
			else
			{
				expr = (Expr *) makeVar(rt_index,
										attnum,
										attr->atttypid,
										attr->atttypmod,
										attr->attcollation,
										0);
			}

			tlist = lappend(tlist,
							makeTargetEntry(expr, attnum, NULL, false));
		}

		Assert(list_length(tlist) > 0);
		Assert(!rte->lateral);

		/*
		 * The targetlist entries can now be inserted into the query as they
		 * stand, apart from possibly needing PlaceHolderVar wrappers.  Fill
		 * in the context that pullup_replace_vars works from.
		 */
		rvcontext.root = root;
		rvcontext.targetlist = tlist;
		rvcontext.target_rte = rte;
		rvcontext.varno = rt_index;
		rvcontext.result_relation = parse->resultRelation;

		/* these are not needed here */
		rvcontext.relids = NULL;
		rvcontext.nullinfo = NULL;
		rvcontext.outer_hasSubLinks = NULL;

		/* cache array is indexed 0 .. length(tlist), hence the extra slot */
		rvcontext.rv_cache = palloc0((list_length(tlist) + 1) *
									 sizeof(Node *));

		/*
		 * With grouping sets in the query, anything that is not a simple Var
		 * must be wrapped in a PlaceHolderVar, so that each expression keeps
		 * a separate identity and still matches grouping set columns where
		 * it should.  (Wrapping only the values used in grouping set
		 * columns, and only in non-aggregated parts of the tlist and
		 * havingQual, would be enough, but pullup_replace_vars lacks the
		 * infrastructure for that.)
		 */
		rvcontext.wrap_non_vars = (parse->groupingSets != NIL);

		/* Perform the replacement throughout the query tree. */
		parse = (Query *) pullup_replace_vars((Node *) parse, &rvcontext);

		table_close(rel, NoLock);
	}

	return parse;
}
/*
* pull_up_subqueries
* Look for subqueries in the rangetable that can be pulled up into
@ -1197,6 +1330,13 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
*/
preprocess_function_rtes(subroot);
/*
* Scan the rangetable for relations with virtual generated columns, and
* replace all Var nodes in the query that reference these columns with
* the generation expressions.
*/
subquery = subroot->parse = expand_virtual_generated_columns(subroot);
/*
* Recursively pull up the subquery's subqueries, so that
* pull_up_subqueries' processing is complete for its jointree and
@ -1274,6 +1414,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
rvcontext.root = root;
rvcontext.targetlist = subquery->targetList;
rvcontext.target_rte = rte;
rvcontext.result_relation = 0;
if (rte->lateral)
{
rvcontext.relids = get_relids_in_jointree((Node *) subquery->jointree,
@ -1834,6 +1975,7 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
rvcontext.root = root;
rvcontext.targetlist = tlist;
rvcontext.target_rte = rte;
rvcontext.result_relation = 0;
rvcontext.relids = NULL; /* can't be any lateral references here */
rvcontext.nullinfo = NULL;
rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
@ -1993,6 +2135,7 @@ pull_up_constant_function(PlannerInfo *root, Node *jtnode,
NULL, /* resname */
false)); /* resjunk */
rvcontext.target_rte = rte;
rvcontext.result_relation = 0;
/*
* Since this function was reduced to a Const, it doesn't contain any
@ -2490,6 +2633,10 @@ pullup_replace_vars_callback(Var *var,
bool need_phv;
Node *newnode;
/* System columns are not replaced. */
if (varattno < InvalidAttrNumber)
return (Node *) copyObject(var);
/*
* We need a PlaceHolderVar if the Var-to-be-replaced has nonempty
* varnullingrels (unless we find below that the replacement expression is
@ -2559,6 +2706,22 @@ pullup_replace_vars_callback(Var *var,
rowexpr->location = var->location;
newnode = (Node *) rowexpr;
/* Handle any OLD/NEW RETURNING list Vars */
if (var->varreturningtype != VAR_RETURNING_DEFAULT)
{
/*
* Wrap the RowExpr in a ReturningExpr node, so that the executor
* returns NULL if the OLD/NEW row does not exist.
*/
ReturningExpr *rexpr = makeNode(ReturningExpr);
rexpr->retlevelsup = 0;
rexpr->retold = (var->varreturningtype == VAR_RETURNING_OLD);
rexpr->retexpr = (Expr *) newnode;
newnode = (Node *) rexpr;
}
/*
* Insert PlaceHolderVar if needed. Notice that we are wrapping one
* PlaceHolderVar around the whole RowExpr, rather than putting one
@ -2588,6 +2751,39 @@ pullup_replace_vars_callback(Var *var,
/* Make a copy of the tlist item to return */
newnode = (Node *) copyObject(tle->expr);
/* Handle any OLD/NEW RETURNING list Vars */
if (var->varreturningtype != VAR_RETURNING_DEFAULT)
{
/*
* Copy varreturningtype onto any Vars in the tlist item that
* refer to result_relation (which had better be non-zero).
*/
if (rcon->result_relation == 0)
elog(ERROR, "variable returning old/new found outside RETURNING list");
SetVarReturningType((Node *) newnode, rcon->result_relation,
0, var->varreturningtype);
/*
* If the replacement expression in the targetlist is not simply a
* Var referencing result_relation, wrap it in a ReturningExpr
* node, so that the executor returns NULL if the OLD/NEW row does
* not exist.
*/
if (!IsA(newnode, Var) ||
((Var *) newnode)->varno != rcon->result_relation ||
((Var *) newnode)->varlevelsup != 0)
{
ReturningExpr *rexpr = makeNode(ReturningExpr);
rexpr->retlevelsup = 0;
rexpr->retold = (var->varreturningtype == VAR_RETURNING_OLD);
rexpr->retexpr = (Expr *) newnode;
newnode = (Node *) rexpr;
}
}
/* Insert PlaceHolderVar if needed */
if (need_phv)
{

View File

@ -2190,10 +2190,6 @@ fireRIRrules(Query *parsetree, List *activeRIRs)
* requires special recursion detection if the new quals have sublink
* subqueries, and if we did it in the loop above query_tree_walker would
* then recurse into those quals a second time.
*
* Finally, we expand any virtual generated columns. We do this after
* each table's RLS policies are applied because the RLS policies might
* also refer to the table's virtual generated columns.
*/
rt_index = 0;
foreach(lc, parsetree->rtable)
@ -2207,11 +2203,10 @@ fireRIRrules(Query *parsetree, List *activeRIRs)
++rt_index;
/*
* Only normal relations can have RLS policies or virtual generated
* columns.
*/
if (rte->rtekind != RTE_RELATION)
/* Only normal relations can have RLS policies */
if (rte->rtekind != RTE_RELATION ||
(rte->relkind != RELKIND_RELATION &&
rte->relkind != RELKIND_PARTITIONED_TABLE))
continue;
rel = table_open(rte->relid, NoLock);
@ -2300,16 +2295,6 @@ fireRIRrules(Query *parsetree, List *activeRIRs)
if (hasSubLinks)
parsetree->hasSubLinks = true;
/*
* Expand any references to virtual generated columns of this table.
* Note that subqueries in virtual generated column expressions are
* not currently supported, so this cannot add any more sublinks.
*/
parsetree = (Query *)
expand_generated_columns_internal((Node *) parsetree,
rel, rt_index, rte,
parsetree->resultRelation);
table_close(rel, NoLock);
}
@ -4457,35 +4442,12 @@ expand_generated_columns_internal(Node *node, Relation rel, int rt_index,
if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
{
Node *defexpr;
int attnum = i + 1;
Oid attcollid;
TargetEntry *te;
defexpr = build_column_default(rel, attnum);
if (defexpr == NULL)
elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
attnum, RelationGetRelationName(rel));
/*
* If the column definition has a collation and it is
* different from the collation of the generation expression,
* put a COLLATE clause around the expression.
*/
attcollid = attr->attcollation;
if (attcollid && attcollid != exprCollation(defexpr))
{
CollateExpr *ce = makeNode(CollateExpr);
ce->arg = (Expr *) defexpr;
ce->collOid = attcollid;
ce->location = -1;
defexpr = (Node *) ce;
}
defexpr = build_generation_expression(rel, i + 1);
ChangeVarNodes(defexpr, 1, rt_index, 0);
te = makeTargetEntry((Expr *) defexpr, attnum, 0, false);
te = makeTargetEntry((Expr *) defexpr, i + 1, 0, false);
tlist = lappend(tlist, te);
}
}
@ -4528,6 +4490,47 @@ expand_generated_columns_in_expr(Node *node, Relation rel, int rt_index)
return node;
}
/*
 * build_generation_expression
 *		Return the generation expression of a virtual generated column.
 *
 * 'attrno' is the 1-based attribute number of the column within 'rel'.
 * An error is raised if no generation expression can be found for it.
 *
 * When the column has a collation that differs from the collation of the
 * generation expression, the result is wrapped in a CollateExpr carrying
 * the column's collation.
 */
Node *
build_generation_expression(Relation rel, int attrno)
{
	TupleDesc	tupdesc = RelationGetDescr(rel);
	Form_pg_attribute attr = TupleDescAttr(tupdesc, attrno - 1);
	Node	   *result;
	Oid			colcollid;
	CollateExpr *collate;

	/* Caller must only ask about virtual generated columns. */
	Assert(tupdesc->constr && tupdesc->constr->has_generated_virtual);
	Assert(attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL);

	result = build_column_default(rel, attrno);
	if (result == NULL)
		elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
			 attrno, RelationGetRelationName(rel));

	/*
	 * No COLLATE clause is needed if the column has no collation, or if the
	 * expression already yields the column's collation.
	 */
	colcollid = attr->attcollation;
	if (!colcollid || colcollid == exprCollation(result))
		return result;

	/* Otherwise force the column's collation onto the expression. */
	collate = makeNode(CollateExpr);
	collate->arg = (Expr *) result;
	collate->collOid = colcollid;
	collate->location = -1;

	return (Node *) collate;
}
/*
* QueryRewrite -

View File

@ -1010,7 +1010,7 @@ SetVarReturningType_walker(Node *node, SetVarReturningType_context *context)
return expression_tree_walker(node, SetVarReturningType_walker, context);
}
static void
void
SetVarReturningType(Node *node, int result_relation, int sublevels_up,
VarReturningType returning_type)
{

View File

@ -2147,7 +2147,7 @@ typedef struct InferenceElem
* rule, which may also contain arbitrary expressions.
*
* ReturningExpr nodes never appear in a parsed Query --- they are only ever
* inserted by the rewriter.
* inserted by the rewriter and the planner.
*/
typedef struct ReturningExpr
{

View File

@ -25,6 +25,7 @@ extern void transform_MERGE_to_join(Query *parse);
extern void replace_empty_jointree(Query *parse);
extern void pull_up_sublinks(PlannerInfo *root);
extern void preprocess_function_rtes(PlannerInfo *root);
extern Query *expand_virtual_generated_columns(PlannerInfo *root);
extern void pull_up_subqueries(PlannerInfo *root);
extern void flatten_simple_union_all(PlannerInfo *root);
extern void reduce_outer_joins(PlannerInfo *root);

View File

@ -39,5 +39,6 @@ extern void error_view_not_updatable(Relation view,
const char *detail);
extern Node *expand_generated_columns_in_expr(Node *node, Relation rel, int rt_index);
extern Node *build_generation_expression(Relation rel, int attrno);
#endif /* REWRITEHANDLER_H */

View File

@ -55,6 +55,9 @@ extern void IncrementVarSublevelsUp(Node *node, int delta_sublevels_up,
extern void IncrementVarSublevelsUp_rtable(List *rtable,
int delta_sublevels_up, int min_sublevels_up);
extern void SetVarReturningType(Node *node, int result_relation, int sublevels_up,
VarReturningType returning_type);
extern bool rangeTableEntry_used(Node *node, int rt_index,
int sublevels_up);

View File

@ -1398,3 +1398,133 @@ SELECT attrelid, attname, attgenerated FROM pg_attribute WHERE attgenerated NOT
----------+---------+--------------
(0 rows)
--
-- test the expansion of virtual generated columns
--
-- these tests are specific to generated_virtual.sql
--
create table gtest32 (
a int primary key,
b int generated always as (a * 2),
c int generated always as (10 + 10),
d int generated always as (coalesce(a, 100))
);
insert into gtest32 values (1), (2);
analyze gtest32;
-- Ensure that nullingrel bits are propagated into the generation expressions
explain (costs off)
select sum(t2.b) over (partition by t2.a),
sum(t2.c) over (partition by t2.a),
sum(t2.d) over (partition by t2.a)
from gtest32 as t1 left join gtest32 as t2 on (t1.a = t2.a)
order by t1.a;
QUERY PLAN
------------------------------------------------------
Sort
Sort Key: t1.a
-> WindowAgg
-> Sort
Sort Key: t2.a
-> Nested Loop Left Join
Join Filter: (t1.a = t2.a)
-> Seq Scan on gtest32 t1
-> Materialize
-> Seq Scan on gtest32 t2
(10 rows)
select sum(t2.b) over (partition by t2.a),
sum(t2.c) over (partition by t2.a),
sum(t2.d) over (partition by t2.a)
from gtest32 as t1 left join gtest32 as t2 on (t1.a = t2.a)
order by t1.a;
sum | sum | sum
-----+-----+-----
2 | 20 | 1
4 | 20 | 2
(2 rows)
-- Ensure that outer-join removal functions correctly after the propagation of nullingrel bits
explain (costs off)
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2;
QUERY PLAN
-----------------------------------------
Hash Left Join
Hash Cond: (t1.a = t2.a)
Filter: (COALESCE((t2.a * 2), 1) = 2)
-> Seq Scan on gtest32 t1
-> Hash
-> Seq Scan on gtest32 t2
(6 rows)
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2;
a
---
1
(1 row)
explain (costs off)
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2 or t1.a is null;
QUERY PLAN
-------------------------------------------------------------
Hash Left Join
Hash Cond: (t1.a = t2.a)
Filter: ((COALESCE((t2.a * 2), 1) = 2) OR (t1.a IS NULL))
-> Seq Scan on gtest32 t1
-> Hash
-> Seq Scan on gtest32 t2
(6 rows)
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2 or t1.a is null;
a
---
1
(1 row)
-- Ensure that the generation expressions are wrapped into PHVs if needed
explain (verbose, costs off)
select t2.* from gtest32 t1 left join gtest32 t2 on false;
QUERY PLAN
------------------------------------------------------
Nested Loop Left Join
Output: a, (a * 2), (20), (COALESCE(a, 100))
Join Filter: false
-> Seq Scan on generated_virtual_tests.gtest32 t1
Output: t1.a, t1.b, t1.c, t1.d
-> Result
Output: a, 20, COALESCE(a, 100)
One-Time Filter: false
(8 rows)
select t2.* from gtest32 t1 left join gtest32 t2 on false;
a | b | c | d
---+---+---+---
| | |
| | |
(2 rows)
explain (verbose, costs off)
select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20;
QUERY PLAN
-----------------------------------------------------
HashAggregate
Output: a, ((a * 2)), (20), (COALESCE(a, 100))
Hash Key: t.a
Hash Key: (t.a * 2)
Hash Key: 20
Hash Key: COALESCE(t.a, 100)
Filter: ((20) = 20)
-> Seq Scan on generated_virtual_tests.gtest32 t
Output: a, (a * 2), 20, COALESCE(a, 100)
(9 rows)
select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20;
a | b | c | d
---+---+----+---
| | 20 |
(1 row)
drop table gtest32;

View File

@ -732,3 +732,60 @@ CREATE TABLE gtest28b (LIKE gtest28a INCLUDING GENERATED);
-- sanity check of system catalog
SELECT attrelid, attname, attgenerated FROM pg_attribute WHERE attgenerated NOT IN ('', 's', 'v');
--
-- test the expansion of virtual generated columns
--
-- these tests are specific to generated_virtual.sql
--
create table gtest32 (
a int primary key,
b int generated always as (a * 2),
c int generated always as (10 + 10),
d int generated always as (coalesce(a, 100))
);
insert into gtest32 values (1), (2);
analyze gtest32;
-- Ensure that nullingrel bits are propagated into the generation expressions
explain (costs off)
select sum(t2.b) over (partition by t2.a),
sum(t2.c) over (partition by t2.a),
sum(t2.d) over (partition by t2.a)
from gtest32 as t1 left join gtest32 as t2 on (t1.a = t2.a)
order by t1.a;
select sum(t2.b) over (partition by t2.a),
sum(t2.c) over (partition by t2.a),
sum(t2.d) over (partition by t2.a)
from gtest32 as t1 left join gtest32 as t2 on (t1.a = t2.a)
order by t1.a;
-- Ensure that outer-join removal functions correctly after the propagation of nullingrel bits
explain (costs off)
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2;
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2;
explain (costs off)
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2 or t1.a is null;
select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a
where coalesce(t2.b, 1) = 2 or t1.a is null;
-- Ensure that the generation expressions are wrapped into PHVs if needed
explain (verbose, costs off)
select t2.* from gtest32 t1 left join gtest32 t2 on false;
select t2.* from gtest32 t1 left join gtest32 t2 on false;
explain (verbose, costs off)
select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20;
select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20;
drop table gtest32;