1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-18 02:02:55 +03:00

Reduce "Var IS [NOT] NULL" quals during constant folding

In commit b262ad440, we introduced an optimization that reduces an IS
[NOT] NULL qual on a NOT NULL column to constant true or constant
false, provided we can prove that the input expression of the NullTest
is not nullable by any outer joins or grouping sets.  This deduction
happens quite late in the planner, during the distribution of quals to
rels in query_planner.  However, this approach has some drawbacks: we
can't perform any further folding with the constant, and it turns out
to be prone to bugs.

Ideally, this deduction should happen during constant folding.
However, the per-relation information about which columns are defined
as NOT NULL is not available at that point.  This information is
currently collected from catalogs when building RelOptInfos for base
or "other" relations.

This patch moves the collection of NOT NULL attribute information for
relations before pull_up_sublinks, storing it in a hash table keyed by
relation OID.  It then uses this information to perform the NullTest
deduction for Vars during constant folding.  This also makes it
possible to leverage this information to pull up NOT IN subqueries.

Note that this patch does not get rid of restriction_is_always_true
and restriction_is_always_false.  Removing them would prevent us from
reducing some IS [NOT] NULL quals that we were previously able to
reduce, because (a) the self-join elimination may introduce new IS NOT
NULL quals after constant folding, and (b) if some outer joins are
converted to inner joins, previously irreducible NullTest quals may
become reducible.

Author: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAMbWs4-bFJ1At4btk5wqbezdu8PLtQ3zv-aiaY3ry9Ymm=jgFQ@mail.gmail.com
This commit is contained in:
Richard Guo
2025-07-22 11:21:36 +09:00
parent 904f6a593a
commit e2debb6438
17 changed files with 336 additions and 81 deletions

View File

@@ -3048,36 +3048,16 @@ add_base_clause_to_rel(PlannerInfo *root, Index relid,
* expr_is_nonnullable
* Check to see if the Expr cannot be NULL
*
* If the Expr is a simple Var that is defined NOT NULL and meanwhile is not
* nulled by any outer joins, then we can know that it cannot be NULL.
* Currently we only support simple Vars.
*/
static bool
expr_is_nonnullable(PlannerInfo *root, Expr *expr)
{
	/*
	 * Only simple Vars are handled for now; any other expression is
	 * conservatively reported as possibly NULL.
	 */
	if (!IsA(expr, Var))
		return false;

	/*
	 * Delegate to var_is_nonnullable() instead of repeating the
	 * varnullingrels / system-column / NOT NULL column checks here.  Passing
	 * true for use_rel_info tells it to take the NOT NULL attribute numbers
	 * from the Var's RelOptInfo.
	 */
	return var_is_nonnullable(root, (Var *) expr, true);
}
/*

View File

@@ -342,6 +342,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
glob->transientPlan = false;
glob->dependsOnRole = false;
glob->partition_directory = NULL;
glob->rel_notnullatts_hash = NULL;
/*
* Assess whether it's feasible to use parallel mode for this query. We
@@ -723,11 +724,12 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
/*
* Scan the rangetable for relation RTEs and retrieve the necessary
* catalog information for each relation. Using this information, clear
* the inh flag for any relation that has no children, and expand virtual
* generated columns for any relation that contains them. Note that this
* step does not descend into sublinks and subqueries; if we pull up any
* sublinks or subqueries below, their relation RTEs are processed just
* before pulling them up.
* the inh flag for any relation that has no children, collect not-null
* attribute numbers for any relation that has column not-null
* constraints, and expand virtual generated columns for any relation that
* contains them. Note that this step does not descend into sublinks and
* subqueries; if we pull up any sublinks or subqueries below, their
* relation RTEs are processed just before pulling them up.
*/
parse = root->parse = preprocess_relation_rtes(root);

View File

@@ -1519,8 +1519,10 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
/*
* Scan the rangetable for relation RTEs and retrieve the necessary
* catalog information for each relation. Using this information, clear
* the inh flag for any relation that has no children, and expand virtual
* generated columns for any relation that contains them.
* the inh flag for any relation that has no children, collect not-null
* attribute numbers for any relation that has column not-null
* constraints, and expand virtual generated columns for any relation that
* contains them.
*
* Note: we construct an entirely dummy PlannerInfo for use here. This
* is fine because only the "glob" and "parse" links will be used in this
@@ -1760,6 +1762,7 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
Node **testexpr, List **paramIds)
{
Node *whereClause;
PlannerInfo subroot;
List *leftargs,
*rightargs,
*opids,
@@ -1819,12 +1822,15 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
* parent aliases were flattened already, and we're not going to pull any
* child Vars (of any description) into the parent.
*
* Note: passing the parent's root to eval_const_expressions is
* technically wrong, but we can get away with it since only the
* boundParams (if any) are used, and those would be the same in a
* subroot.
* Note: we construct an entirely dummy PlannerInfo to pass to
* eval_const_expressions. This is fine because only the "glob" and
* "parse" links are used by eval_const_expressions.
*/
whereClause = eval_const_expressions(root, whereClause);
MemSet(&subroot, 0, sizeof(subroot));
subroot.type = T_PlannerInfo;
subroot.glob = root->glob;
subroot.parse = subselect;
whereClause = eval_const_expressions(&subroot, whereClause);
whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
whereClause = (Node *) make_ands_implicit((Expr *) whereClause);

View File

@@ -36,6 +36,7 @@
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
#include "optimizer/placeholder.h"
#include "optimizer/plancat.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
#include "optimizer/tlist.h"
@@ -401,8 +402,9 @@ transform_MERGE_to_join(Query *parse)
*
* This scans the rangetable for relation RTEs and retrieves the necessary
* catalog information for each relation. Using this information, it clears
* the inh flag for any relation that has no children, and expands virtual
* generated columns for any relation that contains them.
* the inh flag for any relation that has no children, collects not-null
* attribute numbers for any relation that has column not-null constraints, and
* expands virtual generated columns for any relation that contains them.
*
* Note that expanding virtual generated columns may cause the query tree to
* have new copies of rangetable entries. Therefore, we have to use list_nth
@@ -447,6 +449,13 @@ preprocess_relation_rtes(PlannerInfo *root)
if (rte->inh)
rte->inh = relation->rd_rel->relhassubclass;
/*
* Check to see if the relation has any column not-null constraints;
* if so, retrieve the constraint information and store it in a
* relation OID based hash table.
*/
get_relation_notnullatts(root, relation);
/*
* Check to see if the relation has any virtual generated columns; if
* so, replace all Var nodes in the query that reference these columns
@@ -1384,8 +1393,10 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
/*
* Scan the rangetable for relation RTEs and retrieve the necessary
* catalog information for each relation. Using this information, clear
* the inh flag for any relation that has no children, and expand virtual
* generated columns for any relation that contains them.
* the inh flag for any relation that has no children, collect not-null
* attribute numbers for any relation that has column not-null
* constraints, and expand virtual generated columns for any relation that
* contains them.
*/
subquery = subroot->parse = preprocess_relation_rtes(subroot);

View File

@@ -20,6 +20,7 @@
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_class.h"
#include "catalog/pg_language.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
@@ -36,6 +37,7 @@
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
#include "parser/analyze.h"
@@ -43,6 +45,7 @@
#include "parser/parse_collate.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteHandler.h"
#include "rewrite/rewriteManip.h"
#include "tcop/tcopprot.h"
@@ -2242,7 +2245,8 @@ rowtype_field_matches(Oid rowtypeid, int fieldnum,
* only operators and functions that are reasonable to try to execute.
*
* NOTE: "root" can be passed as NULL if the caller never wants to do any
* Param substitutions nor receive info about inlined functions.
* Param substitutions nor receive info about inlined functions nor reduce
* NullTest for Vars to constant true or constant false.
*
* NOTE: the planner assumes that this will always flatten nested AND and
* OR clauses into N-argument form. See comments in prepqual.c.
@@ -3544,6 +3548,31 @@ eval_const_expressions_mutator(Node *node,
return makeBoolConst(result, false);
}
/*
 * A scalar (non-row) NullTest applied directly to a Var that is known to
 * be non-nullable has a fixed outcome, so replace it with a boolean
 * constant.  The check needs planner information, so it is skipped when
 * no root was supplied.
 */
if (context->root && arg && IsA(arg, Var) && !ntest->argisrow)
{
	Var		   *nt_var = (Var *) arg;

	if (var_is_nonnullable(context->root, nt_var, false))
	{
		bool		folded;

		if (ntest->nulltesttype == IS_NOT_NULL)
			folded = true;
		else if (ntest->nulltesttype == IS_NULL)
			folded = false;
		else
		{
			elog(ERROR, "unrecognized nulltesttype: %d",
				 (int) ntest->nulltesttype);
			folded = false;		/* keep compiler quiet */
		}

		return makeBoolConst(folded, false);
	}
}
newntest = makeNode(NullTest);
newntest->arg = (Expr *) arg;
@@ -4162,6 +4191,67 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
return newexpr;
}
/*
 * var_is_nonnullable
 *	  Report whether the given Var is provably never NULL.
 *
 * The answer is true only for a Var of the current query level that cannot
 * be nulled by any outer joins or grouping sets, and that is either a system
 * column or a column defined NOT NULL.
 *
 * When use_rel_info is true, the set of NOT NULL columns is read from the
 * Var's RelOptInfo.  Otherwise it is looked up by relation OID through
 * find_relation_notnullatts().
 */
bool
var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info)
{
	Relids		nonnull_cols;

	Assert(IsA(var, Var));

	/*
	 * Give up on upper-level Vars and on Vars that some outer join or
	 * grouping set could null.
	 */
	if (var->varlevelsup != 0 || !bms_is_empty(var->varnullingrels))
		return false;

	/* a system column never holds NULL */
	if (var->varattno < 0)
		return true;

	if (!use_rel_info)
	{
		RangeTblEntry *var_rte = planner_rt_fetch(var->varno, root);

		/*
		 * Children of a plain inheritance parent may disagree on whether a
		 * column is NOT NULL, so such parents prove nothing.  Partitioned
		 * tables are exempt because that disagreement cannot occur there.
		 */
		if (var_rte->inh && var_rte->relkind != RELKIND_PARTITIONED_TABLE)
			return false;

		/* no RelOptInfo to consult: search the hash table instead */
		nonnull_cols = find_relation_notnullatts(root, var_rte->relid);
	}
	else
	{
		/* the RelOptInfo already carries the NOT NULL column set */
		nonnull_cols = find_base_rel(root, var->varno)->notnullattnums;
	}

	/* only a user column listed in the set qualifies */
	return var->varattno > 0 &&
		bms_is_member(var->varattno, nonnull_cols);
}
/*
* expand_function_arguments: convert named-notation args to positional args
* and/or insert default args, as needed

View File

@@ -466,8 +466,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
Index *childRTindex_p)
{
Query *parse = root->parse;
Oid parentOID PG_USED_FOR_ASSERTS_ONLY =
RelationGetRelid(parentrel);
Oid parentOID = RelationGetRelid(parentrel);
Oid childOID = RelationGetRelid(childrel);
RangeTblEntry *childrte;
Index childRTindex;
@@ -513,6 +512,13 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
*childrte_p = childrte;
*childRTindex_p = childRTindex;
/*
* Retrieve column not-null constraint information for the child relation
* if its relation OID is different from the parent's.
*/
if (childOID != parentOID)
get_relation_notnullatts(root, childrel);
/*
* Build an AppendRelInfo struct for each parent/child pair.
*/

View File

@@ -59,6 +59,12 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
/* Hook for plugins to get control in get_relation_info() */
get_relation_info_hook_type get_relation_info_hook = NULL;
/*
 * Entry of the hash table that maps a relation OID to the set of attribute
 * numbers of that relation's NOT NULL columns.  The table is created with
 * keysize sizeof(Oid), so relid serves as the lookup key.
 *
 * NOTE(review): dynahash expects the key at the start of each entry, so
 * relid presumably has to remain the first field -- confirm before
 * reordering.
 */
typedef struct NotnullHashEntry
{
Oid relid; /* OID of the relation */
Relids notnullattnums; /* attnums of NOT NULL columns */
} NotnullHashEntry;
static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
Relation relation, bool inhparent);
@@ -172,27 +178,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
* RangeTblEntry does get populated.
*/
if (!inhparent || relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
for (int i = 0; i < relation->rd_att->natts; i++)
{
CompactAttribute *attr = TupleDescCompactAttr(relation->rd_att, i);
Assert(attr->attnullability != ATTNULLABLE_UNKNOWN);
if (attr->attnullability == ATTNULLABLE_VALID)
{
rel->notnullattnums = bms_add_member(rel->notnullattnums,
i + 1);
/*
* Per RemoveAttributeById(), dropped columns will have their
* attnotnull unset, so we needn't check for dropped columns
* in the above condition.
*/
Assert(!attr->attisdropped);
}
}
}
rel->notnullattnums = find_relation_notnullatts(root, relationObjectId);
/*
* Estimate relation size --- unless it's an inheritance parent, in which
@@ -683,6 +669,105 @@ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
}
}
/*
 * get_relation_notnullatts -
 *	  Record which columns of the given relation are marked NOT NULL.
 *
 * The caller hands in an open relcache entry.  The resulting set of
 * attribute numbers is stored in root->glob->rel_notnullatts_hash under the
 * relation's OID; the hash table is created on first use.  Nothing is stored
 * for a relation without not-null constraints, and a relation that already
 * has an entry is left untouched.
 */
void
get_relation_notnullatts(PlannerInfo *root, Relation relation)
{
	Oid			relid = RelationGetRelid(relation);
	NotnullHashEntry *entry;
	Relids		attnums = NULL;
	bool		already_present;

	/* nothing to record when no column carries a not-null constraint */
	if (relation->rd_att->constr == NULL ||
		!relation->rd_att->constr->has_not_null)
		return;

	/* build the OID-keyed hash table the first time it is needed */
	if (root->glob->rel_notnullatts_hash == NULL)
	{
		HASHCTL		ctl;

		ctl.keysize = sizeof(Oid);
		ctl.entrysize = sizeof(NotnullHashEntry);
		ctl.hcxt = CurrentMemoryContext;

		root->glob->rel_notnullatts_hash =
			hash_create("Relation NOT NULL attnums",
						64L,	/* arbitrary initial size */
						&ctl,
						HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
	}

	/* find this relation's entry, creating it if it is missing */
	entry = (NotnullHashEntry *)
		hash_search(root->glob->rel_notnullatts_hash,
					&relid,
					HASH_ENTER,
					&already_present);

	/* an existing entry was filled in by an earlier call */
	if (already_present)
		return;

	/* gather the 1-based attribute numbers of all NOT NULL columns */
	for (int attno = 1; attno <= relation->rd_att->natts; attno++)
	{
		CompactAttribute *cattr = TupleDescCompactAttr(relation->rd_att,
													   attno - 1);

		Assert(cattr->attnullability != ATTNULLABLE_UNKNOWN);

		if (cattr->attnullability != ATTNULLABLE_VALID)
			continue;

		/*
		 * Per RemoveAttributeById(), dropped columns will have their
		 * attnotnull unset, so dropped columns need no separate test here.
		 */
		Assert(!cattr->attisdropped);

		attnums = bms_add_member(attnums, attno);
	}

	/* finish initializing the freshly created entry */
	entry->notnullattnums = attnums;
}
/*
 * find_relation_notnullatts -
 *	  Look up the NOT NULL column set recorded for a relation OID.
 *
 * Returns NULL when the hash table has not been created yet or when it holds
 * no entry for relid; otherwise returns the stored set of attribute numbers.
 */
Relids
find_relation_notnullatts(PlannerInfo *root, Oid relid)
{
	NotnullHashEntry *entry = NULL;

	/* hash_search() with HASH_FIND yields NULL when the key is absent */
	if (root->glob->rel_notnullatts_hash != NULL)
		entry = (NotnullHashEntry *)
			hash_search(root->glob->rel_notnullatts_hash,
						&relid,
						HASH_FIND,
						NULL);

	return entry ? entry->notnullattnums : NULL;
}
/*
* infer_arbiter_indexes -
* Determine the unique indexes used to arbitrate speculative insertion.