1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-25 20:23:07 +03:00

Invent "multibitmapsets", and use them to speed up antijoin detection.

Implement a data structure that is a List of Bitmapsets, which is
essentially a 2-D boolean array except that the rows need not all
be the same width.  Operations such as union and intersection are
meaningful for these, just as they are for Bitmapsets.  Eventually
we might build many of the same operations that we have written for
Bitmapsets, but for the first use-case we just need a few.

That first use-case is for antijoin detection: reduce_outer_joins
needs to find the set of Vars that are certain to be non-null in a
successfully joined (not null-extended) left join row, and also
find the set of Vars subject to higher-level IS NULL constraints,
and intersect them.  We had been doing this by making Lists of
the Var nodes and then using list_intersection, which works but is
pretty inefficient compared to a bitmapset-like intersection.
Potentially it's O(N^2) if there are a lot of Vars involved,
which fortunately there generally aren't; still it's not great.
Moreover, that method requires the Vars of interest to be exactly
equal() in the join condition and the upper IS NULL condition,
which is problematic for my WIP patch that labels Vars according
to which outer joins have possibly nulled them.

Discussion: https://postgr.es/m/892228.1668437838@sss.pgh.pa.us
Discussion: https://postgr.es/m/CAMbWs4-mvPPCJ1W6iK6dD5HiNwoJdi6mZp=-7mE8N9Sh+cd0tQ@mail.gmail.com
This commit is contained in:
Tom Lane
2022-11-16 13:58:42 -05:00
parent 90e4f308b4
commit e9e26b5e71
7 changed files with 238 additions and 25 deletions

View File

@@ -28,6 +28,7 @@
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "nodes/makefuncs.h"
#include "nodes/multibitmapset.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
@@ -2769,7 +2770,7 @@ reduce_outer_joins_pass1(Node *jtnode)
* state: state data collected by phase 1 for this node
* root: toplevel planner state
* nonnullable_rels: set of base relids forced non-null by upper quals
* forced_null_vars: list of Vars forced null by upper quals
* forced_null_vars: multibitmapset of Vars forced null by upper quals
*/
static void
reduce_outer_joins_pass2(Node *jtnode,
@@ -2799,8 +2800,8 @@ reduce_outer_joins_pass2(Node *jtnode,
pass_nonnullable_rels = bms_add_members(pass_nonnullable_rels,
nonnullable_rels);
pass_forced_null_vars = find_forced_null_vars(f->quals);
pass_forced_null_vars = list_concat(pass_forced_null_vars,
forced_null_vars);
pass_forced_null_vars = mbms_add_members(pass_forced_null_vars,
forced_null_vars);
/* And recurse --- but only into interesting subtrees */
Assert(list_length(f->fromlist) == list_length(state->sub_states));
forboth(l, f->fromlist, s, state->sub_states)
@@ -2897,7 +2898,7 @@ reduce_outer_joins_pass2(Node *jtnode,
if (jointype == JOIN_LEFT)
{
List *nonnullable_vars;
List *overlap;
Bitmapset *overlap;
/* Find Vars in j->quals that must be non-null in joined rows */
nonnullable_vars = find_nonnullable_vars(j->quals);
@@ -2907,11 +2908,8 @@ reduce_outer_joins_pass2(Node *jtnode,
* forced_null_vars overlap: we need to know if the overlap
* includes any RHS variables.
*/
overlap = list_intersection(nonnullable_vars,
forced_null_vars);
if (overlap != NIL &&
bms_overlap(pull_varnos(root, (Node *) overlap),
right_state->relids))
overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars);
if (bms_overlap(overlap, right_state->relids))
jointype = JOIN_ANTI;
}
@@ -2964,8 +2962,8 @@ reduce_outer_joins_pass2(Node *jtnode,
/* OK to merge upper and local constraints */
local_nonnullable_rels = bms_add_members(local_nonnullable_rels,
nonnullable_rels);
local_forced_null_vars = list_concat(local_forced_null_vars,
forced_null_vars);
local_forced_null_vars = mbms_add_members(local_forced_null_vars,
forced_null_vars);
}
}
else

View File

@@ -31,6 +31,7 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/multibitmapset.h"
#include "nodes/nodeFuncs.h"
#include "nodes/subscripting.h"
#include "nodes/supportnodes.h"
@@ -1566,7 +1567,7 @@ find_nonnullable_rels_walker(Node *node, bool top_level)
* find_nonnullable_vars
* Determine which Vars are forced nonnullable by given clause.
*
* Returns a list of all level-zero Vars that are referenced in the clause in
* Returns the set of all level-zero Vars that are referenced in the clause in
* such a way that the clause cannot possibly return TRUE if any of these Vars
* is NULL. (It is OK to err on the side of conservatism; hence the analysis
* here is simplistic.)
@@ -1578,8 +1579,9 @@ find_nonnullable_rels_walker(Node *node, bool top_level)
* the expression to have been AND/OR flattened and converted to implicit-AND
* format.
*
* The result is a palloc'd List, but we have not copied the member Var nodes.
* Also, we don't bother trying to eliminate duplicate entries.
* Attnos of the identified Vars are returned in a multibitmapset (a List of
* Bitmapsets). List indexes correspond to relids (varnos), while the per-rel
* Bitmapsets hold varattnos offset by FirstLowInvalidHeapAttributeNumber.
*
* top_level is true while scanning top-level AND/OR structure; here, showing
* the result is either FALSE or NULL is good enough. top_level is false when
@@ -1608,7 +1610,9 @@ find_nonnullable_vars_walker(Node *node, bool top_level)
Var *var = (Var *) node;
if (var->varlevelsup == 0)
result = list_make1(var);
result = mbms_add_member(result,
var->varno,
var->varattno - FirstLowInvalidHeapAttributeNumber);
}
else if (IsA(node, List))
{
@@ -1623,9 +1627,9 @@ find_nonnullable_vars_walker(Node *node, bool top_level)
*/
foreach(l, (List *) node)
{
result = list_concat(result,
find_nonnullable_vars_walker(lfirst(l),
top_level));
result = mbms_add_members(result,
find_nonnullable_vars_walker(lfirst(l),
top_level));
}
}
else if (IsA(node, FuncExpr))
@@ -1657,7 +1661,12 @@ find_nonnullable_vars_walker(Node *node, bool top_level)
switch (expr->boolop)
{
case AND_EXPR:
/* At top level we can just recurse (to the List case) */
/*
* At top level we can just recurse (to the List case), since
* the result should be the union of what we can prove in each
* arm.
*/
if (top_level)
{
result = find_nonnullable_vars_walker((Node *) expr->args,
@@ -1689,7 +1698,7 @@ find_nonnullable_vars_walker(Node *node, bool top_level)
if (result == NIL) /* first subresult? */
result = subresult;
else
result = list_intersection(result, subresult);
result = mbms_int_members(result, subresult);
/*
* If the intersection is empty, we can stop looking. This
@@ -1788,8 +1797,8 @@ find_nonnullable_vars_walker(Node *node, bool top_level)
* side of conservatism; hence the analysis here is simplistic. In fact,
* we only detect simple "var IS NULL" tests at the top level.)
*
* The result is a palloc'd List, but we have not copied the member Var nodes.
* Also, we don't bother trying to eliminate duplicate entries.
* As with find_nonnullable_vars, we return the varattnos of the identified
* Vars in a multibitmapset.
*/
List *
find_forced_null_vars(Node *node)
@@ -1804,7 +1813,9 @@ find_forced_null_vars(Node *node)
var = find_forced_null_var(node);
if (var)
{
result = list_make1(var);
result = mbms_add_member(result,
var->varno,
var->varattno - FirstLowInvalidHeapAttributeNumber);
}
/* Otherwise, handle AND-conditions */
else if (IsA(node, List))
@@ -1815,8 +1826,8 @@ find_forced_null_vars(Node *node)
*/
foreach(l, (List *) node)
{
result = list_concat(result,
find_forced_null_vars(lfirst(l)));
result = mbms_add_members(result,
find_forced_null_vars((Node *) lfirst(l)));
}
}
else if (IsA(node, BoolExpr))