1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Make group_similar_or_args() reorder clause list as little as possible

Currently, group_similar_or_args() permutes original positions of clauses
independently on whether it manages to find any groups of similar clauses.
While we are not providing any strict warranties on saving the original order
of OR-clauses, it is preferred that the original order be modified as little
as possible.

This commit changes the reordering algorithm of group_similar_or_args() in
the following way.  We reorder each group of similar clauses so that the
first item of the group stays in place, but all the other items are moved
after it.  So, if there are no similar clauses, the order of clauses stays
the same.  When there are some groups, only required reordering happens while
the rest of the clauses remain in their places.

Reported-by: Andrei Lepikhov <lepihov@gmail.com>
Discussion: https://postgr.es/m/3ac7c436-81e1-4191-9caf-b0dd70b51511%40gmail.com
Reviewed-by: Pavel Borisov <pashkin.elfe@gmail.com>
Reviewed-by: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Alena Rybakina <a.rybakina@postgrespro.ru>
This commit is contained in:
Alexander Korotkov
2025-03-28 23:36:49 +02:00
parent 519338ace4
commit 775a06d44c
5 changed files with 141 additions and 42 deletions

View File

@ -1189,6 +1189,8 @@ typedef struct
Oid inputcollid; /* OID of the OpClause input collation */
int argindex; /* index of the clause in the list of
* arguments */
int groupindex; /* value of argindex for the fist clause in
* the group of similar clauses */
} OrArgIndexMatch;
/*
@ -1229,6 +1231,29 @@ or_arg_index_match_cmp(const void *a, const void *b)
return 0;
}
/*
* Another comparison function for OrArgIndexMatch. It sorts groups together
* using groupindex. The group items are then sorted by argindex.
*/
static int
or_arg_index_match_cmp_group(const void *a, const void *b)
{
const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a;
const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b;
if (match_a->groupindex < match_b->groupindex)
return -1;
else if (match_a->groupindex > match_b->groupindex)
return 1;
if (match_a->argindex < match_b->argindex)
return -1;
else if (match_a->argindex > match_b->argindex)
return 1;
return 0;
}
/*
* group_similar_or_args
* Transform incoming OR-restrictinfo into a list of sub-restrictinfos,
@ -1282,6 +1307,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
i++;
matches[i].argindex = i;
matches[i].groupindex = i;
matches[i].indexnum = -1;
matches[i].colnum = -1;
matches[i].opno = InvalidOid;
@ -1400,9 +1426,40 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
return orargs;
}
/* Sort clauses to make similar clauses go together */
/*
* Sort clauses to make similar clauses go together. But at the same
* time, we would like to change the order of clauses as little as
* possible. To do so, we reorder each group of similar clauses so that
* the first item of the group stays in place, and all the other items are
* moved after it. So, if there are no similar clauses, the order of
* clauses stays the same. When there are some groups, required
* reordering happens while the rest of the clauses remain in their
* places. That is achieved by assigning a 'groupindex' to each clause:
* the number of the first item in the group in the original clause list.
*/
qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp);
/* Assign groupindex to the sorted clauses */
for (i = 1; i < n; i++)
{
/*
* When two clauses are similar and should belong to the same group,
* copy the 'groupindex' from the previous clause. Given we are
* considering clauses in direct order, all the clauses would have a
* 'groupindex' equal to the 'groupindex' of the first clause in the
* group.
*/
if (matches[i].indexnum == matches[i - 1].indexnum &&
matches[i].colnum == matches[i - 1].colnum &&
matches[i].opno == matches[i - 1].opno &&
matches[i].inputcollid == matches[i - 1].inputcollid &&
matches[i].indexnum != -1)
matches[i].groupindex = matches[i - 1].groupindex;
}
/* Re-sort clauses first by groupindex then by argindex */
qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp_group);
/*
* Group similar clauses into single sub-restrictinfo. Side effect: the
* resulting list of restrictions will be sorted by indexnum and colnum.

View File

@ -1899,13 +1899,13 @@ SELECT * FROM tenk1
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on tenk1
Recheck Cond: (((thousand = 42) AND (tenthous IS NULL)) OR ((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))))
Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))) OR ((thousand = 42) AND (tenthous IS NULL)))
Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL))
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous IS NULL))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous IS NULL))
(8 rows)
EXPLAIN (COSTS OFF)
@ -1938,13 +1938,13 @@ SELECT * FROM tenk1
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on tenk1
Recheck Cond: (((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))) OR ((thousand = 42) AND (tenthous = '1'::smallint)))
Recheck Cond: (((thousand = 42) AND (tenthous = '1'::smallint)) OR ((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))))
Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint))
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
(8 rows)
EXPLAIN (COSTS OFF)
@ -2129,16 +2129,16 @@ SELECT count(*) FROM tenk1
---------------------------------------------------------------------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on tenk1
Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR ((thousand = 42) OR (thousand = 41))))
Recheck Cond: ((hundred = 42) AND (((thousand = 42) OR (thousand = 41)) OR ((thousand = 99) AND (tenthous = 2))))
Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))
-> BitmapAnd
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 42)
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 99) AND (tenthous = 2))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = ANY ('{42,41}'::integer[]))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 99) AND (tenthous = 2))
(12 rows)
SELECT count(*) FROM tenk1
@ -3248,6 +3248,37 @@ SELECT b.relname,
(2 rows)
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
-- No OR-clause groupings should happen, and there should be no clause
-- permutations in the filtering conditions we could see in the EXPLAIN.
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2;
QUERY PLAN
--------------------------------------------------
Bitmap Heap Scan on tenk1
Recheck Cond: ((unique1 < 1) OR (hundred < 2))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 < 1)
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred < 2)
(7 rows)
-- OR clauses in the 'unique1' column are grouped, so clause permutation
-- occurs. W e can see it in the 'Recheck Cond': the second clause involving
-- the 'unique1' column goes just after the first one.
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2;
QUERY PLAN
---------------------------------------------------------------------
Bitmap Heap Scan on tenk1
Recheck Cond: (((unique1 < 1) OR (unique1 < 3)) OR (hundred < 2))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 < ANY ('{1,3}'::integer[]))
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred < 2)
(7 rows)
-- Check bitmap scan can consider similar OR arguments separately without
-- grouping them into SAOP.
CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);

View File

@ -4333,20 +4333,20 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
Recheck Cond: ((hundred = 4) OR (unique1 = 2))
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
(17 rows)
explain (costs off)
@ -4360,12 +4360,12 @@ select * from tenk1 a join tenk1 b on
Filter: ((unique1 = 2) OR (ten = 4))
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
(12 rows)
explain (costs off)
@ -4377,21 +4377,21 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
Recheck Cond: ((hundred = 4) OR (unique1 = 2))
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
(18 rows)
explain (costs off)
@ -4403,21 +4403,21 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
Recheck Cond: ((hundred = 4) OR (unique1 = 2))
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
(18 rows)
explain (costs off)
@ -4431,15 +4431,15 @@ select * from tenk1 a join tenk1 b on
-> Seq Scan on tenk1 b
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR ((unique1 = 3) OR (unique1 = 1)) OR (unique1 < 20))
Recheck Cond: ((unique1 < 20) OR ((unique1 = 3) OR (unique1 = 1)) OR ((unique2 = 3) OR (unique2 = 7)))
Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 < 20)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
(14 rows)
--

View File

@ -2568,24 +2568,24 @@ where not exists (select 1 from prtx2
-> Seq Scan on prtx1_1
Filter: ((a < 20) AND (c = 91))
-> Bitmap Heap Scan on prtx2_1
Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99))
Filter: (a = prtx1_1.a)
-> BitmapOr
-> Bitmap Index Scan on prtx2_1_c_idx
Index Cond: (c = 99)
-> Bitmap Index Scan on prtx2_1_b_idx
Index Cond: (b = (prtx1_1.b + 1))
-> Bitmap Index Scan on prtx2_1_c_idx
Index Cond: (c = 99)
-> Nested Loop Anti Join
-> Seq Scan on prtx1_2
Filter: ((a < 20) AND (c = 91))
-> Bitmap Heap Scan on prtx2_2
Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99))
Filter: (a = prtx1_2.a)
-> BitmapOr
-> Bitmap Index Scan on prtx2_2_c_idx
Index Cond: (c = 99)
-> Bitmap Index Scan on prtx2_2_b_idx
Index Cond: (b = (prtx1_2.b + 1))
-> Bitmap Index Scan on prtx2_2_c_idx
Index Cond: (c = 99)
(23 rows)
select * from prtx1

View File

@ -1355,6 +1355,17 @@ SELECT b.relname,
ORDER BY 1;
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
-- No OR-clause groupings should happen, and there should be no clause
-- permutations in the filtering conditions we could see in the EXPLAIN.
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2;
-- OR clauses in the 'unique1' column are grouped, so clause permutation
-- occurs. W e can see it in the 'Recheck Cond': the second clause involving
-- the 'unique1' column goes just after the first one.
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2;
-- Check bitmap scan can consider similar OR arguments separately without
-- grouping them into SAOP.
CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);