1
0
mirror of https://github.com/MariaDB/server.git synced 2025-04-18 21:44:20 +03:00

MDEV-34620: Lots of index_merge created and discarded for many-way OR

If a query has many OR-ed constructs which can use multiple indexes

  (key1=1 AND key2=10) OR
  (key1=2 AND key2=20) OR
  (key1=3 AND key2=30) OR
  ...

The range optimizer would construct and then discard a lot of potential
index_merge plans. This process
1. is CPU-intensive
2. can hit the @@optimizer_max_sel_args limitation after which all
   potential range or index_merge plans are discarded.

The fix is to apply a heuristic: if there is an OR clause with more than
MAX_OR_ELEMENTS_FOR_INDEX_MERGE=100 branches (hard-coded constant),
disallow construction of index_merge plans for the OR branches.
This commit is contained in:
Sergei Petrunia 2024-07-15 16:00:32 +03:00
parent 733852d4c3
commit 1965b2be16
4 changed files with 155 additions and 0 deletions

View File

@ -247,3 +247,70 @@ SELECT id FROM t1 WHERE id IS NULL OR id NOT BETWEEN 1 AND 4;
id
5
DROP TABLE t1;
#
# MDEV-34620: Many index_merge variants made and discarded for a big OR
#
CREATE TABLE t1 (
a1 int NOT NULL,
a2 int NOT NULL,
filler char(100),
KEY key1 (a1,a2),
KEY key2 (a2,a1)
);
insert into t1 (a1,a2) values (1,1),(2,2),(3,3);
set @query= concat(
"explain select * from t1 where\n",
(select
group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " )
from seq_1_to_30)
);
set optimizer_trace=1;
prepare s from @query;
execute s;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL key1,key2 NULL NULL NULL 3 Using where
set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis');
# Observe that "key1" is a a part of several index_merge_union:
select json_pretty(json_search(@trace, 'all', 'key1'));
json_pretty(json_search(@trace, 'all', 'key1'))
[
"$[0].potential_range_indexes[0].index",
"$[0].analyzing_range_alternatives.range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[0].range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[0].index_to_merge",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[1].range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[1].index_to_merge",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[0].range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[0].index_to_merge",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[1].range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[1].index_to_merge",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[2].range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[2].index_to_merge",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[0].range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[0].index_to_merge",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[1].range_scan_alternatives[0].index",
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[1].index_to_merge"
]
#
# Now, same as above but for a long IN-list
#
set @query= concat(
"explain select * from t1 where\n",
(select
group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " )
from seq_1_to_120)
);
set optimizer_trace=1;
prepare s from @query;
execute s;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL key1,key2 NULL NULL NULL 3 Using where
set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis');
# Observe that there are NO index_merge_union candidates. Only one potential range scan:
select json_pretty(json_search(@trace, 'all', 'key1'));
json_pretty(json_search(@trace, 'all', 'key1'))
[
"$[0].potential_range_indexes[0].index",
"$[0].analyzing_range_alternatives.range_scan_alternatives[0].index"
]
drop table t1;

View File

@ -162,3 +162,51 @@ INSERT INTO t1 VALUES (1),(5);
SELECT id FROM t1 WHERE id IS NULL OR id NOT BETWEEN 1 AND 4;
DROP TABLE t1;
--echo #
--echo # MDEV-34620: Many index_merge variants made and discarded for a big OR
--echo #
CREATE TABLE t1 (
a1 int NOT NULL,
a2 int NOT NULL,
filler char(100),
KEY key1 (a1,a2),
KEY key2 (a2,a1)
);
insert into t1 (a1,a2) values (1,1),(2,2),(3,3);
set @query= concat(
"explain select * from t1 where\n",
(select
group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " )
from seq_1_to_30)
);
set optimizer_trace=1;
prepare s from @query;
execute s;
set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis');
--echo # Observe that "key1" is a a part of several index_merge_union:
select json_pretty(json_search(@trace, 'all', 'key1'));
--echo #
--echo # Now, same as above but for a long IN-list
--echo #
set @query= concat(
"explain select * from t1 where\n",
(select
group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " )
from seq_1_to_120)
);
set optimizer_trace=1;
prepare s from @query;
execute s;
set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis');
--echo # Observe that there are NO index_merge_union candidates. Only one potential range scan:
select json_pretty(json_search(@trace, 'all', 'key1'));
drop table t1;

View File

@ -8513,6 +8513,11 @@ SEL_TREE *Item_cond::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
SEL_TREE *tree= li.ref()[0]->get_mm_tree(param, li.ref());
if (param->statement_should_be_aborted())
DBUG_RETURN(NULL);
bool orig_disable_index_merge= param->disable_index_merge_plans;
if (list.elements > MAX_OR_ELEMENTS_FOR_INDEX_MERGE)
param->disable_index_merge_plans= true;
if (tree)
{
if (tree->type == SEL_TREE::IMPOSSIBLE &&
@ -8529,7 +8534,10 @@ SEL_TREE *Item_cond::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
{
SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param, li.ref());
if (new_tree == NULL || param->statement_should_be_aborted())
{
param->disable_index_merge_plans= orig_disable_index_merge;
DBUG_RETURN(NULL);
}
tree= tree_or(param, tree, new_tree);
if (tree == NULL || tree->type == SEL_TREE::ALWAYS)
{
@ -8561,6 +8569,7 @@ SEL_TREE *Item_cond::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
if (replace_cond)
*cond_ptr= replacement_item;
}
param->disable_index_merge_plans= orig_disable_index_merge;
DBUG_RETURN(tree);
}
@ -9952,6 +9961,8 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys);
no_imerge_from_ranges= must_be_ored;
if (param->disable_index_merge_plans)
no_imerge_from_ranges= true;
if (no_imerge_from_ranges && no_merges1 && no_merges2)
{

View File

@ -39,6 +39,32 @@
class JOIN;
class Item_sum;
/*
When processing an OR clause with more than MAX_OR_ELEMENTS_FOR_INDEX_MERGE
disjuncts (i.e. OR-parts), do not construct index_merge plans from it.
Some users have OR clauses with extremely large number of disjuncts, like:
(key1=1 AND key2=10) OR
(key1=2 AND key2=20) OR
(key1=3 AND key2=30) OR
...
When processing this, the optimizer would try to build a lot of potential
index_merge plans. Hypothetically this could be useful as the cheapest plan
could be to pick a specific index for each disjunct and build:
index_merge(key1 IN (1,3,8,15...), key2 IN (20, 40, 50 ...))
In practice this causes combinatorial amount of time to be spent in the range
analyzer, and most variants will be discarded when the range optimizer tries
to avoid this combinatorial explosion (which may or may not work depending on
the form of the WHERE clause).
In practice, very long ORs are served well enough by just considering range
accesses on individual indexes.
*/
const int MAX_OR_ELEMENTS_FOR_INDEX_MERGE=100;
struct KEY_PART {
uint16 key,part;
/* See KEY_PART_INFO for meaning of the next two: */
@ -889,6 +915,9 @@ public:
*/
bool remove_false_where_parts;
/* If TRUE, do not construct index_merge plans */
bool disable_index_merge_plans;
/*
Which functions should give SQL notes for unusable keys.
*/