From 3b4de4c281cb3e33e6d3ee9537e542bf0a84b83e Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 24 Mar 2025 13:07:36 +0200 Subject: [PATCH] MDEV-32084: Assertion in best_extension_by_limited_search() ... When subquery with LEFT JOIN is converted into semi-join, it is possible to construct cases where the LEFT JOIN's ON expression refers to a table in the current select but not in the current join nest. For example: t1 SEMI JOIN ( t2 LEFT JOIN (t3 LEFT JOIN t4 ON t4.col=t1.col) ON expr ) here, ON t4.col=t1.col" has this property. Let's denote it as ON-EXPR-HAS-REF-OUTSIDE-NEST. The optimizer handles LEFT JOINs like so: - Outer join runtime requires that "inner tables follow outer" in any join order. - Join optimizer enforces this by constructing join orders that follow table dependencies as they are specified in TABLE_LIST::dep_tables. - The dep_tables are set in simplify_joins() according to the contents of ON expressions and LEFT JOIN structure. However, the logic in simplify_joins() failed to account for possible ON-EXPR-HAS-REF-OUTSIDE-NEST. It assumed that references outside of the current join nest could only be OUTER_REF_TABLE_BIT or RAND_TABLE_BIT. The fix was to add the missing logic. --- mysql-test/main/join_nested.result | 12 +++++ mysql-test/main/join_nested.test | 13 ++++++ mysql-test/main/join_nested_jcl6.result | 12 +++++ sql/sql_select.cc | 58 +++++++++++++++++++++++-- 4 files changed, 91 insertions(+), 4 deletions(-) diff --git a/mysql-test/main/join_nested.result b/mysql-test/main/join_nested.result index cb9dffc837d..564685188bf 100644 --- a/mysql-test/main/join_nested.result +++ b/mysql-test/main/join_nested.result @@ -2051,3 +2051,15 @@ a b c a a b DROP TABLE t1, t2, t3; set join_cache_level= @save_join_cache_level; # end of 10.3 tests +# +# MDEV-32084: Assertion in best_extension_by_limited_search(), or crash elsewhere in release +# +CREATE TABLE t1 (i int); +INSERT INTO t1 values (1),(2); +SELECT 1 FROM t1 WHERE i IN +(SELECT 1 FROM t1 c +LEFT JOIN (t1 a LEFT JOIN t1 b ON t1.i = b.i) ON c.i = t1.i); +1 +1 +DROP TABLE t1; +# end of 10.11 tests diff --git a/mysql-test/main/join_nested.test b/mysql-test/main/join_nested.test index ed1fe4c9f7e..62370b95262 100644 --- a/mysql-test/main/join_nested.test +++ b/mysql-test/main/join_nested.test @@ -1458,3 +1458,16 @@ DROP TABLE t1, t2, t3; set join_cache_level= @save_join_cache_level; --echo # end of 10.3 tests + +--echo # +--echo # MDEV-32084: Assertion in best_extension_by_limited_search(), or crash elsewhere in release +--echo # +CREATE TABLE t1 (i int); +INSERT INTO t1 values (1),(2); + +SELECT 1 FROM t1 WHERE i IN + (SELECT 1 FROM t1 c + LEFT JOIN (t1 a LEFT JOIN t1 b ON t1.i = b.i) ON c.i = t1.i); + +DROP TABLE t1; +--echo # end of 10.11 tests diff --git a/mysql-test/main/join_nested_jcl6.result b/mysql-test/main/join_nested_jcl6.result index 0bda8d43a69..50a1e83a5ab 100644 --- a/mysql-test/main/join_nested_jcl6.result +++ b/mysql-test/main/join_nested_jcl6.result @@ -2060,6 +2060,18 @@ a b c a a b DROP TABLE t1, t2, t3; set join_cache_level= @save_join_cache_level; # end of 10.3 tests +# +# MDEV-32084: Assertion in best_extension_by_limited_search(), or crash elsewhere in release +# +CREATE TABLE t1 (i int); +INSERT INTO t1 values (1),(2); +SELECT 1 FROM t1 WHERE i IN +(SELECT 1 FROM t1 c +LEFT JOIN (t1 a LEFT JOIN t1 b ON t1.i = b.i) ON c.i = t1.i); +1 +1 +DROP TABLE t1; +# end of 10.11 tests CREATE TABLE t5 (a int, b int, c int, PRIMARY KEY(a), KEY b_i (b)); CREATE TABLE t6 (a int, b int, c int, PRIMARY KEY(a), KEY b_i (b)); CREATE TABLE t7 (a int, b int, c int, PRIMARY KEY(a), KEY b_i (b)); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 3c4b7b3ec03..85d65400224 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -18628,6 +18628,8 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, prev_table->dep_tables|= used_tables; if (prev_table->on_expr) { + /* If the ON expression is still there, it's an outer join */ + DBUG_ASSERT(prev_table->outer_join); prev_table->dep_tables|= table->on_expr_dep_tables; table_map prev_used_tables= prev_table->nested_join ? prev_table->nested_join->used_tables : @@ -18642,11 +18644,59 @@ simplify_joins(JOIN *join, List *join_list, COND *conds, bool top, prevents update of inner table dependences. For example it might happen if RAND() function is used in JOIN ON clause. - */ - if (!((prev_table->on_expr->used_tables() & - ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) & - ~prev_used_tables)) + */ + table_map prev_on_expr_deps= prev_table->on_expr->used_tables() & + ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + prev_on_expr_deps&= ~prev_used_tables; + + if (!prev_on_expr_deps) prev_table->dep_tables|= used_tables; + else + { + /* + Another possible case is when prev_on_expr_deps!=0 but it depends + on a table outside this join nest. SQL name resolution don't allow + this but it is possible when LEFT JOIN is inside a subquery which + is converted into a semi-join nest, Example: + + t1 SEMI JOIN ( + t2 + LEFT JOIN (t3 LEFT JOIN t4 ON t4.col=t1.col) ON expr + ) ON ... + + here, we would have prev_table=t4, table=t3. The condition + "ON t4.col=t1.col" depends on tables {t1, t4}. To make sure the + optimizer puts t3 before t4 we need to make sure t4.dep_tables + includes t3. + */ + + DBUG_ASSERT(table->embedding == prev_table->embedding); + if (table->embedding) + { + /* + Find what are the "peers" of "table" in the join nest. Normally, + it is table->embedding->nested_join->used_tables, but here we are + in the process of recomputing that value. + So, we walk the join list and collect the bitmap of peers: + */ + table_map peers= 0; + List_iterator_fast li(*join_list); + TABLE_LIST *peer; + while ((peer= li++)) + { + table_map curmap= peer->nested_join + ? peer->nested_join->used_tables + : peer->get_map(); + peers|= curmap; + } + /* + If prev_table doesn't depend on any of its peers, add a + dependency on nearest peer, that is, on 'table'. + */ + if (!(prev_on_expr_deps & peers)) + prev_table->dep_tables|= used_tables; + } + } } } prev_table= table;