diff --git a/client/mysql.cc b/client/mysql.cc index e5e6d2f85a2..ceba2256da9 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -1833,7 +1833,8 @@ static struct my_option my_long_options[] = "if the output is suspended. Doesn't use history file.", &quick, &quick, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"quick-max-column-width", 0, - "Maximal field length limit in case of --qick", &quick_max_column_width, + "Maximum number of characters displayed in a column header" + " when using --quick", &quick_max_column_width, &quick_max_column_width, 0, GET_ULONG, REQUIRED_ARG, LONG_MAX, 0, ULONG_MAX, 0, 1, 0}, {"raw", 'r', "Write fields without conversion. Used with --batch.", diff --git a/mysql-test/main/client.result b/mysql-test/main/client.result index bc3b9f64e81..86620d58bc6 100644 --- a/mysql-test/main/client.result +++ b/mysql-test/main/client.result @@ -58,5 +58,5 @@ insert into t1 values ("01234", "0123456789", "01234567890123456789", "1"); +-----------+------------+----------------------+------+ drop table t1; # -# End of 10.7 tests +# End of 10.5 tests # diff --git a/mysql-test/main/client.test b/mysql-test/main/client.test index 20df85f0807..d7249c15e75 100644 --- a/mysql-test/main/client.test +++ b/mysql-test/main/client.test @@ -42,5 +42,5 @@ insert into t1 values ("01234", "0123456789", "01234567890123456789", "1"); drop table t1; --echo # ---echo # End of 10.7 tests +--echo # End of 10.5 tests --echo # diff --git a/mysql-test/main/having_cond_pushdown.result b/mysql-test/main/having_cond_pushdown.result index ba11be710f1..f94fb237bb7 100644 --- a/mysql-test/main/having_cond_pushdown.result +++ b/mysql-test/main/having_cond_pushdown.result @@ -6293,4 +6293,14 @@ HAVING (SELECT MAX(b) FROM t1) = a AND a > b; a b 2 1 DROP TABLE t1; +# +# MDEV-25084: Moving equality with constant right side +# from HAVING to WHERE +# (fixed by the patch for MDEV-26402) +# +CREATE TABLE t1 (a CHAR(3)) CHARSET=sjis; +INSERT INTO t1 VALUES ('foo'),('bar'); +SELECT LOAD_FILE('') 
AS f, a FROM t1 GROUP BY f, a HAVING f = a; +f a +DROP TABLE t1; End of 10.5 tests diff --git a/mysql-test/main/having_cond_pushdown.test b/mysql-test/main/having_cond_pushdown.test index 78371a32e70..58a05a159cb 100644 --- a/mysql-test/main/having_cond_pushdown.test +++ b/mysql-test/main/having_cond_pushdown.test @@ -1738,4 +1738,16 @@ eval $q; DROP TABLE t1; + +--echo # +--echo # MDEV-25084: Moving equality with constant right side +--echo # from HAVING to WHERE +--echo # (fixed by the patch for MDEV-26402) +--echo # + +CREATE TABLE t1 (a CHAR(3)) CHARSET=sjis; +INSERT INTO t1 VALUES ('foo'),('bar'); +SELECT LOAD_FILE('') AS f, a FROM t1 GROUP BY f, a HAVING f = a; +DROP TABLE t1; + --echo End of 10.5 tests diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 5c1904a7572..2507cffb14c 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -757,6 +757,15 @@ The following specify which files/extra groups are read (specified before remain --optimizer-index-block-copy-cost=# Cost of copying a key block from the cache to intern storage as part of an index scan. + --optimizer-join-limit-pref-ratio=# + For queries with JOIN and ORDER BY LIMIT : make the + optimizer consider a join order that allows to short-cut + execution after producing #LIMIT matches if that promises + N times speedup. (A conservative setting here would be is + a high value, like 100 so the short-cutting plan is used + if it promises a speedup of 100x or more). Short-cutting + plans are inherently risky so the default is 0 which + means do not consider this optimization --optimizer-key-compare-cost=# Cost of checking a key against the end key condition. 
--optimizer-key-copy-cost=# @@ -1777,6 +1786,7 @@ optimizer-disk-read-cost 10.24 optimizer-disk-read-ratio 0.02 optimizer-extra-pruning-depth 8 optimizer-index-block-copy-cost 0.0356 +optimizer-join-limit-pref-ratio 0 optimizer-key-compare-cost 0.011361 optimizer-key-copy-cost 0.015685 optimizer-key-lookup-cost 0.435777 diff --git a/mysql-test/main/order_by_limit_join.result b/mysql-test/main/order_by_limit_join.result new file mode 100644 index 00000000000..bdfce0915b2 --- /dev/null +++ b/mysql-test/main/order_by_limit_join.result @@ -0,0 +1,473 @@ +# +# MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT +# +create table t1 ( +a int, +b int, +c int, +col1 int, +col2 int, +index(a), +index(b), +index(col1) +); +insert into t1 select +mod(seq, 100), +mod(seq, 95), +seq, +seq, +seq +from +seq_1_to_10000; +create table t10 ( +a int, +a_value char(10), +key(a) +); +insert into t10 select seq, seq from seq_1_to_100; +create table t11 ( +b int, +b_value char(10), +key(b) +); +insert into t11 select seq, seq from seq_1_to_150; +set @tmp_os=@@optimizer_trace; +set optimizer_trace=1; +# +# Query 1 - basic example. 
+# +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a,b col1 5 NULL 10 Using where +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "table": "t1", + "rows_estimation": 10000, + "filesort_cost": 0.205030632, + "read_cost": 1.791376632, + "filesort_type": "priority_queue with row lookup", + "fanout": 1, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "rows_to_examine": 10, + "range_scan": false, + "scan_cost": 0.013129232, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 37.9789756, + "risk_ratio": 10, + "shortcut_join_cost": 0.049201862, + "shortcut_cost_with_risk": 0.492018616, + "use_shortcut_cost": true + } +] +# +# Query 2 - same as above but without a suitable index. 
+# +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +# t1 is first but there's no suitable index, +# so we use filesort but using temporary: +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a,b NULL NULL NULL 10000 Using where; Using filesort +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [], + "can_skip_filesort": false, + "full_join_cost": 37.9789756, + "risk_ratio": 10, + "shortcut_join_cost": 1.94241863, + "shortcut_cost_with_risk": 19.4241863, + "use_shortcut_cost": true + } +] +# +# Query 3: Counter example with large limit +# +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 5000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +# Same plan as above: +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t1 +join t10 on 
t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 5000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.5, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "table": "t1", + "rows_estimation": 10000, + "filesort_cost": 1.070467741, + "read_cost": 2.656813741, + "filesort_type": "merge_sort with addon fields", + "fanout": 1, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "rows_to_examine": 5000, + "range_scan": false, + "scan_cost": 6.174703142, + "usable": false, + "cause": "cost" + } + ] + } + } + ], + "can_skip_filesort": false, + "full_join_cost": 37.9789756, + "risk_ratio": 10, + "shortcut_join_cost": 19.9426608, + "shortcut_cost_with_risk": 199.426608, + "use_shortcut_cost": false + } +] +# +# Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first, +# however the optimizer still puts it as sort_by_table. 
+# +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL NULL NULL NULL NULL 100 Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +# This will show nothing as limit shortcut code figures that +# it's not possible to use t1 to construct shortcuts: +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +NULL +# +# Query 5: Same as Q1 but also with a semi-join +# +set optimizer_join_limit_pref_ratio=default; +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +where +t1.a in (select a from t10) and +t1.b in (select b from t11) +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t10 index a a 5 NULL 100 Using where; Using index; LooseScan; Using temporary; Using filesort +1 PRIMARY t10 ref a a 5 test.t10.a 1 +1 PRIMARY t1 ref a,b a 5 test.t10.a 100 Using where +1 PRIMARY t11 ref b b 5 test.t1.b 1 +1 PRIMARY t11 ref b b 5 test.t1.b 1 Using index; FirstMatch(t11) +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +where +t1.a in (select a from t10) and +t1.b in (select b from t11) +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 index a,b col1 5 NULL 10 Using where +1 PRIMARY t10 ref a a 5 test.t1.a 1 +1 PRIMARY t11 ref b b 5 test.t1.b 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +2 MATERIALIZED t10 index a a 5 NULL 
100 Using index +3 MATERIALIZED t11 index b b 5 NULL 150 Using index +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "table": "t1", + "rows_estimation": 10000, + "filesort_cost": 0.205030632, + "read_cost": 1.791376632, + "filesort_type": "priority_queue with row lookup", + "fanout": 1, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "rows_to_examine": 10, + "range_scan": false, + "scan_cost": 0.013129232, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 42.02203018, + "risk_ratio": 10, + "shortcut_join_cost": 0.053244916, + "shortcut_cost_with_risk": 0.532449162, + "use_shortcut_cost": true + } +] +# +# Query 6: same as Query 1 but let's limit the search depth +# +set @tmp_osd=@@optimizer_search_depth; +set optimizer_search_depth=1; +set optimizer_join_limit_pref_ratio=default; +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t11 ALL b NULL NULL NULL 150 Using join buffer (flat, BNL join) +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 
+limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a,b col1 5 NULL 10 Using where +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "table": "t1", + "rows_estimation": 10000, + "filesort_cost": 0.205030632, + "read_cost": 1.791376632, + "filesort_type": "priority_queue with row lookup", + "fanout": 1, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "rows_to_examine": 10, + "range_scan": false, + "scan_cost": 0.013129232, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 37.9789756, + "risk_ratio": 10, + "shortcut_join_cost": 0.049201862, + "shortcut_cost_with_risk": 0.492018616, + "use_shortcut_cost": true + } +] +set optimizer_search_depth=@tmp_osd; +set optimizer_trace=@tmp_os; +set optimizer_join_limit_pref_ratio=default; +drop table t1, t10, t11; diff --git a/mysql-test/main/order_by_limit_join.test b/mysql-test/main/order_by_limit_join.test new file mode 100644 index 00000000000..6e695ecb58a --- /dev/null +++ b/mysql-test/main/order_by_limit_join.test @@ -0,0 +1,207 @@ +--echo # +--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT +--echo # + +--source include/have_sequence.inc + +# We need optimizer trace +--source include/not_embedded.inc + +create table t1 ( + a int, + b int, + c int, + col1 int, + col2 int, + index(a), + index(b), + index(col1) +); + +insert into t1 select 
+ mod(seq, 100), + mod(seq, 95), + seq, + seq, + seq +from + seq_1_to_10000; + + +create table t10 ( + a int, + a_value char(10), + key(a) +); +insert into t10 select seq, seq from seq_1_to_100; + +create table t11 ( + b int, + b_value char(10), + key(b) +); +insert into t11 select seq, seq from seq_1_to_150; + +set @tmp_os=@@optimizer_trace; +set optimizer_trace=1; + +--echo # +--echo # Query 1 - basic example. +--echo # +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 2 - same as above but without a suitable index. 
+--echo # +let $query= +explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col2 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +eval $query; + +--echo # t1 is first but there's no suitable index, +--echo # so we use filesort but using temporary: +set optimizer_join_limit_pref_ratio=10; +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 3: Counter example with large limit +--echo # +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 5000; + +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +eval $query; + +--echo # Same plan as above: +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=10; +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first, +--echo # however the optimizer still puts it as sort_by_table. 
+--echo # +set optimizer_join_limit_pref_ratio=10; +explain +select + * +from + t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a +order by + t1.col2 +limit 10; + +set @trace=(select trace from information_schema.optimizer_trace); +--echo # This will show nothing as limit shortcut code figures that +--echo # it's not possible to use t1 to construct shortcuts: +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 5: Same as Q1 but also with a semi-join +--echo # +set optimizer_join_limit_pref_ratio=default; +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +where + t1.a in (select a from t10) and + t1.b in (select b from t11) +order by + t1.col1 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 6: same as Query 1 but let's limit the search depth +--echo # +set @tmp_osd=@@optimizer_search_depth; +set optimizer_search_depth=1; +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 10; + +set optimizer_join_limit_pref_ratio=default; +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + + +set optimizer_search_depth=@tmp_osd; +set optimizer_trace=@tmp_os; +set optimizer_join_limit_pref_ratio=default; +drop table t1, t10, 
t11; + diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index 4f0619ca540..e00bdbf85c0 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -12,10 +12,6 @@ galera_as_slave_ctas : MDEV-28378 timeout galera_pc_recovery : MDEV-25199 cluster fails to start up -galera_bf_kill_debug : timeout after 900 seconds -galera_ssl_upgrade : [Warning] Failed to load slave replication state from table mysql.gtid_slave_pos: 130: Incorrect file format 'gtid_slave_pos' -galera_parallel_simple : timeout related to wsrep_sync_wait -galera_insert_bulk : MDEV-30536 no expected deadlock in galera_insert_bulk test galera_sequences : MDEV-32561 WSREP FSM failure: no such a transition REPLICATING -> COMMITTED galera_concurrent_ctas : MDEV-32779 galera_concurrent_ctas: assertion in the galera::ReplicatorSMM::finish_cert() galera_as_slave_replay : MDEV-32780 galera_as_slave_replay: assertion in the wsrep::transaction::before_rollback() diff --git a/mysql-test/suite/galera/include/galera_wsrep_recover.inc b/mysql-test/suite/galera/include/galera_wsrep_recover.inc index aa2f0e2e777..efe803dcc9f 100644 --- a/mysql-test/suite/galera/include/galera_wsrep_recover.inc +++ b/mysql-test/suite/galera/include/galera_wsrep_recover.inc @@ -10,8 +10,8 @@ if (!$wsrep_recover_additional) --perl use strict; - my $wsrep_start_position_str = "grep 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'"; - my $wsrep_start_position = `grep 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; + my $wsrep_start_position_str = "grep -a 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'"; + my $wsrep_start_position = `grep -a 'WSREP: Recovered position:' 
$ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; chomp($wsrep_start_position); die if $wsrep_start_position eq ''; diff --git a/mysql-test/suite/galera/r/MDEV-34647.result b/mysql-test/suite/galera/r/MDEV-34647.result index 16a4e839f13..0333f14ece1 100644 --- a/mysql-test/suite/galera/r/MDEV-34647.result +++ b/mysql-test/suite/galera/r/MDEV-34647.result @@ -39,7 +39,7 @@ id val 7 d 9 d 11 d -set global wsrep_mode=REPLICATE_MYISAM; +set global wsrep_mode='REPLICATE_MYISAM,REPLICATE_ARIA'; create table t4(id serial, val varchar(100)) engine=myisam; insert into t4 values(null, 'a'); insert into t4 values(null, 'b'); @@ -95,6 +95,7 @@ id val 4 d 5 d 6 d +set global wsrep_mode=default; connection node_1; drop table t1,t2,t3,t4,t5; set global wsrep_mode=default; diff --git a/mysql-test/suite/galera/r/galera_bf_kill_debug.result b/mysql-test/suite/galera/r/galera_bf_kill_debug.result index c3eae243f47..52bd1b0e370 100644 --- a/mysql-test/suite/galera/r/galera_bf_kill_debug.result +++ b/mysql-test/suite/galera/r/galera_bf_kill_debug.result @@ -22,16 +22,19 @@ update t1 set b= 1 where a=1; connection node_2b; SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; connection node_2; -SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; +SET DEBUG_SYNC= 'wsrep_kill_before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; connection node_2b; SET DEBUG_SYNC='now WAIT_FOR awake_reached'; SET GLOBAL debug_dbug = ""; SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; +connection node_1; +COMMIT; +connection node_2b; SET DEBUG_SYNC = "now SIGNAL continue_kill"; connection node_2; connection node_2a; select * from t1; -connection node_2; +connection node_2b; SET DEBUG_SYNC = "RESET"; drop table t1; disconnect node_2a; diff --git a/mysql-test/suite/galera/r/galera_insert_bulk.result b/mysql-test/suite/galera/r/galera_insert_bulk.result index 
f4d4adf64e1..7191464ba64 100644 --- a/mysql-test/suite/galera/r/galera_insert_bulk.result +++ b/mysql-test/suite/galera/r/galera_insert_bulk.result @@ -2,6 +2,8 @@ connection node_2; connection node_1; connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; +SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_apply_cb'; +connection node_1; SET foreign_key_checks = 0; SET unique_checks = 0; START TRANSACTION; @@ -10,11 +12,20 @@ SET foreign_key_checks = 1; SET unique_checks = 1; INSERT INTO t1 VALUES (1001); connection node_1; +SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR sync.wsrep_apply_cb_reached'; +SET DEBUG_SYNC = 'wsrep_after_certification SIGNAL signal.wsrep_apply_cb'; COMMIT; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction DROP TABLE t1; +SET GLOBAL DEBUG_DBUG = ''; +SET DEBUG_SYNC = 'RESET'; connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; +connection node_2; +SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_apply_cb'; +connection node_1; +SET foreign_key_checks = 0; +SET unique_checks = 0; START TRANSACTION; connection node_2; SET foreign_key_checks = 1; @@ -23,8 +34,14 @@ START TRANSACTION; INSERT INTO t1 VALUES (1001); connection node_1; COMMIT; -2 +3 connection node_2; +SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR sync.wsrep_apply_cb_reached'; +SET DEBUG_SYNC = 'wsrep_after_certification SIGNAL signal.wsrep_apply_cb'; COMMIT; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction DROP TABLE t1; +SET GLOBAL DEBUG_DBUG = ''; +SET DEBUG_SYNC = 'RESET'; +disconnect node_2; +disconnect node_1; diff --git a/mysql-test/suite/galera/r/galera_vote_ddl.result b/mysql-test/suite/galera/r/galera_vote_ddl.result new file mode 100644 index 00000000000..bc5d99256e5 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_vote_ddl.result @@ -0,0 +1,70 @@ +connection node_2; +connection node_1; +connection node_1; +SET @@global.wsrep_ignore_apply_errors = 7; +connection node_2; 
+SET @@global.wsrep_ignore_apply_errors = 7; +connection node_1; +CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY); +connection node_1; +DROP TABLE nonexistent; +ERROR 42S02: Unknown table 'test.nonexistent' +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +TRUNCATE TABLE nonexistent; +ERROR 42S02: Table 'test.nonexistent' doesn't exist +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +CREATE TABLE nonexistent.t1 (s INT); +ERROR 42000: Unknown database 'nonexistent' +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +CREATE TABLE t1 (s INT); +ERROR 42S01: Table 't1' already exists +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +ALTER TABLE nonexistent ADD COLUMN (c INT); +ERROR 42S02: Table 'test.nonexistent' doesn't exist +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +DROP TABLE t1; +connection node_1; +SET @@global.wsrep_ignore_apply_errors = 0; +connection node_2; +SET @@global.wsrep_ignore_apply_errors = 0; +connection node_1; +CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY); +connection node_1; +DROP TABLE nonexistent; +ERROR 42S02: Unknown table 'test.nonexistent' +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +TRUNCATE TABLE nonexistent; +ERROR 42S02: Table 'test.nonexistent' doesn't exist +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +CREATE TABLE nonexistent.t1 (s INT); +ERROR 42000: Unknown database 'nonexistent' +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +CREATE TABLE t1 (s INT); +ERROR 42S01: Table 't1' already exists +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +connection node_1; +ALTER TABLE nonexistent ADD COLUMN (c INT); +ERROR 42S02: Table 'test.nonexistent' doesn't exist +INSERT INTO t1 VALUES (DEFAULT); +connection node_2; +DROP TABLE t1; +connection node_1; +SET @@global.wsrep_ignore_apply_errors = 7; +connection node_2; +SET 
@@global.wsrep_ignore_apply_errors = 7; diff --git a/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result b/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result new file mode 100644 index 00000000000..41275ede6d2 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result @@ -0,0 +1,36 @@ +connection node_2; +connection node_1; +connection node_1; +connection node_2; +connection node_1; +call mtr.add_suppression("WSREP: async IST sender failed to serve.*"); +SET @wsrep_provider_options_orig = @@GLOBAL.wsrep_provider_options; +connection node_2; +SET @wsrep_cluster_address_orig = @@GLOBAL.wsrep_cluster_address; +SET GLOBAL WSREP_ON=0; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +EXPECT_0 +0 +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_allowlist; +EXPECT_0 +0 +SELECT COUNT(*) AS EXPECT_1 FROM mysql.wsrep_cluster; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_2 FROM mysql.wsrep_cluster_members; +EXPECT_2 +2 +INSERT INTO mysql.wsrep_allowlist (ip) VALUES (0); +SET GLOBAL wsrep_cluster_address = @wsrep_cluster_address_orig; +SELECT 1; +1 +1 +DELETE FROM mysql.wsrep_allowlist; +connection node_1; +SET GLOBAL wsrep_provider_options ='pc.ignore_sb=true'; +connection node_2; +Killing server ... 
+connection node_1; +connection node_2; +connection node_1; +SET GLOBAL wsrep_provider_options ='pc.ignore_sb=false'; diff --git a/mysql-test/suite/galera/r/mdev-22063.result b/mysql-test/suite/galera/r/mdev-22063.result index 585d70acb61..228f63d6688 100644 --- a/mysql-test/suite/galera/r/mdev-22063.result +++ b/mysql-test/suite/galera/r/mdev-22063.result @@ -17,12 +17,14 @@ SELECT * FROM s; next_not_cached_value minimum_value maximum_value start_value increment cache_size cycle_option cycle_count 1 1 9223372036854775806 1 1 1000 0 0 connection node_2; +SET GLOBAL WSREP_MODE='REPLICATE_ARIA,REPLICATE_MYISAM'; SELECT * FROM t1; a SELECT * FROM s; next_not_cached_value minimum_value maximum_value start_value increment cache_size cycle_option cycle_count 1 1 9223372036854775806 1 1 1000 0 0 connection node_1; +SET GLOBAL WSREP_MODE='REPLICATE_ARIA,REPLICATE_MYISAM'; DROP TABLE t1; DROP SEQUENCE s; # Case 2 REPLACE INTO ... SELECT with error @@ -240,3 +242,5 @@ pk DROP TABLE t1; DROP VIEW view_t1; SET GLOBAL wsrep_mode=DEFAULT; +connection node_2; +SET GLOBAL wsrep_mode=DEFAULT; diff --git a/mysql-test/suite/galera/t/MDEV-34647.test b/mysql-test/suite/galera/t/MDEV-34647.test index 8840c233c6d..db776681aa1 100644 --- a/mysql-test/suite/galera/t/MDEV-34647.test +++ b/mysql-test/suite/galera/t/MDEV-34647.test @@ -22,7 +22,7 @@ insert into t3 select null, 'c'; insert into t3 select null, 'd' from t3; select * from t3; -set global wsrep_mode=REPLICATE_MYISAM; +set global wsrep_mode='REPLICATE_MYISAM,REPLICATE_ARIA'; create table t4(id serial, val varchar(100)) engine=myisam; insert into t4 values(null, 'a'); @@ -45,9 +45,8 @@ select * from t2; select * from t3; select * from t4; select * from t5; - +set global wsrep_mode=default; --connection node_1 drop table t1,t2,t3,t4,t5; set global wsrep_mode=default; - diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test index 6706734cc36..1b7b3f40ac8 100644 --- 
a/mysql-test/suite/galera/t/galera_bf_kill_debug.test +++ b/mysql-test/suite/galera/t/galera_bf_kill_debug.test @@ -66,7 +66,7 @@ SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; # # pause KILL execution before awake # -SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; +SET DEBUG_SYNC= 'wsrep_kill_before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; --disable_query_log --send_eval KILL $k_thread --enable_query_log @@ -78,6 +78,11 @@ SET DEBUG_SYNC='now WAIT_FOR awake_reached'; # release applier and KILL operator SET GLOBAL debug_dbug = ""; SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; + +--connection node_1 +COMMIT; + +--connection node_2b SET DEBUG_SYNC = "now SIGNAL continue_kill"; --connection node_2 @@ -87,7 +92,7 @@ SET DEBUG_SYNC = "now SIGNAL continue_kill"; --error 0,1213,2013,2026 select * from t1; ---connection node_2 +--connection node_2b SET DEBUG_SYNC = "RESET"; drop table t1; diff --git a/mysql-test/suite/galera/t/galera_insert_bulk.test b/mysql-test/suite/galera/t/galera_insert_bulk.test index f58870d5f74..7faf8356420 100644 --- a/mysql-test/suite/galera/t/galera_insert_bulk.test +++ b/mysql-test/suite/galera/t/galera_insert_bulk.test @@ -5,6 +5,8 @@ --source include/galera_cluster.inc --source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_debug.inc # # Make bulk insert BF-abort, but regular insert succeed. @@ -13,6 +15,10 @@ --connection node_1 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; +# Delay applying of the single insert from the other node. +SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_apply_cb'; + +--connection node_1 # Disable foreign and unique key checks to allow bulk insert. SET foreign_key_checks = 0; SET unique_checks = 0; @@ -30,7 +36,7 @@ while ($count < 1000) --connection node_2 -# Disable bulk insert. +# Disable bulk insert on this node. 
SET foreign_key_checks = 1; SET unique_checks = 1; @@ -38,10 +44,20 @@ SET unique_checks = 1; INSERT INTO t1 VALUES (1001); --connection node_1 + +# We need to trigger Galera-level certification conflict. For this: +# - start applying single insert from the other node before bulk insert certifies +# - certifying bulk insert will lead to the conflict +# - keep applying single insert +SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR sync.wsrep_apply_cb_reached'; +SET DEBUG_SYNC = 'wsrep_after_certification SIGNAL signal.wsrep_apply_cb'; + --error ER_LOCK_DEADLOCK COMMIT; DROP TABLE t1; +SET GLOBAL DEBUG_DBUG = ''; +SET DEBUG_SYNC = 'RESET'; # # Make bulk insert succeed, but regular insert BF-abort. @@ -50,8 +66,17 @@ DROP TABLE t1; --connection node_1 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; +--connection node_2 +# Delay applying of the bulk insert from the other node. +SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_apply_cb'; + +--connection node_1 --let $before_bulk_keys = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_repl_keys'` +# Disable foreign and unique key checks to allow bulk insert. +SET foreign_key_checks = 0; +SET unique_checks = 0; + START TRANSACTION; --let $count=0 @@ -64,8 +89,7 @@ while ($count < 1000) --enable_query_log --connection node_2 - -# Disable bulk insert. +# Disable bulk insert on this node. SET foreign_key_checks = 1; SET unique_checks = 1; @@ -77,12 +101,23 @@ INSERT INTO t1 VALUES (1001); --connection node_1 COMMIT; -# Expect two keys to be added for bulk insert: DB-level shared key and table-level exclusive key. +# Expect three keys to be added for bulk insert: "zero-level" key, DB-level shared key and table-level exclusive key. --let $bulk_keys_count = `SELECT VARIABLE_VALUE - $before_bulk_keys FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_repl_keys'` --echo $bulk_keys_count --connection node_2 + +# We need to trigger Galera-level certification conflict. 
For this: +# - start applying bulk insert from the other node before local insert certifies +# - certifying local insert will lead to the conflict +# - keep applying bulk insert +SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR sync.wsrep_apply_cb_reached'; +SET DEBUG_SYNC = 'wsrep_after_certification SIGNAL signal.wsrep_apply_cb'; + --error ER_LOCK_DEADLOCK COMMIT; DROP TABLE t1; +SET GLOBAL DEBUG_DBUG = ''; +SET DEBUG_SYNC = 'RESET'; +--source include/galera_end.inc diff --git a/mysql-test/suite/galera/t/galera_pc_recovery.test b/mysql-test/suite/galera/t/galera_pc_recovery.test index 16abe6fc9ba..0fd9c8ab3d9 100644 --- a/mysql-test/suite/galera/t/galera_pc_recovery.test +++ b/mysql-test/suite/galera/t/galera_pc_recovery.test @@ -38,10 +38,10 @@ SELECT COUNT(*) = 1 FROM t1; --perl use strict; - my $wsrep_start_position1 = `grep 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.1.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; + my $wsrep_start_position1 = `grep -a 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.1.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; chomp($wsrep_start_position1); - my $wsrep_start_position2 = `grep 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.2.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; + my $wsrep_start_position2 = `grep -a 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.2.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; chomp($wsrep_start_position2); die if $wsrep_start_position1 eq '' || $wsrep_start_position2 eq ''; diff --git a/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf b/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf index 2954ae0f4cb..7c495102564 100644 --- a/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf +++ b/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf @@ -1,5 +1,9 @@ !include ../galera_2nodes.cnf +[mysqld] 
+loose-galera-ssl-upgrade=1 +wsrep-debug=1 + [mysqld.1] wsrep_provider_options='base_port=@mysqld.1.#galera_port;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem' diff --git a/mysql-test/suite/galera/t/galera_ssl_upgrade.test b/mysql-test/suite/galera/t/galera_ssl_upgrade.test index c09615527fd..78897ffd738 100644 --- a/mysql-test/suite/galera/t/galera_ssl_upgrade.test +++ b/mysql-test/suite/galera/t/galera_ssl_upgrade.test @@ -7,6 +7,8 @@ --source include/galera_cluster.inc --source include/have_innodb.inc --source include/have_ssl_communication.inc +--source include/have_openssl.inc +--source include/force_restart.inc # Save original auto_increment_offset values. --let $node_1=node_1 diff --git a/mysql-test/suite/galera/t/galera_vote_ddl.inc b/mysql-test/suite/galera/t/galera_vote_ddl.inc new file mode 100644 index 00000000000..80a543fb886 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_vote_ddl.inc @@ -0,0 +1,54 @@ +--connection node_1 +CREATE TABLE t1 (pk INT AUTO_INCREMENT PRIMARY KEY); + +--connection node_1 +--error 1051 +DROP TABLE nonexistent; + +# Verify cluster is intact +INSERT INTO t1 VALUES (DEFAULT); +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1; +--source include/wait_condition.inc + +--connection node_1 +--error 1146 +TRUNCATE TABLE nonexistent; + +# Verify cluster is intact +INSERT INTO t1 VALUES (DEFAULT); +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 2 FROM t1; +--source include/wait_condition.inc + +--connection node_1 +--error 1049 +CREATE TABLE nonexistent.t1 (s INT); + +# Verify cluster is intact +INSERT INTO t1 VALUES (DEFAULT); +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 3 FROM t1; +--source include/wait_condition.inc + +--connection node_1 +--error 1050 +CREATE TABLE t1 (s INT); + +# Verify cluster is intact +INSERT INTO t1 VALUES (DEFAULT); +--connection node_2 +--let $wait_condition 
= SELECT COUNT(*) = 4 FROM t1; +--source include/wait_condition.inc + +--connection node_1 +--error 1146 +ALTER TABLE nonexistent ADD COLUMN (c INT); + +# Verify cluster is intact +INSERT INTO t1 VALUES (DEFAULT); +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 5 FROM t1; +--source include/wait_condition.inc + +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_vote_ddl.test b/mysql-test/suite/galera/t/galera_vote_ddl.test new file mode 100644 index 00000000000..9db6e612e7e --- /dev/null +++ b/mysql-test/suite/galera/t/galera_vote_ddl.test @@ -0,0 +1,34 @@ +# +# Test voting on identical DDL errors (error messages should match) +# + +--source include/galera_cluster.inc +--source include/have_binlog_format_row.inc + +# +# 1. Ignore all DDL errors (avoids voting) +# +--connection node_1 +--let $wsrep_ignore_apply_errors_saved1 = `SELECT @@global.wsrep_ignore_apply_errors` +SET @@global.wsrep_ignore_apply_errors = 7; +--connection node_2 +--let $wsrep_ignore_apply_errors_saved2 = `SELECT @@global.wsrep_ignore_apply_errors` +SET @@global.wsrep_ignore_apply_errors = 7; + +--source galera_vote_ddl.inc + +# +# 2. Don't ignore any errors (forces voting) +# +--connection node_1 +SET @@global.wsrep_ignore_apply_errors = 0; +--connection node_2 +SET @@global.wsrep_ignore_apply_errors = 0; + +--source galera_vote_ddl.inc + +--connection node_1 +--eval SET @@global.wsrep_ignore_apply_errors = $wsrep_ignore_apply_errors_saved1 + +--connection node_2 +--eval SET @@global.wsrep_ignore_apply_errors = $wsrep_ignore_apply_errors_saved2 diff --git a/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test b/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test new file mode 100644 index 00000000000..9942d63f142 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test @@ -0,0 +1,45 @@ +--source include/galera_cluster.inc + +# Save original auto_increment_offset values. 
+--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + +--connection node_1 +call mtr.add_suppression("WSREP: async IST sender failed to serve.*"); +SET @wsrep_provider_options_orig = @@GLOBAL.wsrep_provider_options; + +--connection node_2 +SET @wsrep_cluster_address_orig = @@GLOBAL.wsrep_cluster_address; +SET GLOBAL WSREP_ON=0; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_allowlist; +SELECT COUNT(*) AS EXPECT_1 FROM mysql.wsrep_cluster; +SELECT COUNT(*) AS EXPECT_2 FROM mysql.wsrep_cluster_members; + +INSERT INTO mysql.wsrep_allowlist (ip) VALUES (0); +SET GLOBAL wsrep_cluster_address = @wsrep_cluster_address_orig; +SELECT 1; +DELETE FROM mysql.wsrep_allowlist; + +--connection node_1 +SET GLOBAL wsrep_provider_options ='pc.ignore_sb=true'; + +--connection node_2 +--source include/kill_galera.inc + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +--connection node_2 +--source include/start_mysqld.inc + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +SET GLOBAL wsrep_provider_options ='pc.ignore_sb=false'; + +# Cleanup +--source include/auto_increment_offset_restore.inc diff --git a/mysql-test/suite/galera/t/mdev-22063.test b/mysql-test/suite/galera/t/mdev-22063.test index ccc199e308f..260067d8a7b 100644 --- a/mysql-test/suite/galera/t/mdev-22063.test +++ b/mysql-test/suite/galera/t/mdev-22063.test @@ -16,6 +16,7 @@ SELECT * FROM t1; SELECT * FROM s; --connection node_2 +SET GLOBAL WSREP_MODE='REPLICATE_ARIA,REPLICATE_MYISAM'; --let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' --source include/wait_condition.inc --let $wait_condition = 
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 's' @@ -27,6 +28,7 @@ SELECT * FROM t1; SELECT * FROM s; --connection node_1 +SET GLOBAL WSREP_MODE='REPLICATE_ARIA,REPLICATE_MYISAM'; DROP TABLE t1; DROP SEQUENCE s; @@ -183,3 +185,6 @@ SELECT * FROM t1; DROP TABLE t1; DROP VIEW view_t1; SET GLOBAL wsrep_mode=DEFAULT; + +--connection node_2 +SET GLOBAL wsrep_mode=DEFAULT; diff --git a/mysql-test/suite/galera_3nodes/disabled.def b/mysql-test/suite/galera_3nodes/disabled.def index 3a6c38d81bb..c52df019c65 100644 --- a/mysql-test/suite/galera_3nodes/disabled.def +++ b/mysql-test/suite/galera_3nodes/disabled.def @@ -11,11 +11,6 @@ ############################################################################## galera_2_cluster : MDEV-32631 galera_2_cluster: before_rollback(): Assertion `0' failed -galera_gtid_2_cluster : MDEV-32633 galera_gtid_2_cluster: Assertion `thd->wsrep_next_trx_id() != (0x7fffffffffffffffLL * 2ULL + 1)' -galera_ssl_reload : MDEV-32778 galera_ssl_reload failed with warning message -galera_pc_bootstrap : temporarily disabled at the request of Codership -galera_ipv6_mariabackup_section : temporarily disabled at the request of Codership -# Opensuse/suse/rocky9/rocky84/rhel9/rhel8-ppc64le .. 
- all same IPv6 isn't configured right or skipping or galera -galera_ipv6_rsync : Can't connect to server on '::1' (115) -galera_ipv6_rsync_section : Can't connect to server on '::1' (115) -GCF-354 : MDEV-25614 Galera test failure on GCF-354 +galera_ipv6_rsync : MDEV-34842 Can't connect to server on '::1' (115) +galera_ipv6_rsync_section : MDEV-34842 Can't connect to server on '::1' (115) +galera_ipv6_mariabackup_section : MDEV-34842 Can't connect to server on '::1' (115) diff --git a/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result b/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result index 1cb14cd3eff..b849cc9f368 100644 --- a/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result +++ b/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result @@ -223,10 +223,14 @@ select @@gtid_binlog_state; drop table t1; stop slave; reset slave; +Warnings: +Note 4190 RESET SLAVE is implicitly changing the value of 'Using_Gtid' from 'Current_Pos' to 'Slave_Pos' cluster 2 node 1 connection node_4; stop slave; reset slave; +Warnings: +Note 4190 RESET SLAVE is implicitly changing the value of 'Using_Gtid' from 'Current_Pos' to 'Slave_Pos' cluster 1 node 1 connection node_1; change master to master_use_gtid=no, ignore_server_ids=(); diff --git a/mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result b/mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result index ffc5ec0627a..91ff0342b8d 100644 --- a/mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result +++ b/mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result @@ -1,6 +1,9 @@ connection node_2; connection node_1; connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +connection node_1; +connection node_2; +connection node_3; connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; set wsrep_sync_wait=0; connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; @@ -44,9 +47,9 @@ connection node_1b; connection node_1; connection node_3; connection node_1; 
-CALL insert_row('node1', 500); +CALL insert_row('node1', 100); connection node_3; -CALL insert_row('node3', 500); +CALL insert_row('node3', 100); CREATE TABLE t2(i int primary key) engine=innodb; connection node_2; # Restart node_2 @@ -60,7 +63,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2503 +gtid_binlog_pos 1111-1-1703 connection node_2; # GTID in node2 show variables like 'wsrep_gtid_domain_id'; @@ -68,7 +71,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2503 +gtid_binlog_pos 1111-1-1703 connection node_3; # GTID in node3 show variables like 'wsrep_gtid_domain_id'; @@ -76,7 +79,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2503 +gtid_binlog_pos 1111-1-1703 # Shutdown node_3 connection node_3; SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1'; @@ -98,7 +101,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2554 +gtid_binlog_pos 1111-1-1754 connection node_2; # GTID in node2 show variables like 'wsrep_gtid_domain_id'; @@ -106,7 +109,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2554 +gtid_binlog_pos 1111-1-1754 connection node_3; # GTID in node3 show variables like 'wsrep_gtid_domain_id'; @@ -114,7 +117,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2554 +gtid_binlog_pos 1111-1-1754 # One by one shutdown all nodes connection node_3; # shutdown node_3 @@ -132,7 +135,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2554 +gtid_binlog_pos 
1111-1-1754 ANALYZE TABLE t2; Table Op Msg_type Msg_text test.t2 analyze status Engine-independent statistics collected @@ -163,7 +166,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2756 +gtid_binlog_pos 1111-1-1956 connection node_2; node2 GTID show variables like 'wsrep_gtid_domain_id'; @@ -171,7 +174,7 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2756 +gtid_binlog_pos 1111-1-1956 connection node_3; node3 GTID show variables like 'wsrep_gtid_domain_id'; @@ -179,22 +182,22 @@ Variable_name Value wsrep_gtid_domain_id 1111 show variables like '%gtid_binlog_pos%'; Variable_name Value -gtid_binlog_pos 1111-1-2756 +gtid_binlog_pos 1111-1-1956 connection node_1; table size in node1 SELECT COUNT(*) FROM t1; COUNT(*) -2750 +1950 connection node_2; table size in node2 SELECT COUNT(*) FROM t1; COUNT(*) -2750 +1950 connection node_3; table size in node3 SELECT COUNT(*) FROM t1; COUNT(*) -2750 +1950 connection node_2; call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node"); call mtr.add_suppression("Sending JOIN failed: "); diff --git a/mysql-test/suite/galera_3nodes/r/galera_toi_vote.result b/mysql-test/suite/galera_3nodes/r/galera_toi_vote.result index d8d3abe40e9..345fa92c13d 100644 --- a/mysql-test/suite/galera_3nodes/r/galera_toi_vote.result +++ b/mysql-test/suite/galera_3nodes/r/galera_toi_vote.result @@ -7,8 +7,9 @@ connection node_3; SET SESSION wsrep_on=OFF; DROP SCHEMA test; connection node_1; +SET SESSION lc_messages='fr_FR'; CREATE SCHEMA test; -ERROR HY000: Can't create database 'test'; database exists +ERROR HY000: Ne peut créer la base 'test'; elle existe déjà connection node_1; SET SESSION wsrep_sync_wait=0; connection node_2; diff --git a/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf 
index c27490faf36..c0acbe58ad3 100644 --- a/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf +++ b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.cnf @@ -1,38 +1,31 @@ !include ../galera_3nodes.cnf +[mysqld] +loose-galera-gtid-consistency=1 +wsrep_sst_auth="root:" +wsrep_sst_method=mariabackup +log_slave_updates=ON +log_bin=mariadb-bin-log +binlog-format=row +wsrep-gtid-mode=ON +wsrep-debug=1 +gtid-strict-mode=1 + [mysqld.1] wsrep-node-name="node1" -wsrep_gtid_domain_id=1111 gtid_domain_id=2 server_id=10999 -wsrep_sst_auth="root:" -wsrep_sst_method=mariabackup -log_slave_updates=ON -log_bin=mariadb-bin-log -binlog-format=row -wsrep-gtid-mode=ON +wsrep_gtid_domain_id=1111 [mysqld.2] wsrep-node-name="node2" -wsrep_gtid_domain_id=1112 gtid_domain_id=3 -wsrep_sst_auth="root:" -wsrep_sst_method=mariabackup -log_slave_updates=ON -log_bin=mariadb-bin-log -binlog-format=row -wsrep-gtid-mode=ON +wsrep_gtid_domain_id=1112 [mysqld.3] wsrep-node-name="node3" -wsrep_gtid_domain_id=1113 gtid_domain_id=4 -wsrep_sst_auth="root:" -wsrep_sst_method=mariabackup -log_slave_updates=ON -log_bin=mariadb-bin-log -binlog-format=row -wsrep-gtid-mode=ON +wsrep_gtid_domain_id=1113 [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff --git a/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test index f00972b0461..871014b39d0 100644 --- a/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test +++ b/mysql-test/suite/galera_3nodes/t/galera_gtid_consistency.test @@ -2,7 +2,6 @@ --source include/big_test.inc --source include/force_restart.inc - # # Testing gtid consistency in 3 node cluster when nodes drop # and join back to cluster. @@ -13,6 +12,13 @@ # from the bootstrap node (node_1), and use it # --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + +# Save original auto_increment_offset values. 
+--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 +--source ../galera/include/auto_increment_offset_save.inc + --connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 set wsrep_sync_wait=0; --connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 @@ -98,10 +104,10 @@ show variables like '%gtid_binlog_pos%'; # while node 2 is absent # --connection node_1 -CALL insert_row('node1', 500); +CALL insert_row('node1', 100); --connection node_3 -CALL insert_row('node3', 500); +CALL insert_row('node3', 100); CREATE TABLE t2(i int primary key) engine=innodb; @@ -225,12 +231,19 @@ show variables like '%gtid_binlog_pos%'; # bootstap cluster in order node1 - node2 - node3 # send some inserts and DDL after each node started # ---sleep 5 + --echo # Bootstrap from node_1 --connection node_1 --let $restart_parameters = --wsrep_new_cluster --source include/start_mysqld.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment' +--source include/wait_condition.inc + show variables like 'wsrep_gtid_domain_id'; show variables like '%gtid_binlog_pos%'; @@ -243,6 +256,13 @@ ANALYZE TABLE t2; --let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect --source include/start_mysqld.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc +--let 
$wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment' +--source include/wait_condition.inc + # # connection node_1b may not be functional anymore, after node was # shutdown, open node_1c for controlling node 1 state @@ -265,6 +285,14 @@ ALTER TABLE t2 ADD COLUMN (k int); --let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.3.expect --source include/start_mysqld.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment' +--source include/wait_condition.inc + + --connection node_1c --echo # wait until all nodes are back in cluster --let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; @@ -335,12 +363,16 @@ DROP TABLE t2; DROP TABLE t3; --connection node_3 ---let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2' +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't3' --source include/wait_condition.inc --connection node_2 ---let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2' +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't3' --source include/wait_condition.inc +# Restore original auto_increment_offset values. 
+--let $galera_cluster_size=3 +--source ../galera/include/auto_increment_offset_restore.inc + --disconnect node_3 --disconnect node_2b --disconnect node_1b diff --git a/mysql-test/suite/galera_3nodes/t/galera_toi_vote.test b/mysql-test/suite/galera_3nodes/t/galera_toi_vote.test index 6bc87cf8874..bd53c510cd4 100644 --- a/mysql-test/suite/galera_3nodes/t/galera_toi_vote.test +++ b/mysql-test/suite/galera_3nodes/t/galera_toi_vote.test @@ -24,6 +24,9 @@ DROP SCHEMA test; # This should fail on nodes 1 and 2 and succeed on node 3 --connection node_1 +# Make error message on source node different by changing locale +# It should still agree with node 2 +SET SESSION lc_messages='fr_FR'; --error ER_DB_CREATE_EXISTS CREATE SCHEMA test; diff --git a/mysql-test/suite/innodb/r/alter_copy_bulk,OFF.rdiff b/mysql-test/suite/innodb/r/alter_copy_bulk,OFF.rdiff index a644139ea78..6570c52def1 100644 --- a/mysql-test/suite/innodb/r/alter_copy_bulk,OFF.rdiff +++ b/mysql-test/suite/innodb/r/alter_copy_bulk,OFF.rdiff @@ -1,9 +1,9 @@ --- bulk_copy_alter.result +++ bulk_copy_alter,non_bulk_alter_copy.result -@@ -5,7 +5,7 @@ +@@ -6,7 +6,7 @@ INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536; - ALTER TABLE t1 ADD INDEX(f2); - ALTER TABLE t1 ADD PRIMARY KEY(f1(2)); + ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2); + ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2)); -ERROR 23000: Duplicate entry 'bb' for key 'PRIMARY' +ERROR 23000: Duplicate entry 'aa' for key 'PRIMARY' INSERT INTO t1 VALUES(repeat('a', 200), 1); diff --git a/mysql-test/suite/innodb/r/alter_copy_bulk.result b/mysql-test/suite/innodb/r/alter_copy_bulk.result index 53c7e8a812d..cd83e65a5f4 100644 --- a/mysql-test/suite/innodb/r/alter_copy_bulk.result +++ b/mysql-test/suite/innodb/r/alter_copy_bulk.result @@ -1,10 +1,11 @@ -SET @@alter_algorithm=COPY; +SET @default_stats_persistent= @@global.innodb_stats_persistent; +SET GLOBAL innodb_stats_persistent= 0; CREATE TABLE t1(f1 CHAR(200), f2 INT NOT 
NULL)engine=InnoDB; INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2; -ALTER TABLE t1 FORCE; +ALTER TABLE t1 ALGORITHM=COPY, FORCE; INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536; -ALTER TABLE t1 ADD INDEX(f2); -ALTER TABLE t1 ADD PRIMARY KEY(f1(2)); +ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2); +ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2)); ERROR 23000: Duplicate entry 'bb' for key 'PRIMARY' INSERT INTO t1 VALUES(repeat('a', 200), 1); ALTER TABLE t1 ADD UNIQUE KEY(f2); @@ -13,14 +14,14 @@ ALTER IGNORE TABLE t1 MODIFY f1 CHAR(200) NOT NULL; CREATE TABLE t2(f1 INT NOT NULL, FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB; INSERT INTO t2 VALUES(1); -ALTER TABLE t2 FORCE; +ALTER TABLE t2 ALGORITHM=COPY, FORCE; DROP TABLE t2, t1; CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB PARTITION BY HASH(f1) PARTITIONS 2; INSERT INTO t1 VALUES(1, 1); INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2; -ALTER TABLE t1 FORCE; +ALTER TABLE t1 ALGORITHM=COPY, FORCE; INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536; -ALTER TABLE t1 ADD INDEX(f2); +ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2); DROP TABLE t1; # # MDEV-34756 Validation of new foreign key skipped @@ -39,13 +40,14 @@ ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1); ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`#sql-alter`, CONSTRAINT `#sql-alter_ibfk_2` FOREIGN KEY (`f2`) REFERENCES `t1` (`f1`)) INSERT INTO t1 VALUES(3, 1); SET STATEMENT foreign_key_checks=0 FOR -ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1); +ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1); affected rows: 1 info: Records: 1 Duplicates: 0 Warnings: 0 -ALTER TABLE t1 FORCE; +ALTER TABLE t1 ALGORITHM=COPY, FORCE; affected rows: 2 info: Records: 2 Duplicates: 0 Warnings: 0 -ALTER TABLE t2 FORCE; +ALTER TABLE t2 ALGORITHM=COPY, FORCE; affected rows: 1 info: Records: 1 Duplicates: 0 Warnings: 0 DROP TABLE t2, t1; 
+SET GLOBAL innodb_stats_persistent=@default_stats_persistent; diff --git a/mysql-test/suite/innodb/t/alter_copy_bulk.test b/mysql-test/suite/innodb/t/alter_copy_bulk.test index ae815cd4e30..2e8764578d5 100644 --- a/mysql-test/suite/innodb/t/alter_copy_bulk.test +++ b/mysql-test/suite/innodb/t/alter_copy_bulk.test @@ -1,21 +1,22 @@ --source include/have_innodb.inc --source include/have_partition.inc --source include/have_sequence.inc -SET @@alter_algorithm=COPY; +SET @default_stats_persistent= @@global.innodb_stats_persistent; +SET GLOBAL innodb_stats_persistent= 0; CREATE TABLE t1(f1 CHAR(200), f2 INT NOT NULL)engine=InnoDB; INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2; # Buffer fits in the memory -ALTER TABLE t1 FORCE; +ALTER TABLE t1 ALGORITHM=COPY, FORCE; # Insert more entries INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536; # Alter should use temporary file for sorting -ALTER TABLE t1 ADD INDEX(f2); +ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2); # Error while buffering the insert operation --error ER_DUP_ENTRY -ALTER TABLE t1 ADD PRIMARY KEY(f1(2)); +ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2)); INSERT INTO t1 VALUES(repeat('a', 200), 1); # Error while applying the bulk insert operation @@ -29,18 +30,18 @@ CREATE TABLE t2(f1 INT NOT NULL, FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB; INSERT INTO t2 VALUES(1); # Bulk operation shouldn't happen because of foreign key constraints -ALTER TABLE t2 FORCE; +ALTER TABLE t2 ALGORITHM=COPY, FORCE; DROP TABLE t2, t1; CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB PARTITION BY HASH(f1) PARTITIONS 2; INSERT INTO t1 VALUES(1, 1); INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2; # Buffer fits in the memory -ALTER TABLE t1 FORCE; +ALTER TABLE t1 ALGORITHM=COPY, FORCE; # Insert more entries INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536; # Alter should use temporary file for sorting -ALTER TABLE t1 ADD INDEX(f2); +ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2); DROP TABLE t1; 
--echo # @@ -62,8 +63,9 @@ ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1); INSERT INTO t1 VALUES(3, 1); --enable_info SET STATEMENT foreign_key_checks=0 FOR -ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1); -ALTER TABLE t1 FORCE; -ALTER TABLE t2 FORCE; +ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1); +ALTER TABLE t1 ALGORITHM=COPY, FORCE; +ALTER TABLE t2 ALGORITHM=COPY, FORCE; --disable_info DROP TABLE t2, t1; +SET GLOBAL innodb_stats_persistent=@default_stats_persistent; diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 81f0af88836..d52dbb9a6f3 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -2382,6 +2382,16 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). 
Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_KEY_COMPARE_COST VARIABLE_SCOPE GLOBAL VARIABLE_TYPE DOUBLE diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 1b9a8ed0a36..5ccdc4c7203 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -2582,6 +2582,16 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). 
Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_KEY_COMPARE_COST VARIABLE_SCOPE GLOBAL VARIABLE_TYPE DOUBLE diff --git a/mysql-test/suite/wsrep/r/MDEV-33997.result b/mysql-test/suite/wsrep/r/MDEV-33997.result new file mode 100644 index 00000000000..f0e43462054 --- /dev/null +++ b/mysql-test/suite/wsrep/r/MDEV-33997.result @@ -0,0 +1,38 @@ +SET SESSION wsrep_osu_method=RSU; +SET autocommit=0; +CREATE TABLE t (c INT) ENGINE=INNODB PARTITION BY KEY(c) PARTITIONS 2; +INSERT INTO t VALUES (1); +INSERT INTO t SELECT 1 ; +COMMIT; +SELECT * FROM t; +c +1 +1 +DROP TABLE t; +SET autocommit=1; +SET SESSION wsrep_osu_method=RSU; +CREATE TABLE t (c INT) ENGINE=INNODB PARTITION BY KEY(c) PARTITIONS 2; +INSERT INTO t SELECT 1 ; +SELECT * FROM t; +c +1 +DROP TABLE t; +SET autocommit=1; +SET SESSION wsrep_osu_method=RSU; +CREATE TABLE t (c INT) ENGINE=MYISAM PARTITION BY KEY(c) PARTITIONS 2; +INSERT INTO t SELECT 1 ; +ERROR 42000: This version of MariaDB doesn't yet support 'RSU on this table engine' +SELECT * FROM t; +c +DROP TABLE t; +SET SESSION wsrep_osu_method=RSU; +SET autocommit=0; +CREATE TABLE t (c INT) ENGINE=MYISAM PARTITION BY KEY(c) PARTITIONS 2; +INSERT INTO t VALUES (1); +INSERT INTO t SELECT 1 ; +ERROR 42000: This version of MariaDB doesn't yet support 'RSU on this table engine' +COMMIT; +SELECT * FROM t; +c +1 +DROP TABLE t; diff --git a/mysql-test/suite/wsrep/t/MDEV-33997.cnf b/mysql-test/suite/wsrep/t/MDEV-33997.cnf new file mode 100644 index 00000000000..489c4385dbd --- /dev/null +++ b/mysql-test/suite/wsrep/t/MDEV-33997.cnf @@ -0,0 +1,9 @@ +!include ../my.cnf + +[mysqld.1] +wsrep-on=ON +binlog-format=ROW +innodb-flush-log-at-trx-commit=1 +wsrep-cluster-address=gcomm:// +wsrep-provider=@ENV.WSREP_PROVIDER +innodb-autoinc-lock-mode=2 diff 
--git a/mysql-test/suite/wsrep/t/MDEV-33997.combinations b/mysql-test/suite/wsrep/t/MDEV-33997.combinations new file mode 100644 index 00000000000..1ce3b45aa1a --- /dev/null +++ b/mysql-test/suite/wsrep/t/MDEV-33997.combinations @@ -0,0 +1,4 @@ +[binlogon] +log-bin + +[binlogoff] diff --git a/mysql-test/suite/wsrep/t/MDEV-33997.test b/mysql-test/suite/wsrep/t/MDEV-33997.test new file mode 100644 index 00000000000..3d015244d7d --- /dev/null +++ b/mysql-test/suite/wsrep/t/MDEV-33997.test @@ -0,0 +1,49 @@ +--source include/have_wsrep.inc +--source include/have_innodb.inc +--source include/have_wsrep_provider.inc +--source include/have_partition.inc +# +# MDEV-33997: Assertion `((WSREP_PROVIDER_EXISTS_ && this->variables.wsrep_on) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()' failed in int THD::binlog_write_row(TABLE*, bool, const uchar*) +# +SET SESSION wsrep_osu_method=RSU; +SET autocommit=0; + +CREATE TABLE t (c INT) ENGINE=INNODB PARTITION BY KEY(c) PARTITIONS 2; +INSERT INTO t VALUES (1); +INSERT INTO t SELECT 1 ; +COMMIT; +SELECT * FROM t; +DROP TABLE t; + +# +# MDEV-27296 : Assertion `((thd && (WSREP_PROVIDER_EXISTS_ && thd->variables.wsrep_on)) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()' failed +# Second test case +# +SET autocommit=1; +SET SESSION wsrep_osu_method=RSU; +CREATE TABLE t (c INT) ENGINE=INNODB PARTITION BY KEY(c) PARTITIONS 2; +INSERT INTO t SELECT 1 ; +SELECT * FROM t; +DROP TABLE t; + +# +# We should not allow RSU for MyISAM +# +SET autocommit=1; +SET SESSION wsrep_osu_method=RSU; +CREATE TABLE t (c INT) ENGINE=MYISAM PARTITION BY KEY(c) PARTITIONS 2; +--error ER_NOT_SUPPORTED_YET +INSERT INTO t SELECT 1 ; +SELECT * FROM t; +DROP TABLE t; + +SET SESSION wsrep_osu_method=RSU; +SET autocommit=0; + +CREATE TABLE t (c INT) ENGINE=MYISAM PARTITION BY KEY(c) PARTITIONS 2; +INSERT INTO t VALUES (1); +--error ER_NOT_SUPPORTED_YET +INSERT INTO t SELECT 1 ; +COMMIT; +SELECT * FROM t; +DROP TABLE t; diff --git 
a/mysql-test/suite/wsrep/t/wsrep-recover-step.inc b/mysql-test/suite/wsrep/t/wsrep-recover-step.inc index 22669438fe0..b131ac07641 100644 --- a/mysql-test/suite/wsrep/t/wsrep-recover-step.inc +++ b/mysql-test/suite/wsrep/t/wsrep-recover-step.inc @@ -18,7 +18,7 @@ --perl use strict; - my $wsrep_start_position = `grep 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; + my $wsrep_start_position = `grep -a 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; chomp($wsrep_start_position); die if $wsrep_start_position eq ''; open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/galera_wsrep_start_position.inc") or die; diff --git a/scripts/galera_recovery.sh b/scripts/galera_recovery.sh index 9ff5e4e5f50..50aeb3a0b91 100644 --- a/scripts/galera_recovery.sh +++ b/scripts/galera_recovery.sh @@ -81,10 +81,10 @@ wsrep_recover_position() { # Parse server's error log for recovered position. The server prints # "..skipping position recovery.." if started without wsrep. 
- recovered_pos="$(grep 'WSREP: Recovered position:' $log_file)" + recovered_pos="$(grep -a 'WSREP: Recovered position:' $log_file)" if [ -z "$recovered_pos" ]; then - skipped="$(grep WSREP $log_file | grep 'skipping position recovery')" + skipped="$(grep -a WSREP $log_file | grep 'skipping position recovery')" if [ -z "$skipped" ]; then log "WSREP: Failed to recover position: '`cat $log_file`'" exit 1 diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index 44dfe5e4935..e03843ff2c9 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -266,9 +266,9 @@ wsrep_recover_position() { exit 1 fi - local rp="$(grep 'WSREP: Recovered position:' $wr_logfile)" + local rp="$(grep -a 'WSREP: Recovered position:' $wr_logfile)" if [ -z "$rp" ]; then - local skipped="$(grep WSREP $wr_logfile | grep 'skipping position recovery')" + local skipped="$(grep -a WSREP $wr_logfile | grep 'skipping position recovery')" if [ -z "$skipped" ]; then log_error "WSREP: Failed to recover position: '`cat $wr_logfile`'" ret=1 diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 8219024173c..718c346d098 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -4974,13 +4974,15 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) #ifdef WITH_WSREP if (WSREP(thd)) { - WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d " + WSREP_WARN("BF applier thread=%lu failed to open_and_lock_tables for " + "%s, fatal: %d " "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)", - thd->get_stmt_da()->sql_errno(), - thd->is_fatal_error, - thd->wsrep_cs().mode(), - thd->wsrep_trx().state(), - (long long) wsrep_thd_trx_seqno(thd)); + thd_get_thread_id(thd), + thd->get_stmt_da()->message(), + thd->is_fatal_error, + thd->wsrep_cs().mode(), + thd->wsrep_trx().state(), + wsrep_thd_trx_seqno(thd)); } #endif /* WITH_WSREP */ if (thd->is_error() && diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc index 656b72b933b..96020c68b2b 100644 --- 
a/sql/signal_handler.cc +++ b/sql/signal_handler.cc @@ -25,6 +25,10 @@ #include "my_stacktrace.h" #include +#ifdef WITH_WSREP +#include "wsrep_server_state.h" +#endif /* WITH_WSREP */ + #ifdef _WIN32 #include #include @@ -222,6 +226,10 @@ extern "C" sig_handler handle_fatal_signal(int sig) "the equation.\n\n"); } +#ifdef WITH_WSREP + Wsrep_server_state::handle_fatal_signal(); +#endif /* WITH_WSREP */ + #ifdef HAVE_STACKTRACE thd= current_thd; diff --git a/sql/sql_class.h b/sql/sql_class.h index 59ea3a95f99..562c5f1d6c2 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -769,6 +769,7 @@ typedef struct system_variables ulong net_wait_timeout; ulong net_write_timeout; ulong optimizer_extra_pruning_depth; + ulonglong optimizer_join_limit_pref_ratio; ulong optimizer_prune_level; ulong optimizer_search_depth; ulong optimizer_selectivity_sampling_limit; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 85f6acb60af..de23e3220a8 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -4585,30 +4585,39 @@ mysql_execute_command(THD *thd, bool is_called_from_prepared_stmt) #ifdef WITH_WSREP if (wsrep && !first_table->view) { - bool is_innodb= first_table->table->file->partition_ht()->db_type == DB_TYPE_INNODB; - - // For consistency check inserted table needs to be InnoDB - if (!is_innodb && thd->wsrep_consistency_check != NO_CONSISTENCY_CHECK) + const legacy_db_type db_type= first_table->table->file->partition_ht()->db_type; + // For InnoDB we don't need to worry about anything here: + if (db_type != DB_TYPE_INNODB) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "Galera cluster does support consistency check only" - " for InnoDB tables."); - thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; - } - - // For !InnoDB we start TOI if it is not yet started and hope for the best - if (!is_innodb && !wsrep_toi) - { - const legacy_db_type db_type= first_table->table->file->partition_ht()->db_type; - - /* Currently we support TOI for 
MyISAM only. */ - if (db_type == DB_TYPE_MYISAM && - wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM)) - WSREP_TO_ISOLATION_BEGIN(first_table->db.str, first_table->table_name.str, NULL); + // For consistency check inserted table needs to be InnoDB + if (thd->wsrep_consistency_check != NO_CONSISTENCY_CHECK) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "Galera cluster does support consistency check only" + " for InnoDB tables."); + thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; + } + /* Only TOI allowed to !InnoDB tables */ + if (wsrep_OSU_method_get(thd) != WSREP_OSU_TOI) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "RSU on this table engine"); + break; + } + // For !InnoDB we start TOI if it is not yet started and hope for the best + if (!wsrep_toi) + { + /* Currently we support TOI for MyISAM and Aria only. */ + if ((db_type == DB_TYPE_MYISAM && wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM)) || + (db_type == DB_TYPE_ARIA && wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA))) + { + WSREP_TO_ISOLATION_BEGIN(first_table->db.str, first_table->table_name.str, NULL); + } + } } } #endif /* WITH_WSREP */ + /* Only the INSERT table should be merged. Other will be handled by select. 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index a7f4d97a9ee..ff9962afbeb 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -269,12 +269,14 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, bool is_top_and_level); static Item* part_of_refkey(TABLE *form,Field *field); -static bool test_if_cheaper_ordering(const JOIN_TAB *tab, +static bool test_if_cheaper_ordering(bool in_join_optimizer, + const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int key, ha_rows select_limit, int *new_key, int *new_key_direction, ha_rows *new_select_limit, + double *new_read_time, uint *new_used_key_parts= NULL, uint *saved_best_key_parts= NULL); static int test_if_order_by_key(JOIN *, ORDER *, TABLE *, uint, uint *); @@ -368,6 +370,18 @@ static double prev_record_reads(const POSITION *positions, uint idx, table_map found_ref, double record_count, double *same_keys); +static +bool join_limit_shortcut_is_applicable(const JOIN *join); +POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost); + +static +bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order, + key_map *usable_keys); +static +void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table, + Item_field *item_field, + key_map *col_keys); + #ifndef DBUG_OFF /* @@ -6027,6 +6041,7 @@ make_join_statistics(JOIN *join, List &tables_list, join->sort_by_table= get_sort_by_table(join->order, join->group_list, join->select_lex->leaf_tables, join->const_table_map); + join->limit_shortcut_applicable= join_limit_shortcut_is_applicable(join); /* Update info on indexes that can be used for search lookups as reading const tables may has added new sargable predicates. 
@@ -9998,6 +10013,7 @@ choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest) qsort2_cmp jtab_sort_func; DBUG_ENTER("choose_plan"); + join->limit_optimization_mode= false; join->cur_embedding_map= 0; join->extra_heuristic_pruning= false; join->prune_level= join->thd->variables.optimizer_prune_level; @@ -10072,8 +10088,46 @@ choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest) join->extra_heuristic_pruning= true; } + double limit_cost= DBL_MAX; + double limit_record_count; + POSITION *limit_plan= NULL; + + /* First, build a join plan that can short-cut ORDER BY...LIMIT */ + if (join->limit_shortcut_applicable && !join->emb_sjm_nest) + { + bool res; + Json_writer_object wrapper(join->thd); + Json_writer_array trace(join->thd, "join_limit_shortcut_plan_search"); + join->limit_optimization_mode= true; + res= greedy_search(join, join_tables, search_depth, + use_cond_selectivity); + join->limit_optimization_mode= false; + + if (res) + DBUG_RETURN(TRUE); + DBUG_ASSERT(join->best_read != DBL_MAX); + + /* + We've built a join order. Adjust its cost based on ORDER BY...LIMIT + short-cutting. + */ + limit_plan= join_limit_shortcut_finalize_plan(join, &limit_cost); + limit_record_count= join->join_record_count; + } + + /* The main call to search for the query plan: */ if (greedy_search(join, join_tables, search_depth, use_cond_selectivity)) DBUG_RETURN(TRUE); + + DBUG_ASSERT(join->best_read != DBL_MAX); + if (limit_plan && limit_cost < join->best_read) + { + /* Plan that uses ORDER BY ... LIMIT shortcutting is better. 
*/ + memcpy((uchar*)join->best_positions, (uchar*)limit_plan, + sizeof(POSITION)*join->table_count); + join->best_read= limit_cost; + join->join_record_count= limit_record_count; + } } join->emb_sjm_nest= 0; @@ -11366,6 +11420,315 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx, DBUG_RETURN(found_eq_ref); } + +/* + @brief + Check if it is potentially possible to short-cut the JOIN execution due to + ORDER BY ... LIMIT clause + + @detail + It is possible when the join has "ORDER BY ... LIMIT n" clause, and the + sort+limit operation is done right after the join operation (there's no + grouping or DISTINCT in between). + Then we can potentially build a join plan that enumerates rows in the + ORDER BY order and so will be able to terminate as soon as it has produced + #limit rows. + + Note that it is not a requirement that sort_by_table has an index that + matches ORDER BY. If it doesn't have one, the optimizer will pass + sort_by_table to filesort. Reading from sort_by_table won't use + short-cutting but the rest of the join will. 
+*/ + +static +bool join_limit_shortcut_is_applicable(const JOIN *join) +{ + /* + Any post-join operation like GROUP BY or DISTINCT or window functions + means we cannot short-cut join execution + */ + if (!join->thd->variables.optimizer_join_limit_pref_ratio || + !join->order || + join->select_limit == HA_POS_ERROR || + join->group_list || + join->select_distinct || + join->select_options & SELECT_BIG_RESULT || + join->rollup.state != ROLLUP::STATE_NONE || + join->select_lex->have_window_funcs() || + join->select_lex->with_sum_func) + { + return false; + } + + /* + Cannot do short-cutting if + (1) ORDER BY refers to more than one table or + (2) the table it refers to cannot be first table in the join order + */ + if (!join->sort_by_table || // (1) + join->sort_by_table->reginfo.join_tab->dependent) // (2) + return false; + + Json_writer_object wrapper(join->thd); + Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable"); + trace.add("applicable", 1); + /* It looks like we can short-cut limit due to join */ + return true; +} + + +/* + @brief + Check if we could use an index-based access method to produce rows + in the order for ORDER BY ... LIMIT. + + @detail + This should do what test_if_skip_sort_order() does. We can't use that + function directly, because: + + 1. We're at the join optimization stage and have not yet applied the query + plan fix-ups done in get_best_combination() and co. + + 2. The code in test_if_skip_sort_order() does modify query plan structures, + for example it may change the table's quick select. This is done even if + it's called with no_changes=true parameter. + + @param access_method_changed OUT Whether the function changed the access + method to get rows in desired order. + @param new_access_cost OUT if access method changed: its cost. 
+ + @return + true - Can skip sorting + false - Cannot skip sorting +*/ + +bool test_if_skip_sort_order_early(JOIN *join, + bool *access_method_changed, + double *new_access_cost) +{ + const POSITION *pos= &join->best_positions[join->const_tables]; + TABLE *table= pos->table->table; + key_map usable_keys= table->keys_in_use_for_order_by; + + *access_method_changed= false; + + // Step #1: Find indexes that produce the required ordering. + if (find_indexes_matching_order(join, table, join->order, &usable_keys)) + { + return false; // Cannot skip sorting + } + + // Step #2: Check if the index we're using produces the needed ordering + uint ref_key; + if (pos->key) + { + // Mirror the (wrong) logic in test_if_skip_sort_order: + if (pos->spl_plan || pos->type == JT_REF_OR_NULL) + return false; // Use filesort + + ref_key= pos->key->key; + } + else + { + if (pos->table->quick) + { + if (pos->table->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + ref_key= pos->table->quick->index; + else + ref_key= MAX_KEY; + } + else + ref_key= MAX_KEY; + } + + if (ref_key != MAX_KEY && usable_keys.is_set(ref_key)) + { + return true; // we're using an index that produces the required ordering. + } + + /* + Step #3: check if we can switch to using an index that would produce the + ordering. 
+ (But don't actually switch, this will be done by test_if_skip_sort_order) + */ + int best_key= -1; + uint UNINIT_VAR(best_key_parts); + uint saved_best_key_parts= 0; + int best_key_direction= 0; + JOIN_TAB *tab= pos->table; + ha_rows new_limit; + double new_read_time; + if (test_if_cheaper_ordering(/*in_join_optimizer */TRUE, + tab, join->order, table, usable_keys, + ref_key, join->select_limit, + &best_key, &best_key_direction, + &new_limit, &new_read_time, + &best_key_parts, + &saved_best_key_parts)) + { + // Ok found a way to skip sorting + *access_method_changed= true; + *new_access_cost= new_read_time; + return true; + } + + return false; +} + + +/* + Compute the cost of join assuming we only need a fraction of the output. +*/ + +double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting, + double *first_table_cost, + double fraction) +{ + POSITION *pos= join->best_positions + join->const_tables; + /* + Generally, we assume that producing X% of output takes X% of the cost. + */ + double partial_join_cost= join->best_read * fraction; + + if (skip_sorting) + { + /* + First table produces rows in required order. Two options: + + A. first_table_cost=NULL means we use whatever access method the join + optimizer has picked. Its cost was included in join->best_read and + we've already taken a fraction of it. + + B. first_table_cost!=NULL means we will need to switch to another access + method, we have the cost to read rows to produce #LIMIT rows in join + output. + */ + if (first_table_cost) + { + /* + Subtract the remainder of the first table's cost we had in + join->best_read: + */ + partial_join_cost -= pos->read_time*fraction; + partial_join_cost -= pos->records_read*fraction * WHERE_COST_THD(join->thd); + + /* Add the cost of the new access method we've got: */ + partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost); + } + } + else + { + DBUG_ASSERT(!first_table_cost); + /* + Cannot skip sorting. 
We read the first table entirely, then sort it. + + partial_join_cost includes pos->read_time*fraction. Add to it + pos->read_time*(1-fraction) so we have the cost to read the entire first + table. Do the same for costs of checking the WHERE. + */ + double extra_first_table_cost= pos->read_time * (1.0 - fraction); + double extra_first_table_where= pos->records_read * (1.0 - fraction) * + WHERE_COST_THD(join->thd); + + partial_join_cost= COST_ADD(partial_join_cost, + COST_ADD(extra_first_table_cost, + extra_first_table_where)); + } + return partial_join_cost; +} + + +/* + @brief + Finalize building the join order which allows to short-cut the join + execution. + + @detail + This is called after we have produced a join order that allows short- + cutting. + Here, we decide if it is cheaper to use this one or the original join + order. +*/ + +POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost) +{ + Json_writer_object wrapper(join->thd); + Json_writer_object trace(join->thd, "join_limit_shortcut_choice"); + + double fraction= join->select_limit / join->join_record_count; + trace.add("limit_fraction", fraction); + + /* Check which fraction of join output we need */ + if (fraction >= 1.0) + { + trace.add("skip_adjustment", "no short-cutting"); + return NULL; + } + + /* + Check if the first table's access method produces the required ordering. + Possible options: + 1. Yes: we can just take a fraction of the execution cost. + 2A No: change the access method to one that does produce the required + ordering, update the costs. + 2B No: Need to pass the first table to filesort(). 
+ */ + bool skip_sorting; + bool access_method_changed; + double new_access_cost; + { + Json_writer_array tmp(join->thd, "test_if_skip_sort_order_early"); + skip_sorting= test_if_skip_sort_order_early(join, + &access_method_changed, + &new_access_cost); + } + trace.add("can_skip_filesort", skip_sorting); + + double cost_with_shortcut= + recompute_join_cost_with_limit(join, skip_sorting, + access_method_changed ? + &new_access_cost : (double*)0, + fraction); + double risk_ratio= + (double)join->thd->variables.optimizer_join_limit_pref_ratio; + trace.add("full_join_cost", join->best_read); + trace.add("risk_ratio", risk_ratio); + trace.add("shortcut_join_cost", cost_with_shortcut); + cost_with_shortcut *= risk_ratio; + trace.add("shortcut_cost_with_risk", cost_with_shortcut); + if (cost_with_shortcut < join->best_read) + { + trace.add("use_shortcut_cost", true); + POSITION *pos= (POSITION*)memdup_root(join->thd->mem_root, + join->best_positions, + sizeof(POSITION)* + (join->table_count + 1)); + *cost= cost_with_shortcut; + return pos; + } + trace.add("use_shortcut_cost", false); + return NULL; +} + + +/* + @brief + If we're in Limit Optimization Mode, allow only join->sort_by_table as + the first table in the join order +*/ + +static +bool join_limit_shortcut_limits_tables(const JOIN *join, uint idx, table_map *map) +{ + if (join->limit_optimization_mode && idx == join->const_tables) + { + *map= join->sort_by_table->map; + return true; + } + return false; +} + + /** Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search. 
@@ -11546,6 +11909,9 @@ best_extension_by_limited_search(JOIN *join, */ allowed_tables= remaining_tables; allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables; + table_map sort_table; + if (join_limit_shortcut_limits_tables(join, idx, &sort_table)) + allowed_current_tables= sort_table; } DBUG_ASSERT(allowed_tables & remaining_tables); @@ -26409,6 +26775,7 @@ find_field_in_item_list (Field *field, void *data) that belong to 'table' and are equal to 'item_field'. */ +static void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table, Item_field *item_field, key_map *col_keys) @@ -26553,6 +26920,59 @@ static void prepare_for_reverse_ordered_access(JOIN_TAB *tab) } +/* + @brief + Given a table and order, find indexes that produce rows in the order + + @param usable_keys IN Bitmap of keys we can use + OUT Bitmap of indexes that produce rows in order. + + @return + false Some indexes were found + true No indexes found +*/ + +static +bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order, + key_map *usable_keys) +{ + /* Find indexes that cover all ORDER/GROUP BY fields */ + for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next) + { + Item *item= (*tmp_order->item)->real_item(); + if (item->type() != Item::FIELD_ITEM) + { + usable_keys->clear_all(); + return true; /* No suitable keys */ + } + + /* + Take multiple-equalities into account. Suppose we have + ORDER BY col1, col10 + and there are + multiple-equal(col1, col2, col3), + multiple-equal(col10, col11). + + Then, + - when item=col1, we find the set of indexes that cover one of {col1, + col2, col3} + - when item=col10, we find the set of indexes that cover one of {col10, + col11} + + And we compute an intersection of these sets to find set of indexes that + cover all ORDER BY components. 
+ */ + key_map col_keys; + compute_part_of_sort_key_for_equals(join, table, (Item_field*)item, + &col_keys); + usable_keys->intersect(col_keys); + if (usable_keys->is_clear_all()) + return true; // No usable keys + } + return false; + +} + /** Test if we can skip the ORDER BY by using an index. @@ -26614,41 +27034,17 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, been taken into account. */ usable_keys= *map; - - /* Find indexes that cover all ORDER/GROUP BY fields */ - for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next) + + // Step #1: Find indexes that produce the required ordering. + if (find_indexes_matching_order(tab->join, table, order, &usable_keys)) { - Item *item= (*tmp_order->item)->real_item(); - if (item->type() != Item::FIELD_ITEM) - { - usable_keys.clear_all(); - DBUG_RETURN(0); - } - - /* - Take multiple-equalities into account. Suppose we have - ORDER BY col1, col10 - and there are - multiple-equal(col1, col2, col3), - multiple-equal(col10, col11). - - Then, - - when item=col1, we find the set of indexes that cover one of {col1, - col2, col3} - - when item=col10, we find the set of indexes that cover one of {col10, - col11} - - And we compute an intersection of these sets to find set of indexes that - cover all ORDER BY components. - */ - key_map col_keys; - compute_part_of_sort_key_for_equals(tab->join, table, (Item_field*)item, - &col_keys); - usable_keys.intersect(col_keys); - if (usable_keys.is_clear_all()) - goto use_filesort; // No usable keys + DBUG_RETURN(false); // Cannot skip sorting } + /* + Step #2: Analyze the current access method. Note the used index as ref_key + and #used keyparts in ref_key_parts. 
+ */ ref_key= -1; /* Test if constant range in WHERE */ if (tab->ref.key >= 0 && tab->ref.key_parts) @@ -26692,6 +27088,12 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, } } + /* + Step #3: Check if index ref_key that we're using produces the required + ordering or if there is another index new_ref_key such that + - ref_key is a prefix of new_ref_key (so, access method can be reused) + - new_ref_key produces the required ordering + */ if (ref_key >= 0 && ref_key != MAX_KEY) { /* Current access method uses index ref_key with ref_key_parts parts */ @@ -26811,17 +27213,24 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, &used_key_parts))) goto check_reverse_order; } + + /* + Step #4: Go through all indexes that produce required ordering (in + usable_keys) and check if any of them is cheaper than ref_key + */ { uint UNINIT_VAR(best_key_parts); uint saved_best_key_parts= 0; int best_key_direction= 0; JOIN *join= tab->join; ha_rows table_records= table->stat_records(); + double new_read_time_dummy; - test_if_cheaper_ordering(tab, order, table, usable_keys, + test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys, ref_key, select_limit, &best_key, &best_key_direction, - &select_limit, &best_key_parts, + &select_limit, &new_read_time_dummy, + &best_key_parts, &saved_best_key_parts); /* @@ -32084,11 +32493,13 @@ static bool get_range_limit_read_cost(const POSITION *pos, */ static bool -test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, +test_if_cheaper_ordering(bool in_join_optimizer, + const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int ref_key, ha_rows select_limit_arg, int *new_key, int *new_key_direction, - ha_rows *new_select_limit, uint *new_used_key_parts, + ha_rows *new_select_limit, double *new_read_time, + uint *new_used_key_parts, uint *saved_best_key_parts) { DBUG_ENTER("test_if_cheaper_ordering"); @@ -32162,7 +32573,7 @@ test_if_cheaper_ordering(const 
JOIN_TAB *tab, ORDER *order, TABLE *table, if (join) // True if SELECT { - uint nr= (uint) (tab - join->join_tab); + uint nr= join->const_tables; fanout= 1.0; if (nr != join->table_count - 1) // If not last table fanout= (join->join_record_count / position->records_out); @@ -32189,12 +32600,27 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, Calculate the selectivity of the ref_key for REF_ACCESS. For RANGE_ACCESS we use table->opt_range_condition_rows. */ - if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) + if (in_join_optimizer) + { + if (ref_key >= 0 && ref_key != MAX_KEY && + join->best_positions[join->const_tables].type == JT_REF) + { + refkey_rows_estimate= + (ha_rows)join->best_positions[join->const_tables].records_read; + set_if_bigger(refkey_rows_estimate, 1); + } + } + else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) { /* If ref access uses keypart=const for all its key parts, and quick select uses the same # of key parts, then they are equivalent. Reuse #rows estimate from quick select as it is more precise. + + Note: we could just have used + join->best_positions[join->const_tables].records_read + here. That number was computed in best_access_path() and it already + includes adjustments based on table->opt_range[ref_key].rows. */ if (tab->ref.const_ref_part_map == make_prev_keypart_map(tab->ref.key_parts) && @@ -32431,6 +32857,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, *new_key= best_key; *new_key_direction= best_key_direction; *new_select_limit= has_limit ? 
best_select_limit : table_records; + *new_read_time= read_time; DBUG_RETURN(TRUE); } @@ -32529,10 +32956,11 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, table->opt_range_condition_rows= table->stat_records(); int key, direction; - if (test_if_cheaper_ordering(NULL, order, table, + double new_cost; + if (test_if_cheaper_ordering(FALSE, NULL, order, table, table->keys_in_use_for_order_by, -1, limit, - &key, &direction, &limit) && + &key, &direction, &limit, &new_cost) && !is_key_used(table, key, table->write_set)) { *need_sort= FALSE; diff --git a/sql/sql_select.h b/sql/sql_select.h index 3e29dbe019d..bd1b766bcdc 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1295,6 +1295,20 @@ public: passing 1st non-const table to filesort(). NULL means no such table exists. */ TABLE *sort_by_table; + + /* + If true, there is ORDER BY x LIMIT n clause and for certain join orders, it + is possible to short-cut the join execution, i.e. stop it as soon as n + output rows were produced. See join_limit_shortcut_is_applicable(). + */ + bool limit_shortcut_applicable; + + /* + Used during join optimization: if true, we're building a join order that + will short-cut join execution as soon as #LIMIT rows are produced. + */ + bool limit_optimization_mode; + /* Number of tables in the join. 
(In MySQL, it is named 'tables' and is also the number of elements in diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 970fb338519..8d47bb68f71 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2832,6 +2832,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit( VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX), DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1)); +static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio( + "optimizer_join_limit_pref_ratio", + "For queries with JOIN and ORDER BY LIMIT : make the optimizer " + "consider a join order that allows to short-cut execution after " + "producing #LIMIT matches if that promises N times speedup. " + "(A conservative setting here would be is a high value, like 100 so " + "the short-cutting plan is used if it promises a speedup of 100x or " + "more). Short-cutting plans are inherently risky so the default is 0 " + "which means do not consider this optimization", + SESSION_VAR(optimizer_join_limit_pref_ratio), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), + DEFAULT(0), BLOCK_SIZE(1)); + static Sys_var_ulong Sys_optimizer_use_condition_selectivity( "optimizer_use_condition_selectivity", "Controls selectivity of which conditions the optimizer takes into " diff --git a/sql/table.cc b/sql/table.cc index 977590c83e1..2e597583f07 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -285,17 +285,6 @@ const char *fn_frm_ext(const char *name) TABLE_CATEGORY get_table_category(const Lex_ident_db &db, const Lex_ident_table &name) { -#ifdef WITH_WSREP - if (db.str && db.streq(MYSQL_SCHEMA_NAME)) - { - if (name.streq(Lex_ident_table{STRING_WITH_LEN(WSREP_STREAMING_TABLE)}) || - name.streq(Lex_ident_table{STRING_WITH_LEN(WSREP_CLUSTER_TABLE)}) || - name.streq(Lex_ident_table{STRING_WITH_LEN(WSREP_MEMBERS_TABLE)})) - { - return TABLE_CATEGORY_INFORMATION; - } - } -#endif /* WITH_WSREP */ if (is_infoschema_db(&db)) return TABLE_CATEGORY_INFORMATION; @@ -317,6 +306,20 @@ TABLE_CATEGORY 
get_table_category(const Lex_ident_db &db, return TABLE_CATEGORY_LOG; } +#ifdef WITH_WSREP + if (db.streq(WSREP_LEX_SCHEMA)) + { + if(name.streq(WSREP_LEX_STREAMING)) + return TABLE_CATEGORY_INFORMATION; + if (name.streq(WSREP_LEX_CLUSTER)) + return TABLE_CATEGORY_INFORMATION; + if (name.streq(WSREP_LEX_MEMBERS)) + return TABLE_CATEGORY_INFORMATION; + if (name.streq(WSREP_LEX_ALLOWLIST)) + return TABLE_CATEGORY_INFORMATION; + } +#endif /* WITH_WSREP */ + return TABLE_CATEGORY_USER; } diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc index eee9dc02319..2dc6f47d06b 100644 --- a/sql/wsrep_applier.cc +++ b/sql/wsrep_applier.cc @@ -82,7 +82,9 @@ wsrep_get_apply_format(THD* thd) return thd->wsrep_rgi->rli->relay_log.description_event_for_exec; } -void wsrep_store_error(const THD* const thd, wsrep::mutable_buffer& dst) +void wsrep_store_error(const THD* const thd, + wsrep::mutable_buffer& dst, + bool const include_msg) { Diagnostics_area::Sql_condition_iterator it= thd->get_stmt_da()->sql_conditions(); @@ -100,8 +102,16 @@ void wsrep_store_error(const THD* const thd, wsrep::mutable_buffer& dst) uint const err_code= cond->get_sql_errno(); const char* const err_str= cond->get_message_text(); - slider+= my_snprintf(slider, buf_end - slider, " %s, Error_code: %d;", - err_str, err_code); + if (include_msg) + { + slider+= snprintf(slider, buf_end - slider, " %s, Error_code: %d;", + err_str, err_code); + } + else + { + slider+= snprintf(slider, buf_end - slider, " Error_code: %d;", + err_code); + } } if (slider != dst.data()) diff --git a/sql/wsrep_applier.h b/sql/wsrep_applier.h index fefca306a70..e633b1b9bf2 100644 --- a/sql/wsrep_applier.h +++ b/sql/wsrep_applier.h @@ -35,7 +35,21 @@ int wsrep_apply_events(THD* thd, #define WSREP_ERR_FAILED 6 // Operation failed for some internal reason #define WSREP_ERR_ABORTED 7 // Operation was aborted externally -void wsrep_store_error(const THD* thd, wsrep::mutable_buffer& buf); +/* Loops over THD diagnostic area and concatenates 
all error messages + * and error codes to a single continuous buffer to create a unique + * but consistent failure signature which provider can use for voting + * between the nodes in the cluster. + * + * @param thd THD context + * @param dst buffer to store the signature + * @param include_msg whether to use MySQL error message in addition to + * MySQL error code. Note that in the case of a TOI + * operation the message may be not consistent between + * the nodes e.g. due to a different client locale setting + * and should be omitted */ +void wsrep_store_error(const THD* thd, + wsrep::mutable_buffer& buf, + bool include_msg); class Format_description_log_event; void wsrep_set_apply_format(THD*, Format_description_log_event*); diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc index 7eb3f07849a..2c0e2e643fa 100644 --- a/sql/wsrep_high_priority_service.cc +++ b/sql/wsrep_high_priority_service.cc @@ -123,14 +123,15 @@ static void wsrep_setup_uk_and_fk_checks(THD* thd) static int apply_events(THD* thd, Relay_log_info* rli, const wsrep::const_buffer& data, - wsrep::mutable_buffer& err) + wsrep::mutable_buffer& err, + bool const include_msg) { int const ret= wsrep_apply_events(thd, rli, data.data(), data.size()); if (ret || wsrep_thd_has_ignored_error(thd)) { if (ret) { - wsrep_store_error(thd, err); + wsrep_store_error(thd, err, include_msg); } wsrep_dump_rbr_buf_with_header(thd, data.data(), data.size()); } @@ -427,7 +428,7 @@ int Wsrep_high_priority_service::apply_toi(const wsrep::ws_meta& ws_meta, #endif thd->set_time(); - int ret= apply_events(thd, m_rli, data, err); + int ret= apply_events(thd, m_rli, data, err, false); wsrep_thd_set_ignored_error(thd, false); trans_commit(thd); @@ -595,7 +596,7 @@ int Wsrep_applier_service::apply_write_set(const wsrep::ws_meta& ws_meta, #endif /* ENABLED_DEBUG_SYNC */ wsrep_setup_uk_and_fk_checks(thd); - int ret= apply_events(thd, m_rli, data, err); + int ret= apply_events(thd, m_rli, data, err, 
true); thd->close_temporary_tables(); if (!ret && !(ws_meta.flags() & wsrep::provider::flag::commit)) @@ -764,7 +765,7 @@ int Wsrep_replayer_service::apply_write_set(const wsrep::ws_meta& ws_meta, ws_meta, thd->wsrep_sr().fragments()); } - ret= ret || apply_events(thd, m_rli, data, err); + ret= ret || apply_events(thd, m_rli, data, err, true); thd->close_temporary_tables(); if (!ret && !(ws_meta.flags() & wsrep::provider::flag::commit)) { diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 9cdf7bb4bb1..a0767186866 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -2900,7 +2900,10 @@ static void wsrep_TOI_end(THD *thd) { if (thd->is_error() && !wsrep_must_ignore_error(thd)) { - wsrep_store_error(thd, err); + /* use only error code, for the message can be inconsistent + * between the nodes due to differing lc_message settings + * in client session and server applier thread */ + wsrep_store_error(thd, err, false); } int const ret= client_state.leave_toi_local(err); diff --git a/sql/wsrep_schema.cc b/sql/wsrep_schema.cc index 05eb1eeb7fa..e7b3a8580c3 100644 --- a/sql/wsrep_schema.cc +++ b/sql/wsrep_schema.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Codership Oy +/* Copyright (C) 2015-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,6 +35,18 @@ #include #include +#define WSREP_SCHEMA "mysql" +#define WSREP_STREAMING_TABLE "wsrep_streaming_log" +#define WSREP_CLUSTER_TABLE "wsrep_cluster" +#define WSREP_MEMBERS_TABLE "wsrep_cluster_members" +#define WSREP_ALLOWLIST_TABLE "wsrep_allowlist" + +LEX_CSTRING WSREP_LEX_SCHEMA= {STRING_WITH_LEN(WSREP_SCHEMA)}; +LEX_CSTRING WSREP_LEX_STREAMING= {STRING_WITH_LEN(WSREP_STREAMING_TABLE)}; +LEX_CSTRING WSREP_LEX_CLUSTER= {STRING_WITH_LEN(WSREP_CLUSTER_TABLE)}; +LEX_CSTRING WSREP_LEX_MEMBERS= {STRING_WITH_LEN(WSREP_MEMBERS_TABLE)}; +LEX_CSTRING WSREP_LEX_ALLOWLIST= 
{STRING_WITH_LEN(WSREP_ALLOWLIST_TABLE)}; + const char* wsrep_sr_table_name_full= WSREP_SCHEMA "/" WSREP_STREAMING_TABLE; static const std::string wsrep_schema_str= WSREP_SCHEMA; diff --git a/sql/wsrep_schema.h b/sql/wsrep_schema.h index 81816bbc243..c9004d076bd 100644 --- a/sql/wsrep_schema.h +++ b/sql/wsrep_schema.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2019 Codership Oy +/* Copyright (C) 2015-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,12 +33,6 @@ struct TABLE_LIST; struct st_mysql_lex_string; typedef struct st_mysql_lex_string LEX_STRING; -#define WSREP_SCHEMA "mysql" -#define WSREP_STREAMING_TABLE "wsrep_streaming_log" -#define WSREP_CLUSTER_TABLE "wsrep_cluster" -#define WSREP_MEMBERS_TABLE "wsrep_cluster_members" -#define WSREP_ALLOWLIST_TABLE "wsrep_allowlist" - /** Name of the table in `wsrep_schema_str` used for storing streaming replication data. In an InnoDB full format, e.g. "database/tablename". 
*/ extern const char* wsrep_sr_table_name_full; @@ -169,4 +163,10 @@ class Wsrep_schema extern Wsrep_schema* wsrep_schema; +extern LEX_CSTRING WSREP_LEX_SCHEMA; +extern LEX_CSTRING WSREP_LEX_STREAMING; +extern LEX_CSTRING WSREP_LEX_CLUSTER; +extern LEX_CSTRING WSREP_LEX_MEMBERS; +extern LEX_CSTRING WSREP_LEX_ALLOWLIST; + #endif /* !WSREP_SCHEMA_H */ diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc index 71252c94399..5717c5af997 100644 --- a/sql/wsrep_server_service.cc +++ b/sql/wsrep_server_service.cc @@ -166,9 +166,16 @@ void Wsrep_server_service::bootstrap() wsrep_set_SE_checkpoint(wsrep::gtid::undefined(), wsrep_gtid_server.undefined()); } +static std::atomic suppress_logging{false}; +void wsrep_suppress_error_logging() { suppress_logging= true; } + void Wsrep_server_service::log_message(enum wsrep::log::level level, - const char* message) + const char *message) { + if (suppress_logging.load(std::memory_order_relaxed)) + { + return; + } switch (level) { case wsrep::log::debug: diff --git a/sql/wsrep_server_service.h b/sql/wsrep_server_service.h index 3a7da229cd4..9a1e148b55f 100644 --- a/sql/wsrep_server_service.h +++ b/sql/wsrep_server_service.h @@ -99,4 +99,8 @@ class Wsrep_storage_service; Wsrep_storage_service* wsrep_create_storage_service(THD *orig_thd, const char *ctx); +/** + Suppress all error logging from wsrep/Galera library. 
+ */ +void wsrep_suppress_error_logging(); #endif /* WSREP_SERVER_SERVICE */ diff --git a/sql/wsrep_server_state.cc b/sql/wsrep_server_state.cc index f80320fe216..ddb289c6802 100644 --- a/sql/wsrep_server_state.cc +++ b/sql/wsrep_server_state.cc @@ -21,6 +21,8 @@ #include "wsrep_binlog.h" /* init/deinit group commit */ #include "wsrep_plugin.h" /* make/destroy sysvar helpers */ +#include "my_stacktrace.h" /* my_safe_printf_stderr() */ + mysql_mutex_t LOCK_wsrep_server_state; mysql_cond_t COND_wsrep_server_state; @@ -154,3 +156,23 @@ void Wsrep_server_state::deinit_provider_services() m_provider_services= wsrep::provider::services(); } +void Wsrep_server_state::handle_fatal_signal() +{ + if (m_instance) + { + /* Galera background threads are still running and the logging may be + relatively verbose in case of networking error. Silence all wsrep + logging before shutting down networking to avoid garbling signal + handler output. */ + my_safe_printf_stderr("WSREP: Suppressing further logging\n"); + wsrep_suppress_error_logging(); + + /* Shut down all communication with other nodes to fail silently. 
*/ + my_safe_printf_stderr("WSREP: Shutting down network communications\n"); + if (m_instance->provider().set_node_isolation( + wsrep::provider::node_isolation::isolated)) { + my_safe_printf_stderr("WSREP: Galera library does not support node isolation\n"); + } + my_safe_printf_stderr("\n"); + } +} diff --git a/sql/wsrep_server_state.h b/sql/wsrep_server_state.h index d169e5b219d..91e9c34b764 100644 --- a/sql/wsrep_server_state.h +++ b/sql/wsrep_server_state.h @@ -74,6 +74,8 @@ public: return m_provider_services; } + static void handle_fatal_signal(); + private: Wsrep_server_state(const std::string& name, const std::string& incoming_address, diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 75bff992932..76da1d70174 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1944,7 +1944,8 @@ wait_signal: else { WSREP_WARN("Received unknown signal: '%s'", out); - err = -EINVAL; + /* since it is the end of the loop, we must set error code */ + err=-EINVAL; proc.wait(); } } diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index db75f973eab..c666fb0ca50 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -1219,19 +1219,43 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, if (!resize_flush_buf) { ut_ad(is_pmem()); + lsn_lock.wr_lock(); const size_t resize_capacity{resize_target - START_OFFSET}; - const lsn_t resizing{resize_in_progress()}; - if (UNIV_UNLIKELY(lsn < resizing)) { - size_t l= resizing - lsn; - if (l >= len) - return; - end+= l - len; - len-= l; - lsn+= l; + const lsn_t resizing{resize_in_progress()}; + /* For memory-mapped log, log_t::resize_start() would never + set log_sys.resize_lsn to less than log_sys.lsn. It cannot + execute concurrently with this thread, because we are holding + log_sys.latch and it would hold an exclusive log_sys.latch. 
*/ + if (UNIV_UNLIKELY(lsn < resizing)) + { + /* This function may execute in multiple concurrent threads + that hold a shared log_sys.latch. Before we got lsn_lock, + another thread could have executed resize_lsn.store(lsn) below + with a larger lsn than ours. + + append_prepare() guarantees that the concurrent writes + cannot overlap, that is, our entire log must be discarded. + Besides, incomplete mini-transactions cannot be parsed anyway. */ + ut_ad(resizing >= lsn + len); + goto pmem_done; + } + + s= START_OFFSET; + + if (UNIV_UNLIKELY(lsn - resizing + len >= resize_capacity)) + { + resize_lsn.store(lsn, std::memory_order_relaxed); + lsn= 0; + } + else + { + lsn-= resizing; + s+= lsn; + } } - lsn-= resizing; - s= START_OFFSET + lsn % resize_capacity; + + ut_ad(s + len <= resize_target); if (UNIV_UNLIKELY(end < &buf[START_OFFSET])) { @@ -1241,59 +1265,22 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, ut_ad(end + capacity() + len >= &buf[file_size]); size_t l= size_t(buf - (end - START_OFFSET)); - if (UNIV_LIKELY(s + len <= resize_target)) - { - /* The destination buffer (log_sys.resize_buf) did not wrap around */ - memcpy(resize_buf + s, end + capacity(), l); - memcpy(resize_buf + s + l, &buf[START_OFFSET], len - l); - goto pmem_nowrap; - } - else - { - /* Both log_sys.buf and log_sys.resize_buf wrapped around */ - const size_t rl= resize_target - s; - if (l <= rl) - { - /* log_sys.buf wraps around first */ - memcpy(resize_buf + s, end + capacity(), l); - memcpy(resize_buf + s + l, &buf[START_OFFSET], rl - l); - memcpy(resize_buf + START_OFFSET, &buf[START_OFFSET + rl - l], - len - l); - } - else - { - /* log_sys.resize_buf wraps around first */ - memcpy(resize_buf + s, end + capacity(), rl); - memcpy(resize_buf + START_OFFSET, end + capacity() + rl, l - rl); - memcpy(resize_buf + START_OFFSET + (l - rl), - &buf[START_OFFSET], len - l); - } - goto pmem_wrap; - } + memcpy(resize_buf + s, end + capacity(), l); + memcpy(resize_buf + s + 
l, &buf[START_OFFSET], len - l); } else { ut_ad(end + len <= &buf[file_size]); - - if (UNIV_LIKELY(s + len <= resize_target)) - { - memcpy(resize_buf + s, end, len); - pmem_nowrap: - s+= len - seq; - } - else - { - /* The log_sys.resize_buf wrapped around */ - memcpy(resize_buf + s, end, resize_target - s); - memcpy(resize_buf + START_OFFSET, end + (resize_target - s), - len - (resize_target - s)); - pmem_wrap: - s+= len - seq; - if (s >= resize_target) - s-= resize_capacity; - resize_lsn.fetch_add(resize_capacity); /* Move the target ahead. */ - } + memcpy(resize_buf + s, end, len); } + s+= len - seq; + + /* Always set the sequence bit. If the resized log were to wrap around, + we will advance resize_lsn. */ + ut_ad(resize_buf[s] <= 1); + resize_buf[s]= 1; + pmem_done: + lsn_lock.wr_unlock(); } else #endif @@ -1303,12 +1290,11 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, ut_ad(s + len <= buf_size); memcpy(resize_buf + s, end, len); s+= len - seq; + /* Always set the sequence bit. If the resized log were to wrap around, + we will advance resize_lsn. */ + ut_ad(resize_buf[s] <= 1); + resize_buf[s]= 1; } - - /* Always set the sequence bit. If the resized log were to wrap around, - we will advance resize_lsn. */ - ut_ad(resize_buf[s] <= 1); - resize_buf[s]= 1; } }