From 6c40590405da71a592a65b68e6b41b11437fb517 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Mon, 28 Jan 2019 00:26:23 +0100 Subject: [PATCH 01/35] use environment file in systemd units for _WSREP_START_POSITION We used to run `systemctl set-environment` to pass _WSREP_START_POSITION. This is bad because: * it clutters systemd's environment (yes, pid 1) * it requires root privileges * options (like LimitNOFILE=) are not applied Let's just create an environment file in ExecStartPre=, that is read before ExecStart= kicks in. We have _WSREP_START_POSITION around for the main process without any downsides. --- support-files/CMakeLists.txt | 2 ++ support-files/mariadb.service.in | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt index 4c1db775c19..7f49fca212b 100644 --- a/support-files/CMakeLists.txt +++ b/support-files/CMakeLists.txt @@ -32,6 +32,8 @@ ELSE() SET(MYSQLD_GROUP "mysql") SET(ini_file_extension "cnf") SET(HOSTNAME "uname -n") + get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) + SET(mysqlunixdir ${MYSQL_UNIX_DIR}) ENDIF() # XXX: shouldn't we just have variables for all this stuff and centralise diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index b7a55596029..2f990df4a15 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,20 +70,20 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true -# Execute pre and post scripts as root, otherwise it does it as User= -PermissionsStartOnly=true +# Use an environment file to pass variable _WSREP_START_POSITION +EnvironmentFile=-@mysqlunixdir@/wsrep-start-position @SYSTEMD_EXECSTARTPRE@ # Perform automatic wsrep recovery. When server is started without wsrep, # galera_recovery simply returns an empty string. In any case, however, # the script is not expected to return with a non-zero status. 
-# It is always safe to unset _WSREP_START_POSITION environment variable. +# It is always safe to remove @mysqlunixdir@/wsrep-start-position +# environment file. # Do not panic if galera_recovery script is not available. (MDEV-10538) -ExecStartPre=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ VAR=`cd @bindir@/..; @bindir@/galera_recovery`; [ $? -eq 0 ] \ - && systemctl set-environment _WSREP_START_POSITION=$VAR || exit 1" + && echo _WSREP_START_POSITION=$VAR > @mysqlunixdir@/wsrep-start-position || exit 1" # Needed to create system tables etc. # ExecStartPre=@scriptdir@/mysql_install_db -u mysql @@ -96,7 +96,7 @@ ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ ExecStart=@sbindir@/mysqld $MYSQLD_OPTS $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION # Unset _WSREP_START_POSITION environment variable. -ExecStartPost=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" +ExecStartPost=/bin/rm -f @mysqlunixdir@/wsrep-start-position @SYSTEMD_EXECSTARTPOST@ From b54e4bf00b191633c74baad143c552bfb478fff3 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Tue, 29 Jan 2019 10:03:42 +0100 Subject: [PATCH 02/35] update galera_new_cluster to use environment file Now that the systemd unit files use an environment file to pass _WSREP_START_POSITION we have to update galera_new_cluster as well. 
--- scripts/CMakeLists.txt | 15 +++++++++++++++ scripts/galera_new_cluster.sh | 4 ++-- support-files/mariadb.service.in | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) mode change 100755 => 100644 scripts/galera_new_cluster.sh diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 358045c17ab..e41c4fce68f 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -190,6 +190,9 @@ ELSE() SET(localstatedir ${MYSQL_DATADIR}) ENDIF() +get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) +SET(mysqlunixdir ${MYSQL_UNIX_DIR}) + SET(resolveip_locations "$basedir/${INSTALL_BINDIR} $basedir/bin") SET(mysqld_locations "$basedir/${INSTALL_SBINDIR} $basedir/libexec $basedir/sbin $basedir/bin") SET(errmsg_locations "$basedir/${INSTALL_MYSQLSHAREDIR}/english $basedir/share/english $basedir/share/mysql/english") @@ -207,6 +210,18 @@ INSTALL_SCRIPT( DESTINATION ${INSTALL_SCRIPTDIR} COMPONENT Server ) + +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/galera_new_cluster.sh + ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster ESCAPE_QUOTES @ONLY) + EXECUTE_PROCESS( + COMMAND chmod +x ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster + ) + +INSTALL_SCRIPT( + "${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster" + DESTINATION ${INSTALL_SCRIPTDIR} + COMPONENT Server + ) ENDIF() SET(prefix "${CMAKE_INSTALL_PREFIX}") diff --git a/scripts/galera_new_cluster.sh b/scripts/galera_new_cluster.sh old mode 100755 new mode 100644 index ac9dcf42102..b6617378b90 --- a/scripts/galera_new_cluster.sh +++ b/scripts/galera_new_cluster.sh @@ -21,11 +21,11 @@ EOF exit 0 fi -systemctl set-environment _WSREP_NEW_CLUSTER='--wsrep-new-cluster' && \ +echo _WSREP_NEW_CLUSTER='--wsrep-new-cluster' > @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" && \ systemctl restart ${1:-mariadb} extcode=$? 
-systemctl set-environment _WSREP_NEW_CLUSTER='' +rm -f @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" exit $extcode diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index 2f990df4a15..19e2e16abeb 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,6 +70,9 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true +# Use an environment file to pass variable _WSREP_NEW_CLUSTER +EnvironmentFile=-@mysqlunixdir@/wsrep-new-cluster-%N + # Use an environment file to pass variable _WSREP_START_POSITION EnvironmentFile=-@mysqlunixdir@/wsrep-start-position From 277968aa4cfa46a290af1c44e9d756381005939a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 20 Jun 2023 14:57:04 +0300 Subject: [PATCH 03/35] MDEV-31413 : Node has been dropped from the cluster on Startup / Shutdown with async replica There were two related problems: (1) A Galera node that is defined as a slave to an async MariaDB master might at restart do SST (state transfer) and as part of that it will copy the mysql.gtid_slave_pos table. The problem is that updates on that table are not replicated on a cluster. Therefore, the table from a donor that is not a slave is copied, and the joiner loses the gtid position it was at and starts executing events from the wrong position of the binlog. This incorrect position could break replication and cause the node to be dropped, requiring user action. (2) The slave sql thread might start executing events before galera is ready (wsrep_ready=ON) and that could also cause the node to be dropped from the cluster. In this fix we enable replication of the mysql.gtid_slave_pos table on a cluster. In this way all nodes in a cluster will know the gtid slave position, and even after SST the joiner knows the correct gtid position to start from. Furthermore, we wait for galera to be ready before the slave sql thread executes any events, to prevent too early execution. 
Signed-off-by: Julius Goryavsky --- .../r/galera_as_slave_gtid_myisam.result | 9 +- .../galera/r/galera_restart_replica.result | 122 ++++++++++ .../galera/t/galera_as_slave_gtid_myisam.cnf | 1 + .../galera/t/galera_as_slave_gtid_myisam.test | 12 +- .../suite/galera/t/galera_restart_replica.cnf | 20 ++ .../galera/t/galera_restart_replica.test | 212 ++++++++++++++++++ sql/rpl_gtid.cc | 33 ++- sql/slave.cc | 2 + sql/wsrep_mysqld.cc | 26 ++- sql/wsrep_mysqld.h | 4 +- sql/wsrep_priv.h | 4 +- sql/wsrep_server_service.cc | 7 +- 12 files changed, 433 insertions(+), 19 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_restart_replica.result create mode 100644 mysql-test/suite/galera/t/galera_restart_replica.cnf create mode 100644 mysql-test/suite/galera/t/galera_restart_replica.test diff --git a/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result b/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result index b498f334bf8..6559c7828be 100644 --- a/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result +++ b/mysql-test/suite/galera/r/galera_as_slave_gtid_myisam.result @@ -17,12 +17,15 @@ SELECT LENGTH(@@global.gtid_binlog_state) > 1; LENGTH(@@global.gtid_binlog_state) > 1 1 connection node_1; +SELECT COUNT(*) AS EXPECT_1 FROM t1; +EXPECT_1 +1 gtid_binlog_state_equal 0 connection node_2; -SELECT COUNT(*) AS EXPECT_0 FROM t1; -EXPECT_0 -0 +SELECT COUNT(*) AS EXPECT_1 FROM t1; +EXPECT_1 +1 gtid_binlog_state_equal 0 #cleanup diff --git a/mysql-test/suite/galera/r/galera_restart_replica.result b/mysql-test/suite/galera/r/galera_restart_replica.result new file mode 100644 index 00000000000..9b7e9fd259f --- /dev/null +++ b/mysql-test/suite/galera/r/galera_restart_replica.result @@ -0,0 +1,122 @@ +connection node_2; +connection node_1; +connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +create user repl@'%' identified by 'repl'; +grant all on *.* to repl@'%'; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +connection node_1; +connection 
node_2; +connection node_2; +START SLAVE; +connection node_3; +CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +# Verify that graceful shutdown succeeds. +# Force SST +connection node_1; +# Waiting until node_2 is not part of cluster anymore +connection node_2; +# Start node_2 again +¤ Wait until node_2 is back on cluster +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_3; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_3; +drop table t1; +connection node_2; +connection node_1; +connection node_3; +CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; +EXPECT_10000 +10000 +connection node_2; +# Verify that graceful shutdown succeeds. 
+# Force SST +connection node_1; +# Waiting until node_2 is not part of cluster anymore +connection node_3; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_2; +# Start node_2 again +¤ Wait until node_2 is back on cluster +connection node_2; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_1; +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_3; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; +EXPECT_20000 +20000 +connection node_3; +drop table t1; +connection node_2; +connection node_1; +connection node_2; +STOP SLAVE; +RESET SLAVE ALL; +connection node_3; +RESET MASTER; +connection node_1; +disconnect node_3; +disconnect node_2; +disconnect node_1; +# End of test diff --git a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf index 01d2eb12630..d3f33bd1427 100644 --- a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf +++ b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.cnf @@ -4,3 +4,4 @@ log-bin=mysqld-bin log-slave-updates binlog-format=ROW +wsrep-replicate-myisam=ON diff --git a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test index 8787f864a99..60476bc45a7 100644 --- a/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test +++ b/mysql-test/suite/galera/t/galera_as_slave_gtid_myisam.test @@ -44,12 +44,20 @@ SELECT LENGTH(@@global.gtid_binlog_state) > 1; --let $wait_condition = SELECT COUNT(*) = 1 FROM t1; --source include/wait_condition.inc +SELECT COUNT(*) AS EXPECT_1 FROM t1; + --disable_query_log --eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal; --enable_query_log --connection node_2 -SELECT COUNT(*) AS EXPECT_0 FROM t1; +--let 
$wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) AS EXPECT_1 FROM t1; --disable_query_log --eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal; @@ -59,8 +67,6 @@ SELECT COUNT(*) AS EXPECT_0 FROM t1; --connection node_3 DROP TABLE t1; ---sleep 1 - --connection node_1 --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc diff --git a/mysql-test/suite/galera/t/galera_restart_replica.cnf b/mysql-test/suite/galera/t/galera_restart_replica.cnf new file mode 100644 index 00000000000..6713e301527 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_restart_replica.cnf @@ -0,0 +1,20 @@ +!include ../galera_2nodes_as_slave.cnf + +[mysqld] +wsrep-debug=1 + +[mysqld.1] +server_id=15 +wsrep_gtid_mode=1 +wsrep_gtid_domain_id=16 +gtid_domain_id=11 +gtid_strict_mode=1 + +[mysqld.2] +skip-slave-start=OFF +server_id=15 +wsrep_gtid_mode=1 +wsrep_gtid_domain_id=16 +gtid_domain_id=11 +gtid_strict_mode=1 + diff --git a/mysql-test/suite/galera/t/galera_restart_replica.test b/mysql-test/suite/galera/t/galera_restart_replica.test new file mode 100644 index 00000000000..2cc3a1dcff2 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_restart_replica.test @@ -0,0 +1,212 @@ +# +# Test Galera as a replica to a MySQL async replication +# +# The galera/galera_2node_slave.cnf describes the setup of the nodes +# +--source include/big_test.inc +--source include/force_restart.inc +--source include/galera_cluster.inc +--source include/have_sequence.inc + +# As node #3 is not a Galera node, and galera_cluster.inc does not open connetion to it +# we open the node_3 connection here +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + +create user repl@'%' identified by 'repl'; 
+grant all on *.* to repl@'%'; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; + +--let $node_1 = node_1 +--let $node_2 = node_2 +--source include/auto_increment_offset_save.inc + +--connection node_2 +--disable_query_log +--eval CHANGE MASTER TO master_host='127.0.0.1', master_user='repl', master_password='repl', master_port=$NODE_MYPORT_3, master_use_gtid=slave_pos; +--enable_query_log +START SLAVE; + +--connection node_3 + +CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb; +--disable_query_log +INSERT INTO t1 SELECT seq, 'test' from seq_1_to_10000; +--enable_query_log +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--echo # Verify that graceful shutdown succeeds. 
+--source include/shutdown_mysqld.inc +--echo # Force SST +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +--echo # Waiting until node_2 is not part of cluster anymore +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +--connection node_2 +--echo # Start node_2 again +--source include/start_mysqld.inc + +--echo ¤ Wait until node_2 is back on cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc + +--connection node_2 +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_1 +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_3 +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +# +# Cleanup +# +--connection node_3 +drop table t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +# +# Case 2 : While slave is down add writes to master +# + +--connection node_3 + +CREATE TABLE 
t1 (id bigint primary key, msg varchar(100)) engine=innodb; +--disable_query_log +INSERT INTO t1 SELECT seq, 'test' from seq_1_to_10000; +--enable_query_log +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1; +--source include/wait_condition.inc + +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_10000 FROM t1; + +--connection node_2 +--echo # Verify that graceful shutdown succeeds. +--source include/shutdown_mysqld.inc +--echo # Force SST +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +--echo # Waiting until node_2 is not part of cluster anymore +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +# Add writes to master +--connection node_3 +--disable_query_log +INSERT INTO t1 SELECT seq, 'test' from seq_20001_to_30000; +--enable_query_log +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +--connection node_2 +--echo # Start node_2 again +--source include/start_mysqld.inc + +--echo ¤ Wait until node_2 is back on cluster +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 
'wsrep_cluster_status'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; +--source include/wait_condition.inc +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; +--source include/wait_condition.inc +SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos; +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +--connection node_3 +SELECT COUNT(*) AS EXPECT_20000 FROM t1; + +# +# Cleanup +# +--connection node_3 +drop table t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--connection node_1 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--connection node_2 +STOP SLAVE; +RESET SLAVE ALL; + +--connection node_3 +RESET MASTER; + +--connection node_1 +--disconnect node_3 + +--source include/auto_increment_offset_restore.inc +--source include/galera_end.inc +--echo # End of test diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index e7ff8924874..b0c9a018ae8 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -28,6 +28,10 @@ #include "rpl_rli.h" #include "slave.h" #include "log_event.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" // wsrep_thd_is_local +#include "wsrep_trans_observer.h" // wsrep_start_trx_if_not_started +#endif const LEX_CSTRING rpl_gtid_slave_state_table_name= { STRING_WITH_LEN("gtid_slave_pos") }; @@ -690,10 +694,18 @@ 
rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, #ifdef WITH_WSREP /* - Updates in slave state table should not be appended to galera transaction - writeset. + We should replicate local gtid_slave_pos updates to other nodes. + In applier we should not append them to galera writeset. */ - thd->wsrep_ignore_table= true; + if (WSREP_ON_ && wsrep_thd_is_local(thd)) + { + thd->wsrep_ignore_table= false; + wsrep_start_trx_if_not_started(thd); + } + else + { + thd->wsrep_ignore_table= true; + } #endif if (!in_transaction) @@ -859,9 +871,20 @@ rpl_slave_state::gtid_delete_pending(THD *thd, #ifdef WITH_WSREP /* - Updates in slave state table should not be appended to galera transaction - writeset. + We should replicate local gtid_slave_pos updates to other nodes. + In applier we should not append them to galera writeset. */ + if (WSREP_ON_ && wsrep_thd_is_local(thd) && + thd->wsrep_cs().state() != wsrep::client_state::s_none) + { + if (thd->wsrep_trx().active() == false) + { + if (thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID) + thd->set_query_id(next_query_id()); + wsrep_start_transaction(thd, thd->wsrep_next_trx_id()); + } + thd->wsrep_ignore_table= false; + } thd->wsrep_ignore_table= true; #endif diff --git a/sql/slave.cc b/sql/slave.cc index 0b575b5f714..3fe89a95e5e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -5484,6 +5484,8 @@ pthread_handler_t handle_slave_sql(void *arg) mysql_mutex_unlock(&rli->data_lock); #ifdef WITH_WSREP wsrep_open(thd); + if (WSREP_ON_) + wsrep_wait_ready(thd); if (wsrep_before_command(thd)) { WSREP_WARN("Slave SQL wsrep_before_command() failed"); diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index a8247f977cf..a345f8c6b35 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1,4 +1,4 @@ -/* Copyright 2008-2022 Codership Oy +/* Copyright 2008-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published 
by @@ -3151,3 +3151,27 @@ bool wsrep_consistency_check(THD *thd) { return thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; } + +// Wait until wsrep has reached ready state +void wsrep_wait_ready(THD *thd) +{ + mysql_mutex_lock(&LOCK_wsrep_ready); + while(!wsrep_ready) + { + WSREP_INFO("Waiting to reach ready state"); + mysql_cond_wait(&COND_wsrep_ready, &LOCK_wsrep_ready); + } + WSREP_INFO("ready state reached"); + mysql_mutex_unlock(&LOCK_wsrep_ready); +} + +void wsrep_ready_set(bool ready_value) +{ + WSREP_DEBUG("Setting wsrep_ready to %d", ready_value); + mysql_mutex_lock(&LOCK_wsrep_ready); + wsrep_ready= ready_value; + // Signal if we have reached ready state + if (wsrep_ready) + mysql_cond_signal(&COND_wsrep_ready); + mysql_mutex_unlock(&LOCK_wsrep_ready); +} diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 201e3e1a44a..3d06f165c5c 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -1,4 +1,4 @@ -/* Copyright 2008-2022 Codership Oy +/* Copyright 2008-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -442,6 +442,8 @@ enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit); wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table, enum wsrep::key::type type); +void wsrep_wait_ready(THD *thd); +void wsrep_ready_set(bool ready_value); #else /* !WITH_WSREP */ /* These macros are needed to compile MariaDB without WSREP support diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h index e88d3c862a0..e707ec79c95 100644 --- a/sql/wsrep_priv.h +++ b/sql/wsrep_priv.h @@ -1,4 +1,4 @@ -/* Copyright 2010 Codership Oy +/* Copyright 2010-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,8 +22,6 @@ #include "wsrep_api.h" #include "wsrep/server_state.hpp" -my_bool wsrep_ready_set (my_bool x); - ssize_t 
wsrep_sst_prepare (void** msg); wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc index 71f5d20feba..7bf9851c25b 100644 --- a/sql/wsrep_server_service.cc +++ b/sql/wsrep_server_service.cc @@ -342,6 +342,7 @@ void Wsrep_server_service::log_state_change( case Wsrep_server_state::s_synced: wsrep_ready= TRUE; WSREP_INFO("Synchronized with group, ready for connections"); + wsrep_ready_set(true); /* fall through */ case Wsrep_server_state::s_joined: case Wsrep_server_state::s_donor: @@ -349,16 +350,16 @@ void Wsrep_server_service::log_state_change( break; case Wsrep_server_state::s_connected: wsrep_cluster_status= "non-Primary"; - wsrep_ready= FALSE; + wsrep_ready_set(false); wsrep_connected= TRUE; break; case Wsrep_server_state::s_disconnected: - wsrep_ready= FALSE; + wsrep_ready_set(false); wsrep_connected= FALSE; wsrep_cluster_status= "Disconnected"; break; default: - wsrep_ready= FALSE; + wsrep_ready_set(false); wsrep_cluster_status= "non-Primary"; break; } From 48e6918c94d3a2caa99feb4b1195b9d6e2739581 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 8 Aug 2023 14:01:36 +0100 Subject: [PATCH 04/35] Revert "update galera_new_cluster to use environment file" This reverts commit b54e4bf00b191633c74baad143c552bfb478fff3. 
--- scripts/CMakeLists.txt | 15 --------------- scripts/galera_new_cluster.sh | 4 ++-- support-files/mariadb.service.in | 3 --- 3 files changed, 2 insertions(+), 20 deletions(-) mode change 100644 => 100755 scripts/galera_new_cluster.sh diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index e41c4fce68f..358045c17ab 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -190,9 +190,6 @@ ELSE() SET(localstatedir ${MYSQL_DATADIR}) ENDIF() -get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) -SET(mysqlunixdir ${MYSQL_UNIX_DIR}) - SET(resolveip_locations "$basedir/${INSTALL_BINDIR} $basedir/bin") SET(mysqld_locations "$basedir/${INSTALL_SBINDIR} $basedir/libexec $basedir/sbin $basedir/bin") SET(errmsg_locations "$basedir/${INSTALL_MYSQLSHAREDIR}/english $basedir/share/english $basedir/share/mysql/english") @@ -210,18 +207,6 @@ INSTALL_SCRIPT( DESTINATION ${INSTALL_SCRIPTDIR} COMPONENT Server ) - -CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/galera_new_cluster.sh - ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster ESCAPE_QUOTES @ONLY) - EXECUTE_PROCESS( - COMMAND chmod +x ${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster - ) - -INSTALL_SCRIPT( - "${CMAKE_CURRENT_BINARY_DIR}/galera_new_cluster" - DESTINATION ${INSTALL_SCRIPTDIR} - COMPONENT Server - ) ENDIF() SET(prefix "${CMAKE_INSTALL_PREFIX}") diff --git a/scripts/galera_new_cluster.sh b/scripts/galera_new_cluster.sh old mode 100644 new mode 100755 index b6617378b90..ac9dcf42102 --- a/scripts/galera_new_cluster.sh +++ b/scripts/galera_new_cluster.sh @@ -21,11 +21,11 @@ EOF exit 0 fi -echo _WSREP_NEW_CLUSTER='--wsrep-new-cluster' > @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" && \ +systemctl set-environment _WSREP_NEW_CLUSTER='--wsrep-new-cluster' && \ systemctl restart ${1:-mariadb} extcode=$? 
-rm -f @mysqlunixdir@/"wsrep-new-cluster-${1:-mariadb}" +systemctl set-environment _WSREP_NEW_CLUSTER='' exit $extcode diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index 19e2e16abeb..2f990df4a15 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,9 +70,6 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true -# Use an environment file to pass variable _WSREP_NEW_CLUSTER -EnvironmentFile=-@mysqlunixdir@/wsrep-new-cluster-%N - # Use an environment file to pass variable _WSREP_START_POSITION EnvironmentFile=-@mysqlunixdir@/wsrep-start-position From 161ce045a71e306768d4609bdc35788fa5ea2a71 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 8 Aug 2023 14:01:47 +0100 Subject: [PATCH 05/35] Revert "use environment file in systemd units for _WSREP_START_POSITION" This reverts commit 6c40590405da71a592a65b68e6b41b11437fb517. --- support-files/CMakeLists.txt | 2 -- support-files/mariadb.service.in | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt index 7f49fca212b..4c1db775c19 100644 --- a/support-files/CMakeLists.txt +++ b/support-files/CMakeLists.txt @@ -32,8 +32,6 @@ ELSE() SET(MYSQLD_GROUP "mysql") SET(ini_file_extension "cnf") SET(HOSTNAME "uname -n") - get_filename_component(MYSQL_UNIX_DIR ${MYSQL_UNIX_ADDR} DIRECTORY) - SET(mysqlunixdir ${MYSQL_UNIX_DIR}) ENDIF() # XXX: shouldn't we just have variables for all this stuff and centralise diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index 2f990df4a15..b7a55596029 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -70,20 +70,20 @@ ProtectSystem=full # Prevent accessing /home, /root and /run/user ProtectHome=true -# Use an environment file to pass variable _WSREP_START_POSITION -EnvironmentFile=-@mysqlunixdir@/wsrep-start-position +# Execute pre and 
post scripts as root, otherwise it does it as User= +PermissionsStartOnly=true @SYSTEMD_EXECSTARTPRE@ # Perform automatic wsrep recovery. When server is started without wsrep, # galera_recovery simply returns an empty string. In any case, however, # the script is not expected to return with a non-zero status. -# It is always safe to remove @mysqlunixdir@/wsrep-start-position -# environment file. +# It is always safe to unset _WSREP_START_POSITION environment variable. # Do not panic if galera_recovery script is not available. (MDEV-10538) +ExecStartPre=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ VAR=`cd @bindir@/..; @bindir@/galera_recovery`; [ $? -eq 0 ] \ - && echo _WSREP_START_POSITION=$VAR > @mysqlunixdir@/wsrep-start-position || exit 1" + && systemctl set-environment _WSREP_START_POSITION=$VAR || exit 1" # Needed to create system tables etc. # ExecStartPre=@scriptdir@/mysql_install_db -u mysql @@ -96,7 +96,7 @@ ExecStartPre=/bin/sh -c "[ ! -e @bindir@/galera_recovery ] && VAR= || \ ExecStart=@sbindir@/mysqld $MYSQLD_OPTS $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION # Unset _WSREP_START_POSITION environment variable. -ExecStartPost=/bin/rm -f @mysqlunixdir@/wsrep-start-position +ExecStartPost=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION" @SYSTEMD_EXECSTARTPOST@ From fccfad153f4c3fd59496021cbfc62afa7e5f0511 Mon Sep 17 00:00:00 2001 From: Monty Date: Tue, 8 Aug 2023 17:17:01 +0300 Subject: [PATCH 06/35] MDEV-31864 Assertion `d >= 0' failed in COST_ADD with join_cache_level > 2 and partitions The problem was a partitioned table had 0 rows, which caused a division by zero at hash_join_fanout() Fix: Do not call hash_join_fanout with 0 rows. 
--- mysql-test/main/selectivity.result | 29 +++++++++++++++++++++++ mysql-test/main/selectivity.test | 21 ++++++++++++++++ mysql-test/main/selectivity_innodb.result | 29 +++++++++++++++++++++++ sql/sql_select.cc | 4 +++- 4 files changed, 82 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index 4d1ad98e905..f07720bb689 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -2084,3 +2084,32 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY a range t1_c2 t1_c2 5 NULL 5 Using where; Using index; LooseScan 1 PRIMARY t1 ref t1_c2 t1_c2 5 test.a.c2 2 drop table t1,t2; +# +# MDEV-31864 Assertion `d >= 0' failed in COST_ADD with join_cache_level +# > 2 and partitions +# +SET join_cache_level= 3; +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 (b INT, c INT); +INSERT INTO t2 VALUES (3,4),(5,6); +CREATE TABLE t3 (d INT, e INT) PARTITION BY RANGE COLUMNS (d) (p1 VALUES LESS THAN (1000), pn VALUES LESS THAN (MAXVALUE)); +ANALYZE TABLE t1, t2, t3 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +test.t3 analyze status Engine-independent statistics collected +test.t3 analyze status OK +explain SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.b = t3.e) ON t2.c = 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 0 Using where +SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.b = t3.e) ON t2.c = 1; +a b c d e +1 NULL NULL NULL NULL +2 NULL NULL NULL NULL +set join_cache_level= default; +DROP TABLE t1, t2, t3; diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test index 
a41b695608d..ee4c117584b 100644 --- a/mysql-test/main/selectivity.test +++ b/mysql-test/main/selectivity.test @@ -2,6 +2,7 @@ --source include/have_stat_tables.inc --source include/have_sequence.inc --source include/default_charset.inc +--source include/have_partition.inc --disable_warnings drop table if exists t0,t1,t2,t3; @@ -1429,3 +1430,23 @@ truncate table t1; insert into t1 select * from t2; explain select * from t1 where t1.c2 in (select a.c2 from t1 a) and c2 >= 3 order by c2; drop table t1,t2; + +--echo # +--echo # MDEV-31864 Assertion `d >= 0' failed in COST_ADD with join_cache_level +--echo # > 2 and partitions +--echo # + +SET join_cache_level= 3; + +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 (b INT, c INT); +INSERT INTO t2 VALUES (3,4),(5,6); +CREATE TABLE t3 (d INT, e INT) PARTITION BY RANGE COLUMNS (d) (p1 VALUES LESS THAN (1000), pn VALUES LESS THAN (MAXVALUE)); +ANALYZE TABLE t1, t2, t3 PERSISTENT FOR ALL; + +explain SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.b = t3.e) ON t2.c = 1; +SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.b = t3.e) ON t2.c = 1; + +set join_cache_level= default; +DROP TABLE t1, t2, t3; \ No newline at end of file diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result index 79d08e455a8..102242dca76 100644 --- a/mysql-test/main/selectivity_innodb.result +++ b/mysql-test/main/selectivity_innodb.result @@ -2091,6 +2091,35 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY a range t1_c2 t1_c2 5 NULL 5 Using where; Using index; LooseScan 1 PRIMARY t1 ref t1_c2 t1_c2 5 test.a.c2 2 drop table t1,t2; +# +# MDEV-31864 Assertion `d >= 0' failed in COST_ADD with join_cache_level +# > 2 and partitions +# +SET join_cache_level= 3; +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 (b INT, c INT); +INSERT INTO t2 VALUES (3,4),(5,6); +CREATE TABLE t3 (d INT, e INT) PARTITION BY RANGE COLUMNS (d) (p1 VALUES LESS THAN 
(1000), pn VALUES LESS THAN (MAXVALUE)); +ANALYZE TABLE t1, t2, t3 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +test.t3 analyze status Engine-independent statistics collected +test.t3 analyze status OK +explain SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.b = t3.e) ON t2.c = 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 2 Using where +SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.b = t3.e) ON t2.c = 1; +a b c d e +1 NULL NULL NULL NULL +2 NULL NULL NULL NULL +set join_cache_level= default; +DROP TABLE t1, t2, t3; set optimizer_switch=@save_optimizer_switch_for_selectivity_test; set @tmp_ust= @@use_stat_tables; set @tmp_oucs= @@optimizer_use_condition_selectivity; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index e68020a4bd0..b5714af2075 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8145,6 +8145,7 @@ double hash_join_fanout(JOIN *join, JOIN_TAB *tab, table_map remaining_tables, bool found_not_usable_field= 0; bool found_usable_field __attribute__((unused))= 0; DBUG_ENTER("hash_join_cardinality"); + DBUG_ASSERT(rnd_records > 0 && min_freq > 0); Json_writer_object trace_obj(thd, "hash_join_cardinality"); @@ -9128,7 +9129,8 @@ best_access_path(JOIN *join, DBUG_ASSERT(hj_start_key); fanout= rnd_records; - if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY)) + if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY) && + rnd_records > 0) { /* Starting from this point, rnd_records should not be used anymore. 
From 0be4781428a4044b13b085965820a995652bb0e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 25 Jul 2023 15:25:09 +0300 Subject: [PATCH 07/35] MDEV-31737 : Node never returns from Donor/Desynced to Synced when wsrep_mode = BF_ABORT_MARIABACKUP Problem was incorrect condition when node should have resumed and resync at backup_end. Simplified condition to fix the problem and added missing test case for this wsrep_mode = BF_ABORT_MARIABACKUP. Signed-off-by: Julius Goryavsky --- .../r/galera_bf_abort_mariabackup.result | 58 ++++++++ .../galera/t/galera_bf_abort_mariabackup.cnf | 16 +++ .../galera/t/galera_bf_abort_mariabackup.test | 136 ++++++++++++++++++ sql/backup.cc | 18 +-- 4 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test diff --git a/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result b/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result new file mode 100644 index 00000000000..88c200ee933 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result @@ -0,0 +1,58 @@ +connection node_2; +connection node_1; +connection node_1; +connection node_2; +# Case 1 : MariaBackup SST +connection node_1; +CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB; +INSERT INTO t VALUES(1); +# Restart node_2, force SST. +connection node_2; +connection node_1; +connection node_2; +Starting server ... +connection node_1; +# Both should return FOUND 2 as we have bootstrap and SST +FOUND 2 /Desyncing and pausing the provider/ in mysqld.1.err +FOUND 2 /Resuming and resyncing the provider/ in mysqld.1.err +connection node_1; +SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP"; +# Restart node_2, force SST. 
+connection node_2; +connection node_1; +INSERT INTO t VALUES(2); +connection node_2; +Starting server ... +connection node_2; +connection node_1; +# Both should return FOUND 3 as we have 1 new SST +FOUND 3 /Desyncing and pausing the provider/ in mysqld.1.err +FOUND 3 /Resuming and resyncing the provider/ in mysqld.1.err +SET GLOBAL wsrep_mode = ""; +DROP TABLE t; +# Case 2: MariaBackup backup from node_2 +connection node_1; +CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB; +INSERT INTO t VALUES(1),(2),(3),(4),(5); +connection node_2; +SET GLOBAL wsrep_mode = ""; +SELECT @@wsrep_mode; +@@wsrep_mode + +# Both should return FOUND 1 as we have backup +FOUND 1 /Desyncing and pausing the provider/ in mysqld.2.err +FOUND 1 /Resuming and resyncing the provider/ in mysqld.2.err +SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP"; +SELECT @@wsrep_mode; +@@wsrep_mode +BF_ABORT_MARIABACKUP +# Both should return FOUND 1 as node should not desync +FOUND 1 /Desyncing and pausing the provider/ in mysqld.2.err +FOUND 1 /Resuming and resyncing the provider/ in mysqld.2.err +# Should return FOUND 1 because only last backup does not desync +FOUND 1 /Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used./ in mysqld.2.err +SET GLOBAL wsrep_mode = ""; +connection node_1; +DROP TABLE t; +disconnect node_2; +disconnect node_1; diff --git a/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf b/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf new file mode 100644 index 00000000000..857a4101406 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf @@ -0,0 +1,16 @@ +!include ../galera_2nodes.cnf + +[mysqld] +wsrep_sst_method=mariabackup +wsrep_sst_auth="root:" +wsrep_debug=1 + +[mysqld.1] +wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' + +[mysqld.2] +wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' + +[sst] +transferfmt=@ENV.MTR_GALERA_TFMT 
+streamfmt=mbstream diff --git a/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test b/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test new file mode 100644 index 00000000000..34c3f5d3621 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test @@ -0,0 +1,136 @@ +--source include/galera_cluster.inc +--source include/have_mariabackup.inc + +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + +# +--echo # Case 1 : MariaBackup SST +# +--connection node_1 +CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB; +INSERT INTO t VALUES(1); +# +# In default settings donor should desync +# +--echo # Restart node_2, force SST. +--connection node_2 +--source include/shutdown_mysqld.inc +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +--connection node_2 +--echo Starting server ... 
+let $restart_noprint=2; +--source include/start_mysqld.inc + +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc + +--connection node_1 +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; +--echo # Both should return FOUND 2 as we have bootstrap and SST +let SEARCH_PATTERN = Desyncing and pausing the provider; +--source include/search_pattern_in_file.inc +let SEARCH_PATTERN = Resuming and resyncing the provider; +--source include/search_pattern_in_file.inc + +# +# Now we set wsrep_mode = BF_ABORT_MARIABACKUP +# +--connection node_1 +SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP"; + +--echo # Restart node_2, force SST. +--connection node_2 +--source include/shutdown_mysqld.inc +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc +INSERT INTO t VALUES(2); + +--connection node_2 +--echo Starting server ... +let $restart_noprint=2; +--source include/start_mysqld.inc + +--connection node_2 +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'; +--source include/wait_condition.inc + +# Restore original auto_increment_offset values. 
+--source include/auto_increment_offset_restore.inc + +--connection node_1 +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; +--echo # Both should return FOUND 3 as we have 1 new SST +let SEARCH_PATTERN = Desyncing and pausing the provider; +--source include/search_pattern_in_file.inc +let SEARCH_PATTERN = Resuming and resyncing the provider; +--source include/search_pattern_in_file.inc +SET GLOBAL wsrep_mode = ""; +DROP TABLE t; +# +--echo # Case 2: MariaBackup backup from node_2 +# +--connection node_1 +CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB; +INSERT INTO t VALUES(1),(2),(3),(4),(5); + +--connection node_2 +SET GLOBAL wsrep_mode = ""; +SELECT @@wsrep_mode; +let $targetdir=$MYSQLTEST_VARDIR/tmp/backup; +--let $backup_log=$MYSQLTEST_VARDIR/tmp/backup.log +--disable_result_log +--exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --defaults-group-suffix=.2 --backup --target-dir=$targetdir > $backup_log 2>&1; +--enable_result_log + +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.2.err; +--echo # Both should return FOUND 1 as we have backup +let SEARCH_PATTERN = Desyncing and pausing the provider; +--source include/search_pattern_in_file.inc +let SEARCH_PATTERN = Resuming and resyncing the provider; +--source include/search_pattern_in_file.inc +# +# Now we set wsrep_mode = BF_ABORT_MARIABACKUP +# +SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP"; +SELECT @@wsrep_mode; +let $targetdir=$MYSQLTEST_VARDIR/tmp/backup2; +--let $backup_log=$MYSQLTEST_VARDIR/tmp/backup2.log +--disable_result_log +--exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --defaults-group-suffix=.2 --backup --target-dir=$targetdir --mysqld-args --wsrep-mode="BF_ABORT_MARIABACKUP" > $backup_log 2>&1; +--enable_result_log + +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.2.err; +--echo # Both should return FOUND 1 as node should not desync +let SEARCH_PATTERN = Desyncing and pausing the provider; +--source include/search_pattern_in_file.inc +let SEARCH_PATTERN = 
Resuming and resyncing the provider; +--source include/search_pattern_in_file.inc +--echo # Should return FOUND 1 because only last backup does not desync +let SEARCH_PATTERN = Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.; +--source include/search_pattern_in_file.inc + +SET GLOBAL wsrep_mode = ""; + +--connection node_1 +DROP TABLE t; + +--source include/auto_increment_offset_restore.inc + +--source include/galera_end.inc diff --git a/sql/backup.cc b/sql/backup.cc index 5f74c67add7..60505fcc8c4 100644 --- a/sql/backup.cc +++ b/sql/backup.cc @@ -292,6 +292,7 @@ static bool backup_block_ddl(THD *thd) thd->clear_error(); #ifdef WITH_WSREP + DBUG_ASSERT(thd->wsrep_desynced_backup_stage == false); /* if user is specifically choosing to allow BF aborting for BACKUP STAGE BLOCK_DDL lock holder, then do not desync and pause the node from cluster replication. @@ -303,6 +304,7 @@ static bool backup_block_ddl(THD *thd) if (WSREP_NNULL(thd)) { Wsrep_server_state &server_state= Wsrep_server_state::instance(); + if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) || server_state.state() == Wsrep_server_state::s_donor) { @@ -352,17 +354,17 @@ static bool backup_block_ddl(THD *thd) /* There can't be anything more that needs to be logged to ddl log */ THD_STAGE_INFO(thd, org_stage); stop_ddl_logging(); -#ifdef WITH_WSREP - // Allow tests to block the applier thread using the DBUG facilities - DBUG_EXECUTE_IF("sync.wsrep_after_mdl_block_ddl", + + // Allow tests to block the backup thread + DBUG_EXECUTE_IF("sync.after_mdl_block_ddl", { const char act[]= - "now " - "signal signal.wsrep_apply_toi"; + "now " + "SIGNAL sync.after_mdl_block_ddl_reached " + "WAIT_FOR signal.after_mdl_block_ddl_continue"; DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); };); -#endif /* WITH_WSREP */ DBUG_RETURN(0); err: @@ -423,8 +425,8 @@ bool backup_end(THD *thd) thd->current_backup_stage= BACKUP_FINISHED; thd->mdl_context.release_lock(old_ticket); #ifdef WITH_WSREP 
- if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage && - !wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP)) + // If node was desynced, resume and resync + if (thd->wsrep_desynced_backup_stage) { Wsrep_server_state &server_state= Wsrep_server_state::instance(); server_state.resume_and_resync(); From af7a22d2fa0125d268a33169635d3258e1974148 Mon Sep 17 00:00:00 2001 From: Monty Date: Tue, 8 Aug 2023 21:02:23 +0300 Subject: [PATCH 08/35] MDEV-31501 Assertion `cond_selectivity <= 1.000000001' failed (actual value 1.125) The problem was that because there was no good indexes for the lineitem table make_join_select() tried to do a last attempt to find a usable key and called test_quick_select(). test_quick_select() found an index with fewer matching rows then every before and that caused a problem in get_range_limit_read_cost(), which assumes that 'best_rows' is the minium rows matched when all conditions has been taken into account and thus smaller than rows found in the range. Fixed by adjusting best_rows to the least rows in any index after test_quick_select() has been called. 
--- mysql-test/main/range_innodb.result | 20 ++++++++++++++++++++ mysql-test/main/range_innodb.test | 22 ++++++++++++++++++++++ sql/sql_select.cc | 14 ++++++++------ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/mysql-test/main/range_innodb.result b/mysql-test/main/range_innodb.result index 542d0eed294..98f41270524 100644 --- a/mysql-test/main/range_innodb.result +++ b/mysql-test/main/range_innodb.result @@ -230,3 +230,23 @@ SELECT a FROM t1 WHERE pk < 0 AND a <= 'w' and b > 0; a drop table t1; # End of 10.4 tests +# +# MDEV-31501 Assertion `cond_selectivity <= 1.000000001' failed +# (actual value 1.125) +# +CREATE TABLE lineitem (l_orderkey int(11) NOT NULL DEFAULT '0', l_partkey int(11) DEFAULT NULL, l_suppkey int(11) DEFAULT NULL, l_linenumber int(11) NOT NULL DEFAULT '0', l_quantity double DEFAULT NULL, l_extendedprice double DEFAULT NULL, l_discount double DEFAULT NULL, l_tax double DEFAULT NULL, l_returnflag char(1) DEFAULT NULL, l_linestatus char(1) DEFAULT NULL, l_shipDATE date DEFAULT NULL, l_commitDATE date DEFAULT NULL, l_receiptDATE date DEFAULT NULL, l_shipinstruct char(25) DEFAULT NULL, l_shipmode char(10) DEFAULT NULL, l_comment varchar(44) DEFAULT NULL, PRIMARY KEY (l_orderkey,l_linenumber), KEY i_l_shipdate (l_shipDATE), KEY i_l_suppkey_partkey (l_partkey,l_suppkey), KEY i_l_partkey (l_partkey), KEY i_l_suppkey (l_suppkey), KEY i_l_receiptdate (l_receiptDATE), KEY i_l_orderkey (l_orderkey), KEY i_l_orderkey_quantity (l_orderkey,l_quantity), KEY i_l_commitdate (l_commitDATE)) ENGINE=InnoDB; +CREATE TABLE part (p_partkey int(11) NOT NULL, p_name varchar(55) DEFAULT NULL, p_mfgr char(25) DEFAULT NULL, p_brand char(10) DEFAULT NULL, p_type varchar(25) DEFAULT NULL, p_size int(11) DEFAULT NULL, p_container char(10) DEFAULT NULL, p_retailprice double DEFAULT NULL, p_comment varchar(23) DEFAULT NULL, PRIMARY KEY (p_partkey)) ENGINE=InnoDB; +CREATE TABLE partsupp (ps_partkey int(11) NOT NULL DEFAULT '0', ps_suppkey int(11) NOT NULL DEFAULT 
'0', ps_availqty int(11) DEFAULT NULL, ps_supplycost double DEFAULT NULL, ps_comment varchar(199) DEFAULT NULL, PRIMARY KEY (ps_partkey,ps_suppkey), KEY i_ps_partkey (ps_partkey), KEY i_ps_suppkey (ps_suppkey)) ENGINE=InnoDB; +INSERT INTO lineitem VALUES (581,7,1,1,41,37187,0.09,0.07,'N','O','1997-05-26','1997-04-06','1997-06-10','TAKE BACK RETURN','MAIL','even, bold excuses about the'),(581,10,1,2,14,12740.14,0.06,0.08,'N','O','1997-05-17','1997-04-14','1997-06-08','NONE','MAIL','furiously silent depths'),(581,11,1,3,49,44639.49,0.1,0.02,'N','O','1997-02-27','1997-04-24','1997-03-10','TAKE BACK RETURN','MAIL','furiously regular theodolites wake qu'),(581,8,1,4,30,27240,0.1,0.08,'N','O','1997-06-19','1997-05-21','1997-06-22','TAKE BACK RETURN','TRUCK','even deposits wa'),(582,6,1,1,7,6342,0.07,0,'N','O','1997-11-16','1997-11-29','1997-12-10','TAKE BACK RETURN','FOB','carefully final requests sleep slyly. even,'),(582,6,1,2,49,44394,0.05,0.03,'N','O','1997-12-17','1998-01-12','1997-12-31','COLLECT COD','REG AIR','carefully regular'),(582,15,1,3,42,38430.42,0.07,0,'N','O','1997-11-15','1997-12-21','1997-12-03','COLLECT COD','SHIP','pending, spe'),(582,17,1,4,36,33012.36,0.06,0.01,'N','O','1997-12-09','1997-11-27','1997-12-26','TAKE BACK RETURN','SHIP','slyly final foxes nag permanen'); +INSERT INTO part VALUES (1,'goldenrod lace spring peru powder','Manufacturer#1','Brand#13','PROMO BURNISHED COPPER',7,'JUMBO PKG',901,'final deposits s'),(2,'blush rosy metallic lemon navajo','Manufacturer#1','Brand#13','LARGE BRUSHED BRASS',1,'LG CASE',902,'final platelets hang f'),(3,'dark green antique puff wheat','Manufacturer#4','Brand#42','STANDARD POLISHED BRASS',21,'WRAP CASE',903,'unusual excuses ac'),(4,'chocolate metallic smoke ghost drab','Manufacturer#3','Brand#34','SMALL PLATED BRASS',14,'MED DRUM',904,'ironi'),(5,'forest blush chiffon thistle chocolate','Manufacturer#3','Brand#32','STANDARD POLISHED TIN',15,'SM PKG',905,'pending, spe'),(6,'white ivory azure firebrick 
black','Manufacturer#2','Brand#24','PROMO PLATED STEEL',4,'MED BAG',906,'pending pinto be'),(7,'blue blanched tan indian olive','Manufacturer#1','Brand#11','SMALL PLATED COPPER',45,'SM BAG',907,'blithely ironic'); +INSERT INTO partsupp VALUES (1,1,3325,771.64,'requests after the carefully ironic ideas cajole alongside of the enticingly special accounts. fluffily regular deposits haggle about the blithely ironic deposits. regular requests sleep c'),(2,1,8895,378.49,'furiously even asymptotes are furiously regular plate'),(3,1,4651,920.92,'ironic, pending theodolites sleep slyly at the slyly final foxes. slyly ironic accounts sleep express accounts. quickly fina'),(4,1,1339,113.97,'furiously even accounts sleep. slyly regular deposits hag'),(5,1,3735,255.88,'even, ironic requests snooze blithely throughout the carefully pending deposits. busy deposits mold blithely after the furiously final pinto beans. regular, unus'),(6,1,8851,130.72,'carefully regular asymptotes wake slyly according to the quickly re'),(7,1,7454,763.98,'carefully even ideas should wake. blithely final instructions are special foxes. pending, ironic requests wake blithely about the furiously regular foxes. ironic idea'),(8,1,6834,249.63,'furiously unusual instructions use quickly. unusual deposits among the regular deposits doze unusual epitaph'),(9,1,7054,84.2,'final ideas through the requests boost quickly about the furiously regular accounts. blithely silent foxes affix carefully ironic instructions. blithely bold foxe'),(10,1,2952,996.12,'blithely even foxes nag furiously about the quickly ex'),(11,1,4540,709.87,'final packages mold after the carefully unusual requests. quickly fi'),(12,1,3610,659.73,'unusual dolphins sleep slyly. ironic deposits use fluffily. carefully unusual platelets poach slyly. evenly pending deposits nag ironi'),(13,1,612,169.44,'blithely special theodolites serve quickly. 
regular deposits are alongside of the fluffily sl'),(14,1,5278,650.07,'quickly even deposits doze quickly pending, bold deposits. carefully regular packages sublate carefully'),(15,1,7047,835.7,'packages haggle across the carefully express ideas. slyly bold instructions promise even, express packages. furiously ironic acc'); +SELECT ps_suppkey FROM part +JOIN partsupp ON (p_partkey = ps_partkey) +JOIN lineitem ON (ps_partkey = l_partkey AND ps_suppkey = l_suppkey) +WHERE p_partkey != 222 order by 1 limit 1; +ps_suppkey +1 +drop table partsupp,part,lineitem; +# +# End of 11.0 tests +# diff --git a/mysql-test/main/range_innodb.test b/mysql-test/main/range_innodb.test index 35511279910..f7e6a31dc63 100644 --- a/mysql-test/main/range_innodb.test +++ b/mysql-test/main/range_innodb.test @@ -225,3 +225,25 @@ SELECT a FROM t1 WHERE pk < 0 AND a <= 'w' and b > 0; drop table t1; -- echo # End of 10.4 tests + +--echo # +--echo # MDEV-31501 Assertion `cond_selectivity <= 1.000000001' failed +--echo # (actual value 1.125) +--echo # + +CREATE TABLE lineitem (l_orderkey int(11) NOT NULL DEFAULT '0', l_partkey int(11) DEFAULT NULL, l_suppkey int(11) DEFAULT NULL, l_linenumber int(11) NOT NULL DEFAULT '0', l_quantity double DEFAULT NULL, l_extendedprice double DEFAULT NULL, l_discount double DEFAULT NULL, l_tax double DEFAULT NULL, l_returnflag char(1) DEFAULT NULL, l_linestatus char(1) DEFAULT NULL, l_shipDATE date DEFAULT NULL, l_commitDATE date DEFAULT NULL, l_receiptDATE date DEFAULT NULL, l_shipinstruct char(25) DEFAULT NULL, l_shipmode char(10) DEFAULT NULL, l_comment varchar(44) DEFAULT NULL, PRIMARY KEY (l_orderkey,l_linenumber), KEY i_l_shipdate (l_shipDATE), KEY i_l_suppkey_partkey (l_partkey,l_suppkey), KEY i_l_partkey (l_partkey), KEY i_l_suppkey (l_suppkey), KEY i_l_receiptdate (l_receiptDATE), KEY i_l_orderkey (l_orderkey), KEY i_l_orderkey_quantity (l_orderkey,l_quantity), KEY i_l_commitdate (l_commitDATE)) ENGINE=InnoDB; +CREATE TABLE part (p_partkey int(11) NOT 
NULL, p_name varchar(55) DEFAULT NULL, p_mfgr char(25) DEFAULT NULL, p_brand char(10) DEFAULT NULL, p_type varchar(25) DEFAULT NULL, p_size int(11) DEFAULT NULL, p_container char(10) DEFAULT NULL, p_retailprice double DEFAULT NULL, p_comment varchar(23) DEFAULT NULL, PRIMARY KEY (p_partkey)) ENGINE=InnoDB; +CREATE TABLE partsupp (ps_partkey int(11) NOT NULL DEFAULT '0', ps_suppkey int(11) NOT NULL DEFAULT '0', ps_availqty int(11) DEFAULT NULL, ps_supplycost double DEFAULT NULL, ps_comment varchar(199) DEFAULT NULL, PRIMARY KEY (ps_partkey,ps_suppkey), KEY i_ps_partkey (ps_partkey), KEY i_ps_suppkey (ps_suppkey)) ENGINE=InnoDB; +INSERT INTO lineitem VALUES (581,7,1,1,41,37187,0.09,0.07,'N','O','1997-05-26','1997-04-06','1997-06-10','TAKE BACK RETURN','MAIL','even, bold excuses about the'),(581,10,1,2,14,12740.14,0.06,0.08,'N','O','1997-05-17','1997-04-14','1997-06-08','NONE','MAIL','furiously silent depths'),(581,11,1,3,49,44639.49,0.1,0.02,'N','O','1997-02-27','1997-04-24','1997-03-10','TAKE BACK RETURN','MAIL','furiously regular theodolites wake qu'),(581,8,1,4,30,27240,0.1,0.08,'N','O','1997-06-19','1997-05-21','1997-06-22','TAKE BACK RETURN','TRUCK','even deposits wa'),(582,6,1,1,7,6342,0.07,0,'N','O','1997-11-16','1997-11-29','1997-12-10','TAKE BACK RETURN','FOB','carefully final requests sleep slyly. 
even,'),(582,6,1,2,49,44394,0.05,0.03,'N','O','1997-12-17','1998-01-12','1997-12-31','COLLECT COD','REG AIR','carefully regular'),(582,15,1,3,42,38430.42,0.07,0,'N','O','1997-11-15','1997-12-21','1997-12-03','COLLECT COD','SHIP','pending, spe'),(582,17,1,4,36,33012.36,0.06,0.01,'N','O','1997-12-09','1997-11-27','1997-12-26','TAKE BACK RETURN','SHIP','slyly final foxes nag permanen'); +INSERT INTO part VALUES (1,'goldenrod lace spring peru powder','Manufacturer#1','Brand#13','PROMO BURNISHED COPPER',7,'JUMBO PKG',901,'final deposits s'),(2,'blush rosy metallic lemon navajo','Manufacturer#1','Brand#13','LARGE BRUSHED BRASS',1,'LG CASE',902,'final platelets hang f'),(3,'dark green antique puff wheat','Manufacturer#4','Brand#42','STANDARD POLISHED BRASS',21,'WRAP CASE',903,'unusual excuses ac'),(4,'chocolate metallic smoke ghost drab','Manufacturer#3','Brand#34','SMALL PLATED BRASS',14,'MED DRUM',904,'ironi'),(5,'forest blush chiffon thistle chocolate','Manufacturer#3','Brand#32','STANDARD POLISHED TIN',15,'SM PKG',905,'pending, spe'),(6,'white ivory azure firebrick black','Manufacturer#2','Brand#24','PROMO PLATED STEEL',4,'MED BAG',906,'pending pinto be'),(7,'blue blanched tan indian olive','Manufacturer#1','Brand#11','SMALL PLATED COPPER',45,'SM BAG',907,'blithely ironic'); +INSERT INTO partsupp VALUES (1,1,3325,771.64,'requests after the carefully ironic ideas cajole alongside of the enticingly special accounts. fluffily regular deposits haggle about the blithely ironic deposits. regular requests sleep c'),(2,1,8895,378.49,'furiously even asymptotes are furiously regular plate'),(3,1,4651,920.92,'ironic, pending theodolites sleep slyly at the slyly final foxes. slyly ironic accounts sleep express accounts. quickly fina'),(4,1,1339,113.97,'furiously even accounts sleep. slyly regular deposits hag'),(5,1,3735,255.88,'even, ironic requests snooze blithely throughout the carefully pending deposits. busy deposits mold blithely after the furiously final pinto beans. 
regular, unus'),(6,1,8851,130.72,'carefully regular asymptotes wake slyly according to the quickly re'),(7,1,7454,763.98,'carefully even ideas should wake. blithely final instructions are special foxes. pending, ironic requests wake blithely about the furiously regular foxes. ironic idea'),(8,1,6834,249.63,'furiously unusual instructions use quickly. unusual deposits among the regular deposits doze unusual epitaph'),(9,1,7054,84.2,'final ideas through the requests boost quickly about the furiously regular accounts. blithely silent foxes affix carefully ironic instructions. blithely bold foxe'),(10,1,2952,996.12,'blithely even foxes nag furiously about the quickly ex'),(11,1,4540,709.87,'final packages mold after the carefully unusual requests. quickly fi'),(12,1,3610,659.73,'unusual dolphins sleep slyly. ironic deposits use fluffily. carefully unusual platelets poach slyly. evenly pending deposits nag ironi'),(13,1,612,169.44,'blithely special theodolites serve quickly. regular deposits are alongside of the fluffily sl'),(14,1,5278,650.07,'quickly even deposits doze quickly pending, bold deposits. carefully regular packages sublate carefully'),(15,1,7047,835.7,'packages haggle across the carefully express ideas. slyly bold instructions promise even, express packages. 
furiously ironic acc'); + +SELECT ps_suppkey FROM part + JOIN partsupp ON (p_partkey = ps_partkey) + JOIN lineitem ON (ps_partkey = l_partkey AND ps_suppkey = l_suppkey) + WHERE p_partkey != 222 order by 1 limit 1; + +drop table partsupp,part,lineitem; +--echo # +--echo # End of 11.0 tests +--echo # diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b5714af2075..83c4ef21331 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8347,7 +8347,8 @@ best_access_path(JOIN *join, best.records= DBL_MAX; best.records_read= DBL_MAX; best.records_after_filter= DBL_MAX; - best.records_out= table->stat_records() * table->cond_selectivity; + best.records_out= MY_MIN(table->stat_records() * table->cond_selectivity, + table->opt_range_condition_rows); best.prev_record_reads= best.identical_keys= 0; best.filter= 0; best.key= 0; @@ -13945,18 +13946,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->best_positions[i].records_read= (double) sel->quick->records; set_if_smaller(join->best_positions[i].records_out, - join->best_positions[i].records_read); + rows2double(sel->head->opt_range_condition_rows)); } else { /* - sel->head->opt_range_condition_rows may have been updated to a smaller number than - before by a call to test_quick_select. This can happen even if the range optimizer - decided to not use the range (sel->quick was not set). + sel->head->opt_range_condition_rows may have been + updated to a smaller number than before by a call to + test_quick_select. This can happen even if the range + optimizer decided to not use the range (sel->quick was + not set). 
*/ set_if_smaller(join->best_positions[i].records_out, rows2double(sel->head->opt_range_condition_rows)); - } } else From e9333ff03cba05cff82a325d9d44720546fc7cb9 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 10 Aug 2023 16:13:32 +0300 Subject: [PATCH 09/35] MDEV-31893 Valgrind reports issues in main.join_cache_notasan This is also related to MDEV-31348 Assertion `last_key_entry >= end_pos' failed in virtual bool JOIN_CACHE_HASHED::put_record() Valgrind exposed a problem with the join_cache for hash joins: =25636== Conditional jump or move depends on uninitialised value(s) ==25636== at 0xA8FF4E: JOIN_CACHE_HASHED::init_hash_table() (sql_join_cache.cc:2901) The reason for this was that avg_record_length contained a random value if one had used SET optimizer_switch='optimize_join_buffer_size=off'. This causes either 'random size' memory to be allocated (up to join_buffer_size) which can increase memory usage or, if avg_record_length is less than the row size, memory overwrites in thd->mem_root, which is bad. Fixed by setting avg_record_length in JOIN_CACHE_HASHED::init() before it's used. There is no test case for MDEV-31893 as valgrind of join_cache_notasan checks that. I added a test case for MDEV-31348. 
--- include/no_valgrind_without_big.inc | 13 +++++++++++ mysql-test/main/join_cache.result | 16 ++++++++++++++ mysql-test/main/join_cache.test | 19 ++++++++++++++++ mysql-test/main/join_cache_notasan.test | 3 +++ sql/sql_join_cache.cc | 29 +++++++++++++++---------- sql/sql_join_cache.h | 1 + 6 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 include/no_valgrind_without_big.inc diff --git a/include/no_valgrind_without_big.inc b/include/no_valgrind_without_big.inc new file mode 100644 index 00000000000..ea1f2ac91ab --- /dev/null +++ b/include/no_valgrind_without_big.inc @@ -0,0 +1,13 @@ +# include/no_valgrind_without_big.inc +# +# If we are running with Valgrind ($VALGRIND_TEST <> 0) than the resource +# consumption (storage space needed, runtime ...) will be extreme. +# Therefore we require that the option "--big-test" is also set. +# + +if ($VALGRIND_TEST) { + if (!$BIG_TEST) + { + --skip Need "--big-test" when running with Valgrind + } +} diff --git a/mysql-test/main/join_cache.result b/mysql-test/main/join_cache.result index 20980d09001..92c04bb002b 100644 --- a/mysql-test/main/join_cache.result +++ b/mysql-test/main/join_cache.result @@ -6256,3 +6256,19 @@ length(concat(t1.f,t2.f)) DROP TABLE t; set @@optimizer_switch=@org_optimizer_switch; set @@join_buffer_size=@org_join_buffer_size; +# +# MDEV-31348 Assertion `last_key_entry >= end_pos' failed in +# virtual bool JOIN_CACHE_HASHED::put_record() +# +SET JOIN_buffer_size=1; +Warnings: +Warning 1292 Truncated incorrect join_buffer_size value: '1' +SET SESSION JOIN_cache_level=4; +SET SESSION optimizer_switch='optimize_JOIN_buffer_size=OFF'; +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +ERROR HY001: Could not create a join buffer. 
Please check and adjust the value of the variables 'JOIN_BUFFER_SIZE (128)' and 'JOIN_BUFFER_SPACE_LIMIT (2097152)' +SET JOIN_buffer_size=16384; +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +# +# End of 10.4 tests +# diff --git a/mysql-test/main/join_cache.test b/mysql-test/main/join_cache.test index 43ce3b97ad1..f8723447efe 100644 --- a/mysql-test/main/join_cache.test +++ b/mysql-test/main/join_cache.test @@ -4231,3 +4231,22 @@ SELECT length(concat(t1.f,t2.f)) FROM t t1, t t2; DROP TABLE t; set @@optimizer_switch=@org_optimizer_switch; set @@join_buffer_size=@org_join_buffer_size; + +--echo # +--echo # MDEV-31348 Assertion `last_key_entry >= end_pos' failed in +--echo # virtual bool JOIN_CACHE_HASHED::put_record() +--echo # + +SET JOIN_buffer_size=1; +SET SESSION JOIN_cache_level=4; +SET SESSION optimizer_switch='optimize_JOIN_buffer_size=OFF'; +--error ER_OUTOFMEMORY +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +SET JOIN_buffer_size=16384; +--disable_result_log +SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name); +--enable_result_log + +--echo # +--echo # End of 10.4 tests +--echo # diff --git a/mysql-test/main/join_cache_notasan.test b/mysql-test/main/join_cache_notasan.test index cfdfe4eff18..406303ef7b3 100644 --- a/mysql-test/main/join_cache_notasan.test +++ b/mysql-test/main/join_cache_notasan.test @@ -2,7 +2,10 @@ # Tests that should be in join_cache but cannot be run with ASAN --source include/have_64bit.inc +# Disable asan it asan builds crashes when trying to allocate too much memory --source include/not_asan.inc +# Valgrind is useful here, but very slow as lots of memory is allocated +--source include/no_valgrind_without_big.inc --source include/have_innodb.inc --echo # diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 1319fd59a99..f1dd23d9618 100644 --- 
a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -800,6 +800,18 @@ size_t JOIN_CACHE::get_min_join_buffer_size() } +size_t JOIN_CACHE::calc_avg_record_length() +{ + size_t len= 0; + for (JOIN_TAB *tab= start_tab; tab != join_tab; + tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) + { + len+= tab->get_used_fieldlength(); + } + len+= get_record_max_affix_length(); + return len; +} + /* Get the maximum possible size of the cache join buffer @@ -822,9 +834,9 @@ size_t JOIN_CACHE::get_min_join_buffer_size() 'max_buff_size' in order to use it directly at the next invocations of the function. - RETURN VALUE - The maximum possible size of the join buffer of this cache + The maximum possible size of the join buffer of this cache + avg_record_length is also updated if optimize_buff_size != 0 */ size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size, @@ -839,19 +851,13 @@ size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size, return max_buff_size= limit_sz; size_t max_sz; - size_t len= 0; + size_t len; double max_records, partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality(); /* Expected join buffer space used for one record */ size_t space_per_record; - for (JOIN_TAB *tab= start_tab; tab != join_tab; - tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) - { - len+= tab->get_used_fieldlength(); - } - len+= get_record_max_affix_length(); - avg_record_length= len; + len= avg_record_length= calc_avg_record_length(); len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr; space_per_record= len; @@ -2786,7 +2792,6 @@ bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain) int JOIN_CACHE_HASHED::init(bool for_explain) { TABLE_REF *ref= &join_tab->ref; - DBUG_ENTER("JOIN_CACHE_HASHED::init"); hash_table= 0; @@ -2873,6 +2878,8 @@ int JOIN_CACHE_HASHED::init_hash_table() hash_table= 0; key_entries= 0; + avg_record_length= calc_avg_record_length(); + /* Calculate the minimal possible value of size_of_key_ofs 
greater than 1 */ uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset()); for (size_of_key_ofs= 2; diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h index 8bdce1bd592..b0cfb674ef9 100644 --- a/sql/sql_join_cache.h +++ b/sql/sql_join_cache.h @@ -130,6 +130,7 @@ protected: case 4: int4store(ptr, (uint32) ofs); return; } } + size_t calc_avg_record_length(); /* The maximum total length of the fields stored for a record in the cache. From 5055490c1798aa3385ab5d8d7f5f87d665e9af40 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Thu, 3 Aug 2023 14:20:47 +0200 Subject: [PATCH 10/35] MDEV-381: fdatasync() does not correctly flush growing binlog file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert the old work-around for buggy fdatasync() on Linux ext3. This bug was fixed in Linux > 10 years ago back to kernel version at least 3.0. Reviewed-by: Marko Mäkelä Signed-off-by: Kristian Nielsen --- include/my_sys.h | 1 - mysys/my_sync.c | 18 ++---------------- sql/backup.cc | 3 +-- sql/log.cc | 12 ++++++------ 4 files changed, 9 insertions(+), 25 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index 017968ab6ce..3c44b6a0a80 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -92,7 +92,6 @@ typedef struct my_aio_result { #define MY_THREADSAFE 2048U /* my_seek(): lock fd mutex */ #define MY_SYNC 4096U /* my_copy(): sync dst file */ #define MY_SYNC_DIR 32768U /* my_create/delete/rename: sync directory */ -#define MY_SYNC_FILESIZE 65536U /* my_sync(): safe sync when file is extended */ #define MY_THREAD_SPECIFIC 0x10000U /* my_malloc(): thread specific */ #define MY_THREAD_MOVE 0x20000U /* realloc(); Memory can move */ /* Tree that should delete things automatically */ diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 736d97d14ef..6f8760c3183 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -49,13 +49,6 @@ void thr_set_sync_wait_callback(void (*before_wait)(void), (which is correct behaviour, 
if we know that the other thread synced the file before closing) - MY_SYNC_FILESIZE is useful when syncing a file after it has been extended. - On Linux, fdatasync() on ext3/ext4 file systems does not properly flush - to disk the inode data required to preserve the added data across a crash - (this looks to be a bug). But when a file is extended, inode data will most - likely need flushing in any case, so passing MY_SYNC_FILESIZE as flags - is not likely to be any slower, and will be crash safe on Linux ext3/ext4. - RETURN 0 ok -1 error @@ -88,12 +81,8 @@ int my_sync(File fd, myf my_flags) DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back")); #endif #if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC - if (!(my_flags & MY_SYNC_FILESIZE)) - res= fdatasync(fd); - else - { -#endif -#if defined(HAVE_FSYNC) + res= fdatasync(fd); +#elif defined(HAVE_FSYNC) res= fsync(fd); if (res == -1 && errno == ENOLCK) res= 0; /* Result Bug in Old FreeBSD */ @@ -102,9 +91,6 @@ int my_sync(File fd, myf my_flags) #else #error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ -#endif -#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC - } #endif } while (res == -1 && errno == EINTR); diff --git a/sql/backup.cc b/sql/backup.cc index d3f2e453a85..d45c18a3cdf 100644 --- a/sql/backup.cc +++ b/sql/backup.cc @@ -311,8 +311,7 @@ static bool backup_block_commit(THD *thd) if (mysql_bin_log.is_open()) { mysql_mutex_lock(mysql_bin_log.get_log_lock()); - mysql_file_sync(mysql_bin_log.get_log_file()->file, - MYF(MY_WME|MY_SYNC_FILESIZE)); + mysql_file_sync(mysql_bin_log.get_log_file()->file, MYF(MY_WME)); mysql_mutex_unlock(mysql_bin_log.get_log_lock()); } thd->clear_error(); diff --git a/sql/log.cc b/sql/log.cc index 06ba27c4b79..726598affe3 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3730,7 +3730,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name, bytes_written+= description_event_for_queue->data_written; } if (flush_io_cache(&log_file) || - 
mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))) + mysql_file_sync(log_file.file, MYF(MY_WME))) goto err; my_off_t offset= my_b_tell(&log_file); @@ -3768,7 +3768,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name, strlen(log_file_name)) || my_b_write(&index_file, (uchar*) "\n", 1) || flush_io_cache(&index_file) || - mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))) + mysql_file_sync(index_file.file, MYF(MY_WME))) goto err; #ifdef HAVE_REPLICATION @@ -3908,7 +3908,7 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset) } /* The following will either truncate the file or fill the end with \n' */ if (mysql_file_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) || - mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE))) + mysql_file_sync(file, MYF(MY_WME))) goto err; /* Reset data in old index cache */ @@ -4702,7 +4702,7 @@ int MYSQL_BIN_LOG::sync_purge_index_file() if (unlikely((error= flush_io_cache(&purge_index_file))) || unlikely((error= my_sync(purge_index_file.file, - MYF(MY_WME | MY_SYNC_FILESIZE))))) + MYF(MY_WME))))) DBUG_RETURN(error); DBUG_RETURN(error); @@ -5462,7 +5462,7 @@ bool MYSQL_BIN_LOG::flush_and_sync(bool *synced) if (sync_period && ++sync_counter >= sync_period) { sync_counter= 0; - err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE)); + err= mysql_file_sync(fd, MYF(MY_WME)); if (synced) *synced= 1; #ifndef DBUG_OFF @@ -6142,7 +6142,7 @@ MYSQL_BIN_LOG::write_state_to_file() log_inited= false; if ((err= end_io_cache(&cache))) goto err; - if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE)))) + if ((err= mysql_file_sync(file_no, MYF(MY_WME)))) goto err; goto end; From b2e312b0558cd9af6d8426412a3827e4e264b7b0 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Tue, 8 Aug 2023 16:10:31 +0200 Subject: [PATCH 11/35] MDEV-23021: rpl.rpl_parallel_optimistic_until fails in Buildbot The test case accessed slave-relay-bin.000003 without waiting for the IO thread to write it first. 
If the IO thread was slow, this could fail. Signed-off-by: Kristian Nielsen --- .../rpl/t/rpl_parallel_optimistic_until.test | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test index 508213c9075..0797e8bf220 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_until.test @@ -263,13 +263,34 @@ BEGIN; START SLAVE IO_THREAD; --source include/wait_for_slave_io_to_start.inc -# The following test sets the stop coordinate is set to inside the first event +# The following test sets the stop coordinate to inside the first event # of a relay log that holds events of a transaction started in an earlier log. # Peek the stop position in the middle of trx1, not even on a event boundary. --let $pos_until=255 --let $file_rl=slave-relay-bin.000003 --let $binlog_file=$file_rl +# Wait for the IO thread to write the trx1 to the relaylog before querying it. +# (wait_for_slave_param.inc isn't flexible enough, so do it manually.) 
+--let $continue= 1 +--let $count=600 +while ($continue) +{ + --let $cur_file= query_get_value(SHOW SLAVE STATUS, 'Master_Log_File', 1) + --let $cur_pos= query_get_value(SHOW SLAVE STATUS, 'Read_Master_Log_Pos', 1) + --let $continue= `SELECT '$cur_file' = '$fil_1' AND $cur_pos < $pos_trx1` + if ($continue) + { + --dec $count + if (!$count) + { + --echo **** ERROR: timeout waiting for Read_Master_Log_Pos($cur_pos) >= $pos_trx1 (file='$cur_file') ****" + --die Timeout waiting for IO thread to write master events to the relaylog + } + --sleep 0.1 + } +} + --let $pos_xid=508 --let $info= query_get_value(SHOW RELAYLOG EVENTS IN '$file_rl' FROM $pos_xid LIMIT 1, Info, 1) From 646eb7be497fd8da173b937ef83d7f1b72ca64d2 Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Fri, 11 Aug 2023 07:13:35 +0200 Subject: [PATCH 12/35] galera: wsrep-lib submodule update --- wsrep-lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wsrep-lib b/wsrep-lib index e238c0d240c..173693f2eeb 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit e238c0d240c2557229b0523a4a032f3cf8b41639 +Subproject commit 173693f2eeb61054424233fe85fde4086bed36be From 649fdd9d0b7c187231818837a84dc27c2ba9d4cf Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Mon, 14 Aug 2023 10:32:17 +1000 Subject: [PATCH 13/35] deb autobake - add trixie --- debian/autobake-deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh index 9c658b6df49..d16c5ccf379 100755 --- a/debian/autobake-deb.sh +++ b/debian/autobake-deb.sh @@ -137,7 +137,7 @@ in replace_uring_with_aio fi ;& - "sid") + "trixie"|"sid") # The default packaging should always target Debian Sid, so in this case # there is intentionally no customizations whatsoever. 
;; From e9723c2cbbb429f85ff9165be87228e8f611df9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 14 Aug 2023 13:36:17 +0300 Subject: [PATCH 14/35] MDEV-31473 Wrong information about innodb_checksum_algorithm in information_schema.SYSTEM_VARIABLES MYSQL_SYSVAR_ENUM(checksum_algorithm): Correct the documentation string. Fixes up commit 7a4fbb55b02b449a135fe935f624422eaacfdd7c (MDEV-25105). --- .../suite/sys_vars/r/sysvars_innodb.result | 2 +- storage/innobase/handler/ha_innodb.cc | 18 ++---------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index e07725abbeb..79f6dccefe3 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -276,7 +276,7 @@ SESSION_VALUE NULL DEFAULT_VALUE full_crc32 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE ENUM -VARIABLE_COMMENT The algorithm InnoDB uses for page checksumming. 
Possible values are FULL_CRC32 for new files, always use CRC-32C; for old, see CRC32 below; STRICT_FULL_CRC32 for new files, always use CRC-32C; for old, see STRICT_CRC32 below; CRC32 write crc32, allow any of the other checksums to match when reading; STRICT_CRC32 write crc32, do not allow other algorithms to match when reading; INNODB write a software calculated checksum, allow any other checksums to match when reading; STRICT_INNODB write a software calculated checksum, do not allow other algorithms to match when reading; NONE write a constant magic number, do not do any checksum verification when reading; STRICT_NONE write a constant magic number, do not allow values other than that magic number when reading; Files updated when this option is set to crc32 or strict_crc32 will not be readable by MariaDB versions older than 10.0.4; new files created with full_crc32 are readable by MariaDB 10.4.3+ +VARIABLE_COMMENT The algorithm InnoDB uses for page checksumming. Possible values are FULL_CRC32 for new files, always use CRC-32C; for old, see CRC32 below; STRICT_FULL_CRC32 for new files, always use CRC-32C; for old, see STRICT_CRC32 below; CRC32 write crc32, allow previously used algorithms to match when reading; STRICT_CRC32 write crc32, do not allow other algorithms to match when reading; New files created with full_crc32 are readable by MariaDB 10.4.3+ NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f2b0013411d..03bcdf3363e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18822,24 +18822,10 @@ static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm, " STRICT_FULL_CRC32" " for new files, always use CRC-32C; for old, see STRICT_CRC32 below;" " CRC32" - " write crc32, allow any of the other checksums to match when reading;" + " write crc32, allow previously used algorithms to match when 
reading;" " STRICT_CRC32" " write crc32, do not allow other algorithms to match when reading;" - " INNODB" - " write a software calculated checksum, allow any other checksums" - " to match when reading;" - " STRICT_INNODB" - " write a software calculated checksum, do not allow other algorithms" - " to match when reading;" - " NONE" - " write a constant magic number, do not do any checksum verification" - " when reading;" - " STRICT_NONE" - " write a constant magic number, do not allow values other than that" - " magic number when reading;" - " Files updated when this option is set to crc32 or strict_crc32 will" - " not be readable by MariaDB versions older than 10.0.4;" - " new files created with full_crc32 are readable by MariaDB 10.4.3+", + " New files created with full_crc32 are readable by MariaDB 10.4.3+", NULL, NULL, SRV_CHECKSUM_ALGORITHM_FULL_CRC32, &innodb_checksum_algorithm_typelib); From dd19ba188c6fbcd1b385eac34553aaceca42fc29 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:43:36 -0400 Subject: [PATCH 15/35] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 8c23df65833..203fc146cec 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=4 -MYSQL_VERSION_PATCH=31 +MYSQL_VERSION_PATCH=32 SERVER_MATURITY=stable From d84df2b878bf1bb61f1d4c7e474615b2c299722d Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:46:16 -0400 Subject: [PATCH 16/35] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e1357a38a48..0f8a4608896 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=22 +MYSQL_VERSION_PATCH=23 SERVER_MATURITY=stable From e0398c5b8c118cbdb14e0e6807bfa7c72b03a544 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:47:13 -0400 Subject: [PATCH 17/35] 
bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 44b9653f38a..c8596588b6b 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=6 -MYSQL_VERSION_PATCH=15 +MYSQL_VERSION_PATCH=16 SERVER_MATURITY=stable From 19a2456f07fc42af3df97dbf1cdf049086022396 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:48:05 -0400 Subject: [PATCH 18/35] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 281e3f49dcf..323c7c10143 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=10 -MYSQL_VERSION_PATCH=6 +MYSQL_VERSION_PATCH=7 SERVER_MATURITY=stable From b96172555c0269f8a054885c95b1b8ee77c26045 Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:48:55 -0400 Subject: [PATCH 19/35] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index edb88571b8d..1db717e2370 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=11 -MYSQL_VERSION_PATCH=5 +MYSQL_VERSION_PATCH=6 SERVER_MATURITY=stable From 0a9d1f2ae5434a51274ee67dd720b34d897d357d Mon Sep 17 00:00:00 2001 From: Daniel Bartholomew Date: Mon, 14 Aug 2023 13:49:38 -0400 Subject: [PATCH 20/35] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 3e4b76e3287..18b3c9acff0 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=11 MYSQL_VERSION_MINOR=0 -MYSQL_VERSION_PATCH=3 +MYSQL_VERSION_PATCH=4 SERVER_MATURITY=stable From 1fa7c9a3cdeded4cae85e8e0869b67da4b617065 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 21 Jul 2023 15:19:38 +0400 Subject: [PATCH 21/35] MDEV-31724 Compressed varchar values lost on joins when sorting on columns from joined table(s) Field_varstring::get_copy_func() did 
not take into account that functions do_varstring1[_mb], do_varstring2[_mb] do not support compressed data. Changing the return value of Field_varstring::get_copy_func() to `do_field_string` if there is a compresion and truncation at the same time. This fixes the problem, so now it works as follows: - val_str() uncompresses the data - The prefix is then calculated on the uncompressed data Additionally, introducing two new copying functions - do_varstring1_no_truncation() - do_varstring2_no_truncation() Using new copying functions in cases when: - a Field_varstring with length_bytes==1 is changing to a longer Field_varstring with length_bytes==1 - a Field_varstring with length_bytes==2 is changing to a longer Field_varstring with length_bytes==2 In these cases we don't care neither of compression nor of multi-byte prefixes: the entire data gets fully copied from the source column to the target column as is. This is a kind of new optimization, but this also was needed to preserve existing MTR test results. 
--- mysql-test/main/column_compression.result | 238 ++++++++++++++++++++++ mysql-test/main/column_compression.test | 175 ++++++++++++++++ sql/field_conv.cc | 49 +++++ 3 files changed, 462 insertions(+) diff --git a/mysql-test/main/column_compression.result b/mysql-test/main/column_compression.result index 5ea981cfdc3..e3bccd5f0de 100644 --- a/mysql-test/main/column_compression.result +++ b/mysql-test/main/column_compression.result @@ -2672,3 +2672,241 @@ SET column_compression_threshold=0; INSERT INTO t1 VALUES('aa'); SET column_compression_threshold=DEFAULT; DROP TABLE t1; +# +# MDEV-31724 Compressed varchar values lost on joins when sorting on columns from joined table(s) +# +CREATE TABLE t1 ( +id int(10) unsigned not null, +txt varchar(5000) COMPRESSED NOT NULL DEFAULT '', +PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; +CREATE TABLE t2 ( +id int(10) unsigned not null, +n1 bigint(20) NOT NULL, +n2 bigint(20) NOT NULL, +n3 bigint(20) NOT NULL, +PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; +INSERT INTO t1 VALUES +(1, 'short string < 100 chars'), +(2, 'long string = 99 chars '), +(3, 'long string = 100 chars !'), +(4, 'long string = 101 chars !'); +INSERT INTO t2 VALUES +(1, 24, 1, 1), +(2, 99, 2, 2), +(3, 100, 3, 3), +(4, 101, 4, 4); +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 4 101 4 4 +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id ORDER BY v.n1; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 4 101 4 4 +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 
4 101 4 4 +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id ORDER BY v.n1; +txt id n1 n2 n3 +short string < 100 chars 1 24 1 1 +long string = 99 chars 2 99 2 2 +long string = 100 chars ! 3 100 3 3 +long string = 101 chars ! 4 101 4 4 +DROP TABLE t1, t2; +CREATE OR REPLACE TABLE t1 ( +id INT NOT NULL PRIMARY KEY, +txt varchar(5000) COMPRESSED NOT NULL DEFAULT '' +) CHARSET=utf8mb3; +INSERT INTO t1 VALUES +(1, REPEAT('a', 10)), +(2, REPEAT('b', 99)), +(3, REPEAT('c', 100)), +(4, REPEAT('d', 121)); +SELECT txt, sysdate(6) FROM t1 ORDER BY 2; +txt sysdate(6) +aaaaaaaaaa +bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd +DROP TABLE t1; +CREATE FUNCTION f1(imax INT, jmax INT) RETURNS TEXT +BEGIN +DECLARE res TEXT DEFAULT 'x'; +FOR i IN 0..imax +DO +FOR j IN 0..jmax +DO +SET res=CONCAT(res, ' ', i, ' ', j); +END FOR; +END FOR; +RETURN res; +END; +$$ +SET @@column_compression_threshold=32; +# VARCHAR1, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +197 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 6 6 0 6 1 6 2 6 3 6 4 6 5 6 6 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 1 +SELECT LENGTH(a), a FROM t1; +LENGTH(a) a +4 x 0 +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (REPEAT('a',254)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +254 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 1 +SELECT LENGTH(a), a FROM t1; +LENGTH(a) a +4 aaaa +DROP TABLE t1; +# VARCHAR1, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(250) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +197 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 6 6 0 6 1 6 2 6 3 6 4 6 5 6 6 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(254) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +197 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 6 6 0 6 1 6 2 6 3 6 4 6 5 6 6 +DROP TABLE t1; +# VARCHAR2, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET latin1; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 1 +Column_decompressions 1 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +256 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 1 17 1 18 1 19 1 20 1 21 1 22 +DROP TABLE t1; +# VARCHAR2, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a, 30) 
+5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(32001) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a, 30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +DROP TABLE t1; +# VARCHAR1, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +65 x 0 0 0 1 0 2 0 3 1 0 1 1 1 2 0 2 1 2 2 2 3 3 0 3 1 3 2 3 3 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(1) COMPRESSED CHARACTER SET utf8mb3; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 1 +SELECT LENGTH(a), a FROM t1; +LENGTH(a) a +1 x +DROP TABLE t1; +# VARCHAR1, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +65 x 0 0 0 1 0 2 0 3 1 0 1 1 1 2 0 2 1 2 2 2 3 3 0 3 1 3 2 3 3 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(81) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +65 x 0 0 0 1 0 2 0 3 1 0 1 1 1 2 0 2 1 2 2 2 3 3 0 3 1 3 2 3 3 +DROP TABLE t1; +# VARCHAR2, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 
31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET utf8mb3; +Warnings: +Warning 1265 Data truncated for column 'a' at row 1 +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 1 +Column_decompressions 1 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +256 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 1 17 1 18 1 19 1 20 1 21 1 22 +DROP TABLE t1; +# VARCHAR2, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(10001) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +Variable_name Value +Column_compressions 0 +Column_decompressions 0 +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +LENGTH(a) LEFT(a,30) RIGHT(a,30) +5505 x 0 0 0 1 0 2 0 3 0 4 0 5 0 6 31 27 31 28 31 29 31 30 31 31 +DROP TABLE t1; +SET @@column_compression_threshold=DEFAULT; +DROP FUNCTION f1; +# +# End of 10.4 tests +# diff --git a/mysql-test/main/column_compression.test b/mysql-test/main/column_compression.test index 1560b23ad3e..29e0218749c 100644 --- a/mysql-test/main/column_compression.test +++ b/mysql-test/main/column_compression.test @@ -266,3 +266,178 @@ SET column_compression_threshold=0; INSERT INTO t1 VALUES('aa'); SET column_compression_threshold=DEFAULT; DROP TABLE t1; + + +--echo # +--echo # MDEV-31724 Compressed varchar values lost on joins when sorting on columns from joined table(s) +--echo # + +CREATE TABLE t1 ( + id int(10) unsigned not null, + txt varchar(5000) COMPRESSED NOT NULL DEFAULT '', + PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; + +CREATE TABLE t2 ( + id int(10) unsigned not null, + n1 bigint(20) NOT NULL, + n2 bigint(20) NOT NULL, 
+ n3 bigint(20) NOT NULL, + PRIMARY KEY (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin; + +INSERT INTO t1 VALUES +(1, 'short string < 100 chars'), +(2, 'long string = 99 chars '), +(3, 'long string = 100 chars !'), +(4, 'long string = 101 chars !'); + +INSERT INTO t2 VALUES +(1, 24, 1, 1), +(2, 99, 2, 2), +(3, 100, 3, 3), +(4, 101, 4, 4); + +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id; +SELECT txt, v.* FROM t1 LEFT JOIN t2 v ON t1.id = v.id ORDER BY v.n1; +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id; +SELECT txt, v.* FROM t1 JOIN t2 v ON t1.id = v.id ORDER BY v.n1; + +DROP TABLE t1, t2; + +CREATE OR REPLACE TABLE t1 ( + id INT NOT NULL PRIMARY KEY, + txt varchar(5000) COMPRESSED NOT NULL DEFAULT '' +) CHARSET=utf8mb3; + +INSERT INTO t1 VALUES +(1, REPEAT('a', 10)), +(2, REPEAT('b', 99)), +(3, REPEAT('c', 100)), +(4, REPEAT('d', 121)); + +--replace_column 2 +--sorted_result +SELECT txt, sysdate(6) FROM t1 ORDER BY 2; +DROP TABLE t1; + + +DELIMITER $$; +CREATE FUNCTION f1(imax INT, jmax INT) RETURNS TEXT +BEGIN + DECLARE res TEXT DEFAULT 'x'; + FOR i IN 0..imax + DO + FOR j IN 0..jmax + DO + SET res=CONCAT(res, ' ', i, ' ', j); + END FOR; + END FOR; + RETURN res; +END; +$$ +DELIMITER ;$$ + + +# Let's override the default threshold (100) to force +# comression for VARCHAR1+MB, for example, for: +# VARCHAR(80) CHARACTER SET utf8mb3 + +SET @@column_compression_threshold=32; + +--echo # VARCHAR1, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), a FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(254) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (REPEAT('a',254)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 
MODIFY a VARCHAR(4) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), a FROM t1; +DROP TABLE t1; + +--echo # VARCHAR1, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(250) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(6,6)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(254) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, 8bit, truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, 8bit, no truncation +CREATE TABLE t1 (a VARCHAR(32000) COMPRESSED CHARACTER SET latin1); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(32001) COMPRESSED CHARACTER SET latin1; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a, 30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR1, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(1) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), a FROM t1; +DROP TABLE t1; + +--echo # VARCHAR1, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(80) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(3,3)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(81) COMPRESSED CHARACTER 
SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, multi-byte, truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(256) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +--echo # VARCHAR2, multi-byte, no truncation +CREATE TABLE t1 (a VARCHAR(10000) COMPRESSED CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES (f1(31,31)); +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +FLUSH STATUS; +ALTER IGNORE TABLE t1 MODIFY a VARCHAR(10001) COMPRESSED CHARACTER SET utf8mb3; +SHOW STATUS LIKE 'Column%compressions'; +SELECT LENGTH(a), LEFT(a,30), RIGHT(a,30) FROM t1; +DROP TABLE t1; + +SET @@column_compression_threshold=DEFAULT; + +DROP FUNCTION f1; + +--echo # +--echo # End of 10.4 tests +--echo # diff --git a/sql/field_conv.cc b/sql/field_conv.cc index 7ec93e032e6..7e60e37a36e 100644 --- a/sql/field_conv.cc +++ b/sql/field_conv.cc @@ -532,6 +532,40 @@ static void do_expand_string(Copy_field *copy) } +/* + Copy from a Field_varstring with length_bytes==1 + into another Field_varstring with length_bytes==1 + when the target column is not shorter than the source column. + We don't need to calculate the prefix in this case. 
It works for + - non-compressed and compressed columns + - single byte and multi-byte character sets +*/ +static void do_varstring1_no_truncation(Copy_field *copy) +{ + uint length= (uint) *(uchar*) copy->from_ptr; + DBUG_ASSERT(length <= copy->to_length - 1); + *(uchar*) copy->to_ptr= (uchar) length; + memcpy(copy->to_ptr+1, copy->from_ptr + 1, length); +} + +/* + Copy from a Field_varstring with length_bytes==2 + into another Field_varstring with length_bytes==2 + when the target column is not shorter than the source column. + We don't need to calculate the prefix in this case. It works for + - non-compressed and compressed columns + - single byte and multi-byte character sets +*/ +static void do_varstring2_no_truncation(Copy_field *copy) +{ + uint length= uint2korr(copy->from_ptr); + DBUG_ASSERT(length <= copy->to_length - HA_KEY_BLOB_LENGTH); + int2store(copy->to_ptr, length); + memcpy(copy->to_ptr + HA_KEY_BLOB_LENGTH, + copy->from_ptr + HA_KEY_BLOB_LENGTH, length); +} + + static void do_varstring1(Copy_field *copy) { uint length= (uint) *(uchar*) copy->from_ptr; @@ -776,6 +810,21 @@ Field::Copy_func *Field_varstring::get_copy_func(const Field *from) const length_bytes != ((const Field_varstring*) from)->length_bytes || !compression_method() != !from->compression_method()) return do_field_string; + + if (field_length >= from->field_length) + return length_bytes == 1 ? do_varstring1_no_truncation : + do_varstring2_no_truncation; + + if (compression_method()) + { + /* + Truncation is going to happen, so we need to calculate prefixes. + Can't calculate prefixes directly on compressed data, + need to go through val_str() to uncompress. + */ + return do_field_string; + } + return length_bytes == 1 ? (from->charset()->mbmaxlen == 1 ? do_varstring1 : do_varstring1_mb) : (from->charset()->mbmaxlen == 1 ? 
do_varstring2 : do_varstring2_mb); From 9c8ae6dca50534eeb08a9f8837f4e28c80688740 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Tue, 15 Aug 2023 09:36:38 +0400 Subject: [PATCH 22/35] MDEV-24797 Column Compression - ERROR 1265 (01000): Data truncated for column Fix issue was earlier fixed by MDEV-31724. Only adding MTR tests. --- mysql-test/main/column_compression.result | 11 +++++++++++ mysql-test/main/column_compression.test | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/mysql-test/main/column_compression.result b/mysql-test/main/column_compression.result index e3bccd5f0de..bea6f2d3998 100644 --- a/mysql-test/main/column_compression.result +++ b/mysql-test/main/column_compression.result @@ -2908,5 +2908,16 @@ DROP TABLE t1; SET @@column_compression_threshold=DEFAULT; DROP FUNCTION f1; # +# MDEV-24797 Column Compression - ERROR 1265 (01000): Data truncated for column +# +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=MyISAM; +INSERT INTO t1 SET a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=InnoDB; +INSERT INTO t1 SET a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; +# # End of 10.4 tests # diff --git a/mysql-test/main/column_compression.test b/mysql-test/main/column_compression.test index 29e0218749c..84870f539e8 100644 --- a/mysql-test/main/column_compression.test +++ b/mysql-test/main/column_compression.test @@ -438,6 +438,21 @@ SET @@column_compression_threshold=DEFAULT; DROP FUNCTION f1; +--echo # +--echo # MDEV-24797 Column Compression - ERROR 1265 (01000): Data truncated for column +--echo # + +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=MyISAM; +INSERT INTO t1 SET a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(500) COMPRESSED CHARACTER SET utf8mb3) ENGINE=InnoDB; +INSERT INTO t1 SET 
a=REPEAT('x',127); +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +DROP TABLE t1; + + --echo # --echo # End of 10.4 tests --echo # From b4ace139a11a312ea4d32509295b45030ff8f119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 15 Aug 2023 12:14:31 +0300 Subject: [PATCH 23/35] Remove the often-hanging test innodb.alter_rename_files The test innodb.alter_rename_files rather frequently hangs in checkpoint_set_now. The test was removed in MariaDB Server 10.5 commit 37e7bde12abddcda4d5505450e39a739561bd4d5 when the code that it aimed to cover was simplified. Starting with MariaDB Server 10.5 the page flushing and log checkpointing is much simpler, handled by the single buf_flush_page_cleaner() thread. Let us remove the test to avoid occasional failures. We are not going to fix the cause of the failure in MariaDB Server 10.4. --- .../suite/innodb/r/alter_rename_files.result | 19 ------------ .../suite/innodb/t/alter_rename_files.test | 31 ------------------- 2 files changed, 50 deletions(-) delete mode 100644 mysql-test/suite/innodb/r/alter_rename_files.result delete mode 100644 mysql-test/suite/innodb/t/alter_rename_files.test diff --git a/mysql-test/suite/innodb/r/alter_rename_files.result b/mysql-test/suite/innodb/r/alter_rename_files.result deleted file mode 100644 index 4e373d95fad..00000000000 --- a/mysql-test/suite/innodb/r/alter_rename_files.result +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE t1 (x INT NOT NULL UNIQUE KEY) ENGINE=InnoDB; -INSERT INTO t1 VALUES(5); -SET GLOBAL innodb_log_checkpoint_now=TRUE; -SET DEBUG_SYNC='commit_cache_rebuild SIGNAL ready WAIT_FOR finish'; -ALTER TABLE t1 FORCE;; -connect con1,localhost,root,,; -SET DEBUG_SYNC='now WAIT_FOR ready'; -SET GLOBAL innodb_log_checkpoint_now=TRUE; -SET DEBUG_SYNC='now SIGNAL finish'; -disconnect con1; -connection default; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `x` int(11) NOT NULL, - UNIQUE KEY `x` (`x`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 
COLLATE=latin1_swedish_ci -DROP TABLE t1; -SET DEBUG_SYNC='RESET'; diff --git a/mysql-test/suite/innodb/t/alter_rename_files.test b/mysql-test/suite/innodb/t/alter_rename_files.test deleted file mode 100644 index 27408320f7d..00000000000 --- a/mysql-test/suite/innodb/t/alter_rename_files.test +++ /dev/null @@ -1,31 +0,0 @@ ---source include/have_debug.inc ---source include/have_debug_sync.inc ---source include/have_innodb.inc ---source include/count_sessions.inc - -CREATE TABLE t1 (x INT NOT NULL UNIQUE KEY) ENGINE=InnoDB; -INSERT INTO t1 VALUES(5); - -SET GLOBAL innodb_log_checkpoint_now=TRUE; - -# Start an ALTER TABLE and stop it before renaming the files -SET DEBUG_SYNC='commit_cache_rebuild SIGNAL ready WAIT_FOR finish'; - ---send ALTER TABLE t1 FORCE; - -connect (con1,localhost,root,,); - -SET DEBUG_SYNC='now WAIT_FOR ready'; - -SET GLOBAL innodb_log_checkpoint_now=TRUE; - -SET DEBUG_SYNC='now SIGNAL finish'; - -disconnect con1; -connection default; -reap; -SHOW CREATE TABLE t1; -DROP TABLE t1; -SET DEBUG_SYNC='RESET'; - ---source include/wait_until_count_sessions.inc From 920789e9d413504a137aab486961eac06d02b203 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sun, 9 Jul 2023 16:45:47 +0200 Subject: [PATCH 24/35] MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication Remove the exception that InnoDB does not report auto-increment locks waits to the parallel replication. There was an assumption that these waits could not cause conflicts with in-order parallel replication and thus need not be reported. However, this assumption is wrong and it is possible to get conflicts that lead to hangs for the duration of --innodb-lock-wait-timeout. This can be seen with three transactions: 1. T1 is waiting for T3 on an autoinc lock 2. T2 is waiting for T1 to commit 3. T3 is waiting on a normal row lock held by T2 Here, T3 needs to be deadlock killed on the wait by T1. 
Note: This should be null-merged to 10.6, as a different fix is needed there due to InnoDB lock code changes. Signed-off-by: Kristian Nielsen --- .../suite/rpl/r/rpl_parallel_autoinc.result | 95 ++++++++++++ .../suite/rpl/t/rpl_parallel_autoinc.test | 140 ++++++++++++++++++ sql/sql_class.cc | 6 - storage/innobase/lock/lock0lock.cc | 8 +- 4 files changed, 236 insertions(+), 13 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_autoinc.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_autoinc.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result new file mode 100644 index 00000000000..c1829bafa1a --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result @@ -0,0 +1,95 @@ +include/master-slave.inc +[connection master] +MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication +include/rpl_connect.inc [creating slave2] +include/rpl_connect.inc [creating slave3] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); +CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.innodb_lock_wait_timeout= 20; +connection master; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since 
BINLOG_FORMAT = STATEMENT. Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +DELETE FROM t1 WHERE c >= 4 and c < 6; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +include/save_master_gtid.inc +connection slave1; +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; +a b +row for T1 0 +connection slave2; +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; +a b +row for T3 0 +connection slave3; +BEGIN; +DELETE FROM t2 WHERE a=30; +connection slave; +include/start_slave.inc +connection slave2; +ROLLBACK; +connection slave1; +ROLLBACK; +connection slave3; +ROLLBACK; +connection slave; +include/sync_with_master_gtid.inc +SELECT * FROM t1 ORDER BY a; +a b c +1 0 1 +2 0 1 +3 0 2 +4 0 3 +6 0 7 +7 0 8 +8 1 1 +9 3 2 +10 3 3 +11 3 4 +SELECT * FROM t2 ORDER BY a; +a b +10 1 +20 2 +30 3 +40 4 +50 5 +SELECT * FROM t3 ORDER BY a; +a b +row for T1 1 +row for T2 0 +row for T3 1 +connection master; +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= conservative; +SET @@global.innodb_lock_wait_timeout= 50; +include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; 
+@@GLOBAL.innodb_autoinc_lock_mode +1 +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test new file mode 100644 index 00000000000..0e96b4dfb80 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test @@ -0,0 +1,140 @@ +--source include/have_binlog_format_statement.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--echo MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication + +# The scenario is transactions T1, T2, T3: +# +# T1 is waiting for T3 on an autoinc lock +# T2 is waiting for T1 to commit +# T3 is waiting on a normal row lock held by T2 +# +# This caused a hang until innodb_lock_wait_timeout, because autoinc +# locks were not reported to the in-order parallel replication, so T3 +# was not deadlock killed. + +--let $lock_wait_timeout=20 + +--let $rpl_connection_name= slave2 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--let $rpl_connection_name= slave3 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; + +# A table as destination for INSERT-SELECT +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); + +# A table as source for INSERT-SELECT. +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); + +# A table to help order slave worker threads to setup the desired scenario. 
+CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_innodb_lock_wait_timeout= `SELECT @@global.innodb_lock_wait_timeout` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +eval set @@global.innodb_lock_wait_timeout= $lock_wait_timeout; + +--connection master +# Transaction T1. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +COMMIT; + +# Transaction T2. +DELETE FROM t1 WHERE c >= 4 and c < 6; + +# Transaction T3. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +COMMIT; + +--source include/save_master_gtid.inc + +--connection slave1 +# Temporarily block T1 to create the scheduling that triggers the bug. +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; + +--connection slave2 +# Temporarily block T3 from starting (so T2 can reach commit). +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; + +--connection slave3 +# This critical step blocks T3 after it has inserted its first row, +# and thus taken the auto-increment lock, but before it has reached +# the point where it gets a row lock wait on T2. Even though +# auto-increment lock waits were not reported due to the bug, +# transitive lock waits (T1 waits on autoinc of T3 which waits on row +# on T2) _were_ reported as T1 waiting on T2, and thus a deadlock kill +# happened and the bug was not triggered. 
+BEGIN; +DELETE FROM t2 WHERE a=30; + +--connection slave +--source include/start_slave.inc + +# First let T2 complete until it is waiting for T1 to commit. +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Then let T3 reach the point where it has obtained the autoinc lock, +# but it is not yet waiting for a row lock held by T2. +--connection slave2 +ROLLBACK; +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Sending data' and info LIKE 'INSERT INTO t1(b, c) SELECT 3, t2.b%' and time_ms > 500 and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Now let T1 continue, while T3 is holding the autoinc lock but before +# it is waiting for T2. Wait a short while to give the hang a chance to +# happen; T1 needs to get to request the autoinc lock before we let T3 +# continue. (There's a small chance the sleep will be too small, which will +# let the test occasionally pass on non-fixed server). +--connection slave1 +ROLLBACK; +--sleep 0.5 + +# Now let T3 continue; the bug was that this lead to an undetected +# deadlock that remained until innodb lock wait timeout. +--connection slave3 +ROLLBACK; + +--connection slave +--let $slave_timeout= `SELECT $lock_wait_timeout/2` +--source include/sync_with_master_gtid.inc +--let $slave_timeout= +SELECT * FROM t1 ORDER BY a; +SELECT * FROM t2 ORDER BY a; +SELECT * FROM t3 ORDER BY a; + +# Cleanup. 
+--connection master +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; + +--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.innodb_lock_wait_timeout= $save_innodb_lock_wait_timeout; +--source include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; +--source include/rpl_end.inc diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d2a3cbcf613..ae388515961 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5144,12 +5144,6 @@ thd_need_wait_reports(const MYSQL_THD thd) deadlock with the pre-determined commit order, we kill the later transaction, and later re-try it, to resolve the deadlock. - This call need only receive reports about waits for locks that will remain - until the holding transaction commits. InnoDB auto-increment locks, - for example, are released earlier, and so need not be reported. (Such false - positives are not harmful, but could lead to unnecessary kill and retry, so - best avoided). - Returns 1 if the OTHER_THD will be killed to resolve deadlock, 0 if not. The actual kill will happen later, asynchronously from another thread. The caller does not need to take any actions on the return value if the diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 8e75955c8af..b31e8ba5b56 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -6941,13 +6941,7 @@ DeadlockChecker::search() return m_start; } - /* We do not need to report autoinc locks to the upper - layer. These locks are released before commit, so they - can not cause deadlocks with binlog-fixed commit - order. 
*/ - if (m_report_waiters - && (lock_get_type_low(lock) != LOCK_TABLE - || lock_get_mode(lock) != LOCK_AUTO_INC)) { + if (m_report_waiters) { thd_rpl_deadlock_check(m_start->mysql_thd, lock->trx->mysql_thd); } From 900c4d692073ae51413d8f739977216a56663cbf Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Tue, 11 Jul 2023 00:31:29 +0200 Subject: [PATCH 25/35] MDEV-31655: Parallel replication deadlock victim preference code errorneously removed Restore code to make InnoDB choose the second transaction as a deadlock victim if two transactions deadlock that need to commit in-order for parallel replication. This code was erroneously removed when VATS was implemented in InnoDB. Also add a test case for InnoDB choosing the right deadlock victim. Also fixes this bug, with testcase that reliably reproduces: MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master Note: This should be null-merged to 10.6, as a different fix is needed there due to InnoDB locking code changes. 
Signed-off-by: Kristian Nielsen --- .../binlog_encryption/rpl_parallel.result | 42 +++++++++- mysql-test/suite/rpl/r/rpl_parallel.result | 42 +++++++++- .../r/rpl_parallel_deadlock_victim2.result | 50 +++++++++++ mysql-test/suite/rpl/t/rpl_parallel.test | 71 +++++++++++++++- .../rpl/t/rpl_parallel_deadlock_victim2.test | 83 +++++++++++++++++++ sql/rpl_parallel.cc | 12 ++- sql/slave.cc | 1 + sql/sql_class.cc | 43 ++++++++++ sql/sql_insert.cc | 7 ++ storage/innobase/lock/lock0lock.cc | 17 ++++ storage/innobase/lock/lock0wait.cc | 15 +++- storage/innobase/trx/trx0trx.cc | 15 ++++ 12 files changed, 393 insertions(+), 5 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test diff --git a/mysql-test/suite/binlog_encryption/rpl_parallel.result b/mysql-test/suite/binlog_encryption/rpl_parallel.result index b75a66a634a..b24ff7ba53d 100644 --- a/mysql-test/suite/binlog_encryption/rpl_parallel.result +++ b/mysql-test/suite/binlog_encryption/rpl_parallel.result @@ -2,6 +2,7 @@ include/master-slave.inc [connection master] connection server_2; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; SET GLOBAL slave_parallel_threads=10; ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first include/stop_slave.inc @@ -1680,13 +1681,52 @@ a 2000 SELECT * FROM t2 WHERE a>=2000 ORDER BY a; a +MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +connection server_1; +CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +COMMIT; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 5; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; 
+connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 1+1000; +SET @commit_id= 2+1000; +SET @commit_id= 3+1000; +SET @commit_id= 4+1000; +SET @commit_id= 5+1000; +SET @commit_id= 6+1000; +SET @commit_id= 7+1000; +SET @commit_id= 8+1000; +SET @commit_id= 9+1000; +SET @commit_id= 10+1000; +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 +include/save_master_gtid.inc +connection server_2; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 connection server_2; include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; include/start_slave.inc SET DEBUG_SYNC= 'RESET'; connection server_1; DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7; SET DEBUG_SYNC= 'RESET'; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 9b2e68d366e..ef89d954faa 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -2,6 +2,7 @@ include/master-slave.inc [connection master] connection server_2; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; SET GLOBAL slave_parallel_threads=10; ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first include/stop_slave.inc @@ -1679,13 +1680,52 @@ a 2000 SELECT * FROM t2 WHERE a>=2000 ORDER BY a; a +MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +connection server_1; +CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +COMMIT; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc 
+include/stop_slave.inc +set @@global.slave_parallel_threads= 5; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; +connection master; +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 1+1000; +SET @commit_id= 2+1000; +SET @commit_id= 3+1000; +SET @commit_id= 4+1000; +SET @commit_id= 5+1000; +SET @commit_id= 6+1000; +SET @commit_id= 7+1000; +SET @commit_id= 8+1000; +SET @commit_id= 9+1000; +SET @commit_id= 10+1000; +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 +include/save_master_gtid.inc +connection server_2; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; +COUNT(*) SUM(a*100*b) +10 225000 connection server_2; include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; include/start_slave.inc SET DEBUG_SYNC= 'RESET'; connection server_1; DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7; SET DEBUG_SYNC= 'RESET'; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result new file mode 100644 index 00000000000..f154f37a057 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result @@ -0,0 +1,50 @@ +include/master-slave.inc +[connection master] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; +*** MDEV-28776: 
rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +connection master; +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +include/save_master_gtid.inc +connection slave; +SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= conservative; +SET @@global.slave_transaction_retries= 10; +include/start_slave.inc +connection master; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 9ba7a30f2eb..d43cec4df34 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -13,6 +13,7 @@ --connection server_2 SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; --error ER_SLAVE_MUST_STOP SET GLOBAL slave_parallel_threads=10; --source include/stop_slave.inc @@ -2203,16 +2204,84 @@ SELECT * FROM t1 WHERE a>=2000 ORDER BY a; SELECT * FROM t2 WHERE a>=2000 ORDER BY a; +--echo MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +# The problem was that InnoDB would choose the wrong deadlock victim. +# Create a lot of transactions that can cause deadlocks, and use error +# injection to check that the first transactions in each group is never +# selected as deadlock victim. 
+--let $rows= 10 +--let $transactions= 5 +--let $gcos= 10 + +--connection server_1 +CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +--disable_query_log +--let $i= 0 +while ($i < 10) { + eval INSERT INTO t7 VALUES ($i, 0); + inc $i; +} +--enable_query_log +COMMIT; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +eval set @@global.slave_parallel_threads= $transactions; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +# This error injection will allow no retries for GTIDs divisible by 1000. +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; + +--connection master +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +--let $j= 1 +while ($j <= $gcos) { + eval SET @commit_id= $j+1000; + --let $i= 0 + while ($i < $transactions) { + --disable_query_log + eval SET SESSION gtid_seq_no= 1000 + 1000*$j + $i; + BEGIN; + --let $k= 0 + while ($k < $rows) { + eval UPDATE t7 SET b=b+1 WHERE a=(($i+$k) MOD $rows); + inc $k; + } + COMMIT; + --enable_query_log + inc $i; + } + inc $j; +} + +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; + +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t7; + + # Clean up. 
--connection server_2 --source include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; --source include/start_slave.inc SET DEBUG_SYNC= 'RESET'; --connection server_1 DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7; SET DEBUG_SYNC= 'RESET'; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test new file mode 100644 index 00000000000..522cec18bbc --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test @@ -0,0 +1,83 @@ +--source include/master-slave.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_binlog_format_statement.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_transaction_retries= `SELECT @@global.slave_transaction_retries` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; + +--echo *** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +# This was a failure where a transaction T1 could deadlock multiple times +# with T2, eventually exceeding the default --slave-transaction-retries=10. +# Root cause was MDEV-31655, causing InnoDB to wrongly choose T1 as deadlock +# victim over T2. 
If thread scheduling is right, it was possible for T1 to +# repeatedly deadlock, roll back, and have time to grab an S lock again before +# T2 woke up and got its waiting X lock, thus repeating the same deadlock over +# and over. +# Once the bug is fixed, it is not possible to re-create the same execution +# and thread scheduling. Instead we inject small sleeps in a way that +# triggered the problem when the bug was there, to demonstrate that the +# problem no longer occurs. + +--connection master +# T1 +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +# T2 +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; + +SELECT * FROM t1 ORDER BY a; +--source include/save_master_gtid.inc + +--connection slave +SET @save_dbug= @@GLOBAL.debug_dbug; + +# Inject various delays to hint thread scheduling to happen in the way that +# triggered MDEV-28776. + +# Small delay starting T1 so it will be the youngest trx and be chosen over +# T2 as the deadlock victim by default in InnoDB. +SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; + +# Small delay before taking insert X lock to give time for both T1 and T2 to +# get the S lock first and cause a deadlock. +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; + +# Small delay after T2's wait on the X lock, to give time for T1 retry to +# re-aquire the T1 S lock first. +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; + +# Delay deadlock kill of T2. +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; + +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; + +# Cleanup. 
+--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.slave_transaction_retries= $save_transaction_retries; +--source include/start_slave.inc + +--connection master +DROP TABLE t1; + +--source include/rpl_end.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index ad9c2ac802c..ba5cf54e673 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -1284,6 +1284,11 @@ handle_rpl_parallel_thread(void *arg) bool did_enter_cond= false; PSI_stage_info old_stage; + DBUG_EXECUTE_IF("rpl_parallel_delay_gtid_0_x_100_start", { + if (rgi->current_gtid.domain_id==0 && + rgi->current_gtid.seq_no == 100) + my_sleep(10000); + }); #ifdef ENABLED_DEBUG_SYNC DBUG_EXECUTE_IF("hold_worker_on_schedule", { if (rgi->current_gtid.domain_id == 0 && @@ -1463,8 +1468,13 @@ handle_rpl_parallel_thread(void *arg) err= dbug_simulate_tmp_error(rgi, thd);); if (unlikely(err)) { + ulong max_retries= slave_trans_retries; convert_kill_to_deadlock_error(rgi); - if (has_temporary_error(thd) && slave_trans_retries > 0) + DBUG_EXECUTE_IF("rpl_mdev31655_zero_retries", + if ((rgi->current_gtid.seq_no % 1000) == 0) + max_retries= 0; + ); + if (has_temporary_error(thd) && max_retries > 0) err= retry_event_group(rgi, rpt, qev); } } diff --git a/sql/slave.cc b/sql/slave.cc index 3fe89a95e5e..cdb16f873c3 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -502,6 +502,7 @@ static void bg_rpl_load_gtid_slave_state(void *) static void bg_slave_kill(void *victim) { THD *to_kill= (THD *)victim; + DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000);); to_kill->awake(KILL_CONNECTION); mysql_mutex_lock(&to_kill->LOCK_wakeup_ready); to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index ae388515961..e7e27401d61 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5272,6 
+5272,49 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) return 0; } + +/* + If the storage engine detects a deadlock, and needs to choose a victim + transaction to roll back, it can call this function to ask the upper + server layer for which of two possible transactions is prefered to be + aborted and rolled back. + + In parallel replication, if two transactions are running in parallel and + one is fixed to commit before the other, then the one that commits later + will be prefered as the victim - chosing the early transaction as a victim + will not resolve the deadlock anyway, as the later transaction still needs + to wait for the earlier to commit. + + The return value is -1 if the first transaction is prefered as a deadlock + victim, 1 if the second transaction is prefered, or 0 for no preference (in + which case the storage engine can make the choice as it prefers). +*/ +extern "C" int +thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2) +{ + rpl_group_info *rgi1, *rgi2; + + if (!thd1 || !thd2) + return 0; + + /* + If the transactions are participating in the same replication domain in + parallel replication, then request to select the one that will commit + later (in the fixed commit order from the master) as the deadlock victim. + */ + rgi1= thd1->rgi_slave; + rgi2= thd2->rgi_slave; + if (rgi1 && rgi2 && + rgi1->is_parallel_exec && + rgi1->rli == rgi2->rli && + rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id) + return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1; + + /* No preferences, let the storage engine decide. 
*/ + return 0; +} + + extern "C" int thd_non_transactional_update(const MYSQL_THD thd) { return(thd->transaction.all.modified_non_trans_table); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index a2c571373cb..a64314814dd 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -78,6 +78,7 @@ #include "sql_audit.h" #include "sql_derived.h" // mysql_handle_derived #include "sql_prepare.h" +#include "rpl_rli.h" #include #include "debug_sync.h" @@ -1753,6 +1754,12 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) save_read_set= table->read_set; save_write_set= table->write_set; + DBUG_EXECUTE_IF("rpl_write_record_small_sleep_gtid_100_200", + { + if (thd->rgi_slave && (thd->rgi_slave->current_gtid.seq_no == 100 || + thd->rgi_slave->current_gtid.seq_no == 200)) + my_sleep(20000); + }); if (info->handle_duplicates == DUP_REPLACE || info->handle_duplicates == DUP_UPDATE) { diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index b31e8ba5b56..cc20ab95930 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -71,6 +71,9 @@ static void lock_grant_after_reset(lock_t* lock); extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); +#ifdef HAVE_REPLICATION +extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); +#endif /** Pretty-print a table lock. @param[in,out] file output stream @@ -1546,6 +1549,20 @@ static bool has_higher_priority(lock_t *lock1, lock_t *lock2) } else if (!lock_get_wait(lock2)) { return false; } + +#ifdef HAVE_REPLICATION + // Ask the upper server layer if any of the two trx should be prefered. 
+ int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd, + lock2->trx->mysql_thd); + if (preference == -1) { + // lock1 is preferred as a victim, so lock2 has higher priority + return false; + } else if (preference == 1) { + // lock2 is preferred as a victim, so lock1 has higher priority + return true; + } +#endif + return lock1->trx->start_time_micro <= lock2->trx->start_time_micro; } diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index 5eb03f668b3..a26b3a6efaa 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -278,7 +278,9 @@ lock_wait_suspend_thread( } ulint lock_type = ULINT_UNDEFINED; - +#ifndef DBUG_OFF + ulint lock_mode = LOCK_NONE; +#endif /* The wait_lock can be cleared by another thread when the lock is released. But the wait can only be initiated by the current thread which owns the transaction. Only acquire the @@ -288,6 +290,9 @@ lock_wait_suspend_thread( wait_lock = trx->lock.wait_lock; if (wait_lock) { lock_type = lock_get_type_low(wait_lock); +#ifndef DBUG_OFF + lock_mode = lock_get_mode(wait_lock); +#endif } lock_mutex_exit(); } @@ -336,6 +341,14 @@ lock_wait_suspend_thread( } os_event_wait(slot->event); + DBUG_EXECUTE_IF("small_sleep_after_lock_wait", + { + if (lock_type == LOCK_REC && lock_mode == LOCK_X && + trx->error_state != DB_DEADLOCK && + !trx_is_interrupted(trx)) { + my_sleep(20000); + } + }); thd_wait_end(trx->mysql_thd); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 7cd95878b0c..b742d1c3686 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -52,6 +52,11 @@ Created 3/26/1996 Heikki Tuuri #include #include +#ifdef HAVE_REPLICATION +extern "C" +int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); +#endif + /** The bit pattern corresponding to TRX_ID_MAX */ const byte trx_id_max_bytes[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ 
-1907,6 +1912,16 @@ trx_weight_ge( ibool a_notrans_edit; ibool b_notrans_edit; +#ifdef HAVE_REPLICATION + /* First ask the upper server layer if it has any preference for which + to prefer as a deadlock victim. */ + int pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd); + if (pref < 0) + return FALSE; + else if (pref > 0) + return TRUE; +#endif + /* If mysql_thd is NULL for a transaction we assume that it has not edited non-transactional tables. */ From 18acbaf416ea7a42edc0b2fc51084eacda4d074c Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sat, 5 Aug 2023 22:53:44 +0200 Subject: [PATCH 26/35] MDEV-31655: Parallel replication deadlock victim preference code errorneously removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore code to make InnoDB choose the second transaction as a deadlock victim if two transactions deadlock that need to commit in-order for parallel replication. This code was erroneously removed when VATS was implemented in InnoDB. Also add a test case for InnoDB choosing the right deadlock victim. 
Also fixes this bug, with testcase that reliably reproduces: MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master Reviewed-by: Marko Mäkelä Signed-off-by: Kristian Nielsen --- .../rpl/r/rpl_parallel_deadlock_victim.result | 51 ++++++++++ .../r/rpl_parallel_deadlock_victim2.result | 50 ++++++++++ .../rpl/t/rpl_parallel_deadlock_victim.test | 86 +++++++++++++++++ .../rpl/t/rpl_parallel_deadlock_victim2.test | 83 +++++++++++++++++ sql/rpl_parallel.cc | 12 ++- sql/slave.cc | 1 + sql/sql_class.cc | 43 +++++++++ sql/sql_insert.cc | 7 ++ storage/innobase/lock/lock0lock.cc | 93 ++++++++++++------- 9 files changed, 390 insertions(+), 36 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result new file mode 100644 index 00000000000..f438f335283 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result @@ -0,0 +1,51 @@ +include/master-slave.inc +[connection master] +MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +connection server_1; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +COMMIT; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +include/stop_slave.inc +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; +set @@global.slave_parallel_threads= 5; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; +connection server_1; +SET @old_dbug= 
@@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +SET @commit_id= 1+1000; +SET @commit_id= 2+1000; +SET @commit_id= 3+1000; +SET @commit_id= 4+1000; +SET @commit_id= 5+1000; +SET @commit_id= 6+1000; +SET @commit_id= 7+1000; +SET @commit_id= 8+1000; +SET @commit_id= 9+1000; +SET @commit_id= 10+1000; +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; +COUNT(*) SUM(a*100*b) +10 225000 +include/save_master_gtid.inc +connection server_2; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; +COUNT(*) SUM(a*100*b) +10 225000 +connection server_2; +include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; +include/start_slave.inc +connection server_1; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result new file mode 100644 index 00000000000..c26944b3321 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result @@ -0,0 +1,50 @@ +include/master-slave.inc +[connection master] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; +*** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +connection master; +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +include/save_master_gtid.inc +connection slave; +SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL 
debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; +include/start_slave.inc +include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= optimistic; +SET @@global.slave_transaction_retries= 10; +include/start_slave.inc +connection master; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test new file mode 100644 index 00000000000..ab634d2953e --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test @@ -0,0 +1,86 @@ +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/master-slave.inc + + +--echo MDEV-31655: Parallel replication deadlock victim preference code erroneously removed +# The problem was that InnoDB would choose the wrong deadlock victim. +# Create a lot of transactions that can cause deadlocks, and use error +# injection to check that the first transactions in each group is never +# selected as deadlock victim. 
+--let $rows= 10 +--let $transactions= 5 +--let $gcos= 10 + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +BEGIN; +--disable_query_log +--let $i= 0 +while ($i < 10) { + eval INSERT INTO t1 VALUES ($i, 0); + inc $i; +} +--enable_query_log +COMMIT; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; +eval set @@global.slave_parallel_threads= $transactions; +set @@global.slave_parallel_mode= conservative; +SET @old_dbug= @@GLOBAL.debug_dbug; +# This error injection will allow no retries for GTIDs divisible by 1000. +SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries"; + +--connection server_1 +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; + +--let $j= 1 +while ($j <= $gcos) { + eval SET @commit_id= $j+1000; + --let $i= 0 + while ($i < $transactions) { + --disable_query_log + eval SET SESSION gtid_seq_no= 1000 + 1000*$j + $i; + BEGIN; + --let $k= 0 + while ($k < $rows) { + eval UPDATE t1 SET b=b+1 WHERE a=(($i+$k) MOD $rows); + inc $k; + } + COMMIT; + --enable_query_log + inc $i; + } + inc $j; +} + +SET SESSION debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; + +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @old_dbug; +SELECT COUNT(*), SUM(a*100*b) FROM t1; + + +# Clean up. 
+--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +SET GLOBAL slave_parallel_mode=@old_parallel_mode; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t1; + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test new file mode 100644 index 00000000000..522cec18bbc --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test @@ -0,0 +1,83 @@ +--source include/master-slave.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_binlog_format_statement.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1(a INT) ENGINE=INNODB; +INSERT INTO t1 VALUES(1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_transaction_retries= `SELECT @@global.slave_transaction_retries` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 2; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.slave_transaction_retries= 2; + +--echo *** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master +# This was a failure where a transaction T1 could deadlock multiple times +# with T2, eventually exceeding the default --slave-transaction-retries=10. +# Root cause was MDEV-31655, causing InnoDB to wrongly choose T1 as deadlock +# victim over T2. If thread scheduling is right, it was possible for T1 to +# repeatedly deadlock, roll back, and have time to grab an S lock again before +# T2 woke up and got its waiting X lock, thus repeating the same deadlock over +# and over. 
+# Once the bug is fixed, it is not possible to re-create the same execution +# and thread scheduling. Instead we inject small sleeps in a way that +# triggered the problem when the bug was there, to demonstrate that the +# problem no longer occurs. + +--connection master +# T1 +SET @@gtid_seq_no= 100; +INSERT INTO t1 SELECT 1+a FROM t1; +# T2 +SET @@gtid_seq_no= 200; +INSERT INTO t1 SELECT 2+a FROM t1; + +SELECT * FROM t1 ORDER BY a; +--source include/save_master_gtid.inc + +--connection slave +SET @save_dbug= @@GLOBAL.debug_dbug; + +# Inject various delays to hint thread scheduling to happen in the way that +# triggered MDEV-28776. + +# Small delay starting T1 so it will be the youngest trx and be chosen over +# T2 as the deadlock victim by default in InnoDB. +SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start"; + +# Small delay before taking insert X lock to give time for both T1 and T2 to +# get the S lock first and cause a deadlock. +SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200"; + +# Small delay after T2's wait on the X lock, to give time for T1 retry to +# re-aquire the T1 S lock first. +SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait"; + +# Delay deadlock kill of T2. +SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill"; + +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SET GLOBAL debug_dbug= @save_dbug; +SELECT * FROM t1 ORDER BY a; + +# Cleanup. 
+--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.slave_transaction_retries= $save_transaction_retries; +--source include/start_slave.inc + +--connection master +DROP TABLE t1; + +--source include/rpl_end.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 8034fe7718a..c044defd000 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -1301,6 +1301,11 @@ handle_rpl_parallel_thread(void *arg) bool did_enter_cond= false; PSI_stage_info old_stage; + DBUG_EXECUTE_IF("rpl_parallel_delay_gtid_0_x_100_start", { + if (rgi->current_gtid.domain_id==0 && + rgi->current_gtid.seq_no == 100) + my_sleep(10000); + }); #ifdef ENABLED_DEBUG_SYNC DBUG_EXECUTE_IF("hold_worker_on_schedule", { if (rgi->current_gtid.domain_id == 0 && @@ -1480,8 +1485,13 @@ handle_rpl_parallel_thread(void *arg) err= dbug_simulate_tmp_error(rgi, thd);); if (unlikely(err)) { + ulong max_retries= slave_trans_retries; convert_kill_to_deadlock_error(rgi); - if (has_temporary_error(thd) && slave_trans_retries > 0) + DBUG_EXECUTE_IF("rpl_mdev31655_zero_retries", + if ((rgi->current_gtid.seq_no % 1000) == 0) + max_retries= 0; + ); + if (has_temporary_error(thd) && max_retries > 0) err= retry_event_group(rgi, rpt, qev); } } diff --git a/sql/slave.cc b/sql/slave.cc index 4b24d281d5d..5ad1a446ae6 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -506,6 +506,7 @@ static void bg_rpl_load_gtid_slave_state(void *) static void bg_slave_kill(void *victim) { THD *to_kill= (THD *)victim; + DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000);); to_kill->awake(KILL_CONNECTION); mysql_mutex_lock(&to_kill->LOCK_wakeup_ready); to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 8e2f34ac53d..0c929837957 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5516,6 
+5516,49 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) return 0; } + +/* + If the storage engine detects a deadlock, and needs to choose a victim + transaction to roll back, it can call this function to ask the upper + server layer for which of two possible transactions is prefered to be + aborted and rolled back. + + In parallel replication, if two transactions are running in parallel and + one is fixed to commit before the other, then the one that commits later + will be prefered as the victim - chosing the early transaction as a victim + will not resolve the deadlock anyway, as the later transaction still needs + to wait for the earlier to commit. + + The return value is -1 if the first transaction is prefered as a deadlock + victim, 1 if the second transaction is prefered, or 0 for no preference (in + which case the storage engine can make the choice as it prefers). +*/ +extern "C" int +thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2) +{ + rpl_group_info *rgi1, *rgi2; + + if (!thd1 || !thd2) + return 0; + + /* + If the transactions are participating in the same replication domain in + parallel replication, then request to select the one that will commit + later (in the fixed commit order from the master) as the deadlock victim. + */ + rgi1= thd1->rgi_slave; + rgi2= thd2->rgi_slave; + if (rgi1 && rgi2 && + rgi1->is_parallel_exec && + rgi1->rli == rgi2->rli && + rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id) + return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1; + + /* No preferences, let the storage engine decide. 
*/ + return 0; +} + + extern "C" int thd_non_transactional_update(const MYSQL_THD thd) { return(thd->transaction->all.modified_non_trans_table); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 3ca8fc3303b..132be65a848 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -80,6 +80,7 @@ #include "debug_sync.h" // DEBUG_SYNC #include "debug.h" // debug_crash_here #include +#include "rpl_rli.h" #ifdef WITH_WSREP #include "wsrep_trans_observer.h" /* wsrep_start_transction() */ @@ -1828,6 +1829,12 @@ int write_record(THD *thd, TABLE *table, COPY_INFO *info, select_result *sink) save_read_set= table->read_set; save_write_set= table->write_set; + DBUG_EXECUTE_IF("rpl_write_record_small_sleep_gtid_100_200", + { + if (thd->rgi_slave && (thd->rgi_slave->current_gtid.seq_no == 100 || + thd->rgi_slave->current_gtid.seq_no == 200)) + my_sleep(20000); + }); if (info->handle_duplicates == DUP_REPLACE || info->handle_duplicates == DUP_UPDATE) { diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 0bb14f5fcf7..a794e2bbf03 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -63,6 +63,7 @@ ulong innodb_deadlock_report; extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); +extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); #endif /** Functor for accessing the embedded node within a table lock. 
*/ @@ -1958,6 +1959,14 @@ check_trx_error: end_wait: mysql_mutex_unlock(&lock_sys.wait_mutex); + DBUG_EXECUTE_IF("small_sleep_after_lock_wait", + { + if (!(type_mode & LOCK_TABLE) && + (type_mode & LOCK_MODE_MASK) == LOCK_X && + trx->error_state != DB_DEADLOCK && !trx_is_interrupted(trx)) { + my_sleep(20000); + } + }); thd_wait_end(trx->mysql_thd); return trx->error_state; @@ -6309,6 +6318,28 @@ namespace Deadlock } } + ATTRIBUTE_COLD + /** Calculate a number used to compare deadlock victim candidates. +Bit 62 is used to prefer transaction that did not modified non-transactional +tables. Bits 1-61 are set to TRX_WEIGHT to prefer transactions with less locks +and less modified rows. Bit 0 is used to prefer orig_trx in case of a tie. + @param trx Transaction + @return a 64-bit unsigned, the lower the more preferred TRX is as a deadlock + victim */ + static undo_no_t calc_victim_weight(trx_t *trx, const trx_t *orig_trx) + { + const undo_no_t trx_weight= (trx != orig_trx) | (TRX_WEIGHT(trx) << 1) | + (trx->mysql_thd && +#ifdef WITH_WSREP + (thd_has_edited_nontrans_tables(trx->mysql_thd) || + (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false))) +#else + thd_has_edited_nontrans_tables(trx->mysql_thd) +#endif /* WITH_WSREP */ + ? 1ULL << 62 : 0); + return trx_weight; + } + ATTRIBUTE_COLD /** Report a deadlock (cycle in the waits-for graph). @param trx transaction waiting for a lock in this thread @@ -6332,24 +6363,7 @@ namespace Deadlock static const char rollback_msg[]= "*** WE ROLL BACK TRANSACTION (%u)\n"; char buf[9 + sizeof rollback_msg]; - - /* If current_trx=true, trx is owned by this thread, and we can - safely invoke these without holding trx->mutex or lock_sys.latch. - If current_trx=false, a concurrent commit is protected by both - lock_sys.latch and lock_sys.wait_mutex. 
*/ - const undo_no_t trx_weight= TRX_WEIGHT(trx) | - (trx->mysql_thd && -#ifdef WITH_WSREP - (thd_has_edited_nontrans_tables(trx->mysql_thd) || - (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false))) -#else - thd_has_edited_nontrans_tables(trx->mysql_thd) -#endif /* WITH_WSREP */ - ? 1ULL << 63 : 0); - trx_t *victim= nullptr; - undo_no_t victim_weight= ~0ULL; - unsigned victim_pos= 0, trx_pos= 0; /* Here, lock elision does not make sense, because for the output we are going to invoke system calls, @@ -6362,43 +6376,52 @@ namespace Deadlock } { - unsigned l= 0; + unsigned l= 1; /* Now that we are holding lock_sys.wait_mutex again, check whether a cycle still exists. */ trx_t *cycle= find_cycle(trx); if (!cycle) goto func_exit; /* One of the transactions was already aborted. */ + + victim= cycle; + undo_no_t victim_weight= calc_victim_weight(victim, trx); + unsigned victim_pos= l; for (trx_t *next= cycle;;) { next= next->lock.wait_trx; l++; - const undo_no_t next_weight= TRX_WEIGHT(next) | - (next->mysql_thd && -#ifdef WITH_WSREP - (thd_has_edited_nontrans_tables(next->mysql_thd) || - (next->is_wsrep() && wsrep_thd_is_BF(next->mysql_thd, false))) + const undo_no_t next_weight= calc_victim_weight(next, trx); +#ifdef HAVE_REPLICATION + const int pref= + thd_deadlock_victim_preference(victim->mysql_thd, next->mysql_thd); + /* Set bit 63 for any non-preferred victim to make such preference take + priority in the weight comparison. + -1 means victim is preferred. 1 means next is preferred. */ + undo_no_t victim_not_pref= (1ULL << 63) & (undo_no_t)(int64_t)(-pref); + undo_no_t next_not_pref= (1ULL << 63) & (undo_no_t)(int64_t)pref; #else - thd_has_edited_nontrans_tables(next->mysql_thd) -#endif /* WITH_WSREP */ - ? 1ULL << 63 : 0); - if (next_weight < victim_weight) + undo_no_t victim_not_pref= 0; + undo_no_t next_not_pref= 0; +#endif + /* Single comparison to decide which of two transactions is preferred + as a deadlock victim. 
+ - If thd_deadlock_victim_preference() returned non-zero, bit 63 + comparison will decide the preferred one. + - Else if exactly one of them modified non-transactional tables, + bit 62 will decide. + - Else the TRX_WEIGHT in bits 1-61 will decide, if not equal. + - Else, if one of them is the original trx, bit 0 will decide. + - If all is equal, previous victim will arbitrarily be chosen. */ + if ((next_weight|next_not_pref) < (victim_weight|victim_not_pref)) { victim_weight= next_weight; victim= next; victim_pos= l; } - if (next == victim) - trx_pos= l; if (next == cycle) break; } - if (trx_pos && trx_weight == victim_weight) - { - victim= trx; - victim_pos= trx_pos; - } - /* Finally, display the deadlock */ switch (const auto r= static_cast(innodb_deadlock_report)) { case REPORT_OFF: From 805e0668c95f6a6ab3c2da3d966944ac0a58d903 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sun, 9 Jul 2023 16:45:47 +0200 Subject: [PATCH 27/35] MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication Remove the exception that InnoDB does not report auto-increment locks waits to the parallel replication. There was an assumption that these waits could not cause conflicts with in-order parallel replication and thus need not be reported. However, this assumption is wrong and it is possible to get conflicts that lead to hangs for the duration of --innodb-lock-wait-timeout. This can be seen with three transactions: 1. T1 is waiting for T3 on an autoinc lock 2. T2 is waiting for T1 to commit 3. T3 is waiting on a normal row lock held by T2 Here, T3 needs to be deadlock killed on the wait by T1. 
Signed-off-by: Kristian Nielsen --- .../suite/rpl/r/rpl_parallel_autoinc.result | 95 ++++++++++++ .../suite/rpl/t/rpl_parallel_autoinc.test | 140 ++++++++++++++++++ sql/sql_class.cc | 6 - storage/innobase/lock/lock0lock.cc | 8 +- 4 files changed, 238 insertions(+), 11 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel_autoinc.result create mode 100644 mysql-test/suite/rpl/t/rpl_parallel_autoinc.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result new file mode 100644 index 00000000000..e6f18e2d558 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_autoinc.result @@ -0,0 +1,95 @@ +include/master-slave.inc +[connection master] +MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication +include/rpl_connect.inc [creating slave2] +include/rpl_connect.inc [creating slave3] +connection master; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); +CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +set @@global.innodb_lock_wait_timeout= 20; +connection master; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. 
Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +DELETE FROM t1 WHERE c >= 4 and c < 6; +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave +COMMIT; +include/save_master_gtid.inc +connection slave1; +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; +a b +row for T1 0 +connection slave2; +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; +a b +row for T3 0 +connection slave3; +BEGIN; +DELETE FROM t2 WHERE a=30; +connection slave; +include/start_slave.inc +connection slave2; +ROLLBACK; +connection slave1; +ROLLBACK; +connection slave3; +ROLLBACK; +connection slave; +include/sync_with_master_gtid.inc +SELECT * FROM t1 ORDER BY a; +a b c +1 0 1 +2 0 1 +3 0 2 +4 0 3 +6 0 7 +7 0 8 +8 1 1 +9 3 2 +10 3 3 +11 3 4 +SELECT * FROM t2 ORDER BY a; +a b +10 1 +20 2 +30 3 +40 4 +50 5 +SELECT * FROM t3 ORDER BY a; +a b +row for T1 1 +row for T2 0 +row for T3 1 +connection master; +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= 0; +SET @@global.slave_parallel_mode= optimistic; +SET @@global.innodb_lock_wait_timeout= 50; +include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; +@@GLOBAL.innodb_autoinc_lock_mode +1 
+include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test new file mode 100644 index 00000000000..0e96b4dfb80 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_autoinc.test @@ -0,0 +1,140 @@ +--source include/have_binlog_format_statement.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--echo MDEV-31482: Lock wait timeout with INSERT-SELECT, autoinc, and statement-based replication + +# The scenario is transactions T1, T2, T3: +# +# T1 is waiting for T3 on an autoinc lock +# T2 is waiting for T1 to commit +# T3 is waiting on a normal row lock held by T2 +# +# This caused a hang until innodb_lock_wait_timeout, because autoinc +# locks were not reported to the in-order parallel replication, so T3 +# was not deadlock killed. + +--let $lock_wait_timeout=20 + +--let $rpl_connection_name= slave2 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--let $rpl_connection_name= slave3 +--let $rpl_server_number= 2 +--source include/rpl_connect.inc + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; + +# A table as destination for INSERT-SELECT +CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT, c INT, INDEX (c)) ENGINE=InnoDB; +INSERT INTO t1 (b,c) VALUES (0, 1), (0, 1), (0, 2), (0,3), (0, 5), (0, 7), (0, 8); + +# A table as source for INSERT-SELECT. +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t2 VALUES (10,1), (20,2), (30,3), (40,4), (50,5); + +# A table to help order slave worker threads to setup the desired scenario. 
+CREATE TABLE t3 (a VARCHAR(20) PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES ('row for T1', 0), ('row for T2', 0), ('row for T3', 0); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--let $save_innodb_lock_wait_timeout= `SELECT @@global.innodb_lock_wait_timeout` +--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads` +--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode` +set @@global.slave_parallel_threads= 3; +set @@global.slave_parallel_mode= OPTIMISTIC; +eval set @@global.innodb_lock_wait_timeout= $lock_wait_timeout; + +--connection master +# Transaction T1. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T1"; +INSERT INTO t1(b, c) SELECT 1, t2.b FROM t2 WHERE a=10; +COMMIT; + +# Transaction T2. +DELETE FROM t1 WHERE c >= 4 and c < 6; + +# Transaction T3. +BEGIN; +UPDATE t3 SET b=b+1 where a="row for T3"; +INSERT INTO t1(b, c) SELECT 3, t2.b FROM t2 WHERE a >= 20 AND a <= 40; +COMMIT; + +--source include/save_master_gtid.inc + +--connection slave1 +# Temporarily block T1 to create the scheduling that triggers the bug. +BEGIN; +SELECT * FROM t3 WHERE a="row for T1" FOR UPDATE; + +--connection slave2 +# Temporarily block T3 from starting (so T2 can reach commit). +BEGIN; +SELECT * FROM t3 WHERE a="row for T3" FOR UPDATE; + +--connection slave3 +# This critical step blocks T3 after it has inserted its first row, +# and thus taken the auto-increment lock, but before it has reached +# the point where it gets a row lock wait on T2. Even though +# auto-increment lock waits were not reported due to the bug, +# transitive lock waits (T1 waits on autoinc of T3 which waits on row +# on T2) _were_ reported as T1 waiting on T2, and thus a deadlock kill +# happened and the bug was not triggered. 
+BEGIN; +DELETE FROM t2 WHERE a=30; + +--connection slave +--source include/start_slave.inc + +# First let T2 complete until it is waiting for T1 to commit. +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Waiting for prior transaction to commit' and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Then let T3 reach the point where it has obtained the autoinc lock, +# but it is not yet waiting for a row lock held by T2. +--connection slave2 +ROLLBACK; +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state='Sending data' and info LIKE 'INSERT INTO t1(b, c) SELECT 3, t2.b%' and time_ms > 500 and command LIKE 'Slave_worker'; +--source include/wait_condition.inc + +# Now let T1 continue, while T3 is holding the autoinc lock but before +# it is waiting for T2. Wait a short while to give the hang a chance to +# happen; T1 needs to get to request the autoinc lock before we let T3 +# continue. (There's a small chance the sleep will be too small, which will +# let the test occasionally pass on non-fixed server). +--connection slave1 +ROLLBACK; +--sleep 0.5 + +# Now let T3 continue; the bug was that this lead to an undetected +# deadlock that remained until innodb lock wait timeout. +--connection slave3 +ROLLBACK; + +--connection slave +--let $slave_timeout= `SELECT $lock_wait_timeout/2` +--source include/sync_with_master_gtid.inc +--let $slave_timeout= +SELECT * FROM t1 ORDER BY a; +SELECT * FROM t2 ORDER BY a; +SELECT * FROM t3 ORDER BY a; + +# Cleanup. 
+--connection master +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format"); +DROP TABLE t1, t2, t3; + +--connection slave +--source include/stop_slave.inc +eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads; +eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode; +eval SET @@global.innodb_lock_wait_timeout= $save_innodb_lock_wait_timeout; +--source include/start_slave.inc +SELECT @@GLOBAL.innodb_autoinc_lock_mode; +--source include/rpl_end.inc diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 0c929837957..fabfe304e6c 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5382,12 +5382,6 @@ thd_need_wait_reports(const MYSQL_THD thd) deadlock with the pre-determined commit order, we kill the later transaction, and later re-try it, to resolve the deadlock. - This call need only receive reports about waits for locks that will remain - until the holding transaction commits. InnoDB auto-increment locks, - for example, are released earlier, and so need not be reported. (Such false - positives are not harmful, but could lead to unnecessary kill and retry, so - best avoided). - Returns 1 if the OTHER_THD will be killed to resolve deadlock, 0 if not. The actual kill will happen later, asynchronously from another thread. The caller does not need to take any actions on the return value if the diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index a794e2bbf03..64df6320451 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1740,7 +1740,6 @@ static void lock_wait_rpl_report(trx_t *trx) const lock_t *wait_lock= trx->lock.wait_lock; if (!wait_lock) return; - ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC)); /* This would likely be too large to attempt to use a memory transaction, even for wait_lock->is_table(). 
*/ const bool nowait= lock_sys.wr_lock_try(); @@ -1764,14 +1763,13 @@ func_exit: } else if (!wait_lock->is_waiting()) goto func_exit; - ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC)); if (wait_lock->is_table()) { dict_table_t *table= wait_lock->un_member.tab_lock.table; for (lock_t *lock= UT_LIST_GET_FIRST(table->locks); lock; lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) - if (!(lock->type_mode & LOCK_AUTO_INC) && lock->trx != trx) + if (lock->trx != trx) thd_rpl_deadlock_check(thd, lock->trx->mysql_thd); } else @@ -1862,8 +1860,8 @@ dberr_t lock_wait(que_thr_t *thr) thd_need_wait_reports() will hold even if parallel (or any) replication is not being used. We want to be allow the user to skip lock_wait_rpl_report(). */ - const bool rpl= !(type_mode & LOCK_AUTO_INC) && trx->mysql_thd && - innodb_deadlock_detect && thd_need_wait_reports(trx->mysql_thd); + const bool rpl= trx->mysql_thd && innodb_deadlock_detect && + thd_need_wait_reports(trx->mysql_thd); #endif const bool row_lock_wait= thr->lock_state == QUE_THR_LOCK_ROW; timespec abstime; From ca5c122adcd39c34b1bd7059903668586496caf6 Mon Sep 17 00:00:00 2001 From: Monty Date: Fri, 11 Aug 2023 17:59:40 +0300 Subject: [PATCH 28/35] MDEV-9938 Prepared statement return wrong result (missing row) The problem is that the first execution of the prepared statement makes a permanent optimization of converting the LEFT JOIN to an INNER JOIN. This is based on the assumption that all the user parameters (?) are always constants and that parameters to Item_cond() will not change value from true and false between different executions. (The example was using IS NULL, which will change value if parameter depending on if the parameter is NULL or not). The fix is to change Item_cond::fix_fields() and Item_cond::eval_not_null_tables() to not threat user parameters as constants. This will ensure that we don't do the LEFT_JOIN -> INNER JOIN conversion that causes problems. 
There are also some things that need to be improved regarding
calculations of not_null_tables_cache as we get a different value for
WHERE 1 or t1.a=1 compared to WHERE t1.a=1 or 1

Changes done:
- Mark Item_param with the PARAM flag to be able to quickly check
  in Item_cond::eval_not_null_tables() if an item contains a
  prepared statement parameter (just like we check for stored procedure
  parameters).
- Fixed that Item_cond::not_null_tables_cache is not depending on
  order of arguments.
- Don't call item->eval_const_cond() for items that are NOT on the top
  level of the WHERE clause. This removed a lot of unnecessary
  warnings in the test suite!
- Do not reset not_null_tables_cache for not top level items.
- Simplified Item_cond::fix_fields by calling eval_not_null_tables()
  instead of having duplication of all the code in
  eval_not_null_tables().
- Return an error if Item_cond::fix_fields() generates an error.
  The old code did generate an error in some cases, but not in all
  cases.
- Fixed all handling of the above error in make_cond_for_tables().
  (The error handling by the callers did not exist before, which
  could lead to asserts in many different places in the old code.)
- All changes in sql_select.cc are just checking the return value of
  fix_fields() and make_cond_for_tables() and returning an error
  value if fix_fields() returns true or make_cond_for_tables()
  returns NULL and is_error() is set.
- Mark Item_cond as const_item if all arguments return true for
  can_eval_in_optimize().
Reviewer: Sergei Petrunia --- mysql-test/main/having_cond_pushdown.result | 4 -- mysql-test/main/prepare.result | 35 ++++++++++ mysql-test/main/prepare.test | 36 ++++++++++ mysql-test/main/ps.result | 7 ++ mysql-test/main/ps.test | 8 ++- mysql-test/main/range.result | 2 - mysql-test/main/range_mrr_icp.result | 2 - mysql-test/main/subselect_exists2in.result | 4 +- mysql-test/main/view.result | 4 +- mysql-test/suite/vcol/r/vcol_syntax.result | 10 ++- mysql-test/suite/vcol/t/vcol_syntax.test | 2 + sql/item.cc | 1 + sql/item.h | 11 +-- sql/item_cmpfunc.cc | 74 ++++++++------------- sql/sql_select.cc | 74 ++++++++++++++++----- 15 files changed, 190 insertions(+), 84 deletions(-) diff --git a/mysql-test/main/having_cond_pushdown.result b/mysql-test/main/having_cond_pushdown.result index fea8f83f9a1..fb73cf789b5 100644 --- a/mysql-test/main/having_cond_pushdown.result +++ b/mysql-test/main/having_cond_pushdown.result @@ -4932,12 +4932,8 @@ SELECT * FROM t1 GROUP BY i HAVING i IN ( i IS NULL); i SELECT * FROM t1 GROUP BY i HAVING i IN ( i IS NULL AND 'x' = 0); i -Warnings: -Warning 1292 Truncated incorrect DECIMAL value: 'x' SELECT * FROM t1 GROUP BY i HAVING i='1' IN ( i IS NULL AND 'x' = 0); i -Warnings: -Warning 1292 Truncated incorrect DECIMAL value: 'x' DROP TABLE t1; # # MDEV-28080: HAVING with NOT EXIST predicate in an equality diff --git a/mysql-test/main/prepare.result b/mysql-test/main/prepare.result index 7c730bff0c5..a9ac531280c 100644 --- a/mysql-test/main/prepare.result +++ b/mysql-test/main/prepare.result @@ -80,3 +80,38 @@ drop table t1, t2, t3; # # End of 10.4 tests # +# +# MDEV-9938 Prepared statement return wrong result (missing row) +# +CREATE TABLE t1 (a_id INT AUTO_INCREMENT PRIMARY KEY, a_text VARCHAR(20)); +CREATE TABLE t2 (b_id INT AUTO_INCREMENT PRIMARY KEY, b_a_id INT); +INSERT INTO t1 VALUES (NULL, 'word1'); +INSERT INTO t2 VALUES (NULL, 1), (NULL, NULL); +PREPARE q FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? 
IS NULL) OR (t1.a_text = ?))'; +SET @var = 'word1'; +expect row count 1 +EXECUTE q USING @var, @var; +b_id b_a_id a_id a_text +1 1 1 word1 +expect row count = 2 +EXECUTE q USING @nul, @nul; +b_id b_a_id a_id a_text +1 1 1 word1 +2 NULL NULL NULL +PREPARE q2 FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? IS NULL) OR (t1.a_text = ?))'; +expect row count 2 +SET @var = 'word1'; +EXECUTE q2 USING @nul, @nul; +b_id b_a_id a_id a_text +1 1 1 word1 +2 NULL NULL NULL +deallocate prepare q; +deallocate prepare q2; +drop table t1,t2; +# +# End of 10.6 tests +# diff --git a/mysql-test/main/prepare.test b/mysql-test/main/prepare.test index bf37f6dc8d1..b8ee5ad6b6d 100644 --- a/mysql-test/main/prepare.test +++ b/mysql-test/main/prepare.test @@ -69,3 +69,39 @@ drop table t1, t2, t3; --echo # --echo # End of 10.4 tests --echo # + +--echo # +--echo # MDEV-9938 Prepared statement return wrong result (missing row) +--echo # + +CREATE TABLE t1 (a_id INT AUTO_INCREMENT PRIMARY KEY, a_text VARCHAR(20)); +CREATE TABLE t2 (b_id INT AUTO_INCREMENT PRIMARY KEY, b_a_id INT); + +INSERT INTO t1 VALUES (NULL, 'word1'); +INSERT INTO t2 VALUES (NULL, 1), (NULL, NULL); + +PREPARE q FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? IS NULL) OR (t1.a_text = ?))'; + +SET @var = 'word1'; +--echo expect row count 1 +EXECUTE q USING @var, @var; +--echo expect row count = 2 +EXECUTE q USING @nul, @nul; + +PREPARE q2 FROM 'SELECT * FROM t2 + LEFT JOIN t1 ON (t1.a_id = t2.b_a_id) +WHERE ((? 
IS NULL) OR (t1.a_text = ?))'; + +--echo expect row count 2 +SET @var = 'word1'; +EXECUTE q2 USING @nul, @nul; + +deallocate prepare q; +deallocate prepare q2; +drop table t1,t2; + +--echo # +--echo # End of 10.6 tests +--echo # diff --git a/mysql-test/main/ps.result b/mysql-test/main/ps.result index fc982fba4e5..d96fcbe2dcc 100644 --- a/mysql-test/main/ps.result +++ b/mysql-test/main/ps.result @@ -4094,9 +4094,16 @@ DROP TABLE t1, t2; # CREATE TABLE t1 (a INT); PREPARE stmt FROM "SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1"; +execute stmt; +1 +SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; +1 +insert into t1 values(1),(2); +execute stmt; ERROR 22003: BIGINT UNSIGNED value is out of range in '18446744073709551615 + 1' SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; ERROR 22003: BIGINT UNSIGNED value is out of range in '18446744073709551615 + 1' +deallocate prepare stmt; drop table t1; # End of 5.3 tests # diff --git a/mysql-test/main/ps.test b/mysql-test/main/ps.test index ebf646eadf3..0043d3aa141 100644 --- a/mysql-test/main/ps.test +++ b/mysql-test/main/ps.test @@ -3632,12 +3632,16 @@ DROP TABLE t1, t2; --echo # with out of range in GROUP BY --echo # CREATE TABLE t1 (a INT); - ---error ER_DATA_OUT_OF_RANGE PREPARE stmt FROM "SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1"; +execute stmt; +SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; +insert into t1 values(1),(2); +--error ER_DATA_OUT_OF_RANGE +execute stmt; --error ER_DATA_OUT_OF_RANGE SELECT 1 FROM t1 GROUP BY 0 OR 18446744073709551615+1; +deallocate prepare stmt; drop table t1; --echo # End of 5.3 tests diff --git a/mysql-test/main/range.result b/mysql-test/main/range.result index 31777773240..8b4eafa3682 100644 --- a/mysql-test/main/range.result +++ b/mysql-test/main/range.result @@ -1625,8 +1625,6 @@ NULL Warnings: Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date 
-Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date -Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date SELECT str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20'; str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20' 1 diff --git a/mysql-test/main/range_mrr_icp.result b/mysql-test/main/range_mrr_icp.result index 6817edd30cd..4fb717b0b51 100644 --- a/mysql-test/main/range_mrr_icp.result +++ b/mysql-test/main/range_mrr_icp.result @@ -1628,8 +1628,6 @@ NULL Warnings: Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date -Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date -Warning 1411 Incorrect datetime value: '2007-20-00' for function str_to_date SELECT str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20'; str_to_date('2007-10-00', '%Y-%m-%d') BETWEEN '' AND '2007/10/20' 1 diff --git a/mysql-test/main/subselect_exists2in.result b/mysql-test/main/subselect_exists2in.result index 6ff518b5a29..78f5457012f 100644 --- a/mysql-test/main/subselect_exists2in.result +++ b/mysql-test/main/subselect_exists2in.result @@ -333,7 +333,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'test.t2.b' of SELECT #3 was resolved in SELECT #2 -Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ((1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` +Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ( (/* select#3 */ select `test`.`t3`.`c` from `test`.`t3` where `test`.`t3`.`c` is not null ), (1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT 
EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` SELECT ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) FROM t1; ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) 1 @@ -347,7 +347,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'test.t2.b' of SELECT #3 was resolved in SELECT #2 -Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ((1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` +Note 1003 /* select#1 */ select (/* select#2 */ select 1 from dual where !(1 is not null and (1,1 in ( (/* select#3 */ select `test`.`t3`.`c` from `test`.`t3` where `test`.`t3`.`c` is not null ), (1 in on distinct_key where 1 = ``.`c`))))) AS `( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) )` from `test`.`t1` SELECT ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) FROM t1; ( SELECT b FROM t2 WHERE NOT EXISTS ( SELECT c FROM t3 WHERE c = b ) ) 1 diff --git a/mysql-test/main/view.result b/mysql-test/main/view.result index 329f367844e..75cfd55f47c 100644 --- a/mysql-test/main/view.result +++ b/mysql-test/main/view.result @@ -4630,7 +4630,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t4 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where !<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or 1) having trigcond(NULL is null)))) +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where 
!<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or (NULL is null)) having trigcond(NULL is null)))) SELECT * FROM t1, t2 WHERE t2.a NOT IN (SELECT t3.b FROM t3 RIGHT JOIN t4 ON (t4.a = t3.a) WHERE t4.a >= t1.a); @@ -4646,7 +4646,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t4 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: Note 1276 Field or reference 'v1.a' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where !<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or 1) having trigcond(NULL is null)))) +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,10 AS `a` from `test`.`t1` where !<10,`test`.`t1`.`a`>((10,(/* select#2 */ select NULL from `test`.`t4` where `test`.`t4`.`a` >= `test`.`t1`.`a` and trigcond((10) = NULL or (NULL is null)) having trigcond(NULL is null)))) SELECT * FROM v1, t2 WHERE t2.a NOT IN (SELECT t3.b FROM t3 RIGHT JOIN t4 ON (t4.a = t3.a) WHERE t4.a >= v1.a); diff --git a/mysql-test/suite/vcol/r/vcol_syntax.result b/mysql-test/suite/vcol/r/vcol_syntax.result index 144d4ab335d..7725d59f59c 100644 --- a/mysql-test/suite/vcol/r/vcol_syntax.result +++ b/mysql-test/suite/vcol/r/vcol_syntax.result @@ -201,18 +201,22 @@ drop table t1; # MDEV-31319 Assertion const_item_cache == true failed in Item_func::fix_fields # create table t (f1 int, f2 int, fv int generated always as (case user() when 'foo' or 'bar' then f1 else f2 end) virtual); -Warnings: -Warning 1292 Truncated incorrect DOUBLE value: 'foo' -Warning 1292 Truncated incorrect DOUBLE value: 'bar' select * from t; f1 f2 fv +insert into t (f1,f2) values(1,1); +select * from t; +f1 f2 fv +1 1 1 Warnings: +Warning 1292 Truncated incorrect DECIMAL value: 'root@localhost' Warning 1292 
Truncated incorrect DOUBLE value: 'foo' Warning 1292 Truncated incorrect DOUBLE value: 'bar' create table tmp as select * from information_schema.tables where table_name = 't'; select * from t; f1 f2 fv +1 1 1 Warnings: +Warning 1292 Truncated incorrect DECIMAL value: 'root@localhost' Warning 1292 Truncated incorrect DOUBLE value: 'foo' Warning 1292 Truncated incorrect DOUBLE value: 'bar' drop table t, tmp; diff --git a/mysql-test/suite/vcol/t/vcol_syntax.test b/mysql-test/suite/vcol/t/vcol_syntax.test index da2ad27d37d..c26c4897833 100644 --- a/mysql-test/suite/vcol/t/vcol_syntax.test +++ b/mysql-test/suite/vcol/t/vcol_syntax.test @@ -168,6 +168,8 @@ drop table t1; --echo # create table t (f1 int, f2 int, fv int generated always as (case user() when 'foo' or 'bar' then f1 else f2 end) virtual); select * from t; +insert into t (f1,f2) values(1,1); +select * from t; create table tmp as select * from information_schema.tables where table_name = 't'; select * from t; diff --git a/sql/item.cc b/sql/item.cc index ce1ba00bd5b..9f964ed019a 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -4068,6 +4068,7 @@ Item_param::Item_param(THD *thd, const LEX_CSTRING *name_arg, value is set. */ set_maybe_null(); + with_flags= with_flags | item_with_t::PARAM; } diff --git a/sql/item.h b/sql/item.h index 0f59b38cd38..3be7cd7283a 100644 --- a/sql/item.h +++ b/sql/item.h @@ -787,7 +787,8 @@ enum class item_with_t : item_flags_t FIELD= (1<<2), // If any item except Item_sum contains a field. 
SUM_FUNC= (1<<3), // If item contains a sum func SUBQUERY= (1<<4), // If item containts a sub query - ROWNUM_FUNC= (1<<5) + ROWNUM_FUNC= (1<<5), // If ROWNUM function was used + PARAM= (1<<6) // If user parameter was used }; @@ -1087,6 +1088,8 @@ public: { return (bool) (with_flags & item_with_t::SUBQUERY); } inline bool with_rownum_func() const { return (bool) (with_flags & item_with_t::ROWNUM_FUNC); } + inline bool with_param() const + { return (bool) (with_flags & item_with_t::PARAM); } inline void copy_flags(const Item *org, item_base_t mask) { base_flags= (item_base_t) (((item_flags_t) base_flags & @@ -5304,17 +5307,17 @@ public: :used_tables_cache(other->used_tables_cache), const_item_cache(other->const_item_cache) { } - void used_tables_and_const_cache_init() + inline void used_tables_and_const_cache_init() { used_tables_cache= 0; const_item_cache= true; } - void used_tables_and_const_cache_join(const Item *item) + inline void used_tables_and_const_cache_join(const Item *item) { used_tables_cache|= item->used_tables(); const_item_cache&= item->const_item(); } - void used_tables_and_const_cache_update_and_join(Item *item) + inline void used_tables_and_const_cache_update_and_join(Item *item) { item->update_used_tables(); used_tables_and_const_cache_join(item); diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 1e7209ad37c..1ef5d3c9d0c 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4915,7 +4915,7 @@ Item_cond::fix_fields(THD *thd, Item **ref) List_iterator li(list); Item *item; uchar buff[sizeof(char*)]; // Max local vars in function - bool is_and_cond= functype() == Item_func::COND_AND_FUNC; + not_null_tables_cache= 0; used_tables_and_const_cache_init(); @@ -4957,52 +4957,24 @@ Item_cond::fix_fields(THD *thd, Item **ref) merge_sub_condition(li); item= *li.ref(); // may be substituted in fix_fields/merge_item_if_possible - used_tables_cache|= item->used_tables(); - if (item->can_eval_in_optimize() && !item->with_sp_var() && - 
!cond_has_datetime_is_null(item)) - { - if (item->eval_const_cond() == is_and_cond && top_level()) - { - /* - a. This is "... AND true_cond AND ..." - In this case, true_cond has no effect on cond_and->not_null_tables() - b. This is "... OR false_cond/null cond OR ..." - In this case, false_cond has no effect on cond_or->not_null_tables() - */ - } - else - { - /* - a. This is "... AND false_cond/null_cond AND ..." - The whole condition is FALSE/UNKNOWN. - b. This is "... OR const_cond OR ..." - In this case, cond_or->not_null_tables()=0, because the condition - const_cond might evaluate to true (regardless of whether some tables - were NULL-complemented). - */ - not_null_tables_cache= (table_map) 0; - and_tables_cache= (table_map) 0; - } - if (thd->is_error()) - return TRUE; - } - else - { - table_map tmp_table_map= item->not_null_tables(); - not_null_tables_cache|= tmp_table_map; - and_tables_cache&= tmp_table_map; - - const_item_cache= FALSE; - } + used_tables_and_const_cache_join(item); base_flags|= item->base_flags & item_base_t::MAYBE_NULL; with_flags|= item->with_flags; } - if (fix_length_and_dec()) - return TRUE; + (void) eval_not_null_tables((void*) 0); + + /* + We have to set fixed as some other items will check it and fail if we + do not. This can be changed when we properly check if fix_fields() + fails in call cases. + */ base_flags|= item_base_t::FIXED; + if (fix_length_and_dec() || thd->is_error()) + return TRUE; return FALSE; } + /** @brief Merge a lower-level condition pointed by the iterator into this Item_cond @@ -5052,6 +5024,9 @@ void Item_cond::merge_sub_condition(List_iterator& li) } } +/* + Calculate not_null_tables_cache and and_tables_cache. 
+*/ bool Item_cond::eval_not_null_tables(void *opt_arg) @@ -5059,15 +5034,17 @@ Item_cond::eval_not_null_tables(void *opt_arg) Item *item; bool is_and_cond= functype() == Item_func::COND_AND_FUNC; List_iterator li(list); + bool found= 0; + not_null_tables_cache= (table_map) 0; and_tables_cache= ~(table_map) 0; while ((item=li++)) { - table_map tmp_table_map; - if (item->can_eval_in_optimize() && !item->with_sp_var() && - !cond_has_datetime_is_null(item)) + if (item->can_eval_in_optimize() && + !item->with_sp_var() && !item->with_param() && + !cond_has_datetime_is_null(item) && top_level()) { - if (item->eval_const_cond() == is_and_cond && top_level()) + if (item->eval_const_cond() == is_and_cond) { /* a. This is "... AND true_cond AND ..." @@ -5086,14 +5063,19 @@ Item_cond::eval_not_null_tables(void *opt_arg) const_cond might evaluate to true (regardless of whether some tables were NULL-complemented). */ + found= 1; not_null_tables_cache= (table_map) 0; and_tables_cache= (table_map) 0; } } else { - tmp_table_map= item->not_null_tables(); - not_null_tables_cache|= tmp_table_map; + table_map tmp_table_map= item->not_null_tables(); + if (!found) + { + /* We should not depend on the order of items */ + not_null_tables_cache|= tmp_table_map; + } and_tables_cache&= tmp_table_map; } } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ae68d84b30f..9ae356e0749 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2479,6 +2479,8 @@ JOIN::optimize_inner() COND *table_independent_conds= make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1, FALSE, FALSE); + if (!table_independent_conds && thd->is_error()) + DBUG_RETURN(1); DBUG_EXECUTE("where", print_where(table_independent_conds, "where after opt_sum_query()", @@ -2819,6 +2821,8 @@ int JOIN::optimize_stage2() if (make_join_select(this, select, conds)) { + if (thd->is_error()) + DBUG_RETURN(1); zero_result_cause= "Impossible WHERE noticed after reading const tables"; 
select_lex->mark_const_derived(zero_result_cause); @@ -3423,9 +3427,13 @@ bool JOIN::add_having_as_table_cond(JOIN_TAB *tab) having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0, ~used_tables, 0, false, false); + if (!having && thd->is_error()) + DBUG_RETURN(true); DBUG_EXECUTE("where", print_where(having, "having after sort", QT_ORDINARY);); } + else if (thd->is_error()) + DBUG_RETURN(true); DBUG_RETURN(false); } @@ -10625,17 +10633,22 @@ int JOIN_TAB::make_scan_filter() Item *cond= is_inner_table_of_outer_join() ? *get_first_inner_table()->on_expr_ref : join->conds; - if (cond && - (tmp= make_cond_for_table(join->thd, cond, - join->const_table_map | table->map, - table->map, -1, FALSE, TRUE))) + if (cond) { - DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY);); - if (!(cache_select= - (SQL_SELECT*) join->thd->memdup((uchar*) select, sizeof(SQL_SELECT)))) - DBUG_RETURN(1); - cache_select->cond= tmp; - cache_select->read_tables=join->const_table_map; + if ((tmp= make_cond_for_table(join->thd, cond, + join->const_table_map | table->map, + table->map, -1, FALSE, TRUE))) + { + DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY);); + if (!(cache_select= + (SQL_SELECT*) join->thd->memdup((uchar*) select, + sizeof(SQL_SELECT)))) + DBUG_RETURN(1); + cache_select->cond= tmp; + cache_select->read_tables=join->const_table_map; + } + else if (join->thd->is_error()) + DBUG_RETURN(1); } DBUG_RETURN(0); } @@ -12215,6 +12228,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) const_cond= make_cond_for_table(thd, cond, join->const_table_map, (table_map) 0, -1, FALSE, FALSE); + if (!const_cond && thd->is_error()) + DBUG_RETURN(1); + /* Add conditions added by add_not_null_conds(). 
*/ for (uint i= 0 ; i < join->const_tables ; i++) add_cond_and_fix(thd, &const_cond, @@ -12263,6 +12279,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond); join->outer_ref_cond= outer_ref_cond; } + else if (thd->is_error()) + DBUG_RETURN(1); } else { @@ -12278,6 +12296,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->pseudo_bits_cond); join->pseudo_bits_cond= pseudo_bits_cond; } + else if (thd->is_error()) + DBUG_RETURN(1); } } } @@ -12381,6 +12401,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) { tmp= make_cond_for_table(thd, cond, used_tables, current_map, i, FALSE, FALSE); + if (!tmp && thd->is_error()) + DBUG_RETURN(1); + if (tab == join->join_tab + last_top_base_tab_idx) { /* @@ -12393,7 +12416,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) COND *rand_cond= make_cond_for_table(thd, cond, used_tables, rand_table_bit, -1, FALSE, FALSE); - add_cond_and_fix(thd, &tmp, rand_cond); + if (rand_cond) + add_cond_and_fix(thd, &tmp, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); } } /* Add conditions added by add_not_null_conds(). 
*/ @@ -12478,8 +12504,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) trace_cp.add_table_name(tab->table); COND *push_cond= - make_cond_for_table(thd, tmp_cond, current_map, current_map, - -1, FALSE, FALSE); + make_cond_for_table(thd, tmp_cond, current_map, current_map, + -1, FALSE, FALSE); if (push_cond) { trace_cp.add("push_cond", push_cond); @@ -12487,6 +12513,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (!tab->table->file->cond_push(push_cond)) tab->table->file->pushed_cond= push_cond; } + else if (thd->is_error()) + DBUG_RETURN(1); } } } @@ -12695,7 +12723,11 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->const_table_map, (table_map) 0, -1, FALSE, FALSE); if (!tmp_cond) - continue; + { + if (!thd->is_error()) + continue; + DBUG_RETURN(1); + } tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond, &cond_tab->not_null_compl); if (!tmp_cond) @@ -12749,6 +12781,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) current_map, /*(inner_tab - first_tab)*/ -1, FALSE, FALSE); + if (!tmp_cond && thd->is_error()) + DBUG_RETURN(1); if (tab == last_tab) { /* @@ -12762,7 +12796,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) COND *rand_cond= make_cond_for_table(thd, on_expr, used_tables2, rand_table_bit, -1, FALSE, FALSE); - add_cond_and_fix(thd, &tmp_cond, rand_cond); + if (rand_cond) + add_cond_and_fix(thd, &tmp_cond, rand_cond); + else if (thd->is_error()) + DBUG_RETURN(1); } bool is_sjm_lookup_tab= FALSE; if (inner_tab->bush_children) @@ -23676,6 +23713,8 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, retain_ref_cond, false); if (fix) new_cond->argument_list()->push_back(fix, thd->mem_root); + else if (thd->is_error()) + return ((COND*) 0); } switch (new_cond->argument_list()->elements) { case 0: @@ -23718,7 +23757,7 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, exclude_expensive_cond, retain_ref_cond, false); if (!fix) - 
return (COND*) 0; // Always true + return (COND*) 0; // Always true or error new_cond->argument_list()->push_back(fix, thd->mem_root); } /* @@ -23726,7 +23765,8 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, the new parent Item. This should not be expensive because all children of Item_cond_and should be fixed by now. */ - new_cond->fix_fields(thd, 0); + if (new_cond->fix_fields(thd, 0)) + return (COND*) 0; new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; new_cond->top_level_item(); return new_cond; From 88dd50b80ad9624d05b72751fd6e4a2cfdb6a3fe Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 16 Aug 2023 09:12:28 +0400 Subject: [PATCH 29/35] After-merge cleanup for MDEV-27207 + MDEV-31719 Something went wrong during a merge (from 10.5 to 10.6) of 68403eeda320ad0831563ce09a9c4af1549fe65e (fixing bugs MDEV-27207 and MDEV-31719). Originally (in 10.5) the fix was done in_inet6::set() in plugin/type_inet/sql_type_inet.cc. In 10.6 this code resides in a different place: in the method in_fbt::set() of a template class in sql/sql_type_fixedbin.h. During the merge: - the fix did not properly migrate to in_fbt::set() - the related MTR tests disappeared This patch fixes in_fbt::set() properly and restores MTR tests. 
--- .../mysql-test/type_inet/type_inet6.result | 26 +++++++++++++++++++ .../mysql-test/type_inet/type_inet6.test | 19 ++++++++++++++ sql/sql_type_fixedbin.h | 8 +++--- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.result b/plugin/type_inet/mysql-test/type_inet/type_inet6.result index 79958ee77f4..9c601697e4f 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.result +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.result @@ -2257,3 +2257,29 @@ a m ::10 1 DROP VIEW v1; DROP TABLE t1, t2; +# +# MDEV-27207 Assertion `!m_null_value' failed in int FixedBinTypeBundle::cmp_item_fbt::compare or in cmp_item_inet6::compare +# +CREATE TABLE t1 (a CHAR,b INET6); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +a b +Warnings: +Warning 1292 Incorrect inet6 value: '' +Warning 1292 Incorrect inet6 value: '' +INSERT INTO t1 VALUES ('','::'),('','::'); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +a b +Warnings: +Warning 1292 Incorrect inet6 value: '' +Warning 1292 Incorrect inet6 value: '' +DROP TABLE t1; +# +# MDEV-31719 Wrong result of: WHERE inet6_column IN ('','::1') +# +CREATE OR REPLACE TABLE t1 (a INET6); +INSERT INTO t1 VALUES ('::'); +SELECT * FROM t1 WHERE a IN ('','::1'); +a +Warnings: +Warning 1292 Incorrect inet6 value: '' +DROP TABLE t1; diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.test b/plugin/type_inet/mysql-test/type_inet/type_inet6.test index 2cdbc0eb2b9..771d8fbc347 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.test +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.test @@ -1656,3 +1656,22 @@ SELECT * FROM v1 ORDER BY a; SELECT * FROM t2 ORDER BY a; DROP VIEW v1; DROP TABLE t1, t2; + +--echo # +--echo # MDEV-27207 Assertion `!m_null_value' failed in int FixedBinTypeBundle::cmp_item_fbt::compare or in cmp_item_inet6::compare +--echo # + +CREATE TABLE t1 (a CHAR,b INET6); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +INSERT INTO 
t1 VALUES ('','::'),('','::'); +SELECT * FROM t1 WHERE (a,b) IN (('',''),('','')); +DROP TABLE t1; + +--echo # +--echo # MDEV-31719 Wrong result of: WHERE inet6_column IN ('','::1') +--echo # + +CREATE OR REPLACE TABLE t1 (a INET6); +INSERT INTO t1 VALUES ('::'); +SELECT * FROM t1 WHERE a IN ('','::1'); +DROP TABLE t1; diff --git a/sql/sql_type_fixedbin.h b/sql/sql_type_fixedbin.h index dbfc958cd3b..e971dc08b66 100644 --- a/sql/sql_type_fixedbin.h +++ b/sql/sql_type_fixedbin.h @@ -776,10 +776,12 @@ public: Fbt *buff= &((Fbt *) base)[pos]; Fbt_null value(item); if (value.is_null()) + { *buff= Fbt::zero(); - else - *buff= value; - return FALSE; + return true; + } + *buff= value; + return false; } uchar *get_value(Item *item) override { From 8aaacb5509a7981062d3ad0331cef212e3d79d5d Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 14 Aug 2023 11:09:51 +0300 Subject: [PATCH 30/35] MDEV-31432 tmp_table field accessed after free Before this patch, the code in Item_field::print() used this convention (described in sql_explain.h:ExplainDataStructureLifetime): - By default, the table that Item_field refers to is accessible. - ANALYZE and SHOW {EXPLAIN|ANALYZE} may print Items after some temporary tables have been dropped. They use QT_DONT_ACCESS_TMP_TABLES flag. When it is ON, Item_field::print will not access the table it refers to, if it is a temp.table The bug was that EXPLAIN statement also may compute subqueries (depending on subquery context and @@expensive_subquery_limit setting). After the computation, the subquery calls JOIN::cleanup(true) which drops some of its temporary tables. Calling Item_field::print() that refer to such table will cause an access to free'd memory. In this patch, we take into account that query optimization can compute a subquery and discard its temporary tables. Item_field::print() now assumes that any temporary table might have already been dropped. 
This means QT_DONT_ACCESS_TMP_TABLES flag is not needed - we imply it is always present. But we also make one exception: derived tables are not freed in JOIN::cleanup() call. They are freed later in close_thread_tables(), at the same time when regular tables are closed. Because of that, Item_field::print may assume that temp.tables representing derived tables are available. Initial patch by: Rex Jonston Reviewed by: Monty --- mysql-test/main/show_analyze.result | 39 +++++++++ mysql-test/main/show_analyze.test | 41 +++++++++ sql/item.cc | 32 ++++--- sql/item.h | 31 +++---- sql/item_func.cc | 2 +- sql/item_subselect.cc | 4 +- sql/item_sum.cc | 2 +- sql/mysqld.h | 6 +- sql/sql_base.cc | 3 + sql/sql_explain.cc | 131 +++++++++++----------------- sql/sql_explain.h | 53 +++++------ sql/sql_lex.cc | 1 - sql/sql_parse.cc | 3 +- sql/sql_select.cc | 6 +- sql/sql_window.cc | 2 +- 15 files changed, 199 insertions(+), 157 deletions(-) diff --git a/mysql-test/main/show_analyze.result b/mysql-test/main/show_analyze.result index c5fd61ecf64..f0a492344ba 100644 --- a/mysql-test/main/show_analyze.result +++ b/mysql-test/main/show_analyze.result @@ -434,3 +434,42 @@ ANALYZE } } DROP TABLE t1; +# +# MDEV-31432 tmp_table field accessed after free +# testing for the above (MDEV-28201) caused use after free error +# +create table t1 (x int) engine=myisam; +insert into t1 values(1); +set @tmp=@@optimizer_trace; +set @@optimizer_trace=1; +SELECT +1 IN +(( +SELECT +1 IN (SELECT 1 AS x0 +FROM +( +SELECT * +FROM (SELECT 1 AS x) AS x5 +GROUP BY x,x +HAVING +x IN ( +SELECT * +FROM t1 AS x1 +WHERE +x IN (SELECT 1 AS x +FROM t1 AS x3 +GROUP BY x +HAVING +x IN (SELECT 0 FROM t1 AS x4) +) +) +) AS x6 +) +FROM +t1 +)) as VAL; +VAL +0 +set optimizer_trace=@tmp; +drop table t1; diff --git a/mysql-test/main/show_analyze.test b/mysql-test/main/show_analyze.test index 9d59d5b2188..58d36d7dd16 100644 --- a/mysql-test/main/show_analyze.test +++ b/mysql-test/main/show_analyze.test @@ -364,3 +364,44 @@ 
ANALYZE format=json SELECT 1 FROM t1 GROUP BY convert_tz('1969-12-31 22:00:00',a,'+10:00'); DROP TABLE t1; +--echo # +--echo # MDEV-31432 tmp_table field accessed after free +--echo # testing for the above (MDEV-28201) caused use after free error +--echo # +create table t1 (x int) engine=myisam; +insert into t1 values(1); +set @tmp=@@optimizer_trace; +set @@optimizer_trace=1; +# Different warning text is produced in regular and --ps-protocol runs: +--disable_warnings +SELECT + 1 IN + (( + SELECT + 1 IN (SELECT 1 AS x0 + FROM + ( + SELECT * + FROM (SELECT 1 AS x) AS x5 + GROUP BY x,x + HAVING + x IN ( + SELECT * + FROM t1 AS x1 + WHERE + x IN (SELECT 1 AS x + FROM t1 AS x3 + GROUP BY x + HAVING + x IN (SELECT 0 FROM t1 AS x4) + ) + ) + ) AS x6 + ) + FROM + t1 + )) as VAL; +--enable_warnings +set optimizer_trace=@tmp; +drop table t1; + diff --git a/sql/item.cc b/sql/item.cc index 6b3a5ffe084..5591bcdb9c4 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -3138,7 +3138,7 @@ void Item_field::set_field(Field *field_par) if (field->table->s->tmp_table == SYSTEM_TMP_TABLE || field->table->s->tmp_table == INTERNAL_TMP_TABLE) - set_refers_to_temp_table(true); + set_refers_to_temp_table(); } @@ -3615,7 +3615,7 @@ Item *Item_field::get_tmp_table_item(THD *thd) if (new_item) { new_item->field= new_item->result_field; - new_item->set_refers_to_temp_table(true); + new_item->set_refers_to_temp_table(); } return new_item; } @@ -3626,9 +3626,14 @@ longlong Item_field::val_int_endpoint(bool left_endp, bool *incl_endp) return null_value? LONGLONG_MIN : res; } -void Item_field::set_refers_to_temp_table(bool value) +void Item_field::set_refers_to_temp_table() { - refers_to_temp_table= value; + /* + Derived temp. tables have non-zero derived_select_number. + We don't need to distingish between other kinds of temp.tables currently. + */ + refers_to_temp_table= (field->table->derived_select_number != 0)? 
+ REFERS_TO_DERIVED_TMP : REFERS_TO_OTHER_TMP; } @@ -6292,7 +6297,7 @@ void Item_field::cleanup() field= 0; item_equal= NULL; null_value= FALSE; - refers_to_temp_table= FALSE; + refers_to_temp_table= NO_TEMP_TABLE; DBUG_VOID_RETURN; } @@ -7860,14 +7865,15 @@ void Item_field::print(String *str, enum_query_type query_type) { /* If the field refers to a constant table, print the value. - (1): But don't attempt to do that if - * the field refers to a temporary (work) table, and - * temp. tables might already have been dropped. + There are two exceptions: + 1. For temporary (aka "work") tables, we can only access the derived temp. + tables. Other kinds of tables might already have been dropped. + 2. Don't print constants if QT_NO_DATA_EXPANSION or QT_VIEW_INTERNAL is + specified. */ - if (!(refers_to_temp_table && // (1) - (query_type & QT_DONT_ACCESS_TMP_TABLES)) && // (1) - field && field->table->const_table && - !(query_type & (QT_NO_DATA_EXPANSION | QT_VIEW_INTERNAL))) + if ((refers_to_temp_table != REFERS_TO_OTHER_TMP) && // (1) + !(query_type & (QT_NO_DATA_EXPANSION | QT_VIEW_INTERNAL)) && // (2) + field && field->table->const_table) { print_value(str); return; @@ -9145,7 +9151,7 @@ Item* Item_cache_wrapper::get_tmp_table_item(THD *thd) { auto item_field= new (thd->mem_root) Item_field(thd, result_field); if (item_field) - item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); return item_field; } return copy_or_same(thd); diff --git a/sql/item.h b/sql/item.h index 6b7008c8075..cc1075dc1a9 100644 --- a/sql/item.h +++ b/sql/item.h @@ -3568,27 +3568,18 @@ public: private: /* - Setting this member to TRUE (via set_refers_to_temp_table()) - ensures print() function continues to work even if the table - has been dropped. + Indicates whether this Item_field refers to a regular or some kind of + temporary table. + This is needed for print() to work: it may be called even after the table + referred by the Item_field has been dropped. 
- We need this for "ANALYZE statement" feature. Query execution has - these steps: - 1. Run the query. - 2. Cleanup starts. Temporary tables are destroyed - 3. print "ANALYZE statement" output, if needed - 4. Call close_thread_table() for regular tables. - - Step #4 is done after step #3, so "ANALYZE stmt" has no problem printing - Item_field objects that refer to regular tables. - - However, Step #3 is done after Step #2. Attempt to print Item_field objects - that refer to temporary tables will cause access to freed memory. - - To resolve this, we use refers_to_temp_table member to refer to items - in temporary (work) tables. + See ExplainDataStructureLifetime in sql_explain.h for details. */ - bool refers_to_temp_table= false; + enum { + NO_TEMP_TABLE= 0, + REFERS_TO_DERIVED_TMP= 1, + REFERS_TO_OTHER_TMP=2 + } refers_to_temp_table = NO_TEMP_TABLE; public: Item_field(THD *thd, Name_resolution_context *context_arg, @@ -3804,7 +3795,7 @@ public: return field->table->pos_in_table_list->outer_join; } bool check_index_dependence(void *arg) override; - void set_refers_to_temp_table(bool value); + void set_refers_to_temp_table(); friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; diff --git a/sql/item_func.cc b/sql/item_func.cc index ee0f507d3e8..cb432b4f82f 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -749,7 +749,7 @@ Item *Item_func::get_tmp_table_item(THD *thd) { auto item_field= new (thd->mem_root) Item_field(thd, result_field); if (item_field) - item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); return item_field; } return copy_or_same(thd); diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index ff004bbb045..e020627d68b 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -1030,7 +1030,7 @@ Item *Item_subselect::get_tmp_table_item(THD *thd_arg) auto item_field= new (thd->mem_root) Item_field(thd_arg, result_field); if (item_field) - 
item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); return item_field; } return copy_or_same(thd_arg); @@ -5310,7 +5310,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds() Item_field *right_col_item= new (thd->mem_root) Item_field(thd, context, tmp_table->field[i]); if (right_col_item) - right_col_item->set_refers_to_temp_table(true); + right_col_item->set_refers_to_temp_table(); if (!right_col_item || !(eq_cond= new (thd->mem_root) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index ffac6dbb912..bbd09a59267 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -562,7 +562,7 @@ Item *Item_sum::get_tmp_table_item(THD *thd) auto item_field= new (thd->mem_root) Item_field(thd, result_field_tmp++); if (item_field) - item_field->set_refers_to_temp_table(true); + item_field->set_refers_to_temp_table(); sum_item->args[i]= item_field; } } diff --git a/sql/mysqld.h b/sql/mysqld.h index 5263e397a15..28a0cb30637 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -905,11 +905,7 @@ enum enum_query_type // don't reveal values. QT_NO_DATA_EXPANSION= (1 << 9), // Remove wrappers added for TVC when creating or showing view - QT_NO_WRAPPERS_FOR_TVC_IN_VIEW= (1 << 12), - - // The temporary tables used by the query might be freed by the time - // this print() call is made. - QT_DONT_ACCESS_TMP_TABLES= (1 << 13) + QT_NO_WRAPPERS_FOR_TVC_IN_VIEW= (1 << 12) }; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index c9c74046fd9..a4269e20614 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -871,6 +871,9 @@ int close_thread_tables(THD *thd) TODO: Probably even better approach is to simply associate list of derived tables with (sub-)statement instead of thread and destroy them at the end of its execution. + + Note: EXPLAIN/ANALYZE depends on derived tables being freed here. See + sql_explain.h:ExplainDataStructureLifetime. 
*/ if (thd->derived_tables) { diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index 9c09259cb56..30ce9535a14 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -39,8 +39,8 @@ const char *unit_operation_text[4]= const char *pushed_derived_text= "PUSHED DERIVED"; const char *pushed_select_text= "PUSHED SELECT"; -static void write_item(Json_writer *writer, Item *item, bool no_tmp_tbl); -static void append_item_to_str(String *out, Item *item, bool no_tmp_tbl); +static void write_item(Json_writer *writer, Item *item); +static void append_item_to_str(String *out, Item *item); Explain_query::Explain_query(THD *thd_arg, MEM_ROOT *root) : mem_root(root), upd_del_plan(nullptr), insert_plan(nullptr), @@ -196,7 +196,7 @@ int Explain_query::send_explain(THD *thd, bool extended) int res= 0; if (thd->lex->explain_json) - print_explain_json(result, thd->lex->analyze_stmt, false /*is_show_cmd*/); + print_explain_json(result, thd->lex->analyze_stmt); else { res= print_explain(result, lex->describe, thd->lex->analyze_stmt); @@ -252,16 +252,8 @@ int Explain_query::print_explain(select_result_sink *output, } -/* - @param is_show_cmd TRUE<=> This is a SHOW EXPLAIN|ANALYZE command. - (These commands may be called at late stage in - the query processing, we need to pass no_tmp_tbl=true - to other print functions) -*/ - int Explain_query::print_explain_json(select_result_sink *output, bool is_analyze, - bool is_show_cmd, ulonglong query_time_in_progress_ms) { Json_writer writer; @@ -275,25 +267,17 @@ int Explain_query::print_explain_json(select_result_sink *output, writer.add_member("r_query_time_in_progress_ms"). add_ull(query_time_in_progress_ms); - /* - If we are printing ANALYZE FORMAT=JSON output, take into account that - query's temporary tables have already been freed. See sql_explain.h, - sql_explain.h:ExplainDataStructureLifetime for details. 
- */ - if (is_analyze) - is_show_cmd= true; - if (upd_del_plan) - upd_del_plan->print_explain_json(this, &writer, is_analyze, is_show_cmd); + upd_del_plan->print_explain_json(this, &writer, is_analyze); else if (insert_plan) - insert_plan->print_explain_json(this, &writer, is_analyze, is_show_cmd); + insert_plan->print_explain_json(this, &writer, is_analyze); else { /* Start printing from node with id=1 */ Explain_node *node= get_node(1); if (!node) return 1; /* No query plan */ - node->print_explain_json(this, &writer, is_analyze, is_show_cmd); + node->print_explain_json(this, &writer, is_analyze); } writer.end_object(); @@ -656,8 +640,7 @@ int Explain_union::print_explain(Explain_query *query, void Explain_union::print_explain_json(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl) + Json_writer *writer, bool is_analyze) { Json_writer_nesting_guard guard(writer); char table_name_buffer[SAFE_NAME_LEN]; @@ -702,12 +685,12 @@ void Explain_union::print_explain_json(Explain_query *query, //writer->add_member("dependent").add_str("TODO"); //writer->add_member("cacheable").add_str("TODO"); Explain_select *sel= query->get_select(union_members.at(i)); - sel->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + sel->print_explain_json(query, writer, is_analyze); writer->end_object(); } writer->end_array(); - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); // union_result writer->end_object(); // query_block @@ -769,8 +752,7 @@ bool is_connection_printable_in_json(enum Explain_node::explain_connection_type void Explain_node::print_explain_json_for_children(Explain_query *query, Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -797,7 +779,7 @@ void Explain_node::print_explain_json_for_children(Explain_query *query, } writer->start_object(); - 
node->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + node->print_explain_json(query, writer, is_analyze); writer->end_object(); } @@ -977,8 +959,7 @@ void Explain_select::add_linkage(Json_writer *writer) } void Explain_select::print_explain_json(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl) + Json_writer *writer, bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -1000,7 +981,7 @@ void Explain_select::print_explain_json(Explain_query *query, message); writer->end_object(); - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); } else @@ -1022,17 +1003,17 @@ void Explain_select::print_explain_json(Explain_query *query, if (exec_const_cond) { writer->add_member("const_condition"); - write_item(writer, exec_const_cond, no_tmp_tbl); + write_item(writer, exec_const_cond); } if (outer_ref_cond) { writer->add_member("outer_ref_condition"); - write_item(writer, outer_ref_cond, no_tmp_tbl); + write_item(writer, outer_ref_cond); } if (pseudo_bits_cond) { writer->add_member("pseudo_bits_condition"); - write_item(writer, pseudo_bits_cond, no_tmp_tbl); + write_item(writer, pseudo_bits_cond); } /* we do not print HAVING which always evaluates to TRUE */ @@ -1040,7 +1021,7 @@ void Explain_select::print_explain_json(Explain_query *query, { writer->add_member("having_condition"); if (likely(having)) - write_item(writer, having, no_tmp_tbl); + write_item(writer, having); else { /* Normally we should not go this branch, left just for safety */ @@ -1064,7 +1045,7 @@ void Explain_select::print_explain_json(Explain_query *query, { writer->add_member("filesort").start_object(); auto aggr_node= (Explain_aggr_filesort*)node; - aggr_node->print_json_members(writer, is_analyze, no_tmp_tbl); + aggr_node->print_json_members(writer, is_analyze); break; } case AGGR_OP_REMOVE_DUPLICATES: @@ -1075,7 +1056,7 @@ void 
Explain_select::print_explain_json(Explain_query *query, //TODO: make print_json_members virtual? writer->add_member("window_functions_computation").start_object(); auto aggr_node= (Explain_aggr_window_funcs*)node; - aggr_node->print_json_members(writer, is_analyze, no_tmp_tbl); + aggr_node->print_json_members(writer, is_analyze); break; } default: @@ -1084,8 +1065,7 @@ void Explain_select::print_explain_json(Explain_query *query, started_objects++; } - Explain_basic_join::print_explain_json_interns(query, writer, is_analyze, - no_tmp_tbl); + Explain_basic_join::print_explain_json_interns(query, writer, is_analyze); for (;started_objects; started_objects--) writer->end_object(); @@ -1114,8 +1094,7 @@ Explain_aggr_filesort::Explain_aggr_filesort(MEM_ROOT *mem_root, void Explain_aggr_filesort::print_json_members(Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { char item_buf[256]; String str(item_buf, sizeof(item_buf), &my_charset_bin); @@ -1135,7 +1114,7 @@ void Explain_aggr_filesort::print_json_members(Json_writer *writer, { str.append(STRING_WITH_LEN(", ")); } - append_item_to_str(&str, item, no_tmp_tbl); + append_item_to_str(&str, item); if (*direction == ORDER::ORDER_DESC) str.append(STRING_WITH_LEN(" desc")); } @@ -1148,8 +1127,7 @@ void Explain_aggr_filesort::print_json_members(Json_writer *writer, void Explain_aggr_window_funcs::print_json_members(Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Explain_aggr_filesort *srt; List_iterator it(sorts); @@ -1158,19 +1136,19 @@ void Explain_aggr_window_funcs::print_json_members(Json_writer *writer, { Json_writer_object sort(writer); Json_writer_object filesort(writer, "filesort"); - srt->print_json_members(writer, is_analyze, no_tmp_tbl); + srt->print_json_members(writer, is_analyze); } } void Explain_basic_join::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl) + bool is_analyze) { 
writer->add_member("query_block").start_object(); writer->add_member("select_id").add_ll(select_id); - print_explain_json_interns(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_interns(query, writer, is_analyze); writer->end_object(); } @@ -1179,7 +1157,7 @@ void Explain_basic_join::print_explain_json(Explain_query *query, void Explain_basic_join:: print_explain_json_interns(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl) + bool is_analyze) { { Json_writer_array loop(writer, "nested_loop"); @@ -1192,7 +1170,7 @@ print_explain_json_interns(Explain_query *query, writer->start_array(); } - join_tabs[i]->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + join_tabs[i]->print_explain_json(query, writer, is_analyze); if (join_tabs[i]->end_dups_weedout) { @@ -1201,7 +1179,7 @@ print_explain_json_interns(Explain_query *query, } } } // "nested_loop" - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); } @@ -1601,7 +1579,7 @@ const char *String_list::append_str(MEM_ROOT *mem_root, const char *str) } -static void write_item(Json_writer *writer, Item *item, bool no_tmp_tbl) +static void write_item(Json_writer *writer, Item *item) { THD *thd= current_thd; char item_buf[256]; @@ -1611,27 +1589,25 @@ static void write_item(Json_writer *writer, Item *item, bool no_tmp_tbl) ulonglong save_option_bits= thd->variables.option_bits; thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; - auto qtype= QT_EXPLAIN | (no_tmp_tbl? 
QT_DONT_ACCESS_TMP_TABLES : 0); - item->print(&str, (enum_query_type)qtype); + item->print(&str, QT_EXPLAIN); thd->variables.option_bits= save_option_bits; writer->add_str(str.c_ptr_safe()); } -static void append_item_to_str(String *out, Item *item, bool no_tmp_tbl) +static void append_item_to_str(String *out, Item *item) { THD *thd= current_thd; ulonglong save_option_bits= thd->variables.option_bits; thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; - auto qtype= QT_EXPLAIN | (no_tmp_tbl? QT_DONT_ACCESS_TMP_TABLES : 0); - item->print(out, (enum_query_type)qtype); + item->print(out, QT_EXPLAIN); + thd->variables.option_bits= save_option_bits; } void Explain_table_access::tag_to_json(Json_writer *writer, - enum explain_extra_tag tag, - bool no_tmp_tbl) + enum explain_extra_tag tag) { switch (tag) { @@ -1655,11 +1631,11 @@ void Explain_table_access::tag_to_json(Json_writer *writer, break; case ET_USING_INDEX_CONDITION: writer->add_member("index_condition"); - write_item(writer, pushed_index_cond, no_tmp_tbl); + write_item(writer, pushed_index_cond); break; case ET_USING_INDEX_CONDITION_BKA: writer->add_member("index_condition_bka"); - write_item(writer, pushed_index_cond, no_tmp_tbl); + write_item(writer, pushed_index_cond); break; case ET_USING_WHERE: { @@ -1673,7 +1649,7 @@ void Explain_table_access::tag_to_json(Json_writer *writer, if (item) { writer->add_member("attached_condition"); - write_item(writer, item, no_tmp_tbl); + write_item(writer, item); } } break; @@ -1807,7 +1783,7 @@ static void trace_engine_stats(handler *file, Json_writer *writer) void Explain_table_access::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl) + bool is_analyze) { Json_writer_object jsobj(writer); @@ -1838,7 +1814,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, } } writer->add_member("filesort").start_object(); - pre_join_sort->print_json_members(writer, is_analyze, no_tmp_tbl); + 
pre_join_sort->print_json_members(writer, is_analyze); } if (bka_type.is_using_jbuf()) @@ -1976,7 +1952,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, for (int i=0; i < (int)extra_tags.elements(); i++) { - tag_to_json(writer, extra_tags.at(i), no_tmp_tbl); + tag_to_json(writer, extra_tags.at(i)); } if (full_scan_on_null_key) @@ -1997,7 +1973,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, if (where_cond) { writer->add_member("attached_condition"); - write_item(writer, where_cond, no_tmp_tbl); + write_item(writer, where_cond); } if (is_analyze) @@ -2044,7 +2020,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, { writer->add_member("lateral").add_ll(1); } - node->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + node->print_explain_json(query, writer, is_analyze); writer->end_object(); } if (non_merged_sjm_number) @@ -2054,7 +2030,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, writer->add_member("unique").add_ll(1); Explain_node *node= query->get_node(non_merged_sjm_number); node->connection_type= Explain_node::EXPLAIN_NODE_NON_MERGED_SJ; - node->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + node->print_explain_json(query, writer, is_analyze); writer->end_object(); } if (sjm_nest) @@ -2062,7 +2038,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, /* This is a non-merged semi-join table. 
Print its contents here */ writer->add_member("materialized").start_object(); writer->add_member("unique").add_ll(1); - sjm_nest->print_explain_json(query, writer, is_analyze, no_tmp_tbl); + sjm_nest->print_explain_json(query, writer, is_analyze); writer->end_object(); } @@ -2368,8 +2344,7 @@ int Explain_delete::print_explain(Explain_query *query, void Explain_delete::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -2384,7 +2359,7 @@ void Explain_delete::print_explain_json(Explain_query *query, writer->end_object(); // query_block return; } - Explain_update::print_explain_json(query, writer, is_analyze, no_tmp_tbl); + Explain_update::print_explain_json(query, writer, is_analyze); } @@ -2487,8 +2462,7 @@ int Explain_update::print_explain(Explain_query *query, void Explain_update::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, - bool no_tmp_tbl) + bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -2655,7 +2629,7 @@ void Explain_update::print_explain_json(Explain_query *query, if (where_cond) { writer->add_member("attached_condition"); - write_item(writer, where_cond, no_tmp_tbl); + write_item(writer, where_cond); } /*** The part of plan that is before the buffering/sorting ends here ***/ @@ -2667,7 +2641,7 @@ void Explain_update::print_explain_json(Explain_query *query, writer->end_object(); // table - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); // query_block } @@ -2697,8 +2671,7 @@ int Explain_insert::print_explain(Explain_query *query, } void Explain_insert::print_explain_json(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl) + Json_writer *writer, bool is_analyze) { Json_writer_nesting_guard guard(writer); @@ -2707,7 +2680,7 @@ void 
Explain_insert::print_explain_json(Explain_query *query, writer->add_member("table").start_object(); writer->add_member("table_name").add_str(table_name.c_ptr()); writer->end_object(); // table - print_explain_json_for_children(query, writer, is_analyze, no_tmp_tbl); + print_explain_json_for_children(query, writer, is_analyze); writer->end_object(); // query_block } diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 894309568ba..4510c1aa3ce 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -134,13 +134,12 @@ public: virtual int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze)=0; virtual void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl)= 0; + bool is_analyze)= 0; int print_explain_for_children(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json_for_children(Explain_query *query, - Json_writer *writer, bool is_analyze, - bool no_tmp_tbl); + Json_writer *writer, bool is_analyze); bool print_explain_json_cache(Json_writer *writer, bool is_analyze); virtual ~Explain_node() = default; }; @@ -174,10 +173,10 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); void print_explain_json_interns(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); /* A flat array of Explain structs for tables. 
*/ Explain_table_access** join_tabs; @@ -261,7 +260,7 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); Table_access_tracker *get_using_temporary_read_tracker() { @@ -304,8 +303,7 @@ public: Explain_aggr_filesort(MEM_ROOT *mem_root, bool is_analyze, Filesort *filesort); - void print_json_members(Json_writer *writer, bool is_analyze, - bool no_tmp_tbl); + void print_json_members(Json_writer *writer, bool is_analyze); }; class Explain_aggr_tmp_table : public Explain_aggr_node @@ -326,8 +324,7 @@ class Explain_aggr_window_funcs : public Explain_aggr_node public: enum_explain_aggr_node_type get_type() { return AGGR_OP_WINDOW_FUNCS; } - void print_json_members(Json_writer *writer, bool is_analyze, - bool no_tmp_tbl); + void print_json_members(Json_writer *writer, bool is_analyze); friend class Window_funcs_computation; }; @@ -380,7 +377,7 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); const char *fake_select_type; bool using_filesort; @@ -448,19 +445,19 @@ class Explain_insert; (1) - Query plan construction is finished and it is available for reading. - (2) - Temporary tables are freed. After this point, - we need to pass QT_DONT_ACCESS_TMP_TABLES to item->print(). Since - we don't track when #2 happens for each temp.table, we pass this - flag whenever we're printing the query plan for a SHOW command. - Also, we pass it when printing ANALYZE (?) + (2) - Temporary tables are freed (with exception of derived tables + which are freed at step (4)). 
+ The tables are no longer accessible but one can still call + item->print(), even for items that refer to temp.tables (see + Item_field::print() for details) (3) - Notification about (4). - (4) - Tables used by the query are closed. One known consequence of this is - that the values of the const tables' fields are not available anymore. - We could use the same approach as in QT_DONT_ACCESS_TMP_TABLES to work - around that, but instead we disallow producing FORMAT=JSON output at - step #3. We also processing of SHOW command. The rationale is that - query is close to finish anyway. + (4) - Tables used by the query are closed. One consequence of this is that + the values of the const tables' fields are not available anymore. + We could adjust the code in Item_field::print() to handle this but + instead we make step (3) disallow production of FORMAT=JSON output. + We also disable processing of SHOW EXPLAIN|ANALYZE output because + the query is about to finish anyway. (5) - Item objects are freed. After this, it's certainly not possible to print them into FORMAT=JSON output. 
@@ -499,7 +496,6 @@ public: bool print_explain_str(THD *thd, String *out_str, bool is_analyze); int print_explain_json(select_result_sink *output, bool is_analyze, - bool is_show_cmd, ulonglong query_time_in_progress_ms= 0); /* If true, at least part of EXPLAIN can be printed */ @@ -908,15 +904,14 @@ public: uint select_id, const char *select_type, bool using_temporary, bool using_filesort); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); private: void append_tag_name(String *str, enum explain_extra_tag tag); void fill_key_str(String *key_str, bool is_json) const; void fill_key_len_str(String *key_len_str, bool is_json) const; double get_r_filtered(); - void tag_to_json(Json_writer *writer, enum explain_extra_tag tag, - bool no_tmp_tbl); + void tag_to_json(Json_writer *writer, enum explain_extra_tag tag); }; @@ -1003,7 +998,7 @@ public: virtual int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); virtual void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); }; @@ -1029,7 +1024,7 @@ public: int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); }; @@ -1056,7 +1051,7 @@ public: virtual int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); virtual void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, bool no_tmp_tbl); + bool is_analyze); }; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 31f31850807..c367bc7fe15 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -5802,7 +5802,6 @@ int LEX::print_explain(select_result_sink *output, uint8 explain_flags, query_time_in_progress_ms= (now - start_time) / 
(HRTIME_RESOLUTION / 1000); res= explain->print_explain_json(output, is_analyze, - true /* is_show_cmd */, query_time_in_progress_ms); } else diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 2cfbbc22af9..b0338250088 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -6235,8 +6235,7 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) result->remove_offset_limit(); if (lex->explain_json) { - lex->explain->print_explain_json(result, lex->analyze_stmt, - false /* is_show_cmd */); + lex->explain->print_explain_json(result, lex->analyze_stmt); } else { diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 2d4e28f095c..3a6a8859326 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -20014,7 +20014,7 @@ bool Create_tmp_table::add_fields(THD *thd, if (!(tmp_item= new (thd->mem_root) Item_field(thd, new_field))) goto err; - ((Item_field*) tmp_item)->set_refers_to_temp_table(true); + ((Item_field*) tmp_item)->set_refers_to_temp_table(); arg= sum_item->set_arg(i, thd, tmp_item); thd->mem_root= &table->mem_root; @@ -27290,7 +27290,7 @@ change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, Item_field *new_field= new (thd->mem_root) Item_field(thd, field); if (!suv || !new_field) DBUG_RETURN(true); // Fatal error - new_field->set_refers_to_temp_table(true); + new_field->set_refers_to_temp_table(); List list; list.push_back(new_field, thd->mem_root); suv->set_arguments(thd, list); @@ -27309,7 +27309,7 @@ change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, { item_field= (Item*) new (thd->mem_root) Item_field(thd, field); if (item_field) - ((Item_field*) item_field)->set_refers_to_temp_table(true); + ((Item_field*) item_field)->set_refers_to_temp_table(); } if (!item_field) DBUG_RETURN(true); // Fatal error diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 4d6d5ddd951..9a46413d3e7 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -3122,7 +3122,7 @@ bool Window_funcs_sort::setup(THD *thd, SQL_SELECT 
*sel, Item_field *item= new (thd->mem_root) Item_field(thd, join_tab->table->field[0]); if (item) - item->set_refers_to_temp_table(true); + item->set_refers_to_temp_table(); order->item= (Item **)alloc_root(thd->mem_root, 2 * sizeof(Item *)); order->item[1]= NULL; order->item[0]= item; From 0254eb9307f58409f856517a90109d37ef9e10c4 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Mon, 31 Jul 2023 14:49:01 +1000 Subject: [PATCH 31/35] MDEV-31586 Make the MDEV-31463 test more fitting and stable The original test in the report of MDEV-31463 is contrived and nondeterministic, causing MDEV-31586. We update the test to make it more directly addresses the underlying cause of MDEV-31463, namely errors from queries sent to the data node not consumed when trying to set lock wait timeout. This is achieved through the debug sync facility. --- .../spider/bugfix/r/mdev_31463.result | 51 +++++++----- .../mysql-test/spider/bugfix/t/mdev_31463.cnf | 3 + .../spider/bugfix/t/mdev_31463.test | 80 ++++++++++++------- storage/spider/spd_db_mysql.cc | 3 +- 4 files changed, 87 insertions(+), 50 deletions(-) create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_31463.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_31463.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_31463.result index 5cfa56a27e9..e6d0c77ac87 100644 --- a/storage/spider/mysql-test/spider/bugfix/r/mdev_31463.result +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_31463.result @@ -3,25 +3,40 @@ # for master_1 for child2 +child2_1 +child2_2 +child2_3 for child3 -CREATE SERVER IF NOT EXISTS $srv FOREIGN DATA WRAPPER MYSQL OPTIONS (SOCKET "$MASTER_1_MYSOCK", DATABASE 'test',user 'root'); -SET @old_wait_timeout = @@global.wait_timeout; -SET GLOBAL wait_timeout=1; -CREATE TABLE t (c INT KEY,c1 BLOB,c2 TEXT) ENGINE=Spider COMMENT='WRAPPER "mysql",srv "srv_mdev_31463"'; -XA START 'a'; -INSERT INTO t VALUES (0,0,0,0); -ERROR 21S01: Column count doesn't match value count at row 1 -SELECT SLEEP 
(1); -SLEEP (1) -0 -INSERT INTO t VALUES (1,2,3),(4,5,6),(7,8,9); -Got one of the listed errors -SELECT * FROM information_schema.key_column_usage; -XA END 'a'; -XA PREPARE 'a'; -XA COMMIT 'a'; -SET GLOBAL wait_timeout=@old_wait_timeout; -drop table t; +connection child2_1; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +CREATE TABLE t1 (a INT) ENGINE=InnoDB DEFAULT CHARSET=utf8; +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +CREATE TABLE t2 (a INT) ENGINE=Spider COMMENT='table "t1", srv "s_2_1"'; +connect master_1_another, localhost, root, , auto_test_local, $MASTER_1_MYPORT, $MASTER_1_MYSOCK; +SET DEBUG_SYNC='spider_set_lock_wait_timeout_before_query SIGNAL ready WAIT_FOR go'; +insert into t2 values (42); +connection master_1; +SET DEBUG_SYNC='now WAIT_FOR ready'; +connection child2_1; +connection master_1; +SET DEBUG_SYNC='now SIGNAL go'; +connection master_1_another; +ERROR HY000: Got an error writing communication packets +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +# restart +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; for master_1 for child2 +child2_1 +child2_2 +child2_3 for child3 +# +# end test +# diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_31463.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_31463.cnf new file mode 100644 index 00000000000..05dfd8a0bce --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_31463.cnf @@ -0,0 +1,3 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_31463.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_31463.test index cbea53d52d8..f7f27489bdd 100644 --- a/storage/spider/mysql-test/spider/bugfix/t/mdev_31463.test +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_31463.test @@ -1,47 +1,65 @@ +--source include/have_debug.inc --echo # --echo # MDEV-31463 SIGSEGV in server_mysql_send_query 
| server_mysql_real_query --echo # +# The original test in the report of MDEV-31463 is contrived and +# nondeterministic, causing MDEV-31586. This test more directly +# addresses the underlying issue causing MDEV-31463, namely errors +# from queries sent to the data node not consumed when trying to set +# lock wait timeout. + --disable_query_log --disable_result_log --source ../../t/test_init.inc --enable_result_log --enable_query_log ---let $srv=srv_mdev_31463 ---disable_warnings -evalp CREATE SERVER IF NOT EXISTS $srv FOREIGN DATA WRAPPER MYSQL OPTIONS (SOCKET "$MASTER_1_MYSOCK", DATABASE 'test',user 'root'); ---enable_warnings +connection child2_1; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +eval CREATE TABLE t1 (a INT) $CHILD2_1_ENGINE $CHILD2_1_CHARSET; -SET @old_wait_timeout = @@global.wait_timeout; -SET GLOBAL wait_timeout=1; -eval CREATE TABLE t (c INT KEY,c1 BLOB,c2 TEXT) ENGINE=Spider COMMENT='WRAPPER "mysql",srv "$srv"'; -XA START 'a'; ---error ER_WRONG_VALUE_COUNT_ON_ROW -INSERT INTO t VALUES (0,0,0,0); -SELECT SLEEP (1); -# MDEV-31586: The error code is non-deterministic, presumably due to -# some race condition from the SLEEP statement above. The correct -# error should be 12701 ER_SPIDER_REMOTE_SERVER_GONE_AWAY_NUM as it is -# the last failure. 
Nevertheless, this contrived test is needed to -# cover the error reporting when setting lock wait timeout, until we -# find a better one and/or fixing the non-deterministic error -# reporting ---error 12701,ER_NET_READ_ERROR,ER_NET_ERROR_ON_WRITE,ER_XAER_DUPID -INSERT INTO t VALUES (1,2,3),(4,5,6),(7,8,9); ---disable_result_log ---error 0,12701,ER_XAER_DUPID -SELECT * FROM information_schema.key_column_usage; ---enable_result_log +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +eval CREATE TABLE t2 (a INT) $MASTER_1_ENGINE COMMENT='table "t1", srv "s_2_1"'; -XA END 'a'; -XA PREPARE 'a'; -XA COMMIT 'a'; -SET GLOBAL wait_timeout=@old_wait_timeout; -drop table t; +# Create a new thread on the spider node server, send an insert query +# to the spider node. Shutdown the data node server when the query +# reaches and waits at set lockwait timeout so it fails there. Check +# the error code is correct. +connect (master_1_another, localhost, root, , auto_test_local, $MASTER_1_MYPORT, $MASTER_1_MYSOCK); +SET DEBUG_SYNC='spider_set_lock_wait_timeout_before_query SIGNAL ready WAIT_FOR go'; +send insert into t2 values (42); +connection master_1; +SET DEBUG_SYNC='now WAIT_FOR ready'; + +connection child2_1; +let $shutdown_timeout=0; +--source include/shutdown_mysqld.inc + +connection master_1; +SET DEBUG_SYNC='now SIGNAL go'; +connection master_1_another; +--error ER_CONNECT_TO_FOREIGN_DATA_SOURCE +reap; + +# Cleanup +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; + +connection child2_1; +--source include/start_mysqld.inc +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; --disable_query_log --disable_result_log ---source ../../t/test_deinit.inc ---enable_result_log +--source ../t/test_deinit.inc --enable_query_log +--enable_result_log + +--echo # +--echo # end test +--echo # diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc index b432f3668f0..01a01ee6cf7 100644 --- 
a/storage/spider/spd_db_mysql.cc +++ b/storage/spider/spd_db_mysql.cc @@ -13429,7 +13429,7 @@ int spider_mbase_handler::sts_mode_exchange( static int spider_set_lock_wait_timeout(uint seconds, SPIDER_CONN *conn, int *need_mon) { - char query[512];; + char query[512]; int error_num; DBUG_ENTER("spider_db_set_lock_wait_timeout"); size_t query_len = @@ -13437,6 +13437,7 @@ static int spider_set_lock_wait_timeout(uint seconds, SPIDER_CONN *conn, "set @old_lock_wait_timeout=@@session.lock_wait_timeout;" "set session lock_wait_timeout=%d;", seconds); + DEBUG_SYNC(conn->thd, "spider_set_lock_wait_timeout_before_query"); if (spider_db_query(conn, query, query_len, -1, need_mon)) DBUG_RETURN(spider_db_errorno(conn)); spider_db_result *result; From e666b8651ad55a73fbe926c5447740833e80e13e Mon Sep 17 00:00:00 2001 From: Federico Razzoli Date: Mon, 14 Aug 2023 20:56:27 +0100 Subject: [PATCH 32/35] EXAMPLE storage engine: update comments --- storage/example/ha_example.cc | 16 ++++++---------- storage/example/ha_example.h | 30 +++++++++++++++--------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc index c66c33a7818..fbad065760f 100644 --- a/storage/example/ha_example.cc +++ b/storage/example/ha_example.cc @@ -38,7 +38,7 @@ build by doing the following during your build process:
./configure --with-example-storage-engine - Once this is done, MySQL will let you create tables with:
+ Once this is done, MariaDB will let you create tables with:
CREATE TABLE (...) ENGINE=EXAMPLE; The example storage engine is set up to use table locks. It @@ -51,9 +51,9 @@ of this file. @note - When you create an EXAMPLE table, the MySQL Server creates a table .frm + When you create an EXAMPLE table, the MariaDB Server creates a table .frm (format) file in the database directory, using the table name as the file - name as is customary with MySQL. No other files are created. To get an idea + name as is customary with MariaDB. No other files are created. To get an idea of what occurs, here is an example select that would do a scan of an entire table: @@ -86,10 +86,6 @@ ha_example::open() would also have been necessary. Calls to ha_example::extra() are hints as to what will be occuring to the request. - A Longer Example can be found called the "Skeleton Engine" which can be - found on TangentOrg. It has both an engine and a full build environment - for building a pluggable storage engine. - Happy coding!
-Brian */ @@ -771,7 +767,7 @@ int ha_example::external_lock(THD *thd, int lock_type) Before adding the lock into the table lock handler (see thr_lock.c), mysqld calls store lock with the requested locks. Store lock can now modify a write lock to a read lock (or some other lock), ignore the - lock (if we don't want to use MySQL table locks at all), or add locks + lock (if we don't want to use MariaDB table locks at all), or add locks for many tables (like we do when we are using a MERGE handler). Berkeley DB, for example, changes all WRITE locks to TL_WRITE_ALLOW_WRITE @@ -781,7 +777,7 @@ int ha_example::external_lock(THD *thd, int lock_type) When releasing locks, store_lock() is also called. In this case one usually doesn't have to do anything. - In some exceptional cases MySQL may send a request for a TL_IGNORE; + In some exceptional cases MariaDB may send a request for a TL_IGNORE; This means that we are requesting the same lock as last time and this should also be ignored. (This may happen when someone does a flush table when we have opened a part of the tables, in which case mysqld @@ -1084,7 +1080,7 @@ static int show_func_example(MYSQL_THD thd, struct st_mysql_show_var *var, var->value= buf; // it's of SHOW_VAR_FUNC_BUFF_SIZE bytes my_snprintf(buf, SHOW_VAR_FUNC_BUFF_SIZE, "enum_var is %lu, ulong_var is %lu, int_var is %d, " - "double_var is %f, %.6b", // %b is a MySQL extension + "double_var is %f, %.6b", // %b is a MariaDB/MySQL extension srv_enum_var, srv_ulong_var, THDVAR(thd, int_var), srv_double_var, "really"); return 0; diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h index 78b07ed5d9f..1797031fc1f 100644 --- a/storage/example/ha_example.h +++ b/storage/example/ha_example.h @@ -62,7 +62,7 @@ public: */ class ha_example: public handler { - THR_LOCK_DATA lock; ///< MySQL lock + THR_LOCK_DATA lock; ///< MariaDB lock Example_share *share; ///< Shared lock info Example_share *get_share(); ///< Get the share @@ -97,7 +97,7 @@ public: 
@details part is the key part to check. First key part is 0. - If all_parts is set, MySQL wants to know the flags for the combined + If all_parts is set, MariaDB wants to know the flags for the combined index, up to and including 'part'. */ ulong index_flags(uint inx, uint part, bool all_parts) const @@ -109,7 +109,7 @@ public: unireg.cc will call max_supported_record_length(), max_supported_keys(), max_supported_key_parts(), uint max_supported_key_length() to make sure that the storage engine can handle the data it is about to - send. Return *real* limits of your storage engine here; MySQL will do + send. Return *real* limits of your storage engine here; MariaDB will do min(your_limits, MySQL_limits) automatically. */ uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; } @@ -117,7 +117,7 @@ public: /** @brief unireg.cc will call this to make sure that the storage engine can handle the data it is about to send. Return *real* limits of your storage engine - here; MySQL will do min(your_limits, MySQL_limits) automatically. + here; MariaDB will do min(your_limits, MySQL_limits) automatically. @details There is no need to implement ..._key_... methods if your engine doesn't @@ -128,7 +128,7 @@ public: /** @brief unireg.cc will call this to make sure that the storage engine can handle the data it is about to send. Return *real* limits of your storage engine - here; MySQL will do min(your_limits, MySQL_limits) automatically. + here; MariaDB will do min(your_limits, MySQL_limits) automatically. @details There is no need to implement ..._key_... methods if your engine doesn't @@ -139,7 +139,7 @@ public: /** @brief unireg.cc will call this to make sure that the storage engine can handle the data it is about to send. Return *real* limits of your storage engine - here; MySQL will do min(your_limits, MySQL_limits) automatically. + here; MariaDB will do min(your_limits, MySQL_limits) automatically. @details There is no need to implement ..._key_... 
methods if your engine doesn't @@ -187,7 +187,7 @@ public: Everything below are methods that we implement in ha_example.cc. Most of these methods are not obligatory, skip them and - MySQL will treat them as not implemented + MariaDB will treat them as not implemented */ /** @brief We implement this in ha_example.cc; it's a required method. @@ -201,50 +201,50 @@ public: /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. + skip it and and MariaDB will treat it as not implemented. */ int write_row(const uchar *buf); /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. + skip it and and MariaDB will treat it as not implemented. */ int update_row(const uchar *old_data, const uchar *new_data); /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. + skip it and and MariaDB will treat it as not implemented. */ int delete_row(const uchar *buf); /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. + skip it and and MariaDB will treat it as not implemented. */ int index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag); /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. + skip it and and MariaDB will treat it as not implemented. */ int index_next(uchar *buf); /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. + skip it and and MariaDB will treat it as not implemented. */ int index_prev(uchar *buf); /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. 
+ skip it and and MariaDB will treat it as not implemented. */ int index_first(uchar *buf); /** @brief We implement this in ha_example.cc. It's not an obligatory method; - skip it and and MySQL will treat it as not implemented. + skip it and and MariaDB will treat it as not implemented. */ int index_last(uchar *buf); From d515fa72a0294d97e3f8d6894714c10331bd771d Mon Sep 17 00:00:00 2001 From: Lena Startseva Date: Fri, 11 Aug 2023 13:25:47 +0700 Subject: [PATCH 33/35] MDEV-31795: main.flush_corruption times out with view-protocol Fixed tests: main.flush_corruption - disabled service connection for some queries since it is necessary that the query SELECT pass in the same session --- mysql-test/main/flush_corruption.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mysql-test/main/flush_corruption.test b/mysql-test/main/flush_corruption.test index 97ef416307f..424aa97d90c 100644 --- a/mysql-test/main/flush_corruption.test +++ b/mysql-test/main/flush_corruption.test @@ -66,8 +66,10 @@ let $wait_condition= SELECT 1 FROM INFORMATION_SCHEMA.PROCESSLIST --echo # Unlock and close table and wait for con1 to close too. FLUSH TABLES; #SELECT NOW(); +--disable_service_connection --echo # This should give no result. SELECT * FROM t1; +--enable_service_connection #SELECT NOW(); UNLOCK TABLES; connection con1; From 030ee267874c3b20ff3ddf999e2eb526a4d97988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 19 Sep 2023 18:07:10 +0300 Subject: [PATCH 34/35] MDEV-32044 Mariadb crash after upgrading to 11.0.3 ibuf_bitmap_buffered(): A new predicate, to check if the IBUF_BITMAP_BUFFERED bit for a particular page is set. ibuf_merge(): If ibuf_bitmap_buffered() does not hold, skip the records for the page. One reason why we might have this situation is the bug that was fixed in commit 34c283ba1b3b0b51488746416a737c1456df2fe9 (MDEV-32132). 
--- storage/innobase/ibuf/ibuf0ibuf.cc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index c48740b7dc6..ce5e0541ecf 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -675,6 +675,17 @@ ATTRIBUTE_COLD static dberr_t ibuf_move_to_next(btr_cur_t *cur, mtr_t *mtr) return page_cur_move_to_next(&cur->page_cur) ? DB_SUCCESS : DB_CORRUPTION; } +/** @return if buffered changes exist for the page */ +ATTRIBUTE_COLD +static bool ibuf_bitmap_buffered(const buf_block_t *bitmap, uint32_t offset) +{ + if (!bitmap) + return false; + offset&= uint32_t(bitmap->physical_size() - 1); + byte *map_byte= &bitmap->page.frame[PAGE_DATA + offset / 2]; + return *map_byte & (byte{4} << ((offset & 1) << 4)); +} + /** Apply changes to a block. */ ATTRIBUTE_COLD static dberr_t ibuf_merge(fil_space_t *space, btr_cur_t *cur, mtr_t *mtr) @@ -697,12 +708,15 @@ static dberr_t ibuf_merge(fil_space_t *space, btr_cur_t *cur, mtr_t *mtr) block->zip_size(), RW_X_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, mtr) : nullptr; + bool buffered= false; if (!block); else if (fil_page_get_type(block->page.frame) != FIL_PAGE_INDEX || !page_is_leaf(block->page.frame) || DB_SUCCESS == fseg_page_is_allocated(space, page_no)) block= nullptr; + else + buffered= ibuf_bitmap_buffered(bitmap, block->page.id().page_no()); do { @@ -764,7 +778,7 @@ static dberr_t ibuf_merge(fil_space_t *space, btr_cur_t *cur, mtr_t *mtr) the server is killed before the completion of ibuf_upgrade(). 
*/ btr_rec_set_deleted(cur->page_cur.block, rec, mtr); - if (block) + if (buffered) { page_header_reset_last_insert(block, mtr); page_update_max_trx_id(block, buf_block_get_page_zip(block), From 9880006b5ba28d7c7dc1596d958295c67efa5ad0 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 16 Sep 2023 21:21:32 +0200 Subject: [PATCH 35/35] update wsrep.wsrep_provider_plugin_defaults to 26.4.16 --- .../r/wsrep_provider_plugin_defaults.result | 17 ++++++++++++++++- .../wsrep/t/wsrep_provider_plugin_defaults.test | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/wsrep/r/wsrep_provider_plugin_defaults.result b/mysql-test/suite/wsrep/r/wsrep_provider_plugin_defaults.result index ce164f66a22..8e2bf85d632 100644 --- a/mysql-test/suite/wsrep/r/wsrep_provider_plugin_defaults.result +++ b/mysql-test/suite/wsrep/r/wsrep_provider_plugin_defaults.result @@ -10,7 +10,7 @@ SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME LIK 'wsrep_provider_signal', 'wsrep_provider_gmcast_listen_addr'); COUNT(*) -83 +84 SELECT * FROM INFORMATION_SCHEMA.SYSTEM_VARIABLES WHERE VARIABLE_NAME LIKE 'wsrep_provider_%' AND VARIABLE_NAME NOT IN ( 'wsrep_provider', @@ -998,6 +998,21 @@ ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED GLOBAL_VALUE_PATH NULL +VARIABLE_NAME WSREP_PROVIDER_PROTONET_BACKEND +SESSION_VALUE NULL +GLOBAL_VALUE asio +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE asio +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE VARCHAR +VARIABLE_COMMENT Wsrep provider option +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED +GLOBAL_VALUE_PATH NULL VARIABLE_NAME WSREP_PROVIDER_PROTONET_VERSION SESSION_VALUE NULL GLOBAL_VALUE 0 diff --git a/mysql-test/suite/wsrep/t/wsrep_provider_plugin_defaults.test b/mysql-test/suite/wsrep/t/wsrep_provider_plugin_defaults.test index bdeef21a38a..16ecabfa182 100644 --- 
a/mysql-test/suite/wsrep/t/wsrep_provider_plugin_defaults.test +++ b/mysql-test/suite/wsrep/t/wsrep_provider_plugin_defaults.test @@ -1,7 +1,7 @@ --source include/have_wsrep.inc --source include/have_innodb.inc ---let $galera_version=26.4.14 +--let $galera_version=26.4.16 source include/check_galera_version.inc; SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME LIKE 'wsrep_provider%' AND VARIABLE_NAME NOT IN (