1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-21953 deadlock between BACKUP STAGE BLOCK_COMMIT and parallel repl.

The issue was:
T1, a parallel slave worker thread, is waiting for another worker thread to
commit. While waiting, it has the MDL_BACKUP_COMMIT lock.
T2, working for mariabackup, is doing BACKUP STAGE BLOCK_COMMIT and blocks
all commits.
This causes a deadlock as the thread T1 is waiting for can't commit.

Fixed by moving locking of MDL_BACKUP_COMMIT from ha_commit_trans() to
commit_one_phase_2()

Other things:
- Added a new argument to ha_comit_one_phase() to signal if the
  transaction was a write transaction.
- Ensured that ha_maria::implicit_commit() is always called under
  MDL_BACKUP_COMMIT. This code is not needed in 10.5
- Ensure that MDL_Request values 'type' and 'ticket' are always
  initialized. This makes it easier to check the state of the MDL_Request.
- Moved thd->store_globals() earlier in handle_rpl_parallel_thread() as
  thd->init_for_queries() could use a MDL that could crash if store_globals
  where not called.
- Don't call ha_enable_transactions() in THD::init_for_queries() as this
  is both slow (uses MDL locks) and not needed.
This commit is contained in:
Monty
2020-07-20 15:19:25 +03:00
parent c4d5b6b157
commit fc48c8ff4c
11 changed files with 210 additions and 59 deletions

View File

@@ -0,0 +1,75 @@
--source include/have_innodb.inc
# The test is not format specific, MIXED is required to optimize testing time
--source include/have_binlog_format_mixed.inc
--source include/master-slave.inc
--echo #
--echo # MDEV-21953: deadlock between BACKUP STAGE BLOCK_COMMIT and parallel
--echo # replication
--echo #
--connection master
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE = innodb;
--sync_slave_with_master
--source include/stop_slave.inc
SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
SET @@global.slave_parallel_threads= 2;
SET @@global.slave_parallel_mode = 'optimistic';
--connection master
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
--save_master_pos
# The plot:
# Block the 1st of two workers and, at waiting-for-prior-commit by the 2nd,
# issue BACKUP commands.
# BLOCK_COMMIT may hang so it is --send.
# Release the 1st worker to observe a deadlock unless its fixed.
--connect (aux_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,)
BEGIN;
# block the 1st worker and wait for the 2nd ready to commit
INSERT INTO t1 VALUES (1);
--connection slave
--source include/start_slave.inc
--connection aux_slave
--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
--source include/wait_condition.inc
# While the 1st worker is locked out run backup
--connect (backup_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,)
BACKUP STAGE START;
--send BACKUP STAGE BLOCK_COMMIT
# release the 1st work
--connection aux_slave
--sleep 1
ROLLBACK;
--connection backup_slave
--reap
BACKUP STAGE END;
--connection slave
--sync_with_master
--let $diff_tables= master:t1,slave:t1
--source include/diff_tables.inc
# Clean up.
--connection slave
--source include/stop_slave.inc
SET @@global.slave_parallel_threads= @old_parallel_threads;
SET @@global.slave_parallel_mode = @old_parallel_mode;
--source include/start_slave.inc
--connection server_1
DROP TABLE t1;
--source include/rpl_end.inc