1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-31755 Replica's DML event deadlocks with online alter table

The deadlock was caused by too strong MDL acquired by the start ALTER.

Replica's ALTER TABLE replication consists of two phases:
1. Start ALTER (SA) -- the event is emitted at the very beginning,
allowing replication to start the ALTER in parallel
2. Commit ALTER (CA) -- ensures that master finishes successfully

CA is normally received by wait_for_master call.
If parallel DML was run, the following sequence will take place:

|- SA
|- DML
|- CA

If CA is handled after the MDL upgrade, it will deadlock with the DML.

While the MDL is still shared, the start ALTER now waits for its 2nd part (CA),
which allows concurrent DMLs to grab the lock.

The fix relies on wait_for_master being reentrant -- there is no need to avoid
a second call at the end of mysql_alter_table.

Since SA and CA are marked with FL_DDL, the DML issued in-between cannot be
rescheduled before or after them. However, SA "commits" (by the call of
write_bin_log_start_alter and, subsequently,
thd->wakeup_subsequent_commits) before the copy stage begins, unlocking
the DMLs to run on this table. That is, these DMLs will be executed
concurrently with the copy stage, making Online alter effective on replicas
as well.

Co-authored-by: Nikita Malyavin (nikitamalyavin@gmail.com)
This commit is contained in:
Andrei
2023-07-20 21:20:37 +03:00
committed by Sergei Golubchik
parent e1b9ab1995
commit bac8f189ed
3 changed files with 110 additions and 3 deletions

View File

@ -86,7 +86,7 @@ static int copy_data_between_tables(THD *, TABLE *,TABLE *,
List<Create_field> &, bool, uint, ORDER *,
ha_rows *, ha_rows *,
Alter_info::enum_enable_or_disable,
Alter_table_ctx *, bool);
Alter_table_ctx *, bool, uint64);
static int append_system_key_parts(THD *thd, HA_CREATE_INFO *create_info,
Key *key);
static int mysql_prepare_create_table(THD *, HA_CREATE_INFO *, Alter_info *,
@ -11140,7 +11140,7 @@ do_continue:;
alter_info->create_list, ignore,
order_num, order, &copied, &deleted,
alter_info->keys_onoff,
&alter_ctx, online))
&alter_ctx, online, start_alter_id))
goto err_new_table_cleanup;
}
else
@ -11696,7 +11696,8 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
uint order_num, ORDER *order,
ha_rows *copied, ha_rows *deleted,
Alter_info::enum_enable_or_disable keys_onoff,
Alter_table_ctx *alter_ctx, bool online)
Alter_table_ctx *alter_ctx, bool online,
uint64 start_alter_id)
{
int error= 1;
Copy_field *copy= NULL, *copy_end;
@ -12104,6 +12105,14 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
thd->lock= NULL;
error= online_alter_read_from_binlog(thd, &rgi, binlog);
if (start_alter_id)
{
DBUG_ASSERT(thd->slave_thread);
int rpl_error= wait_for_master(thd);
if (rpl_error)
error= 1;
}
DEBUG_SYNC(thd, "alter_table_online_before_lock");