mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
MWL#116: Efficient group commit for binary log
Preliminary commit for testing
This commit is contained in:
63
mysql-test/r/group_commit.result
Normal file
63
mysql-test/r/group_commit.result
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
|
||||||
|
SELECT variable_value INTO @commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_commits';
|
||||||
|
SELECT variable_value INTO @group_commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_group_commits';
|
||||||
|
SET DEBUG_SYNC= "commit_after_group_log_xid SIGNAL group1_running WAIT_FOR group2_queued";
|
||||||
|
INSERT INTO t1 VALUES ("con1");
|
||||||
|
set DEBUG_SYNC= "now WAIT_FOR group1_running";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
|
||||||
|
SET DEBUG_SYNC= "commit_after_release_LOCK_group_commit WAIT_FOR group3_committed";
|
||||||
|
SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
|
||||||
|
INSERT INTO t1 VALUES ("con2");
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
|
||||||
|
INSERT INTO t1 VALUES ("con3");
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
|
||||||
|
INSERT INTO t1 VALUES ("con4");
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
|
||||||
|
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
a
|
||||||
|
SET DEBUG_SYNC= "now SIGNAL group2_queued";
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
a
|
||||||
|
con1
|
||||||
|
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
|
||||||
|
SET DEBUG_SYNC= "commit_after_get_LOCK_group_commit SIGNAL con5_leader WAIT_FOR con6_queued";
|
||||||
|
INSERT INTO t1 VALUES ("con5");
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
|
||||||
|
INSERT INTO t1 VALUES ("con6");
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
a
|
||||||
|
con1
|
||||||
|
SET DEBUG_SYNC= "now SIGNAL group3_committed";
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
a
|
||||||
|
con1
|
||||||
|
con2
|
||||||
|
con3
|
||||||
|
con4
|
||||||
|
SET DEBUG_SYNC= "now SIGNAL group2_checked";
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
a
|
||||||
|
con1
|
||||||
|
con2
|
||||||
|
con3
|
||||||
|
con4
|
||||||
|
con5
|
||||||
|
con6
|
||||||
|
SELECT variable_value - @commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_commits';
|
||||||
|
variable_value - @commits
|
||||||
|
6
|
||||||
|
SELECT variable_value - @group_commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_group_commits';
|
||||||
|
variable_value - @group_commits
|
||||||
|
3
|
||||||
|
SET DEBUG_SYNC= 'RESET';
|
||||||
|
DROP TABLE t1;
|
28
mysql-test/suite/binlog/r/binlog_ioerr.result
Normal file
28
mysql-test/suite/binlog/r/binlog_ioerr.result
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
CALL mtr.add_suppression("Error writing file 'master-bin'");
|
||||||
|
RESET MASTER;
|
||||||
|
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
|
||||||
|
INSERT INTO t1 VALUES(0);
|
||||||
|
SET SESSION debug='+d,fail_binlog_write_1';
|
||||||
|
INSERT INTO t1 VALUES(1);
|
||||||
|
ERROR HY000: Error writing file 'master-bin' (errno: 22)
|
||||||
|
INSERT INTO t1 VALUES(2);
|
||||||
|
ERROR HY000: Error writing file 'master-bin' (errno: 22)
|
||||||
|
SET SESSION debug='';
|
||||||
|
INSERT INTO t1 VALUES(3);
|
||||||
|
SELECT * FROM t1;
|
||||||
|
a
|
||||||
|
0
|
||||||
|
3
|
||||||
|
SHOW BINLOG EVENTS;
|
||||||
|
Log_name Pos Event_type Server_id End_log_pos Info
|
||||||
|
BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: #
|
||||||
|
BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb
|
||||||
|
BINLOG POS Query 1 ENDPOS BEGIN
|
||||||
|
BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0)
|
||||||
|
BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
|
||||||
|
BINLOG POS Query 1 ENDPOS BEGIN
|
||||||
|
BINLOG POS Query 1 ENDPOS BEGIN
|
||||||
|
BINLOG POS Query 1 ENDPOS BEGIN
|
||||||
|
BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3)
|
||||||
|
BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
|
||||||
|
DROP TABLE t1;
|
29
mysql-test/suite/binlog/t/binlog_ioerr.test
Normal file
29
mysql-test/suite/binlog/t/binlog_ioerr.test
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
source include/have_debug.inc;
|
||||||
|
source include/have_innodb.inc;
|
||||||
|
source include/have_log_bin.inc;
|
||||||
|
source include/have_binlog_format_mixed_or_statement.inc;
|
||||||
|
|
||||||
|
CALL mtr.add_suppression("Error writing file 'master-bin'");
|
||||||
|
|
||||||
|
RESET MASTER;
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
|
||||||
|
INSERT INTO t1 VALUES(0);
|
||||||
|
SET SESSION debug='+d,fail_binlog_write_1';
|
||||||
|
--error ER_ERROR_ON_WRITE
|
||||||
|
INSERT INTO t1 VALUES(1);
|
||||||
|
--error ER_ERROR_ON_WRITE
|
||||||
|
INSERT INTO t1 VALUES(2);
|
||||||
|
SET SESSION debug='';
|
||||||
|
INSERT INTO t1 VALUES(3);
|
||||||
|
SELECT * FROM t1;
|
||||||
|
|
||||||
|
# Actually the output from this currently shows a bug.
|
||||||
|
# The injected IO error leaves partially written transactions in the binlog in
|
||||||
|
# the form of stray "BEGIN" events.
|
||||||
|
# These should disappear from the output if binlog error handling is improved.
|
||||||
|
--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
|
||||||
|
--replace_column 1 BINLOG 2 POS 5 ENDPOS
|
||||||
|
SHOW BINLOG EVENTS;
|
||||||
|
|
||||||
|
DROP TABLE t1;
|
115
mysql-test/t/group_commit.test
Normal file
115
mysql-test/t/group_commit.test
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
--source include/have_debug_sync.inc
|
||||||
|
--source include/have_innodb.inc
|
||||||
|
--source include/have_log_bin.inc
|
||||||
|
|
||||||
|
# Test some group commit code paths by using debug_sync to do controlled
|
||||||
|
# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a
|
||||||
|
# group.
|
||||||
|
#
|
||||||
|
# Group 3 is allowed to race as far as possible ahead before group 2 finishes
|
||||||
|
# to check some edge case for concurrency control.
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
|
||||||
|
|
||||||
|
SELECT variable_value INTO @commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_commits';
|
||||||
|
SELECT variable_value INTO @group_commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_group_commits';
|
||||||
|
|
||||||
|
connect(con1,localhost,root,,);
|
||||||
|
connect(con2,localhost,root,,);
|
||||||
|
connect(con3,localhost,root,,);
|
||||||
|
connect(con4,localhost,root,,);
|
||||||
|
connect(con5,localhost,root,,);
|
||||||
|
connect(con6,localhost,root,,);
|
||||||
|
|
||||||
|
# Start group1 (with one thread) doing commit, waiting for
|
||||||
|
# group2 to queue up before finishing.
|
||||||
|
|
||||||
|
connection con1;
|
||||||
|
SET DEBUG_SYNC= "commit_after_group_log_xid SIGNAL group1_running WAIT_FOR group2_queued";
|
||||||
|
send INSERT INTO t1 VALUES ("con1");
|
||||||
|
|
||||||
|
# Make group2 (with three threads) queue up.
|
||||||
|
# Make sure con2 is the group commit leader for group2.
|
||||||
|
# Make group2 wait with running commit_ordered() until group3 has committed.
|
||||||
|
|
||||||
|
connection con2;
|
||||||
|
set DEBUG_SYNC= "now WAIT_FOR group1_running";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
|
||||||
|
SET DEBUG_SYNC= "commit_after_release_LOCK_group_commit WAIT_FOR group3_committed";
|
||||||
|
SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
|
||||||
|
send INSERT INTO t1 VALUES ("con2");
|
||||||
|
connection con3;
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
|
||||||
|
send INSERT INTO t1 VALUES ("con3");
|
||||||
|
connection con4;
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
|
||||||
|
send INSERT INTO t1 VALUES ("con4");
|
||||||
|
|
||||||
|
# When group2 is queued, let group1 continue and queue group3.
|
||||||
|
|
||||||
|
connection default;
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
|
||||||
|
|
||||||
|
# At this point, trasaction 1 is still not visible as commit_ordered() has not
|
||||||
|
# been called yet.
|
||||||
|
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
|
||||||
|
SET DEBUG_SYNC= "now SIGNAL group2_queued";
|
||||||
|
connection con1;
|
||||||
|
reap;
|
||||||
|
|
||||||
|
# Now transaction 1 is visible.
|
||||||
|
connection default;
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
|
||||||
|
connection con5;
|
||||||
|
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
|
||||||
|
SET DEBUG_SYNC= "commit_after_get_LOCK_group_commit SIGNAL con5_leader WAIT_FOR con6_queued";
|
||||||
|
send INSERT INTO t1 VALUES ("con5");
|
||||||
|
|
||||||
|
connection con6;
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
|
||||||
|
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
|
||||||
|
send INSERT INTO t1 VALUES ("con6");
|
||||||
|
|
||||||
|
connection default;
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
|
||||||
|
# Still only transaction 1 visible, as group2 have not yet run commit_ordered().
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
SET DEBUG_SYNC= "now SIGNAL group3_committed";
|
||||||
|
SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
|
||||||
|
# Now transactions 1-4 visible.
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
SET DEBUG_SYNC= "now SIGNAL group2_checked";
|
||||||
|
|
||||||
|
connection con2;
|
||||||
|
reap;
|
||||||
|
|
||||||
|
connection con3;
|
||||||
|
reap;
|
||||||
|
|
||||||
|
connection con4;
|
||||||
|
reap;
|
||||||
|
|
||||||
|
connection con5;
|
||||||
|
reap;
|
||||||
|
|
||||||
|
connection con6;
|
||||||
|
reap;
|
||||||
|
|
||||||
|
connection default;
|
||||||
|
# Check all transactions finally visible.
|
||||||
|
SELECT * FROM t1 ORDER BY a;
|
||||||
|
|
||||||
|
SELECT variable_value - @commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_commits';
|
||||||
|
SELECT variable_value - @group_commits FROM information_schema.global_status
|
||||||
|
WHERE variable_name = 'binlog_group_commits';
|
||||||
|
|
||||||
|
SET DEBUG_SYNC= 'RESET';
|
||||||
|
DROP TABLE t1;
|
126
sql/handler.cc
126
sql/handler.cc
@@ -76,6 +76,8 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
|
|||||||
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
|
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
|
||||||
uint known_extensions_id= 0;
|
uint known_extensions_id= 0;
|
||||||
|
|
||||||
|
static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
|
||||||
|
bool is_real_trans);
|
||||||
|
|
||||||
|
|
||||||
static plugin_ref ha_default_plugin(THD *thd)
|
static plugin_ref ha_default_plugin(THD *thd)
|
||||||
@@ -1070,7 +1072,7 @@ ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
|
|||||||
*/
|
*/
|
||||||
int ha_commit_trans(THD *thd, bool all)
|
int ha_commit_trans(THD *thd, bool all)
|
||||||
{
|
{
|
||||||
int error= 0, cookie= 0;
|
int error= 0, cookie;
|
||||||
/*
|
/*
|
||||||
'all' means that this is either an explicit commit issued by
|
'all' means that this is either an explicit commit issued by
|
||||||
user, or an implicit commit issued by a DDL.
|
user, or an implicit commit issued by a DDL.
|
||||||
@@ -1085,7 +1087,8 @@ int ha_commit_trans(THD *thd, bool all)
|
|||||||
*/
|
*/
|
||||||
bool is_real_trans= all || thd->transaction.all.ha_list == 0;
|
bool is_real_trans= all || thd->transaction.all.ha_list == 0;
|
||||||
Ha_trx_info *ha_info= trans->ha_list;
|
Ha_trx_info *ha_info= trans->ha_list;
|
||||||
my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
|
bool need_prepare_ordered, need_commit_ordered;
|
||||||
|
my_xid xid;
|
||||||
DBUG_ENTER("ha_commit_trans");
|
DBUG_ENTER("ha_commit_trans");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1118,10 +1121,13 @@ int ha_commit_trans(THD *thd, bool all)
|
|||||||
DBUG_RETURN(2);
|
DBUG_RETURN(2);
|
||||||
}
|
}
|
||||||
#ifdef USING_TRANSACTIONS
|
#ifdef USING_TRANSACTIONS
|
||||||
if (ha_info)
|
if (!ha_info)
|
||||||
{
|
{
|
||||||
uint rw_ha_count;
|
/* Free resources and perform other cleanup even for 'empty' transactions. */
|
||||||
bool rw_trans;
|
if (is_real_trans)
|
||||||
|
thd->transaction.cleanup();
|
||||||
|
DBUG_RETURN(0);
|
||||||
|
}
|
||||||
|
|
||||||
DBUG_EXECUTE_IF("crash_commit_before", abort(););
|
DBUG_EXECUTE_IF("crash_commit_before", abort(););
|
||||||
|
|
||||||
@@ -1129,9 +1135,9 @@ int ha_commit_trans(THD *thd, bool all)
|
|||||||
if (is_real_trans) /* not a statement commit */
|
if (is_real_trans) /* not a statement commit */
|
||||||
thd->stmt_map.close_transient_cursors();
|
thd->stmt_map.close_transient_cursors();
|
||||||
|
|
||||||
rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
|
uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
|
||||||
/* rw_trans is TRUE when we in a transaction changing data */
|
/* rw_trans is TRUE when we in a transaction changing data */
|
||||||
rw_trans= is_real_trans && (rw_ha_count > 0);
|
bool rw_trans= is_real_trans && (rw_ha_count > 0);
|
||||||
|
|
||||||
if (rw_trans &&
|
if (rw_trans &&
|
||||||
wait_if_global_read_lock(thd, 0, 0))
|
wait_if_global_read_lock(thd, 0, 0))
|
||||||
@@ -1146,57 +1152,81 @@ int ha_commit_trans(THD *thd, bool all)
|
|||||||
!thd->slave_thread)
|
!thd->slave_thread)
|
||||||
{
|
{
|
||||||
my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
|
my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
|
||||||
ha_rollback_trans(thd, all);
|
goto err;
|
||||||
error= 1;
|
}
|
||||||
|
|
||||||
|
if (trans->no_2pc || (rw_ha_count <= 1))
|
||||||
|
{
|
||||||
|
error= ha_commit_one_phase(thd, all);
|
||||||
|
DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!trans->no_2pc && (rw_ha_count > 1))
|
need_prepare_ordered= FALSE;
|
||||||
{
|
need_commit_ordered= FALSE;
|
||||||
for (; ha_info && !error; ha_info= ha_info->next())
|
xid= thd->transaction.xid_state.xid.get_my_xid();
|
||||||
|
|
||||||
|
for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
handlerton *ht= ha_info->ht();
|
handlerton *ht= hi->ht();
|
||||||
/*
|
/*
|
||||||
Do not call two-phase commit if this particular
|
Do not call two-phase commit if this particular
|
||||||
transaction is read-only. This allows for simpler
|
transaction is read-only. This allows for simpler
|
||||||
implementation in engines that are always read-only.
|
implementation in engines that are always read-only.
|
||||||
*/
|
*/
|
||||||
if (! ha_info->is_trx_read_write())
|
if (! hi->is_trx_read_write())
|
||||||
continue;
|
continue;
|
||||||
/*
|
/*
|
||||||
Sic: we know that prepare() is not NULL since otherwise
|
Sic: we know that prepare() is not NULL since otherwise
|
||||||
trans->no_2pc would have been set.
|
trans->no_2pc would have been set.
|
||||||
*/
|
*/
|
||||||
if ((err= ht->prepare(ht, thd, all)))
|
if ((err= ht->prepare(ht, thd, all)))
|
||||||
{
|
|
||||||
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
|
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
|
||||||
error= 1;
|
|
||||||
}
|
|
||||||
status_var_increment(thd->status_var.ha_prepare_count);
|
status_var_increment(thd->status_var.ha_prepare_count);
|
||||||
|
|
||||||
|
if (err)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
if (ht->prepare_ordered)
|
||||||
|
need_prepare_ordered= TRUE;
|
||||||
|
if (ht->commit_ordered)
|
||||||
|
need_commit_ordered= TRUE;
|
||||||
}
|
}
|
||||||
DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT(););
|
DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT(););
|
||||||
if (error || (is_real_trans && xid &&
|
|
||||||
(error= !(cookie= tc_log->log_xid(thd, xid)))))
|
if (!is_real_trans)
|
||||||
{
|
{
|
||||||
ha_rollback_trans(thd, all);
|
error= commit_one_phase_2(thd, all, trans, is_real_trans);
|
||||||
error= 1;
|
DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
|
||||||
|
need_commit_ordered);
|
||||||
|
if (!cookie)
|
||||||
|
goto err;
|
||||||
|
|
||||||
DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT(););
|
DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT(););
|
||||||
}
|
|
||||||
error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
|
error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
|
||||||
DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT(););
|
|
||||||
if (cookie)
|
|
||||||
tc_log->unlog(cookie, xid);
|
|
||||||
DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
|
DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT(););
|
||||||
|
tc_log->unlog(cookie, xid);
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
|
||||||
|
goto end;
|
||||||
|
|
||||||
|
/* Come here if error and we need to rollback. */
|
||||||
|
err:
|
||||||
|
if (!error)
|
||||||
|
error= 1;
|
||||||
|
ha_rollback_trans(thd, all);
|
||||||
|
|
||||||
end:
|
end:
|
||||||
if (rw_trans)
|
if (rw_trans)
|
||||||
start_waiting_global_read_lock(thd);
|
start_waiting_global_read_lock(thd);
|
||||||
}
|
|
||||||
/* Free resources and perform other cleanup even for 'empty' transactions. */
|
|
||||||
else if (is_real_trans)
|
|
||||||
thd->transaction.cleanup();
|
|
||||||
#endif /* USING_TRANSACTIONS */
|
#endif /* USING_TRANSACTIONS */
|
||||||
DBUG_RETURN(error);
|
DBUG_RETURN(error);
|
||||||
}
|
}
|
||||||
@@ -1207,7 +1237,6 @@ end:
|
|||||||
*/
|
*/
|
||||||
int ha_commit_one_phase(THD *thd, bool all)
|
int ha_commit_one_phase(THD *thd, bool all)
|
||||||
{
|
{
|
||||||
int error=0;
|
|
||||||
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
|
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
|
||||||
/*
|
/*
|
||||||
"real" is a nick name for a transaction for which a commit will
|
"real" is a nick name for a transaction for which a commit will
|
||||||
@@ -1217,8 +1246,41 @@ int ha_commit_one_phase(THD *thd, bool all)
|
|||||||
enclosing 'all' transaction is rolled back.
|
enclosing 'all' transaction is rolled back.
|
||||||
*/
|
*/
|
||||||
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
|
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
|
||||||
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
|
Ha_trx_info *ha_info= trans->ha_list;
|
||||||
DBUG_ENTER("ha_commit_one_phase");
|
DBUG_ENTER("ha_commit_one_phase");
|
||||||
|
#ifdef USING_TRANSACTIONS
|
||||||
|
if (ha_info)
|
||||||
|
{
|
||||||
|
if (is_real_trans)
|
||||||
|
{
|
||||||
|
bool locked= false;
|
||||||
|
for (; ha_info; ha_info= ha_info->next())
|
||||||
|
{
|
||||||
|
handlerton *ht= ha_info->ht();
|
||||||
|
if (ht->commit_ordered)
|
||||||
|
{
|
||||||
|
if (ha_info->is_trx_read_write() && !locked)
|
||||||
|
{
|
||||||
|
pthread_mutex_lock(&LOCK_commit_ordered);
|
||||||
|
locked= 1;
|
||||||
|
}
|
||||||
|
ht->commit_ordered(ht, thd, all);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (locked)
|
||||||
|
pthread_mutex_unlock(&LOCK_commit_ordered);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* USING_TRANSACTIONS */
|
||||||
|
DBUG_RETURN(commit_one_phase_2(thd, all, trans, is_real_trans));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
|
||||||
|
{
|
||||||
|
int error= 0;
|
||||||
|
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
|
||||||
|
DBUG_ENTER("commit_one_phase_2");
|
||||||
#ifdef USING_TRANSACTIONS
|
#ifdef USING_TRANSACTIONS
|
||||||
if (ha_info)
|
if (ha_info)
|
||||||
{
|
{
|
||||||
|
@@ -657,8 +657,95 @@ struct handlerton
|
|||||||
and 'real commit' mean the same event.
|
and 'real commit' mean the same event.
|
||||||
*/
|
*/
|
||||||
int (*commit)(handlerton *hton, THD *thd, bool all);
|
int (*commit)(handlerton *hton, THD *thd, bool all);
|
||||||
|
/*
|
||||||
|
The commit_ordered() method is called prior to the commit() method, after
|
||||||
|
the transaction manager has decided to commit (not rollback) the
|
||||||
|
transaction. Unlike commit(), commit_ordered() is called only when the
|
||||||
|
full transaction is committed, not for each commit of statement
|
||||||
|
transaction in a multi-statement transaction.
|
||||||
|
|
||||||
|
The calls to commit_ordered() in multiple parallel transactions is
|
||||||
|
guaranteed to happen in the same order in every participating
|
||||||
|
handler. This can be used to ensure the same commit order among multiple
|
||||||
|
handlers (eg. in table handler and binlog). So if transaction T1 calls
|
||||||
|
into commit_ordered() of handler A before T2, then T1 will also call
|
||||||
|
commit_ordered() of handler B before T2.
|
||||||
|
|
||||||
|
Engines that implement this method should during this call make the
|
||||||
|
transaction visible to other transactions, thereby making the order of
|
||||||
|
transaction commits be defined by the order of commit_ordered() calls.
|
||||||
|
|
||||||
|
The intension is that commit_ordered() should do the minimal amount of
|
||||||
|
work that needs to happen in consistent commit order among handlers. To
|
||||||
|
preserve ordering, calls need to be serialised on a global mutex, so
|
||||||
|
doing any time-consuming or blocking operations in commit_ordered() will
|
||||||
|
limit scalability.
|
||||||
|
|
||||||
|
Handlers can rely on commit_ordered() calls for transactions that updated
|
||||||
|
data to be serialised (no two calls can run in parallel, so no extra
|
||||||
|
locking on the handler part is required to ensure this). However, calls
|
||||||
|
for SELECT-only transactions are not serialised, so can occur in parallel
|
||||||
|
with each other and with at most one write-transaction.
|
||||||
|
|
||||||
|
Note that commit_ordered() can be called from a different thread than the
|
||||||
|
one handling the transaction! So it can not do anything that depends on
|
||||||
|
thread local storage, in particular it can not call my_error() and
|
||||||
|
friends (instead it can store the error code and delay the call of
|
||||||
|
my_error() to the commit() method).
|
||||||
|
|
||||||
|
Similarly, since commit_ordered() returns void, any return error code
|
||||||
|
must be saved and returned from the commit() method instead.
|
||||||
|
|
||||||
|
The commit_ordered method is optional, and can be left unset if not
|
||||||
|
needed in a particular handler.
|
||||||
|
*/
|
||||||
|
void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
|
||||||
int (*rollback)(handlerton *hton, THD *thd, bool all);
|
int (*rollback)(handlerton *hton, THD *thd, bool all);
|
||||||
int (*prepare)(handlerton *hton, THD *thd, bool all);
|
int (*prepare)(handlerton *hton, THD *thd, bool all);
|
||||||
|
/*
|
||||||
|
The prepare_ordered method is optional. If set, it will be called after
|
||||||
|
successful prepare() in all handlers participating in 2-phase
|
||||||
|
commit. Like commit_ordered(), it is called only when the full
|
||||||
|
transaction is committed, not for each commit of statement transaction.
|
||||||
|
|
||||||
|
The calls to prepare_ordered() among multiple parallel transactions are
|
||||||
|
ordered consistently with calls to commit_ordered(). This means that
|
||||||
|
calls to prepare_ordered() effectively define the commit order, and that
|
||||||
|
each handler will see the same sequence of transactions calling into
|
||||||
|
prepare_ordered() and commit_ordered().
|
||||||
|
|
||||||
|
Thus, prepare_ordered() can be used to define commit order for handlers
|
||||||
|
that need to do this in the prepare step (like binlog). It can also be
|
||||||
|
used to release transaction's locks early in an order consistent with the
|
||||||
|
order transactions will be eventually committed.
|
||||||
|
|
||||||
|
Like commit_ordered(), prepare_ordered() calls are serialised to maintain
|
||||||
|
ordering, so the intension is that they should execute fast, with only
|
||||||
|
the minimal amount of work needed to define commit order. Handlers can
|
||||||
|
rely on this serialisation, and do not need to do any extra locking to
|
||||||
|
avoid two prepare_ordered() calls running in parallel.
|
||||||
|
|
||||||
|
Like commit_ordered(), prepare_ordered() is not guaranteed to be called
|
||||||
|
in the context of the thread handling the rest of the transaction. So it
|
||||||
|
cannot invoke code that relies on thread local storage, in particular it
|
||||||
|
cannot call my_error().
|
||||||
|
|
||||||
|
When prepare_ordered() is called, the transaction coordinator has already
|
||||||
|
decided to commit (not rollback) the transaction. So prepare_ordered()
|
||||||
|
cannot cause a rollback by returning an error, all possible errors must
|
||||||
|
be handled in prepare() (the prepare_ordered() method returns void). In
|
||||||
|
case of some fatal error, a record of the error must be made internally
|
||||||
|
by the engine and returned from commit() later.
|
||||||
|
|
||||||
|
Note that for user-level XA SQL commands, no consistent ordering among
|
||||||
|
prepare_ordered() and commit_ordered() is guaranteed (as that would
|
||||||
|
require blocking all other commits for an indefinite time).
|
||||||
|
|
||||||
|
When 2-phase commit is not used (eg. only one engine (and no binlog) in
|
||||||
|
transaction), prepare() is not called and in such cases prepare_ordered()
|
||||||
|
also is not called.
|
||||||
|
*/
|
||||||
|
void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
|
||||||
int (*recover)(handlerton *hton, XID *xid_list, uint len);
|
int (*recover)(handlerton *hton, XID *xid_list, uint len);
|
||||||
int (*commit_by_xid)(handlerton *hton, XID *xid);
|
int (*commit_by_xid)(handlerton *hton, XID *xid);
|
||||||
int (*rollback_by_xid)(handlerton *hton, XID *xid);
|
int (*rollback_by_xid)(handlerton *hton, XID *xid);
|
||||||
|
1210
sql/log.cc
1210
sql/log.cc
File diff suppressed because it is too large
Load Diff
209
sql/log.h
209
sql/log.h
@@ -33,11 +33,173 @@ class TC_LOG
|
|||||||
|
|
||||||
virtual int open(const char *opt_name)=0;
|
virtual int open(const char *opt_name)=0;
|
||||||
virtual void close()=0;
|
virtual void close()=0;
|
||||||
virtual int log_xid(THD *thd, my_xid xid)=0;
|
virtual int log_and_order(THD *thd, my_xid xid, bool all,
|
||||||
|
bool need_prepare_ordered,
|
||||||
|
bool need_commit_ordered) = 0;
|
||||||
virtual void unlog(ulong cookie, my_xid xid)=0;
|
virtual void unlog(ulong cookie, my_xid xid)=0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/*
|
||||||
|
These methods are meant to be invoked from log_and_order() implementations
|
||||||
|
to run any prepare_ordered() respectively commit_ordered() methods in
|
||||||
|
participating handlers.
|
||||||
|
|
||||||
|
They must be called using suitable thread syncronisation to ensure that
|
||||||
|
they are each called in the correct commit order among all
|
||||||
|
transactions. However, it is only necessary to call them if the
|
||||||
|
corresponding flag passed to log_and_order is set (it is safe, but not
|
||||||
|
required, to call them when the flag is false).
|
||||||
|
|
||||||
|
The caller must be holding LOCK_prepare_ordered respectively
|
||||||
|
LOCK_commit_ordered when calling these methods.
|
||||||
|
*/
|
||||||
|
void run_prepare_ordered(THD *thd, bool all);
|
||||||
|
void run_commit_ordered(THD *thd, bool all);
|
||||||
};
|
};
|
||||||
|
|
||||||
class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
|
/*
|
||||||
|
Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
|
||||||
|
and TC_LOG::run_commit_ordered(), or any other code that calls handler
|
||||||
|
prepare_ordered() or commit_ordered() methods.
|
||||||
|
*/
|
||||||
|
extern pthread_mutex_t LOCK_prepare_ordered;
|
||||||
|
extern pthread_mutex_t LOCK_commit_ordered;
|
||||||
|
|
||||||
|
extern void TC_init();
|
||||||
|
extern void TC_destroy();
|
||||||
|
|
||||||
|
/*
|
||||||
|
Base class for two TC implementations TC_LOG_unordered and
|
||||||
|
TC_LOG_group_commit that both use a queue of threads waiting for group
|
||||||
|
commit.
|
||||||
|
*/
|
||||||
|
class TC_LOG_queued: public TC_LOG
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
TC_LOG_queued();
|
||||||
|
~TC_LOG_queued();
|
||||||
|
|
||||||
|
/* Structure used to link list of THDs waiting for group commit. */
|
||||||
|
struct TC_group_commit_entry
|
||||||
|
{
|
||||||
|
struct TC_group_commit_entry *next;
|
||||||
|
THD *thd;
|
||||||
|
/* This is the `all' parameter for ha_commit_trans() etc. */
|
||||||
|
bool all;
|
||||||
|
/*
|
||||||
|
Flag set true when it is time for this thread to wake up after group
|
||||||
|
commit. Used with THD::LOCK_commit_ordered and THD::COND_commit_ordered.
|
||||||
|
*/
|
||||||
|
bool group_commit_ready;
|
||||||
|
/*
|
||||||
|
Set by TC_LOG_group_commit::group_log_xid(), to return per-thd error and
|
||||||
|
cookie.
|
||||||
|
*/
|
||||||
|
int xid_error;
|
||||||
|
};
|
||||||
|
|
||||||
|
TC_group_commit_entry * reverse_queue(TC_group_commit_entry *queue);
|
||||||
|
|
||||||
|
void group_commit_wait_for_wakeup(TC_group_commit_entry *entry);
|
||||||
|
void group_commit_wakeup_other(TC_group_commit_entry *other);
|
||||||
|
|
||||||
|
/*
|
||||||
|
This is a queue of threads waiting for being allowed to commit.
|
||||||
|
Access to the queue must be protected by LOCK_prepare_ordered.
|
||||||
|
*/
|
||||||
|
TC_group_commit_entry *group_commit_queue;
|
||||||
|
};
|
||||||
|
|
||||||
|
class TC_LOG_unordered: public TC_LOG_queued
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TC_LOG_unordered();
|
||||||
|
~TC_LOG_unordered();
|
||||||
|
|
||||||
|
int log_and_order(THD *thd, my_xid xid, bool all,
|
||||||
|
bool need_prepare_ordered, bool need_commit_ordered);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual int log_xid(THD *thd, my_xid xid)=0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/*
|
||||||
|
This flag and condition is used to reserve the queue while threads in it
|
||||||
|
each run the commit_ordered() methods one after the other. Only once the
|
||||||
|
last commit_ordered() in the queue is done can we start on a new queue
|
||||||
|
run.
|
||||||
|
|
||||||
|
Since we start this process in the first thread in the queue and finish in
|
||||||
|
the last (and possibly different) thread, we need a condition variable for
|
||||||
|
this (we cannot unlock a mutex in a different thread than the one who
|
||||||
|
locked it).
|
||||||
|
|
||||||
|
The condition is used together with the LOCK_prepare_ordered mutex.
|
||||||
|
*/
|
||||||
|
my_bool group_commit_queue_busy;
|
||||||
|
pthread_cond_t COND_queue_busy;
|
||||||
|
};
|
||||||
|
|
||||||
|
class TC_LOG_group_commit: public TC_LOG_queued
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TC_LOG_group_commit();
|
||||||
|
~TC_LOG_group_commit();
|
||||||
|
|
||||||
|
int log_and_order(THD *thd, my_xid xid, bool all,
|
||||||
|
bool need_prepare_ordered, bool need_commit_ordered);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/* Total number of committed transactions. */
|
||||||
|
ulonglong num_commits;
|
||||||
|
/* Number of group commits done. */
|
||||||
|
ulonglong num_group_commits;
|
||||||
|
|
||||||
|
/*
|
||||||
|
When using this class, this method is used instead of log_xid() to do
|
||||||
|
logging of a group of transactions all at once.
|
||||||
|
|
||||||
|
The transactions will be linked through THD::next_commit_ordered.
|
||||||
|
|
||||||
|
Additionally, when this method is used instead of log_xid(), the order in
|
||||||
|
which handler->prepare_ordered() and handler->commit_ordered() are called
|
||||||
|
is guaranteed to be the same as the order of calls and THD list elements
|
||||||
|
for group_log_xid().
|
||||||
|
|
||||||
|
This can be used to efficiently implement group commit that at the same
|
||||||
|
time preserves the order of commits among handlers and TC (eg. to get same
|
||||||
|
commit order in InnoDB and binary log).
|
||||||
|
|
||||||
|
For TCs that do not need this, it can be preferable to use plain log_xid()
|
||||||
|
with class TC_LOG_unordered instead, as it allows threads to run log_xid()
|
||||||
|
in parallel with each other. In contrast, group_log_xid() runs under a
|
||||||
|
global mutex, so it is guaranteed that only once call into it will be
|
||||||
|
active at once.
|
||||||
|
|
||||||
|
Since this call handles multiple threads/THDs at once, my_error() (and
|
||||||
|
other code that relies on thread local storage) cannot be used in this
|
||||||
|
method. Instead, the implementation must record any error and report it as
|
||||||
|
the return value from xid_log_after(), which will be invoked individually
|
||||||
|
for each thread.
|
||||||
|
|
||||||
|
In the success case, this method must set thd->xid_cookie for each thread
|
||||||
|
to the cookie that is normally returned from log_xid() (which must be
|
||||||
|
non-zero in the non-error case).
|
||||||
|
*/
|
||||||
|
virtual void group_log_xid(TC_group_commit_entry *first) = 0;
|
||||||
|
/*
|
||||||
|
Called for each transaction (in corrent thread context) after
|
||||||
|
group_log_xid() has finished, but with no guarantee on ordering among
|
||||||
|
threads.
|
||||||
|
Can be used to do error reporting etc. */
|
||||||
|
virtual int xid_log_after(TC_group_commit_entry *entry) = 0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/* Mutex used to serialise calls to group_log_xid(). */
|
||||||
|
pthread_mutex_t LOCK_group_commit;
|
||||||
|
};
|
||||||
|
|
||||||
|
class TC_LOG_DUMMY: public TC_LOG_unordered // use it to disable the logging
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
TC_LOG_DUMMY() {}
|
TC_LOG_DUMMY() {}
|
||||||
@@ -48,7 +210,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
#ifdef HAVE_MMAP
|
#ifdef HAVE_MMAP
|
||||||
class TC_LOG_MMAP: public TC_LOG
|
class TC_LOG_MMAP: public TC_LOG_unordered
|
||||||
{
|
{
|
||||||
public: // only to keep Sun Forte on sol9x86 happy
|
public: // only to keep Sun Forte on sol9x86 happy
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@@ -227,12 +389,19 @@ private:
|
|||||||
time_t last_time;
|
time_t last_time;
|
||||||
};
|
};
|
||||||
|
|
||||||
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
class binlog_trx_data;
|
||||||
|
class MYSQL_BIN_LOG: public TC_LOG_group_commit, private MYSQL_LOG
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
|
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
|
||||||
pthread_mutex_t LOCK_index;
|
pthread_mutex_t LOCK_index;
|
||||||
pthread_mutex_t LOCK_prep_xids;
|
pthread_mutex_t LOCK_prep_xids;
|
||||||
|
/*
|
||||||
|
Mutex to protect the queue of transactions waiting to participate in group
|
||||||
|
commit. (Only used on platforms without native atomic operations).
|
||||||
|
*/
|
||||||
|
pthread_mutex_t LOCK_queue;
|
||||||
|
|
||||||
pthread_cond_t COND_prep_xids;
|
pthread_cond_t COND_prep_xids;
|
||||||
pthread_cond_t update_cond;
|
pthread_cond_t update_cond;
|
||||||
ulonglong bytes_written;
|
ulonglong bytes_written;
|
||||||
@@ -271,8 +440,8 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
|||||||
In 5.0 it's 0 for relay logs too!
|
In 5.0 it's 0 for relay logs too!
|
||||||
*/
|
*/
|
||||||
bool no_auto_events;
|
bool no_auto_events;
|
||||||
|
/* Queue of transactions queued up to participate in group commit. */
|
||||||
ulonglong m_table_map_version;
|
binlog_trx_data *group_commit_queue;
|
||||||
|
|
||||||
int write_to_file(IO_CACHE *cache);
|
int write_to_file(IO_CACHE *cache);
|
||||||
/*
|
/*
|
||||||
@@ -282,6 +451,14 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
|||||||
*/
|
*/
|
||||||
void new_file_without_locking();
|
void new_file_without_locking();
|
||||||
void new_file_impl(bool need_lock);
|
void new_file_impl(bool need_lock);
|
||||||
|
int write_transaction(binlog_trx_data *trx_data);
|
||||||
|
bool write_transaction_to_binlog_events(binlog_trx_data *trx_data);
|
||||||
|
void trx_group_commit_participant(binlog_trx_data *trx_data);
|
||||||
|
void trx_group_commit_leader(TC_group_commit_entry *first);
|
||||||
|
binlog_trx_data *atomic_enqueue_trx(binlog_trx_data *trx_data);
|
||||||
|
binlog_trx_data *atomic_grab_trx_queue();
|
||||||
|
void mark_xid_done();
|
||||||
|
void mark_xids_active(uint xid_count);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MYSQL_LOG::generate_name;
|
MYSQL_LOG::generate_name;
|
||||||
@@ -310,18 +487,11 @@ public:
|
|||||||
|
|
||||||
int open(const char *opt_name);
|
int open(const char *opt_name);
|
||||||
void close();
|
void close();
|
||||||
int log_xid(THD *thd, my_xid xid);
|
void group_log_xid(TC_group_commit_entry *first);
|
||||||
|
int xid_log_after(TC_group_commit_entry *entry);
|
||||||
void unlog(ulong cookie, my_xid xid);
|
void unlog(ulong cookie, my_xid xid);
|
||||||
int recover(IO_CACHE *log, Format_description_log_event *fdle);
|
int recover(IO_CACHE *log, Format_description_log_event *fdle);
|
||||||
#if !defined(MYSQL_CLIENT)
|
#if !defined(MYSQL_CLIENT)
|
||||||
bool is_table_mapped(TABLE *table) const
|
|
||||||
{
|
|
||||||
return table->s->table_map_version == table_map_version();
|
|
||||||
}
|
|
||||||
|
|
||||||
ulonglong table_map_version() const { return m_table_map_version; }
|
|
||||||
void update_table_map_version() { ++m_table_map_version; }
|
|
||||||
|
|
||||||
int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event);
|
int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event);
|
||||||
int remove_pending_rows_event(THD *thd);
|
int remove_pending_rows_event(THD *thd);
|
||||||
|
|
||||||
@@ -362,10 +532,12 @@ public:
|
|||||||
void new_file();
|
void new_file();
|
||||||
|
|
||||||
bool write(Log_event* event_info); // binary log write
|
bool write(Log_event* event_info); // binary log write
|
||||||
bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
|
bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
|
||||||
bool write_incident(THD *thd, bool lock);
|
Log_event *end_ev);
|
||||||
|
bool trx_group_commit_finish(binlog_trx_data *trx_data);
|
||||||
|
bool write_incident(THD *thd);
|
||||||
|
|
||||||
int write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
|
int write_cache(IO_CACHE *cache);
|
||||||
void set_write_error(THD *thd);
|
void set_write_error(THD *thd);
|
||||||
bool check_write_error(THD *thd);
|
bool check_write_error(THD *thd);
|
||||||
|
|
||||||
@@ -420,6 +592,7 @@ public:
|
|||||||
inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
|
inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
|
||||||
inline IO_CACHE *get_index_file() { return &index_file;}
|
inline IO_CACHE *get_index_file() { return &index_file;}
|
||||||
inline uint32 get_open_count() { return open_count; }
|
inline uint32 get_open_count() { return open_count; }
|
||||||
|
void set_status_variables();
|
||||||
};
|
};
|
||||||
|
|
||||||
class Log_event_handler
|
class Log_event_handler
|
||||||
|
@@ -463,10 +463,9 @@ struct sql_ex_info
|
|||||||
#define LOG_EVENT_SUPPRESS_USE_F 0x8
|
#define LOG_EVENT_SUPPRESS_USE_F 0x8
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The table map version internal to the log should be increased after
|
This used to be LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F, but is now unused.
|
||||||
the event has been written to the binary log.
|
|
||||||
*/
|
*/
|
||||||
#define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x10
|
#define LOG_EVENT_UNUSED1_F 0x10
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@def LOG_EVENT_ARTIFICIAL_F
|
@def LOG_EVENT_ARTIFICIAL_F
|
||||||
|
@@ -1333,6 +1333,7 @@ void clean_up(bool print_message)
|
|||||||
ha_end();
|
ha_end();
|
||||||
if (tc_log)
|
if (tc_log)
|
||||||
tc_log->close();
|
tc_log->close();
|
||||||
|
TC_destroy();
|
||||||
xid_cache_free();
|
xid_cache_free();
|
||||||
wt_end();
|
wt_end();
|
||||||
delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache);
|
delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache);
|
||||||
@@ -4124,6 +4125,8 @@ a file name for --log-bin-index option", opt_binlog_index_name);
|
|||||||
if (!errmesg[0][0])
|
if (!errmesg[0][0])
|
||||||
unireg_abort(1);
|
unireg_abort(1);
|
||||||
|
|
||||||
|
TC_init();
|
||||||
|
|
||||||
/* We have to initialize the storage engines before CSV logging */
|
/* We have to initialize the storage engines before CSV logging */
|
||||||
if (ha_init())
|
if (ha_init())
|
||||||
{
|
{
|
||||||
|
@@ -673,6 +673,8 @@ THD::THD()
|
|||||||
active_vio = 0;
|
active_vio = 0;
|
||||||
#endif
|
#endif
|
||||||
pthread_mutex_init(&LOCK_thd_data, MY_MUTEX_INIT_FAST);
|
pthread_mutex_init(&LOCK_thd_data, MY_MUTEX_INIT_FAST);
|
||||||
|
pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_FAST);
|
||||||
|
pthread_cond_init(&COND_commit_ordered, 0);
|
||||||
|
|
||||||
/* Variables with default values */
|
/* Variables with default values */
|
||||||
proc_info="login";
|
proc_info="login";
|
||||||
@@ -999,6 +1001,8 @@ THD::~THD()
|
|||||||
free_root(&transaction.mem_root,MYF(0));
|
free_root(&transaction.mem_root,MYF(0));
|
||||||
#endif
|
#endif
|
||||||
mysys_var=0; // Safety (shouldn't be needed)
|
mysys_var=0; // Safety (shouldn't be needed)
|
||||||
|
pthread_cond_destroy(&COND_commit_ordered);
|
||||||
|
pthread_mutex_destroy(&LOCK_commit_ordered);
|
||||||
pthread_mutex_destroy(&LOCK_thd_data);
|
pthread_mutex_destroy(&LOCK_thd_data);
|
||||||
#ifndef DBUG_OFF
|
#ifndef DBUG_OFF
|
||||||
dbug_sentry= THD_SENTRY_GONE;
|
dbug_sentry= THD_SENTRY_GONE;
|
||||||
@@ -3773,7 +3777,6 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end)
|
|||||||
if (stmt_end)
|
if (stmt_end)
|
||||||
{
|
{
|
||||||
pending->set_flags(Rows_log_event::STMT_END_F);
|
pending->set_flags(Rows_log_event::STMT_END_F);
|
||||||
pending->flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
|
|
||||||
binlog_table_maps= 0;
|
binlog_table_maps= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3901,7 +3904,6 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
|
|||||||
{
|
{
|
||||||
Query_log_event qinfo(this, query_arg, query_len, is_trans, suppress_use,
|
Query_log_event qinfo(this, query_arg, query_len, is_trans, suppress_use,
|
||||||
errcode);
|
errcode);
|
||||||
qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
|
|
||||||
/*
|
/*
|
||||||
Binlog table maps will be irrelevant after a Query_log_event
|
Binlog table maps will be irrelevant after a Query_log_event
|
||||||
(they are just removed on the slave side) so after the query
|
(they are just removed on the slave side) so after the query
|
||||||
|
@@ -1438,6 +1438,10 @@ public:
|
|||||||
/* container for handler's private per-connection data */
|
/* container for handler's private per-connection data */
|
||||||
Ha_data ha_data[MAX_HA];
|
Ha_data ha_data[MAX_HA];
|
||||||
|
|
||||||
|
/* Mutex and condition for waking up threads after group commit. */
|
||||||
|
pthread_mutex_t LOCK_commit_ordered;
|
||||||
|
pthread_cond_t COND_commit_ordered;
|
||||||
|
|
||||||
#ifndef MYSQL_CLIENT
|
#ifndef MYSQL_CLIENT
|
||||||
int binlog_setup_trx_data();
|
int binlog_setup_trx_data();
|
||||||
|
|
||||||
|
@@ -516,7 +516,6 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
Delete_file_log_event d(thd, db, transactional_table);
|
Delete_file_log_event d(thd, db, transactional_table);
|
||||||
d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
|
|
||||||
(void) mysql_bin_log.write(&d);
|
(void) mysql_bin_log.write(&d);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -698,7 +697,6 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
|
|||||||
(duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
|
(duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
|
||||||
(ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
|
(ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
|
||||||
transactional_table, FALSE, errcode);
|
transactional_table, FALSE, errcode);
|
||||||
e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
|
|
||||||
return mysql_bin_log.write(&e);
|
return mysql_bin_log.write(&e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
10
sql/table.cc
10
sql/table.cc
@@ -296,13 +296,6 @@ TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key,
|
|||||||
|
|
||||||
share->version= refresh_version;
|
share->version= refresh_version;
|
||||||
|
|
||||||
/*
|
|
||||||
This constant is used to mark that no table map version has been
|
|
||||||
assigned. No arithmetic is done on the value: it will be
|
|
||||||
overwritten with a value taken from MYSQL_BIN_LOG.
|
|
||||||
*/
|
|
||||||
share->table_map_version= ~(ulonglong)0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Since alloc_table_share() can be called without any locking (for
|
Since alloc_table_share() can be called without any locking (for
|
||||||
example, ha_create_table... functions), we do not assign a table
|
example, ha_create_table... functions), we do not assign a table
|
||||||
@@ -367,10 +360,9 @@ void init_tmp_table_share(THD *thd, TABLE_SHARE *share, const char *key,
|
|||||||
share->frm_version= FRM_VER_TRUE_VARCHAR;
|
share->frm_version= FRM_VER_TRUE_VARCHAR;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Temporary tables are not replicated, but we set up these fields
|
Temporary tables are not replicated, but we set up this fields
|
||||||
anyway to be able to catch errors.
|
anyway to be able to catch errors.
|
||||||
*/
|
*/
|
||||||
share->table_map_version= ~(ulonglong)0;
|
|
||||||
share->cached_row_logging_check= -1;
|
share->cached_row_logging_check= -1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -433,7 +433,6 @@ typedef struct st_table_share
|
|||||||
bool waiting_on_cond; /* Protection against free */
|
bool waiting_on_cond; /* Protection against free */
|
||||||
bool deleting; /* going to delete this table */
|
bool deleting; /* going to delete this table */
|
||||||
ulong table_map_id; /* for row-based replication */
|
ulong table_map_id; /* for row-based replication */
|
||||||
ulonglong table_map_version;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Cache for row-based replication table share checks that does not
|
Cache for row-based replication table share checks that does not
|
||||||
|
@@ -138,8 +138,6 @@ bool check_global_access(THD *thd, ulong want_access);
|
|||||||
|
|
||||||
/** to protect innobase_open_files */
|
/** to protect innobase_open_files */
|
||||||
static pthread_mutex_t innobase_share_mutex;
|
static pthread_mutex_t innobase_share_mutex;
|
||||||
/** to force correct commit order in binlog */
|
|
||||||
static pthread_mutex_t prepare_commit_mutex;
|
|
||||||
static ulong commit_threads = 0;
|
static ulong commit_threads = 0;
|
||||||
static pthread_mutex_t commit_threads_m;
|
static pthread_mutex_t commit_threads_m;
|
||||||
static pthread_cond_t commit_cond;
|
static pthread_cond_t commit_cond;
|
||||||
@@ -239,6 +237,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
|
|||||||
static INNOBASE_SHARE *get_share(const char *table_name);
|
static INNOBASE_SHARE *get_share(const char *table_name);
|
||||||
static void free_share(INNOBASE_SHARE *share);
|
static void free_share(INNOBASE_SHARE *share);
|
||||||
static int innobase_close_connection(handlerton *hton, THD* thd);
|
static int innobase_close_connection(handlerton *hton, THD* thd);
|
||||||
|
static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
|
||||||
static int innobase_commit(handlerton *hton, THD* thd, bool all);
|
static int innobase_commit(handlerton *hton, THD* thd, bool all);
|
||||||
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
|
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
|
||||||
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
|
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
|
||||||
@@ -1356,7 +1355,6 @@ innobase_trx_init(
|
|||||||
trx_t* trx) /*!< in/out: InnoDB transaction handle */
|
trx_t* trx) /*!< in/out: InnoDB transaction handle */
|
||||||
{
|
{
|
||||||
DBUG_ENTER("innobase_trx_init");
|
DBUG_ENTER("innobase_trx_init");
|
||||||
DBUG_ASSERT(EQ_CURRENT_THD(thd));
|
|
||||||
DBUG_ASSERT(thd == trx->mysql_thd);
|
DBUG_ASSERT(thd == trx->mysql_thd);
|
||||||
|
|
||||||
trx->check_foreigns = !thd_test_options(
|
trx->check_foreigns = !thd_test_options(
|
||||||
@@ -1416,8 +1414,6 @@ check_trx_exists(
|
|||||||
{
|
{
|
||||||
trx_t*& trx = thd_to_trx(thd);
|
trx_t*& trx = thd_to_trx(thd);
|
||||||
|
|
||||||
ut_ad(EQ_CURRENT_THD(thd));
|
|
||||||
|
|
||||||
if (trx == NULL) {
|
if (trx == NULL) {
|
||||||
trx = innobase_trx_allocate(thd);
|
trx = innobase_trx_allocate(thd);
|
||||||
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
|
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
|
||||||
@@ -2024,6 +2020,7 @@ innobase_init(
|
|||||||
innobase_hton->savepoint_set=innobase_savepoint;
|
innobase_hton->savepoint_set=innobase_savepoint;
|
||||||
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
|
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
|
||||||
innobase_hton->savepoint_release=innobase_release_savepoint;
|
innobase_hton->savepoint_release=innobase_release_savepoint;
|
||||||
|
innobase_hton->commit_ordered=innobase_commit_ordered;
|
||||||
innobase_hton->commit=innobase_commit;
|
innobase_hton->commit=innobase_commit;
|
||||||
innobase_hton->rollback=innobase_rollback;
|
innobase_hton->rollback=innobase_rollback;
|
||||||
innobase_hton->prepare=innobase_xa_prepare;
|
innobase_hton->prepare=innobase_xa_prepare;
|
||||||
@@ -2492,7 +2489,6 @@ skip_overwrite:
|
|||||||
|
|
||||||
innobase_open_tables = hash_create(200);
|
innobase_open_tables = hash_create(200);
|
||||||
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
|
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
|
||||||
pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
|
|
||||||
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
|
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
|
||||||
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
|
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
|
||||||
pthread_mutex_init(&analyze_mutex, MY_MUTEX_INIT_FAST);
|
pthread_mutex_init(&analyze_mutex, MY_MUTEX_INIT_FAST);
|
||||||
@@ -2547,7 +2543,6 @@ innobase_end(
|
|||||||
my_free(internal_innobase_data_file_path,
|
my_free(internal_innobase_data_file_path,
|
||||||
MYF(MY_ALLOW_ZERO_PTR));
|
MYF(MY_ALLOW_ZERO_PTR));
|
||||||
pthread_mutex_destroy(&innobase_share_mutex);
|
pthread_mutex_destroy(&innobase_share_mutex);
|
||||||
pthread_mutex_destroy(&prepare_commit_mutex);
|
|
||||||
pthread_mutex_destroy(&commit_threads_m);
|
pthread_mutex_destroy(&commit_threads_m);
|
||||||
pthread_mutex_destroy(&commit_cond_m);
|
pthread_mutex_destroy(&commit_cond_m);
|
||||||
pthread_mutex_destroy(&analyze_mutex);
|
pthread_mutex_destroy(&analyze_mutex);
|
||||||
@@ -2680,6 +2675,101 @@ innobase_start_trx_and_assign_read_view(
|
|||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*****************************************************************//**
|
||||||
|
Perform the first, fast part of InnoDB commit.
|
||||||
|
|
||||||
|
Doing it in this call ensures that we get the same commit order here
|
||||||
|
as in binlog and any other participating transactional storage engines.
|
||||||
|
|
||||||
|
Note that we want to do as little as really needed here, as we run
|
||||||
|
under a global mutex. The expensive fsync() is done later, in
|
||||||
|
innobase_commit(), without a lock so group commit can take place.
|
||||||
|
|
||||||
|
Note also that this method can be called from a different thread than
|
||||||
|
the one handling the rest of the transaction. */
|
||||||
|
static
|
||||||
|
void
|
||||||
|
innobase_commit_ordered(
|
||||||
|
/*============*/
|
||||||
|
handlerton *hton, /*!< in: Innodb handlerton */
|
||||||
|
THD* thd, /*!< in: MySQL thread handle of the user for whom
|
||||||
|
the transaction should be committed */
|
||||||
|
bool all) /*!< in: TRUE - commit transaction
|
||||||
|
FALSE - the current SQL statement ended */
|
||||||
|
{
|
||||||
|
trx_t* trx;
|
||||||
|
DBUG_ENTER("innobase_commit_ordered");
|
||||||
|
DBUG_ASSERT(hton == innodb_hton_ptr);
|
||||||
|
|
||||||
|
trx = check_trx_exists(thd);
|
||||||
|
|
||||||
|
if (trx->active_trans == 0
|
||||||
|
&& trx->conc_state != TRX_NOT_STARTED) {
|
||||||
|
/* We cannot throw error here; instead we will catch this error
|
||||||
|
again in innobase_commit() and report it from there. */
|
||||||
|
DBUG_VOID_RETURN;
|
||||||
|
}
|
||||||
|
/* Since we will reserve the kernel mutex, we have to release
|
||||||
|
the search system latch first to obey the latching order. */
|
||||||
|
|
||||||
|
if (trx->has_search_latch) {
|
||||||
|
trx_search_latch_release_if_reserved(trx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* commit_ordered is only called when committing the whole transaction
|
||||||
|
(or an SQL statement when autocommit is on). */
|
||||||
|
DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
|
||||||
|
|
||||||
|
/* We need current binlog position for ibbackup to work.
|
||||||
|
Note, the position is current because commit_ordered is guaranteed
|
||||||
|
to be called in same sequenece as writing to binlog. */
|
||||||
|
|
||||||
|
retry:
|
||||||
|
if (innobase_commit_concurrency > 0) {
|
||||||
|
pthread_mutex_lock(&commit_cond_m);
|
||||||
|
commit_threads++;
|
||||||
|
|
||||||
|
if (commit_threads > innobase_commit_concurrency) {
|
||||||
|
commit_threads--;
|
||||||
|
pthread_cond_wait(&commit_cond,
|
||||||
|
&commit_cond_m);
|
||||||
|
pthread_mutex_unlock(&commit_cond_m);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
pthread_mutex_unlock(&commit_cond_m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The following calls to read the MySQL binary log
|
||||||
|
file name and the position return consistent results:
|
||||||
|
1) We use commit_ordered() to get same commit order
|
||||||
|
in InnoDB as in binary log.
|
||||||
|
2) A MySQL log file rotation cannot happen because
|
||||||
|
MySQL protects against this by having a counter of
|
||||||
|
transactions in prepared state and it only allows
|
||||||
|
a rotation when the counter drops to zero. See
|
||||||
|
LOCK_prep_xids and COND_prep_xids in log.cc. */
|
||||||
|
trx->mysql_log_file_name = mysql_bin_log_file_name();
|
||||||
|
trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
|
||||||
|
|
||||||
|
/* Don't do write + flush right now. For group commit
|
||||||
|
to work we want to do the flush in the innobase_commit()
|
||||||
|
method, which runs without holding any locks. */
|
||||||
|
trx->flush_log_later = TRUE;
|
||||||
|
innobase_commit_low(trx);
|
||||||
|
trx->flush_log_later = FALSE;
|
||||||
|
|
||||||
|
if (innobase_commit_concurrency > 0) {
|
||||||
|
pthread_mutex_lock(&commit_cond_m);
|
||||||
|
commit_threads--;
|
||||||
|
pthread_cond_signal(&commit_cond);
|
||||||
|
pthread_mutex_unlock(&commit_cond_m);
|
||||||
|
}
|
||||||
|
|
||||||
|
DBUG_VOID_RETURN;
|
||||||
|
}
|
||||||
|
|
||||||
/*****************************************************************//**
|
/*****************************************************************//**
|
||||||
Commits a transaction in an InnoDB database or marks an SQL statement
|
Commits a transaction in an InnoDB database or marks an SQL statement
|
||||||
ended.
|
ended.
|
||||||
@@ -2702,13 +2792,6 @@ innobase_commit(
|
|||||||
|
|
||||||
trx = check_trx_exists(thd);
|
trx = check_trx_exists(thd);
|
||||||
|
|
||||||
/* Since we will reserve the kernel mutex, we have to release
|
|
||||||
the search system latch first to obey the latching order. */
|
|
||||||
|
|
||||||
if (trx->has_search_latch) {
|
|
||||||
trx_search_latch_release_if_reserved(trx);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The flag trx->active_trans is set to 1 in
|
/* The flag trx->active_trans is set to 1 in
|
||||||
|
|
||||||
1. ::external_lock(),
|
1. ::external_lock(),
|
||||||
@@ -2736,62 +2819,8 @@ innobase_commit(
|
|||||||
/* We were instructed to commit the whole transaction, or
|
/* We were instructed to commit the whole transaction, or
|
||||||
this is an SQL statement end and autocommit is on */
|
this is an SQL statement end and autocommit is on */
|
||||||
|
|
||||||
/* We need current binlog position for ibbackup to work.
|
/* We did the first part already in innobase_commit_ordered(),
|
||||||
Note, the position is current because of
|
Now finish by doing a write + flush of logs. */
|
||||||
prepare_commit_mutex */
|
|
||||||
retry:
|
|
||||||
if (innobase_commit_concurrency > 0) {
|
|
||||||
pthread_mutex_lock(&commit_cond_m);
|
|
||||||
commit_threads++;
|
|
||||||
|
|
||||||
if (commit_threads > innobase_commit_concurrency) {
|
|
||||||
commit_threads--;
|
|
||||||
pthread_cond_wait(&commit_cond,
|
|
||||||
&commit_cond_m);
|
|
||||||
pthread_mutex_unlock(&commit_cond_m);
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
pthread_mutex_unlock(&commit_cond_m);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The following calls to read the MySQL binary log
|
|
||||||
file name and the position return consistent results:
|
|
||||||
1) Other InnoDB transactions cannot intervene between
|
|
||||||
these calls as we are holding prepare_commit_mutex.
|
|
||||||
2) Binary logging of other engines is not relevant
|
|
||||||
to InnoDB as all InnoDB requires is that committing
|
|
||||||
InnoDB transactions appear in the same order in the
|
|
||||||
MySQL binary log as they appear in InnoDB logs.
|
|
||||||
3) A MySQL log file rotation cannot happen because
|
|
||||||
MySQL protects against this by having a counter of
|
|
||||||
transactions in prepared state and it only allows
|
|
||||||
a rotation when the counter drops to zero. See
|
|
||||||
LOCK_prep_xids and COND_prep_xids in log.cc. */
|
|
||||||
trx->mysql_log_file_name = mysql_bin_log_file_name();
|
|
||||||
trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
|
|
||||||
|
|
||||||
/* Don't do write + flush right now. For group commit
|
|
||||||
to work we want to do the flush after releasing the
|
|
||||||
prepare_commit_mutex. */
|
|
||||||
trx->flush_log_later = TRUE;
|
|
||||||
innobase_commit_low(trx);
|
|
||||||
trx->flush_log_later = FALSE;
|
|
||||||
|
|
||||||
if (innobase_commit_concurrency > 0) {
|
|
||||||
pthread_mutex_lock(&commit_cond_m);
|
|
||||||
commit_threads--;
|
|
||||||
pthread_cond_signal(&commit_cond);
|
|
||||||
pthread_mutex_unlock(&commit_cond_m);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (trx->active_trans == 2) {
|
|
||||||
|
|
||||||
pthread_mutex_unlock(&prepare_commit_mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Now do a write + flush of logs. */
|
|
||||||
trx_commit_complete_for_mysql(trx);
|
trx_commit_complete_for_mysql(trx);
|
||||||
trx->active_trans = 0;
|
trx->active_trans = 0;
|
||||||
|
|
||||||
@@ -4621,6 +4650,7 @@ no_commit:
|
|||||||
no need to re-acquire locks on it. */
|
no need to re-acquire locks on it. */
|
||||||
|
|
||||||
/* Altering to InnoDB format */
|
/* Altering to InnoDB format */
|
||||||
|
innobase_commit_ordered(ht, user_thd, 1);
|
||||||
innobase_commit(ht, user_thd, 1);
|
innobase_commit(ht, user_thd, 1);
|
||||||
/* Note that this transaction is still active. */
|
/* Note that this transaction is still active. */
|
||||||
prebuilt->trx->active_trans = 1;
|
prebuilt->trx->active_trans = 1;
|
||||||
@@ -4637,6 +4667,7 @@ no_commit:
|
|||||||
|
|
||||||
/* Commit the transaction. This will release the table
|
/* Commit the transaction. This will release the table
|
||||||
locks, so they have to be acquired again. */
|
locks, so they have to be acquired again. */
|
||||||
|
innobase_commit_ordered(ht, user_thd, 1);
|
||||||
innobase_commit(ht, user_thd, 1);
|
innobase_commit(ht, user_thd, 1);
|
||||||
/* Note that this transaction is still active. */
|
/* Note that this transaction is still active. */
|
||||||
prebuilt->trx->active_trans = 1;
|
prebuilt->trx->active_trans = 1;
|
||||||
@@ -8339,6 +8370,7 @@ ha_innobase::external_lock(
|
|||||||
|
|
||||||
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
|
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
|
||||||
if (trx->active_trans != 0) {
|
if (trx->active_trans != 0) {
|
||||||
|
innobase_commit_ordered(ht, thd, TRUE);
|
||||||
innobase_commit(ht, thd, TRUE);
|
innobase_commit(ht, thd, TRUE);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -9448,36 +9480,6 @@ innobase_xa_prepare(
|
|||||||
|
|
||||||
srv_active_wake_master_thread();
|
srv_active_wake_master_thread();
|
||||||
|
|
||||||
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
|
|
||||||
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
|
|
||||||
{
|
|
||||||
if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
|
|
||||||
/* choose group commit rather than binlog order */
|
|
||||||
return(error);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* For ibbackup to work the order of transactions in binlog
|
|
||||||
and InnoDB must be the same. Consider the situation
|
|
||||||
|
|
||||||
thread1> prepare; write to binlog; ...
|
|
||||||
<context switch>
|
|
||||||
thread2> prepare; write to binlog; commit
|
|
||||||
thread1> ... commit
|
|
||||||
|
|
||||||
To ensure this will not happen we're taking the mutex on
|
|
||||||
prepare, and releasing it on commit.
|
|
||||||
|
|
||||||
Note: only do it for normal commits, done via ha_commit_trans.
|
|
||||||
If 2pc protocol is executed by external transaction
|
|
||||||
coordinator, it will be just a regular MySQL client
|
|
||||||
executing XA PREPARE and XA COMMIT commands.
|
|
||||||
In this case we cannot know how many minutes or hours
|
|
||||||
will be between XA PREPARE and XA COMMIT, and we don't want
|
|
||||||
to block for undefined period of time. */
|
|
||||||
pthread_mutex_lock(&prepare_commit_mutex);
|
|
||||||
trx->active_trans = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
return(error);
|
return(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -10669,11 +10671,6 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint,
|
|||||||
"Enable/Disable flushing along modified age. (none, reflex, [estimate])",
|
"Enable/Disable flushing along modified age. (none, reflex, [estimate])",
|
||||||
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
|
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
|
||||||
|
|
||||||
static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
|
|
||||||
PLUGIN_VAR_RQCMDARG,
|
|
||||||
"Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
|
|
||||||
NULL, NULL, 0, 0, 1, 0);
|
|
||||||
|
|
||||||
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
|
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
|
||||||
PLUGIN_VAR_RQCMDARG,
|
PLUGIN_VAR_RQCMDARG,
|
||||||
"Enable/Disable converting automatically *.ibd files when import tablespace.",
|
"Enable/Disable converting automatically *.ibd files when import tablespace.",
|
||||||
@@ -10763,7 +10760,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
|
|||||||
MYSQL_SYSVAR(flush_neighbor_pages),
|
MYSQL_SYSVAR(flush_neighbor_pages),
|
||||||
MYSQL_SYSVAR(read_ahead),
|
MYSQL_SYSVAR(read_ahead),
|
||||||
MYSQL_SYSVAR(adaptive_checkpoint),
|
MYSQL_SYSVAR(adaptive_checkpoint),
|
||||||
MYSQL_SYSVAR(enable_unsafe_group_commit),
|
|
||||||
MYSQL_SYSVAR(expand_import),
|
MYSQL_SYSVAR(expand_import),
|
||||||
MYSQL_SYSVAR(extra_rsegments),
|
MYSQL_SYSVAR(extra_rsegments),
|
||||||
MYSQL_SYSVAR(dict_size_limit),
|
MYSQL_SYSVAR(dict_size_limit),
|
||||||
|
Reference in New Issue
Block a user