1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

Merge MWL#116 into mariadb-5.2-rpl.

This commit is contained in:
unknown
2010-12-25 15:42:33 +01:00
23 changed files with 1930 additions and 452 deletions

View File

@ -0,0 +1,63 @@
CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
SELECT variable_value INTO @commits FROM information_schema.global_status
WHERE variable_name = 'binlog_commits';
SELECT variable_value INTO @group_commits FROM information_schema.global_status
WHERE variable_name = 'binlog_group_commits';
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
INSERT INTO t1 VALUES ("con1");
set DEBUG_SYNC= "now WAIT_FOR group1_running";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
INSERT INTO t1 VALUES ("con2");
SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
INSERT INTO t1 VALUES ("con3");
SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
INSERT INTO t1 VALUES ("con4");
SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SELECT * FROM t1 ORDER BY a;
a
SET DEBUG_SYNC= "now SIGNAL group2_queued";
SELECT * FROM t1 ORDER BY a;
a
con1
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
INSERT INTO t1 VALUES ("con5");
SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
INSERT INTO t1 VALUES ("con6");
SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
SELECT * FROM t1 ORDER BY a;
a
con1
SET DEBUG_SYNC= "now SIGNAL group3_committed";
SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
SELECT * FROM t1 ORDER BY a;
a
con1
con2
con3
con4
SET DEBUG_SYNC= "now SIGNAL group2_checked";
SELECT * FROM t1 ORDER BY a;
a
con1
con2
con3
con4
con5
con6
SELECT variable_value - @commits FROM information_schema.global_status
WHERE variable_name = 'binlog_commits';
variable_value - @commits
6
SELECT variable_value - @group_commits FROM information_schema.global_status
WHERE variable_name = 'binlog_group_commits';
variable_value - @group_commits
3
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;

View File

@ -0,0 +1,35 @@
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
INSERT INTO t1 VALUES (1);
SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
INSERT INTO t1 VALUES (2);
SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
INSERT INTO t1 VALUES (3);
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
SELECT * FROM t1 ORDER BY a;
a
0
1
2
SET SESSION debug="+d,crash_dispatch_command_before";
SELECT 1;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
SELECT * FROM t1 ORDER BY a;
a
0
1
2
3
InnoDB: Last MySQL binlog file position 0 767, file name ./master-bin.000001
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;

View File

@ -0,0 +1,120 @@
CREATE TABLE t1(a CHAR(255),
b CHAR(255),
c CHAR(255),
d CHAR(255),
id INT AUTO_INCREMENT,
PRIMARY KEY(id)) ENGINE=InnoDB;
create table t2 like t1;
create procedure setcrash(IN i INT)
begin
CASE i
WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
ELSE BEGIN END;
END CASE;
end //
FLUSH TABLES;
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(5);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(4);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
a b c d 1
a b c d 2
a b c d 3
a b c d 4
a b c d 5
a b c d 6
a b c d 7
a b c d 8
a b c d 9
a b c d 10
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(3);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
a b c d 1
a b c d 2
a b c d 3
a b c d 4
a b c d 5
a b c d 6
a b c d 7
a b c d 8
a b c d 9
a b c d 10
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(2);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
a b c d 1
a b c d 2
a b c d 3
a b c d 4
a b c d 5
a b c d 6
a b c d 7
a b c d 8
a b c d 9
a b c d 10
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 174 Query 1 268 use `test`; insert into t1 select * from t2
delete from t1;
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
call setcrash(1);
COMMIT;
Got one of the listed errors
SELECT * FROM t1 ORDER BY id;
a b c d id
SHOW BINLOG EVENTS LIMIT 2,1;
Log_name Pos Event_type Server_id End_log_pos Info
delete from t1;
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE setcrash;

View File

@ -0,0 +1,28 @@
CALL mtr.add_suppression("Error writing file 'master-bin'");
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES(0);
SET SESSION debug='+d,fail_binlog_write_1';
INSERT INTO t1 VALUES(1);
ERROR HY000: Error writing file 'master-bin' (errno: 28)
INSERT INTO t1 VALUES(2);
ERROR HY000: Error writing file 'master-bin' (errno: 28)
SET SESSION debug='';
INSERT INTO t1 VALUES(3);
SELECT * FROM t1;
a
0
3
SHOW BINLOG EVENTS;
Log_name Pos Event_type Server_id End_log_pos Info
BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: #
BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0)
BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS BEGIN
BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3)
BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
DROP TABLE t1;

View File

@ -0,0 +1,29 @@
source include/have_debug.inc;
source include/have_innodb.inc;
source include/have_log_bin.inc;
source include/have_binlog_format_mixed_or_statement.inc;
CALL mtr.add_suppression("Error writing file 'master-bin'");
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES(0);
SET SESSION debug='+d,fail_binlog_write_1';
--error ER_ERROR_ON_WRITE
INSERT INTO t1 VALUES(1);
--error ER_ERROR_ON_WRITE
INSERT INTO t1 VALUES(2);
SET SESSION debug='';
INSERT INTO t1 VALUES(3);
SELECT * FROM t1;
# Actually the output from this currently shows a bug.
# The injected IO error leaves partially written transactions in the binlog in
# the form of stray "BEGIN" events.
# These should disappear from the output if binlog error handling is improved.
--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
--replace_column 1 BINLOG 2 POS 5 ENDPOS
SHOW BINLOG EVENTS;
DROP TABLE t1;

View File

@ -0,0 +1,31 @@
drop table if exists t1, t2;
SET binlog_format = 'mixed';
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=pbxt;
BEGIN;
SELECT @@log_bin;
@@log_bin
1
INSERT INTO t1 VALUES (1);
INSERT INTO t2 VALUES (2);
COMMIT;
select * from t1;
a
1
select * from t2;
b
2
SET sql_log_bin = 0;
INSERT INTO t1 VALUES (3);
INSERT INTO t2 VALUES (4);
COMMIT;
select * from t1 order by a;
a
1
3
select * from t2 order by b;
b
2
4
drop table t1, t2;
drop database pbxt;

View File

@ -0,0 +1,31 @@
--source include/have_innodb.inc
--source include/have_log_bin.inc
--disable_warnings
drop table if exists t1, t2;
--enable_warnings
SET binlog_format = 'mixed';
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=pbxt;
BEGIN;
# verify that binlog is on
SELECT @@log_bin;
INSERT INTO t1 VALUES (1);
INSERT INTO t2 VALUES (2);
COMMIT;
select * from t1;
select * from t2;
# Test 2-phase commit when we disable binlogging.
SET sql_log_bin = 0;
INSERT INTO t1 VALUES (3);
INSERT INTO t2 VALUES (4);
COMMIT;
select * from t1 order by a;
select * from t2 order by b;
drop table t1, t2;
drop database pbxt;

View File

@ -0,0 +1,115 @@
--source include/have_debug_sync.inc
--source include/have_innodb.inc
--source include/have_log_bin.inc
# Test some group commit code paths by using debug_sync to do controlled
# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a
# group.
#
# Group 3 is allowed to race as far as possible ahead before group 2 finishes
# to check some edge case for concurrency control.
CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
SELECT variable_value INTO @commits FROM information_schema.global_status
WHERE variable_name = 'binlog_commits';
SELECT variable_value INTO @group_commits FROM information_schema.global_status
WHERE variable_name = 'binlog_group_commits';
connect(con1,localhost,root,,);
connect(con2,localhost,root,,);
connect(con3,localhost,root,,);
connect(con4,localhost,root,,);
connect(con5,localhost,root,,);
connect(con6,localhost,root,,);
# Start group1 (with one thread) doing commit, waiting for
# group2 to queue up before finishing.
connection con1;
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
send INSERT INTO t1 VALUES ("con1");
# Make group2 (with three threads) queue up.
# Make sure con2 is the group commit leader for group2.
# Make group2 wait with running commit_ordered() until group3 has committed.
connection con2;
set DEBUG_SYNC= "now WAIT_FOR group1_running";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
send INSERT INTO t1 VALUES ("con2");
connection con3;
SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
send INSERT INTO t1 VALUES ("con3");
connection con4;
SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
send INSERT INTO t1 VALUES ("con4");
# When group2 is queued, let group1 continue and queue group3.
connection default;
SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
# At this point, trasaction 1 is still not visible as commit_ordered() has not
# been called yet.
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SELECT * FROM t1 ORDER BY a;
SET DEBUG_SYNC= "now SIGNAL group2_queued";
connection con1;
reap;
# Now transaction 1 is visible.
connection default;
SELECT * FROM t1 ORDER BY a;
connection con5;
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
send INSERT INTO t1 VALUES ("con5");
connection con6;
SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
send INSERT INTO t1 VALUES ("con6");
connection default;
SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
# Still only transaction 1 visible, as group2 have not yet run commit_ordered().
SELECT * FROM t1 ORDER BY a;
SET DEBUG_SYNC= "now SIGNAL group3_committed";
SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
# Now transactions 1-4 visible.
SELECT * FROM t1 ORDER BY a;
SET DEBUG_SYNC= "now SIGNAL group2_checked";
connection con2;
reap;
connection con3;
reap;
connection con4;
reap;
connection con5;
reap;
connection con6;
reap;
connection default;
# Check all transactions finally visible.
SELECT * FROM t1 ORDER BY a;
SELECT variable_value - @commits FROM information_schema.global_status
WHERE variable_name = 'binlog_commits';
SELECT variable_value - @group_commits FROM information_schema.global_status
WHERE variable_name = 'binlog_group_commits';
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;

View File

@ -0,0 +1 @@
--skip-stack-trace --skip-core-file

View File

@ -0,0 +1,85 @@
--source include/have_debug_sync.inc
--source include/have_innodb.inc
--source include/have_log_bin.inc
--source include/have_binlog_format_mixed_or_statement.inc
# Need DBUG to crash the server intentionally
--source include/have_debug.inc
# Don't test this under valgrind, memory leaks will occur as we crash
--source include/not_valgrind.inc
# XtraDB stores the binlog position corresponding to the last commit, and
# prints it during crash recovery.
# Test that we get the correct position when we group commit several
# transactions together.
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
connect(con1,localhost,root,,);
connect(con2,localhost,root,,);
connect(con3,localhost,root,,);
# Queue up three commits for group commit.
connection con1;
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
send INSERT INTO t1 VALUES (1);
connection con2;
SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
send INSERT INTO t1 VALUES (2);
connection con3;
SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
send INSERT INTO t1 VALUES (3);
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
# At this point, no transactions are committed.
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
# At this point, 1 transaction is committed.
SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
# At this point, 2 transactions are committed.
SELECT * FROM t1 ORDER BY a;
connection con2;
reap;
# Now crash the server with 1+2 in-memory committed, 3 only prepared.
connection default;
system echo wait-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
SET SESSION debug="+d,crash_dispatch_command_before";
--error 2006,2013
SELECT 1;
connection con1;
--error 2006,2013
reap;
connection con3;
--error 2006,2013
reap;
system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
connection default;
--enable_reconnect
--source include/wait_until_connected_again.inc
# Crash recovery should recover all three transactions.
SELECT * FROM t1 ORDER BY a;
# Check that the binlog position reported by InnoDB is the correct one
# for the end of the second transaction (as can be checked with
# mysqlbinlog).
let $MYSQLD_DATADIR= `SELECT @@datadir`;
--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;

View File

@ -0,0 +1 @@
--skip-stack-trace --skip-core-file

View File

@ -0,0 +1,80 @@
# Testing group commit by crashing a few times.
# Test adapted from the Facebook patch: lp:mysqlatfacebook
--source include/not_embedded.inc
# Don't test this under valgrind, memory leaks will occur
--source include/not_valgrind.inc
# Binary must be compiled with debug for crash to occur
--source include/have_debug.inc
--source include/have_innodb.inc
--source include/have_log_bin.inc
let $file_format_check=`SELECT @@innodb_file_format_check`;
CREATE TABLE t1(a CHAR(255),
b CHAR(255),
c CHAR(255),
d CHAR(255),
id INT AUTO_INCREMENT,
PRIMARY KEY(id)) ENGINE=InnoDB;
create table t2 like t1;
delimiter //;
create procedure setcrash(IN i INT)
begin
CASE i
WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
ELSE BEGIN END;
END CASE;
end //
delimiter ;//
# Avoid getting a crashed mysql.proc table.
FLUSH TABLES;
let $numtests = 5;
let $numinserts = 10;
while ($numinserts)
{
dec $numinserts;
INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
}
--enable_reconnect
while ($numtests)
{
RESET MASTER;
START TRANSACTION;
insert into t1 select * from t2;
# Write file to make mysql-test-run.pl expect crash
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
eval call setcrash($numtests);
# Run the crashing query
--error 2006,2013
COMMIT;
# Poll the server waiting for it to be back online again.
--source include/wait_until_connected_again.inc
# table and binlog should be in sync.
SELECT * FROM t1 ORDER BY id;
SHOW BINLOG EVENTS LIMIT 2,1;
delete from t1;
dec $numtests;
}
# final cleanup
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE setcrash;
--disable_query_log
eval SET GLOBAL innodb_file_format_check=$file_format_check;
--enable_query_log

View File

@ -78,6 +78,8 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
bool is_real_trans);
static plugin_ref ha_default_plugin(THD *thd)
@ -1076,7 +1078,7 @@ ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
*/
int ha_commit_trans(THD *thd, bool all)
{
int error= 0, cookie= 0;
int error= 0, cookie;
/*
'all' means that this is either an explicit commit issued by
user, or an implicit commit issued by a DDL.
@ -1091,7 +1093,8 @@ int ha_commit_trans(THD *thd, bool all)
*/
bool is_real_trans= all || thd->transaction.all.ha_list == 0;
Ha_trx_info *ha_info= trans->ha_list;
my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
bool need_prepare_ordered, need_commit_ordered;
my_xid xid;
DBUG_ENTER("ha_commit_trans");
/* Just a random warning to test warnings pushed during autocommit. */
@ -1130,10 +1133,13 @@ int ha_commit_trans(THD *thd, bool all)
DBUG_RETURN(2);
}
#ifdef USING_TRANSACTIONS
if (ha_info)
if (!ha_info)
{
uint rw_ha_count;
bool rw_trans;
/* Free resources and perform other cleanup even for 'empty' transactions. */
if (is_real_trans)
thd->transaction.cleanup();
DBUG_RETURN(0);
}
DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
@ -1141,9 +1147,9 @@ int ha_commit_trans(THD *thd, bool all)
if (is_real_trans) /* not a statement commit */
thd->stmt_map.close_transient_cursors();
rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
/* rw_trans is TRUE when we in a transaction changing data */
rw_trans= is_real_trans && (rw_ha_count > 0);
bool rw_trans= is_real_trans && (rw_ha_count > 0);
if (rw_trans &&
wait_if_global_read_lock(thd, 0, 0))
@ -1158,57 +1164,81 @@ int ha_commit_trans(THD *thd, bool all)
!thd->slave_thread)
{
my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
ha_rollback_trans(thd, all);
error= 1;
goto err;
}
if (trans->no_2pc || (rw_ha_count <= 1))
{
error= ha_commit_one_phase(thd, all);
DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
goto end;
}
if (!trans->no_2pc && (rw_ha_count > 1))
{
for (; ha_info && !error; ha_info= ha_info->next())
need_prepare_ordered= FALSE;
need_commit_ordered= FALSE;
xid= thd->transaction.xid_state.xid.get_my_xid();
for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
{
int err;
handlerton *ht= ha_info->ht();
handlerton *ht= hi->ht();
/*
Do not call two-phase commit if this particular
transaction is read-only. This allows for simpler
implementation in engines that are always read-only.
*/
if (! ha_info->is_trx_read_write())
if (! hi->is_trx_read_write())
continue;
/*
Sic: we know that prepare() is not NULL since otherwise
trans->no_2pc would have been set.
*/
if ((err= ht->prepare(ht, thd, all)))
{
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
error= 1;
}
status_var_increment(thd->status_var.ha_prepare_count);
if (err)
goto err;
if (ht->prepare_ordered)
need_prepare_ordered= TRUE;
if (ht->commit_ordered)
need_commit_ordered= TRUE;
}
DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
if (error || (is_real_trans && xid &&
(error= !(cookie= tc_log->log_xid(thd, xid)))))
if (!is_real_trans)
{
ha_rollback_trans(thd, all);
error= 1;
error= commit_one_phase_2(thd, all, trans, is_real_trans);
DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
goto end;
}
cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
need_commit_ordered);
if (!cookie)
goto err;
DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
}
error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
if (cookie)
tc_log->unlog(cookie, xid);
error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
tc_log->unlog(cookie, xid);
DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
goto end;
/* Come here if error and we need to rollback. */
err:
if (!error)
error= 1;
ha_rollback_trans(thd, all);
end:
if (rw_trans)
start_waiting_global_read_lock(thd);
}
/* Free resources and perform other cleanup even for 'empty' transactions. */
else if (is_real_trans)
thd->transaction.cleanup();
#endif /* USING_TRANSACTIONS */
DBUG_RETURN(error);
}
@ -1219,7 +1249,6 @@ end:
*/
int ha_commit_one_phase(THD *thd, bool all)
{
int error=0;
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
/*
"real" is a nick name for a transaction for which a commit will
@ -1229,8 +1258,16 @@ int ha_commit_one_phase(THD *thd, bool all)
enclosing 'all' transaction is rolled back.
*/
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
DBUG_ENTER("ha_commit_one_phase");
DBUG_RETURN(commit_one_phase_2(thd, all, trans, is_real_trans));
}
static int
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
{
int error= 0;
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
DBUG_ENTER("commit_one_phase_2");
#ifdef USING_TRANSACTIONS
if (ha_info)
{
@ -1847,7 +1884,13 @@ int ha_start_consistent_snapshot(THD *thd)
{
bool warn= true;
/*
Holding the LOCK_commit_ordered mutex ensures that for any transaction
we either see it committed in all engines, or in none.
*/
pthread_mutex_lock(&LOCK_commit_ordered);
plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
pthread_mutex_unlock(&LOCK_commit_ordered);
/*
Same idea as when one wants to CREATE TABLE in one engine which does not

View File

@ -765,8 +765,97 @@ struct handlerton
and 'real commit' mean the same event.
*/
int (*commit)(handlerton *hton, THD *thd, bool all);
/*
The commit_ordered() method is called prior to the commit() method, after
the transaction manager has decided to commit (not rollback) the
transaction. Unlike commit(), commit_ordered() is called only when the
full transaction is committed, not for each commit of statement
transaction in a multi-statement transaction.
Not that like prepare(), commit_ordered() is only called when 2-phase
commit takes place. Ie. when no binary log and only a single engine
participates in a transaction, one commit() is called, no
commit_orderd(). So engines must be prepared for this.
The calls to commit_ordered() in multiple parallel transactions is
guaranteed to happen in the same order in every participating
handler. This can be used to ensure the same commit order among multiple
handlers (eg. in table handler and binlog). So if transaction T1 calls
into commit_ordered() of handler A before T2, then T1 will also call
commit_ordered() of handler B before T2.
Engines that implement this method should during this call make the
transaction visible to other transactions, thereby making the order of
transaction commits be defined by the order of commit_ordered() calls.
The intension is that commit_ordered() should do the minimal amount of
work that needs to happen in consistent commit order among handlers. To
preserve ordering, calls need to be serialised on a global mutex, so
doing any time-consuming or blocking operations in commit_ordered() will
limit scalability.
Handlers can rely on commit_ordered() calls to be serialised (no two
calls can run in parallel, so no extra locking on the handler part is
required to ensure this).
Note that commit_ordered() can be called from a different thread than the
one handling the transaction! So it can not do anything that depends on
thread local storage, in particular it can not call my_error() and
friends (instead it can store the error code and delay the call of
my_error() to the commit() method).
Similarly, since commit_ordered() returns void, any return error code
must be saved and returned from the commit() method instead.
The commit_ordered method is optional, and can be left unset if not
needed in a particular handler (then there will be no ordering guarantees
wrt. other engines and binary log).
*/
void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
int (*rollback)(handlerton *hton, THD *thd, bool all);
int (*prepare)(handlerton *hton, THD *thd, bool all);
/*
The prepare_ordered method is optional. If set, it will be called after
successful prepare() in all handlers participating in 2-phase
commit. Like commit_ordered(), it is called only when the full
transaction is committed, not for each commit of statement transaction.
The calls to prepare_ordered() among multiple parallel transactions are
ordered consistently with calls to commit_ordered(). This means that
calls to prepare_ordered() effectively define the commit order, and that
each handler will see the same sequence of transactions calling into
prepare_ordered() and commit_ordered().
Thus, prepare_ordered() can be used to define commit order for handlers
that need to do this in the prepare step (like binlog). It can also be
used to release transaction's locks early in an order consistent with the
order transactions will be eventually committed.
Like commit_ordered(), prepare_ordered() calls are serialised to maintain
ordering, so the intension is that they should execute fast, with only
the minimal amount of work needed to define commit order. Handlers can
rely on this serialisation, and do not need to do any extra locking to
avoid two prepare_ordered() calls running in parallel.
Like commit_ordered(), prepare_ordered() is not guaranteed to be called
in the context of the thread handling the rest of the transaction. So it
cannot invoke code that relies on thread local storage, in particular it
cannot call my_error().
prepare_ordered() cannot cause a rollback by returning an error, all
possible errors must be handled in prepare() (the prepare_ordered()
method returns void). In case of some fatal error, a record of the error
must be made internally by the engine and returned from commit() later.
Note that for user-level XA SQL commands, no consistent ordering among
prepare_ordered() and commit_ordered() is guaranteed (as that would
require blocking all other commits for an indefinite time).
When 2-phase commit is not used (eg. only one engine (and no binlog) in
transaction), prepare() is not called and in such cases prepare_ordered()
also is not called.
*/
void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);

File diff suppressed because it is too large Load Diff

122
sql/log.h
View File

@ -38,17 +38,58 @@ class TC_LOG
virtual int open(const char *opt_name)=0;
virtual void close()=0;
virtual int log_xid(THD *thd, my_xid xid)=0;
virtual int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered,
bool need_commit_ordered) = 0;
virtual void unlog(ulong cookie, my_xid xid)=0;
protected:
/*
These methods are meant to be invoked from log_and_order() implementations
to run any prepare_ordered() respectively commit_ordered() methods in
participating handlers.
They must be called using suitable thread syncronisation to ensure that
they are each called in the correct commit order among all
transactions. However, it is only necessary to call them if the
corresponding flag passed to log_and_order is set (it is safe, but not
required, to call them when the flag is false).
The caller must be holding LOCK_prepare_ordered respectively
LOCK_commit_ordered when calling these methods.
*/
void run_prepare_ordered(THD *thd, bool all);
void run_commit_ordered(THD *thd, bool all);
};
/*
Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
and TC_LOG::run_commit_ordered(), or any other code that calls handler
prepare_ordered() or commit_ordered() methods.
*/
extern pthread_mutex_t LOCK_prepare_ordered;
extern pthread_mutex_t LOCK_commit_ordered;
extern void TC_init();
extern void TC_destroy();
class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
{
public:
TC_LOG_DUMMY() {}
int open(const char *opt_name) { return 0; }
void close() { }
int log_xid(THD *thd, my_xid xid) { return 1; }
/*
TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
only use internal XA during commit when >= 2 XA-capable engines
participate.
*/
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered)
{
DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
return 1;
}
void unlog(ulong cookie, my_xid xid) { }
};
@ -74,6 +115,13 @@ class TC_LOG_MMAP: public TC_LOG
pthread_cond_t cond; // to wait for a sync
} PAGE;
/* List of THDs for which to invoke commit_ordered(), in order. */
struct commit_entry
{
struct commit_entry *next;
THD *thd;
};
char logname[FN_REFLEN];
File fd;
my_off_t file_length;
@ -88,16 +136,38 @@ class TC_LOG_MMAP: public TC_LOG
*/
pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
pthread_cond_t COND_pool, COND_active;
/*
Queue of threads that need to call commit_ordered().
Access to this queue must be protected by LOCK_prepare_ordered.
*/
commit_entry *commit_ordered_queue;
/*
This flag and condition is used to reserve the queue while threads in it
each run the commit_ordered() methods one after the other. Only once the
last commit_ordered() in the queue is done can we start on a new queue
run.
Since we start this process in the first thread in the queue and finish in
the last (and possibly different) thread, we need a condition variable for
this (we cannot unlock a mutex in a different thread than the one who
locked it).
The condition is used together with the LOCK_prepare_ordered mutex.
*/
my_bool commit_ordered_queue_busy;
pthread_cond_t COND_queue_busy;
public:
TC_LOG_MMAP(): inited(0) {}
int open(const char *opt_name);
void close();
int log_xid(THD *thd, my_xid xid);
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered);
void unlog(ulong cookie, my_xid xid);
int recover();
private:
int log_one_transaction(my_xid xid);
void get_active_from_pool();
int sync();
int overflow();
@ -232,9 +302,31 @@ private:
time_t last_time;
};
class binlog_trx_data;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
{
private:
struct group_commit_entry
{
struct group_commit_entry *next;
THD *thd;
binlog_trx_data *trx_data;
/*
Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
written during group commit. The incident_event is only valid if
trx_data->has_incident() is true.
*/
Log_event *begin_event;
Log_event *end_event;
Log_event *incident_event;
/* Set during group commit to record any per-thread error. */
int error;
int commit_errno;
/* This is the `all' parameter for ha_commit_ordered(). */
bool all;
/* True if we come in through XA log_and_order(), false otherwise. */
};
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
pthread_mutex_t LOCK_index;
pthread_mutex_t LOCK_prep_xids;
@ -276,6 +368,12 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
/* Queue of transactions queued up to participate in group commit. */
group_commit_entry *group_commit_queue;
/* Total number of committed transactions. */
ulonglong num_commits;
/* Number of group commits done. */
ulonglong num_group_commits;
int write_to_file(IO_CACHE *cache);
/*
@ -285,6 +383,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
*/
void new_file_without_locking();
void new_file_impl(bool need_lock);
int write_transaction(group_commit_entry *entry);
bool write_transaction_to_binlog_events(group_commit_entry *entry);
void trx_group_commit_leader(group_commit_entry *leader);
void mark_xid_done();
void mark_xids_active(uint xid_count);
public:
MYSQL_LOG::generate_name;
@ -313,7 +416,8 @@ public:
int open(const char *opt_name);
void close();
int log_xid(THD *thd, my_xid xid);
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered);
void unlog(ulong cookie, my_xid xid);
int recover(IO_CACHE *log, Format_description_log_event *fdle);
#if !defined(MYSQL_CLIENT)
@ -358,10 +462,11 @@ public:
void reset_gathered_updates(THD *thd);
bool write(Log_event* event_info); // binary log write
bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
bool write_incident(THD *thd, bool lock);
int write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
bool flush_and_sync);
bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
Log_event *end_ev, bool all);
bool write_incident(THD *thd);
int write_cache(THD *thd, IO_CACHE *cache);
void set_write_error(THD *thd);
bool check_write_error(THD *thd);
@ -416,6 +521,7 @@ public:
inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
inline IO_CACHE *get_index_file() { return &index_file;}
inline uint32 get_open_count() { return open_count; }
void set_status_variables();
};
class Log_event_handler

View File

@ -1391,6 +1391,7 @@ void clean_up(bool print_message)
ha_end();
if (tc_log)
tc_log->close();
TC_destroy();
xid_cache_free();
wt_end();
delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache);
@ -4241,6 +4242,8 @@ a file name for --log-bin-index option", opt_binlog_index_name);
if (!errmesg[0][0])
unireg_abort(1);
TC_init();
/* We have to initialize the storage engines before CSV logging */
if (ha_init())
{

View File

@ -761,6 +761,8 @@ THD::THD()
active_vio = 0;
#endif
pthread_mutex_init(&LOCK_thd_data, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
pthread_cond_init(&COND_wakeup_ready, 0);
/* Variables with default values */
proc_info="login";
@ -1147,6 +1149,8 @@ THD::~THD()
free_root(&transaction.mem_root,MYF(0));
#endif
mysys_var=0; // Safety (shouldn't be needed)
pthread_cond_destroy(&COND_wakeup_ready);
pthread_mutex_destroy(&LOCK_wakeup_ready);
pthread_mutex_destroy(&LOCK_thd_data);
#ifndef DBUG_OFF
dbug_sentry= THD_SENTRY_GONE;
@ -4150,6 +4154,25 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
DBUG_RETURN(0);
}
void
THD::wait_for_wakeup_ready()
{
pthread_mutex_lock(&LOCK_wakeup_ready);
while (!wakeup_ready)
pthread_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready);
pthread_mutex_unlock(&LOCK_wakeup_ready);
}
void
THD::signal_wakeup_ready()
{
pthread_mutex_lock(&LOCK_wakeup_ready);
wakeup_ready= true;
pthread_cond_signal(&COND_wakeup_ready);
pthread_mutex_unlock(&LOCK_wakeup_ready);
}
bool Discrete_intervals_list::append(ulonglong start, ulonglong val,
ulonglong incr)
{

View File

@ -2454,6 +2454,14 @@ public:
return backup;
}
void clear_wakeup_ready() { wakeup_ready= false; }
/*
Sleep waiting for others to wake us up with signal_wakeup_ready().
Must call clear_wakeup_ready() before waiting.
*/
void wait_for_wakeup_ready();
/* Wake this thread up from wait_for_wakeup_ready(). */
void signal_wakeup_ready();
private:
/** The current internal error handler for this thread, or NULL. */
Internal_error_handler *m_internal_handler;
@ -2492,6 +2500,16 @@ private:
*/
LEX_STRING invoker_user;
LEX_STRING invoker_host;
/*
Flag, mutex and condition for a thread to wait for a signal from another
thread.
Currently used to wait for group commit to complete, can also be used for
other purposes.
*/
bool wakeup_ready;
pthread_mutex_t LOCK_wakeup_ready;
pthread_cond_t COND_wakeup_ready;
};
/** A short cut for thd->main_da.set_ok_status(). */

View File

@ -1007,6 +1007,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ENTER("dispatch_command");
DBUG_PRINT("info", ("command: %d", command));
DBUG_EXECUTE_IF("crash_dispatch_command_before",
{ DBUG_PRINT("crash_dispatch_command_before", ("now"));
DBUG_ABORT(); });
thd->command=command;
/*
Commands which always take a long time are logged into

View File

@ -117,8 +117,6 @@ bool check_global_access(THD *thd, ulong want_access);
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
/** to force correct commit order in binlog */
static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
@ -222,6 +220,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@ -1365,7 +1364,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@ -1424,8 +1422,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
ut_ad(EQ_CURRENT_THD(thd));
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@ -1717,10 +1713,10 @@ innobase_query_caching_of_table_permitted(
/* The call of row_search_.. will start a new transaction if it is
not yet started */
if (trx->active_trans == 0) {
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
trx->active_trans = 1;
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
}
if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
@ -1990,11 +1986,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Set the MySQL flag to mark that there is an active transaction */
if (prebuilt->trx->active_trans == 0) {
if ((prebuilt->trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, user_thd);
prebuilt->trx->active_trans = 1;
prebuilt->trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
}
/* We did the necessary inits in this function, no need to repeat them
@ -2045,6 +2041,7 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
@ -2547,7 +2544,6 @@ skip_overwrite:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_cond_init(&commit_cond, NULL);
@ -2602,7 +2598,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_cond_destroy(&commit_cond);
@ -2723,14 +2718,119 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
if (trx->active_trans == 0) {
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(hton, thd);
trx->active_trans = 1;
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
}
DBUG_RETURN(0);
}
static
void
innobase_commit_ordered_2(
/*============*/
trx_t* trx, /*!< in: Innodb transaction */
THD* thd) /*!< in: MySQL thread handle */
{
ulonglong tmp_pos;
DBUG_ENTER("innobase_commit_ordered");
/* We need current binlog position for ibbackup to work.
Note, the position is current because commit_ordered is guaranteed
to be called in same sequenece as writing to binlog. */
retry:
if (innobase_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m);
commit_threads++;
if (commit_threads > innobase_commit_concurrency) {
commit_threads--;
pthread_cond_wait(&commit_cond,
&commit_cond_m);
pthread_mutex_unlock(&commit_cond_m);
goto retry;
}
else {
pthread_mutex_unlock(&commit_cond_m);
}
}
mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
trx->mysql_log_offset = (ib_int64_t) tmp_pos;
/* Don't do write + flush right now. For group commit
to work we want to do the flush in the innobase_commit()
method, which runs without holding any locks. */
trx->flush_log_later = TRUE;
innobase_commit_low(trx);
trx->flush_log_later = FALSE;
if (innobase_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m);
commit_threads--;
pthread_cond_signal(&commit_cond);
pthread_mutex_unlock(&commit_cond_m);
}
DBUG_VOID_RETURN;
}
/*****************************************************************//**
Perform the first, fast part of InnoDB commit.
Doing it in this call ensures that we get the same commit order here
as in binlog and any other participating transactional storage engines.
Note that we want to do as little as really needed here, as we run
under a global mutex. The expensive fsync() is done later, in
innobase_commit(), without a lock so group commit can take place.
Note also that this method can be called from a different thread than
the one handling the rest of the transaction. */
static
void
innobase_commit_ordered(
/*============*/
handlerton *hton, /*!< in: Innodb handlerton */
THD* thd, /*!< in: MySQL thread handle of the user for whom
the transaction should be committed */
bool all) /*!< in: TRUE - commit transaction
FALSE - the current SQL statement ended */
{
trx_t* trx;
DBUG_ENTER("innobase_commit_ordered");
DBUG_ASSERT(hton == innodb_hton_ptr);
trx = check_trx_exists(thd);
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
if (trx->has_search_latch) {
trx_search_latch_release_if_reserved(trx);
}
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
/* We cannot throw error here; instead we will catch this error
again in innobase_commit() and report it from there. */
DBUG_VOID_RETURN;
}
/* commit_ordered is only called when committing the whole transaction
(or an SQL statement when autocommit is on). */
DBUG_ASSERT(all ||
(!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
innobase_commit_ordered_2(trx, thd);
trx->active_trans |= TRX_ACTIVE_COMMIT_ORDERED;
DBUG_VOID_RETURN;
}
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@ -2756,11 +2856,12 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
if (trx->has_search_latch) {
if (trx->has_search_latch &&
(trx->active_trans & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
trx_search_latch_release_if_reserved(trx);
}
/* The flag trx->active_trans is set to 1 in
/* The flag TRX_ACTIVE_IN_MYSQL in trx->active_trans is set in
1. ::external_lock(),
2. ::start_stmt(),
@ -2770,79 +2871,31 @@ innobase_commit(
6. innobase_start_trx_and_assign_read_view(),
7. ::transactional_table_lock()
and it is only set to 0 in a commit or a rollback. If it is 0 we know
and it is only cleared in a commit or a rollback. If it is unset we know
there cannot be resources to be freed and we could return immediately.
For the time being, we play safe and do the cleanup though there should
be nothing to clean up. */
if (trx->active_trans == 0
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
sql_print_error("trx->active_trans == 0, but"
" trx->conc_state != TRX_NOT_STARTED");
}
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
/* Run the fast part of commit if we did not already. */
if ((trx->active_trans & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
innobase_commit_ordered_2(trx, thd);
}
/* We were instructed to commit the whole transaction, or
this is an SQL statement end and autocommit is on */
/* We need current binlog position for ibbackup to work.
Note, the position is current because of
prepare_commit_mutex */
retry:
if (innobase_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m);
commit_threads++;
if (commit_threads > innobase_commit_concurrency) {
commit_threads--;
pthread_cond_wait(&commit_cond,
&commit_cond_m);
pthread_mutex_unlock(&commit_cond_m);
goto retry;
}
else {
pthread_mutex_unlock(&commit_cond_m);
}
}
/* The following calls to read the MySQL binary log
file name and the position return consistent results:
1) Other InnoDB transactions cannot intervene between
these calls as we are holding prepare_commit_mutex.
2) Binary logging of other engines is not relevant
to InnoDB as all InnoDB requires is that committing
InnoDB transactions appear in the same order in the
MySQL binary log as they appear in InnoDB logs.
3) A MySQL log file rotation cannot happen because
MySQL protects against this by having a counter of
transactions in prepared state and it only allows
a rotation when the counter drops to zero. See
LOCK_prep_xids and COND_prep_xids in log.cc. */
trx->mysql_log_file_name = mysql_bin_log_file_name();
trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
/* Don't do write + flush right now. For group commit
to work we want to do the flush after releasing the
prepare_commit_mutex. */
trx->flush_log_later = TRUE;
innobase_commit_low(trx);
trx->flush_log_later = FALSE;
if (innobase_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m);
commit_threads--;
pthread_cond_signal(&commit_cond);
pthread_mutex_unlock(&commit_cond_m);
}
if (trx->active_trans == 2) {
pthread_mutex_unlock(&prepare_commit_mutex);
}
/* Now do a write + flush of logs. */
/* We did the first part already in innobase_commit_ordered(),
Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
trx->active_trans = 0;
@ -3061,7 +3114,7 @@ innobase_savepoint(
innobase_release_stat_resources(trx);
/* cannot happen outside of transaction */
DBUG_ASSERT(trx->active_trans);
DBUG_ASSERT(trx->active_trans & TRX_ACTIVE_IN_MYSQL);
/* TODO: use provided savepoint data area to store savepoint data */
char name[64];
@ -3091,7 +3144,7 @@ innobase_close_connection(
ut_a(trx);
if (trx->active_trans == 0
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
sql_print_error("trx->active_trans == 0, but"
@ -5019,7 +5072,7 @@ no_commit:
/* Altering to InnoDB format */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
prebuilt->trx->active_trans = 1;
prebuilt->trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
/* We will need an IX lock on the destination table. */
prebuilt->sql_stat_start = TRUE;
} else {
@ -5035,7 +5088,7 @@ no_commit:
locks, so they have to be acquired again. */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
prebuilt->trx->active_trans = 1;
prebuilt->trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
/* Re-acquire the table lock on the source table. */
row_lock_table_for_mysql(prebuilt, src_table, mode);
/* We will need an IX lock on the destination table. */
@ -8944,10 +8997,10 @@ ha_innobase::start_stmt(
trx->detailed_error[0] = '\0';
/* Set the MySQL flag to mark that there is an active transaction */
if (trx->active_trans == 0) {
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
trx->active_trans = 1;
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
} else {
innobase_register_stmt(ht, thd);
}
@ -9045,10 +9098,10 @@ ha_innobase::external_lock(
/* Set the MySQL flag to mark that there is an active
transaction */
if (trx->active_trans == 0) {
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
trx->active_trans = 1;
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
} else if (trx->n_mysql_tables_in_use == 0) {
innobase_register_stmt(ht, thd);
}
@ -9146,7 +9199,7 @@ ha_innobase::external_lock(
prebuilt->used_in_HANDLER = FALSE;
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
if (trx->active_trans != 0) {
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) != 0) {
innobase_commit(ht, thd, TRUE);
}
} else {
@ -9231,10 +9284,10 @@ ha_innobase::transactional_table_lock(
/* MySQL is setting a new transactional table lock */
/* Set the MySQL flag to mark that there is an active transaction */
if (trx->active_trans == 0) {
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
trx->active_trans = 1;
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
}
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
@ -10288,7 +10341,8 @@ innobase_xa_prepare(
innobase_release_stat_resources(trx);
if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0 &&
trx->conc_state != TRX_NOT_STARTED) {
sql_print_error("trx->active_trans == 0, but trx->conc_state != "
"TRX_NOT_STARTED");
@ -10300,7 +10354,7 @@ innobase_xa_prepare(
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
ut_ad(trx->active_trans);
ut_ad(trx->active_trans & TRX_ACTIVE_IN_MYSQL);
error = (int) trx_prepare_for_mysql(trx);
} else {
@ -10324,36 +10378,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
{
if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
/* choose group commit rather than binlog order */
return(error);
}
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time. */
pthread_mutex_lock(&prepare_commit_mutex);
trx->active_trans = 2;
}
return(error);
}
@ -11638,11 +11662,6 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint,
"Enable/Disable flushing along modified age. (none, reflex, [estimate])",
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
PLUGIN_VAR_RQCMDARG,
"Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
NULL, NULL, 0, 0, 1, 0);
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
PLUGIN_VAR_RQCMDARG,
"Enable/Disable converting automatically *.ibd files when import tablespace.",
@ -11746,7 +11765,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(read_ahead),
MYSQL_SYSVAR(adaptive_checkpoint),
MYSQL_SYSVAR(flush_log_at_trx_commit_session),
MYSQL_SYSVAR(enable_unsafe_group_commit),
MYSQL_SYSVAR(expand_import),
MYSQL_SYSVAR(extra_rsegments),
MYSQL_SYSVAR(dict_size_limit),

View File

@ -241,16 +241,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char **thd_query(MYSQL_THD thd);
#endif
/** Get the file name of the MySQL binlog.
* @return the name of the binlog file
*/
const char* mysql_bin_log_file_name(void);
/** Get the current position of the MySQL binlog.
* @return byte offset from the beginning of the binlog
*/
ulonglong mysql_bin_log_file_pos(void);
/**
Check if a user thread is a replication slave thread
@param thd user thread
@ -291,6 +281,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */
}
/** Get the file name and position of the MySQL binlog corresponding to the
* current commit.
*/
extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h

View File

@ -511,9 +511,10 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
ulint active_trans; /*!< 1 - if a transaction in MySQL
is active. 2 - if prepare_commit_mutex
was taken */
ulint active_trans; /*!< TRX_ACTIVE_IN_MYSQL - set if a
transaction in MySQL is active.
TRX_ACTIVE_COMMIT_ORDERED - set if
innobase_commit_ordered has run */
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@ -824,6 +825,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
/* Flag bits for trx_struct.active_trans */
#define TRX_ACTIVE_IN_MYSQL (1<<0)
#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
/** Commit node states */
enum commit_node_state {
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to