1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-11937: InnoDB flushes redo log too often

The problem was introduced with the InnoDB 5.7 merge, in the code that
avoids an extra fsync at the end of commit when the binlog is enabled. The
MariaDB method for this was removed, but the replacement MySQL method
based on thd_get_durability_property() is not functional in MariaDB.

This commit reverts the offending parts of the merge and adds a test
case, to fix the problem for InnoDB. But other storage engines are
likely to have a similar problem.
This commit is contained in:
Kristian Nielsen
2017-08-07 12:38:47 +02:00
parent 5ae598390a
commit 36e81a23c5
5 changed files with 42 additions and 38 deletions

View File

@@ -176,4 +176,14 @@ ERROR 23000: Duplicate entry '4' for key 'PRIMARY'
# There must be no UPDATE query event;
include/show_binlog_events.inc
drop table t1, t2;
*** MDEV-11937: InnoDB flushes redo log too often ***
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
SET @old_flush = @@GLOBAL.innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit=1;
SELECT IF(@num_sync < 100*1.5, "OK",
CONCAT("ERROR: More than 1 fsync per commit (saw ", @num_sync/100, ")")) AS status;
status
OK
DROP TABLE t1;
SET GLOBAL innodb_flush_log_at_trx_commit=@old_flush;
End of tests

View File

@@ -172,4 +172,33 @@ source include/show_binlog_events.inc;
# cleanup bug#27716
drop table t1, t2;
# Regression test for MDEV-11937: with innodb_flush_log_at_trx_commit=1,
# run $ROWS single-row INSERT statements and check that the
# Innodb_os_log_fsyncs status counter grew by fewer than 1.5 * $ROWS.
# NOTE(review): the "per commit" wording assumes each INSERT commits on its
# own (autocommit on) - confirm against the rest of the test file.
--echo *** MDEV-11937: InnoDB flushes redo log too often ***
# Count number of log fsyncs reported by InnoDB per commit.
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
# Remember the current global setting so it can be restored at the end.
SET @old_flush = @@GLOBAL.innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit=1;
# Snapshot of the fsync counter before the inserts.
--let $syncs1 = query_get_value(SHOW STATUS LIKE 'Innodb_os_log_fsyncs', Value, 1)
--let $ROWS = 100
# Keep the generated INSERT statements and the raw counter arithmetic out
# of the recorded result file.
--disable_query_log
# Insert the values $ROWS down to 1, one row per statement.
let $count = $ROWS;
while ($count) {
eval INSERT INTO t1 VALUES ($count);
dec $count;
}
# Snapshot of the fsync counter after the inserts; the difference is the
# number of fsyncs reported while the loop ran.
--let $syncs2 = query_get_value(SHOW STATUS LIKE 'Innodb_os_log_fsyncs', Value, 1)
eval SET @num_sync = $syncs2 - $syncs1;
--enable_query_log
# Allow a bit of slack, in case some background process or something
# is introducing a few more syncs.
# The check passes while the observed count stays below 1.5 * $ROWS;
# otherwise the average number of fsyncs per row is reported in the status.
eval SELECT IF(@num_sync < $ROWS*1.5, "OK",
CONCAT("ERROR: More than 1 fsync per commit (saw ", @num_sync/$ROWS, ")")) AS status;
# Cleanup: drop the table and restore the saved global setting.
DROP TABLE t1;
SET GLOBAL innodb_flush_log_at_trx_commit=@old_flush;
--echo End of tests

View File

@@ -1688,18 +1688,6 @@ thd_is_replication_slave_thread(
return thd && ((ibool) thd_slave_thread(thd));
}
/** Reports the durability property that a thread has requested.
Consulted when either a prepare or a commit record is written to the
log buffer. This is a thin wrapper that forwards to
thd_get_durability_property().
@param[in]	thd	thread handle
@return the durability property requested by the thread */
enum durability_properties
thd_requested_durability(
	const THD*	thd)
{
	enum durability_properties	requested
		= thd_get_durability_property(thd);

	return(requested);
}
/******************************************************************//**
Returns true if transaction should be flagged as read-only.
@return true if the thd is marked as read-only */
@@ -4839,10 +4827,6 @@ innobase_commit(
this one, to allow then to group commit with us. */
thd_wakeup_subsequent_commits(thd, 0);
if (!read_only) {
trx->flush_log_later = false;
}
/* Now do a write + flush of logs. */
trx_commit_complete_for_mysql(trx);

View File

@@ -570,11 +570,6 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
*/
bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
/** Gets information on the durability property requested by a thread.
@param thd Thread handle
@return a durability property. */
durability_properties thd_get_durability_property(const MYSQL_THD thd);
/** Is strict sql_mode set.
@param thd Thread object
@return True if sql_mode has strict mode (all or trans), false otherwise. */

View File

@@ -1845,9 +1845,7 @@ trx_commit_in_memory(
} else if (trx->flush_log_later) {
/* Do nothing yet */
trx->must_flush_log_later = true;
} else if (srv_flush_log_at_trx_commit == 0
|| thd_requested_durability(trx->mysql_thd)
== HA_IGNORE_DURABILITY) {
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else {
trx_flush_log_if_needed(lsn, trx);
@@ -2261,8 +2259,7 @@ trx_commit_complete_for_mysql(
{
if (trx->id != 0
|| !trx->must_flush_log_later
|| thd_requested_durability(trx->mysql_thd)
== HA_IGNORE_DURABILITY) {
|| (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) {
return;
}
@@ -2750,18 +2747,7 @@ trx_prepare(
trx_sys_mutex_exit();
/*--------------------------------------*/
switch (thd_requested_durability(trx->mysql_thd)) {
case HA_IGNORE_DURABILITY:
/* We set the HA_IGNORE_DURABILITY during prepare phase of
binlog group commit to not flush redo log for every transaction
here. So that we can flush prepared records of transactions to
redo log in a group right before writing them to binary log
during flush stage of binlog group commit. */
break;
case HA_REGULAR_DURABILITY:
if (lsn == 0) {
break;
}
if (lsn) {
/* Depending on the my.cnf options, we may now write the log
buffer to the log files, making the prepared state of the
transaction durable if the OS does not crash. We may also