mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
BUG#39934: Slave stops for engine that only support row-based logging
General overview:
The logic for switching to row format when binlog_format=MIXED had numerous flaws. The underlying problem was the lack of a consistent architecture.

General purpose of this changeset:
This changeset introduces an architecture for switching to row format when binlog_format=MIXED. It enforces the architecture where it has to. It leaves some bugs to be fixed later. It adds extensive tests to verify that unsafe statements work as expected and that appropriate errors are produced by problems with the selection of binlog format. It was not practical to split this into smaller pieces of work.

Problem 1:
To determine the logging mode, the code has to take several parameters into account, namely: (1) the value of binlog_format; (2) the capabilities of the engines; (3) the type of the current statement: normal, unsafe, or row injection. These parameters may conflict in several ways, namely:
- binlog_format=STATEMENT for a row injection
- binlog_format=STATEMENT for an unsafe statement
- binlog_format=STATEMENT for an engine only supporting row logging
- binlog_format=ROW for an engine only supporting statement logging
- statement is unsafe and engine does not support row logging
- row injection in a table that does not support statement logging
- statement modifies one table that does not support row logging and one that does not support statement logging
Several of these conflicts were not detected, or were detected with an inappropriate error message. The problem of BUG#39934 was that no appropriate error message was written for the case when an engine only supporting row logging executed a row injection with binlog_format=ROW. However, all of the above cases must be handled.

Fix 1:
Introduce new error codes (sql/share/errmsg.txt). Ensure that all conditions are detected and handled in decide_logging_format().

Problem 2:
The binlog format shall be determined once per statement, in decide_logging_format(). It shall not be changed before or after that.
Before decide_logging_format() is called, all information necessary to determine the logging format must be available. This principle ensures that all unsafe statements are handled in a consistent way. However, this principle is not followed: thd->set_current_stmt_binlog_row_based_if_mixed() is called in several places, including from code executing UPDATE..LIMIT, INSERT..SELECT..LIMIT, DELETE..LIMIT, INSERT DELAYED, and SET @@binlog_format. After Problem 1 was fixed, that caused inconsistencies where these unsafe statements would not print the appropriate warnings or errors for some of the conflicts.

Fix 2:
Remove calls to THD::set_current_stmt_binlog_row_based_if_mixed() from code executed after decide_logging_format(). Compensate by calling set_current_stmt_unsafe() at parse time. This way, all unsafe statements are detected by decide_logging_format().

Problem 3:
INSERT DELAYED is not marked as unsafe: it is logged in statement format even if binlog_format=MIXED, and no warning is printed even if binlog_format=STATEMENT. This is BUG#45825.

Fix 3:
Make INSERT DELAYED mark itself as unsafe at parse time. This allows decide_logging_format() to detect that a warning should be printed or the binlog_format changed.

Problem 4:
Statements containing a LIMIT clause were not marked as unsafe when executed inside stored functions/triggers/views/prepared statements. This is BUG#45785.

Fix 4:
Mark statements containing the LIMIT clause as unsafe at parse time, instead of at execution time. This allows propagating unsafe-ness to the view.
This commit is contained in:
121
sql/sql_class.cc
121
sql/sql_class.cc
@ -539,7 +539,7 @@ THD::THD()
|
||||
lock_id(&main_lock_id),
|
||||
user_time(0), in_sub_stmt(0),
|
||||
sql_log_bin_toplevel(false),
|
||||
binlog_table_maps(0), binlog_flags(0UL),
|
||||
binlog_warning_flags(0UL), binlog_table_maps(0),
|
||||
table_map_for_update(0),
|
||||
arg_of_last_insert_id_function(FALSE),
|
||||
first_successful_insert_id_in_prev_stmt(0),
|
||||
@ -2973,13 +2973,13 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup,
|
||||
first_successful_insert_id_in_cur_stmt;
|
||||
|
||||
if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) &&
|
||||
!current_stmt_binlog_row_based)
|
||||
!is_current_stmt_binlog_format_row())
|
||||
{
|
||||
options&= ~OPTION_BIN_LOG;
|
||||
}
|
||||
|
||||
if ((backup->options & OPTION_BIN_LOG) && is_update_query(lex->sql_command)&&
|
||||
!current_stmt_binlog_row_based)
|
||||
!is_current_stmt_binlog_format_row())
|
||||
mysql_bin_log.start_union_events(this, this->query_id);
|
||||
|
||||
/* Disable result sets */
|
||||
@ -3041,7 +3041,7 @@ void THD::restore_sub_statement_state(Sub_statement_state *backup)
|
||||
is_fatal_sub_stmt_error= FALSE;
|
||||
|
||||
if ((options & OPTION_BIN_LOG) && is_update_query(lex->sql_command) &&
|
||||
!current_stmt_binlog_row_based)
|
||||
!is_current_stmt_binlog_format_row())
|
||||
mysql_bin_log.stop_union_events(this);
|
||||
|
||||
/*
|
||||
@ -3463,7 +3463,7 @@ int THD::binlog_write_row(TABLE* table, bool is_trans,
|
||||
MY_BITMAP const* cols, size_t colcnt,
|
||||
uchar const *record)
|
||||
{
|
||||
DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open());
|
||||
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
|
||||
|
||||
/*
|
||||
Pack records into format for transfer. We are allocating more
|
||||
@ -3493,7 +3493,7 @@ int THD::binlog_update_row(TABLE* table, bool is_trans,
|
||||
const uchar *before_record,
|
||||
const uchar *after_record)
|
||||
{
|
||||
DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open());
|
||||
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
|
||||
|
||||
size_t const before_maxlen = max_row_length(table, before_record);
|
||||
size_t const after_maxlen = max_row_length(table, after_record);
|
||||
@ -3538,7 +3538,7 @@ int THD::binlog_delete_row(TABLE* table, bool is_trans,
|
||||
MY_BITMAP const* cols, size_t colcnt,
|
||||
uchar const *record)
|
||||
{
|
||||
DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open());
|
||||
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
|
||||
|
||||
/*
|
||||
Pack records into format for transfer. We are allocating more
|
||||
@ -3620,8 +3620,6 @@ show_query_type(THD::enum_binlog_query_type qtype)
|
||||
return "ROW";
|
||||
case THD::STMT_QUERY_TYPE:
|
||||
return "STMT";
|
||||
case THD::MYSQL_QUERY_TYPE:
|
||||
return "MYSQL";
|
||||
case THD::QUERY_TYPE_COUNT:
|
||||
default:
|
||||
DBUG_ASSERT(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
|
||||
@ -3633,28 +3631,30 @@ show_query_type(THD::enum_binlog_query_type qtype)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
Member function that will log query, either row-based or
|
||||
statement-based depending on the value of the 'current_stmt_binlog_row_based'
|
||||
the value of the 'qtype' flag.
|
||||
/**
|
||||
Log the current query.
|
||||
|
||||
This function should be called after the all calls to ha_*_row()
|
||||
functions have been issued, but before tables are unlocked and
|
||||
closed.
|
||||
The query will be logged in either row format or statement format
|
||||
depending on the value of @c current_stmt_binlog_row_based field and
|
||||
the value of the @c qtype parameter.
|
||||
|
||||
OBSERVE
|
||||
There shall be no writes to any system table after calling
|
||||
binlog_query(), so these writes has to be moved to before the call
|
||||
of binlog_query() for correct functioning.
|
||||
This function must be called:
|
||||
|
||||
This is necessesary not only for RBR, but the master might crash
|
||||
after binlogging the query but before changing the system tables.
|
||||
This means that the slave and the master are not in the same state
|
||||
(after the master has restarted), so therefore we have to
|
||||
eliminate this problem.
|
||||
- After the all calls to ha_*_row() functions have been issued.
|
||||
|
||||
RETURN VALUE
|
||||
Error code, or 0 if no error.
|
||||
- After any writes to system tables. Rationale: if system tables
|
||||
were written after a call to this function, and the master crashes
|
||||
after the call to this function and before writing the system
|
||||
tables, then the master and slave get out of sync.
|
||||
|
||||
- Before tables are unlocked and closed.
|
||||
|
||||
@see decide_logging_format
|
||||
|
||||
@retval 0 Success
|
||||
|
||||
@retval nonzero If there is a failure when writing the query (e.g.,
|
||||
write failure), then the error code is returned.
|
||||
*/
|
||||
int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
|
||||
ulong query_len, bool is_trans, bool suppress_use,
|
||||
@ -3679,50 +3679,69 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
|
||||
DBUG_RETURN(error);
|
||||
|
||||
/*
|
||||
If we are in statement mode and trying to log an unsafe statement,
|
||||
we should print a warning.
|
||||
Warnings for unsafe statements logged in statement format are
|
||||
printed here instead of in decide_logging_format(). This is
|
||||
because the warnings should be printed only if the statement is
|
||||
actually logged. When executing decide_logging_format(), we cannot
|
||||
know for sure if the statement will be logged.
|
||||
*/
|
||||
if (sql_log_bin_toplevel && lex->is_stmt_unsafe() &&
|
||||
variables.binlog_format == BINLOG_FORMAT_STMT)
|
||||
if (sql_log_bin_toplevel)
|
||||
{
|
||||
/*
|
||||
A warning can be elevated a error when STRICT sql mode.
|
||||
But we don't want to elevate binlog warning to error here.
|
||||
*/
|
||||
push_warning(this, MYSQL_ERROR::WARN_LEVEL_NOTE,
|
||||
ER_BINLOG_UNSAFE_STATEMENT,
|
||||
ER(ER_BINLOG_UNSAFE_STATEMENT));
|
||||
if (!(binlog_flags & BINLOG_FLAG_UNSAFE_STMT_PRINTED))
|
||||
if (binlog_warning_flags &
|
||||
(1 << BINLOG_WARNING_FLAG_UNSAFE_AND_STMT_ENGINE))
|
||||
{
|
||||
push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_NOTE,
|
||||
ER_BINLOG_UNSAFE_AND_STMT_ENGINE,
|
||||
"%s Statement: %.*s",
|
||||
ER(ER_BINLOG_UNSAFE_AND_STMT_ENGINE),
|
||||
MYSQL_ERRMSG_SIZE, query_arg);
|
||||
sql_print_warning("%s Statement: %.*s",
|
||||
ER(ER_BINLOG_UNSAFE_AND_STMT_ENGINE),
|
||||
MYSQL_ERRMSG_SIZE, query_arg);
|
||||
binlog_warning_flags|= 1 << BINLOG_WARNING_FLAG_PRINTED;
|
||||
}
|
||||
else if (binlog_warning_flags &
|
||||
(1 << BINLOG_WARNING_FLAG_UNSAFE_AND_STMT_MODE))
|
||||
{
|
||||
push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_NOTE,
|
||||
ER_BINLOG_UNSAFE_STATEMENT,
|
||||
"%s Statement: %.*s",
|
||||
ER(ER_BINLOG_UNSAFE_STATEMENT),
|
||||
MYSQL_ERRMSG_SIZE, query_arg);
|
||||
sql_print_warning("%s Statement: %.*s",
|
||||
ER(ER_BINLOG_UNSAFE_STATEMENT),
|
||||
MYSQL_ERRMSG_SIZE, query_arg);
|
||||
binlog_flags|= BINLOG_FLAG_UNSAFE_STMT_PRINTED;
|
||||
binlog_warning_flags|= 1 << BINLOG_WARNING_FLAG_PRINTED;
|
||||
}
|
||||
}
|
||||
|
||||
switch (qtype) {
|
||||
/*
|
||||
ROW_QUERY_TYPE means that the statement may be logged either in
|
||||
row format or in statement format. If
|
||||
current_stmt_binlog_row_based is set, it means that the
|
||||
statement has already been logged in row format and hence shall
|
||||
not be logged again.
|
||||
*/
|
||||
case THD::ROW_QUERY_TYPE:
|
||||
DBUG_PRINT("debug",
|
||||
("current_stmt_binlog_row_based: %d",
|
||||
current_stmt_binlog_row_based));
|
||||
if (current_stmt_binlog_row_based)
|
||||
is_current_stmt_binlog_format_row()));
|
||||
if (is_current_stmt_binlog_format_row())
|
||||
DBUG_RETURN(0);
|
||||
/* Otherwise, we fall through */
|
||||
case THD::MYSQL_QUERY_TYPE:
|
||||
/*
|
||||
Using this query type is a conveniece hack, since we have been
|
||||
moving back and forth between using RBR for replication of
|
||||
system tables and not using it.
|
||||
/* Fall through */
|
||||
|
||||
Make sure to change in check_table_binlog_row_based() according
|
||||
to how you treat this.
|
||||
/*
|
||||
STMT_QUERY_TYPE means that the query must be logged in statement
|
||||
format; it cannot be logged in row format. This is typically
|
||||
used by DDL statements. It is an error to use this query type
|
||||
if current_stmt_binlog_row_based is set.
|
||||
*/
|
||||
case THD::STMT_QUERY_TYPE:
|
||||
/*
|
||||
The MYSQL_LOG::write() function will set the STMT_END_F flag and
|
||||
flush the pending rows event if necessary.
|
||||
*/
|
||||
*/
|
||||
{
|
||||
Query_log_event qinfo(this, query_arg, query_len, is_trans, suppress_use,
|
||||
errcode);
|
||||
|
Reference in New Issue
Block a user