mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
BUG#38826 Race in MYSQL_LOG::purge_logs is impossible to debug in production
BUG#39325 Server crash inside MYSQL_LOG::purge_first_log halts replicaiton The patch reverses the order of the purging and updating events for log and relay-log.info/index files respectively. This solves the problem of having holes caused by crashes happening between updating info/index files and purging logs. This patch also contains an aditional test case for testing the crashing before purge logs. NOTE1: This is a combined patch for BUG#38826 and BUG#39325. This patch is based on bugteam tree and takes into account reviewers suggestions. NOTE2: Merge from 5.0-bugteam
This commit is contained in:
@ -6,7 +6,7 @@
|
|||||||
let $counter= 500;
|
let $counter= 500;
|
||||||
while ($mysql_errno)
|
while ($mysql_errno)
|
||||||
{
|
{
|
||||||
--error 0,2002,2006
|
--error 0,2002,2006,2003
|
||||||
show status;
|
show status;
|
||||||
|
|
||||||
dec $counter;
|
dec $counter;
|
||||||
|
@ -21,6 +21,7 @@ flush logs;
|
|||||||
*** must be a warning master-bin.000001 was not found ***
|
*** must be a warning master-bin.000001 was not found ***
|
||||||
Warnings:
|
Warnings:
|
||||||
Warning 1612 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
|
Warning 1612 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
|
||||||
|
Warning 1612 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
|
||||||
*** must show one record, of the active binlog, left in the index file after PURGE ***
|
*** must show one record, of the active binlog, left in the index file after PURGE ***
|
||||||
show binary logs;
|
show binary logs;
|
||||||
Log_name File_size
|
Log_name File_size
|
||||||
|
14
mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result
Normal file
14
mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
stop slave;
|
||||||
|
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
||||||
|
reset master;
|
||||||
|
reset slave;
|
||||||
|
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
||||||
|
start slave;
|
||||||
|
stop slave;
|
||||||
|
DROP TABLE IF EXISTS t1;
|
||||||
|
CREATE TABLE t1 (id INT);
|
||||||
|
start slave;
|
||||||
|
SET GLOBAL debug= "+d,crash_before_purge_logs";
|
||||||
|
FLUSH LOGS;
|
||||||
|
ERROR HY000: Lost connection to MySQL server during query
|
||||||
|
start slave;
|
57
mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test
Normal file
57
mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
--source include/master-slave.inc
|
||||||
|
--source include/have_debug.inc
|
||||||
|
--disable_reconnect
|
||||||
|
|
||||||
|
# We have to sync with master, to ensure slave had time to start properly
|
||||||
|
# # before we stop it. If not, we get errors about UNIX_TIMESTAMP() in the
|
||||||
|
# log.
|
||||||
|
sync_slave_with_master;
|
||||||
|
stop slave;
|
||||||
|
--source include/wait_for_slave_to_stop.inc
|
||||||
|
|
||||||
|
# ON MASTER
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
|
||||||
|
--disable_warnings
|
||||||
|
DROP TABLE IF EXISTS t1;
|
||||||
|
--enable_warnings
|
||||||
|
|
||||||
|
CREATE TABLE t1 (id INT);
|
||||||
|
|
||||||
|
let $1=100;
|
||||||
|
disable_query_log;
|
||||||
|
begin;
|
||||||
|
while ($1)
|
||||||
|
{
|
||||||
|
eval INSERT INTO t1 VALUES( $1 );
|
||||||
|
dec $1;
|
||||||
|
}
|
||||||
|
DROP TABLE t1;
|
||||||
|
save_master_pos;
|
||||||
|
|
||||||
|
enable_query_log;
|
||||||
|
|
||||||
|
## ON SLAVE
|
||||||
|
|
||||||
|
connection slave;
|
||||||
|
start slave;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
sync_with_master 0;
|
||||||
|
connection master;
|
||||||
|
save_master_pos;
|
||||||
|
connection slave;
|
||||||
|
|
||||||
|
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/slave0.expect
|
||||||
|
SET GLOBAL debug= "+d,crash_before_purge_logs";
|
||||||
|
|
||||||
|
--error 2013
|
||||||
|
# try to rotate logs
|
||||||
|
FLUSH LOGS;
|
||||||
|
|
||||||
|
--enable_reconnect
|
||||||
|
--source include/wait_until_connected_again.inc
|
||||||
|
start slave;
|
||||||
|
--source include/wait_for_slave_to_start.inc
|
||||||
|
|
||||||
|
sync_with_master 0;
|
220
sql/log.cc
220
sql/log.cc
@ -2359,6 +2359,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG()
|
|||||||
*/
|
*/
|
||||||
index_file_name[0] = 0;
|
index_file_name[0] = 0;
|
||||||
bzero((char*) &index_file, sizeof(index_file));
|
bzero((char*) &index_file, sizeof(index_file));
|
||||||
|
bzero((char*) &purge_temp, sizeof(purge_temp));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this is called only once */
|
/* this is called only once */
|
||||||
@ -2953,6 +2954,7 @@ err:
|
|||||||
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
|
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
|
char *to_purge_if_included= NULL;
|
||||||
DBUG_ENTER("purge_first_log");
|
DBUG_ENTER("purge_first_log");
|
||||||
|
|
||||||
DBUG_ASSERT(is_open());
|
DBUG_ASSERT(is_open());
|
||||||
@ -2960,36 +2962,20 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
|
|||||||
DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
|
DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
|
||||||
|
|
||||||
pthread_mutex_lock(&LOCK_index);
|
pthread_mutex_lock(&LOCK_index);
|
||||||
pthread_mutex_lock(&rli->log_space_lock);
|
to_purge_if_included= my_strdup(rli->group_relay_log_name, MYF(0));
|
||||||
rli->relay_log.purge_logs(rli->group_relay_log_name, included,
|
|
||||||
0, 0, &rli->log_space_total);
|
|
||||||
// Tell the I/O thread to take the relay_log_space_limit into account
|
|
||||||
rli->ignore_log_space_limit= 0;
|
|
||||||
pthread_mutex_unlock(&rli->log_space_lock);
|
|
||||||
|
|
||||||
/*
|
|
||||||
Ok to broadcast after the critical region as there is no risk of
|
|
||||||
the mutex being destroyed by this thread later - this helps save
|
|
||||||
context switches
|
|
||||||
*/
|
|
||||||
pthread_cond_broadcast(&rli->log_space_cond);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Read the next log file name from the index file and pass it back to
|
Read the next log file name from the index file and pass it back to
|
||||||
the caller
|
the caller.
|
||||||
If included is true, we want the first relay log;
|
|
||||||
otherwise we want the one after event_relay_log_name.
|
|
||||||
*/
|
*/
|
||||||
if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
|
if((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
|
||||||
(!included &&
|
(error=find_next_log(&rli->linfo, 0)))
|
||||||
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
|
|
||||||
(error=find_next_log(&rli->linfo, 0)))))
|
|
||||||
{
|
{
|
||||||
char buff[22];
|
char buff[22];
|
||||||
sql_print_error("next log error: %d offset: %s log: %s included: %d",
|
sql_print_error("next log error: %d offset: %s log: %s included: %d",
|
||||||
error,
|
error,
|
||||||
llstr(rli->linfo.index_file_offset,buff),
|
llstr(rli->linfo.index_file_offset,buff),
|
||||||
rli->group_relay_log_name,
|
rli->event_relay_log_name,
|
||||||
included);
|
included);
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
@ -3017,7 +3003,42 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
|
|||||||
/* Store where we are in the new file for the execution thread */
|
/* Store where we are in the new file for the execution thread */
|
||||||
flush_relay_log_info(rli);
|
flush_relay_log_info(rli);
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("crash_before_purge_logs", abort(););
|
||||||
|
|
||||||
|
pthread_mutex_lock(&rli->log_space_lock);
|
||||||
|
rli->relay_log.purge_logs(to_purge_if_included, included,
|
||||||
|
0, 0, &rli->log_space_total);
|
||||||
|
// Tell the I/O thread to take the relay_log_space_limit into account
|
||||||
|
rli->ignore_log_space_limit= 0;
|
||||||
|
pthread_mutex_unlock(&rli->log_space_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Ok to broadcast after the critical region as there is no risk of
|
||||||
|
the mutex being destroyed by this thread later - this helps save
|
||||||
|
context switches
|
||||||
|
*/
|
||||||
|
pthread_cond_broadcast(&rli->log_space_cond);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Need to update the log pos because purge logs has been called
|
||||||
|
* after fetching initially the log pos at the begining of the method.
|
||||||
|
*/
|
||||||
|
if(error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))
|
||||||
|
{
|
||||||
|
char buff[22];
|
||||||
|
sql_print_error("next log error: %d offset: %s log: %s included: %d",
|
||||||
|
error,
|
||||||
|
llstr(rli->linfo.index_file_offset,buff),
|
||||||
|
rli->group_relay_log_name,
|
||||||
|
included);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If included was passed, rli->linfo should be the first entry. */
|
||||||
|
DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
|
||||||
|
|
||||||
err:
|
err:
|
||||||
|
my_free(to_purge_if_included, MYF(0));
|
||||||
pthread_mutex_unlock(&LOCK_index);
|
pthread_mutex_unlock(&LOCK_index);
|
||||||
DBUG_RETURN(error);
|
DBUG_RETURN(error);
|
||||||
}
|
}
|
||||||
@ -3077,8 +3098,36 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log,
|
|||||||
|
|
||||||
if (need_mutex)
|
if (need_mutex)
|
||||||
pthread_mutex_lock(&LOCK_index);
|
pthread_mutex_lock(&LOCK_index);
|
||||||
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
|
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
|
||||||
|
{
|
||||||
|
sql_print_error("MYSQL_LOG::purge_logs was called with file %s not "
|
||||||
|
"listed in the index.", to_log);
|
||||||
goto err;
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
For crash recovery reasons the index needs to be updated before
|
||||||
|
any files are deleted. Move files to be deleted into a temp file
|
||||||
|
to be processed after the index is updated.
|
||||||
|
*/
|
||||||
|
if (!my_b_inited(&purge_temp))
|
||||||
|
{
|
||||||
|
if (error=open_cached_file(&purge_temp, mysql_tmpdir, TEMP_PREFIX,
|
||||||
|
DISK_BUFFER_SIZE, MYF(MY_WME)))
|
||||||
|
{
|
||||||
|
sql_print_error("MYSQL_LOG::purge_logs failed to open purge_temp");
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (error=reinit_io_cache(&purge_temp, WRITE_CACHE, 0, 0, 1))
|
||||||
|
{
|
||||||
|
sql_print_error("MYSQL_LOG::purge_logs failed to reinit purge_temp "
|
||||||
|
"for write");
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
File name exists in index file; delete until we find this file
|
File name exists in index file; delete until we find this file
|
||||||
@ -3089,6 +3138,61 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log,
|
|||||||
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
|
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
|
||||||
!log_in_use(log_info.log_file_name))
|
!log_in_use(log_info.log_file_name))
|
||||||
{
|
{
|
||||||
|
if ((error=my_b_write(&purge_temp, (const uchar*)log_info.log_file_name,
|
||||||
|
strlen(log_info.log_file_name))) ||
|
||||||
|
(error=my_b_write(&purge_temp, (const uchar*)"\n", 1)))
|
||||||
|
{
|
||||||
|
sql_print_error("MYSQL_LOG::purge_logs failed to copy %s to purge_temp",
|
||||||
|
log_info.log_file_name);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_next_log(&log_info, 0) || exit_loop)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We know how many files to delete. Update index file. */
|
||||||
|
if (error=update_log_index(&log_info, need_update_threads))
|
||||||
|
{
|
||||||
|
sql_print_error("MSYQL_LOG::purge_logs failed to update the index file");
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("crash_after_update_index", abort(););
|
||||||
|
|
||||||
|
/* Switch purge_temp for read. */
|
||||||
|
if (error=reinit_io_cache(&purge_temp, READ_CACHE, 0, 0, 0))
|
||||||
|
{
|
||||||
|
sql_print_error("MSYQL_LOG::purge_logs failed to reinit purge_temp "
|
||||||
|
"for read");
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read each entry from purge_temp and delete the file. */
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
uint length;
|
||||||
|
|
||||||
|
if ((length=my_b_gets(&purge_temp, log_info.log_file_name,
|
||||||
|
FN_REFLEN)) <= 1)
|
||||||
|
{
|
||||||
|
if (purge_temp.error)
|
||||||
|
{
|
||||||
|
error= purge_temp.error;
|
||||||
|
sql_print_error("MSYQL_LOG::purge_logs error %d reading from "
|
||||||
|
"purge_temp", error);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reached EOF */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get rid of the trailing '\n' */
|
||||||
|
log_info.log_file_name[length-1]= 0;
|
||||||
|
|
||||||
|
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
|
||||||
|
|
||||||
MY_STAT s;
|
MY_STAT s;
|
||||||
if (!my_stat(log_info.log_file_name, &s, MYF(0)))
|
if (!my_stat(log_info.log_file_name, &s, MYF(0)))
|
||||||
{
|
{
|
||||||
@ -3189,20 +3293,6 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
|
|
||||||
|
|
||||||
if (find_next_log(&log_info, 0) || exit_loop)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
If we get killed -9 here, the sysadmin would have to edit
|
|
||||||
the log index file after restart - otherwise, this should be safe
|
|
||||||
*/
|
|
||||||
error= update_log_index(&log_info, need_update_threads);
|
|
||||||
if (error == 0) {
|
|
||||||
error = ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
err:
|
err:
|
||||||
@ -3216,7 +3306,7 @@ err:
|
|||||||
index file.
|
index file.
|
||||||
|
|
||||||
@param thd Thread pointer
|
@param thd Thread pointer
|
||||||
@param before_date Delete all log files before given date.
|
@param purge_time Delete all log files before given date.
|
||||||
|
|
||||||
@note
|
@note
|
||||||
If any of the logs before the deleted one is in use,
|
If any of the logs before the deleted one is in use,
|
||||||
@ -3233,6 +3323,7 @@ err:
|
|||||||
int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
|
int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
|
char to_log[FN_REFLEN];
|
||||||
LOG_INFO log_info;
|
LOG_INFO log_info;
|
||||||
MY_STAT stat_area;
|
MY_STAT stat_area;
|
||||||
THD *thd= current_thd;
|
THD *thd= current_thd;
|
||||||
@ -3240,12 +3331,8 @@ int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
|
|||||||
DBUG_ENTER("purge_logs_before_date");
|
DBUG_ENTER("purge_logs_before_date");
|
||||||
|
|
||||||
pthread_mutex_lock(&LOCK_index);
|
pthread_mutex_lock(&LOCK_index);
|
||||||
|
to_log[0]= 0;
|
||||||
|
|
||||||
/*
|
|
||||||
Delete until we find curren file
|
|
||||||
or a file that is used or a file
|
|
||||||
that is older than purge_time.
|
|
||||||
*/
|
|
||||||
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
|
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
@ -3295,55 +3382,18 @@ int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (stat_area.st_mtime >= purge_time)
|
if (stat_area.st_mtime < purge_time)
|
||||||
|
strmake(to_log,
|
||||||
|
log_info.log_file_name,
|
||||||
|
sizeof(log_info.log_file_name));
|
||||||
|
else
|
||||||
break;
|
break;
|
||||||
if (my_delete(log_info.log_file_name, MYF(0)))
|
|
||||||
{
|
|
||||||
if (my_errno == ENOENT)
|
|
||||||
{
|
|
||||||
/* It's not fatal even if we can't delete a log file */
|
|
||||||
if (thd)
|
|
||||||
{
|
|
||||||
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
|
|
||||||
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
|
|
||||||
log_info.log_file_name);
|
|
||||||
}
|
|
||||||
sql_print_information("Failed to delete file '%s'",
|
|
||||||
log_info.log_file_name);
|
|
||||||
my_errno= 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (thd)
|
|
||||||
{
|
|
||||||
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
|
|
||||||
ER_BINLOG_PURGE_FATAL_ERR,
|
|
||||||
"a problem with deleting %s; "
|
|
||||||
"consider examining correspondence "
|
|
||||||
"of your binlog index file "
|
|
||||||
"to the actual binlog files",
|
|
||||||
log_info.log_file_name);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sql_print_information("Failed to delete log file '%s'",
|
|
||||||
log_info.log_file_name);
|
|
||||||
}
|
|
||||||
error= LOG_INFO_FATAL;
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
|
|
||||||
}
|
}
|
||||||
if (find_next_log(&log_info, 0))
|
if (find_next_log(&log_info, 0))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
|
||||||
If we get killed -9 here, the sysadmin would have to edit
|
|
||||||
the log index file after restart - otherwise, this should be safe
|
|
||||||
*/
|
|
||||||
error= update_log_index(&log_info, 1);
|
|
||||||
|
|
||||||
err:
|
err:
|
||||||
pthread_mutex_unlock(&LOCK_index);
|
pthread_mutex_unlock(&LOCK_index);
|
||||||
|
@ -233,6 +233,13 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
|||||||
pthread_cond_t update_cond;
|
pthread_cond_t update_cond;
|
||||||
ulonglong bytes_written;
|
ulonglong bytes_written;
|
||||||
IO_CACHE index_file;
|
IO_CACHE index_file;
|
||||||
|
/*
|
||||||
|
purge_temp is a temp file used in purge_logs so that the index file
|
||||||
|
can be updated before deleting files from disk, yielding better crash
|
||||||
|
recovery. It is created on demand the first time purge_logs is called
|
||||||
|
and then reused for subsequent calls. It is cleaned up in cleanup().
|
||||||
|
*/
|
||||||
|
IO_CACHE purge_temp;
|
||||||
char index_file_name[FN_REFLEN];
|
char index_file_name[FN_REFLEN];
|
||||||
/*
|
/*
|
||||||
The max size before rotation (usable only if log_type == LOG_BIN: binary
|
The max size before rotation (usable only if log_type == LOG_BIN: binary
|
||||||
|
Reference in New Issue
Block a user