1
0
mirror of https://github.com/MariaDB/server.git synced 2025-04-18 21:44:20 +03:00

slave.cc try_to_reconnect remove retry_counter

`try_to_reconnect()` wraps `safe_reconnect()` with logging, but the
latter already loops reconnection attempts up to `master_retry_count`
times with `mi->connect_retry`-msec sleeps inbetween.
This means `try_to_reconnect()` has been counting disconnects of the
replication session (since it doesn’t loop) while `safe_reconnect()`
was counting actual tries (which may be multiple per disconnect).
In practice, this outer counter’s only benefit was to override the edge
case `--master-retry-count=0` that the inner loop doesn’t cover with 1.
This commit is contained in:
ParadoxV5 2025-02-04 17:38:23 -07:00
parent 3a43b7c60b
commit 66f52ba630

View File

@ -112,21 +112,16 @@ enum enum_slave_reconnect_actions
enum enum_slave_reconnect_messages
{
SLAVE_RECON_MSG_WAIT= 0,
SLAVE_RECON_MSG_KILLED_WAITING= 1,
SLAVE_RECON_MSG_AFTER= 2,
SLAVE_RECON_MSG_FAILED= 3,
SLAVE_RECON_MSG_COMMAND= 4,
SLAVE_RECON_MSG_KILLED_AFTER= 5,
SLAVE_RECON_MSG_AFTER,
SLAVE_RECON_MSG_FAILED,
SLAVE_RECON_MSG_COMMAND,
SLAVE_RECON_MSG_KILLED_AFTER,
SLAVE_RECON_MSG_MAX
};
static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
{
{
"Waiting to reconnect after a failed registration on master",
"Slave I/O thread killed while waiting to reconnect after a failed \
registration on master",
"Reconnecting after a failed registration on master",
"failed registering on master, reconnecting to try again, \
log '%s' at position %llu%s",
@ -134,16 +129,12 @@ log '%s' at position %llu%s",
"Slave I/O thread killed during or after reconnect"
},
{
"Waiting to reconnect after a failed binlog dump request",
"Slave I/O thread killed while retrying master dump",
"Reconnecting after a failed binlog dump request",
"failed dump request, reconnecting to try again, log '%s' at position %llu%s",
"COM_BINLOG_DUMP",
"Slave I/O thread killed during or after reconnect"
},
{
"Waiting to reconnect after a failed master event read",
"Slave I/O thread killed while waiting to reconnect after a failed read",
"Reconnecting after a failed master event read",
"Slave I/O thread: Failed reading log event, reconnecting to retry, \
log '%s' at position %llu%s",
@ -4396,20 +4387,18 @@ static bool check_io_slave_killed(Master_info *mi, const char *info)
/**
@brief Try to reconnect slave IO thread.
@details Terminates current connection to master, sleeps for
@c mi->connect_retry msecs and initiates new connection with
@c safe_reconnect(). Variable pointed by @c retry_count is increased -
@details Terminates current connection to master
and initiates new connection with safe_reconnect(), which sleeps for
@c mi->connect_retry msecs and increases @c mi->TODO for each attempt -
if it exceeds @c master_retry_count then connection is not re-established
and function signals error.
Unless @c suppres_warnings is TRUE, a warning is put in the server error log
when reconnecting. The warning message and messages used to report errors
are taken from @c messages array. In case @c master_retry_count is exceeded,
no messages are added to the log.
are taken from @c messages array.
@param[in] thd Thread context.
@param[in] mysql MySQL connection.
@param[in] mi Master connection information.
@param[in,out] retry_count Number of attempts to reconnect.
@param[in] suppress_warnings TRUE when a normal net read timeout
has caused to reconnecting.
@param[in] messages Messages to print/log, see
@ -4420,23 +4409,14 @@ static bool check_io_slave_killed(Master_info *mi, const char *info)
*/
static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
uint *retry_count, bool suppress_warnings,
bool suppress_warnings,
const char *messages[SLAVE_RECON_MSG_MAX])
{
mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
#ifdef SIGNAL_WITH_VIO_CLOSE
thd->clear_active_vio();
#endif
end_server(mysql);
if ((*retry_count)++)
{
if (*retry_count > master_retry_count)
return 1; // Don't retry forever
slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
}
if (check_io_slave_killed(mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
return 1;
thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
if (!suppress_warnings)
{
@ -4497,7 +4477,6 @@ pthread_handler_t handle_slave_io(void *arg)
MYSQL *mysql;
Master_info *mi = (Master_info*)arg;
Relay_log_info *rli= &mi->rli;
uint retry_count;
bool suppress_warnings;
int ret;
rpl_io_thread_info io_info;
@ -4511,7 +4490,6 @@ pthread_handler_t handle_slave_io(void *arg)
DBUG_ASSERT(mi->inited);
mysql= NULL ;
retry_count= 0;
thd= new THD(next_thread_id()); // note that contructor of THD uses DBUG_ !
@ -4661,7 +4639,7 @@ connected:
Try to reconnect because the error was caused by a transient network
problem
*/
if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
if (try_to_reconnect(thd, mysql, mi, suppress_warnings,
reconnect_messages[SLAVE_RECON_ACT_REG]))
goto err;
@ -4678,7 +4656,7 @@ connected:
"while registering slave on master"))
{
sql_print_error("Slave I/O thread couldn't register on master");
if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
if (try_to_reconnect(thd, mysql, mi, suppress_warnings,
reconnect_messages[SLAVE_RECON_ACT_REG]))
goto err;
}
@ -4705,7 +4683,7 @@ connected:
{
sql_print_error("Failed on request_dump()");
if (check_io_slave_killed(mi, NullS) ||
try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
try_to_reconnect(thd, mysql, mi, suppress_warnings,
reconnect_messages[SLAVE_RECON_ACT_DUMP]))
goto err;
goto connected;
@ -4764,13 +4742,12 @@ Stopping slave I/O thread due to out-of-memory error from master");
"%s", ER_THD(thd, ER_OUT_OF_RESOURCES));
goto err;
}
if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
if (try_to_reconnect(thd, mysql, mi, suppress_warnings,
reconnect_messages[SLAVE_RECON_ACT_EVENT]))
goto err;
goto connected;
} // if (event_len == packet_error)
retry_count=0; // ok event, reset retry counter
thd->set_time_for_next_stage();
THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
event_buf= mysql->net.read_pos + 1;