1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-16146: MariaDB slave stops with following errors.

Problem:
========
180511 11:07:58 [ERROR] Slave I/O: Unexpected master's heartbeat data:
heartbeat is not compatible with local info;the event's data: log_file_name
mysql-bin.000009 log_pos 1054262041, Error_code: 1623

Analysis:
=========
In replication setup when master server doesn't have any events to send to
slave server it sends an 'Heartbeat_log_event'. This event carries the
current binary log filename and offset details. The offset values is stored
within 4 bytes of event header. When the size of binary log is higher than
UINT32_MAX the log_pos values will not fit in 4 bytes memory.  It overflows
and hence slave stops with an error.

Fix:
===
Since we cannot extend the common_header of Log_event class, a greater than
4GB value of Log_event::log_pos is made to be transported with a HeartBeat
event's sub-header.  Log_event::log_pos in such case is set to zero to
indicate that the 8 byte sub-header is allocated in the event.

In case of cross version replication following behaviour is expected

OLD - Server without fix
NEW - Server with fix

OLD<->NEW : works bidirectionally as long as the binlog offset is
            (normally) within 4GB.

When log_pos > UINT32_MAX
OLD->NEW  : The 'log_pos' is bound to overflow and NEW slave may report
            an invalid event/incompatible heart beat event error.
NEW->OLD  : Since patched server sets log_pos=0 on overflow, OLD slave will
            report invalid event error.
This commit is contained in:
Sujatha
2021-04-30 18:12:43 +05:30
parent 13b9af50e4
commit abe6eb10a6
5 changed files with 112 additions and 10 deletions

View File

@ -32,6 +32,7 @@
#include "debug_sync.h"
#include "log.h" // get_gtid_list_event
enum enum_gtid_until_state {
GTID_UNTIL_NOT_DONE,
GTID_UNTIL_STOP_AFTER_STANDALONE,
@ -781,7 +782,7 @@ get_slave_until_gtid(THD *thd, String *out_str)
@param event_coordinates binlog file name and position of the last
real event master sent from binlog
@note
@note
Among three essential pieces of heartbeat data Log_event::when
is computed locally.
The error to send is serious and should force terminating
@ -795,6 +796,8 @@ static int send_heartbeat_event(binlog_send_info *info,
DBUG_ENTER("send_heartbeat_event");
ulong ev_offset;
char sub_header_buf[HB_SUB_HEADER_LEN];
bool sub_header_in_use=false;
if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
DBUG_RETURN(1);
@ -815,18 +818,38 @@ static int send_heartbeat_event(binlog_send_info *info,
ulong event_len = ident_len + LOG_EVENT_HEADER_LEN +
(do_checksum ? BINLOG_CHECKSUM_LEN : 0);
int4store(header + SERVER_ID_OFFSET, global_system_variables.server_id);
DBUG_EXECUTE_IF("simulate_pos_4G",
{
const_cast<event_coordinates *>(coord)->pos= (UINT_MAX32 + (ulong)1);
DBUG_SET("-d, simulate_pos_4G");
};);
if (coord->pos <= UINT_MAX32)
{
int4store(header + LOG_POS_OFFSET, coord->pos); // log_pos
}
else
{
// Set common_header.log_pos=0 to indicate its overflow
int4store(header + LOG_POS_OFFSET, 0);
sub_header_in_use= true;
int8store(sub_header_buf, coord->pos);
event_len+= HB_SUB_HEADER_LEN;
}
int4store(header + EVENT_LEN_OFFSET, event_len);
int2store(header + FLAGS_OFFSET, 0);
int4store(header + LOG_POS_OFFSET, coord->pos); // log_pos
packet->append(header, sizeof(header));
packet->append(p, ident_len); // log_file_name
if (sub_header_in_use)
packet->append(sub_header_buf, sizeof(sub_header_buf));
packet->append(p, ident_len); // log_file_name
if (do_checksum)
{
char b[BINLOG_CHECKSUM_LEN];
ha_checksum crc= my_checksum(0, (uchar*) header, sizeof(header));
if (sub_header_in_use)
crc= my_checksum(crc, (uchar*) sub_header_buf, sizeof(sub_header_buf));
crc= my_checksum(crc, (uchar*) p, ident_len);
int4store(b, crc);
packet->append(b, sizeof(b));