This will be pushed only after I fix the testsuite.

This is the main commit for Worklog tasks: * A more dynamic binlog format which allows small changes (1064) * Log session variables in Query_log_event (1063) Below 5.0 means 5.0.0. MySQL 5.0 is able to replicate FOREIGN_KEY_CHECKS, UNIQUE_KEY_CHECKS (for speed), SQL_AUTO_IS_NULL, SQL_MODE. Not charsets (WL#1062), not some vars (I can only think of SQL_SELECT_LIMIT, which deserves a special treatment). Note that this works for queries, except LOAD DATA INFILE (for this it would have to wait for Dmitri's push of WL#874, which in turn waits for the present push, so... the deadlock must be broken!). Note that when Dmitri pushes WL#874 in 5.0.1, 5.0.0 won't be able to replicate a LOAD DATA INFILE from 5.0.1. Apart from that, the new binlog format is designed so that it can tolerate a little variation in the events (so that a 5.0.0 slave could replicate a 5.0.1 master, except for LOAD DATA INFILE unfortunately); that is, when I later add replication of charsets it should break nothing. And when I later add a UID to every event, it should break nothing. The main change brought by this patch is a new type of event, Format_description_log_event, which describes some lengths in other event types. This event is needed for the master/slave/mysqlbinlog to understand a 5.0 log. Thanks to this event, we can later add more bytes to the header of every event without breaking compatibility. Inside Query_log_event, we have some additional dynamic format, as every Query_log_event can have a different number of status variables, stored as pairs (code, value); that's how SQL_MODE and session variables and catalog are stored. This way, we can later add count of affected rows, charsets... and we can have options --don't-log-count-affected-rows if we want. MySQL 5.0 is able to run on 4.x relay logs, 4.x binlogs. Upgrading a 4.x master to 5.0 is ok (no need to delete binlogs), upgrading a 4.x slave to 5.0 is ok (no need to delete relay logs); so both can be "hot" upgrades.
Upgrading a 3.23 master to 5.0 requires as much effort as upgrading it to 4.0. 3.23 and 4.x can't be slaves of 5.0. So downgrading from 5.0 to 4.x may be complicated. Log_event::log_pos is now the position of the end of the event, which is more useful than the position of the beginning. We take care of compatibility with <5.0 (in which log_pos is the beginning). I added a short test for replication of SQL_MODE and some other variables. TODO: - after committing this, merge the latest 5.0 into it - fix all tests - update the manual with upgrade notes.
2025-07-30 16:24:05 +03:00 · 2003-12-18 01:09:05 +01:00
parent 7a1fddaa1f
commit 66a32e8925
17 changed files with 2418 additions and 587 deletions
--- a/sql/slave.h
+++ b/sql/slave.h
@ -67,11 +67,6 @@ extern my_bool opt_log_slave_updates;
 extern ulonglong relay_log_space_limit;
 struct st_master_info;

-enum enum_binlog_formats {
-  BINLOG_FORMAT_CURRENT=0, /* 0 is important for easy 'if (mi->old_format)' */
-  BINLOG_FORMAT_323_LESS_57, 
-  BINLOG_FORMAT_323_GEQ_57 };
-
 /*
  TODO: this needs to be redone, but for now it does not matter since
  we do not have multi-master yet.
@ -186,6 +181,8 @@ typedef struct st_relay_log_info
  ulonglong group_relay_log_pos;
  char event_relay_log_name[FN_REFLEN];
  ulonglong event_relay_log_pos;
+  ulonglong future_event_relay_log_pos;
+
  /* 
     Original log name and position of the group we're currently executing
     (whose coordinates are group_relay_log_name/pos in the relay log)
@ -207,11 +204,13 @@ typedef struct st_relay_log_info

  /*
    InnoDB internally stores the master log position it has processed
-    so far; the position to store is really the sum of 
-    pos + pending + event_len here since we must store the pos of the
-    END of the current log event
+    so far; when the InnoDB code to store this position is called, we have not
+    updated rli->group_master_log_pos yet. So the position is the event's
+    log_pos (the position of the end of the event); we save it in the variable
+    below. It's the *coming* group_master_log_pos (the one which will be
+    group_master_log_pos in the coming milliseconds).
  */
-  int event_len;
+  ulonglong future_group_master_log_pos;

  time_t last_master_timestamp; 

@ -285,16 +284,17 @@ typedef struct st_relay_log_info
      until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
  }
  
-  inline void inc_event_relay_log_pos(ulonglong val)
+  inline void inc_event_relay_log_pos()
  {
-    event_relay_log_pos+= val;
+    event_relay_log_pos= future_event_relay_log_pos;
  }

-  void inc_group_relay_log_pos(ulonglong val, ulonglong log_pos, bool skip_lock=0)
+  void inc_group_relay_log_pos(ulonglong log_pos,
+                               bool skip_lock=0)  
  {
    if (!skip_lock)
      pthread_mutex_lock(&data_lock);
-    inc_event_relay_log_pos(val);
+    inc_event_relay_log_pos();
    group_relay_log_pos= event_relay_log_pos;
    strmake(group_relay_log_name,event_relay_log_name,
            sizeof(group_relay_log_name)-1);
@ -311,8 +311,31 @@ typedef struct st_relay_log_info
      not advance as it should on the non-transactional slave (it advances by
      big leaps, whereas it should advance by small leaps).
    */
-    if (log_pos) // 3.23 binlogs don't have log_posx
-      group_master_log_pos= log_pos+ val;
+    /*
+      In 4.x we used the event's len to compute the positions here. This is
+      wrong if the event was 3.23/4.0 and has been converted to 5.0, because
+      then the event's len is not what it was in the master's binlog, so this
+      will make a wrong group_master_log_pos (yes it's a bug in 3.23->4.0
+      replication: Exec_master_log_pos is wrong). Only way to solve this is to
+      have the original offset of the end of the event in the relay log. This
+      is what we do in 5.0: log_pos has become "end_log_pos" (because the real
+      use of log_pos in 4.0 was to compute the end_log_pos; so better to store
+      end_log_pos instead of begin_log_pos).
+      If we had not done this fix here, the problem would also have appeared
+      when the slave and master are 5.0 but with different event length (for
+      example the slave is more recent than the master and features the event
+      UID). It would give false MASTER_POS_WAIT, false Exec_master_log_pos in
+      SHOW SLAVE STATUS, and so the user would do some CHANGE MASTER using this
+      value which would lead to badly broken replication.
+      Even the relay_log_pos will be corrupted in this case, because the len in
+      the relay log is not "val".
+      With the end_log_pos solution, we avoid computations involving lengths.
+    */
+    DBUG_PRINT("info", ("log_pos=%lld group_master_log_pos=%lld",
+                        log_pos,group_master_log_pos));
+    if (log_pos) // some events (like fake Rotate) don't have log_pos
+      // when we are here, log_pos is the end of the event
+      group_master_log_pos= log_pos;
    pthread_cond_broadcast(&data_cond);
    if (!skip_lock)
      pthread_mutex_unlock(&data_lock);
@ -389,7 +412,6 @@ typedef struct st_master_info
  int events_till_abort;
 #endif
  bool inited;
-  enum enum_binlog_formats old_format;
  volatile bool abort_slave, slave_running;
  volatile ulong slave_run_id;
  /* 
@ -404,7 +426,7 @@ typedef struct st_master_info
  long clock_diff_with_master; 
  
  st_master_info()
-    :ssl(0), fd(-1),  io_thd(0), inited(0), old_format(BINLOG_FORMAT_CURRENT),
+    :ssl(0), fd(-1),  io_thd(0), inited(0),
     abort_slave(0),slave_running(0), slave_run_id(0)
  {
    host[0] = 0; user[0] = 0; password[0] = 0;
@ -535,10 +557,12 @@ void lock_slave_threads(MASTER_INFO* mi);
 void unlock_slave_threads(MASTER_INFO* mi);
 void init_thread_mask(int* mask,MASTER_INFO* mi,bool inverse);
 int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,ulonglong pos,
-		       bool need_data_lock, const char** errmsg);
+		       bool need_data_lock, const char** errmsg,
+                       bool look_for_description_event);

 int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
 		     const char** errmsg);
+void set_slave_thread_options(THD* thd);
 void rotate_relay_log(MASTER_INFO* mi);

 extern "C" pthread_handler_decl(handle_slave_io,arg);