Merge 10.0.14 into 10.1

2025-12-03 05:41:09 +03:00 · 2014-10-15 12:59:13 +02:00
parent f947f73b2b f1afc003ee
commit f62c12b405
2115 changed files with 87968 additions and 80173 deletions
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1,6 +1,6 @@
 /*
   Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2008, 2013, Monty Program Ab.
+   Copyright (c) 2008, 2014, SkySQL Ab.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -895,7 +895,6 @@ THD::THD(bool is_wsrep_applier)
   accessed_rows_and_keys(0),
   m_statement_psi(NULL),
   m_idle_psi(NULL),
-   m_server_idle(false),
   thread_id(0),
   global_disable_checkpoint(0),
   failed_com_change_user(0),
@@ -4346,6 +4345,220 @@ extern "C" int thd_rpl_is_parallel(const MYSQL_THD thd)
  return thd->rgi_slave && thd->rgi_slave->is_parallel_exec;
 }

+/*
+  This function can optionally be called to check if thd_report_wait_for()
+  needs to be called for waits done by a given transaction.
+
+  If this function returns false for a given thd, there is no need to do any
+  calls to thd_report_wait_for() on that thd.
+
+  This call is optional; it is safe to call thd_report_wait_for() in any case.
+  This call can be used to save some redundant calls to thd_report_wait_for()
+  if desired. (This is unlikely to matter much unless there are _lots_ of
+  waits to report, as the overhead of thd_report_wait_for() is small).
+*/
+extern "C" int
+thd_need_wait_for(const MYSQL_THD thd)
+{
+  rpl_group_info *rgi;
+
+  if (!thd)
+    return false;
+  rgi= thd->rgi_slave;
+  if (!rgi)
+    return false;
+  return rgi->is_parallel_exec;
+}
+
+/*
+  Used by InnoDB/XtraDB to report that one transaction THD is about to go to
+  wait for a transactional lock held by another transactions OTHER_THD.
+
+  This is used for parallel replication, where transactions are required to
+  commit in the same order on the slave as they did on the master. If the
+  transactions on the slave encounters lock conflicts on the slave that did
+  not exist on the master, this can cause deadlocks.
+
+  Normally, such conflicts will not occur, because the same conflict would
+  have prevented the two transactions from committing in parallel on the
+  master, thus preventing them from running in parallel on the slave in the
+  first place. However, it is possible in case when the optimizer chooses a
+  different plan on the slave than on the master (eg. table scan instead of
+  index scan).
+
+  InnoDB/XtraDB reports lock waits using this call. If a lock wait causes a
+  deadlock with the pre-determined commit order, we kill the later transaction,
+  and later re-try it, to resolve the deadlock.
+
+  This call need only receive reports about waits for locks that will remain
+  until the holding transaction commits. InnoDB/XtraDB auto-increment locks
+  are released earlier, and so need not be reported. (Such false positives are
+  not harmful, but could lead to unnecessary kill and retry, so best avoided).
+*/
+extern "C" void
+thd_report_wait_for(const MYSQL_THD thd, MYSQL_THD other_thd)
+{
+  rpl_group_info *rgi;
+  rpl_group_info *other_rgi;
+
+  if (!thd || !other_thd)
+    return;
+  rgi= thd->rgi_slave;
+  other_rgi= other_thd->rgi_slave;
+  if (!rgi || !other_rgi)
+    return;
+  if (!rgi->is_parallel_exec)
+    return;
+  if (rgi->rli != other_rgi->rli)
+    return;
+  if (!rgi->gtid_sub_id || !other_rgi->gtid_sub_id)
+    return;
+  if (rgi->current_gtid.domain_id != other_rgi->current_gtid.domain_id)
+    return;
+  if (rgi->gtid_sub_id > other_rgi->gtid_sub_id)
+    return;
+  /*
+    This transaction is about to wait for another transaction that is required
+    by replication binlog order to commit after. This would cause a deadlock.
+
+    So send a kill to the other transaction, with a temporary error; this will
+    cause replication to rollback (and later re-try) the other transaction,
+    releasing the lock for this transaction so replication can proceed.
+  */
+  other_rgi->killed_for_retry= true;
+  mysql_mutex_lock(&other_thd->LOCK_thd_data);
+  other_thd->awake(KILL_CONNECTION);
+  mysql_mutex_unlock(&other_thd->LOCK_thd_data);
+}
+
+/*
+  This function is called from InnoDB/XtraDB to check if the commit order of
+  two transactions has already been decided by the upper layer. This happens
+  in parallel replication, where the commit order is forced to be the same on
+  the slave as it was originally on the master.
+
+  If this function returns false, it means that such commit order will be
+  enforced. This allows the storage engine to optionally omit gap lock waits
+  or similar measures that would otherwise be needed to ensure that
+  transactions would be serialised in a way that would cause a commit order
+  that is correct for binlogging for statement-based replication.
+
+  Since transactions are only run in parallel on the slave if they ran without
+  lock conflicts on the master, normally no lock conflicts on the slave happen
+  during parallel replication. However, there are a couple of corner cases
+  where it can happen, like these secondary-index operations:
+
+    T1: INSERT INTO t1 VALUES (7, NULL);
+    T2: DELETE FROM t1 WHERE b <= 3;
+
+    T1: UPDATE t1 SET secondary=NULL WHERE primary=1
+    T2: DELETE t1 WHERE secondary <= 3
+
+  The DELETE takes a gap lock that can block the INSERT/UPDATE, but the row
+  locks set by INSERT/UPDATE do not block the DELETE. Thus, the execution
+  order of the transactions determine whether a lock conflict occurs or
+  not. Thus a lock conflict can occur on the slave where it did not on the
+  master.
+
+  If this function returns true, normal locking should be done as required by
+  the binlogging and transaction isolation level in effect. But if it returns
+  false, the correct order will be enforced anyway, and InnoDB/XtraDB can
+  avoid taking the gap lock, preventing the lock conflict.
+
+  Calling this function is just an optimisation to avoid unnecessary
+  deadlocks. If it was not used, a gap lock would be set that could eventually
+  cause a deadlock; the deadlock would be caught by thd_report_wait_for() and
+  the transaction T2 killed and rolled back (and later re-tried).
+*/
+extern "C" int
+thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
+{
+  rpl_group_info *rgi, *other_rgi;
+
+  DBUG_EXECUTE_IF("disable_thd_need_ordering_with", return 1;);
+  if (!thd || !other_thd)
+    return 1;
+  rgi= thd->rgi_slave;
+  other_rgi= other_thd->rgi_slave;
+  if (!rgi || !other_rgi)
+    return 1;
+  if (!rgi->is_parallel_exec)
+    return 1;
+  if (rgi->rli != other_rgi->rli)
+    return 1;
+  if (rgi->current_gtid.domain_id != other_rgi->current_gtid.domain_id)
+    return 1;
+  if (!rgi->commit_id || rgi->commit_id != other_rgi->commit_id)
+    return 1;
+  /*
+    Otherwise, these two threads are doing parallel replication within the same
+    replication domain. Their commit order is already fixed, so we do not need
+    gap locks or similar to otherwise enforce ordering (and in fact such locks
+    could lead to unnecessary deadlocks and transaction retry).
+  */
+  return 0;
+}
+
+
+/*
+  If the storage engine detects a deadlock, and needs to choose a victim
+  transaction to roll back, it can call this function to ask the upper
+  server layer for which of two possible transactions is prefered to be
+  aborted and rolled back.
+
+  In parallel replication, if two transactions are running in parallel and
+  one is fixed to commit before the other, then the one that commits later
+  will be prefered as the victim - chosing the early transaction as a victim
+  will not resolve the deadlock anyway, as the later transaction still needs
+  to wait for the earlier to commit.
+
+  Otherwise, a transaction that uses only transactional tables, and can thus
+  be safely rolled back, will be prefered as a deadlock victim over a
+  transaction that also modified non-transactional (eg. MyISAM) tables.
+
+  The return value is -1 if the first transaction is prefered as a deadlock
+  victim, 1 if the second transaction is prefered, or 0 for no preference (in
+  which case the storage engine can make the choice as it prefers).
+*/
+extern "C" int
+thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2)
+{
+  rpl_group_info *rgi1, *rgi2;
+  bool nontrans1, nontrans2;
+
+  if (!thd1 || !thd2)
+    return 0;
+
+  /*
+    If the transactions are participating in the same replication domain in
+    parallel replication, then request to select the one that will commit
+    later (in the fixed commit order from the master) as the deadlock victim.
+  */
+  rgi1= thd1->rgi_slave;
+  rgi2= thd2->rgi_slave;
+  if (rgi1 && rgi2 &&
+      rgi1->is_parallel_exec &&
+      rgi1->rli == rgi2->rli &&
+      rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id)
+    return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1;
+
+  /*
+    If one transaction has modified non-transactional tables (so that it
+    cannot be safely rolled back), and the other has not, then prefer to
+    select the purely transactional one as the victim.
+  */
+  nontrans1= thd1->transaction.all.modified_non_trans_table;
+  nontrans2= thd2->transaction.all.modified_non_trans_table;
+  if (nontrans1 && !nontrans2)
+    return 1;
+  else if (!nontrans1 && nontrans2)
+    return -1;
+
+  /* No preferences, let the storage engine decide. */
+  return 0;
+}
+
+
 extern "C" int thd_non_transactional_update(const MYSQL_THD thd)
 {
  return(thd->transaction.all.modified_non_trans_table);
@@ -4370,9 +4583,18 @@ extern "C" bool thd_binlog_filter_ok(const MYSQL_THD thd)
  return binlog_filter->db_ok(thd->db);
 }

+/*
+  This is similar to sqlcom_can_generate_row_events, with the expection
+  that we only return 1 if we are going to generate row events in a
+  transaction.
+  CREATE OR REPLACE is always safe to do as this will run in it's own
+  transaction.
+*/
+
 extern "C" bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd)
 {
-  return sqlcom_can_generate_row_events(thd);
+  return (sqlcom_can_generate_row_events(thd) && thd->lex->sql_command !=
+          SQLCOM_CREATE_TABLE);
 }


@@ -5888,23 +6110,35 @@ show_query_type(THD::enum_binlog_query_type qtype)
  Constants required for the limit unsafe warnings suppression
 */
 //seconds after which the limit unsafe warnings suppression will be activated
-#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
+#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 5*60
 //number of limit unsafe warnings after which the suppression will be activated
-#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
+#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 10

-static ulonglong limit_unsafe_suppression_start_time= 0;
-static bool unsafe_warning_suppression_is_activated= false;
-static int limit_unsafe_warning_count= 0;
+static ulonglong unsafe_suppression_start_time= 0;
+static bool unsafe_warning_suppression_active[LEX::BINLOG_STMT_UNSAFE_COUNT];
+static ulong unsafe_warnings_count[LEX::BINLOG_STMT_UNSAFE_COUNT];
+static ulong total_unsafe_warnings_count;

 /**
  Auxiliary function to reset the limit unsafety warning suppression.
+  This is done without mutex protection, but this should be good
+  enough as it doesn't matter if we loose a couple of suppressed
+  messages or if this is called multiple times.
 */
-static void reset_binlog_unsafe_suppression()
+
+static void reset_binlog_unsafe_suppression(ulonglong now)
 {
+  uint i;
  DBUG_ENTER("reset_binlog_unsafe_suppression");
-  unsafe_warning_suppression_is_activated= false;
-  limit_unsafe_warning_count= 0;
-  limit_unsafe_suppression_start_time= my_interval_timer()/10000000;
+
+  unsafe_suppression_start_time= now;
+  total_unsafe_warnings_count= 0;
+
+  for (i= 0 ; i < LEX::BINLOG_STMT_UNSAFE_COUNT ; i++)
+  {
+    unsafe_warnings_count[i]= 0;
+    unsafe_warning_suppression_active[i]= 0;
+  }
  DBUG_VOID_RETURN;
 }

@@ -5922,95 +6156,94 @@ static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
 }

 /**
-  Auxiliary function to check if the warning for limit unsafety should be
-  thrown or suppressed. Details of the implementation can be found in the
-  comments inline.
+  Auxiliary function to check if the warning for unsafe repliction statements
+  should be thrown or suppressed.
+
+  Logic is:
+  - If we get more than LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT errors
+    of one type, that type of errors will be suppressed for
+    LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT.
+  - When the time limit has been reached, all suppression is reset.
+
+  This means that if one gets many different types of errors, some of them
+  may be reset less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT. However at
+  least one error is disable for this time.
+
  SYNOPSIS:
  @params
-   buf         - buffer to hold the warning message text
   unsafe_type - The type of unsafety.
-   query       - The actual query statement.

-  TODO: Remove this function and implement a general service for all warnings
-  that would prevent flooding the error log.
+  RETURN:
+    0   0k to log
+    1   Message suppressed
 */
-static void do_unsafe_limit_checkout(char* buf, int unsafe_type, char* query)
+
+static bool protect_against_unsafe_warning_flood(int unsafe_type)
 {
-  ulonglong now= 0;
-  DBUG_ENTER("do_unsafe_limit_checkout");
-  DBUG_ASSERT(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
-  limit_unsafe_warning_count++;
+  ulong count;
+  ulonglong now= my_interval_timer()/1000000000ULL;
+  DBUG_ENTER("protect_against_unsafe_warning_flood");
+
+  count= ++unsafe_warnings_count[unsafe_type];
+  total_unsafe_warnings_count++;
+
  /*
    INITIALIZING:
    If this is the first time this function is called with log warning
    enabled, the monitoring the unsafe warnings should start.
  */
-  if (limit_unsafe_suppression_start_time == 0)
+  if (unsafe_suppression_start_time == 0)
  {
-    limit_unsafe_suppression_start_time= my_interval_timer()/10000000;
-    print_unsafe_warning_to_log(unsafe_type, buf, query);
+    reset_binlog_unsafe_suppression(now);
+    DBUG_RETURN(0);
  }
-  else
-  {
-    if (!unsafe_warning_suppression_is_activated)
-      print_unsafe_warning_to_log(unsafe_type, buf, query);

-    if (limit_unsafe_warning_count >=
-        LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
+  /*
+    The following is true if we got too many errors or if the error was
+    already suppressed
+  */
+  if (count >= LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
+  {
+    ulonglong diff_time= (now - unsafe_suppression_start_time);
+
+    if (!unsafe_warning_suppression_active[unsafe_type])
    {
-      now= my_interval_timer()/10000000;
-      if (!unsafe_warning_suppression_is_activated)
+      /*
+        ACTIVATION:
+        We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
+        less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
+        suppression.
+      */
+      if (diff_time <= LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
      {
-        /*
-          ACTIVATION:
-          We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
-          less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
-          suppression.
-        */
-        if ((now-limit_unsafe_suppression_start_time) <=
-                       LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
-        {
-          unsafe_warning_suppression_is_activated= true;
-          DBUG_PRINT("info",("A warning flood has been detected and the limit \
-unsafety warning suppression has been activated."));
-        }
-        else
-        {
-          /*
-           there is no flooding till now, therefore we restart the monitoring
-          */
-          limit_unsafe_suppression_start_time= my_interval_timer()/10000000;
-          limit_unsafe_warning_count= 0;
-        }
+        unsafe_warning_suppression_active[unsafe_type]= 1;
+        sql_print_information("Suppressing warnings of type '%s' for up to %d seconds because of flooding",
+                              ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]),
+                              LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT);
      }
      else
      {
        /*
-          Print the suppression note and the unsafe warning.
+          There is no flooding till now, therefore we restart the monitoring
        */
-        sql_print_information("The following warning was suppressed %d times \
-during the last %d seconds in the error log",
-                              limit_unsafe_warning_count,
-                              (int)
-                              (now-limit_unsafe_suppression_start_time));
-        print_unsafe_warning_to_log(unsafe_type, buf, query);
-        /*
-          DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
-          warnings in more than  LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
-          suppression should be deactivated.
-        */
-        if ((now - limit_unsafe_suppression_start_time) >
-            LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
-        {
-          reset_binlog_unsafe_suppression();
-          DBUG_PRINT("info",("The limit unsafety warning supression has been \
-deactivated"));
-        }
+        reset_binlog_unsafe_suppression(now);
+      }
+    }
+    else
+    {
+      /* This type of warnings was suppressed */
+      if (diff_time > LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
+      {
+        ulong save_count= total_unsafe_warnings_count;
+        /* Print a suppression note and remove the suppression */
+        reset_binlog_unsafe_suppression(now);
+        sql_print_information("Suppressed %lu unsafe warnings during "
+                              "the last %d seconds",
+                              save_count, (int) diff_time);
      }
-      limit_unsafe_warning_count= 0;
    }
  }
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(unsafe_warning_suppression_active[unsafe_type]);
 }

 /**
@@ -6022,6 +6255,7 @@ deactivated"));
 void THD::issue_unsafe_warnings()
 {
  char buf[MYSQL_ERRMSG_SIZE * 2];
+  uint32 unsafe_type_flags;
  DBUG_ENTER("issue_unsafe_warnings");
  /*
    Ensure that binlog_unsafe_warning_flags is big enough to hold all
@@ -6029,8 +6263,10 @@ void THD::issue_unsafe_warnings()
  */
  DBUG_ASSERT(LEX::BINLOG_STMT_UNSAFE_COUNT <=
              sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
+  
+  if (!(unsafe_type_flags= binlog_unsafe_warning_flags))
+    DBUG_VOID_RETURN;                           // Nothing to do

-  uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
  /*
    For each unsafe_type, check if the statement is unsafe in this way
    and issue a warning.
@@ -6045,13 +6281,9 @@ void THD::issue_unsafe_warnings()
                          ER_BINLOG_UNSAFE_STATEMENT,
                          ER(ER_BINLOG_UNSAFE_STATEMENT),
                          ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
-      if (global_system_variables.log_warnings)
-      {
-        if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
-          do_unsafe_limit_checkout( buf, unsafe_type, query());
-        else //cases other than LIMIT unsafety
-          print_unsafe_warning_to_log(unsafe_type, buf, query());
-      }
+      if (global_system_variables.log_warnings > 0 &&
+          !protect_against_unsafe_warning_flood(unsafe_type))
+        print_unsafe_warning_to_log(unsafe_type, buf, query());
    }
  }
  DBUG_VOID_RETURN;
@@ -6531,6 +6763,7 @@ wait_for_commit::unregister_wait_for_prior_commit2()
      this->waitee= NULL;
    }
  }
+  wakeup_error= 0;
  mysql_mutex_unlock(&LOCK_wait_commit);
 }