
Merge 10.6 into 10.11

This commit is contained in:
Marko Mäkelä
2024-02-14 16:12:53 +02:00
165 changed files with 3458 additions and 2319 deletions

sql/sql_select.cc

@@ -289,8 +289,6 @@ static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
 static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
 static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
-static bool prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
-                                    bool need_distinct);
 static bool init_sum_functions(Item_sum **func, Item_sum **end);
 static bool update_sum_func(Item_sum **func);
 static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
@@ -2351,6 +2349,10 @@ JOIN::optimize_inner()
                                             select_lex->attach_to_conds,
                                             &cond_value);
       sel->attach_to_conds.empty();
+      Json_writer_object wrapper(thd);
+      Json_writer_object pushd(thd, "condition_pushdown_from_having");
+      pushd.add("conds", conds);
+      pushd.add("having", having);
     }
   }
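
Note on the trace helpers added above: Json_writer_object is an RAII wrapper, so each object opens a JSON node on construction and closes it when its scope ends, which is why no explicit "end" call appears in the hunk. A minimal standalone sketch of the same pattern (not MariaDB code; the class and its output format are invented for illustration):

// Sketch of the RAII scoping used by Json_writer_object: construction
// opens a JSON object, destruction closes it, so nesting follows C++ scope.
#include <iostream>
#include <string>

struct JsonObject {
  explicit JsonObject(const std::string &key= "")
  {
    if (!key.empty())
      std::cout << '"' << key << "\": ";
    std::cout << "{\n";
  }
  void add(const std::string &k, const std::string &v)
  { std::cout << "  \"" << k << "\": \"" << v << "\",\n"; }
  ~JsonObject() { std::cout << "}\n"; }   // closes the node automatically
};

int main()
{
  JsonObject wrapper;                                  // outer, unnamed node
  JsonObject pushd("condition_pushdown_from_having");  // named child node
  pushd.add("conds", "<cond tree>");
  pushd.add("having", "<having tree>");
}                                                      // inner closes first

The real code adds Item trees rather than strings, but the scoping behaves the same way: both nodes close when the enclosing block in optimize_inner() ends.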
@@ -4225,7 +4227,7 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
   if (make_sum_func_list(all_fields, fields_list, true))
     goto err;
   if (prepare_sum_aggregators(thd, sum_funcs,
-                             !(tables_list &&
+                              !(tables_list &&
                                 join_tab->is_using_agg_loose_index_scan())))
     goto err;
   if (setup_sum_funcs(thd, sum_funcs))
@@ -5948,7 +5950,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
   */
   /* Largest integer that can be stored in double (no compiler warning) */
   s->worst_seeks= (double) (1ULL << 53);
-  if (thd->variables.optimizer_adjust_secondary_key_costs != 2)
+  if ((thd->variables.optimizer_adjust_secondary_key_costs &
+       OPTIMIZER_ADJ_DISABLE_MAX_SEEKS) == 0)
   {
     s->worst_seeks= MY_MIN((double) s->found_records / 10,
                            (double) s->read_time*3);
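
This hunk, and the ones that follow, change optimizer_adjust_secondary_key_costs from an enumerated value (compared with == 1 or == 2) to a set of independent flag bits, so more than one cost adjustment can be active at once. A compilable sketch of the before/after test; only the three OPTIMIZER_ADJ_* names are taken from the diff, and their numeric bit values here are assumptions:

// Sketch of the change from an enumerated value to a bit set.
#include <cstdio>

enum : unsigned long long {
  OPTIMIZER_ADJ_SEC_KEY_COST=                 1ULL << 0,  // assumed values
  OPTIMIZER_ADJ_DISABLE_MAX_SEEKS=            1ULL << 1,
  OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY= 1ULL << 2
};

int main()
{
  // Before: "value == 2" could enable exactly one behaviour at a time.
  // After: each behaviour is an independent bit, so combinations work.
  unsigned long long opts= OPTIMIZER_ADJ_SEC_KEY_COST |
                           OPTIMIZER_ADJ_DISABLE_MAX_SEEKS;
  if ((opts & OPTIMIZER_ADJ_DISABLE_MAX_SEEKS) == 0)
    std::puts("cap worst_seeks from found_records/read_time");
  else
    std::puts("keep worst_seeks at the 2^53 sentinel");
}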
@@ -7935,7 +7938,8 @@ double cost_for_index_read(const THD *thd, const TABLE *table, uint key,
 {
   cost= ((file->keyread_time(key, 0, records) +
           file->read_time(key, 1, MY_MIN(records, worst_seeks))));
-  if (thd->variables.optimizer_adjust_secondary_key_costs == 1 &&
+  if ((thd->variables.optimizer_adjust_secondary_key_costs &
+       OPTIMIZER_ADJ_SEC_KEY_COST) &&
       file->is_clustering_key(0))
   {
     /*
@@ -8131,8 +8135,9 @@ best_access_path(JOIN *join,
       higher to ensure that ref|filter is not less than range over same
       number of rows
     */
-    double filter_setup_cost= (thd->variables.
-                               optimizer_adjust_secondary_key_costs == 2 ?
+    double filter_setup_cost= ((thd->variables.
+                                optimizer_adjust_secondary_key_costs &
+                                OPTIMIZER_ADJ_DISABLE_MAX_SEEKS) ?
                                1.0 : 0.0);
     MY_BITMAP *eq_join_set= &s->table->eq_join_set;
     KEYUSE *hj_start_key= 0;
@@ -13884,7 +13889,8 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
       if (item->is_null())
         DBUG_RETURN(NESTED_LOOP_OK);
     }
-    fill_record(thd, table, table->field, sjm->sjm_table_cols, TRUE, FALSE);
+    fill_record(thd, table, table->field, sjm->sjm_table_cols, true, false,
+                true);
     if (unlikely(thd->is_error()))
       DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
     if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
@@ -27668,15 +27674,86 @@ static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
 }
 
-static bool prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
-                                    bool need_distinct)
+
+/*
+  @brief
+    Set up aggregate functions.
+
+  @param thd            Thread descriptor
+  @param func_ptr       Array of pointers to aggregate functions
+  @param need_distinct  FALSE means that the table access method already
+                        guarantees that arguments of all aggregate functions
+                        will be unique. (This is the case for Loose Scan)
+                        TRUE - Otherwise.
+  @return
+    false  Ok
+    true   Error
+*/
+
+bool JOIN::prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
+                                   bool need_distinct)
 {
   Item_sum *func;
   DBUG_ENTER("prepare_sum_aggregators");
   while ((func= *(func_ptr++)))
   {
-    if (func->set_aggregator(thd,
-                             need_distinct && func->has_with_distinct() ?
+    bool need_distinct_aggregator= need_distinct && func->has_with_distinct();
+    if (need_distinct_aggregator && table_count - const_tables == 1)
+    {
+      /*
+        We are doing setup for an aggregate with DISTINCT, like
+
+          SELECT agg_func(DISTINCT col1, col2 ...) FROM ...
+
+        In the general case, agg_func will need to use Aggregator_distinct
+        to remove duplicates from its arguments.
+        We won't have to remove duplicates if we know the arguments are
+        already unique. This is true when
+        1. the join operation has only one non-const table (checked above)
+        2. the argument list covers a PRIMARY or a UNIQUE index.
+
+        Example: here the values of t1.pk are unique:
+
+          SELECT agg_func(DISTINCT t1.pk, ...) FROM t1
+
+        and so the whole argument of agg_func is unique.
+      */
+      List<Item> arg_fields;
+      for (uint i= 0; i < func->argument_count(); i++)
+      {
+        if (func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
+          arg_fields.push_back(func->arguments()[i]);
+      }
+
+      /*
+        If the query has a GROUP BY, then it's sufficient that a unique
+        key is covered by a concatenation of {argument_list, group_by_list}.
+
+        Example: Suppose t1 has PRIMARY KEY(pk1, pk2). Then:
+
+          SELECT agg_func(DISTINCT t1.pk1, ...) FROM t1 GROUP BY t1.pk2
+
+        Each GROUP BY group will have t1.pk2 fixed. Then, the values of
+        t1.pk1 will be unique, and no de-duplication will be needed.
+      */
+      for (ORDER *group= group_list; group; group= group->next)
+      {
+        if ((*group->item)->real_item()->type() == Item::FIELD_ITEM)
+          arg_fields.push_back(*group->item);
+      }
+
+      if (list_contains_unique_index(join_tab[const_tables].table,
+                                     find_field_in_item_list,
+                                     (void *) &arg_fields))
+        need_distinct_aggregator= false;
+    }
+
+    Json_writer_object trace_wrapper(thd);
+    Json_writer_object trace_aggr(thd, "prepare_sum_aggregators");
+    trace_aggr.add("function", func);
+    trace_aggr.add("aggregator_type",
+                   (need_distinct_aggregator ||
+                    func->uses_non_standard_aggregator_for_distinct()) ?
+                   "distinct" : "simple");
+
+    if (func->set_aggregator(thd, need_distinct_aggregator ?
                              Aggregator::DISTINCT_AGGREGATOR :
                              Aggregator::SIMPLE_AGGREGATOR))
       DBUG_RETURN(TRUE);
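
The new logic above can be summarized as: skip the de-duplicating DISTINCT aggregator when the aggregate's column arguments, concatenated with the GROUP BY columns, cover a unique key of the single non-const table. A minimal sketch of that test outside the server; the container types and the covers_unique_key helper are hypothetical stand-ins for the Item lists and list_contains_unique_index:

// Sketch: DISTINCT de-duplication is redundant when the argument columns
// plus the GROUP BY columns cover some unique key of the table.
#include <algorithm>
#include <string>
#include <vector>

static bool covers_unique_key(const std::vector<std::string> &args,
                              const std::vector<std::string> &group_by,
                              const std::vector<std::vector<std::string>> &unique_keys)
{
  std::vector<std::string> cols(args);                 // argument_list ...
  cols.insert(cols.end(), group_by.begin(), group_by.end()); // + group_by_list
  for (const auto &key : unique_keys)                  // e.g. {{"pk1","pk2"}}
  {
    bool all_covered=
      std::all_of(key.begin(), key.end(), [&](const std::string &part)
      { return std::find(cols.begin(), cols.end(), part) != cols.end(); });
    if (all_covered)
      return true;                                     // arguments already unique
  }
  return false;
}

int main()
{
  // SELECT agg(DISTINCT pk1) FROM t1 GROUP BY pk2, with PRIMARY KEY(pk1,pk2):
  // {pk1} + {pk2} covers the primary key, so no distinct aggregator is needed.
  return covers_unique_key({"pk1"}, {"pk2"}, {{"pk1", "pk2"}}) ? 0 : 1;
}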
@@ -30604,12 +30681,13 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
   uint best_key_parts= 0;
   int best_key_direction= 0;
   ha_rows best_records= 0;
-  double read_time;
+  double read_time, records;
   int best_key= -1;
   bool is_best_covering= FALSE;
   double fanout= 1;
   ha_rows table_records= table->stat_records();
   bool group= join && join->group && order == join->group_list;
+  bool group_forces_index_usage= group;
   ha_rows refkey_rows_estimate= table->opt_range_condition_rows;
   const bool has_limit= (select_limit_arg != HA_POS_ERROR);
   THD* thd= join ? join->thd : table->in_use;
@@ -30646,6 +30724,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
   {
     uint tablenr= (uint)(tab - join->join_tab);
     read_time= join->best_positions[tablenr].read_time;
+    records= join->best_positions[tablenr].records_read;
     for (uint i= tablenr+1; i < join->table_count; i++)
     {
       fanout*= join->best_positions[i].records_read; // fanout is always >= 1
@@ -30654,8 +30733,23 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
     }
   }
   else
-    read_time= table->file->scan_time();
+  {
+    read_time= table->file->scan_time();
+    records= rows2double(table_records);
+  }
+
+  if ((thd->variables.optimizer_adjust_secondary_key_costs &
+       OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY) && group)
+  {
+    /*
+      read_time does not include TIME_FOR_COMPARE, while opt_range.cost,
+      which is used by index_scan_time, contains it.
+      Ensure that read_time and index_scan_time always include it to make
+      costs comparable.
+    */
+    read_time+= records/TIME_FOR_COMPARE;
+  }
 
   trace_cheaper_ordering.add("fanout", fanout);
   /*
     TODO: add cost of sorting here.
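
A toy calculation of the normalization above, assuming the traditional TIME_FOR_COMPARE constant of 5 (the constant's value is an assumption here; the diff only shows that read_time is adjusted by records/TIME_FOR_COMPARE):

// Sketch: read_time from the join plan excludes per-row comparison cost,
// while opt_range-based costs include it, so records/TIME_FOR_COMPARE is
// added to make the two sides of the later comparison commensurable.
#include <cstdio>

static const double TIME_FOR_COMPARE= 5.0;  // assumed server constant

int main()
{
  double read_time= 1000.0;   // scan cost without WHERE-comparison cost
  double records= 10000.0;    // rows the scan will examine
  read_time+= records / TIME_FOR_COMPARE;
  std::printf("normalized read_time: %g\n", read_time);  // 1000 + 2000 = 3000
}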
@@ -30847,30 +30941,62 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
possible_key.add("updated_limit", select_limit);
rec_per_key= keyinfo->actual_rec_per_key(keyinfo->user_defined_key_parts-1);
set_if_bigger(rec_per_key, 1);
/*
Here we take into account the fact that rows are
accessed in sequences rec_per_key records in each.
Rows in such a sequence are supposed to be ordered
by rowid/primary key. When reading the data
in a sequence we'll touch not more pages than the
table file contains.
TODO. Use the formula for a disk sweep sequential access
to calculate the cost of accessing data rows for one
index entry.
*/
index_scan_time= select_limit/rec_per_key *
MY_MIN(rec_per_key, table->file->scan_time());
double range_scan_time;
if (get_range_limit_read_cost(tab, table, table_records, nr,
select_limit, &range_scan_time))
if ((thd->variables.optimizer_adjust_secondary_key_costs &
OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY) && group)
{
possible_key.add("range_scan_time", range_scan_time);
if (range_scan_time < index_scan_time)
index_scan_time= range_scan_time;
/* Special optimization to avoid forcing an index when group by is used */
group_forces_index_usage= 0;
if (table->opt_range_keys.is_set(nr))
{
/* opt_range includes TIME_FOR_COMPARE */
index_scan_time= (double) table->opt_range[nr].cost;
}
else
{
/* Enable secondary_key_cost and disable max_seek option */
ulonglong save= thd->variables.optimizer_adjust_secondary_key_costs;
thd->variables.optimizer_adjust_secondary_key_costs|=
OPTIMIZER_ADJ_SEC_KEY_COST | OPTIMIZER_ADJ_DISABLE_MAX_SEEKS;
index_scan_time= cost_for_index_read(thd, table, nr,
table_records, HA_ROWS_MAX);
index_scan_time+= rows2double(table_records) / TIME_FOR_COMPARE;
thd->variables.optimizer_adjust_secondary_key_costs= save;
}
/* Assume data is proportionalyl distributed */
index_scan_time*= MY_MIN(select_limit, rec_per_key) / rec_per_key;
}
else
{
/*
Here we take into account the fact that rows are
accessed in sequences rec_per_key records in each.
Rows in such a sequence are supposed to be ordered
by rowid/primary key. When reading the data
in a sequence we'll touch not more pages than the
table file contains.
TODO. Use the formula for a disk sweep sequential access
to calculate the cost of accessing data rows for one
index entry.
*/
index_scan_time= select_limit/rec_per_key *
MY_MIN(rec_per_key, table->file->scan_time());
double range_scan_time;
if (get_range_limit_read_cost(tab, table, table_records, nr,
select_limit, &range_scan_time))
{
possible_key.add("range_scan_time", range_scan_time);
if (range_scan_time < index_scan_time)
index_scan_time= range_scan_time;
}
}
possible_key.add("index_scan_time", index_scan_time);
if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
if ((ref_key < 0 &&
(group_forces_index_usage || table->force_index || is_covering)) ||
index_scan_time < read_time)
{
ha_rows quick_records= table_records;
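
The proportional scaling at the end of the new branch pro-rates a full index scan cost by the fraction of each rec_per_key group that the LIMIT actually needs, under the stated even-distribution assumption. A worked sketch of the arithmetic (all values invented):

// Sketch of: index_scan_time *= MY_MIN(select_limit, rec_per_key) / rec_per_key
#include <algorithm>
#include <cstdio>

int main()
{
  double full_scan_cost= 800.0;  // index_scan_time for reading everything
  double select_limit= 20.0;     // rows needed by the LIMIT
  double rec_per_key= 100.0;     // rows per distinct key prefix
  double cost= full_scan_cost *
               std::min(select_limit, rec_per_key) / rec_per_key;
  std::printf("pro-rated cost: %g\n", cost);  // 800 * 20/100 = 160
}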
@@ -30892,6 +31018,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
possible_key.add("cause", "ref estimates better");
continue;
}
if (table->opt_range_keys.is_set(nr))
quick_records= table->opt_range[nr].rows;
possible_key.add("records", quick_records);
@@ -30910,6 +31037,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
           is_best_covering= is_covering;
           best_key_direction= direction;
           best_select_limit= select_limit;
+          if ((thd->variables.optimizer_adjust_secondary_key_costs &
+               OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY) && group)
+            set_if_smaller(read_time, index_scan_time);
         }
         else
         {