Numerous small fixes to index_merge read time estimates code
@@ -666,6 +666,17 @@ extern double my_atof(const char*);
 #define FLT_MAX ((float)3.40282346638528860e+38)
 #endif
 
+/* Define missing math constants. */
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#ifndef M_E
+#define M_E 2.7182818284590452354
+#endif
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+
 /*
   Max size that must be added to a so that we know Size to make
   adressable obj.
@@ -109,7 +109,7 @@ key1 key2 key3 key4 key5 key6 key7 key8
 explain select * from t0 where
   (key1 < 3 or key2 < 3) and (key3 < 4 or key4 < 4) and (key5 < 2 or key6 < 2);
 id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i5,i6 4,4 NULL 4 Using where
+1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i1,i2 4,4 NULL 6 Using where
 explain select * from t0 where
   (key1 < 3 or key2 < 3) and (key3 < 100);
 id select_type table type possible_keys key key_len ref rows Extra
@@ -18,10 +18,6 @@
 
 #include "mysql_priv.h"
 
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
 Item *create_func_abs(Item* a)
 {
   return new Item_func_abs(a);
@@ -750,7 +750,7 @@ double Item_func_log2::val()
   double value=args[0]->val();
   if ((null_value=(args[0]->null_value || value <= 0.0)))
     return 0.0;
-  return log(value) / log(2.0);
+  return log(value) / M_LN2;
 }
 
 double Item_func_log10::val()
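For reference, the rewrite above uses the identity log2(x) = ln(x)/ln(2); M_LN2 is the precomputed ln(2) added in the first hunk, so the per-call log(2.0) evaluation disappears. A minimal standalone check of the identity (not part of the patch):

    #include <math.h>
    #include <stdio.h>

    int main()
    {
      /* log2(1024) computed as ln(x)/ln(2); prints 10 */
      printf("%g\n", log(1024.0) / M_LN2);
      return 0;
    }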
@@ -296,6 +296,9 @@ typedef struct st_qsel_param {
   char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
     max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
   bool quick;				// Don't calulate possible keys
+
+  uint *imerge_cost_buff;      /* buffer for index_merge cost estimates */
+  uint imerge_cost_buff_size;  /* size of the buffer */
 } PARAM;
 
 static SEL_TREE * get_mm_parts(PARAM *param,Field *field,
@@ -953,6 +956,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   param.table=head;
   param.keys=0;
   param.mem_root= &alloc;
+  param.imerge_cost_buff_size= 0;
 
   thd->no_errors=1;				// Don't warn about NULL
   init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
@@ -1011,7 +1015,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     ha_rows found_records;
     double found_read_time= read_time;
 
-    if (!get_quick_select_params(tree, &param, needed_reg, true,
+    if (!get_quick_select_params(tree, &param, needed_reg, false,
                                  &found_read_time, &found_records,
                                  &best_key))
     {
@@ -1254,31 +1258,30 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
   */
 
   /*
-    It may be possible to use different keys for index_merge scans,
-    e.g. for query like
+    It may be possible to use different keys for index_merge scans, e.g. for
+    query like
       ...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
-    we have to make choice between key1 and key2 for one scan and
-    between key3,key4 for another.
-    We assume we'll get the best way if we choose the best key read
-    inside each of the conjuncts. Comparison is done without 'using index'.
+    we have to make choice between key1 and key2 for one scan and between
+    key3, key4 for another.
+    We assume we'll get the best if we choose the best key read inside each
+    of the conjuncts.
   */
   for (SEL_TREE **ptree= imerge->trees;
        ptree != imerge->trees_next;
        ptree++)
   {
     SEL_ARG **tree_best_key;
-    uint keynr;
 
     tree_read_time= *read_time;
-    if (get_quick_select_params(*ptree, param, needed_reg, false,
+    if (get_quick_select_params(*ptree, param, needed_reg, true,
                                 &tree_read_time, &tree_records,
                                 &tree_best_key))
     {
      /*
-       Non-'index only' range scan on a one in index_merge key is more
-       expensive than other available option. The entire index_merge will be
-       more expensive then, too. We continue here only to update SQL_SELECT
-       members.
+       One of index scans in this index_merge is more expensive than entire
+       table read for another available option. The entire index_merge will
+       be more expensive then, too. We continue here only to update
+       SQL_SELECT members.
      */
       imerge_too_expensive= true;
     }
@@ -1286,23 +1289,18 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
     if (imerge_too_expensive)
       continue;
 
+    uint keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)];
     imerge->best_keys[ptree - imerge->trees]= tree_best_key;
-    keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)];
+    imerge_cost += tree_read_time;
 
     if (pk_is_clustered && keynr == param->table->primary_key)
     {
-      /* This is a Clustered PK scan, it will be done without 'index only' */
-      imerge_cost += tree_read_time;
       have_cpk_scan= true;
       cpk_records= tree_records;
     }
     else
-    {
-      /* Non-CPK scan, calculate time to do it using 'index only' */
-      imerge_cost += get_index_only_read_time(param, tree_records,keynr);
       records_for_unique += tree_records;
-    }
   }
   DBUG_PRINT("info",("index_merge cost of index reads: %g", imerge_cost));
 
   if (imerge_too_expensive)
@@ -1359,14 +1357,23 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
   DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost));
 
   /* PHASE 3: Add Unique operations cost */
-  double unique_cost=
-    Unique::get_use_cost(param->mem_root, records_for_unique,
+  register uint unique_calc_buff_size=
+    Unique::get_cost_calc_buff_size(records_for_unique,
                          param->table->file->ref_length,
                          param->thd->variables.sortbuff_size);
-  if (unique_cost < 0.0)
+  if (param->imerge_cost_buff_size < unique_calc_buff_size)
+  {
+    if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
+                                                     unique_calc_buff_size)))
     DBUG_RETURN(1);
+    param->imerge_cost_buff_size= unique_calc_buff_size;
+  }
+
+  imerge_cost +=
+    Unique::get_use_cost(param->imerge_cost_buff, records_for_unique,
+                         param->table->file->ref_length,
+                         param->thd->variables.sortbuff_size);
 
-  imerge_cost += unique_cost;
   DBUG_PRINT("info",("index_merge total cost: %g", imerge_cost));
   if (imerge_cost < *read_time)
   {
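The hunk above also changes who owns the scratch memory: instead of allocating inside Unique::get_use_cost (which forced a negative return value on out-of-memory), the caller sizes a buffer once, caches it in PARAM, and reuses it for every index_merge candidate. A sketch of that grow-on-demand idiom with hypothetical names (the server uses alloc_root on the PARAM mem_root rather than malloc):

    #include <stdlib.h>

    /* Return a buffer of at least needed_size bytes, growing the cached one
       only when necessary; NULL on allocation failure (which the caller
       above maps to DBUG_RETURN(1)). */
    static unsigned *ensure_cost_buff(unsigned **buf, unsigned *cur_size,
                                      unsigned needed_size)
    {
      if (*cur_size < needed_size)
      {
        unsigned *p= (unsigned*) malloc(needed_size);
        if (!p)
          return NULL;
        *buf= p;
        *cur_size= needed_size;
      }
      return *buf;
    }

With this split, the cost-estimation call itself can no longer fail, which is why get_use_cost loses its "negative means out of memory" return convention later in the patch.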
@@ -1415,8 +1422,8 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
      tree         in     make range select for this SEL_TREE
      param        in     parameters from test_quick_select
      needed_reg   in/out other table data needed by this quick_select
-     index_read_can_be_used if false, assume that 'index only' option is not
-                            available.
+     index_read_must_be_used if true, assume 'index only' option will be set
+                             (except for clustered PK indexes)
      read_time    out    read time estimate
      records      out    # of records estimate
      key_to_read  out    SEL_ARG to be used for creating quick select
@@ -1424,16 +1431,17 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
 
 static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
                                    key_map& needed_reg,
-                                   bool index_read_can_be_used,
+                                   bool index_read_must_be_used,
                                    double *read_time, ha_rows *records,
                                    SEL_ARG ***key_to_read)
 {
   int idx;
   int result = 1;
+  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
   /*
-    Note that there may be trees that have type SEL_TREE::KEY but contain
-    no key reads at all. For example, tree for expression "key1 is not null"
-    where key1 is defined as "not null".
+    Note that there may be trees that have type SEL_TREE::KEY but contain no
+    key reads at all, e.g. tree for expression "key1 is not null" where key1
+    is defined as "not null".
   */
   SEL_ARG **key,**end;
 
@@ -1450,22 +1458,29 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
           (*key)->maybe_flag)
         needed_reg.set_bit(keynr);
 
-      bool read_index_only= index_read_can_be_used?
-                            param->table->used_keys.is_set(keynr): false;
+      bool read_index_only= index_read_must_be_used? true :
+                            (bool)param->table->used_keys.is_set(keynr);
       found_records=check_quick_select(param, idx, *key);
 
       if (found_records != HA_POS_ERROR && found_records > 2 &&
          read_index_only &&
-          (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY))
+          (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) &&
+          !(pk_is_clustered && keynr == param->table->primary_key))
      {
        /* We can resolve this by only reading through this key. */
        found_read_time=get_index_only_read_time(param, found_records, keynr);
      }
      else
+      {
+        /*
+          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
+          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
+        */
        found_read_time= (param->table->file->read_time(keynr,
                                                        param->range_count,
                                                        found_records)+
                          (double) found_records / TIME_FOR_COMPARE);
+      }
      if (*read_time > found_read_time && found_records != HA_POS_ERROR)
      {
        *read_time= found_read_time;
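A worked instance of the cost formula in the new comment, with invented inputs; TIME_FOR_COMPARE is the server's "how many row compares equal one disk read" constant (5 in the sources of this era), so treat the numbers as illustrative only:

    #include <stdio.h>

    int main()
    {
      double io_cost= 40.0;         /* assumed handler read_time() result */
      double found_records= 1000.0; /* assumed range estimate */
      /* cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks);
         prints 240 */
      printf("%g\n", io_cost + found_records / 5.0);
      return 0;
    }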
@@ -1233,8 +1233,16 @@ public:
   }
 
   bool get(TABLE *table);
-  static double get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
+  static double get_use_cost(uint *buffer, uint nkeys, uint key_size,
                              ulong max_in_memory_size);
+  inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size,
+                                            ulong max_in_memory_size)
+  {
+    register ulong max_elems_in_tree=
+      (1 + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
+    return sizeof(uint)*(1 + nkeys/max_elems_in_tree);
+  }
+
   friend int unique_write_to_file(gptr key, element_count count, Unique *unique);
   friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique);
 };
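The new inline helper makes the scratch-space requirement computable up front: Unique builds at most 1 + nkeys/max_elems_in_tree trees, and the cost simulation needs one uint per tree. A hedged re-derivation with generic types, collapsing ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size) into a single elem_bytes parameter:

    /* Bytes of scratch space get_use_cost will need: one uint per merged
       sequence (tree). elem_bytes stands in for the aligned per-element
       footprint ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size). */
    static unsigned long cost_calc_buff_bytes(unsigned long nkeys,
                                              unsigned long elem_bytes,
                                              unsigned long mem_limit)
    {
      unsigned long max_elems_in_tree= 1 + mem_limit / elem_bytes;
      return sizeof(unsigned) * (1 + nkeys / max_elems_in_tree);
    }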
sql/uniques.cc
@@ -72,112 +72,161 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
 }
 
 
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
-#ifndef M_E
-#define M_E (exp((double)1.0))
-#endif
+/*
+  Calculate log2(n!)
+
+  NOTES
+    Stirling's approximate formula is used:
+
+      n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
+
+    Derivation of formula used for calculations is as follows:
+
+    log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
+
+      = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
+*/
 
 inline double log2_n_fact(double x)
 {
-  return (2 * (((x)+1)*log(((x)+1)/M_E) + log(2*M_PI*((x)+1))/2 ) / log(2));
+  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
 }
 
 
 /*
-  Calculate cost of merge_buffers call.
+  Calculate cost of merge_buffers function call for given sequence of
+  input stream lengths and store the number of rows in result stream in *last.
 
-  NOTE
-    See comment near Unique::get_use_cost for cost formula derivation.
+  SYNOPSIS
+    get_merge_buffers_cost()
+      buff_elems   Array of #s of elements in buffers
+      elem_size    Size of element stored in buffer
+      output_buff  Pointer to storage for result buffer size
+      first        Pointer to first merged element size
+      last         Pointer to last merged element size
+
+  RETURN
+    Cost of merge_buffers operation in disk seeks.
+
+  NOTES
+    It is assumed that no rows are eliminated during merge.
+    The cost is calculated as
+
+      cost(read_and_write) + cost(merge_comparisons).
+
+    All bytes in the sequences is read and written back during merge so cost
+    of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write)
+
+    For comparisons cost calculations we assume that all merged sequences have
+    the same length, so each of total_buf_size elements will be added to a sort
+    heap with (n_buffers-1) elements. This gives the comparison cost:
+
+      total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
+*/
-static double get_merge_buffers_cost(uint* buff_sizes, uint elem_size,
-                                     int last, int f,int t)
+
+static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
+                                     uint *output_buff, uint *first,
+                                     uint *last)
 {
-  uint sum= 0;
-  for (int i=f; i <= t; i++)
-    sum+= buff_sizes[i];
-  buff_sizes[last]= sum;
+  uint total_buf_elems= 0;
+  for (uint *pbuf= first; pbuf <= last; pbuf++)
+    total_buf_elems+= *pbuf;
+  *last= total_buf_elems;
 
-  int n_buffers= t - f + 1;
-  double buf_length= sum*elem_size;
+  int n_buffers= last - first + 1;
 
-  return (((double)buf_length/(n_buffers+1)) / IO_SIZE) * 2 * n_buffers +
-         buf_length * log(n_buffers) / (TIME_FOR_COMPARE_ROWID * log(2.0));
+  /* Using log2(n)=log(n)/log(2) formula */
+  return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
+         total_buf_elems*log(n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
 }
 
 
 /*
   Calculate cost of merging buffers into one in Unique::get, i.e. calculate
-  how long (in terms of disk seeks) the two call
+  how long (in terms of disk seeks) the two calls
     merge_many_buffs(...);
     merge_buffers(...);
   will take.
 
   SYNOPSIS
     get_merge_many_buffs_cost()
-      alloc         memory pool to use
-      maxbuffer     # of full buffers.
-      max_n_elems   # of elements in first maxbuffer buffers.
-      last_n_elems  # of elements in last buffer.
-      elem_size     size of buffer element.
+      buffer        buffer space for temporary data, at least
+                    Unique::get_cost_calc_buff_size bytes
+      maxbuffer     # of full buffers
+      max_n_elems   # of elements in first maxbuffer buffers
+      last_n_elems  # of elements in last buffer
+      elem_size     size of buffer element
 
   NOTES
-    It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
-    contain max_n_elems each, last buffer contains last_n_elems elements.
+    maxbuffer+1 buffers are merged, where first maxbuffer buffers contain
+    max_n_elems elements each and last buffer contains last_n_elems elements.
 
     The current implementation does a dumb simulation of merge_many_buffs
-    actions.
+    function actions.
 
   RETURN
-    >=0  Cost of merge in disk seeks.
-    <0   Out of memory.
+    Cost of merge in disk seeks.
 */
-static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
+
+static double get_merge_many_buffs_cost(uint *buffer,
                                         uint maxbuffer, uint max_n_elems,
                                         uint last_n_elems, int elem_size)
 {
   register int i;
   double total_cost= 0.0;
-  int lastbuff;
-  uint* buff_sizes;
+  uint *buff_elems= buffer; /* #s of elements in each of merged sequences */
+  uint *lastbuff;
 
-  if (!(buff_sizes= (uint*)alloc_root(alloc, sizeof(uint) * (maxbuffer + 1))))
-    return -1.0;
-
+  /*
+    Set initial state: first maxbuffer sequences contain max_n_elems elements
+    each, last sequence contains last_n_elems elements.
+  */
   for(i = 0; i < (int)maxbuffer; i++)
-    buff_sizes[i]= max_n_elems;
-  buff_sizes[maxbuffer]= last_n_elems;
+    buff_elems[i]= max_n_elems;
+  buff_elems[maxbuffer]= last_n_elems;
 
+  /*
+    Do it exactly as merge_many_buff function does, calling
+    get_merge_buffers_cost to get cost of merge_buffers.
+  */
   if (maxbuffer >= MERGEBUFF2)
   {
-    /* Simulate merge_many_buff */
     while (maxbuffer >= MERGEBUFF2)
    {
      lastbuff=0;
      for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
-        total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
-                                             lastbuff++, i, i+MERGEBUFF-1);
+        total_cost+=get_merge_buffers_cost(buff_elems, elem_size, lastbuff++,
+                                           buff_elems + i,
+                                           buff_elems + i + MERGEBUFF-1);
 
-      total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
-                                           lastbuff++, i, maxbuffer);
+      total_cost+=get_merge_buffers_cost(buff_elems, elem_size, lastbuff++,
+                                         buff_elems + i,
+                                         buff_elems + maxbuffer);
      maxbuffer= (uint)lastbuff-1;
    }
  }
 
  /* Simulate final merge_buff call. */
-  total_cost += get_merge_buffers_cost(buff_sizes, elem_size, 0, 0,
-                                       maxbuffer);
+  total_cost += get_merge_buffers_cost(buff_elems, elem_size, buff_elems,
+                                       buff_elems, buff_elems + maxbuffer);
  return total_cost;
 }
 
 
 /*
-  Calclulate cost of using Unique for processing nkeys elements of size
+  Calculate cost of using Unique for processing nkeys elements of size
   key_size using max_in_memory_size memory.
 
+  SYNOPSIS
+    Unique::get_use_cost()
+      buffer    space for temporary data, use Unique::get_cost_calc_buff_size
+                to get # bytes needed.
+      nkeys     #of elements in Unique
+      key_size  size of each elements in bytes
+      max_in_memory_size amount of memory Unique will be allowed to use
+
   RETURN
-    >=0  Cost in disk seeks.
-    <0   Out of memory.
+    Cost in disk seeks.
 
   NOTES
     cost(using_unqiue) =
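A quick numeric sanity check of the rewritten log2_n_fact (not in the patch): the exact value is log2(10!) = log2(3628800) which is about 21.79, and the Stirling form lands very close:

    #include <math.h>
    #include <stdio.h>

    static double log2_n_fact(double x)
    {
      return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
    }

    int main()
    {
      printf("%g\n", log2_n_fact(10.0));  /* ~21.78 vs exact 21.79 */
      return 0;
    }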
@@ -190,17 +239,15 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
     comparisons, where n runs from 1 tree_size (we assume that all added
     elements are different). Together this gives:
 
-    n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) =
-
-    = 2*ln((N+1)!) / ln(2) = {using Stirling formula} =
-
-    = 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) / ln(2).
+    n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
 
     then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
 
     Total cost of creating trees:
       (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
 
+    Approximate value of log2(N!) is calculated by log2_n_fact function.
+
     2. Cost of merging.
     If only one tree is created by Unique no merging will be necessary.
     Otherwise, we model execution of merge_many_buff function and count
@@ -213,7 +260,7 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
   these will be random seeks.
 */
 
-double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
+double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
                             ulong max_in_memory_size)
 {
   ulong max_elements_in_tree;
@@ -221,15 +268,16 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
   int n_full_trees; /* number of trees in unique - 1 */
   double result;
 
-  max_elements_in_tree= max_in_memory_size /
-                        ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
+  max_elements_in_tree=
+    max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
+
   n_full_trees=    nkeys / max_elements_in_tree;
   last_tree_elems= nkeys % max_elements_in_tree;
 
   /* Calculate cost of creating trees */
-  result= log2_n_fact(last_tree_elems);
+  result= 2*log2_n_fact(last_tree_elems + 1.0);
   if (n_full_trees)
-    result+= n_full_trees * log2_n_fact(max_elements_in_tree);
+    result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
   result /= TIME_FOR_COMPARE_ROWID;
 
   DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
@@ -241,13 +289,15 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
 
   /*
     There is more then one tree and merging is necessary.
-    First, add cost of writing all trees to disk.
+    First, add cost of writing all trees to disk, assuming that all disk
+    writes are sequential.
   */
-  result += n_full_trees * ceil(key_size*max_elements_in_tree / IO_SIZE);
-  result += ceil(key_size*last_tree_elems / IO_SIZE);
+  result += DISK_SEEK_BASE_COST * n_full_trees *
+              ceil(key_size*max_elements_in_tree / IO_SIZE);
+  result += DISK_SEEK_BASE_COST * ceil(key_size*last_tree_elems / IO_SIZE);
 
   /* Cost of merge */
-  double merge_cost= get_merge_many_buffs_cost(alloc, n_full_trees,
+  double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees,
                                                max_elements_in_tree,
                                                last_tree_elems, key_size);
   if (merge_cost < 0.0)
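Worked instance of the new tree-write cost term, with invented inputs; the real DISK_SEEK_BASE_COST and IO_SIZE values come from the server headers, and 0.5/4096 below are assumptions for illustration only:

    #include <math.h>
    #include <stdio.h>

    int main()
    {
      double DISK_SEEK_BASE_COST= 0.5;  /* assumed */
      double IO_SIZE= 4096.0;           /* assumed */
      double key_size= 8.0, max_elems= 100000.0, n_full_trees= 3.0;
      /* 0.5 * 3 * ceil(800000/4096) = 0.5 * 3 * 196 = 294 cost units */
      printf("%g\n", DISK_SEEK_BASE_COST * n_full_trees *
                     ceil(key_size*max_elems / IO_SIZE));
      return 0;
    }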