Original idea from Zardosht Kasheff to add HA_CLUSTERED_INDEX

- Added a lot of code comments - Updated get_best_ror_intersec() to prefer index scan on not clustered keys before clustered keys. - Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY - For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered() - Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect. sql/ha_partition.h: Added comment with warning for code unsafe to use with multiple storage engines at the same time sql/handler.h: Added HA_CLUSTERED_INDEX. Documented primary_key_is_clustered() sql/opt_range.cc: Added code comments Updated get_best_ror_intersec() to ignore clustered keys. Optimized away cpk_scan_used and one instance of current_thd (Simpler code) Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY sql/sql_select.cc: Changed comment to #ifdef For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered() (Change is smaller than what it looks beause of indentation change) sql/sql_table.cc: Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect. storage/innobase/handler/ha_innodb.h: Added support for HA_CLUSTERED_INDEX storage/innodb_plugin/handler/ha_innodb.cc: Added support for HA_CLUSTERED_INDEX storage/xtradb/handler/ha_innodb.cc: Added support for HA_CLUSTERED_INDEX
2025-08-09 22:24:09 +03:00 · 2011-05-18 19:26:30 +03:00
parent 5c70f813f3
commit 3631146442
8 changed files with 123 additions and 63 deletions
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -877,6 +877,10 @@ public:
  */
  virtual ulong index_flags(uint inx, uint part, bool all_parts) const
  {
    /*
      The following code is not safe if you are using different
      storage engines or different index types per partition.
    */
    return m_file[0]->index_flags(inx, part, all_parts);
  }
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -161,8 +161,11 @@
 */
 #define HA_KEY_SCAN_NOT_ROR     128 
 #define HA_DO_INDEX_COND_PUSHDOWN  256 /* Supports Index Condition Pushdown */
-
+/*
-
+  Data is clustered on this key. This means that when you read the key
  you also get the row data in the same block.
 */
 #define HA_CLUSTERED_INDEX      512
 /*
  bits in alter_table_flags:
@@ -2311,9 +2314,22 @@ public:
 /*
-   @retval TRUE   Primary key (if there is one) is clustered
+   Check if the primary key (if there is one) is a clustered key covering
-                  key covering all fields
+   all fields. This means:
-   @retval FALSE  otherwise
+
   - Data is stored together with the primary key (no secondary lookup
     needed to find the row data). The optimizer uses this to find out
     the cost of fetching data.
   - The primary key is part of each secondary key and is used
     to find the row data in the primary index when reading trough
     secondary indexes.
   - When doing a HA_KEYREAD_ONLY we get also all the primary key parts
     into the row. This is critical property used by index_merge.
   For a clustered primary key, index_flags() returns also HA_CLUSTERED_INDEX
   @retval TRUE   yes
   @retval FALSE  No.
 */
 virtual bool primary_key_is_clustered() { return FALSE; }
 virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -1816,6 +1816,12 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
  DBUG_VOID_RETURN;
 }
 /*
  QUICK_INDEX_SORT_SELECT works as follows:
  - Do index scans, accumulate rowids in the Unique object 
    (Unique will also sort and de-duplicate rowids)
  - Use rowids from unique to run a disk-ordered sweep
 */
 QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
                                                 TABLE *table)
@@ -1848,7 +1854,18 @@ QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
  if (head->file->primary_key_is_clustered() &&
      quick_sel_range->index == head->s->primary_key)
  {
-   /* A quick_select over a clustered primary key is handled specifically */
+   /*
     A quick_select over a clustered primary key is handled specifically
     Here we assume:
     - PK columns are included in any other merged index
     - Scan on the PK is disk-ordered.
       (not meeting #2 will only cause performance degradation)
       We could treat clustered PK as any other index, but that would
       be inefficient. There is no point in doing scan on
       CPK, remembering the rowid, then making rnd_pos() call with
       that rowid.
    */
    pk_quick_select= quick_sel_range;
    DBUG_RETURN(0);
  }
@@ -4298,11 +4315,19 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
  DBUG_ENTER("get_sweep_read_cost");
  if (param->table->file->primary_key_is_clustered())
  {
    /*
      We are using the primary key to find the rows.
      Calculate the cost for this.
    */
    result= param->table->file->read_time(param->table->s->primary_key,
                                          (uint)records, records);
  }
  else
  {
    /*
      Rows will be retreived with rnd_pos(). Caluclate the expected
      cost for this.
    */
    double n_blocks=
      ceil(ulonglong2double(param->table->file->stats.data_file_length) /
           IO_SIZE);
@@ -6187,7 +6212,6 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
  ROR_SCAN_INFO **cur_ror_scan;
  ROR_SCAN_INFO *cpk_scan= NULL;
  uint cpk_no;
  bool cpk_scan_used= FALSE;
  if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
@@ -6199,11 +6223,20 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
  for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
  {
    ROR_SCAN_INFO *scan;
    uint key_no;
    if (!tree->ror_scans_map.is_set(idx))
      continue;
    key_no= param->real_keynr[idx];
    if (key_no != cpk_no &&
        param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
    {
      /* Ignore clustering keys */
      tree->n_ror_scans--;
      continue;
    }
    if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
      return NULL;
-    if (param->real_keynr[idx] == cpk_no)
+    if (key_no == cpk_no)
    {
      cpk_scan= scan;
      tree->n_ror_scans--;
@@ -6289,15 +6322,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
  {
    if (ror_intersect_add(intersect, cpk_scan, TRUE) && 
        (intersect->total_cost < min_cost))
    {
      cpk_scan_used= TRUE;
      intersect_best= intersect; //just set pointer here
  }
-  }
+  else
    cpk_scan= 0;                                // Don't use cpk_scan
  /* Ok, return ROR-intersect plan if we have found one */
  TRP_ROR_INTERSECT *trp= NULL;
-  if (min_cost < read_time && (cpk_scan_used || best_num > 1))
+  if (min_cost < read_time && (cpk_scan || best_num > 1))
  {
    if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
      DBUG_RETURN(trp);
@@ -6316,7 +6348,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
    set_if_smaller(param->table->quick_condition_rows, best_rows);
    trp->records= best_rows;
    trp->index_scan_costs= intersect_best->index_scan_costs;
-    trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
+    trp->cpk_scan= cpk_scan;
    DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
                        "cost %g, records %lu",
                        trp->read_cost, (ulong) trp->records));
@@ -9511,10 +9543,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
  bool pk_is_clustered= file->primary_key_is_clustered();
  if (index_only && 
      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
-      !(pk_is_clustered && keynr == param->table->s->primary_key))
+      !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
     *mrr_flags |= HA_MRR_INDEX_ONLY;
-  if (current_thd->lex->sql_command != SQLCOM_SELECT)
+  if (param->thd->lex->sql_command != SQLCOM_SELECT)
    *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  *bufsize= param->thd->variables.mrr_buff_size;
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8550,17 +8550,19 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
 	  else if (!table->covering_keys.is_clear_all() &&
 		   !(tab->select && tab->select->quick))
 	  {					// Only read index tree
 #ifdef BAD_OPTIMIZATION
 	    /*
              It has turned out that the below change, while speeding things
              up for disk-bound loads, slows them down for cases when the data
              is in disk cache (see BUG#35850):
-	    //  See bug #26447: "Using the clustered index for a table scan
+              See bug #26447: "Using the clustered index for a table scan
-	    //  is always faster than using a secondary index".
+              is always faster than using a secondary index".
            */
            if (table->s->primary_key != MAX_KEY &&
                table->file->primary_key_is_clustered())
              tab->index= table->s->primary_key;
            else
-	    */
+#endif
              tab->index=find_shortest_key(table, & table->covering_keys);
 	    tab->read_first_record= join_read_first;
            /* Read with index_first / index_next */
@@ -16525,9 +16527,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
        */
        DBUG_ASSERT (ref_key != (int) nr);
-        bool is_covering= table->covering_keys.is_set(nr) ||
+        bool is_covering= (table->covering_keys.is_set(nr) ||
-                          (nr == table->s->primary_key &&
+                           (table->file->index_flags(nr, 0, 1) &
-                          table->file->primary_key_is_clustered());
+                            HA_CLUSTERED_INDEX));
        /* 
          Don't use an index scan with ORDER BY without limit.
@@ -16680,17 +16682,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
    /*
      filesort() and join cache are usually faster than reading in 
      index order and not using join cache, except in case that chosen
-      index is clustered primary key.
+      index is clustered key.
    */
-    if ((select_limit >= table_records) &&
+    if (best_key < 0 ||
        ((select_limit >= table_records) &&
         (tab->type == JT_ALL &&
          tab->join->tables > tab->join->const_tables + 1) &&
-         ((unsigned) best_key != table->s->primary_key ||
+         !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
          !table->file->primary_key_is_clustered()))
      goto use_filesort;
    if (best_key >= 0)
    {
    if (table->quick_keys.is_set(best_key) && best_key != ref_key)
    {
      key_map map;
@@ -16713,9 +16713,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
    used_key_parts= (order_direction == -1) ?
      saved_best_key_parts :  best_key_parts;
  }
    else
      goto use_filesort;
  } 
 check_reverse_order:                  
  DBUG_ASSERT(order_direction != 0);
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -7983,7 +7983,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
  if (order)
  {
-    if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered())
+    if (to->s->primary_key != MAX_KEY &&
        to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
    {
      char warn_buff[MYSQL_ERRMSG_SIZE];
      my_snprintf(warn_buff, sizeof(warn_buff), 
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -98,10 +98,14 @@ class ha_innobase: public handler
 	Table_flags table_flags() const;
 	ulong index_flags(uint idx, uint part, bool all_parts) const
 	{
          ulong extra_flag= 0;
          if (table && idx == table->s->primary_key)
             extra_flag= HA_CLUSTERED_INDEX;
 	  return (HA_READ_NEXT |
 		  HA_READ_PREV |
 		  HA_READ_ORDER |
 		  HA_READ_RANGE |
                  extra_flag |
 		  HA_KEYREAD_ONLY);
 	}
 	uint max_supported_keys()	   const { return MAX_KEY; }
--- a/storage/innodb_plugin/handler/ha_innodb.cc
+++ b/storage/innodb_plugin/handler/ha_innodb.cc
@@ -2995,12 +2995,15 @@ UNIV_INTERN
 ulong
 ha_innobase::index_flags(
 /*=====================*/
-	uint,
+	uint index,
 	uint,
 	bool)
 const
 {
-	return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+       ulong extra_flag= 0;
       if (table && index == table->s->primary_key)
             extra_flag= HA_CLUSTERED_INDEX;
 	return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
 	       | HA_READ_RANGE | HA_KEYREAD_ONLY);
 }
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -3408,12 +3408,15 @@ UNIV_INTERN
 ulong
 ha_innobase::index_flags(
 /*=====================*/
-	uint,
+	uint index,
-	uint,
+	uint part,
-	bool)
+	bool all_parts)
 const
 {
-	return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+       ulong extra_flag= 0;
       if (table && index == table->s->primary_key)
             extra_flag= HA_CLUSTERED_INDEX;
 	return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag 
 	       | HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
 }