MDEV-16188 Use in-memory PK filters built from range index scans

This patch contains a full implementation of the optimization that allows the use of in-memory rowid / primary key filters built for range conditions over indexes. In many cases the use of such filters reduces the number of disk seeks spent on fetching table rows. In this implementation the choice of which possible filter to apply (if any) is made purely on cost-based considerations. This implementation re-architects the partial implementation of the feature pushed by Galina Shalygina in the commit 8d5a11122c. Besides, this patch contains a better implementation of the generic handler function handler::multi_range_read_info_const() that takes into account gaps between ranges when calculating the cost of range index scans. It also contains some corrections to the implementation of the handler function records_in_range() for MyISAM. This patch supports the feature for InnoDB and MyISAM.
2025-10-12 12:25:37 +03:00 · 2019-02-03 14:56:12 -08:00
parent 5f46670bd0
commit 658128af43
190 changed files with 5685 additions and 2017 deletions
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -43,6 +43,7 @@
 #include "debug_sync.h"         // DEBUG_SYNC
 #include "sql_audit.h"
 #include "ha_sequence.h"
+#include "rowid_filter.h"

 #ifdef WITH_PARTITION_STORAGE_ENGINE
 #include "ha_partition.h"
@@ -2602,36 +2603,26 @@ LEX_CSTRING *handler::engine_name()
 }


-/**
-  The method returns the cost of the random I/O accesses when
-  index is used.
+/*
+  It is assumed that the value of the parameter 'ranges' can be only 0 or 1.
+  If ranges == 1 then the function returns the cost of index only scan
+  by index 'keyno' of one range containing 'rows' key entries.
+  If ranges == 0 then the function returns only the cost of copying
+  those key entries into the engine buffers.
 */

-double handler::get_io_cost(uint index, ha_rows rows, uint *length)
-{
-  uint len= table->key_info[index].key_length + ref_length;
-  if (index == table->s->primary_key && table->file->primary_key_is_clustered())
-    len= table->s->stored_rec_length;
-  double keys_per_block= (stats.block_size/2.0/len+1);
-  *length= len;
-  return (rows + keys_per_block-1)/ keys_per_block;
-}
-
-
 double handler::keyread_time(uint index, uint ranges, ha_rows rows)
 {
-  /*
-    It is assumed that we will read trough the whole key range and that all
-    key blocks are half full (normally things are much better). It is also
-    assumed that each time we read the next key from the index, the handler
-    performs a random seek, thus the cost is proportional to the number of
-    blocks read. This model does not take into account clustered indexes -
-    engines that support that (e.g. InnoDB) may want to overwrite this method.
-    The model counts in the time to read index entries from cache.
-  */
-  uint len;
-  return get_io_cost(index, rows, &len) +
-         len*rows/(stats.block_size+1)/TIME_FOR_COMPARE ;
+  DBUG_ASSERT(ranges == 0 || ranges == 1);
+  size_t len= table->key_info[index].key_length + ref_length;
+  if (index == table->s->primary_key && table->file->primary_key_is_clustered())
+    len= table->s->stored_rec_length;
+  uint keys_per_block= (stats.block_size/2.0/len+1);
+  ulonglong blocks= !rows ? 0 : (rows-1) / keys_per_block + 1;
+  double cost= (double)rows*len/(stats.block_size+1)*IDX_BLOCK_COPY_COST;
+  if (ranges)
+    cost+= blocks;
+  return cost;
 }

 void **handler::ha_data(THD *thd) const
@@ -5766,6 +5757,35 @@ extern "C" enum icp_result handler_index_cond_check(void* h_arg)
  return res;
 }

+
+/**
+  Rowid filter callback - to be called by an engine to check rowid / primary
+  keys of the rows whose data is to be fetched against the used rowid filter
+*/
+
+extern "C" int handler_rowid_filter_check(void *h_arg)
+{
+  handler *h= (handler*) h_arg;
+  TABLE *tab= h->get_table();
+  h->position(tab->record[0]);
+  return h->pushed_rowid_filter->check((char *) h->ref);
+}
+
+
+/**
+  Callback function for an engine to check whether the used rowid filter
+  has been already built
+*/
+
+extern "C" int handler_rowid_filter_is_active(void *h_arg)
+{
+  if (!h_arg)
+    return false;
+  handler *h= (handler*) h_arg;
+  return h->rowid_filter_is_active;
+}
+
+
 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                key_part_map keypart_map,
                                enum ha_rkey_function find_flag)
@@ -6230,6 +6250,7 @@ int handler::ha_reset()
  /* Reset information about pushed engine conditions */
  cancel_pushed_idx_cond();
  /* Reset information about pushed index conditions */
+  cancel_pushed_rowid_filter();
  clear_top_table_fields();
  DBUG_RETURN(reset());
 }