MWL#121: DS-MRR support for clustered primary keys

- Add testcases - Code cleanup: garbage removal, better comments, make members private where possible
2025-07-30 16:24:05 +03:00 · 2010-06-22 21:24:22 +04:00
parent 82f8ed17e1
commit 16e197f5b1
4 changed files with 577 additions and 189 deletions
--- a/sql/multi_range_read.h
+++ b/sql/multi_range_read.h
@ -1,16 +1,76 @@
 /*
-  This file contains declarations for 
-   - Disk-Sweep MultiRangeRead (DS-MRR) implementation
+  This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR) 
+  implementation
 */

 /**
-  A Disk-Sweep MRR interface implementation
+  A Disk-Sweep implementation of MRR Interface (DS-MRR for short)

-  This implementation makes range (and, in the future, 'ref') scans to read
-  table rows in disk sweeps. 
-  
-  Currently it is used by MyISAM and InnoDB. Potentially it can be used with
-  any table handler that has non-clustered indexes and on-disk rows.
+  This is a "plugin"(*) for storage engines that allows make index scans 
+  read table rows in rowid order. For disk-based storage engines, this is
+  faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
+
+  (*) - only conceptually. No dynamic loading or binary compatibility of any
+        kind.
+
+  General scheme of things:
+   
+      SQL Layer code
+       |   |   |
+      -v---v---v---- handler->multi_range_read_XXX() function calls
+       |   |   |
+      ____________________________________
+     / DS-MRR module                      \
+     |  (scan indexes, order rowids, do    |
+     |   full record reads in rowid order) |
+     \____________________________________/
+       |   |   |
+      -|---|---|----- handler->read_range_first()/read_range_next(), 
+       |   |   |      handler->index_read(), handler->rnd_pos() calls.
+       |   |   |
+       v   v   v
+      Storage engine internals
+   
+  Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
+  Potentially it can be used with any table handler that has disk-based data
+  storage and has better performance when reading data in rowid order.
+*/
+
+
+/*
+  DS-MRR implementation for one table. Create/use one object of this class for
+  each ha_{myisam/innobase/etc} object. That object will be further referred to
+  as "the handler"
+
+  There are actually three strategies
+   S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
+      MRR-to-non-MRR calls converter)
+   S2. Regular DS-MRR 
+   S3. DS-MRR/CPK for doing scans on clustered primary keys.
+
+  S1 is used for cases which DS-MRR is unable to handle for some reason.
+
+  S2 is the actual DS-MRR. The basic algorithm is as follows:
+    1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and 
+        fill the buffer with {rowid, range_id} pairs
+    2. Sort the buffer by rowid
+    3. for each {rowid, range_id} pair in the buffer
+         get record by rowid and return the {record, range_id} pair
+    4. Repeat the above steps until we've exhausted the list of ranges we're
+       scanning.
+
+  S3 is the variant of DS-MRR for use with clustered primary keys (or any
+  clustered index). The idea is that in clustered index it is sufficient to 
+  access the index in index order, and we don't need an intermediate steps to
+  get rowid (like step #1 in S2).
+
+   DS-MRR/CPK's basic algorithm is as follows:
+    1. Collect a number of ranges (=lookup keys)
+    2. Sort them so that they follow in index order.
+    3. for each {lookup_key, range_id} pair in the buffer 
+       get record(s) matching the lookup key and return {record, range_id} pairs
+    4. Repeat the above steps until we've exhausted the list of ranges we're
+       scanning.
 */

 class DsMrr_impl
@ -21,40 +81,6 @@ public:
  DsMrr_impl()
    : h2(NULL) {};
  
-  /*
-    The "owner" handler object (the one that calls dsmrr_XXX functions.
-    It is used to retrieve full table rows by calling rnd_pos().
-  */
-  handler *h;
-  TABLE *table; /* Always equal to h->table */
-private:
-  /* Secondary handler object.  It is used for scanning the index */
-  handler *h2;
-
-  /* Buffer to store rowids, or (rowid, range_id) pairs */
-  uchar *rowids_buf;
-  uchar *rowids_buf_cur;   /* Current position when reading/writing */
-  uchar *rowids_buf_last;  /* When reading: end of used buffer space */
-  uchar *rowids_buf_end;   /* End of the buffer */
-
-  bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
-
-  /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
-  bool is_mrr_assoc;
-
-  bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
-
-  bool doing_cpk_scan;
-  uint cpk_tuple_length;
-  uint cpk_n_parts;
-  bool cpk_is_unique_scan;
-  char *cpk_saved_range_info;
-  bool cpk_have_range;
-
-
-  bool check_cpk_scan(uint keyno, uint mrr_flags);
-  static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
-public:
  void init(handler *h_arg, TABLE *table_arg)
  {
    h= h_arg; 
@ -64,10 +90,7 @@ public:
                 uint n_ranges, uint key_parts, uint mode, 
                 HANDLER_BUFFER *buf);
  void dsmrr_close();
-  int dsmrr_fill_buffer();
-  int dsmrr_fill_buffer_cpk();
  int dsmrr_next(char **range_info);
-  int dsmrr_next_cpk(char **range_info);

  ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, 
                     uint *bufsz, uint *flags, COST_VECT *cost);
@ -76,9 +99,53 @@ public:
                            void *seq_init_param, uint n_ranges, uint *bufsz,
                            uint *flags, COST_VECT *cost);
 private:
+  /*
+    The "owner" handler object (the one that calls dsmrr_XXX functions.
+    It is used to retrieve full table rows by calling rnd_pos().
+  */
+  handler *h;
+  TABLE *table; /* Always equal to h->table */
+
+  /* Secondary handler object.  It is used for scanning the index */
+  handler *h2;
+
+  /* Buffer to store rowids, or (rowid, range_id) pairs */
+  uchar *mrr_buf;
+  uchar *mrr_buf_cur;   /* Current position when reading/writing */
+  uchar *mrr_buf_last;  /* When reading: end of used buffer space */
+  uchar *mrr_buf_end;   /* End of the buffer */
+
+  bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
+
+  /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+
+  bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
+
+  bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */
+
+  /** DS-MRR/CPK variables start */
+
+  /* Length of lookup tuple being used, in bytes */
+  uint cpk_tuple_length;
+  /*
+    TRUE <=> We're scanning on a full primary key (and not on prefix), and so 
+    can get max. one match for each key 
+  */
+  bool cpk_is_unique_scan;
+  /* TRUE<=> we're in a middle of enumerating records from a range */ 
+  bool cpk_have_range;
+  /* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */
+  char *cpk_saved_range_info;
+
  bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, 
                       COST_VECT *cost);
  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, 
                               uint *buffer_size, COST_VECT *cost);
+  bool check_cpk_scan(uint keyno, uint mrr_flags);
+  static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
+  int dsmrr_fill_buffer();
+  void dsmrr_fill_buffer_cpk();
+  int dsmrr_next_cpk(char **range_info);
 };