MWL#121: DS-MRR support for clustered primary keys

- Add testcases - Code cleanup: garbage removal, better comments, make members private where possible
2025-07-30 16:24:05 +03:00 · 2010-06-22 21:24:22 +04:00
parent 82f8ed17e1
commit 16e197f5b1
4 changed files with 577 additions and 189 deletions
--- a/mysql-test/r/innodb_mrr_cpk.result
+++ b/mysql-test/r/innodb_mrr_cpk.result
@ -0,0 +1,134 @@
 drop table if exists t0,t1,t2,t3;
 set @save_join_cache_level=@@join_cache_level;
 set join_cache_level=6;
 set @save_storage_engine=@@storage_engine;
 set storage_engine=innodb;
 create table t0(a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
 create table t1(a char(8), b char(8), filler char(100), primary key(a));
 show create table t1;
 Table	Create Table
 t1	CREATE TABLE `t1` (
  `a` char(8) NOT NULL DEFAULT '',
  `b` char(8) DEFAULT NULL,
  `filler` char(100) DEFAULT NULL,
  PRIMARY KEY (`a`)
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 insert into t1 select 
 concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
 concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
 'filler'
 from t0 A, t0 B, t0 C;
 create table t2 (a char(8));
 insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
 This should use join buffer:
 explain select * from t1, t2 where t1.a=t2.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	
 1	SIMPLE	t1	eq_ref	PRIMARY	PRIMARY	8	test.t2.a	1	Using join buffer
 This output must be sorted by value of t1.a:
 select * from t1, t2 where t1.a=t2.a;
 a	b	filler	a
 a-1010=A	b-1010=B	filler	a-1010=A
 a-1020=A	b-1020=B	filler	a-1020=A
 a-1030=A	b-1030=B	filler	a-1030=A
 drop table t1, t2;
 create table t1(
 a char(8) character set utf8, b int, filler char(100), 
 primary key(a,b)
 );
 insert into t1 select 
 concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
 1000 + A.a + B.a*10 + C.a*100,
 'filler'
 from t0 A, t0 B, t0 C;
 create table t2 (a char(8) character set utf8, b int);
 insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	
 1	SIMPLE	t1	eq_ref	PRIMARY	PRIMARY	28	test.t2.a,test.t2.b	1	Using join buffer
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 a	b	filler	a	b
 a-1010=A	1010	filler	a-1010=A	1010
 a-1020=A	1020	filler	a-1020=A	1020
 a-1030=A	1030	filler	a-1030=A	1030
 insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	5	
 1	SIMPLE	t1	eq_ref	PRIMARY	PRIMARY	28	test.t2.a,test.t2.b	1	Using join buffer
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 a	b	filler	a	b
 a-1010=A	1010	filler	a-1010=A	1010
 a-1020=A	1020	filler	a-1020=A	1020
 a-1020=A	1020	filler	a-1020=A	1020
 a-1030=A	1030	filler	a-1030=A	1030
 a-1030=A	1030	filler	a-1030=A	1030
 drop table t1, t2;
 create table t1(
 a varchar(8) character set utf8, b int, filler char(100), 
 primary key(a,b)
 );
 insert into t1 select 
 concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
 1000 + A.a + B.a*10 + C.a*100,
 'filler'
 from t0 A, t0 B, t0 C;
 create table t2 (a char(8) character set utf8, b int);
 insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	
 1	SIMPLE	t1	eq_ref	PRIMARY	PRIMARY	30	test.t2.a,test.t2.b	1	Using index condition(BKA); Using join buffer
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 a	b	filler	a	b
 a-1010=A	1010	filler	a-1010=A	1010
 a-1020=A	1020	filler	a-1020=A	1020
 a-1030=A	1030	filler	a-1030=A	1030
 explain select * from t1, t2 where t1.a=t2.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	
 1	SIMPLE	t1	ref	PRIMARY	PRIMARY	26	test.t2.a	1	Using index condition(BKA); Using join buffer
 select * from t1, t2 where t1.a=t2.a;
 a	b	filler	a	b
 a-1010=A	1010	filler	a-1010=A	1010
 a-1020=A	1020	filler	a-1020=A	1020
 a-1030=A	1030	filler	a-1030=A	1030
 drop table t1, t2;
 create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
 insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
 insert into t1 values (11, 11, 11,   'filler');
 insert into t1 values (11, 11, 12,   'filler');
 insert into t1 values (11, 11, 13,   'filler');
 insert into t1 values (11, 22, 1234, 'filler');
 insert into t1 values (11, 33, 124,  'filler');
 insert into t1 values (11, 33, 125,  'filler');
 create table t2 (a int, b int);
 insert into t2 values (11,33), (11,22), (11,11);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	
 1	SIMPLE	t1	ref	PRIMARY	PRIMARY	8	test.t2.a,test.t2.b	1	Using join buffer
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 a	b	c	filler	a	b
 11	11	11	filler	11	11
 11	11	12	filler	11	11
 11	11	13	filler	11	11
 11	22	1234	filler	11	22
 11	33	124	filler	11	33
 11	33	125	filler	11	33
 set join_cache_level=0;
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 a	b	c	filler	a	b
 11	33	124	filler	11	33
 11	33	125	filler	11	33
 11	22	1234	filler	11	22
 11	11	11	filler	11	11
 11	11	12	filler	11	11
 11	11	13	filler	11	11
 set join_cache_level=6;
 drop table t1,t2;
 set @@join_cache_level= @save_join_cache_level;
 set storage_engine=@save_storage_engine;
 drop table t0;
--- a/mysql-test/t/innodb_mrr_cpk.test
+++ b/mysql-test/t/innodb_mrr_cpk.test
@ -0,0 +1,134 @@
 #
 # Tests for DS-MRR over a clustered primary key (DS-MRR/CPK). The only engine
 # that supports this is InnoDB/XtraDB.
 #
 # Testing plan:
 #  - DS-MRR/CPK works only with BKA, so every query below is a join
 #  - Index condition pushdown should also be tested (not done yet, see the
 #    TODO at the end of this file)
 #  - Whatever uses RANGE_SEQ_IF::skip_record() for filtering should also be
 #    tested (not done yet)
 #  - Access using a prefix of the primary key is tested as well
 #
 #  - The cost model is ignored here: BKA's multi_range_read_info() call
 #    passes 10 for #rows, and the call is made only as an applicability check
 #
 -- source include/have_innodb.inc
 --disable_warnings
 drop table if exists t0,t1,t2,t3;
 --enable_warnings
 # Save the session settings this test changes. They are restored at the end.
 set @save_join_cache_level=@@join_cache_level;
 # NOTE(review): level 6 presumably selects a BKA join cache -- confirm.
 set join_cache_level=6;
 set @save_storage_engine=@@storage_engine;
 set storage_engine=innodb;
 # t0 holds the digits 0..9. The three-way self-joins of t0 below are used to
 # generate 1000 rows per data set.
 create table t0(a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
 #
 # Single-column CHAR primary key
 #
 create table t1(a char(8), b char(8), filler char(100), primary key(a));
 show create table t1;
 # Generates keys 'a-1000=A' .. 'a-1999=A'
 insert into t1 select 
  concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
  concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
  'filler'
 from t0 A, t0 B, t0 C;
 # The lookup keys are stored out of key order (1010, 1030, 1020)
 create table t2 (a char(8));
 insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
 --echo This should use join buffer:
 explain select * from t1, t2 where t1.a=t2.a;
 --echo This output must be sorted by value of t1.a:
 select * from t1, t2 where t1.a=t2.a;
 drop table t1, t2;
 #
 # Try multi-column indexes: primary key on (utf8 CHAR, INT)
 #
 create table t1(
  a char(8) character set utf8, b int, filler char(100), 
  primary key(a,b)
 );
 insert into t1 select 
  concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
  1000 + A.a + B.a*10 + C.a*100,
  'filler'
 from t0 A, t0 B, t0 C;
 create table t2 (a char(8) character set utf8, b int);
 insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 # Try with a dataset that causes identical lookup keys: the 1030 and 1020
 # keys now occur twice in t2
 insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 drop table t1, t2;
 #
 # Same data, but the first key part is VARCHAR instead of CHAR
 #
 create table t1(
  a varchar(8) character set utf8, b int, filler char(100), 
  primary key(a,b)
 );
 insert into t1 select 
  concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
  1000 + A.a + B.a*10 + C.a*100,
  'filler'
 from t0 A, t0 B, t0 C;
 create table t2 (a char(8) character set utf8, b int);
 insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 #
 # Try scanning on a CPK prefix: only the first key part is used for lookup
 #
 explain select * from t1, t2 where t1.a=t2.a;
 select * from t1, t2 where t1.a=t2.a;
 drop table t1, t2;
 #
 # The above example is not very interesting, as each CPK prefix has
 # only one match.  Create a dataset where a scan on a CPK prefix
 # produces multiple matches:
 #
 create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
 insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
 # Extra rows that share the (a,b) prefixes looked up below
 insert into t1 values (11, 11, 11,   'filler');
 insert into t1 values (11, 11, 12,   'filler');
 insert into t1 values (11, 11, 13,   'filler');
 insert into t1 values (11, 22, 1234, 'filler');
 insert into t1 values (11, 33, 124,  'filler');
 insert into t1 values (11, 33, 125,  'filler');
 # Lookup prefixes are stored in descending key order
 create table t2 (a int, b int);
 insert into t2 values (11,33), (11,22), (11,11);
 explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 # Re-run the same query with join_cache_level=0. The recorded result has the
 # same rows, but in the order of the t2 rows rather than in key order.
 set join_cache_level=0;
 select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
 set join_cache_level=6;
 drop table t1,t2;
 #
 # TODO: check that Index Condition Pushdown (BKA) actually works.
 #
 # TODO: check that record-check-func is done (presumably this refers to
 # RANGE_SEQ_IF::skip_record() -- confirm). No test exists for it yet.
 #
 # Cleanup: restore the settings saved at the start of the test
 set @@join_cache_level= @save_join_cache_level;
 set storage_engine=@save_storage_engine;
 drop table t0;
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@ -139,8 +139,13 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
                                       uint key_parts, uint *bufsz, 
                                       uint *flags, COST_VECT *cost)
 {
  /* 
    Currently we expect this function to be called only in preparation of scan
    with HA_MRR_SINGLE_POINT property.
    The flag must be tested with '&': OR-ing a non-zero flag into *flags is
    always true, which would make the assertion unable to fail.
  */
  DBUG_ASSERT(*flags & HA_MRR_SINGLE_POINT);
  *bufsz= 0; /* Default implementation doesn't need a buffer */
  //psergey2-todo: assert for singlepoint ranges here?
  *flags |= HA_MRR_USE_DEFAULT_IMPL;
  cost->zero();
@ -323,22 +328,25 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                                        n_ranges, key_parts, mode, buf);
    DBUG_RETURN(retval);
  }
-  rowids_buf= buf->buffer;
+  mrr_buf= buf->buffer;
  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
  if (is_mrr_assoc)
    status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
-  rowids_buf_end= buf->buffer_end;
+  mrr_buf_end= buf->buffer_end;
  doing_cpk_scan= check_cpk_scan(h->active_index, mode); 
  if (doing_cpk_scan)
  {
    /* 
      When doing a scan on CPK, the buffer stores {lookup_tuple, range_id}
      pairs 
    */
    uint keylen=0;
    DBUG_ASSERT(key_parts != 0);
    //psergey2-todo: new elem_size here
    for (uint kp= 0; kp < key_parts; kp++)
      keylen += table->key_info[h->active_index].key_part[kp].store_length;
@ -350,12 +358,29 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
    use_default_impl= FALSE;
  }
  else
  {
    /* In regular DS-MRR, buffer stores {rowid, range_id} pairs */
    elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  }
-  rowids_buf_last= rowids_buf + 
+  mrr_buf_last= mrr_buf + 
-                      ((rowids_buf_end - rowids_buf)/ elem_size)*
+                      ((mrr_buf_end - mrr_buf)/ elem_size)*
                      elem_size;
-  rowids_buf_end= rowids_buf_last;
+  mrr_buf_end= mrr_buf_last;
  if (doing_cpk_scan)
  {
    /* 
      DS-MRR/CPK: fill buffer with lookup tuples and sort; also we don't need a
      secondary handler object.
    */
    h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
    h->mrr_funcs= *seq_funcs;
    dsmrr_fill_buffer_cpk();
    if (dsmrr_eof) 
      buf->end_of_used_area= mrr_buf_last;
    DBUG_RETURN(0); /* nothing could go wrong while filling the buffer */
  }
  /*
    There can be two cases:
@ -365,9 +390,6 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
       The caller might have called h->index_init(), need to switch h to
       rnd_pos calls.
  */
  //psergey2-todo: don't create secondary for CPK scan.
  if (!doing_cpk_scan)
  {
  if (!h2)
  {
    /* Create a separate handler object to do rndpos() calls. */
@ -431,19 +453,6 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
    if (res)
      goto error;
  }
  }
  else
  {
    //doing DS-MRR/CPK
    // fill-buffer-analog
    // eof
    h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
    h->mrr_funcs= *seq_funcs;
    dsmrr_fill_buffer_cpk();
    if (dsmrr_eof) 
      buf->end_of_used_area= rowids_buf_last;
    DBUG_RETURN(0); // nothing can go wrong while filling the buffer
  }
  if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
                                         key_parts, mode, buf) || 
@ -456,7 +465,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
    adjust *buf to indicate that the remaining buffer space will not be used.
  */
  if (dsmrr_eof) 
-    buf->end_of_used_area= rowids_buf_last;
+    buf->end_of_used_area= mrr_buf_last;
  /*
     h->inited == INDEX may occur when 'range checked for each record' is
@ -513,6 +522,9 @@ static int rowid_cmp(void *h, uchar *a, uchar *b)
  The function assumes that rowids buffer is empty when it is invoked. 
  dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
  scanning.
  @param h  Table handler
  @retval 0      OK, the next portion of rowids is in the buffer,
@ -526,8 +538,8 @@ int DsMrr_impl::dsmrr_fill_buffer()
  int res;
  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
-  rowids_buf_cur= rowids_buf;
+  mrr_buf_cur= mrr_buf;
-  while ((rowids_buf_cur < rowids_buf_end) && 
+  while ((mrr_buf_cur < mrr_buf_end) && 
         !(res= h2->handler::multi_range_read_next(&range_info)))
  {
    KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
@ -537,13 +549,13 @@ int DsMrr_impl::dsmrr_fill_buffer()
    /* Put rowid, or {rowid, range_id} pair into the buffer */
    h2->position(table->record[0]);
-    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
+    memcpy(mrr_buf_cur, h2->ref, h2->ref_length);
-    rowids_buf_cur += h2->ref_length;
+    mrr_buf_cur += h2->ref_length;
    if (is_mrr_assoc)
    {
-      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
+      memcpy(mrr_buf_cur, &range_info, sizeof(void*));
-      rowids_buf_cur += sizeof(void*);
+      mrr_buf_cur += sizeof(void*);
    }
  }
@ -553,27 +565,29 @@ int DsMrr_impl::dsmrr_fill_buffer()
  /* Sort the buffer contents by rowid */
  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
-  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+  uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
-  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
+  my_qsort2(mrr_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
            (void*)h);
-  rowids_buf_last= rowids_buf_cur;
+  mrr_buf_last= mrr_buf_cur;
-  rowids_buf_cur=  rowids_buf;
+  mrr_buf_cur=  mrr_buf;
  DBUG_RETURN(0);
 }
-/* qsort-compatible function to compare key tuples */
+/* 
  my_qsort2-compatible function to compare key tuples 
 */
 int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
 {
  DsMrr_impl *dsmrr= (DsMrr_impl*)arg;
  TABLE *table= dsmrr->h->table;
  KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part;
-  KEY_PART_INFO *part_end= part + dsmrr->cpk_n_parts;
+  uchar *key1_end= key1 + dsmrr->cpk_tuple_length;
-  //uint32 *lengths=item->field_lengths;
+  while (key1 < key1_end)
  for (; part < part_end; ++part)
  {
    Field* f = part->field;
    int len = part->store_length;
@ -582,33 +596,43 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
      return res;
    key1 += len;
    key2 += len;
    part++;
  }
  return 0;
 }
-//psergey2:
+/*
-int DsMrr_impl::dsmrr_fill_buffer_cpk()
+  DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
  DESCRIPTION
    DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
    dsmrr_eof is set to indicate whether we've exhausted the list of ranges 
    we're scanning.
 */
 void DsMrr_impl::dsmrr_fill_buffer_cpk()
 {
  int res;
  KEY_MULTI_RANGE cur_range;
  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer_cpk");
-  rowids_buf_cur= rowids_buf;
+  mrr_buf_cur= mrr_buf;
-  while ((rowids_buf_cur < rowids_buf_end) && 
+  while ((mrr_buf_cur < mrr_buf_end) && 
         !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range)))
  {
    DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
    DBUG_ASSERT(cpk_tuple_length == cur_range.start_key.length);
    /* Put key, or {key, range_id} pair into the buffer */
-    memcpy(rowids_buf_cur, cur_range.start_key.key, cpk_tuple_length);
+    memcpy(mrr_buf_cur, cur_range.start_key.key, cpk_tuple_length);
-    rowids_buf_cur += cpk_tuple_length;
+    mrr_buf_cur += cpk_tuple_length;
    if (is_mrr_assoc)
    {
-      memcpy(rowids_buf_cur, &cur_range.ptr, sizeof(void*));
+      memcpy(mrr_buf_cur, &cur_range.ptr, sizeof(void*));
-      rowids_buf_cur += sizeof(void*);
+      mrr_buf_cur += sizeof(void*);
    }
  }
@ -616,77 +640,82 @@ int DsMrr_impl::dsmrr_fill_buffer_cpk()
  /* Sort the buffer contents by lookup key tuple */
  uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*);
-  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+  uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
-  my_qsort2(rowids_buf, n_rowids, elem_size, 
+  my_qsort2(mrr_buf, n_rowids, elem_size, 
            (qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this);
-  rowids_buf_last= rowids_buf_cur;
+  mrr_buf_last= mrr_buf_cur;
-  rowids_buf_cur=  rowids_buf;
+  mrr_buf_cur=  mrr_buf;
-  DBUG_RETURN(0);
+  DBUG_VOID_RETURN;
 }
 /*
-  CPK: so, the source is 
+  DS-MRR/CPK: multi_range_read_next() function
-   - buffer exhaustion/re-fill
+
-   - advance to next range on "record-not-found" error.
+  DESCRIPTION
-   - if scanning on a prefix, enumerate all records for a key.
+    DsMrr_impl::dsmrr_next_cpk() 
  DESCRIPTION
    DS-MRR/CPK: multi_range_read_next() function. 
    This is similar to DsMrr_impl::dsmrr_next(), the differences are that
     - we get records with index_read(), not with rnd_pos()
     - we may get multiple records for one key (=element of the buffer)
     - unlike dsmrr_fill_buffer(), dsmrr_fill_buffer_cpk() never fails.
  RETURN
    0                   OK, next record was successfully read
    HA_ERR_END_OF_FILE  End of records
    Other               Some other error
 */
 int DsMrr_impl::dsmrr_next_cpk(char **range_info)
 {
  int res;
  if (cpk_have_range)
  {
-    res= h->index_next_same(table->record[0], rowids_buf_cur, cpk_tuple_length);
+    res= h->index_next_same(table->record[0], mrr_buf_cur, cpk_tuple_length);
    if (res != HA_ERR_END_OF_FILE)
    {
      // todo
      if (is_mrr_assoc)
        memcpy(range_info, &cpk_saved_range_info, sizeof(void*));
      return res;
    }
-    /* 
+    /* No more records in this range. Fall through to get to another range  */
      Ok, we got EOF for records in this range. Fall through to get to another
      range.
    */
  }
  do
  {
    /* First, make sure we have a range at start of the buffer */
-    if (rowids_buf_cur == rowids_buf_last)
+    if (mrr_buf_cur == mrr_buf_last)
    {
      if (dsmrr_eof)
      {
        res= HA_ERR_END_OF_FILE;
        goto end;
      }
-      // TODO: the return values are mix of HA_ERR_ codes and TRUE as "generic
+      dsmrr_fill_buffer_cpk();
      //       failure" error. Is this ok?
      if ((res= dsmrr_fill_buffer_cpk()))
        goto end;
    }
-   
+    if (mrr_buf_cur == mrr_buf_last)
    if (rowids_buf_cur == rowids_buf_last)
    {
      res= HA_ERR_END_OF_FILE;
      goto end;
    }
-    //TODO: make skip_index_tuple() calls, too?
+    //psergey2-todo: make skip_index_tuple() calls, too?
-    //TODO: skip-record calls here?
+    //psergey2-todo: skip-record calls here?
    //if (h2->mrr_funcs.skip_record &&
    //	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
    //  continue;
    /* Ok, got the range. Try making a lookup.  */
-    uchar *lookup_tuple= rowids_buf_cur;
+    uchar *lookup_tuple= mrr_buf_cur;
-    rowids_buf_cur += cpk_tuple_length;
+    mrr_buf_cur += cpk_tuple_length;
    if (is_mrr_assoc)
    {
      /*
        The buffer element carries the range_id value itself. Copy it INTO
        the cpk_saved_range_info member (note the '&'), not to the address
        the still-unset member happens to hold.
      */
-      memcpy(cpk_saved_range_info, rowids_buf_cur, sizeof(void*));
+      memcpy(&cpk_saved_range_info, mrr_buf_cur, sizeof(void*));
-      rowids_buf_cur += sizeof(void*) * test(is_mrr_assoc);
+      mrr_buf_cur += sizeof(void*) * test(is_mrr_assoc);
    }
    res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length, 
@ -698,6 +727,10 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info)
    if (!res)
    {
      /*
        Return the saved range_id value to the caller. This mirrors the
        index_next_same() branch at the top of this function: copy the
        member's value, and only when range association was requested.
      */
      if (is_mrr_assoc)
        memcpy(range_info, &cpk_saved_range_info, sizeof(void*));
      /* 
        Attempt reading more rows from this range only if there actually can
        be multiple matches:
       */
      cpk_have_range= !cpk_is_unique_scan;
      break;
    }
@ -707,6 +740,7 @@ end:
  return res;
 }
 /**
  DS-MRR implementation: multi_range_read_next() function
 */
@ -725,7 +759,7 @@ int DsMrr_impl::dsmrr_next(char **range_info)
  do
  {
-    if (rowids_buf_cur == rowids_buf_last)
+    if (mrr_buf_cur == mrr_buf_last)
    {
      if (dsmrr_eof)
      {
@ -738,17 +772,17 @@ int DsMrr_impl::dsmrr_next(char **range_info)
    }
    /* return eof if there are no rowids in the buffer after re-fill attempt */
-    if (rowids_buf_cur == rowids_buf_last)
+    if (mrr_buf_cur == mrr_buf_last)
    {
      res= HA_ERR_END_OF_FILE;
      goto end;
    }
-    rowid= rowids_buf_cur;
+    rowid= mrr_buf_cur;
    if (is_mrr_assoc)
-      memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
+      memcpy(&cur_range_info, mrr_buf_cur + h->ref_length, sizeof(uchar**));
-    rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
+    mrr_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
    if (h2->mrr_funcs.skip_record &&
 	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
      continue;
@ -870,7 +904,33 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
  return FALSE;
 }
-/**
+
 /*
  Decide whether the DS-MRR/CPK strategy is applicable to a scan

  SYNOPSIS
   DsMrr_impl::check_cpk_scan()
     keyno      Index that will be used
     mrr_flags  HA_MRR_* flags describing the scan

  DESCRIPTION
    DS-MRR/CPK is chosen only when all of the following hold (they are
    checked in this order, stopping at the first one that fails):
     - the scan is flagged HA_MRR_SINGLE_POINT
     - the scan is not flagged HA_MRR_SORTED
     - keyno is the table's primary key
     - the handler reports that its primary key is clustered

  RETURN
    TRUE   DS-MRR/CPK should be used
    FALSE  Otherwise
 */
 bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags)
 {
  if (!(mrr_flags & HA_MRR_SINGLE_POINT))
    return FALSE;
  if (mrr_flags & HA_MRR_SORTED)
    return FALSE;
  if (keyno != table->s->primary_key)
    return FALSE;
  /* The handler is consulted last, only after the cheap flag/key checks */
  return test(h->primary_key_is_clustered());
 }
 /*
  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
  Make the choice between using Default MRR implementation and DS-MRR.
@ -892,13 +952,7 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
  @retval TRUE   Default MRR implementation should be used
  @retval FALSE  DS-MRR implementation should be used
 */
-bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags)
+
 {
  return test((mrr_flags & HA_MRR_SINGLE_POINT) && 
              !(mrr_flags & HA_MRR_SORTED) && 
              keyno == table->s->primary_key && 
              h->primary_key_is_clustered());
 }
 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
                                 uint *bufsz, COST_VECT *cost)
@ -906,9 +960,8 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
  COST_VECT dsmrr_cost;
  bool res;
  THD *thd= current_thd;
  //psergey2: check the criteria.
  doing_cpk_scan= check_cpk_scan(keyno, *flags); 
  doing_cpk_scan= check_cpk_scan(keyno, *flags); 
  if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
      (keyno == table->s->primary_key && h->primary_key_is_clustered() &&
       !doing_cpk_scan) ||
--- a/sql/multi_range_read.h
+++ b/sql/multi_range_read.h
@ -1,16 +1,76 @@
 /*
-  This file contains declarations for 
+  This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR) 
-   - Disk-Sweep MultiRangeRead (DS-MRR) implementation
+  implementation
 */
 /**
-  A Disk-Sweep MRR interface implementation
+  A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
-  This implementation makes range (and, in the future, 'ref') scans to read
+  This is a "plugin"(*) for storage engines that lets index scans
-  table rows in disk sweeps. 
+  read table rows in rowid order. For disk-based storage engines, this is
  faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
-  Currently it is used by MyISAM and InnoDB. Potentially it can be used with
+  (*) - only conceptually. No dynamic loading or binary compatibility of any
-  any table handler that has non-clustered indexes and on-disk rows.
+        kind.
  General scheme of things:
      SQL Layer code
       |   |   |
      -v---v---v---- handler->multi_range_read_XXX() function calls
       |   |   |
      ____________________________________
     / DS-MRR module                      \
     |  (scan indexes, order rowids, do    |
     |   full record reads in rowid order) |
     \____________________________________/
       |   |   |
      -|---|---|----- handler->read_range_first()/read_range_next(), 
       |   |   |      handler->index_read(), handler->rnd_pos() calls.
       |   |   |
       v   v   v
      Storage engine internals
  Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
  Potentially it can be used with any table handler that has disk-based data
  storage and has better performance when reading data in rowid order.
 */
 /*
  DS-MRR implementation for one table. Create/use one object of this class for
  each ha_{myisam/innobase/etc} object. That object will be further referred to
  as "the handler"
  There are actually three strategies
   S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
      MRR-to-non-MRR calls converter)
   S2. Regular DS-MRR 
   S3. DS-MRR/CPK for doing scans on clustered primary keys.
  S1 is used for cases which DS-MRR is unable to handle for some reason.
  S2 is the actual DS-MRR. The basic algorithm is as follows:
    1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and 
        fill the buffer with {rowid, range_id} pairs
    2. Sort the buffer by rowid
    3. for each {rowid, range_id} pair in the buffer
         get record by rowid and return the {record, range_id} pair
    4. Repeat the above steps until we've exhausted the list of ranges we're
       scanning.
  S3 is the variant of DS-MRR for use with clustered primary keys (or any
  clustered index). The idea is that with a clustered index it is sufficient
  to access the index in index order, so we don't need the intermediate step
  of collecting rowids (step #1 in S2).
   DS-MRR/CPK's basic algorithm is as follows:
    1. Collect a number of ranges (=lookup keys)
    2. Sort them so that they follow in index order.
    3. for each {lookup_key, range_id} pair in the buffer 
       get record(s) matching the lookup key and return {record, range_id} pairs
    4. Repeat the above steps until we've exhausted the list of ranges we're
       scanning.
 */
 class DsMrr_impl
@ -21,40 +81,6 @@ public:
  DsMrr_impl()
    : h2(NULL) {};
  /*
    The "owner" handler object (the one that calls dsmrr_XXX functions.
    It is used to retrieve full table rows by calling rnd_pos().
  */
  handler *h;
  TABLE *table; /* Always equal to h->table */
 private:
  /* Secondary handler object.  It is used for scanning the index */
  handler *h2;
  /* Buffer to store rowids, or (rowid, range_id) pairs */
  uchar *rowids_buf;
  uchar *rowids_buf_cur;   /* Current position when reading/writing */
  uchar *rowids_buf_last;  /* When reading: end of used buffer space */
  uchar *rowids_buf_end;   /* End of the buffer */
  bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
  /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
  bool is_mrr_assoc;
  bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
  bool doing_cpk_scan;
  uint cpk_tuple_length;
  uint cpk_n_parts;
  bool cpk_is_unique_scan;
  char *cpk_saved_range_info;
  bool cpk_have_range;
  bool check_cpk_scan(uint keyno, uint mrr_flags);
  static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
 public:
  void init(handler *h_arg, TABLE *table_arg)
  {
    h= h_arg; 
@ -64,10 +90,7 @@ public:
                 uint n_ranges, uint key_parts, uint mode, 
                 HANDLER_BUFFER *buf);
  void dsmrr_close();
  int dsmrr_fill_buffer();
  int dsmrr_fill_buffer_cpk();
  int dsmrr_next(char **range_info);
  int dsmrr_next_cpk(char **range_info);
  ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, 
                     uint *bufsz, uint *flags, COST_VECT *cost);
@ -76,9 +99,53 @@ public:
                            void *seq_init_param, uint n_ranges, uint *bufsz,
                            uint *flags, COST_VECT *cost);
 private:
  /*
    The "owner" handler object (the one that calls dsmrr_XXX functions).
    It is used to retrieve full table rows by calling rnd_pos().
  */
  handler *h;
  TABLE *table; /* Always equal to h->table */
  /* Secondary handler object.  It is used for scanning the index */
  handler *h2;
  /*
    Buffer to store rowids or (rowid, range_id) pairs. When doing a DS-MRR/CPK
    scan it stores lookup tuples or (lookup_tuple, range_id) pairs instead.
  */
  uchar *mrr_buf;
  uchar *mrr_buf_cur;   /* Current position when reading/writing */
  uchar *mrr_buf_last;  /* When reading: end of used buffer space */
  uchar *mrr_buf_end;   /* End of the buffer */
  bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
  /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
  bool is_mrr_assoc;
  bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
  bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */
  /** DS-MRR/CPK variables start */
  /* Length of lookup tuple being used, in bytes */
  uint cpk_tuple_length;
  /*
    TRUE <=> We're scanning on a full primary key (and not on prefix), and so 
    can get max. one match for each key 
  */
  bool cpk_is_unique_scan;
  /* TRUE<=> we're in a middle of enumerating records from a range */ 
  bool cpk_have_range;
  /* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */
  char *cpk_saved_range_info;
  bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, 
                       COST_VECT *cost);
  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, 
                               uint *buffer_size, COST_VECT *cost);
  bool check_cpk_scan(uint keyno, uint mrr_flags);
  static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
  int dsmrr_fill_buffer();
  void dsmrr_fill_buffer_cpk();
  int dsmrr_next_cpk(char **range_info);
 };