mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MWL#121: DS-MRR support for clustered primary keys
- Merge with current 5.3
This commit is contained in:
148
mysql-test/r/innodb_mrr_cpk.result
Normal file
148
mysql-test/r/innodb_mrr_cpk.result
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
drop table if exists t0,t1,t2,t3;
|
||||||
|
set @save_join_cache_level=@@join_cache_level;
|
||||||
|
set join_cache_level=6;
|
||||||
|
set @save_storage_engine=@@storage_engine;
|
||||||
|
set storage_engine=innodb;
|
||||||
|
create table t0(a int);
|
||||||
|
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t1(a char(8), b char(8), filler char(100), primary key(a));
|
||||||
|
show create table t1;
|
||||||
|
Table Create Table
|
||||||
|
t1 CREATE TABLE `t1` (
|
||||||
|
`a` char(8) NOT NULL DEFAULT '',
|
||||||
|
`b` char(8) DEFAULT NULL,
|
||||||
|
`filler` char(100) DEFAULT NULL,
|
||||||
|
PRIMARY KEY (`a`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=latin1
|
||||||
|
insert into t1 select
|
||||||
|
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
|
||||||
|
concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
|
||||||
|
'filler'
|
||||||
|
from t0 A, t0 B, t0 C;
|
||||||
|
create table t2 (a char(8));
|
||||||
|
insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
|
||||||
|
This should use join buffer:
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
|
||||||
|
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer
|
||||||
|
This output must be sorted by value of t1.a:
|
||||||
|
select * from t1, t2 where t1.a=t2.a;
|
||||||
|
a b filler a
|
||||||
|
a-1010=A b-1010=B filler a-1010=A
|
||||||
|
a-1020=A b-1020=B filler a-1020=A
|
||||||
|
a-1030=A b-1030=B filler a-1030=A
|
||||||
|
drop table t1, t2;
|
||||||
|
create table t1(
|
||||||
|
a char(8) character set utf8, b int, filler char(100),
|
||||||
|
primary key(a,b)
|
||||||
|
);
|
||||||
|
insert into t1 select
|
||||||
|
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
|
||||||
|
1000 + A.a + B.a*10 + C.a*100,
|
||||||
|
'filler'
|
||||||
|
from t0 A, t0 B, t0 C;
|
||||||
|
create table t2 (a char(8) character set utf8, b int);
|
||||||
|
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
|
||||||
|
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
a b filler a b
|
||||||
|
a-1010=A 1010 filler a-1010=A 1010
|
||||||
|
a-1020=A 1020 filler a-1020=A 1020
|
||||||
|
a-1030=A 1030 filler a-1030=A 1030
|
||||||
|
insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 5
|
||||||
|
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
a b filler a b
|
||||||
|
a-1010=A 1010 filler a-1010=A 1010
|
||||||
|
a-1020=A 1020 filler a-1020=A 1020
|
||||||
|
a-1020=A 1020 filler a-1020=A 1020
|
||||||
|
a-1030=A 1030 filler a-1030=A 1030
|
||||||
|
a-1030=A 1030 filler a-1030=A 1030
|
||||||
|
drop table t1, t2;
|
||||||
|
create table t1(
|
||||||
|
a varchar(8) character set utf8, b int, filler char(100),
|
||||||
|
primary key(a,b)
|
||||||
|
);
|
||||||
|
insert into t1 select
|
||||||
|
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
|
||||||
|
1000 + A.a + B.a*10 + C.a*100,
|
||||||
|
'filler'
|
||||||
|
from t0 A, t0 B, t0 C;
|
||||||
|
create table t2 (a char(8) character set utf8, b int);
|
||||||
|
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
|
||||||
|
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
a b filler a b
|
||||||
|
a-1010=A 1010 filler a-1010=A 1010
|
||||||
|
a-1020=A 1020 filler a-1020=A 1020
|
||||||
|
a-1030=A 1030 filler a-1030=A 1030
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
|
||||||
|
1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer
|
||||||
|
select * from t1, t2 where t1.a=t2.a;
|
||||||
|
a b filler a b
|
||||||
|
a-1010=A 1010 filler a-1010=A 1010
|
||||||
|
a-1020=A 1020 filler a-1020=A 1020
|
||||||
|
a-1030=A 1030 filler a-1030=A 1030
|
||||||
|
drop table t1, t2;
|
||||||
|
create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
|
||||||
|
insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
|
||||||
|
insert into t1 values (11, 11, 11, 'filler');
|
||||||
|
insert into t1 values (11, 11, 12, 'filler');
|
||||||
|
insert into t1 values (11, 11, 13, 'filler');
|
||||||
|
insert into t1 values (11, 22, 1234, 'filler');
|
||||||
|
insert into t1 values (11, 33, 124, 'filler');
|
||||||
|
insert into t1 values (11, 33, 125, 'filler');
|
||||||
|
create table t2 (a int, b int);
|
||||||
|
insert into t2 values (11,33), (11,22), (11,11);
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
|
||||||
|
1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
a b c filler a b
|
||||||
|
11 11 11 filler 11 11
|
||||||
|
11 11 12 filler 11 11
|
||||||
|
11 11 13 filler 11 11
|
||||||
|
11 22 1234 filler 11 22
|
||||||
|
11 33 124 filler 11 33
|
||||||
|
11 33 125 filler 11 33
|
||||||
|
set join_cache_level=0;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
a b c filler a b
|
||||||
|
11 33 124 filler 11 33
|
||||||
|
11 33 125 filler 11 33
|
||||||
|
11 22 1234 filler 11 22
|
||||||
|
11 11 11 filler 11 11
|
||||||
|
11 11 12 filler 11 11
|
||||||
|
11 11 13 filler 11 11
|
||||||
|
set join_cache_level=6;
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
|
||||||
|
1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using index condition(BKA); Using join buffer
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
a b c filler a b
|
||||||
|
set optimizer_switch='index_condition_pushdown=off';
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
|
||||||
|
1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using where; Using join buffer
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
a b c filler a b
|
||||||
|
set optimizer_switch='index_condition_pushdown=on';
|
||||||
|
drop table t1,t2;
|
||||||
|
set @@join_cache_level= @save_join_cache_level;
|
||||||
|
set storage_engine=@save_storage_engine;
|
||||||
|
drop table t0;
|
137
mysql-test/t/innodb_mrr_cpk.test
Normal file
137
mysql-test/t/innodb_mrr_cpk.test
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
#
|
||||||
|
# Tests for DS-MRR over clustered primary key. The only engine that supports
|
||||||
|
# this is InnoDB/XtraDB.
|
||||||
|
#
|
||||||
|
# Basic idea about testing
|
||||||
|
# - DS-MRR/CPK works only with BKA
|
||||||
|
# - Should also test index condition pushdown
|
||||||
|
# - Should also test whatever uses RANGE_SEQ_IF::skip_record() for filtering
|
||||||
|
# - Also test access using prefix of primary key
|
||||||
|
#
|
||||||
|
# - Forget about cost model, BKA's multi_range_read_info() call passes 10 for
|
||||||
|
# #rows, the call is there at all only for applicability check
|
||||||
|
#
|
||||||
|
-- source include/have_innodb.inc
|
||||||
|
|
||||||
|
--disable_warnings
|
||||||
|
drop table if exists t0,t1,t2,t3;
|
||||||
|
--enable_warnings
|
||||||
|
|
||||||
|
set @save_join_cache_level=@@join_cache_level;
|
||||||
|
set join_cache_level=6;
|
||||||
|
|
||||||
|
set @save_storage_engine=@@storage_engine;
|
||||||
|
set storage_engine=innodb;
|
||||||
|
|
||||||
|
create table t0(a int);
|
||||||
|
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t1(a char(8), b char(8), filler char(100), primary key(a));
|
||||||
|
show create table t1;
|
||||||
|
|
||||||
|
insert into t1 select
|
||||||
|
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
|
||||||
|
concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
|
||||||
|
'filler'
|
||||||
|
from t0 A, t0 B, t0 C;
|
||||||
|
|
||||||
|
create table t2 (a char(8));
|
||||||
|
insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
|
||||||
|
|
||||||
|
--echo This should use join buffer:
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a;
|
||||||
|
|
||||||
|
--echo This output must be sorted by value of t1.a:
|
||||||
|
select * from t1, t2 where t1.a=t2.a;
|
||||||
|
drop table t1, t2;
|
||||||
|
|
||||||
|
# Try multi-column indexes
|
||||||
|
create table t1(
|
||||||
|
a char(8) character set utf8, b int, filler char(100),
|
||||||
|
primary key(a,b)
|
||||||
|
);
|
||||||
|
|
||||||
|
insert into t1 select
|
||||||
|
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
|
||||||
|
1000 + A.a + B.a*10 + C.a*100,
|
||||||
|
'filler'
|
||||||
|
from t0 A, t0 B, t0 C;
|
||||||
|
|
||||||
|
create table t2 (a char(8) character set utf8, b int);
|
||||||
|
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
|
||||||
|
# Try with dataset that causes identical lookup keys:
|
||||||
|
insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
|
||||||
|
drop table t1, t2;
|
||||||
|
|
||||||
|
create table t1(
|
||||||
|
a varchar(8) character set utf8, b int, filler char(100),
|
||||||
|
primary key(a,b)
|
||||||
|
);
|
||||||
|
|
||||||
|
insert into t1 select
|
||||||
|
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
|
||||||
|
1000 + A.a + B.a*10 + C.a*100,
|
||||||
|
'filler'
|
||||||
|
from t0 A, t0 B, t0 C;
|
||||||
|
|
||||||
|
create table t2 (a char(8) character set utf8, b int);
|
||||||
|
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Try scanning on a CPK prefix
|
||||||
|
#
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a;
|
||||||
|
select * from t1, t2 where t1.a=t2.a;
|
||||||
|
drop table t1, t2;
|
||||||
|
|
||||||
|
#
|
||||||
|
# The above example is not very interesting, as CPK prefix has
|
||||||
|
# only one match. Create a dataset where scan on CPK prefix
|
||||||
|
# would produce multiple matches:
|
||||||
|
#
|
||||||
|
create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
|
||||||
|
insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
|
||||||
|
|
||||||
|
insert into t1 values (11, 11, 11, 'filler');
|
||||||
|
insert into t1 values (11, 11, 12, 'filler');
|
||||||
|
insert into t1 values (11, 11, 13, 'filler');
|
||||||
|
insert into t1 values (11, 22, 1234, 'filler');
|
||||||
|
insert into t1 values (11, 33, 124, 'filler');
|
||||||
|
insert into t1 values (11, 33, 125, 'filler');
|
||||||
|
|
||||||
|
create table t2 (a int, b int);
|
||||||
|
insert into t2 values (11,33), (11,22), (11,11);
|
||||||
|
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
|
||||||
|
# Check a real resultset for comaprison:
|
||||||
|
set join_cache_level=0;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
|
||||||
|
set join_cache_level=6;
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Check that Index Condition Pushdown (BKA) actually works:
|
||||||
|
#
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
|
||||||
|
set optimizer_switch='index_condition_pushdown=off';
|
||||||
|
explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
|
||||||
|
set optimizer_switch='index_condition_pushdown=on';
|
||||||
|
|
||||||
|
drop table t1,t2;
|
||||||
|
|
||||||
|
set @@join_cache_level= @save_join_cache_level;
|
||||||
|
set storage_engine=@save_storage_engine;
|
||||||
|
drop table t0;
|
||||||
|
|
@@ -1278,9 +1278,9 @@ void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
|
|||||||
COST_VECT *cost);
|
COST_VECT *cost);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The below two are not used (and not handled) in this milestone of this WL
|
Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
|
||||||
entry because there seems to be no use for them at this stage of
|
The ranges may not use the full key but all of them will use the same number
|
||||||
implementation.
|
of key parts.
|
||||||
*/
|
*/
|
||||||
#define HA_MRR_SINGLE_POINT 1
|
#define HA_MRR_SINGLE_POINT 1
|
||||||
#define HA_MRR_FIXED_KEY 2
|
#define HA_MRR_FIXED_KEY 2
|
||||||
@@ -1806,9 +1806,10 @@ public:
|
|||||||
uint n_ranges, uint *bufsz,
|
uint n_ranges, uint *bufsz,
|
||||||
uint *flags, COST_VECT *cost);
|
uint *flags, COST_VECT *cost);
|
||||||
virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
||||||
uint *bufsz, uint *flags, COST_VECT *cost);
|
uint key_parts, uint *bufsz,
|
||||||
|
uint *flags, COST_VECT *cost);
|
||||||
virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
|
virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
|
||||||
uint n_ranges, uint mode,
|
uint n_ranges, uint mode,
|
||||||
HANDLER_BUFFER *buf);
|
HANDLER_BUFFER *buf);
|
||||||
virtual int multi_range_read_next(char **range_info);
|
virtual int multi_range_read_next(char **range_info);
|
||||||
virtual int read_range_first(const key_range *start_key,
|
virtual int read_range_first(const key_range *start_key,
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
#include "mysql_priv.h"
|
#include "mysql_priv.h"
|
||||||
|
#include <my_bit.h>
|
||||||
#include "sql_select.h"
|
#include "sql_select.h"
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
@@ -136,10 +137,16 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
|
ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
|
||||||
uint *bufsz, uint *flags, COST_VECT *cost)
|
uint key_parts, uint *bufsz,
|
||||||
|
uint *flags, COST_VECT *cost)
|
||||||
{
|
{
|
||||||
*bufsz= 0; /* Default implementation doesn't need a buffer */
|
/*
|
||||||
|
Currently we expect this function to be called only in preparation of scan
|
||||||
|
with HA_MRR_SINGLE_POINT property.
|
||||||
|
*/
|
||||||
|
DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
|
||||||
|
|
||||||
|
*bufsz= 0; /* Default implementation doesn't need a buffer */
|
||||||
*flags |= HA_MRR_USE_DEFAULT_IMPL;
|
*flags |= HA_MRR_USE_DEFAULT_IMPL;
|
||||||
|
|
||||||
cost->zero();
|
cost->zero();
|
||||||
@@ -316,25 +323,39 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
|||||||
{
|
{
|
||||||
use_default_impl= TRUE;
|
use_default_impl= TRUE;
|
||||||
const int retval=
|
const int retval=
|
||||||
h->handler::multi_range_read_init(seq_funcs, seq_init_param,
|
h->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
|
||||||
n_ranges, mode, buf);
|
mode, buf);
|
||||||
DBUG_RETURN(retval);
|
DBUG_RETURN(retval);
|
||||||
}
|
}
|
||||||
rowids_buf= buf->buffer;
|
mrr_buf= buf->buffer;
|
||||||
|
|
||||||
is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
|
is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
|
||||||
|
|
||||||
if (is_mrr_assoc)
|
if (is_mrr_assoc)
|
||||||
status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
|
status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
|
||||||
|
|
||||||
rowids_buf_end= buf->buffer_end;
|
mrr_buf_end= buf->buffer_end;
|
||||||
elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
|
|
||||||
rowids_buf_last= rowids_buf +
|
|
||||||
((rowids_buf_end - rowids_buf)/ elem_size)*
|
|
||||||
elem_size;
|
|
||||||
rowids_buf_end= rowids_buf_last;
|
|
||||||
|
|
||||||
/*
|
if ((doing_cpk_scan= check_cpk_scan(h->active_index, mode)))
|
||||||
|
{
|
||||||
|
/* It's a DS-MRR/CPK scan */
|
||||||
|
cpk_tuple_length= 0; /* dummy value telling it needs to be inited */
|
||||||
|
cpk_have_range= FALSE;
|
||||||
|
use_default_impl= FALSE;
|
||||||
|
h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
|
||||||
|
h->mrr_funcs= *seq_funcs;
|
||||||
|
dsmrr_fill_buffer_cpk();
|
||||||
|
if (dsmrr_eof)
|
||||||
|
buf->end_of_used_area= mrr_buf_last;
|
||||||
|
DBUG_RETURN(0); /* nothing could go wrong while filling the buffer */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In regular DS-MRR, buffer stores {rowid, range_id} pairs */
|
||||||
|
elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
|
||||||
|
mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/ elem_size)* elem_size;
|
||||||
|
mrr_buf_end= mrr_buf_last;
|
||||||
|
|
||||||
|
/*
|
||||||
There can be two cases:
|
There can be two cases:
|
||||||
- This is the first call since index_init(), h2==NULL
|
- This is the first call since index_init(), h2==NULL
|
||||||
Need to setup h2 then.
|
Need to setup h2 then.
|
||||||
@@ -406,8 +427,8 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
|
if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
|
||||||
mode, buf) ||
|
mode, buf) ||
|
||||||
dsmrr_fill_buffer())
|
dsmrr_fill_buffer())
|
||||||
{
|
{
|
||||||
goto error;
|
goto error;
|
||||||
@@ -417,7 +438,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
|||||||
adjust *buf to indicate that the remaining buffer space will not be used.
|
adjust *buf to indicate that the remaining buffer space will not be used.
|
||||||
*/
|
*/
|
||||||
if (dsmrr_eof)
|
if (dsmrr_eof)
|
||||||
buf->end_of_used_area= rowids_buf_last;
|
buf->end_of_used_area= mrr_buf_last;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
h->inited == INDEX may occur when 'range checked for each record' is
|
h->inited == INDEX may occur when 'range checked for each record' is
|
||||||
@@ -473,6 +494,9 @@ static int rowid_cmp(void *h, uchar *a, uchar *b)
|
|||||||
rowid and return.
|
rowid and return.
|
||||||
|
|
||||||
The function assumes that rowids buffer is empty when it is invoked.
|
The function assumes that rowids buffer is empty when it is invoked.
|
||||||
|
|
||||||
|
dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
|
||||||
|
scanning.
|
||||||
|
|
||||||
@param h Table handler
|
@param h Table handler
|
||||||
|
|
||||||
@@ -487,8 +511,8 @@ int DsMrr_impl::dsmrr_fill_buffer()
|
|||||||
int res;
|
int res;
|
||||||
DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
|
DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
|
||||||
|
|
||||||
rowids_buf_cur= rowids_buf;
|
mrr_buf_cur= mrr_buf;
|
||||||
while ((rowids_buf_cur < rowids_buf_end) &&
|
while ((mrr_buf_cur < mrr_buf_end) &&
|
||||||
!(res= h2->handler::multi_range_read_next(&range_info)))
|
!(res= h2->handler::multi_range_read_next(&range_info)))
|
||||||
{
|
{
|
||||||
KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
|
KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
|
||||||
@@ -498,13 +522,13 @@ int DsMrr_impl::dsmrr_fill_buffer()
|
|||||||
|
|
||||||
/* Put rowid, or {rowid, range_id} pair into the buffer */
|
/* Put rowid, or {rowid, range_id} pair into the buffer */
|
||||||
h2->position(table->record[0]);
|
h2->position(table->record[0]);
|
||||||
memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
|
memcpy(mrr_buf_cur, h2->ref, h2->ref_length);
|
||||||
rowids_buf_cur += h2->ref_length;
|
mrr_buf_cur += h2->ref_length;
|
||||||
|
|
||||||
if (is_mrr_assoc)
|
if (is_mrr_assoc)
|
||||||
{
|
{
|
||||||
memcpy(rowids_buf_cur, &range_info, sizeof(void*));
|
memcpy(mrr_buf_cur, &range_info, sizeof(void*));
|
||||||
rowids_buf_cur += sizeof(void*);
|
mrr_buf_cur += sizeof(void*);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -514,16 +538,224 @@ int DsMrr_impl::dsmrr_fill_buffer()
|
|||||||
|
|
||||||
/* Sort the buffer contents by rowid */
|
/* Sort the buffer contents by rowid */
|
||||||
uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
|
uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
|
||||||
uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
|
uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
|
||||||
|
|
||||||
my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
|
my_qsort2(mrr_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
|
||||||
(void*)h);
|
(void*)h);
|
||||||
rowids_buf_last= rowids_buf_cur;
|
mrr_buf_last= mrr_buf_cur;
|
||||||
rowids_buf_cur= rowids_buf;
|
mrr_buf_cur= mrr_buf;
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
my_qsort2-compatible function to compare key tuples
|
||||||
|
*/
|
||||||
|
|
||||||
|
int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
|
||||||
|
{
|
||||||
|
DsMrr_impl *dsmrr= (DsMrr_impl*)arg;
|
||||||
|
TABLE *table= dsmrr->h->table;
|
||||||
|
|
||||||
|
KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part;
|
||||||
|
uchar *key1_end= key1 + dsmrr->cpk_tuple_length;
|
||||||
|
|
||||||
|
while (key1 < key1_end)
|
||||||
|
{
|
||||||
|
Field* f = part->field;
|
||||||
|
int len = part->store_length;
|
||||||
|
int res = f->cmp(key1, key2);
|
||||||
|
if (res)
|
||||||
|
return res;
|
||||||
|
key1 += len;
|
||||||
|
key2 += len;
|
||||||
|
part++;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
DsMrr_impl::dsmrr_fill_buffer_cpk()
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
|
||||||
|
|
||||||
|
dsmrr_eof is set to indicate whether we've exhausted the list of ranges
|
||||||
|
we're scanning.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void DsMrr_impl::dsmrr_fill_buffer_cpk()
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
KEY_MULTI_RANGE cur_range;
|
||||||
|
DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer_cpk");
|
||||||
|
|
||||||
|
mrr_buf_cur= mrr_buf;
|
||||||
|
while ((mrr_buf_cur < mrr_buf_end) &&
|
||||||
|
!(res= h->mrr_funcs.next(h->mrr_iter, &cur_range)))
|
||||||
|
{
|
||||||
|
DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
|
||||||
|
DBUG_ASSERT(!cpk_tuple_length ||
|
||||||
|
cpk_tuple_length == cur_range.start_key.length);
|
||||||
|
if (!cpk_tuple_length)
|
||||||
|
{
|
||||||
|
cpk_tuple_length= cur_range.start_key.length;
|
||||||
|
cpk_is_unique_scan= test(table->key_info[h->active_index].key_parts ==
|
||||||
|
my_count_bits(cur_range.start_key.keypart_map));
|
||||||
|
uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*);
|
||||||
|
mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/elem_size) * elem_size;
|
||||||
|
mrr_buf_end= mrr_buf_last;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Put key, or {key, range_id} pair into the buffer */
|
||||||
|
memcpy(mrr_buf_cur, cur_range.start_key.key, cpk_tuple_length);
|
||||||
|
mrr_buf_cur += cpk_tuple_length;
|
||||||
|
|
||||||
|
if (is_mrr_assoc)
|
||||||
|
{
|
||||||
|
memcpy(mrr_buf_cur, &cur_range.ptr, sizeof(void*));
|
||||||
|
mrr_buf_cur += sizeof(void*);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dsmrr_eof= test(res);
|
||||||
|
|
||||||
|
/* Sort the buffer contents by rowid */
|
||||||
|
uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*);
|
||||||
|
uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
|
||||||
|
|
||||||
|
my_qsort2(mrr_buf, n_rowids, elem_size,
|
||||||
|
(qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this);
|
||||||
|
mrr_buf_last= mrr_buf_cur;
|
||||||
|
mrr_buf_cur= mrr_buf;
|
||||||
|
DBUG_VOID_RETURN;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
DS-MRR/CPK: multi_range_read_next() function
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
DsMrr_impl::dsmrr_next_cpk()
|
||||||
|
range_info OUT identifier of range that the returned record belongs to
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
DS-MRR/CPK: multi_range_read_next() function.
|
||||||
|
This is similar to DsMrr_impl::dsmrr_next(), the differences are that
|
||||||
|
- we get records with index_read(), not with rnd_pos()
|
||||||
|
- we may get multiple records for one key (=element of the buffer)
|
||||||
|
- unlike dsmrr_fill_buffer(), dsmrr_fill_buffer_cpk() never fails.
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
0 OK, next record was successfully read
|
||||||
|
HA_ERR_END_OF_FILE End of records
|
||||||
|
Other Some other error
|
||||||
|
*/
|
||||||
|
|
||||||
|
int DsMrr_impl::dsmrr_next_cpk(char **range_info)
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
|
||||||
|
while (cpk_have_range)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (h->mrr_funcs.skip_record &&
|
||||||
|
h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL))
|
||||||
|
{
|
||||||
|
cpk_have_range= FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
res= h->index_next_same(table->record[0], mrr_buf_cur, cpk_tuple_length);
|
||||||
|
|
||||||
|
if (h->mrr_funcs.skip_index_tuple &&
|
||||||
|
h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (res != HA_ERR_END_OF_FILE)
|
||||||
|
{
|
||||||
|
if (is_mrr_assoc)
|
||||||
|
memcpy(range_info, &cpk_saved_range_info, sizeof(void*));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No more records in this range. Exit this loop and go get another range */
|
||||||
|
cpk_have_range= FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
/* First, make sure we have a range at start of the buffer */
|
||||||
|
if (mrr_buf_cur == mrr_buf_last)
|
||||||
|
{
|
||||||
|
if (dsmrr_eof)
|
||||||
|
{
|
||||||
|
res= HA_ERR_END_OF_FILE;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
dsmrr_fill_buffer_cpk();
|
||||||
|
}
|
||||||
|
if (mrr_buf_cur == mrr_buf_last)
|
||||||
|
{
|
||||||
|
res= HA_ERR_END_OF_FILE;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ok, got the range. Try making a lookup. */
|
||||||
|
uchar *lookup_tuple= mrr_buf_cur;
|
||||||
|
mrr_buf_cur += cpk_tuple_length;
|
||||||
|
if (is_mrr_assoc)
|
||||||
|
{
|
||||||
|
memcpy(&cpk_saved_range_info, mrr_buf_cur, sizeof(void*));
|
||||||
|
mrr_buf_cur += sizeof(void*) * test(is_mrr_assoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (h->mrr_funcs.skip_record &&
|
||||||
|
h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length,
|
||||||
|
HA_READ_KEY_EXACT);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Check pushed index condition. Performance-wise, it does not make any
|
||||||
|
sense to put this call here (the above call has already accessed the full
|
||||||
|
record). That's the best I could do, though, because:
|
||||||
|
- ha_innobase doesn't support IndexConditionPushdown on clustered PK
|
||||||
|
- MRR interface doesn't allow the storage engine to refuse a pushed index
|
||||||
|
condition.
|
||||||
|
Having this call here is not fully harmless: EXPLAIN shows "pushed index
|
||||||
|
condition", which is technically true but doesn't bring the benefits that
|
||||||
|
one might expect.
|
||||||
|
*/
|
||||||
|
if (h->mrr_funcs.skip_index_tuple &&
|
||||||
|
h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (res && res != HA_ERR_END_OF_FILE)
|
||||||
|
goto end;
|
||||||
|
|
||||||
|
if (!res)
|
||||||
|
{
|
||||||
|
memcpy(range_info, &cpk_saved_range_info, sizeof(void*));
|
||||||
|
/*
|
||||||
|
Attempt reading more rows from this range only if there actually can
|
||||||
|
be multiple matches:
|
||||||
|
*/
|
||||||
|
cpk_have_range= !cpk_is_unique_scan;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (true);
|
||||||
|
|
||||||
|
end:
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
DS-MRR implementation: multi_range_read_next() function
|
DS-MRR implementation: multi_range_read_next() function
|
||||||
*/
|
*/
|
||||||
@@ -536,10 +768,13 @@ int DsMrr_impl::dsmrr_next(char **range_info)
|
|||||||
|
|
||||||
if (use_default_impl)
|
if (use_default_impl)
|
||||||
return h->handler::multi_range_read_next(range_info);
|
return h->handler::multi_range_read_next(range_info);
|
||||||
|
|
||||||
|
if (doing_cpk_scan)
|
||||||
|
return dsmrr_next_cpk(range_info);
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if (rowids_buf_cur == rowids_buf_last)
|
if (mrr_buf_cur == mrr_buf_last)
|
||||||
{
|
{
|
||||||
if (dsmrr_eof)
|
if (dsmrr_eof)
|
||||||
{
|
{
|
||||||
@@ -552,17 +787,17 @@ int DsMrr_impl::dsmrr_next(char **range_info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* return eof if there are no rowids in the buffer after re-fill attempt */
|
/* return eof if there are no rowids in the buffer after re-fill attempt */
|
||||||
if (rowids_buf_cur == rowids_buf_last)
|
if (mrr_buf_cur == mrr_buf_last)
|
||||||
{
|
{
|
||||||
res= HA_ERR_END_OF_FILE;
|
res= HA_ERR_END_OF_FILE;
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
rowid= rowids_buf_cur;
|
rowid= mrr_buf_cur;
|
||||||
|
|
||||||
if (is_mrr_assoc)
|
if (is_mrr_assoc)
|
||||||
memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
|
memcpy(&cur_range_info, mrr_buf_cur + h->ref_length, sizeof(uchar**));
|
||||||
|
|
||||||
rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
|
mrr_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
|
||||||
if (h2->mrr_funcs.skip_record &&
|
if (h2->mrr_funcs.skip_record &&
|
||||||
h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
|
h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
|
||||||
continue;
|
continue;
|
||||||
@@ -582,7 +817,8 @@ end:
|
|||||||
/**
|
/**
|
||||||
DS-MRR implementation: multi_range_read_info() function
|
DS-MRR implementation: multi_range_read_info() function
|
||||||
*/
|
*/
|
||||||
ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
|
ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
|
||||||
|
uint key_parts,
|
||||||
uint *bufsz, uint *flags, COST_VECT *cost)
|
uint *bufsz, uint *flags, COST_VECT *cost)
|
||||||
{
|
{
|
||||||
ha_rows res;
|
ha_rows res;
|
||||||
@@ -590,8 +826,8 @@ ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
|
|||||||
uint def_bufsz= *bufsz;
|
uint def_bufsz= *bufsz;
|
||||||
|
|
||||||
/* Get cost/flags/mem_usage of default MRR implementation */
|
/* Get cost/flags/mem_usage of default MRR implementation */
|
||||||
res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
|
res= h->handler::multi_range_read_info(keyno, n_ranges, rows, key_parts,
|
||||||
&def_flags, cost);
|
&def_bufsz, &def_flags, cost);
|
||||||
DBUG_ASSERT(!res);
|
DBUG_ASSERT(!res);
|
||||||
|
|
||||||
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
|
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
|
||||||
@@ -683,7 +919,33 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
|
/*
|
||||||
|
Check if key/flags allow DS-MRR/CPK strategy to be used
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
DsMrr_impl::check_cpk_scan()
|
||||||
|
keyno Index that will be used
|
||||||
|
mrr_flags
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
Check if key/flags allow DS-MRR/CPK strategy to be used.
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
TRUE DS-MRR/CPK should be used
|
||||||
|
FALSE Otherwise
|
||||||
|
*/
|
||||||
|
|
||||||
|
bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags)
|
||||||
|
{
|
||||||
|
return test((mrr_flags & HA_MRR_SINGLE_POINT) &&
|
||||||
|
!(mrr_flags & HA_MRR_SORTED) &&
|
||||||
|
keyno == table->s->primary_key &&
|
||||||
|
h->primary_key_is_clustered());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
|
DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
|
||||||
|
|
||||||
Make the choice between using Default MRR implementation and DS-MRR.
|
Make the choice between using Default MRR implementation and DS-MRR.
|
||||||
@@ -706,14 +968,18 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
|
|||||||
@retval FALSE DS-MRR implementation should be used
|
@retval FALSE DS-MRR implementation should be used
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
|
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
|
||||||
uint *bufsz, COST_VECT *cost)
|
uint *bufsz, COST_VECT *cost)
|
||||||
{
|
{
|
||||||
COST_VECT dsmrr_cost;
|
COST_VECT dsmrr_cost;
|
||||||
bool res;
|
bool res;
|
||||||
THD *thd= current_thd;
|
THD *thd= current_thd;
|
||||||
|
|
||||||
|
doing_cpk_scan= check_cpk_scan(keyno, *flags);
|
||||||
if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
|
if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
|
||||||
(keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
|
(keyno == table->s->primary_key && h->primary_key_is_clustered() &&
|
||||||
|
!doing_cpk_scan) ||
|
||||||
key_uses_partial_cols(table, keyno))
|
key_uses_partial_cols(table, keyno))
|
||||||
{
|
{
|
||||||
/* Use the default implementation */
|
/* Use the default implementation */
|
||||||
|
@@ -1,16 +1,76 @@
|
|||||||
/*
|
/*
|
||||||
This file contains declarations for
|
This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR)
|
||||||
- Disk-Sweep MultiRangeRead (DS-MRR) implementation
|
implementation
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
A Disk-Sweep MRR interface implementation
|
A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
|
||||||
|
|
||||||
This implementation makes range (and, in the future, 'ref') scans to read
|
This is a "plugin"(*) for storage engines that allows make index scans
|
||||||
table rows in disk sweeps.
|
read table rows in rowid order. For disk-based storage engines, this is
|
||||||
|
faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
|
||||||
Currently it is used by MyISAM and InnoDB. Potentially it can be used with
|
|
||||||
any table handler that has non-clustered indexes and on-disk rows.
|
(*) - only conceptually. No dynamic loading or binary compatibility of any
|
||||||
|
kind.
|
||||||
|
|
||||||
|
General scheme of things:
|
||||||
|
|
||||||
|
SQL Layer code
|
||||||
|
| | |
|
||||||
|
-v---v---v---- handler->multi_range_read_XXX() function calls
|
||||||
|
| | |
|
||||||
|
____________________________________
|
||||||
|
/ DS-MRR module \
|
||||||
|
| (scan indexes, order rowids, do |
|
||||||
|
| full record reads in rowid order) |
|
||||||
|
\____________________________________/
|
||||||
|
| | |
|
||||||
|
-|---|---|----- handler->read_range_first()/read_range_next(),
|
||||||
|
| | | handler->index_read(), handler->rnd_pos() calls.
|
||||||
|
| | |
|
||||||
|
v v v
|
||||||
|
Storage engine internals
|
||||||
|
|
||||||
|
Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
|
||||||
|
Potentially it can be used with any table handler that has disk-based data
|
||||||
|
storage and has better performance when reading data in rowid order.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
DS-MRR implementation for one table. Create/use one object of this class for
|
||||||
|
each ha_{myisam/innobase/etc} object. That object will be further referred to
|
||||||
|
as "the handler"
|
||||||
|
|
||||||
|
There are actually three strategies
|
||||||
|
S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
|
||||||
|
MRR-to-non-MRR calls converter)
|
||||||
|
S2. Regular DS-MRR
|
||||||
|
S3. DS-MRR/CPK for doing scans on clustered primary keys.
|
||||||
|
|
||||||
|
S1 is used for cases which DS-MRR is unable to handle for some reason.
|
||||||
|
|
||||||
|
S2 is the actual DS-MRR. The basic algorithm is as follows:
|
||||||
|
1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and
|
||||||
|
fill the buffer with {rowid, range_id} pairs
|
||||||
|
2. Sort the buffer by rowid
|
||||||
|
3. for each {rowid, range_id} pair in the buffer
|
||||||
|
get record by rowid and return the {record, range_id} pair
|
||||||
|
4. Repeat the above steps until we've exhausted the list of ranges we're
|
||||||
|
scanning.
|
||||||
|
|
||||||
|
S3 is the variant of DS-MRR for use with clustered primary keys (or any
|
||||||
|
clustered index). The idea is that in clustered index it is sufficient to
|
||||||
|
access the index in index order, and we don't need an intermediate steps to
|
||||||
|
get rowid (like step #1 in S2).
|
||||||
|
|
||||||
|
DS-MRR/CPK's basic algorithm is as follows:
|
||||||
|
1. Collect a number of ranges (=lookup keys)
|
||||||
|
2. Sort them so that they follow in index order.
|
||||||
|
3. for each {lookup_key, range_id} pair in the buffer
|
||||||
|
get record(s) matching the lookup key and return {record, range_id} pairs
|
||||||
|
4. Repeat the above steps until we've exhausted the list of ranges we're
|
||||||
|
scanning.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class DsMrr_impl
|
class DsMrr_impl
|
||||||
@@ -21,29 +81,6 @@ public:
|
|||||||
DsMrr_impl()
|
DsMrr_impl()
|
||||||
: h2(NULL) {};
|
: h2(NULL) {};
|
||||||
|
|
||||||
/*
|
|
||||||
The "owner" handler object (the one that calls dsmrr_XXX functions.
|
|
||||||
It is used to retrieve full table rows by calling rnd_pos().
|
|
||||||
*/
|
|
||||||
handler *h;
|
|
||||||
TABLE *table; /* Always equal to h->table */
|
|
||||||
private:
|
|
||||||
/* Secondary handler object. It is used for scanning the index */
|
|
||||||
handler *h2;
|
|
||||||
|
|
||||||
/* Buffer to store rowids, or (rowid, range_id) pairs */
|
|
||||||
uchar *rowids_buf;
|
|
||||||
uchar *rowids_buf_cur; /* Current position when reading/writing */
|
|
||||||
uchar *rowids_buf_last; /* When reading: end of used buffer space */
|
|
||||||
uchar *rowids_buf_end; /* End of the buffer */
|
|
||||||
|
|
||||||
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
|
|
||||||
|
|
||||||
/* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
|
|
||||||
bool is_mrr_assoc;
|
|
||||||
|
|
||||||
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
|
|
||||||
public:
|
|
||||||
void init(handler *h_arg, TABLE *table_arg)
|
void init(handler *h_arg, TABLE *table_arg)
|
||||||
{
|
{
|
||||||
h= h_arg;
|
h= h_arg;
|
||||||
@@ -52,19 +89,62 @@ public:
|
|||||||
int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
|
int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
|
||||||
uint n_ranges, uint mode, HANDLER_BUFFER *buf);
|
uint n_ranges, uint mode, HANDLER_BUFFER *buf);
|
||||||
void dsmrr_close();
|
void dsmrr_close();
|
||||||
int dsmrr_fill_buffer();
|
|
||||||
int dsmrr_next(char **range_info);
|
int dsmrr_next(char **range_info);
|
||||||
|
|
||||||
ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz,
|
ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
|
||||||
uint *flags, COST_VECT *cost);
|
uint *bufsz, uint *flags, COST_VECT *cost);
|
||||||
|
|
||||||
ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
||||||
void *seq_init_param, uint n_ranges, uint *bufsz,
|
void *seq_init_param, uint n_ranges, uint *bufsz,
|
||||||
uint *flags, COST_VECT *cost);
|
uint *flags, COST_VECT *cost);
|
||||||
private:
|
private:
|
||||||
|
/*
|
||||||
|
The "owner" handler object (the one that calls dsmrr_XXX functions.
|
||||||
|
It is used to retrieve full table rows by calling rnd_pos().
|
||||||
|
*/
|
||||||
|
handler *h;
|
||||||
|
TABLE *table; /* Always equal to h->table */
|
||||||
|
|
||||||
|
/* Secondary handler object. It is used for scanning the index */
|
||||||
|
handler *h2;
|
||||||
|
|
||||||
|
/* Buffer to store rowids, or (rowid, range_id) pairs */
|
||||||
|
uchar *mrr_buf;
|
||||||
|
uchar *mrr_buf_cur; /* Current position when reading/writing */
|
||||||
|
uchar *mrr_buf_last; /* When reading: end of used buffer space */
|
||||||
|
uchar *mrr_buf_end; /* End of the buffer */
|
||||||
|
|
||||||
|
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
|
||||||
|
|
||||||
|
/* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
|
||||||
|
bool is_mrr_assoc;
|
||||||
|
|
||||||
|
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
|
||||||
|
|
||||||
|
bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */
|
||||||
|
|
||||||
|
/** DS-MRR/CPK variables start */
|
||||||
|
|
||||||
|
/* Length of lookup tuple being used, in bytes */
|
||||||
|
uint cpk_tuple_length;
|
||||||
|
/*
|
||||||
|
TRUE <=> We're scanning on a full primary key (and not on prefix), and so
|
||||||
|
can get max. one match for each key
|
||||||
|
*/
|
||||||
|
bool cpk_is_unique_scan;
|
||||||
|
/* TRUE<=> we're in a middle of enumerating records from a range */
|
||||||
|
bool cpk_have_range;
|
||||||
|
/* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */
|
||||||
|
char *cpk_saved_range_info;
|
||||||
|
|
||||||
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
|
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
|
||||||
COST_VECT *cost);
|
COST_VECT *cost);
|
||||||
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
|
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
|
||||||
uint *buffer_size, COST_VECT *cost);
|
uint *buffer_size, COST_VECT *cost);
|
||||||
|
bool check_cpk_scan(uint keyno, uint mrr_flags);
|
||||||
|
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
|
||||||
|
int dsmrr_fill_buffer();
|
||||||
|
void dsmrr_fill_buffer_cpk();
|
||||||
|
int dsmrr_next_cpk(char **range_info);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -8005,6 +8005,7 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
|
|||||||
|
|
||||||
quick->mrr_buf_size= thd->variables.mrr_buff_size;
|
quick->mrr_buf_size= thd->variables.mrr_buff_size;
|
||||||
if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
|
if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
|
||||||
|
uint(-1),
|
||||||
&quick->mrr_buf_size,
|
&quick->mrr_buf_size,
|
||||||
&quick->mrr_flags, &cost))
|
&quick->mrr_flags, &cost))
|
||||||
goto err;
|
goto err;
|
||||||
|
@@ -2390,8 +2390,8 @@ JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges)
|
|||||||
*/
|
*/
|
||||||
if (!file->inited)
|
if (!file->inited)
|
||||||
file->ha_index_init(join_tab->ref.key, 1);
|
file->ha_index_init(join_tab->ref.key, 1);
|
||||||
if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
|
if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
|
||||||
mrr_mode, &mrr_buff)))
|
mrr_mode, &mrr_buff)))
|
||||||
rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
|
rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
|
@@ -7508,10 +7508,11 @@ uint check_join_cache_usage(JOIN_TAB *tab,
|
|||||||
case JT_EQ_REF:
|
case JT_EQ_REF:
|
||||||
if (cache_level <= 4)
|
if (cache_level <= 4)
|
||||||
return 0;
|
return 0;
|
||||||
flags= HA_MRR_NO_NULL_ENDPOINTS;
|
flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
|
||||||
if (tab->table->covering_keys.is_set(tab->ref.key))
|
if (tab->table->covering_keys.is_set(tab->ref.key))
|
||||||
flags|= HA_MRR_INDEX_ONLY;
|
flags|= HA_MRR_INDEX_ONLY;
|
||||||
rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
|
rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
|
||||||
|
tab->ref.key_parts,
|
||||||
&bufsz, &flags, &cost);
|
&bufsz, &flags, &cost);
|
||||||
if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL) &&
|
if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL) &&
|
||||||
(!(flags & HA_MRR_NO_ASSOCIATION) || cache_level > 6) &&
|
(!(flags & HA_MRR_NO_ASSOCIATION) || cache_level > 6) &&
|
||||||
|
@@ -3503,8 +3503,8 @@ static SHOW_VAR status_variables[]= {
|
|||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
|
int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
|
||||||
uint n_ranges, uint mode,
|
uint n_ranges, uint mode,
|
||||||
HANDLER_BUFFER *buf)
|
HANDLER_BUFFER *buf)
|
||||||
{
|
{
|
||||||
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
|
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
|
||||||
}
|
}
|
||||||
@@ -3530,11 +3530,11 @@ ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
||||||
uint *bufsz, uint *flags,
|
uint key_parts, uint *bufsz,
|
||||||
COST_VECT *cost)
|
uint *flags, COST_VECT *cost)
|
||||||
{
|
{
|
||||||
ds_mrr.init(this, table);
|
ds_mrr.init(this, table);
|
||||||
return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
|
return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* MyISAM MRR implementation ends */
|
/* MyISAM MRR implementation ends */
|
||||||
|
@@ -183,7 +183,8 @@ public:
|
|||||||
uint n_ranges, uint *bufsz,
|
uint n_ranges, uint *bufsz,
|
||||||
uint *flags, COST_VECT *cost);
|
uint *flags, COST_VECT *cost);
|
||||||
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
||||||
uint *bufsz, uint *flags, COST_VECT *cost);
|
uint key_parts, uint *bufsz,
|
||||||
|
uint *flags, COST_VECT *cost);
|
||||||
|
|
||||||
/* Index condition pushdown implementation */
|
/* Index condition pushdown implementation */
|
||||||
Item *idx_cond_push(uint keyno, Item* idx_cond);
|
Item *idx_cond_push(uint keyno, Item* idx_cond);
|
||||||
|
@@ -2206,11 +2206,11 @@ ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
||||||
uint *bufsz, uint *flags,
|
uint key_parts, uint *bufsz,
|
||||||
COST_VECT *cost)
|
uint *flags, COST_VECT *cost)
|
||||||
{
|
{
|
||||||
ds_mrr.init(this, table);
|
ds_mrr.init(this, table);
|
||||||
return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
|
return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* MyISAM MRR implementation ends */
|
/* MyISAM MRR implementation ends */
|
||||||
|
@@ -168,7 +168,8 @@ public:
|
|||||||
uint n_ranges, uint *bufsz,
|
uint n_ranges, uint *bufsz,
|
||||||
uint *flags, COST_VECT *cost);
|
uint *flags, COST_VECT *cost);
|
||||||
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
||||||
uint *bufsz, uint *flags, COST_VECT *cost);
|
uint key_parts, uint *bufsz,
|
||||||
|
uint *flags, COST_VECT *cost);
|
||||||
|
|
||||||
/* Index condition pushdown implementation */
|
/* Index condition pushdown implementation */
|
||||||
Item *idx_cond_push(uint keyno, Item* idx_cond);
|
Item *idx_cond_push(uint keyno, Item* idx_cond);
|
||||||
|
@@ -11206,7 +11206,8 @@ test_innobase_convert_name()
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
|
int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
|
||||||
uint n_ranges, uint mode, HANDLER_BUFFER *buf)
|
uint n_ranges, uint mode,
|
||||||
|
HANDLER_BUFFER *buf)
|
||||||
{
|
{
|
||||||
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
|
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
|
||||||
}
|
}
|
||||||
@@ -11233,12 +11234,13 @@ ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
|
ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
||||||
uint keys, uint *bufsz,
|
uint key_parts, uint *bufsz,
|
||||||
uint *flags, COST_VECT *cost)
|
uint *flags, COST_VECT *cost)
|
||||||
{
|
{
|
||||||
ds_mrr.init(this, table);
|
ds_mrr.init(this, table);
|
||||||
ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
|
ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
|
||||||
|
flags, cost);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -219,7 +219,8 @@ public:
|
|||||||
uint n_ranges, uint *bufsz,
|
uint n_ranges, uint *bufsz,
|
||||||
uint *flags, COST_VECT *cost);
|
uint *flags, COST_VECT *cost);
|
||||||
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
||||||
uint *bufsz, uint *flags, COST_VECT *cost);
|
uint key_parts, uint *bufsz,
|
||||||
|
uint *flags, COST_VECT *cost);
|
||||||
DsMrr_impl ds_mrr;
|
DsMrr_impl ds_mrr;
|
||||||
|
|
||||||
Item *idx_cond_push(uint keyno, Item* idx_cond);
|
Item *idx_cond_push(uint keyno, Item* idx_cond);
|
||||||
|
Reference in New Issue
Block a user