From e1f49888bfcc5007c04a0d0837957f4767592dea Mon Sep 17 00:00:00 2001 From: "sergefp@mysql.com" <> Date: Thu, 22 Dec 2005 12:29:00 +0300 Subject: [PATCH 1/4] WL#2985 "Partition Pruning" --- mysql-test/r/partition_pruning.result | 215 ++++++ mysql-test/t/partition_pruning.test | 195 ++++++ sql/ha_ndbcluster.cc | 3 +- sql/ha_partition.cc | 2 + sql/handler.h | 46 +- sql/item.h | 35 + sql/item_timefunc.cc | 21 + sql/item_timefunc.h | 2 + sql/opt_range.cc | 962 +++++++++++++++++++++++++- sql/opt_range.h | 9 +- sql/sql_class.cc | 7 + sql/sql_lex.h | 5 + sql/sql_partition.cc | 230 +++++- sql/sql_select.cc | 57 +- sql/sql_yacc.yy | 2 + 15 files changed, 1743 insertions(+), 48 deletions(-) create mode 100644 mysql-test/r/partition_pruning.result create mode 100644 mysql-test/t/partition_pruning.test diff --git a/mysql-test/r/partition_pruning.result b/mysql-test/r/partition_pruning.result new file mode 100644 index 00000000000..63a727acb8c --- /dev/null +++ b/mysql-test/r/partition_pruning.result @@ -0,0 +1,215 @@ +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +create table t1 ( a int not null) partition by hash(a) partitions 2; +insert into t1 values (1),(2),(3); +explain select * from t1 where a=5 and a=6; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE +drop table t1; +create table t1 ( +a int(11) not null +) partition by hash (a) partitions 2; +insert into t1 values (1),(2),(3); +explain partitions select * from t1 where a=1; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p1 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t1 where a=2; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t1 where a=1 or a=2; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0,p1 ALL NULL NULL NULL NULL 3 Using where +create table t2 ( +a int not null, +b int not null +) partition by key(a,b) partitions 2; +insert into t2 values (1,1),(2,2),(3,3); +explain partitions select * from t2 where a=1; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t2 p0,p1 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t2 where b=1; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t2 p0,p1 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t2 where a=1 and b=1; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t2 p0 ALL NULL NULL NULL NULL 3 Using where +create table t3 ( +a int +) +partition by range (a*1) ( +partition p0 values less than (10), +partition p1 values less than (20) +); +insert into t3 values (5),(15); +explain partitions select * from t3 where a=11; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t3 p1 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t3 where a=10; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t3 p1 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t3 where a=20; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +explain partitions select * from t3 where a=30; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +create table t4 (a int not null, b int not null) partition by LIST (a+b) ( +partition p0 values in (12), +partition p1 values in (14) +); +insert into t4 values (10,2), (10,4); +explain partitions select * from t4 where (a=10 and b=1) or (a=10 and b=2); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t4 p0 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t4 +where (a=10 and b=1) or (a=10 and b=2) or (a=10 and b = 3); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t4 p0 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t4 where (a=10 and b=2) or (a=10 and b=3) +or (a=10 and b = 4); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t4 p0,p1 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t4 where (a=10 and b=1) or a=11; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t4 p0,p1 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t4 where (a=10 and b=2) or a=11; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t4 p0,p1 ALL NULL NULL NULL NULL 2 Using where +drop table t1, t2, t3, t4; +create table t5 (a int not null, b int not null, +c int not null, d int not null) +partition by LIST(a+b) subpartition by HASH (c+d) subpartitions 2 +( +partition p0 values in (12), +partition p1 values in (14) +); +insert into t5 values (10,2,0,0), (10,4,0,0), (10,2,0,1), (10,4,0,1); +explain partitions select * from t5; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t5 p0_sp0,p0_sp1,p1_sp0,p1_sp1 ALL NULL NULL NULL NULL 4 +explain partitions select * from t5 +where (a=10 and b=1) or (a=10 and b=2) or (a=10 and b = 3); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t5 p0_sp0,p0_sp1 ALL NULL NULL NULL NULL 4 Using where +explain partitions select * from t5 where (a=10 and b=2) or (a=10 and b=3) +or (a=10 and b = 4); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t5 p0_sp0,p0_sp1,p1_sp0,p1_sp1 ALL NULL NULL NULL NULL 4 Using where +explain partitions select * from t5 where (c=1 and d=1); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t5 p0_sp0,p1_sp0 ALL NULL NULL NULL NULL 4 Using where +explain partitions select * from t5 where (c=2 and d=1); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t5 p0_sp1,p1_sp1 ALL NULL NULL NULL NULL 4 Using where +explain partitions select * from t5 where (a=10 and b=2 and c=1 and d=1) or +(c=2 and d=1); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t5 p0_sp0,p0_sp1,p1_sp1 ALL NULL NULL NULL NULL 4 Using where +explain partitions select * from t5 where (a=10 and b=2 and c=1 and d=1) or +(b=2 and c=2 and d=1); +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t5 p0_sp0,p0_sp1,p1_sp1 ALL NULL NULL NULL NULL 4 Using where +create table t6 (a int not null) partition by LIST(a) ( +partition p1 values in (1), +partition p3 values in (3), +partition p5 values in (5), +partition p7 values in (7), +partition p9 values in (9) +); +insert into t6 values (1),(3),(5); +explain partitions select * from t6 where a < 1; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +explain partitions select * from t6 where a <= 1; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p1 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a > 9; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +explain partitions select * from t6 where a >= 9; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p9 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a > 0 and a < 5; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p1,p3 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a > 5 and a < 12; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p7,p9 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a > 3 and a < 8 ; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p5,p7 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a >= 0 and a <= 5; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p1,p3,p5 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a >= 5 and a <= 12; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p5,p7,p9 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a >= 3 and a <= 8; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t6 p3,p5,p7 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t6 where a > 3 and a < 5; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +create table t7 (a int not null) partition by RANGE(a) ( +partition p10 values less than (10), +partition p30 values less than (30), +partition p50 values less than (50), +partition p70 values less than (70), +partition p90 values less than (90) +); +insert into t7 values (10),(30),(50); +explain partitions select * from t7 where a < 5; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t7 p10 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t7 where a < 10; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t7 p10 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t7 where a <= 10; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t7 p10,p30 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t7 where a = 10; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t7 p30 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t7 where a < 90; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t7 p10,p30,p50,p70,p90 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t7 where a = 90; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +explain partitions select * from t7 where a > 90; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +explain partitions select * from t7 where a >= 90; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +explain partitions select * from t7 where a > 11 and a < 29; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t7 p30 ALL NULL NULL NULL NULL 3 Using where +create table t8 (a date not null) partition by RANGE(YEAR(a)) ( +partition p0 values less than (1980), +partition p1 values less than (1990), +partition p2 values less than (2000) +); +insert into t8 values ('1985-05-05'),('1995-05-05'); +explain partitions select * from t8 where a < '1980-02-02'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t8 p0,p1 ALL NULL NULL NULL NULL 2 Using where +create table t9 (a date not null) partition by RANGE(TO_DAYS(a)) ( +partition p0 values less than (732299), -- 2004-12-19 +partition p1 values less than (732468), -- 2005-06-06 +partition p2 values less than (732664) -- 2005-12-19 +); +insert into t9 values ('2005-05-05'), ('2005-04-04'); +explain partitions select * from t9 where a < '2004-12-19'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t9 p0 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t9 where a <= '2004-12-19'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t9 p0,p1 ALL NULL NULL NULL NULL 2 Using where +drop table t5,t6,t7,t8,t9; +create table t1 (a enum('a','b','c','d') default 'a') +partition by hash (ascii(a)) partitions 2; +insert into t1 values ('a'),('b'),('c'); +explain partitions select * from t1 where a='b'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0,p1 ALL NULL NULL NULL NULL 3 Using where +drop table t1; diff --git a/mysql-test/t/partition_pruning.test b/mysql-test/t/partition_pruning.test new file mode 100644 index 00000000000..e496a1b8e12 --- /dev/null +++ b/mysql-test/t/partition_pruning.test @@ -0,0 +1,195 @@ +# +# Partition pruning tests. Currently we only detect which partitions to +# prune, so the test is EXPLAINs. +# +-- source include/have_partition.inc + +--disable_warnings +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +--enable_warnings + + +# Check if we can infer from condition on partition fields that +# no records will match. +create table t1 ( a int not null) partition by hash(a) partitions 2; +insert into t1 values (1),(2),(3); +explain select * from t1 where a=5 and a=6; +drop table t1; + +# Simple HASH partitioning +create table t1 ( + a int(11) not null +) partition by hash (a) partitions 2; +insert into t1 values (1),(2),(3); + +explain partitions select * from t1 where a=1; +explain partitions select * from t1 where a=2; +explain partitions select * from t1 where a=1 or a=2; + +# Partitioning over several fields +create table t2 ( + a int not null, + b int not null +) partition by key(a,b) partitions 2; +insert into t2 values (1,1),(2,2),(3,3); + +explain partitions select * from t2 where a=1; +explain partitions select * from t2 where b=1; + +explain partitions select * from t2 where a=1 and b=1; + +# RANGE(expr) partitioning +create table t3 ( + a int +) +partition by range (a*1) ( + partition p0 values less than (10), + partition p1 values less than (20) +); +insert into t3 values (5),(15); + +explain partitions select * from t3 where a=11; +explain partitions select * from t3 where a=10; +explain partitions select * from t3 where a=20; + +explain partitions select * from t3 where a=30; + +# LIST(expr) partitioning +create table t4 (a int not null, b int not null) partition by LIST (a+b) ( + partition p0 values in (12), + partition p1 values in (14) +); +insert into t4 values (10,2), (10,4); + +# empty OR one +explain partitions select * from t4 where (a=10 and b=1) or (a=10 and b=2); + +# empty OR one OR empty +explain partitions select * from t4 + where (a=10 and b=1) or (a=10 and b=2) or (a=10 and b = 3); + +# one OR empty OR one +explain partitions select * from t4 where (a=10 and b=2) or (a=10 and b=3) + or (a=10 and b = 4); + +# empty OR full +explain partitions select * from t4 where (a=10 and b=1) or a=11; + +# one OR full +explain partitions select * from t4 where (a=10 and b=2) or a=11; + +drop table t1, t2, t3, t4; + +# LIST(expr)/HASH subpartitioning. +create table t5 (a int not null, b int not null, + c int not null, d int not null) +partition by LIST(a+b) subpartition by HASH (c+d) subpartitions 2 +( + partition p0 values in (12), + partition p1 values in (14) +); + +insert into t5 values (10,2,0,0), (10,4,0,0), (10,2,0,1), (10,4,0,1); +explain partitions select * from t5; + +# empty OR one OR empty +explain partitions select * from t5 + where (a=10 and b=1) or (a=10 and b=2) or (a=10 and b = 3); + +# one OR empty OR one +explain partitions select * from t5 where (a=10 and b=2) or (a=10 and b=3) + or (a=10 and b = 4); + +# conditions on subpartitions only +explain partitions select * from t5 where (c=1 and d=1); +explain partitions select * from t5 where (c=2 and d=1); + +# mixed partition/subpartitions. +explain partitions select * from t5 where (a=10 and b=2 and c=1 and d=1) or +(c=2 and d=1); + +# same as above +explain partitions select * from t5 where (a=10 and b=2 and c=1 and d=1) or +(b=2 and c=2 and d=1); + +# LIST(field) partitioning, interval analysis. +create table t6 (a int not null) partition by LIST(a) ( + partition p1 values in (1), + partition p3 values in (3), + partition p5 values in (5), + partition p7 values in (7), + partition p9 values in (9) +); +insert into t6 values (1),(3),(5); + +explain partitions select * from t6 where a < 1; +explain partitions select * from t6 where a <= 1; +explain partitions select * from t6 where a > 9; +explain partitions select * from t6 where a >= 9; + +explain partitions select * from t6 where a > 0 and a < 5; +explain partitions select * from t6 where a > 5 and a < 12; +explain partitions select * from t6 where a > 3 and a < 8 ; + +explain partitions select * from t6 where a >= 0 and a <= 5; +explain partitions select * from t6 where a >= 5 and a <= 12; +explain partitions select * from t6 where a >= 3 and a <= 8; + +explain partitions select * from t6 where a > 3 and a < 5; + +# RANGE(field) partitioning, interval analysis. +create table t7 (a int not null) partition by RANGE(a) ( + partition p10 values less than (10), + partition p30 values less than (30), + partition p50 values less than (50), + partition p70 values less than (70), + partition p90 values less than (90) +); +insert into t7 values (10),(30),(50); + +# leftmost intervals +explain partitions select * from t7 where a < 5; +explain partitions select * from t7 where a < 10; +explain partitions select * from t7 where a <= 10; +explain partitions select * from t7 where a = 10; + +#rightmost intervals +explain partitions select * from t7 where a < 90; +explain partitions select * from t7 where a = 90; +explain partitions select * from t7 where a > 90; +explain partitions select * from t7 where a >= 90; + +# misc intervals +explain partitions select * from t7 where a > 11 and a < 29; + +# LIST(monontonic_func) partitioning +create table t8 (a date not null) partition by RANGE(YEAR(a)) ( + partition p0 values less than (1980), + partition p1 values less than (1990), + partition p2 values less than (2000) +); +insert into t8 values ('1985-05-05'),('1995-05-05'); + +explain partitions select * from t8 where a < '1980-02-02'; + +# LIST(strict_monotonic_func) partitioning +create table t9 (a date not null) partition by RANGE(TO_DAYS(a)) ( + partition p0 values less than (732299), -- 2004-12-19 + partition p1 values less than (732468), -- 2005-06-06 + partition p2 values less than (732664) -- 2005-12-19 +); +insert into t9 values ('2005-05-05'), ('2005-04-04'); + +explain partitions select * from t9 where a < '2004-12-19'; +explain partitions select * from t9 where a <= '2004-12-19'; + +drop table t5,t6,t7,t8,t9; + +# Test the case where we can't create partitioning 'index' +create table t1 (a enum('a','b','c','d') default 'a') + partition by hash (ascii(a)) partitions 2; +insert into t1 values ('a'),('b'),('c'); +explain partitions select * from t1 where a='b'; +drop table t1; + + diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index 050a6ea8af8..773e1dec1a0 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -3123,7 +3123,6 @@ void ha_ndbcluster::info(uint flag) DBUG_VOID_RETURN; } - int ha_ndbcluster::extra(enum ha_extra_function operation) { DBUG_ENTER("extra"); @@ -3132,6 +3131,8 @@ int ha_ndbcluster::extra(enum ha_extra_function operation) DBUG_PRINT("info", ("HA_EXTRA_RESET")); DBUG_PRINT("info", ("Clearing condition stack")); cond_clear(); + if (m_part_info) + bitmap_clear_all(&m_part_info->used_partitions); break; case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/ DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY")); diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index b0b4ac3fdcd..970c0e39dac 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -2795,6 +2795,8 @@ int ha_partition::reset(void) handler **file; DBUG_ENTER("ha_partition::reset"); file= m_file; + if (m_part_info) + bitmap_clear_all(&m_part_info->used_partitions); do { if ((tmp= (*file)->reset())) diff --git a/sql/handler.h b/sql/handler.h index 5674698487c..00312ed7cfa 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -534,19 +534,52 @@ public: List part_field_list; List subpart_field_list; - + + /* + If there is no subpartitioning, use only this func to get partition ids. + If there is subpartitioning, use the this func to get partition id when + you have both partition and subpartition fields. + */ get_part_id_func get_partition_id; - get_part_id_func get_part_partition_id; - get_subpart_id_func get_subpartition_id; + /* Get partition id when we don't have subpartition fields */ + get_part_id_func get_part_partition_id; + + /* + Get subpartition id when we have don't have partition fields by we do + have subpartition ids. + Mikael said that for given constant tuple + {subpart_field1, ..., subpart_fieldN} the subpartition id will be the + same in all subpartitions + */ + get_subpart_id_func get_subpartition_id; + + /* NULL-terminated list of fields used in partitioned expression */ Field **part_field_array; + /* NULL-terminated list of fields used in subpartitioned expression */ Field **subpart_field_array; + + /* + Array of all fields used in partition and subpartition expression, + without duplicates, NULL-terminated. + */ Field **full_part_field_array; Item *part_expr; Item *subpart_expr; Item *item_free_list; + + /* + A bitmap of partitions used by the current query. + Usage pattern: + * It is guaranteed that all partitions are set to be unused on query start. + * Before index/rnd_init(), partition pruning code sets the bits for used + partitions. + * The handler->extra(HA_EXTRA_RESET) call at query end sets all partitions + to be unused. + */ + MY_BITMAP used_partitions; union { longlong *range_int_array; @@ -747,6 +780,13 @@ void get_full_part_id_from_key(const TABLE *table, byte *buf, bool mysql_unpack_partition(THD *thd, const uchar *part_buf, uint part_info_len, TABLE *table, enum db_type default_db_type); +void make_used_partitions_str(partition_info *part_info, String *parts_str); +uint32 get_list_array_idx_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint); +uint32 get_partition_id_range_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint); #endif diff --git a/sql/item.h b/sql/item.h index 89f673c47f5..d6baf249a90 100644 --- a/sql/item.h +++ b/sql/item.h @@ -368,6 +368,28 @@ public: } }; + +/* + This enum is used to report information about monotonicity of function + represented by Item* tree. + Monotonicity is defined only for Item* trees that represent table + partitioning expressions (i.e. have no subselects/user vars/PS parameters + etc etc). An Item* tree is assumed to have the same monotonicity properties + as its correspoinding function F: + + [signed] longlong F(field1, field2, ...) { + put values of field_i into table record buffer; + return item->val_int(); + } +*/ + +typedef enum monotonicity_info +{ + NON_MONOTONIC, /* none of the below holds */ + MONOTONIC_INCREASING, /* F() is unary and "x < y" => "F(x) < F(y)" */ + MONOTONIC_STRICT_INCREASING /* F() is unary and "x < y" => "F(x) <= F(y)" */ +} enum_monotonicity_info; + /*************************************************************************/ typedef bool (Item::*Item_processor)(byte *arg); @@ -465,6 +487,15 @@ public: virtual Item_result cast_to_int_type() const { return result_type(); } virtual enum_field_types field_type() const; virtual enum Type type() const =0; + + /* + Return information about function monotonicity. See comment for + enum_monotonicity_info for details. This function can only be called + after fix_fields() call. + */ + virtual enum_monotonicity_info get_monotonicity_info() const + { return NON_MONOTONIC; } + /* valXXX methods must return NULL or 0 or 0.0 if null_value is set. */ /* Return double precision floating point representation of item. @@ -1138,6 +1169,10 @@ public: { return field->type(); } + enum_monotonicity_info get_monotonicity_info() const + { + return MONOTONIC_STRICT_INCREASING; + } Field *get_tmp_table_field() { return result_field; } Field *tmp_table_field(TABLE *t_arg) { return result_field; } bool get_date(TIME *ltime,uint fuzzydate); diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index f62ad42bb95..3560a74ddb2 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -885,6 +885,19 @@ longlong Item_func_to_days::val_int() return (longlong) calc_daynr(ltime.year,ltime.month,ltime.day); } +enum_monotonicity_info Item_func_to_days::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM) + { + if (args[0]->field_type() == MYSQL_TYPE_DATE) + return MONOTONIC_STRICT_INCREASING; + if (args[0]->field_type() == MYSQL_TYPE_DATETIME) + return MONOTONIC_INCREASING; + } + return NON_MONOTONIC; +} + + longlong Item_func_dayofyear::val_int() { DBUG_ASSERT(fixed == 1); @@ -1067,6 +1080,14 @@ longlong Item_func_year::val_int() return (longlong) ltime.year; } +enum_monotonicity_info Item_func_year::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM && + (args[0]->field_type() == MYSQL_TYPE_DATE || + args[0]->field_type() == MYSQL_TYPE_DATETIME)) + return MONOTONIC_INCREASING; + return NON_MONOTONIC; +} longlong Item_func_unix_timestamp::val_int() { diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index 46079ac0342..9a2cb7a4c9e 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -65,6 +65,7 @@ public: max_length=6*MY_CHARSET_BIN_MB_MAXLEN; maybe_null=1; } + enum_monotonicity_info get_monotonicity_info() const; }; @@ -234,6 +235,7 @@ public: Item_func_year(Item *a) :Item_int_func(a) {} longlong val_int(); const char *func_name() const { return "year"; } + enum_monotonicity_info get_monotonicity_info() const; void fix_length_and_dec() { decimals=0; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 4ed174adc4f..77822c55cc8 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -286,6 +286,13 @@ public: return parent->left == this ? &parent->left : &parent->right; } SEL_ARG *clone_tree(); + + /* Return TRUE if this represents "keypartK = const" or "keypartK IS NULL" */ + bool is_singlepoint() + { + return !min_flag && !max_flag && + !field->key_cmp((byte*) min_value, (byte*)max_value); + } }; class SEL_IMERGE; @@ -319,25 +326,51 @@ public: /* Note that #records for each key scan is stored in table->quick_rows */ }; +class RANGE_OPT_PARAM +{ +public: + THD *thd; /* Current thread handle */ + TABLE *table; /* Table being analyzed */ + COND *cond; /* Used inside get_mm_tree(). */ + table_map prev_tables; + table_map read_tables; + table_map current_table; /* Bit of the table being analyzed */ -typedef struct st_qsel_param { - THD *thd; - TABLE *table; - KEY_PART *key_parts,*key_parts_end; + /* Array of parts of all keys for which range analysis is performed */ + KEY_PART *key_parts; + KEY_PART *key_parts_end; + MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */ + MEM_ROOT *old_root; /* Memory that will last until the query end */ + /* + Number of indexes used in range analysis (In SEL_TREE::keys only first + #keys elements are not empty) + */ + uint keys; + + /* + If true, the index descriptions describe real indexes (and it is ok to + call field->optimize_range(real_keynr[...], ...). + Otherwise index description describes fake indexes. + */ + bool using_real_indexes; + + /* + used_key_no -> table_key_no translation table. Only makes sense if + using_real_indexes==TRUE + */ + uint real_keynr[MAX_KEY]; +}; + +class PARAM : public RANGE_OPT_PARAM +{ +public: KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ - MEM_ROOT *mem_root, *old_root; - table_map prev_tables,read_tables,current_table; uint baseflag, max_key_part, range_count; - uint keys; /* number of keys used in the query */ - - /* used_key_no -> table_key_no translation table */ - uint real_keynr[MAX_KEY]; char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; bool quick; // Don't calulate possible keys - COND *cond; uint fields_bitmap_size; MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ @@ -349,7 +382,7 @@ typedef struct st_qsel_param { /* TRUE if last checked tree->key can be used for ROR-scan */ bool is_ror_scan; -} PARAM; +}; class TABLE_READ_PLAN; class TRP_RANGE; @@ -360,13 +393,13 @@ class TABLE_READ_PLAN; struct st_ror_scan_info; -static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field, +static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, Item_func::Functype type,Item *value, Item_result cmp_type); -static SEL_ARG *get_mm_leaf(PARAM *param,COND *cond_func,Field *field, +static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value); -static SEL_TREE *get_mm_tree(PARAM *param,COND *cond); +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond); static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree); @@ -409,8 +442,8 @@ static void print_rowid(byte* val, int len); static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif -static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); -static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); +static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); +static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag); @@ -423,7 +456,7 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b); static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE); static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length); -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param); +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param); /* @@ -455,9 +488,9 @@ public: trees_next(trees), trees_end(trees + PREALLOCED_TREES) {} - int or_sel_tree(PARAM *param, SEL_TREE *tree); - int or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree); - int or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge); + int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree); + int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree); + int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge); }; @@ -473,7 +506,7 @@ public: -1 - Out of memory. */ -int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree) +int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) { if (trees_next == trees_end) { @@ -524,7 +557,7 @@ int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree) -1 An error occurred. */ -int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree) +int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree) { for (SEL_TREE** tree = trees; tree != trees_next; @@ -558,7 +591,7 @@ int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree) -1 - An error occurred */ -int SEL_IMERGE::or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge) +int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge) { for (SEL_TREE** tree= imerge->trees; tree != imerge->trees_next; @@ -604,7 +637,7 @@ inline void imerge_list_and_list(List *im1, List *im2) other Error, both passed lists are unusable */ -int imerge_list_or_list(PARAM *param, +int imerge_list_or_list(RANGE_OPT_PARAM *param, List *im1, List *im2) { @@ -624,7 +657,7 @@ int imerge_list_or_list(PARAM *param, other Error */ -int imerge_list_or_tree(PARAM *param, +int imerge_list_or_tree(RANGE_OPT_PARAM *param, List *im1, SEL_TREE *tree) { @@ -1776,6 +1809,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.old_root= thd->mem_root; param.needed_reg= &needed_reg; param.imerge_cost_buff_size= 0; + param.using_real_indexes= TRUE; thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); @@ -1966,6 +2000,859 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, DBUG_RETURN(records ? test(quick) : -1); } +/**************************************************************************** + * Partition pruning starts + ****************************************************************************/ +#ifdef WITH_PARTITION_STORAGE_ENGINE + +struct st_part_prune_param; +struct st_part_opt_info; + +typedef void (*mark_full_part_func)(partition_info*, uint32); +typedef uint32 (*part_num_to_partition_id_func)(struct st_part_prune_param*, + uint32); +typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint, + bool include_endpoint); + +/* + Partition pruning operation context +*/ +typedef struct st_part_prune_param +{ + RANGE_OPT_PARAM range_param; /* Range optimizer parameters */ + + /*************************************************************** + Following fields are filled in based solely on partitioning + definition and not modified after that: + **************************************************************/ + partition_info *part_info; /* Copy of table->part_info */ + /* Function to get partition id from partitioning fields only */ + get_part_id_func get_top_partition_id_func; + /* Function to mark a partition as used (w/all subpartitions if they exist)*/ + mark_full_part_func mark_full_partition_used; + + /* Partitioning 'index' description, array of key parts */ + KEY_PART *key; + + /* + Number of fields in partitioning 'index' definition created for + partitioning (0 if partitioning 'index' doesn't include partitioning + fields) + */ + uint part_fields; + uint subpart_fields; /* Same as above for subpartitioning */ + + /* + Number of the last partitioning field keypart in the index, or -1 if + partitioning index definition doesn't include partitioning fields. + */ + int last_part_partno; + int last_subpart_partno; /* Same as above for supartitioning */ + + /* + Function to be used to analyze non-singlepoint intervals (Can be pointer + to one of two functions - for RANGE and for LIST types). NULL means + partitioning type and/or expression doesn't allow non-singlepoint interval + analysis. + See get_list_array_idx_for_endpoint (or get_range_...) for description of + what the function does. + */ + get_endpoint_func get_endpoint; + + /* Maximum possible value that can be returned by get_endpoint function */ + uint32 max_endpoint_val; + + /* + For RANGE partitioning, part_num_to_part_id_range, for LIST partitioning, + part_num_to_part_id_list. Just to avoid the if-else clutter. + */ + part_num_to_partition_id_func endpoints_walk_func; + + /* + If true, process "key < const" as "part_func(key) < part_func(const)", + otherwise as "part_func(key) <= part_func(const)". Same for '>' and '>='. + This is defined iff get_endpoint != NULL. + */ + bool force_include_bounds; + + /* + is_part_keypart[i] == test(keypart #i in partitioning index is a member + used in partitioning) + Used to maintain current values of cur_part_fields and cur_subpart_fields + */ + my_bool *is_part_keypart; + /* Same as above for subpartitioning */ + my_bool *is_subpart_keypart; + + /*************************************************************** + Following fields form find_used_partitions() recursion context: + **************************************************************/ + SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */ + SEL_ARG **arg_stack_end; /* Top of the stack */ + /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */ + uint cur_part_fields; + /* Same as cur_part_fields, but for subpartitioning */ + uint cur_subpart_fields; + + /*************************************************************** + Following fields are used to store an 'iterator' that can be + used to obtain a set of used of used partitions. + **************************************************************/ + /* + Start number, end number and + */ + part_num_to_partition_id_func part_num_to_part_id; + uint32 start_part_num; + uint32 end_part_num; +} PART_PRUNE_PARAM; + +static bool create_partition_index_descrition(PART_PRUNE_PARAM *prune_par); +static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree); +static void mark_all_partitions_as_used(partition_info *part_info); +static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, + uint32 num); + +#ifndef DBUG_OFF +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end); +static void dbug_print_field(Field *field); +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part); +static void dbug_print_onepoint_range(SEL_ARG **start, uint num); +#endif + + +/* + Perform partition pruning for a given table and condition. + + SYNOPSIS + prune_partitions() + thd Thread handle + table Table to perform partition pruning for + pprune_cond Condition to use for partition pruning + + DESCRIPTION + This function assumes that all partitions are marked as unused when it + is invoked. The function analyzes the condition, finds partitions that + need to be used to retrieve the records that match the condition, and + marks them as used by setting appropriate bit in part_info->used_partitions + In the worst case all partitions are marked as used. + + NOTE + This function returns promptly if called for non-partitioned table. + + RETURN + TRUE We've inferred that no partitions need to be used (i.e. no table + records will satisfy pprune_cond) + FALSE Otherwise +*/ + +bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) +{ + bool retval= FALSE; + partition_info *part_info = table->part_info; + DBUG_ENTER("prune_partitions"); + + if (!part_info) + DBUG_RETURN(FALSE); /* not a partitioned table */ + + if (!pprune_cond) + { + mark_all_partitions_as_used(part_info); + DBUG_RETURN(FALSE); + } + + PART_PRUNE_PARAM prune_param; + MEM_ROOT alloc; + RANGE_OPT_PARAM *range_par= &prune_param.range_param; + + prune_param.part_info= part_info; + + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + range_par->mem_root= &alloc; + range_par->old_root= thd->mem_root; + + if (create_partition_index_descrition(&prune_param)) + { + mark_all_partitions_as_used(part_info); + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(FALSE); + } + + range_par->thd= thd; + range_par->table= table; + /* range_par->cond doesn't need initialization */ + range_par->prev_tables= range_par->read_tables= 0; + range_par->current_table= table->map; + + range_par->keys= 1; // one index + range_par->using_real_indexes= FALSE; + range_par->real_keynr[0]= 0; + + thd->no_errors=1; // Don't warn about NULL + thd->mem_root=&alloc; + + prune_param.key= prune_param.range_param.key_parts; + SEL_TREE *tree; + SEL_ARG *arg; + int res; + + tree= get_mm_tree(range_par, pprune_cond); + if (!tree || (tree->type != SEL_TREE::KEY && + tree->type != SEL_TREE::KEY_SMALLER)) + goto all_used; + + if (tree->type == SEL_TREE::IMPOSSIBLE) + { + retval= TRUE; + goto end; + } + + if (tree->merges.is_empty()) + { + prune_param.arg_stack_end= prune_param.arg_stack; + prune_param.cur_part_fields= 0; + prune_param.cur_subpart_fields= 0; + prune_param.part_num_to_part_id= part_num_to_part_id_range; + prune_param.start_part_num= 0; + prune_param.end_part_num= prune_param.part_info->no_parts; + if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param, + tree->keys[0])))) + goto all_used; + } + else + { + res= 0; + DBUG_ASSERT(tree->merges.elements == 1); + SEL_IMERGE *imerge= tree->merges.head(); + for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) + { + prune_param.arg_stack_end= prune_param.arg_stack; + prune_param.cur_part_fields= 0; + prune_param.cur_subpart_fields= 0; + prune_param.part_num_to_part_id= part_num_to_part_id_range; + prune_param.start_part_num= 0; + prune_param.end_part_num= prune_param.part_info->no_parts; + if (-1 == (res |= find_used_partitions(&prune_param, (*ptree)->keys[0]))) + goto all_used; + } + } + + if (!res) + retval= TRUE; + goto end; + +all_used: + mark_all_partitions_as_used(prune_param.part_info); +end: + thd->no_errors=0; + thd->mem_root= range_par->old_root; + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(retval); +} + + +/* + Store key image to table record + + SYNOPSIS + field Field which key image should be stored. + ptr Field value in key format. + len Length of the value, in bytes. +*/ + +static void store_key_image_to_rec(Field *field, char *ptr, uint len) +{ + /* Do the same as print_key() does */ + if (field->real_maybe_null()) + { + if (*ptr) + { + field->set_null(); + return; + } + ptr++; + } + field->set_key_image(ptr, len); +} + + +/* + For SEL_ARG* array, store sel_arg->min values into table record buffer + + SYNOPSIS + store_selargs_to_rec() + ppar Partition pruning context + start Array SEL_ARG* for which the minimum values should be stored + num Number of elements in the array +*/ + +static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start, + int num) +{ + KEY_PART *parts= ppar->range_param.key_parts; + for (SEL_ARG **end= start + num; start != end; start++) + { + SEL_ARG *sel_arg= (*start); + store_key_image_to_rec(sel_arg->field, sel_arg->min_value, + parts[sel_arg->part].length); + } +} + + +/* Mark a partition as used in the case when there are no subpartitions */ +static void mark_full_partition_used_no_parts(partition_info* part_info, + uint32 part_id) +{ + bitmap_set_bit(&part_info->used_partitions, part_id); +} + + +/* Mark a partition as used in the case when there are subpartitions */ +static void mark_full_partition_used_with_parts(partition_info *part_info, + uint32 part_id) +{ + uint32 start= part_id * part_info->no_subparts; + uint32 end= start + part_info->no_subparts; + for (; start != end; start++) + bitmap_set_bit(&part_info->used_partitions, start); +} + +static +uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, uint32 num) +{ + return num; +} + +static +uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* prune_par, uint32 num) +{ + return prune_par->part_info->list_array[num].partition_id; +} + + +/* + Recursively walk the SEL_ARG tree, find/mark partitions that need to be used + + SYNOPSIS + find_used_partitions() + ppar Partition pruning context. + key_tree Intervals tree to perform pruning for. + + DESCRIPTION + This function + * recursively walks the SEL_ARG* tree, collecting partitioning + "intervals"; + * finds the partitions one needs to use to get rows in these intervals; + * marks these partitions as used. + + WHAT IS CONSIDERED TO BE "INTERVALS" + A partition pruning "interval" is equivalent to condition in one of the + forms: + + "partition_field1=const1 AND ... partition_fieldN=constN" (1) + "subpartition_field1=const1 AND ... subpartition_fieldN=constN" (2) + "(1) AND (2)" (3) + + In (1) and (2) all [sub]partitioning fields must be used, and "x=const" + includes "x IS NULL". + + If partitioning is performed using + + PARTITION BY RANGE(unary_monotonic_func(single_partition_field)), + + then the following is also an interval: + + " const1 OP1 single_partition_field OR const2" (4) + + where OP1 and OP2 are '<' OR '<=', and const_i can be +/- inf. + Everything else is not a partition pruning "interval". + + RETURN + 1 OK, one or more [sub]partitions are marked as used. + 0 The passed condition doesn't match any partitions + -1 Couldn't infer any partition pruning "intervals" from the passed + SEL_ARG* tree. Marking partitions as used is the responsibility of + the caller. +*/ + +static +int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) +{ + int res, left_res=0, right_res=0; + int partno= (int)key_tree->part; + bool pushed= FALSE; + bool set_full_part_if_bad_ret= FALSE; + + if (key_tree->left != &null_element) + { + if (-1 == (left_res= find_used_partitions(ppar,key_tree->left))) + return -1; + } + + if (key_tree->type == SEL_ARG::KEY_RANGE) + { + if (partno == 0 && (NULL != ppar->get_endpoint)) + { + /* + Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and + we got "const1 < fieldX < const2" interval. + */ + DBUG_EXECUTE("info", dbug_print_segment_range(key_tree, + ppar->range_param. + key_parts);); + /* Find minimum */ + if (key_tree->min_flag & NO_MIN_RANGE) + { + ppar->start_part_num= 0; + } + else + { + store_key_image_to_rec(key_tree->field, key_tree->min_value, + ppar->range_param.key_parts[0].length); + bool include_endp= ppar->force_include_bounds || + !test(key_tree->min_flag & NEAR_MIN); + ppar->start_part_num= ppar->get_endpoint(ppar->part_info, 1, + include_endp); + if (ppar->start_part_num == ppar->max_endpoint_val) + { + res= 0; /* No satisfying partitions */ + goto pop_and_go_right; + } + } + + /* Find maximum */ + if (key_tree->min_flag & NO_MAX_RANGE) + { + ppar->end_part_num= ppar->max_endpoint_val; + } + else + { + store_key_image_to_rec(key_tree->field, key_tree->max_value, + ppar->range_param.key_parts[0].length); + bool include_endp= ppar->force_include_bounds || + !test(key_tree->max_flag & NEAR_MAX); + ppar->end_part_num= ppar->get_endpoint(ppar->part_info, 0, + include_endp); + if (ppar->start_part_num == ppar->end_part_num) + { + res= 0; /* No satisfying partitions */ + goto pop_and_go_right; + } + } + ppar->part_num_to_part_id= ppar->endpoints_walk_func; + + /* + Save our intent to mark full partition as used if we will not be able + to obtain further limits on subpartitions + */ + set_full_part_if_bad_ret= TRUE; + goto process_next_key_part; + } + + if (key_tree->is_singlepoint()) + { + pushed= TRUE; + ppar->cur_part_fields+= ppar->is_part_keypart[partno]; + ppar->cur_subpart_fields+= ppar->is_subpart_keypart[partno]; + *(ppar->arg_stack_end++) = key_tree; + + if (partno == ppar->last_part_partno && + ppar->cur_part_fields == ppar->part_fields) + { + /* + Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning + fields. Save all constN constants into table record buffer. + */ + store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields); + DBUG_EXECUTE("info", dbug_print_onepoint_range(ppar->arg_stack, + ppar->part_fields);); + uint32 part_id; + /* then find in which partition the {const1, ...,constN} tuple goes */ + if (ppar->get_top_partition_id_func(ppar->part_info, &part_id)) + { + res= 0; /* No satisfying partitions */ + goto pop_and_go_right; + } + /* Rembember the limit we got - single partition #part_id */ + ppar->part_num_to_part_id= part_num_to_part_id_range; + ppar->start_part_num= part_id; + ppar->end_part_num= part_id + 1; + + /* + If there are no subpartitions/we fail to get any limit for them, + then we'll mark full partition as used. + */ + set_full_part_if_bad_ret= TRUE; + goto process_next_key_part; + } + + if (partno == ppar->last_subpart_partno) + { + /* + Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning + fields. Save all constN constants into table record buffer. + */ + store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields, + ppar->subpart_fields); + DBUG_EXECUTE("info", dbug_print_onepoint_range(ppar->arg_stack_end - + ppar->subpart_fields, + ppar->subpart_fields);); + /* Find the subpartition (it's HASH/KEY so we always have one) */ + partition_info *part_info= ppar->part_info; + uint32 subpart_id= part_info->get_subpartition_id(part_info); + + /* Mark this partition as used in each subpartition. */ + for (uint32 num= ppar->start_part_num; num != ppar->end_part_num; + num++) + { + bitmap_set_bit(&part_info->used_partitions, + ppar->part_num_to_part_id(ppar, num) * + part_info->no_subparts + subpart_id); + ppar->start_part_num++; + } + res= 1; /* Some partitions were marked as used */ + goto pop_and_go_right; + } + } + else + { + /* + Can't handle condition on current key part. If we're that deep that + we're processing subpartititoning's key parts, this means we'll not be + able to infer any suitable condition, so bail out. + */ + if (partno >= ppar->last_part_partno) + return -1; + } + } + +process_next_key_part: + if (key_tree->next_key_part) + res= find_used_partitions(ppar, key_tree->next_key_part); + else + res= -1; + + if (res == -1) /* Got "full range" for key_tree->next_key_part call */ + { + if (set_full_part_if_bad_ret) + { + for (uint32 num= ppar->start_part_num; num != ppar->end_part_num; + num++) + { + ppar->mark_full_partition_used(ppar->part_info, + ppar->part_num_to_part_id(ppar, num)); + } + res= 1; + } + else + return -1; + } + + if (set_full_part_if_bad_ret) + { + /* Restore the "used partition iterator" to its default */ + ppar->part_num_to_part_id= part_num_to_part_id_range; + ppar->start_part_num= 0; + ppar->end_part_num= ppar->part_info->no_parts; + } + + if (pushed) + { +pop_and_go_right: + /* Pop this key part info off the "stack" */ + ppar->arg_stack_end--; + ppar->cur_part_fields-= ppar->is_part_keypart[partno]; + ppar->cur_subpart_fields-= ppar->is_subpart_keypart[partno]; + } + + if (key_tree->right != &null_element) + { + if (-1 == (right_res= find_used_partitions(ppar,key_tree->right))) + return -1; + } + return (left_res || right_res || res); +} + + +static void mark_all_partitions_as_used(partition_info *part_info) +{ + bitmap_set_all(&part_info->used_partitions); +} + + +/* + Check if field types allow to construct partitioning index description + + SYNOPSIS + fields_ok_for_partition_index() + pfield NULL-terminated array of pointers to fields. + + DESCRIPTION + For an array of fields, check if we can use all of the fields to create + partitioning index description. + + We can't process GEOMETRY fields - for these fields singlepoint intervals + cant be generated, and non-singlepoint are "special" kinds of intervals + to which our processing logic can't be applied. + + It is not known if we could process ENUM fields, so they are disabled to be + on the safe side. + + RETURN + TRUE Yes, fields can be used in partitioning index + FALSE Otherwise +*/ + +static bool fields_ok_for_partition_index(Field **pfield) +{ + if (!pfield) + return FALSE; + for (; (*pfield); pfield++) + { + enum_field_types ftype= (*pfield)->real_type(); + if (ftype == FIELD_TYPE_ENUM || ftype == FIELD_TYPE_GEOMETRY) + return FALSE; + } + return TRUE; +} + + +/* + Create partition index description and fill related info in the context + struct + + SYNOPSIS + create_partition_index_descrition() + prune_par INOUT Partition pruning context + + DESCRIPTION + Create partition index description. Partition index description is: + + part_index(used_fields_list(part_expr), used_fields_list(subpart_expr)) + + If partitioning/sub-partitioning uses BLOB or Geometry fields, then + corresponding fields_list(...) is not included into index description + and we don't perform partition pruning for partitions/subpartitions. + + RETURN + TRUE Out of memory or can't do partition pruning at all + FALSE OK +*/ + +static bool create_partition_index_descrition(PART_PRUNE_PARAM *ppar) +{ + RANGE_OPT_PARAM *range_par= &(ppar->range_param); + partition_info *part_info= ppar->part_info; + uint used_part_fields, used_subpart_fields; + + used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ? + part_info->no_part_fields : 0; + used_subpart_fields= + fields_ok_for_partition_index(part_info->subpart_field_array)? + part_info->no_subpart_fields : 0; + + uint total_parts= used_part_fields + used_subpart_fields; + + ppar->part_fields= used_part_fields; + ppar->last_part_partno= (int)used_part_fields - 1; + + ppar->subpart_fields= used_subpart_fields; + ppar->last_subpart_partno= + used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1; + + if (is_sub_partitioned(part_info)) + { + ppar->mark_full_partition_used= mark_full_partition_used_with_parts; + ppar->get_top_partition_id_func= part_info->get_part_partition_id; + } + else + { + ppar->mark_full_partition_used= mark_full_partition_used_no_parts; + ppar->get_top_partition_id_func= part_info->get_partition_id; + } + + enum_monotonicity_info minfo; + ppar->get_endpoint= NULL; + if (part_info->part_expr && + (minfo= part_info->part_expr->get_monotonicity_info()) != NON_MONOTONIC) + { + /* + ppar->force_include_bounds controls how we'll process "field < C" and + "field > C" intervals. + If the partitioning function F is strictly increasing, then for any x, y + "x < y" => "F(x) < F(y)" (*), i.e. when we get interval "field < C" + we can perform partition pruning on the equivalent "F(field) < F(C)". + + If the partitioning function not strictly increasing (it is simply + increasing), then instead of (*) we get "x < y" => "F(x) <= F(y)" + i.e. for interval "field < C" we can perform partition pruning for + "F(field) <= F(C)". + */ + ppar->force_include_bounds= test(minfo == MONOTONIC_INCREASING); + if (part_info->part_type == RANGE_PARTITION) + { + ppar->get_endpoint= get_partition_id_range_for_endpoint; + ppar->endpoints_walk_func= part_num_to_part_id_range; + ppar->max_endpoint_val= part_info->no_parts; + } + else if (part_info->part_type == LIST_PARTITION) + { + ppar->get_endpoint= get_list_array_idx_for_endpoint; + ppar->endpoints_walk_func= part_num_to_part_id_list; + ppar->max_endpoint_val= part_info->no_list_values; + } + } + + KEY_PART *key_part; + MEM_ROOT *alloc= range_par->mem_root; + if (!total_parts || + !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* + total_parts)) || + !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)* + total_parts)) || + !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts)) || + !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts))) + return TRUE; + + range_par->key_parts= key_part; + Field **field= (ppar->part_fields)? part_info->part_field_array : + part_info->subpart_field_array; + bool subpart_fields= FALSE; + for (uint part= 0; part < total_parts; part++, key_part++) + { + key_part->key= 0; + key_part->part= part; + key_part->length= (*field)->pack_length_in_rec(); + /* + psergey-todo: check yet again if this is correct for tricky field types, + e.g. see "Fix a fatal error in decimal key handling" in + open_binary_frm(). + */ + key_part->store_length= (*field)->pack_length(); + if ((*field)->real_maybe_null()) + key_part->store_length+= HA_KEY_NULL_LENGTH; + if ((*field)->type() == FIELD_TYPE_BLOB || + (*field)->real_type() == MYSQL_TYPE_VARCHAR) + key_part->store_length+= HA_KEY_BLOB_LENGTH; + + key_part->field= (*field); + key_part->image_type = Field::itRAW; + /* We don't set key_parts->null_bit as it will not be used */ + + ppar->is_part_keypart[part]= !subpart_fields; + ppar->is_subpart_keypart[part]= subpart_fields; + + if (!*(++field)) + { + field= part_info->subpart_field_array; + subpart_fields= TRUE; + } + } + range_par->key_parts_end= key_part; + + DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts, + range_par->key_parts_end);); + return FALSE; +} + + +#ifndef DBUG_OFF + +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) +{ + DBUG_ENTER("print_partitioning_index"); + DBUG_LOCK_FILE; + fprintf(DBUG_FILE, "partitioning INDEX("); + for (KEY_PART *p=parts; p != parts_end; p++) + { + fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name); + } + fprintf(DBUG_FILE, ");\n"); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + +/* Print field value into debug trace, in NULL-aware way. */ +static void dbug_print_field(Field *field) +{ + if (field->is_real_null()) + fprintf(DBUG_FILE, "NULL"); + else + { + String str; + String *pstr; + pstr= field->val_str(&str); + fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe()); + } +} + + +/* Print a "c1 < keypartX < c2" - type interval into debug trace. */ +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) +{ + DBUG_ENTER("dbug_print_segment_range"); + DBUG_LOCK_FILE; + if (!(arg->min_flag & NO_MIN_RANGE)) + { + arg->field->set_key_image((char*)(arg->min_value), part->length); + dbug_print_field(part->field); + if (arg->min_flag & NEAR_MIN) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + } + + fprintf(DBUG_FILE, "%s", part->field->field_name); + + if (!(arg->max_flag & NO_MAX_RANGE)) + { + if (arg->max_flag & NEAR_MAX) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + arg->field->set_key_image((char*)(arg->max_value), part->length); + dbug_print_field(part->field); + } + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + + +/* + Print a singlepoint multi-keypart range interval to debug trace + + SYNOPSIS + dbug_print_onepoint_range() + start Array of SEL_ARG* ptrs representing conditions on key parts + num Number of elements in the array. + + DESCRIPTION + This function prints a "keypartN=constN AND ... AND keypartK=constK"-type + interval to debug trace. +*/ + +static void dbug_print_onepoint_range(SEL_ARG **start, uint num) +{ + DBUG_ENTER("dbug_print_onepoint_range"); + DBUG_LOCK_FILE; + SEL_ARG **end= start + num; + for (SEL_ARG **arg= start; arg != end; arg++) + { + Field *field= (*arg)->field; + fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name); + dbug_print_field(field); + } + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} +#endif + +/**************************************************************************** + * Partition pruning code ends + ****************************************************************************/ +#endif + /* Get cost of 'sweep' full records retrieval. @@ -3424,7 +4311,7 @@ QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, 0 on error */ -static SEL_TREE *get_ne_mm_tree(PARAM *param, Item_func *cond_func, +static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field, Item *lt_value, Item *gt_value, Item_result cmp_type) @@ -3459,7 +4346,7 @@ static SEL_TREE *get_ne_mm_tree(PARAM *param, Item_func *cond_func, Pointer to the tree built tree */ -static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, +static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field, Item *value, Item_result cmp_type, bool inv) { @@ -3552,7 +4439,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, /* make a select tree of all keys in condition */ -static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) { SEL_TREE *tree=0; SEL_TREE *ftree= 0; @@ -3725,7 +4612,7 @@ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) static SEL_TREE * -get_mm_parts(PARAM *param, COND *cond_func, Field *field, +get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field, Item_func::Functype type, Item *value, Item_result cmp_type) { @@ -3775,7 +4662,7 @@ get_mm_parts(PARAM *param, COND *cond_func, Field *field, static SEL_ARG * -get_mm_leaf(PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, +get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value) { uint maybe_null=(uint) field->real_maybe_null(); @@ -3834,8 +4721,11 @@ get_mm_leaf(PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, !(conf_func->compare_collation()->state & MY_CS_BINSORT)) goto end; - optimize_range= field->optimize_range(param->real_keynr[key_part->key], - key_part->part); + if (param->using_real_indexes) + optimize_range= field->optimize_range(param->real_keynr[key_part->key], + key_part->part); + else + optimize_range= TRUE; if (type == Item_func::LIKE_FUNC) { @@ -4102,7 +4992,7 @@ sel_add(SEL_ARG *key1,SEL_ARG *key2) static SEL_TREE * -tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) +tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) { DBUG_ENTER("tree_and"); if (!tree1) @@ -4172,7 +5062,7 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) using index_merge. */ -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param) +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param) { key_map common_keys= tree1->keys_map; DBUG_ENTER("sel_trees_can_be_ored"); @@ -4199,7 +5089,7 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param) } static SEL_TREE * -tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) +tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) { DBUG_ENTER("tree_or"); if (!tree1 || !tree2) diff --git a/sql/opt_range.h b/sql/opt_range.h index f84058f3b64..3cd348ba9af 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -249,6 +249,7 @@ public: struct st_qsel_param; +class PARAM; class SEL_ARG; /* @@ -283,12 +284,12 @@ protected: QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, struct st_table_ref *ref, ha_rows records); - friend bool get_quick_keys(struct st_qsel_param *param, + friend bool get_quick_keys(PARAM *param, QUICK_RANGE_SELECT *quick,KEY_PART *key, SEL_ARG *key_tree, char *min_key, uint min_key_flag, char *max_key, uint max_key_flag); - friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx, + friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx, SEL_ARG *key_tree, MEM_ROOT *alloc); friend class QUICK_SELECT_DESC; @@ -718,4 +719,8 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, ha_rows records); uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit); +#ifdef WITH_PARTITION_STORAGE_ENGINE +bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); +#endif + #endif diff --git a/sql/sql_class.cc b/sql/sql_class.cc index a28324c5e28..dd5f7aee3de 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -745,6 +745,13 @@ int THD::send_explain_fields(select_result *result) field_list.push_back(new Item_empty_string("select_type", 19, cs)); field_list.push_back(item= new Item_empty_string("table", NAME_LEN, cs)); item->maybe_null= 1; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (lex->describe & DESCRIBE_PARTITIONS) + { + field_list.push_back(item= new Item_empty_string("partitions", 10, cs)); + item->maybe_null= 1; + } +#endif field_list.push_back(item= new Item_empty_string("type", 10, cs)); item->maybe_null= 1; field_list.push_back(item=new Item_empty_string("possible_keys", diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 06a101a1c5c..9bd9ca3db45 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -102,6 +102,11 @@ enum enum_sql_command { // describe/explain types #define DESCRIBE_NORMAL 1 #define DESCRIBE_EXTENDED 2 +/* + This is not #ifdef'ed because we want "EXPLAIN PARTITIONS ..." to produce + additional "partitions" column even if partitioning is not compiled in. +*/ +#define DESCRIBE_PARTITIONS 4 enum enum_sp_suid_behaviour { diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 8571e39d9b8..944e715920c 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -2477,16 +2477,94 @@ bool get_partition_id_list(partition_info *part_info, if (list_value < part_func_value) min_list_index= list_index + 1; else if (list_value > part_func_value) + { + if (!list_index) + goto notfound; max_list_index= list_index - 1; - else { + } + else + { *part_id= (uint32)list_array[list_index].partition_id; DBUG_RETURN(FALSE); } } +notfound: *part_id= 0; DBUG_RETURN(TRUE); } +/* + Find the part of part_info->list_array that corresponds to given interval + + SYNOPSIS + get_list_array_idx_for_endpoint() + part_info Partitioning info (partitioning type must be LIST) + left_endpoint TRUE - the interval is [a; +inf) or (a; +inf) + FALSE - the interval is (-inf; a] or (-inf; a) + include_endpoint TRUE iff the interval includes the endpoint + + DESCRIPTION + This function finds the part of part_info->list_array where values of + list_array[idx].list_value are contained within the specifed interval. + list_array is ordered by list_value, so + 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the + sought array part starts at some index idx and continues till array + end. + The function returns first number idx, such that + list_array[idx].list_value is contained within the passed interval. + + 2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the + sought array part starts at array start and continues till some last + index idx. + The function returns first number idx, such that + list_array[idx].list_value is NOT contained within the passed interval. + If all array elements are contained, part_info->no_list_values is + returned. + + NOTE + The caller will call this function and then will run along the part of + list_array to collect partition ids. If the number of list values is + significantly higher then number of partitions, this could be slow and + we could invent some other approach. The "run over list array" part is + already wrapped in a get_next()-like function. + + RETURN + The edge of corresponding part_info->list_array part. +*/ + +uint32 get_list_array_idx_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint) +{ + DBUG_ENTER("get_list_array_idx_for_endpoint"); + LIST_PART_ENTRY *list_array= part_info->list_array; + uint list_index; + longlong list_value; + uint min_list_index= 0, max_list_index= part_info->no_list_values - 1; + longlong part_func_value= part_info->part_expr->val_int(); + while (max_list_index >= min_list_index) + { + list_index= (max_list_index + min_list_index) >> 1; + list_value= list_array[list_index].list_value; + if (list_value < part_func_value) + min_list_index= list_index + 1; + else if (list_value > part_func_value) + { + if (!list_index) + goto notfound; + max_list_index= list_index - 1; + } + else + { + DBUG_RETURN(list_index + test(left_endpoint ^ include_endpoint)); + } + } +notfound: + if (list_value < part_func_value) + list_index++; + DBUG_RETURN(list_index); +} + bool get_partition_id_range(partition_info *part_info, uint32 *part_id) @@ -2516,6 +2594,89 @@ bool get_partition_id_range(partition_info *part_info, DBUG_RETURN(FALSE); } + +/* + Find the part of part_info->range_int_array that covers the given interval + + SYNOPSIS + get_partition_id_range_for_endpoint() + part_info Partitioning info (partitioning type must be RANGE) + left_endpoint TRUE - the interval is [a; +inf) or (a; +inf) + FALSE - the interval is (-inf; a] or (-inf; a). + include_endpoint TRUE <=> the endpoint itself is included in the + interval + + DESCRIPTION + This function finds the part of part_info->range_int_array where the + elements have non-empty intersections with the given interval. + + A range_int_array element at index idx represents the interval + + [range_int_array[idx-1], range_int_array[idx]), + + intervals are disjoint and ordered by their right bound, so + + 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the + sought array part starts at some index idx and continues till array + end. + The function returns first number idx, such that the interval + represented by range_int_array[idx] has non empty intersection with + the passed interval. + + 2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the + sought array part starts at array start and continues till some last + index idx. + The function returns first number idx, such that the interval + represented by range_int_array[idx] has EMPTY intersection with the + passed interval. + If the interval represented by the last array element has non-empty + intersection with the passed interval, part_info->no_parts is + returned. + + RETURN + The edge of corresponding part_info->range_int_array part. +*/ + +uint32 get_partition_id_range_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint) +{ + DBUG_ENTER("get_partition_id_range_for_endpoint"); + longlong *range_array= part_info->range_int_array; + uint max_partition= part_info->no_parts - 1; + uint min_part_id= 0, max_part_id= max_partition, loc_part_id; + longlong part_func_value= part_info->part_expr->val_int(); + while (max_part_id > min_part_id) + { + loc_part_id= (max_part_id + min_part_id + 1) >> 1; + if (range_array[loc_part_id] < part_func_value) + min_part_id= loc_part_id + 1; + else + max_part_id= loc_part_id - 1; + } + loc_part_id= max_part_id; + if (loc_part_id < max_partition && + part_func_value >= range_array[loc_part_id+1]) + { + loc_part_id++; + } + if (left_endpoint) + { + if (part_func_value >= range_array[loc_part_id]) + loc_part_id++; + } + else + { + if (part_func_value == range_array[loc_part_id]) + loc_part_id += test(include_endpoint); + else if (part_func_value > range_array[loc_part_id]) + loc_part_id++; + loc_part_id++; + } + DBUG_RETURN(loc_part_id); +} + + bool get_partition_id_hash_nosub(partition_info *part_info, uint32 *part_id) { @@ -3204,10 +3365,16 @@ bool mysql_unpack_partition(THD *thd, const uchar *part_buf, */ uint part_func_len= part_info->part_func_len; uint subpart_func_len= part_info->subpart_func_len; + uint bitmap_bits= part_info->no_subparts? + (part_info->no_subparts* part_info->no_parts): + part_info->no_parts; + uint bitmap_bytes= bitmap_buffer_size(bitmap_bits); + uint32 *bitmap_buf; char *part_func_string, *subpart_func_string= NULL; if (!((part_func_string= thd->alloc(part_func_len))) || (subpart_func_len && - !((subpart_func_string= thd->alloc(subpart_func_len))))) + !((subpart_func_string= thd->alloc(subpart_func_len)))) || + !((bitmap_buf= (uint32*)thd->alloc(bitmap_bytes)))) { my_error(ER_OUTOFMEMORY, MYF(0), part_func_len); free_items(thd->free_list); @@ -3220,6 +3387,8 @@ bool mysql_unpack_partition(THD *thd, const uchar *part_buf, subpart_func_len); part_info->part_func_string= part_func_string; part_info->subpart_func_string= subpart_func_string; + + bitmap_init(&part_info->used_partitions, bitmap_buf, bitmap_bytes*8, FALSE); } result= FALSE; @@ -3293,3 +3462,60 @@ void set_key_field_ptr(KEY *key_info, const byte *new_buf, } while (++i < key_parts); DBUG_VOID_RETURN; } + + +/* + Fill the string comma-separated line of used partitions names + SYNOPSIS + make_used_partitions_str() + part_info IN Partitioning info + parts_str OUT The string to fill +*/ + +void make_used_partitions_str(partition_info *part_info, String *parts_str) +{ + parts_str->length(0); + partition_element *pe; + uint partition_id= 0; + List_iterator it(part_info->partitions); + + if (part_info->subpart_type != NOT_A_PARTITION) + { + partition_element *head_pe; + while ((head_pe= it++)) + { + List_iterator it2(head_pe->subpartitions); + while ((pe= it2++)) + { + if (bitmap_is_set(&part_info->used_partitions, partition_id)) + { + if (parts_str->length()) + parts_str->append(','); + parts_str->append(head_pe->partition_name, + strlen(head_pe->partition_name), + system_charset_info); + parts_str->append('_'); + parts_str->append(pe->partition_name, + strlen(pe->partition_name), + system_charset_info); + } + partition_id++; + } + } + } + else + { + while ((pe= it++)) + { + if (bitmap_is_set(&part_info->used_partitions, partition_id)) + { + if (parts_str->length()) + parts_str->append(','); + parts_str->append(pe->partition_name, strlen(pe->partition_name), + system_charset_info); + } + partition_id++; + } + } +} + diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 9c3fd90b6b9..d64f39c7c66 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -633,6 +633,21 @@ JOIN::optimize() DBUG_RETURN(0); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + TABLE_LIST *tbl; + for (tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf) + { + if (!tbl->embedding) + { + Item *prune_cond= tbl->on_expr? tbl->on_expr : conds; + tbl->table->no_partitions_used= prune_partitions(thd, tbl->table, + prune_cond); + } + } + } +#endif + /* Optimize count(*), min() and max() */ if (tables_list && tmp_table_param.sum_func_count && ! group_list) { @@ -2018,7 +2033,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables, COND *conds, if (*s->on_expr_ref) { /* s is the only inner table of an outer join */ - if (!table->file->records && !embedding) +#ifdef WITH_PARTITION_STORAGE_ENGINE + if ((!table->file->records || table->no_partitions_used) && !embedding) +#else + if (!table->file->records || && !embedding) +#endif { // Empty table s->dependent= 0; // Ignore LEFT JOIN depend. set_position(join,const_count++,s,(KEYUSE*) 0); @@ -2045,8 +2064,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables, COND *conds, while (embedding); continue; } - - if ((table->s->system || table->file->records <= 1) && ! s->dependent && +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool no_partitions_used= table->no_partitions_used; +#else + const bool no_partitions_used= FALSE; +#endif + if ((table->s->system || table->file->records <= 1 || + no_partitions_used) && + !s->dependent && !(table->file->table_flags() & HA_NOT_EXACT_COUNT) && !table->fulltext_searched) { @@ -13767,6 +13792,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join->select_lex->type), cs)); for (uint i=0 ; i < 7; i++) item_list.push_back(item_null); + if (join->thd->lex->describe & DESCRIBE_PARTITIONS) + item_list.push_back(item_null); + item_list.push_back(new Item_string(message,strlen(message),cs)); if (result->send_data(item_list)) join->error= 1; @@ -13887,7 +13915,28 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(table->alias, strlen(table->alias), cs)); - /* type */ + /* "partitions" column */ + if (join->thd->lex->describe & DESCRIBE_PARTITIONS) + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + if (!table->derived_select_number && + (part_info= table->part_info)) + { + char parts_buff[128]; + String parts_str(parts_buff,sizeof(parts_buff),cs); + make_used_partitions_str(part_info, &parts_str); + item_list.push_back(new Item_string(parts_str.ptr(), + parts_str.length(), cs)); + } + else + item_list.push_back(item_null); +#else + /* just produce empty column if partitioning is not compiled in */ + item_list.push_back(item_null); +#endif + } + /* "type" column */ item_list.push_back(new Item_string(join_type_str[tab->type], strlen(join_type_str[tab->type]), cs)); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 01dfd9f2f5a..ce205a104f1 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -7380,8 +7380,10 @@ describe_command: opt_extended_describe: /* empty */ {} | EXTENDED_SYM { Lex->describe|= DESCRIBE_EXTENDED; } + | PARTITIONS_SYM { Lex->describe|= DESCRIBE_PARTITIONS; } ; + opt_describe_column: /* empty */ {} | text_string { Lex->wild= $1; } From 2a3943dbec73d8234b8ba4fd09a54c63d7131463 Mon Sep 17 00:00:00 2001 From: "sergefp@mysql.com" <> Date: Mon, 26 Dec 2005 08:40:09 +0300 Subject: [PATCH 2/4] WL#2985 "Partition Pruning": post-review fixes: - Added more comments. - Added a RANGE_OPT_PARAM::remove_jump_scans flag that disables construction of index_merge SEL_TREEs that represent unusable conditions like "key1part1 3; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p1,p2 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t1 where a1 >= 3; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p1,p2 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t1 where a1 < 3 and a1 > 3; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +drop table t1; +create table t3 (a int, b int) +partition by list(a) subpartition by hash(b) subpartitions 4 ( +partition p0 values in (1), +partition p1 values in (2), +partition p2 values in (3), +partition p3 values in (4) +); +insert into t3 values (1,1),(2,2),(3,3); +explain partitions select * from t3 where a=2 or b=1; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t3 p0_sp1,p1_sp0,p1_sp1,p1_sp2,p1_sp3,p2_sp1,p3_sp1 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t3 where a=4 or b=2; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t3 p0_sp2,p1_sp2,p2_sp2,p3_sp0,p3_sp1,p3_sp2,p3_sp3 ALL NULL NULL NULL NULL 3 Using where +explain partitions select * from t3 where (a=2 or b=1) and (a=4 or b=2) ; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t3 p1_sp2,p3_sp1 ALL NULL NULL NULL NULL 3 Using where +create table t1 (a int) partition by hash(a) partitions 2; +insert into t1 values (1),(2); +explain partitions select * from t1 where a is null; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0 ALL NULL NULL NULL NULL 2 Using where +explain partitions select * from t1 where a is not null; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0,p1 ALL NULL NULL NULL NULL 2 Using where +drop table t1; diff --git a/mysql-test/t/partition_pruning.test b/mysql-test/t/partition_pruning.test index e496a1b8e12..876efbea00b 100644 --- a/mysql-test/t/partition_pruning.test +++ b/mysql-test/t/partition_pruning.test @@ -192,4 +192,46 @@ insert into t1 values ('a'),('b'),('c'); explain partitions select * from t1 where a='b'; drop table t1; +# +# Test cases for bugs found in code review: +# +create table t1 ( + a1 int not null +) +partition by range (a1) ( + partition p0 values less than (3), + partition p1 values less than (6), + partition p2 values less than (9) +); +insert into t1 values (1),(2),(3); +explain partitions select * from t1 where a1 > 3; +explain partitions select * from t1 where a1 >= 3; +explain partitions select * from t1 where a1 < 3 and a1 > 3; +drop table t1; + +# +create table t3 (a int, b int) + partition by list(a) subpartition by hash(b) subpartitions 4 ( + partition p0 values in (1), + partition p1 values in (2), + partition p2 values in (3), + partition p3 values in (4) + ); +insert into t3 values (1,1),(2,2),(3,3); + +explain partitions select * from t3 where a=2 or b=1; +explain partitions select * from t3 where a=4 or b=2; +explain partitions select * from t3 where (a=2 or b=1) and (a=4 or b=2) ; + +# Test for NULLs +create table t1 (a int) partition by hash(a) partitions 2; +insert into t1 values (1),(2); +explain partitions select * from t1 where a is null; + +# this selects both +explain partitions select * from t1 where a is not null; +drop table t1; + +# No tests for NULLs in RANGE(monotonic_expr()) - they depend on BUG#15447 +# being fixed. diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 77822c55cc8..370241f7743 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -24,16 +24,42 @@ */ /* - Classes in this file are used in the following way: - 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects - is created. #of rows in table and index statistics are ignored at this - step. - 2. Created SEL_TREE and index stats data are used to construct a - TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read - plans may be created. - 3. The least expensive table read plan is used to create a tree of - QUICK_SELECT_I-derived objects which are later used for row retrieval. - QUICK_RANGEs are also created in this step. + This file contains: + + RangeAnalysisModule + A module that accepts a condition, index (or partitioning) description, + and builds lists of intervals (in index/partitioning space), such that + all possible records that match the condition are contained within the + intervals. + The entry point for the range analysis module is get_mm_tree() function. + + The lists are returned in form of complicated structure of interlinked + SEL_TREE/SEL_IMERGE/SEL_ARG objects. + See check_quick_keys, find_used_partitions for examples of how to walk + this structure. + All direct "users" of this module are located within this file, too. + + + PartitionPruningModule + A module that accepts a partitioned table, condition, and finds which + partitions we will need to use in query execution. Search down for + "PartitionPruningModule" for description. + The module has single entry point - prune_partitions() function. + + + Range/index_merge/groupby-minmax optimizer module + A module that accepts a table, condition, and returns + - a QUICK_*_SELECT object that can be used to retrieve rows that match + the specified condition, or a "no records will match the condition" + statement. + + The module entry points are + test_quick_select() + get_quick_select_for_ref() + + + Record retrieval code for range/index_merge/groupby-min-max. + Implementations of QUICK_*_SELECT classes. */ #ifdef USE_PRAGMA_IMPLEMENTATION @@ -301,6 +327,11 @@ class SEL_IMERGE; class SEL_TREE :public Sql_alloc { public: + /* + Starting an effort to document this field: + (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) => + (type == SEL_TREE::IMPOSSIBLE) + */ enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type; SEL_TREE(enum Type type_arg) :type(type_arg) {} SEL_TREE() :type(KEY) @@ -354,6 +385,8 @@ public: */ bool using_real_indexes; + bool remove_jump_scans; + /* used_key_no -> table_key_no translation table. Only makes sense if using_real_indexes==TRUE @@ -380,7 +413,7 @@ public: uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ - /* TRUE if last checked tree->key can be used for ROR-scan */ + /* TRUE if last checked tree->key can be used for ROR-scan */ bool is_ror_scan; }; @@ -1810,6 +1843,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.needed_reg= &needed_reg; param.imerge_cost_buff_size= 0; param.using_real_indexes= TRUE; + param.remove_jump_scans= TRUE; thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); @@ -2005,6 +2039,87 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, ****************************************************************************/ #ifdef WITH_PARTITION_STORAGE_ENGINE +/* + PartitionPruningModule + + This part of the code does partition pruning. Partition pruning solves the + following problem: given a query over partitioned tables, find partitions + that we will not need to access (i.e. partitions that we can assume to be + empty) when executing the query. + The set of partitions to prune doesn't depend on which query execution + plan will be used to execute the query. + + HOW IT WORKS + + Partition pruning module makes use of RangeAnalysisModule. The following + examples show how the problem of partition pruning can be reduced to the + range analysis problem: + + EXAMPLE 1 + Consider a query: + + SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z' + + where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent + way to find the used (i.e. not pruned away) partitions is as follows: + + 1. analyze the WHERE clause and extract the list of intervals over t1.a + for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)} + + 2. for each interval I + { + find partitions that have non-empty intersection with I; + mark them as used; + } + + EXAMPLE 2 + Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then + we need to: + + 1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b). + The list of intervals we'll obtain will look like this: + ((t1.a, t1.b) = (1,'foo')), + ((t1.a, t1.b) = (2,'bar')), + ((t1,a, t1.b) > (10,'zz')) (**) + + 2. for each interval I + { + if (the interval has form "(t1.a, t1.b) = (const1, const2)" ) + { + calculate HASH(part_func(t1.a, t1.b)); + find which partition has records with this hash value and mark + it as used; + } + else + { + mark all partitions as used; + break; + } + } + + For both examples the step #1 is exactly what RangeAnalysisModule could + be used to do, if it was provided with appropriate index description + (array of KEY_PART structures). + In example #1, we need to provide it with description of index(t1.a), + in example #2, we need to provide it with description of index(t1.a, t1.b). + + These index descriptions are further called "partitioning index + descriptions". Note that it doesn't matter if such indexes really exist, + as range analysis module only uses the description. + + Putting it all together, partitioning module works as follows: + + prune_partitions() { + call create_partition_index_descrition(); + + call get_mm_tree(); // invoke the RangeAnalysisModule + + // analyze the obtained interval list and get used partitions + call find_used_partitions(); + } + +*/ + struct st_part_prune_param; struct st_part_opt_info; @@ -2019,7 +2134,7 @@ typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint, */ typedef struct st_part_prune_param { - RANGE_OPT_PARAM range_param; /* Range optimizer parameters */ + RANGE_OPT_PARAM range_param; /* Range analyzer parameters */ /*************************************************************** Following fields are filled in based solely on partitioning @@ -2096,18 +2211,30 @@ typedef struct st_part_prune_param /*************************************************************** Following fields are used to store an 'iterator' that can be - used to obtain a set of used of used partitions. + used to obtain a set of used artitions. **************************************************************/ /* - Start number, end number and + Start and end+1 partition "numbers". They can have two meanings depending + depending of the value of part_num_to_part_id: + part_num_to_part_id_range - numbers are partition ids + part_num_to_part_id_list - numbers are indexes in part_info->list_array */ - part_num_to_partition_id_func part_num_to_part_id; uint32 start_part_num; uint32 end_part_num; + + /* + A function that should be used to convert two above "partition numbers" + to partition_ids. + */ + part_num_to_partition_id_func part_num_to_part_id; } PART_PRUNE_PARAM; static bool create_partition_index_descrition(PART_PRUNE_PARAM *prune_par); static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree); +static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, + SEL_IMERGE *imerge); +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List &merges); static void mark_all_partitions_as_used(partition_info *part_info); static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, uint32 num); @@ -2185,6 +2312,7 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) range_par->keys= 1; // one index range_par->using_real_indexes= FALSE; + range_par->remove_jump_scans= FALSE; range_par->real_keynr[0]= 0; thd->no_errors=1; // Don't warn about NULL @@ -2196,8 +2324,7 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) int res; tree= get_mm_tree(range_par, pprune_cond); - if (!tree || (tree->type != SEL_TREE::KEY && - tree->type != SEL_TREE::KEY_SMALLER)) + if (!tree) goto all_used; if (tree->type == SEL_TREE::IMPOSSIBLE) @@ -2205,6 +2332,9 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) retval= TRUE; goto end; } + + if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) + goto all_used; if (tree->merges.is_empty()) { @@ -2220,27 +2350,30 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) } else { - res= 0; - DBUG_ASSERT(tree->merges.elements == 1); - SEL_IMERGE *imerge= tree->merges.head(); - for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) + if (tree->merges.elements == 1) { - prune_param.arg_stack_end= prune_param.arg_stack; - prune_param.cur_part_fields= 0; - prune_param.cur_subpart_fields= 0; - prune_param.part_num_to_part_id= part_num_to_part_id_range; - prune_param.start_part_num= 0; - prune_param.end_part_num= prune_param.part_info->no_parts; - if (-1 == (res |= find_used_partitions(&prune_param, (*ptree)->keys[0]))) + if (-1 == (res |= find_used_partitions_imerge(&prune_param, + tree->merges.head()))) + goto all_used; + } + else + { + if (-1 == (res |= find_used_partitions_imerge_list(&prune_param, + tree->merges))) goto all_used; } } - if (!res) - retval= TRUE; + /* + res == 0 => no used partitions => retval=TRUE + res == 1 => some used partitions => retval=FALSE + res == -1 - we jump over this line to all_used: + */ + retval= test(!res); goto end; all_used: + retval= FALSE; // some partitions are used mark_all_partitions_as_used(prune_param.part_info); end: thd->no_errors=0; @@ -2316,16 +2449,113 @@ static void mark_full_partition_used_with_parts(partition_info *part_info, bitmap_set_bit(&part_info->used_partitions, start); } -static -uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, uint32 num) +/* See comment in PART_PRUNE_PARAM::part_num_to_part_id about what this is */ +static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* ppar, uint32 num) { return num; } -static -uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* prune_par, uint32 num) +/* See comment in PART_PRUNE_PARAM::part_num_to_part_id about what this is */ +static uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* ppar, uint32 num) { - return prune_par->part_info->list_array[num].partition_id; + return ppar->part_info->list_array[num].partition_id; +} + + +/* + Find the set of used partitions for List + SYNOPSIS + find_used_partitions_imerge_list + ppar Partition pruning context. + key_tree Intervals tree to perform pruning for. + + DESCRIPTION + List represents "imerge1 AND imerge2 AND ...". + The set of used partitions is an intersection of used partitions sets + for imerge_{i}. + We accumulate this intersection a separate bitmap. + + RETURN + See find_used_partitions() +*/ + +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List &merges) +{ + MY_BITMAP all_merges; + uint bitmap_bytes; + uint32 *bitmap_buf; + uint n_bits= ppar->part_info->used_partitions.n_bits; + bitmap_bytes= bitmap_buffer_size(n_bits); + if (!(bitmap_buf= (uint32*)alloc_root(ppar->range_param.mem_root, + bitmap_bytes))) + { + /* + Fallback, process just first SEL_IMERGE. This can leave us with more + partitions marked as used then actually needed. + */ + return find_used_partitions_imerge(ppar, merges.head()); + } + bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE); + bitmap_set_prefix(&all_merges, n_bits); + + List_iterator it(merges); + SEL_IMERGE *imerge; + while ((imerge=it++)) + { + int res= find_used_partitions_imerge(ppar, imerge); + if (!res) + { + /* no used partitions on one ANDed imerge => no used partitions at all */ + return 0; + } + + if (res != -1) + bitmap_intersect(&all_merges, &ppar->part_info->used_partitions); + + if (bitmap_is_clear_all(&all_merges)) + return 0; + + bitmap_clear_all(&ppar->part_info->used_partitions); + } + memcpy(ppar->part_info->used_partitions.bitmap, all_merges.bitmap, + bitmap_bytes); + return 1; +} + + +/* + Find the set of used partitions for SEL_IMERGE structure + SYNOPSIS + find_used_partitions_imerge() + ppar Partition pruning context. + key_tree Intervals tree to perform pruning for. + + DESCRIPTION + SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is + trivial - just use mark used partitions for each tree and bail out early + if for some tree_{i} all partitions are used. + + RETURN + See find_used_partitions(). +*/ + +static +int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge) +{ + int res= 0; + for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) + { + ppar->arg_stack_end= ppar->arg_stack; + ppar->cur_part_fields= 0; + ppar->cur_subpart_fields= 0; + ppar->part_num_to_part_id= part_num_to_part_id_range; + ppar->start_part_num= 0; + ppar->end_part_num= ppar->part_info->no_parts; + if (-1 == (res |= find_used_partitions(ppar, (*ptree)->keys[0]))) + return -1; + } + return res; } @@ -2370,8 +2600,8 @@ uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* prune_par, uint32 num) 1 OK, one or more [sub]partitions are marked as used. 0 The passed condition doesn't match any partitions -1 Couldn't infer any partition pruning "intervals" from the passed - SEL_ARG* tree. Marking partitions as used is the responsibility of - the caller. + SEL_ARG* tree (which means that all partitions should be marked as + used) Marking partitions as used is the responsibility of the caller. */ static @@ -2401,11 +2631,15 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) key_parts);); /* Find minimum */ if (key_tree->min_flag & NO_MIN_RANGE) - { ppar->start_part_num= 0; - } else { + /* + Store the interval edge in the record buffer, and call the + function that maps the edge in table-field space to an edge + in ordered-set-of-partitions (for RANGE partitioning) or + indexes-in-ordered-array-of-list-constants (for LIST) space. + */ store_key_image_to_rec(key_tree->field, key_tree->min_value, ppar->range_param.key_parts[0].length); bool include_endp= ppar->force_include_bounds || @@ -2419,11 +2653,9 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) } } - /* Find maximum */ - if (key_tree->min_flag & NO_MAX_RANGE) - { + /* Find maximum, do the same as above but for right interval bound */ + if (key_tree->max_flag & NO_MAX_RANGE) ppar->end_part_num= ppar->max_endpoint_val; - } else { store_key_image_to_rec(key_tree->field, key_tree->max_value, @@ -2441,7 +2673,7 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) ppar->part_num_to_part_id= ppar->endpoints_walk_func; /* - Save our intent to mark full partition as used if we will not be able + Save our intent to mark full partition as used if we will not be able to obtain further limits on subpartitions */ set_full_part_if_bad_ret= TRUE; @@ -2507,7 +2739,6 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) bitmap_set_bit(&part_info->used_partitions, ppar->part_num_to_part_id(ppar, num) * part_info->no_subparts + subpart_id); - ppar->start_part_num++; } res= 1; /* Some partitions were marked as used */ goto pop_and_go_right; @@ -2726,8 +2957,7 @@ static bool create_partition_index_descrition(PART_PRUNE_PARAM *ppar) key_part->length= (*field)->pack_length_in_rec(); /* psergey-todo: check yet again if this is correct for tricky field types, - e.g. see "Fix a fatal error in decimal key handling" in - open_binary_frm(). + e.g. see "Fix a fatal error in decimal key handling" in open_binary_frm() */ key_part->store_length= (*field)->pack_length(); if ((*field)->real_maybe_null()) @@ -2768,7 +2998,7 @@ static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) { fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name); } - fprintf(DBUG_FILE, ");\n"); + fputs(");\n", DBUG_FILE); DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; } @@ -2780,7 +3010,9 @@ static void dbug_print_field(Field *field) fprintf(DBUG_FILE, "NULL"); else { - String str; + char buf[256]; + String str(buf, sizeof(buf), &my_charset_bin); + str.length(0); String *pstr; pstr= field->val_str(&str); fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe()); @@ -2795,7 +3027,7 @@ static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) DBUG_LOCK_FILE; if (!(arg->min_flag & NO_MIN_RANGE)) { - arg->field->set_key_image((char*)(arg->min_value), part->length); + store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length); dbug_print_field(part->field); if (arg->min_flag & NEAR_MIN) fputs(" < ", DBUG_FILE); @@ -2811,9 +3043,10 @@ static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) fputs(" < ", DBUG_FILE); else fputs(" <= ", DBUG_FILE); - arg->field->set_key_image((char*)(arg->max_value), part->length); + store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length); dbug_print_field(part->field); } + fputs("\n", DBUG_FILE); DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; } @@ -2837,12 +3070,14 @@ static void dbug_print_onepoint_range(SEL_ARG **start, uint num) DBUG_ENTER("dbug_print_onepoint_range"); DBUG_LOCK_FILE; SEL_ARG **end= start + num; + for (SEL_ARG **arg= start; arg != end; arg++) { Field *field= (*arg)->field; fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name); dbug_print_field(field); } + fputs("\n", DBUG_FILE); DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; } @@ -5062,7 +5297,8 @@ tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) using index_merge. */ -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param) +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, + RANGE_OPT_PARAM* param) { key_map common_keys= tree1->keys_map; DBUG_ENTER("sel_trees_can_be_ored"); @@ -5088,6 +5324,79 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* pa DBUG_RETURN(FALSE); } + +/* + Remove the trees that are not suitable for record retrieval. + SYNOPSIS + param Range analysis parameter + tree Tree to be processed, tree->type is KEY or KEY_SMALLER + + DESCRIPTION + This function walks through tree->keys[] and removes the SEL_ARG* trees + that are not "maybe" trees (*) and cannot be used to construct quick range + selects. + (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of + these types here as well. + + A SEL_ARG* tree cannot be used to construct quick select if it has + tree->part != 0. (e.g. it could represent "keypart2 < const"). + + WHY THIS FUNCTION IS NEEDED + + Normally we allow construction of SEL_TREE objects that have SEL_ARG + trees that do not allow quick range select construction. For example for + " keypart1=1 AND keypart2=2 " the execution will proceed as follows: + tree1= SEL_TREE { SEL_ARG{keypart1=1} } + tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select + from this + call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG + tree. + + There is an exception though: when we construct index_merge SEL_TREE, + any SEL_ARG* tree that cannot be used to construct quick range select can + be removed, because current range analysis code doesn't provide any way + that tree could be later combined with another tree. + Consider an example: we should not construct + st1 = SEL_TREE { + merges = SEL_IMERGE { + SEL_TREE(t.key1part1 = 1), + SEL_TREE(t.key2part2 = 2) -- (*) + } + }; + because + - (*) cannot be used to construct quick range select, + - There is no execution path that would cause (*) to be converted to + a tree that could be used. + + The latter is easy to verify: first, notice that the only way to convert + (*) into a usable tree is to call tree_and(something, (*)). + + Second look at what tree_and/tree_or function would do when passed a + SEL_TREE that has the structure like st1 tree has, and conlcude that + tree_and(something, (*)) will not be called. + + RETURN + 0 Ok, some suitable trees left + 1 No tree->keys[] left. +*/ + +static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree) +{ + bool res= FALSE; + for (uint i=0; i < param->keys; i++) + { + if (tree->keys[i]) + { + if (tree->keys[i]->part) + tree->keys[i]= NULL; + else + res= TRUE; + } + } + return res; +} + + static SEL_TREE * tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) { @@ -5131,6 +5440,13 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) /* ok, two trees have KEY type but cannot be used without index merge */ if (tree1->merges.is_empty() && tree2->merges.is_empty()) { + if (param->remove_jump_scans) + { + bool no_trees= remove_nonrange_trees(param, tree1); + no_trees= no_trees || remove_nonrange_trees(param, tree2); + if (no_trees) + DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); + } SEL_IMERGE *merge; /* both trees are "range" trees, produce new index merge structure */ if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) || @@ -5153,7 +5469,9 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) /* one tree is index merge tree and another is range tree */ if (tree1->merges.is_empty()) swap_variables(SEL_TREE*, tree1, tree2); - + + if (param->remove_jump_scans && remove_nonrange_trees(param, tree2)) + DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */ if (imerge_list_or_tree(param, &tree1->merges, tree2)) result= new SEL_TREE(SEL_TREE::ALWAYS); diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 944e715920c..385bbd6fa59 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -2493,9 +2493,10 @@ notfound: DBUG_RETURN(TRUE); } + /* - Find the part of part_info->list_array that corresponds to given interval - + Find the sub-array part_info->list_array that corresponds to given interval + SYNOPSIS get_list_array_idx_for_endpoint() part_info Partitioning info (partitioning type must be LIST) @@ -2504,17 +2505,16 @@ notfound: include_endpoint TRUE iff the interval includes the endpoint DESCRIPTION - This function finds the part of part_info->list_array where values of + This function finds the sub-array of part_info->list_array where values of list_array[idx].list_value are contained within the specifed interval. list_array is ordered by list_value, so 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the - sought array part starts at some index idx and continues till array - end. + sought sub-array starts at some index idx and continues till array end. The function returns first number idx, such that list_array[idx].list_value is contained within the passed interval. 2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the - sought array part starts at array start and continues till some last + sought sub-array starts at array start and continues till some last index idx. The function returns first number idx, such that list_array[idx].list_value is NOT contained within the passed interval. @@ -2522,14 +2522,14 @@ notfound: returned. NOTE - The caller will call this function and then will run along the part of + The caller will call this function and then will run along the sub-array of list_array to collect partition ids. If the number of list values is significantly higher then number of partitions, this could be slow and we could invent some other approach. The "run over list array" part is already wrapped in a get_next()-like function. RETURN - The edge of corresponding part_info->list_array part. + The edge of corresponding sub-array of part_info->list_array */ uint32 get_list_array_idx_for_endpoint(partition_info *part_info, @@ -2541,6 +2541,7 @@ uint32 get_list_array_idx_for_endpoint(partition_info *part_info, uint list_index; longlong list_value; uint min_list_index= 0, max_list_index= part_info->no_list_values - 1; + /* Get the partitioning function value for the endpoint */ longlong part_func_value= part_info->part_expr->val_int(); while (max_list_index >= min_list_index) { @@ -2596,8 +2597,8 @@ bool get_partition_id_range(partition_info *part_info, /* - Find the part of part_info->range_int_array that covers the given interval - + Find the sub-array of part_info->range_int_array that covers given interval + SYNOPSIS get_partition_id_range_for_endpoint() part_info Partitioning info (partitioning type must be RANGE) @@ -2607,9 +2608,9 @@ bool get_partition_id_range(partition_info *part_info, interval DESCRIPTION - This function finds the part of part_info->range_int_array where the + This function finds the sub-array of part_info->range_int_array where the elements have non-empty intersections with the given interval. - + A range_int_array element at index idx represents the interval [range_int_array[idx-1], range_int_array[idx]), @@ -2617,14 +2618,13 @@ bool get_partition_id_range(partition_info *part_info, intervals are disjoint and ordered by their right bound, so 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the - sought array part starts at some index idx and continues till array - end. + sought sub-array starts at some index idx and continues till array end. The function returns first number idx, such that the interval represented by range_int_array[idx] has non empty intersection with the passed interval. 2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the - sought array part starts at array start and continues till some last + sought sub-array starts at array start and continues till some last index idx. The function returns first number idx, such that the interval represented by range_int_array[idx] has EMPTY intersection with the @@ -2634,7 +2634,7 @@ bool get_partition_id_range(partition_info *part_info, returned. RETURN - The edge of corresponding part_info->range_int_array part. + The edge of corresponding part_info->range_int_array sub-array. */ uint32 get_partition_id_range_for_endpoint(partition_info *part_info, @@ -2645,6 +2645,7 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info, longlong *range_array= part_info->range_int_array; uint max_partition= part_info->no_parts - 1; uint min_part_id= 0, max_part_id= max_partition, loc_part_id; + /* Get the partitioning function value for the endpoint */ longlong part_func_value= part_info->part_expr->val_int(); while (max_part_id > min_part_id) { diff --git a/sql/table.h b/sql/table.h index c4e99ed2a0d..eebaea4dd55 100644 --- a/sql/table.h +++ b/sql/table.h @@ -297,6 +297,7 @@ struct st_table { FILESORT_INFO sort; #ifdef WITH_PARTITION_STORAGE_ENGINE partition_info *part_info; /* Partition related information */ + bool no_partitions_used; /* If true, all partitions have been pruned away */ #endif bool fill_item_list(List *item_list) const; From 15780cb3980a1f1ef91246e26ed0cd2462e7eac0 Mon Sep 17 00:00:00 2001 From: "sergefp@mysql.com" <> Date: Mon, 26 Dec 2005 08:51:29 +0300 Subject: [PATCH 3/4] Fix typo bug in previous cset --- sql/opt_range.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 370241f7743..877801f0ffe 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -5393,7 +5393,7 @@ static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree) res= TRUE; } } - return res; + return !res; } From 81452dbcfce9561874d6e1c0e321a9ec04c2c5d0 Mon Sep 17 00:00:00 2001 From: "sergefp@mysql.com" <> Date: Mon, 26 Dec 2005 09:46:25 +0300 Subject: [PATCH 4/4] WL#2985 "Partition pruning": fixes to post-review fixes: when modifying SEL_TREE::keys, adjust SEL_TREE::keys_map appropriately. --- sql/opt_range.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 877801f0ffe..f090fac2ecf 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -5388,7 +5388,10 @@ static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree) if (tree->keys[i]) { if (tree->keys[i]->part) + { tree->keys[i]= NULL; + tree->keys_map.clear_bit(i); + } else res= TRUE; }