From eded60737d6bd233c50c4f68e76432154f768363 Mon Sep 17 00:00:00 2001 From: Martin Hansson Date: Fri, 9 Oct 2009 11:30:40 +0200 Subject: [PATCH] Bug#42846: wrong result returned for range scan when using covering index When two range predicates were combined under an OR predicate, the algorithm tried to merge overlapping ranges into one. But the case when a range overlapped several other ranges was not handled. This lead to 1) ranges overlapping, which gave repeated results and 2) a range that overlapped several other ranges was cut off. Fixed by 1) Making sure that a range got an upper bound equal to the next range with a greater minimum. 2) Removing a continue statement --- mysql-test/r/group_min_max.result | 6 +- mysql-test/r/range.result | 179 ++++++++++++++++++++++++++++++ mysql-test/t/range.test | 125 +++++++++++++++++++++ sql/opt_range.cc | 75 ++++++++++++- 4 files changed, 379 insertions(+), 6 deletions(-) diff --git a/mysql-test/r/group_min_max.result b/mysql-test/r/group_min_max.result index ac9a53ca238..620f5dc19ec 100644 --- a/mysql-test/r/group_min_max.result +++ b/mysql-test/r/group_min_max.result @@ -876,10 +876,10 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by +1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by +1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by @@ -924,7 +924,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by +1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by diff --git a/mysql-test/r/range.result b/mysql-test/r/range.result index cc5e8d2be96..c98a7696ea6 100644 --- a/mysql-test/r/range.result +++ b/mysql-test/r/range.result @@ -1219,3 +1219,182 @@ explain select * from t2 where a=1000 and b<11; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ref a a 5 const 502 Using where drop table t1, t2; +CREATE TABLE t1( a INT, b INT, KEY( a, b ) ); +CREATE TABLE t2( a INT, b INT, KEY( a, b ) ); +CREATE TABLE t3( a INT, b INT, KEY( a, b ) ); +INSERT INTO t1( a, b ) +VALUES (0, 1), (1, 2), (1, 4), (2, 3), (5, 0), (9, 7); +INSERT INTO t2( a, b ) +VALUES ( 1, 1), ( 2, 1), ( 3, 1), ( 4, 1), ( 5, 1), +( 6, 1), ( 7, 1), ( 8, 1), ( 9, 1), (10, 1), +(11, 1), (12, 1), (13, 1), (14, 1), (15, 1), +(16, 1), (17, 1), (18, 1), (19, 1), (20, 1); +INSERT INTO t2 SELECT a, 2 FROM t2 WHERE b = 1; +INSERT INTO t2 SELECT a, 3 FROM t2 WHERE b = 1; +INSERT INTO t2 SELECT -1, -1 FROM t2; +INSERT INTO t2 SELECT -1, -1 FROM t2; +INSERT INTO t2 SELECT -1, -1 FROM t2; +INSERT INTO t3 +VALUES (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), +(6, 0), (7, 0), (8, 0), (9, 0), (10, 0); +INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; +INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 < a AND b = 3 OR +3 <= a; +a b +5 0 +9 7 +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 < a AND b = 3 OR +3 <= a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 <= a AND b = 3 OR +3 <= a; +a b +5 0 +9 7 +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 <= a AND b = 3 OR +3 <= a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 4 Using where; Using index +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +5 <= a AND b = 3 OR +3 <= a; +a b +5 0 +9 7 +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +5 <= a AND b = 3 OR +3 <= a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +3 <= a; +a b +5 0 +9 7 +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +3 <= a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 1 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; +a b +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +10 1 +11 1 +12 1 +13 1 +14 1 +15 1 +15 3 +16 1 +16 3 +17 1 +17 3 +18 1 +18 3 +19 1 +19 3 +20 1 +EXPLAIN +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 1 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 10 NULL 50 Using where; Using index +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 2 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; +a b +1 1 +2 1 +3 1 +4 1 +5 1 +5 2 +6 1 +6 2 +7 1 +7 2 +8 1 +8 2 +9 1 +9 2 +10 1 +11 1 +12 1 +13 1 +14 1 +15 1 +15 3 +16 1 +16 3 +17 1 +17 3 +18 1 +18 3 +19 1 +19 3 +20 1 +EXPLAIN +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 2 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 10 NULL 50 Using where; Using index +SELECT * FROM t3 WHERE +5 <= a AND a < 10 AND b = 3 OR +a < 5 OR +a < 10; +a b +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +EXPLAIN +SELECT * FROM t3 WHERE +5 <= a AND a < 10 AND b = 3 OR +a < 5 OR +a < 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 range a a 5 NULL 8 Using where; Using index +DROP TABLE t1, t2, t3; diff --git a/mysql-test/t/range.test b/mysql-test/t/range.test index e1411e7fd46..dc119b6a77e 100644 --- a/mysql-test/t/range.test +++ b/mysql-test/t/range.test @@ -1046,3 +1046,128 @@ explain select * from t2 where a=1000 and b<11; drop table t1, t2; +# +# Bug#42846: wrong result returned for range scan when using covering index +# +CREATE TABLE t1( a INT, b INT, KEY( a, b ) ); + +CREATE TABLE t2( a INT, b INT, KEY( a, b ) ); + +CREATE TABLE t3( a INT, b INT, KEY( a, b ) ); + +INSERT INTO t1( a, b ) +VALUES (0, 1), (1, 2), (1, 4), (2, 3), (5, 0), (9, 7); + +INSERT INTO t2( a, b ) +VALUES ( 1, 1), ( 2, 1), ( 3, 1), ( 4, 1), ( 5, 1), + ( 6, 1), ( 7, 1), ( 8, 1), ( 9, 1), (10, 1), + (11, 1), (12, 1), (13, 1), (14, 1), (15, 1), + (16, 1), (17, 1), (18, 1), (19, 1), (20, 1); + +INSERT INTO t2 SELECT a, 2 FROM t2 WHERE b = 1; +INSERT INTO t2 SELECT a, 3 FROM t2 WHERE b = 1; + +# To make range scan compelling to the optimizer +INSERT INTO t2 SELECT -1, -1 FROM t2; +INSERT INTO t2 SELECT -1, -1 FROM t2; +INSERT INTO t2 SELECT -1, -1 FROM t2; + +INSERT INTO t3 +VALUES (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), + (6, 0), (7, 0), (8, 0), (9, 0), (10, 0); + +# To make range scan compelling to the optimizer +INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; +INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; + + +# +# Problem#1 Test queries. Will give missing results unless Problem#1 is fixed. +# With one exception, they are independent of Problem#2. +# +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 < a AND b = 3 OR +3 <= a; + +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 < a AND b = 3 OR +3 <= a; + +# Query below: Tests both Problem#1 and Problem#2 (EXPLAIN differs as well) +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 <= a AND b = 3 OR +3 <= a; + +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a < 5 OR +5 <= a AND b = 3 OR +3 <= a; + +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +5 <= a AND b = 3 OR +3 <= a; + +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +5 <= a AND b = 3 OR +3 <= a; + +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +3 <= a; + +EXPLAIN +SELECT * FROM t1 WHERE +3 <= a AND a <= 5 OR +3 <= a; + +# +# Problem#2 Test queries. +# These queries will give missing results if Problem#1 is fixed. +# But Problem#1 also hides this bug. +# +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 1 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; + +EXPLAIN +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 1 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; + +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 2 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; + +EXPLAIN +SELECT * FROM t2 WHERE +5 <= a AND a < 10 AND b = 2 OR +15 <= a AND a < 20 AND b = 3 +OR +1 <= a AND b = 1; + +SELECT * FROM t3 WHERE +5 <= a AND a < 10 AND b = 3 OR +a < 5 OR +a < 10; + +EXPLAIN +SELECT * FROM t3 WHERE +5 <= a AND a < 10 AND b = 3 OR +a < 5 OR +a < 10; + +DROP TABLE t1, t2, t3; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 1b1d948b3b9..119f90bc97a 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -6512,6 +6512,63 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1) } +/** + Combine two range expression under a common OR. On a logical level, the + transformation is key_or( expr1, expr2 ) => expr1 OR expr2. + + Both expressions are assumed to be in the SEL_ARG format. In a logic sense, + theformat is reminiscent of DNF, since an expression such as the following + + ( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 ) + + where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1 + and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid + SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of + the first range and ranges must not overlap. It follows that they are also + ordered by maximum endpoints. Thus + + ( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3 + + Is a a valid SER_ARG expression for a key of at least 2 keyparts. + + For simplicity, we will assume that expr2 is a single range predicate, + i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a + disjunction of several predicates by subsequently call key_or for each + disjunct. + + The algorithm iterates over each disjunct of expr1, and for each disjunct + where the first keypart's range overlaps with the first keypart's range in + expr2: + + If the predicates are equal for the rest of the keyparts, or if there are + no more, the range in expr2 has its endpoints copied in, and the SEL_ARG + node in expr2 is deallocated. If more ranges became connected in expr1, the + surplus is also dealocated. If they differ, two ranges are created. + + - The range leading up to the overlap. Empty if endpoints are equal. + + - The overlapping sub-range. May be the entire range if they are equal. + + Finally, there may be one more range if expr2's first keypart's range has a + greater maximum endpoint than the last range in expr1. + + For the overlapping sub-range, we recursively call key_or. Thus in order to + compute key_or of + + (1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 ) + + (2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 ) + + We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the + first one, we simply hook on the condition for the second keypart from (1) + : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4 + < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse + the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus + + ( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR + ( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR + ( 10 <= kp1 < 20 AND 4 < kp2 < 20 ) +*/ static SEL_ARG * key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) { @@ -6663,7 +6720,21 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) key1=key1->tree_delete(save); } last->copy_min(tmp); - if (last->copy_min(key2) || last->copy_max(key2)) + bool full_range= last->copy_min(key2); + if (!full_range) + { + if (last->next && key2->cmp_max_to_min(last->next) >= 0) + { + last->max_value= last->next->min_value; + if (last->next->min_flag & NEAR_MIN) + last->max_flag&= ~NEAR_MAX; + else + last->max_flag|= NEAR_MAX; + } + else + full_range= last->copy_max(key2); + } + if (full_range) { // Full range key1->free_tree(); for (; key2 ; key2=key2->next) @@ -6673,8 +6744,6 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) return 0; } } - key2=key2->next; - continue; } if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)