From 2ba683f2eeef8eb13462c5e46ca625d669e48463 Mon Sep 17 00:00:00 2001 From: "gkodinov/kgeorge@macbook.gmz" <> Date: Wed, 31 Jan 2007 10:18:26 +0200 Subject: [PATCH] Bug #25551: inconsistent behaviour in grouping NULL, depending on index type The optimizer takes away columns from GROUP BY/DISTINCT if they constitute all the parts of an unique index. However if some of the columns can contain NULLs this cannot be done (because an UNIQUE index can have multiple rows with NULL values). Fixed by not using UNIQUE indexes with nullable columns to remove grouping columns from GROUP BY/DISTINCT. --- mysql-test/r/distinct.result | 26 +++++++++++++++++++++++++- mysql-test/t/distinct.test | 17 ++++++++++++++++- sql/sql_select.cc | 19 +++++++++++-------- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/mysql-test/r/distinct.result b/mysql-test/r/distinct.result index 32151305698..3508a83a810 100644 --- a/mysql-test/r/distinct.result +++ b/mysql-test/r/distinct.result @@ -530,7 +530,8 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 3 -CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b)); +CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT, +PRIMARY KEY (a,b)); INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4); EXPLAIN SELECT DISTINCT a FROM t2; id select_type table type possible_keys key key_len ref rows Extra @@ -644,3 +645,26 @@ SELECT COUNT(*) FROM COUNT(*) 2 DROP TABLE t1, t2; +CREATE TABLE t1 (a INT, UNIQUE (a)); +INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3); +EXPLAIN SELECT DISTINCT a FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 5 NULL 6 Using index +SELECT DISTINCT a FROM t1; +a +NULL +1 +2 +3 +4 +EXPLAIN SELECT a FROM t1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 5 NULL 6 Using index +SELECT a FROM t1 GROUP BY a; +a +NULL +1 +2 +3 +4 +DROP TABLE t1; diff --git a/mysql-test/t/distinct.test b/mysql-test/t/distinct.test index 8734b940241..476e4ce7735 100644 --- a/mysql-test/t/distinct.test +++ b/mysql-test/t/distinct.test @@ -364,7 +364,8 @@ EXPLAIN SELECT a FROM t1 GROUP BY a; EXPLAIN SELECT a,b FROM t1 GROUP BY a,b; EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b; -CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b)); +CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT, + PRIMARY KEY (a,b)); INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4); EXPLAIN SELECT DISTINCT a FROM t2; EXPLAIN SELECT DISTINCT a,a FROM t2; @@ -525,3 +526,17 @@ SELECT COUNT(*) FROM (SELECT DISTINCT a FROM t2 WHERE a='oe' COLLATE latin1_german2_ci) dt; DROP TABLE t1, t2; + +# +# Bug #25551: inconsistent behaviour in grouping NULL, depending on index type +# +CREATE TABLE t1 (a INT, UNIQUE (a)); +INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3); +EXPLAIN SELECT DISTINCT a FROM t1; +#result must have one row with NULL +SELECT DISTINCT a FROM t1; +EXPLAIN SELECT a FROM t1 GROUP BY a; +#result must have one row with NULL +SELECT a FROM t1 GROUP BY a; + +DROP TABLE t1; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b03c4556279..1486cf521a5 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -848,10 +848,11 @@ JOIN::optimize() } /* Check if we can optimize away GROUP BY/DISTINCT. - We can do that if there are no aggregate functions and the + We can do that if there are no aggregate functions, the fields in DISTINCT clause (if present) and/or columns in GROUP BY (if present) contain direct references to all key parts of - an unique index (in whatever order). + an unique index (in whatever order) and if the key parts of the + unique index cannot contain NULLs. Note that the unique keys for DISTINCT and GROUP BY should not be the same (as long as they are unique). @@ -11856,7 +11857,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, /* - Check if GROUP BY/DISTINCT can be optimized away because the set is + Check if GROUP BY/DISTINCT can be optimized away because the set is already known to be distinct. SYNOPSIS @@ -11864,7 +11865,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, table The table to operate on. find_func function to iterate over the list and search for a field - + DESCRIPTION Used in removing the GROUP BY/DISTINCT of the following types of statements: @@ -11875,12 +11876,13 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, then ,{whatever} is also distinct This function checks if all the key parts of any of the unique keys - of the table are referenced by a list : either the select list + of the table are referenced by a list : either the select list through find_field_in_item_list or GROUP BY list through find_field_in_order_list. - If the above holds then we can safely remove the GROUP BY/DISTINCT, + If the above holds and the key parts cannot contain NULLs then we + can safely remove the GROUP BY/DISTINCT, as no result set can be more distinct than an unique key. - + RETURN VALUE 1 found 0 not found. @@ -11903,7 +11905,8 @@ list_contains_unique_index(TABLE *table, key_part < key_part_end; key_part++) { - if (!find_func(key_part->field, data)) + if (key_part->field->maybe_null() || + !find_func(key_part->field, data)) break; } if (key_part == key_part_end)