From 2ba683f2eeef8eb13462c5e46ca625d669e48463 Mon Sep 17 00:00:00 2001
From: "gkodinov/kgeorge@macbook.gmz" <>
Date: Wed, 31 Jan 2007 10:18:26 +0200
Subject: [PATCH] Bug #25551: inconsistent behaviour in grouping NULL,
 depending on index type  The optimizer takes away columns from GROUP
 BY/DISTINCT if they constitute  all the parts of an unique index.  However if
 some of the columns can contain NULLs this cannot be done (because an UNIQUE
 index can have multiple rows with NULL values).  Fixed by not using UNIQUE
 indexes with nullable columns to remove  grouping columns from GROUP
 BY/DISTINCT.

---
 mysql-test/r/distinct.result | 26 +++++++++++++++++++++++++-
 mysql-test/t/distinct.test   | 17 ++++++++++++++++-
 sql/sql_select.cc            | 19 +++++++++++--------
 3 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/mysql-test/r/distinct.result b/mysql-test/r/distinct.result
index 32151305698..3508a83a810 100644
--- a/mysql-test/r/distinct.result
+++ b/mysql-test/r/distinct.result
@@ -530,7 +530,8 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	
-CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b));
+CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT, 
+PRIMARY KEY (a,b));
 INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
 EXPLAIN SELECT DISTINCT a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -644,3 +645,26 @@ SELECT COUNT(*) FROM
 COUNT(*)
 2
 DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, UNIQUE (a));
+INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3);
+EXPLAIN SELECT DISTINCT a FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	5	NULL	6	Using index
+SELECT DISTINCT a FROM t1;
+a
+NULL
+1
+2
+3
+4
+EXPLAIN SELECT a FROM t1 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	5	NULL	6	Using index
+SELECT a FROM t1 GROUP BY a;
+a
+NULL
+1
+2
+3
+4
+DROP TABLE t1;
diff --git a/mysql-test/t/distinct.test b/mysql-test/t/distinct.test
index 8734b940241..476e4ce7735 100644
--- a/mysql-test/t/distinct.test
+++ b/mysql-test/t/distinct.test
@@ -364,7 +364,8 @@ EXPLAIN SELECT a FROM t1 GROUP BY a;
 EXPLAIN SELECT a,b FROM t1 GROUP BY a,b;
 EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b;
 
-CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b));
+CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT, 
+                PRIMARY KEY (a,b));
 INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
 EXPLAIN SELECT DISTINCT a FROM t2;
 EXPLAIN SELECT DISTINCT a,a FROM t2;
@@ -525,3 +526,17 @@ SELECT COUNT(*) FROM
   (SELECT DISTINCT a FROM t2 WHERE a='oe' COLLATE latin1_german2_ci) dt;
 
 DROP TABLE t1, t2;
+
+#
+# Bug #25551: inconsistent behaviour in grouping NULL, depending on index type
+#
+CREATE TABLE t1 (a INT, UNIQUE (a));
+INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3);
+EXPLAIN SELECT DISTINCT a FROM t1;
+#result must have one row with NULL
+SELECT DISTINCT a FROM t1;
+EXPLAIN SELECT a FROM t1 GROUP BY a;
+#result must have one row with NULL
+SELECT a FROM t1 GROUP BY a;
+
+DROP TABLE t1;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b03c4556279..1486cf521a5 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -848,10 +848,11 @@ JOIN::optimize()
   }
   /*
      Check if we can optimize away GROUP BY/DISTINCT.
-     We can do that if there are no aggregate functions and the
+     We can do that if there are no aggregate functions, the
      fields in DISTINCT clause (if present) and/or columns in GROUP BY
      (if present) contain direct references to all key parts of
-     an unique index (in whatever order).
+     an unique index (in whatever order) and if the key parts of the
+     unique index cannot contain NULLs.
      Note that the unique keys for DISTINCT and GROUP BY should not
      be the same (as long as they are unique).
 
@@ -11856,7 +11857,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
 
 
 /*
-  Check if GROUP BY/DISTINCT can be optimized away because the set is 
+  Check if GROUP BY/DISTINCT can be optimized away because the set is
   already known to be distinct.
   
   SYNOPSIS
@@ -11864,7 +11865,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
     table                The table to operate on.
     find_func            function to iterate over the list and search
                          for a field
-  
+
   DESCRIPTION
     Used in removing the GROUP BY/DISTINCT of the following types of
     statements:
@@ -11875,12 +11876,13 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
       then <any combination of a,b,c>,{whatever} is also distinct
 
     This function checks if all the key parts of any of the unique keys
-    of the table are referenced by a list : either the select list 
+    of the table are referenced by a list : either the select list
     through find_field_in_item_list or GROUP BY list through
     find_field_in_order_list.
-    If the above holds then we can safely remove the GROUP BY/DISTINCT,
+    If the above holds and the key parts cannot contain NULLs then we 
+    can safely remove the GROUP BY/DISTINCT,
     as no result set can be more distinct than an unique key.
-  
+ 
   RETURN VALUE
     1                    found
     0                    not found.
@@ -11903,7 +11905,8 @@ list_contains_unique_index(TABLE *table,
            key_part < key_part_end;
            key_part++)
       {
-        if (!find_func(key_part->field, data))
+        if (key_part->field->maybe_null() || 
+            !find_func(key_part->field, data))
           break;
       }
       if (key_part == key_part_end)