From 7e7dfcccd64472c9d46b319e997181d0354b668c Mon Sep 17 00:00:00 2001
From: unknown <bar@mysql.com>
Date: Mon, 6 Sep 2004 20:04:22 +0500
Subject: [PATCH] Bug #5324 Bug in UCA collations with LIKE comparisons and
 INDEX

---
 include/m_ctype.h             |  7 +++
 mysql-test/r/ctype_uca.result | 39 ++++++++++++++++
 mysql-test/t/ctype_uca.test   | 37 +++++++++++++++
 strings/ctype-mb.c            | 86 +++++++++++++++++++++++++++++++++++
 strings/ctype-uca.c           |  7 +--
 5 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/include/m_ctype.h b/include/m_ctype.h
index 65b11f4c06a..16490af7fc3 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -312,6 +312,13 @@ my_bool  my_like_range_simple(CHARSET_INFO *cs,
 			      char *min_str, char *max_str,
 			      uint *min_length, uint *max_length);
 
+my_bool  my_like_range_mb(CHARSET_INFO *cs,
+			  const char *ptr, uint ptr_length,
+			  pbool escape, pbool w_one, pbool w_many,
+			  uint res_length,
+			  char *min_str, char *max_str,
+			  uint *min_length, uint *max_length);
+
 my_bool  my_like_range_ucs2(CHARSET_INFO *cs,
 			    const char *ptr, uint ptr_length,
 			    pbool escape, pbool w_one, pbool w_many,
diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result
index 94fe15fed26..da4b5bfb663 100644
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@@ -1872,3 +1872,42 @@ Z,z,Ź,ź,Ż,ż,Ž,ž
 ǁ
 ǂ
 ǃ
+drop table t1;
+SET NAMES utf8;
+CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
+COLLATE utf8_general_ci;
+c
+Μωδαί̈
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
+COLLATE utf8_general_ci ORDER BY c;
+c
+Μωδ
+Μωδαί̈
+DROP TABLE t1;
+CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
+SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
+c
+Μωδαί̈
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
+SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
+COLLATE ucs2_unicode_ci ORDER BY c;
+c
+Μωδ
+Μωδαί̈
+DROP TABLE t1;
+CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
+c
+Μωδαί̈
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
+COLLATE utf8_unicode_ci ORDER BY c;
+c
+Μωδ
+Μωδαί̈
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test
index 187d21f9ab7..d9181b19992 100644
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@@ -180,3 +180,40 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_slovak_ci;
 select group_concat(c1 order by c1) from t1 group by c1 collate utf8_spanish2_ci;
 select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci;
 
+drop table t1;
+
+#
+# Bug#5324
+#
+SET NAMES utf8;
+#test1
+CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
+#Check one row
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
+COLLATE utf8_general_ci;
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
+COLLATE utf8_general_ci ORDER BY c;
+DROP TABLE t1;
+#test2
+CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
+#Check one row
+SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
+COLLATE ucs2_unicode_ci ORDER BY c;
+DROP TABLE t1;
+#test3
+CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
+#Check one row
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
+COLLATE utf8_unicode_ci ORDER BY c;
+DROP TABLE t1;
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 25ee85d62b4..3bfc66029ce 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -458,6 +458,92 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
   }
 }
 
+/*
+** Calculate min_str and max_str that range a LIKE string.
+** Arguments:
+** cs		Character set; min_sort_char, max_sort_char and wc_mb are used.
+** ptr		Pointer to LIKE string; ptr_length is its length in bytes.
+** escape	Escape character in LIKE.  (Normally '\').  It is dropped and
+**		the byte that follows it is copied literally.
+** w_one, w_many	Wildcard characters; the first one found ends the prefix.
+** res_length	Length in bytes of min_str and max_str.
+** min_str	Smallest key: prefix, then min_sort_char (spaces if no wildcard).
+** max_str	Largest key: prefix, then max_sort_char (spaces if no wildcard).
+** min_length, max_length	Out: prefix length and res_length when a wildcard
+**		was found, otherwise both are the number of bytes copied.
+** The function should return 0 if ok and 1 if the LIKE string can't be
+** optimized.  The code below returns 0 on every path.
+*/
+
+my_bool my_like_range_mb(CHARSET_INFO *cs,
+			 const char *ptr,uint ptr_length,
+			 pbool escape, pbool w_one, pbool w_many,
+			 uint res_length,
+			 char *min_str,char *max_str,
+			 uint *min_length,uint *max_length)
+{
+  const char *end=ptr+ptr_length;
+  char *min_org=min_str;
+  char *min_end=min_str+res_length;
+  char *max_end=max_str+res_length;
+
+  for (; ptr != end && min_str != min_end ; ptr++)
+  {
+    if (*ptr == escape && ptr+1 != end)
+    {
+      ptr++;					/* Skip escape, keep next byte */
+      *min_str++= *max_str++ = *ptr;
+      continue;
+    }
+    if (*ptr == w_one || *ptr == w_many)	/* '_' and '%' in SQL */
+    {
+      char buf[10];
+      uint buflen;
+      
+      /* Write min key: set both lengths, then pad with min_sort_char */
+      *min_length= (uint) (min_str - min_org);
+      *max_length=res_length;
+      do
+      {
+	*min_str++= (char) cs->min_sort_char;
+      } while (min_str != min_end);
+      
+      /*
+        Write max key: encode max_sort_char via wc_mb() into buf and copy
+        it into max_str in a loop.  NOTE(review): buflen is a uint, so the
+        assert below cannot catch a negative wc_mb() result -- verify.
+      */
+      buflen= cs->cset->wc_mb(cs, cs->max_sort_char, buf, buf + sizeof(buf));
+      DBUG_ASSERT(buflen > 0);
+      do
+      {
+        if ((max_str + buflen) <= max_end)
+        {
+          /* Enough space for the whole max character */
+          memcpy(max_str, buf, buflen);
+          max_str+= buflen;
+        }
+        else
+        {
+          /*
+            No room is left for a whole multibyte
+            character, so pad the tail with spaces.
+          */
+          
+	  *max_str++= ' ';
+	}
+      } while (max_str != max_end);
+      return 0;
+    }
+    *min_str++= *max_str++ = *ptr;
+  }
+  *min_length= *max_length = (uint) (min_str - min_org);
+
+  while (min_str != min_end)
+    *min_str++ = *max_str++ = ' ';	/* Because of key compression */
+  return 0;
+}
+
 static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
 		  const char *str,const char *str_end,
 		  const char *wildstr,const char *wildend,
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index cecc3be5045..edb84dbf225 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -6876,7 +6876,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
     int mblen;
     
     if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc, 
-                                          scanner->sbeg, scanner->send)) < 0))
+                                          scanner->sbeg,
+                                          scanner->send)) <= 0))
       return -1;
     
     scanner->page= wc >> 8;
@@ -7918,7 +7919,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
     my_strnncoll_ucs2_uca,
     my_strnncollsp_ucs2_uca,
     my_strnxfrm_ucs2_uca,
-    my_like_range_simple,
+    my_like_range_ucs2,
     my_wildcmp_uca,
     NULL,
     my_instr_mb,
@@ -8369,7 +8370,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
     my_strnncoll_any_uca,
     my_strnncollsp_any_uca,
     my_strnxfrm_any_uca,
-    my_like_range_simple,
+    my_like_range_mb,
     my_wildcmp_uca,
     NULL,
     my_instr_mb,