Bug #5324 Bug in UCA collations with LIKE comparisons and INDEX

2025-07-30 16:24:05 +03:00 · 2004-09-06 20:04:22 +05:00
parent 4011f819e2
commit 7e7dfcccd6
5 changed files with 173 additions and 3 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -312,6 +312,13 @@ my_bool  my_like_range_simple(CHARSET_INFO *cs,
 			      char *min_str, char *max_str,
 			      uint *min_length, uint *max_length);
 my_bool  my_like_range_mb(CHARSET_INFO *cs,
 			  const char *ptr, uint ptr_length,
 			  pbool escape, pbool w_one, pbool w_many,
 			  uint res_length,
 			  char *min_str, char *max_str,
 			  uint *min_length, uint *max_length);
 my_bool  my_like_range_ucs2(CHARSET_INFO *cs,
 			    const char *ptr, uint ptr_length,
 			    pbool escape, pbool w_one, pbool w_many,
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@ -1872,3 +1872,42 @@ Z,z,Ź,ź,Ż,ż,Ž,ž
 ǁ
 ǂ
 ǃ
 drop table t1;
 SET NAMES utf8;
 CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
 COLLATE utf8_general_ci;
 c
 Μωδαί̈
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
 COLLATE utf8_general_ci ORDER BY c;
 c
 Μωδ
 Μωδαί̈
 DROP TABLE t1;
 CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
 INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
 SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
 c
 Μωδαί̈
 INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
 SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
 COLLATE ucs2_unicode_ci ORDER BY c;
 c
 Μωδ
 Μωδαί̈
 DROP TABLE t1;
 CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
 c
 Μωδαί̈
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
 COLLATE utf8_unicode_ci ORDER BY c;
 c
 Μωδ
 Μωδαί̈
 DROP TABLE t1;
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@ -180,3 +180,40 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_slovak_ci;
 select group_concat(c1 order by c1) from t1 group by c1 collate utf8_spanish2_ci;
 select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci;
 drop table t1;
 #
 # Bug#5324
 #
 SET NAMES utf8;
 #test1
 CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
 #Check one row
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
 COLLATE utf8_general_ci;
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
 #Check two rows
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
 COLLATE utf8_general_ci ORDER BY c;
 DROP TABLE t1;
 #test2
 CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
 INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
 #Check one row
 SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
 INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
 #Check two rows
 SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
 COLLATE ucs2_unicode_ci ORDER BY c;
 DROP TABLE t1;
 #test3
 CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
 #Check one row
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
 INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
 #Check two rows
 SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
 COLLATE utf8_unicode_ci ORDER BY c;
 DROP TABLE t1;
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@ -458,6 +458,92 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
  }
 }
 /*
 ** Calculate min_str and max_str that range a LIKE string, for a
 ** multi-byte character set.
 **
 ** Arguments:
 ** cs		Character set. Its min_sort_char and max_sort_char are used
 **		to fill the keys after the first wildcard; max_sort_char is
 **		encoded with cs->cset->wc_mb().
 ** ptr		Pointer to LIKE string.
 ** ptr_length	Length of LIKE string, in bytes.
 ** escape	Escape character in LIKE.  (Normally '\').
 **		When it is followed by another byte, the escape itself is
 **		dropped and the following byte is copied literally. An escape
 **		that is the very last byte of the pattern is handled like any
 **		other pattern byte.
 ** w_one	Wildcard that matches one character  ('_' in SQL).
 ** w_many	Wildcard that matches any sequence   ('%' in SQL).
 ** res_length	Length of min_str and max_str, in bytes. Copying of the
 **		pattern stops once res_length bytes have been produced.
 ** min_str	Smallest case sensitive string that ranges LIKE.
 **		Without a wildcard it is space padded to res_length; after a
 **		wildcard it is filled with min_sort_char.
 ** max_str	Largest case sensitive string that ranges LIKE.
 **		After a wildcard it is filled with the multi-byte form of
 **		max_sort_char; a tail too short for one whole character is
 **		filled with spaces. Without a wildcard it is space padded.
 ** min_length	Out: number of bytes copied before the first wildcard, or
 **		the number of pattern bytes copied when there is no wildcard.
 ** max_length	Out: res_length when a wildcard was found, otherwise the same
 **		value as *min_length.
 **
 ** The function returns 0 if ok and 1 if the LIKE string can't be
 ** optimized. 1 is returned when a wildcard is present but max_sort_char
 ** cannot be encoded in this character set; in that case nothing is stored
 ** through min_length/max_length and no padding is written.
 */
 my_bool my_like_range_mb(CHARSET_INFO *cs,
 			 const char *ptr,uint ptr_length,
 			 pbool escape, pbool w_one, pbool w_many,
 			 uint res_length,
 			 char *min_str,char *max_str,
 			 uint *min_length,uint *max_length)
 {
   const char *end= ptr + ptr_length;
   char *min_org= min_str;
   char *min_end= min_str + res_length;
   char *max_end= max_str + res_length;

   for (; ptr != end && min_str != min_end ; ptr++)
   {
     if (*ptr == escape && ptr+1 != end)
     {
       ptr++;					/* Skip escape */
       *min_str++= *max_str++ = *ptr;
       continue;
     }
     if (*ptr == w_one || *ptr == w_many)	/* '_' and '%' in SQL */
     {
       char buf[10];
       int buflen;

       /*
         Encode max_sort_char first, before any output is touched.
         The length is kept signed on purpose: stored in an unsigned
         variable a negative result would look like a huge positive
         length, slip past the assertion and overrun max_str, and a
         zero length would make the fill loop below spin forever
         because max_str would never advance.
       */
       buflen= cs->cset->wc_mb(cs, cs->max_sort_char, buf, buf + sizeof(buf));
       DBUG_ASSERT(buflen > 0);
       if (buflen <= 0)
         return 1;				/* Can't build a max key */

       /* Write min key */
       *min_length= (uint) (min_str - min_org);
       *max_length= res_length;
       do
       {
         *min_str++= (char) cs->min_sort_char;
       } while (min_str != min_end);

       /*
         Write max key: repeat the multi-byte representation of
         max_sort_char until the buffer is full. The remaining space is
         compared as a byte count so that no pointer beyond max_end is
         ever formed.
       */
       do
       {
         if ((max_end - max_str) >= buflen)
         {
           /* Enough space for one more whole max character */
           memcpy(max_str, buf, (size_t) buflen);
           max_str+= buflen;
         }
         else
         {
           /*
             There is no space for a whole multi-byte
             character, so add trailing spaces instead.
           */
           *max_str++= ' ';
         }
       } while (max_str != max_end);
       return 0;
     }
     *min_str++= *max_str++ = *ptr;
   }

   /* No wildcard: both keys are the (possibly truncated) pattern itself */
   *min_length= *max_length = (uint) (min_str - min_org);
   while (min_str != min_end)
     *min_str++ = *max_str++ = ' ';	/* Because of key compression */
   return 0;
 }
 static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
 		  const char *str,const char *str_end,
 		  const char *wildstr,const char *wildend,
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@ -6876,7 +6876,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
    int mblen;
    if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc, 
-                                          scanner->sbeg, scanner->send)) < 0))
+                                          scanner->sbeg,
                                          scanner->send)) <= 0))
      return -1;
    scanner->page= wc >> 8;
@ -7918,7 +7919,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
    my_strnncoll_ucs2_uca,
    my_strnncollsp_ucs2_uca,
    my_strnxfrm_ucs2_uca,
-    my_like_range_simple,
+    my_like_range_ucs2,
    my_wildcmp_uca,
    NULL,
    my_instr_mb,
@ -8369,7 +8370,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
    my_strnncoll_any_uca,
    my_strnncollsp_any_uca,
    my_strnxfrm_any_uca,
-    my_like_range_simple,
+    my_like_range_mb,
    my_wildcmp_uca,
    NULL,
    my_instr_mb,