Merge mysql.com:/home/bar/mysql-work/mysql-5.0.b27345

into mysql.com:/home/bar/mysql-work/mysql-5.1-new-rpl strings/ctype-uca.c: Auto merged strings/ctype-mb.c: After merge fix. strings/ctype-ucs2.c: After merge fix.
2025-07-30 16:24:05 +03:00 · 2007-07-03 14:06:57 +05:00
parent 7119687e26 7edcebc97a
commit c45f8f949f
5 changed files with 261 additions and 2 deletions
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@ -2663,3 +2663,95 @@ COUNT(*)	c1
 1	
 1	a
 DROP TABLE IF EXISTS t1;
 set names utf8;
 create table t1 (
 a varchar(255),
 key a(a)
 ) character set utf8 collate utf8_danish_ci;
 insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa');
 select a as like_a from t1 where a like 'a%';
 like_a
 aaaaa
 select a as like_aa from t1 where a like 'aa%';
 like_aa
 aaaaa
 select a as like_aaa from t1 where a like 'aaa%';
 like_aaa
 aaaaa
 select a as like_aaaa from t1 where a like 'aaaa%';
 like_aaaa
 aaaaa
 select a as like_aaaaa from t1 where a like 'aaaaa%';
 like_aaaaa
 aaaaa
 alter table t1 convert to character set ucs2 collate ucs2_danish_ci;
 select a as like_a from t1 where a like 'a%';
 like_a
 aaaaa
 select a as like_aa from t1 where a like 'aa%';
 like_aa
 aaaaa
 select a as like_aaa from t1 where a like 'aaa%';
 like_aaa
 aaaaa
 select a as like_aaaa from t1 where a like 'aaaa%';
 like_aaaa
 aaaaa
 select a as like_aaaaa from t1 where a like 'aaaaa%';
 like_aaaaa
 aaaaa
 drop table t1;
 create table t1 (
 a varchar(255),
 key(a)
 ) character set utf8 collate utf8_spanish2_ci;
 insert into t1 values ('aaaaa'),('lllll'),('zzzzz');
 select a as like_l from t1 where a like 'l%';
 like_l
 lllll
 select a as like_ll from t1 where a like 'll%';
 like_ll
 lllll
 select a as like_lll from t1 where a like 'lll%';
 like_lll
 lllll
 select a as like_llll from t1 where a like 'llll%';
 like_llll
 lllll
 select a as like_lllll from t1 where a like 'lllll%';
 like_lllll
 lllll
 alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci;
 select a as like_l from t1 where a like 'l%';
 like_l
 lllll
 select a as like_ll from t1 where a like 'll%';
 like_ll
 lllll
 select a as like_lll from t1 where a like 'lll%';
 like_lll
 lllll
 select a as like_llll from t1 where a like 'llll%';
 like_llll
 lllll
 select a as like_lllll from t1 where a like 'lllll%';
 like_lllll
 lllll
 drop table t1;
 create table t1 (
 a varchar(255),
 key a(a)
 ) character set utf8 collate utf8_czech_ci;
 insert into t1 values
 ('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j');
 select * from t1 where a like 'c%';
 a
 c
 ch
 alter table t1 convert to character set ucs2 collate ucs2_czech_ci;
 select * from t1 where a like 'c%';
 a
 c
 ch
 drop table t1;
 End for 5.0 tests
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@ -485,3 +485,57 @@ CREATE TABLE t1 (
 insert into t1 values (''),('a');
 SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
 DROP TABLE IF EXISTS t1;
 #
 # Bug#27345 Incorrect data returned when range-read from utf8_danish_ci indexes
 #
 -- Regression script: prefix LIKE on an indexed column must return the
 -- same rows whether or not the pattern ends in a letter that can start
 -- a contraction in the column's collation.  Each case is run for utf8
 -- and then again after converting the table to ucs2.
 set names utf8;
 -- Danish: 'aa' is a single letter sorted after 'z', so 'a' can be both
 -- the first and the second half of a contraction.
 create table t1 (
  a varchar(255),
  key a(a)
 ) character set utf8 collate utf8_danish_ci;
 insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa');
 -- Every prefix length from 1 to 5 is expected to find only 'aaaaa'.
 select a as like_a from t1 where a like 'a%';
 select a as like_aa from t1 where a like 'aa%';
 select a as like_aaa from t1 where a like 'aaa%';
 select a as like_aaaa from t1 where a like 'aaaa%';
 select a as like_aaaaa from t1 where a like 'aaaaa%';
 -- Same five patterns against the two-byte (ucs2) variant of the collation.
 alter table t1 convert to character set ucs2 collate ucs2_danish_ci;
 select a as like_a from t1 where a like 'a%';
 select a as like_aa from t1 where a like 'aa%';
 select a as like_aaa from t1 where a like 'aaa%';
 select a as like_aaaa from t1 where a like 'aaaa%';
 select a as like_aaaaa from t1 where a like 'aaaaa%';
 drop table t1;
 -- Traditional Spanish: patterns built from 'l' only; presumably chosen
 -- because 'll' is a contraction in this collation (TODO confirm against
 -- the spanish2 tailoring).
 create table t1 (
  a varchar(255),
  key(a)
 ) character set utf8 collate utf8_spanish2_ci;
 insert into t1 values ('aaaaa'),('lllll'),('zzzzz');
 -- Every prefix length from 1 to 5 is expected to find only 'lllll'.
 select a as like_l from t1 where a like 'l%';
 select a as like_ll from t1 where a like 'll%';
 select a as like_lll from t1 where a like 'lll%';
 select a as like_llll from t1 where a like 'llll%';
 select a as like_lllll from t1 where a like 'lllll%';
 -- Same five patterns against the two-byte (ucs2) variant of the collation.
 alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci;
 select a as like_l from t1 where a like 'l%';
 select a as like_ll from t1 where a like 'll%';
 select a as like_lll from t1 where a like 'lll%';
 select a as like_llll from t1 where a like 'llll%';
 select a as like_lllll from t1 where a like 'lllll%';
 drop table t1;
 -- Czech: the pattern ends in a contraction head directly followed by '%'.
 create table t1 (
  a varchar(255),
  key a(a)
 ) character set utf8 collate utf8_czech_ci;
 -- In Czech 'ch' is a single letter between 'h' and 'i'
 insert into t1 values
 ('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j');
 -- Both 'c' and 'ch' must be returned, although 'ch' sorts after 'h'.
 select * from t1 where a like 'c%';
 alter table t1 convert to character set ucs2 collate ucs2_czech_ci;
 select * from t1 where a like 'c%';
 drop table t1;
 -- echo End for 5.0 tests
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@ -567,6 +567,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
  char *min_end= min_str + res_length;
  char *max_end= max_str + res_length;
  size_t maxcharlen= res_length / cs->mbmaxlen;
  const char *contraction_flags= cs->contractions ? 
              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
  {
@ -575,6 +577,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
      ptr++;                                    /* Skip escape */
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
    {      
 fill_max_and_min:
      /*
        Calculate length of keys:
        'a\0\0... is the smallest possible string when we have space expand
@ -606,8 +609,74 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
       *min_str++= *max_str++= *ptr++;
    }
    else
-       *min_str++= *max_str++= *ptr++;    
+    {
      /*
        Special case for collations with contractions.
        For example, in Czech, 'ch' is a separate letter
        which is sorted between 'h' and 'i'.
        In the pattern 'abc%', the 'c' at the end can mean:
        - letter 'c' itself,
        - beginning of the contraction 'ch'.
        If we simply return this LIKE range:
         'abc\min\min\min' and 'abc\max\max\max'
        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
        will only find values starting from 'abc[^h]',
        but won't find values starting from 'abch'.
        We must ignore contraction heads followed by w_one or w_many.
        ('Contraction head' means any letter which can be the first
        letter in a contraction)
        For example, for Czech 'abc%', we will return LIKE range,
        which is equal to LIKE range for 'ab%':
        'ab\min\min\min\min' and 'ab\max\max\max\max'.
      */
      if (contraction_flags && ptr + 1 < end &&
          contraction_flags[(uchar) *ptr])
      {
        /* Ptr[0] is a contraction head. */
        if (ptr[1] == w_one || ptr[1] == w_many)
        {
          /* Contraction head followed by a wildcard, quit. */
          goto fill_max_and_min;
        }
        /*
          Some letters can be both contraction heads and contraction tails.
          For example, in Danish 'aa' is a separate single letter which
          is sorted after 'z'. So 'a' can be both head and tail.
          If ptr[0]+ptr[1] is a contraction,
          then put both letters together.
          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
          is not a contraction, then we put only ptr[0],
          and continue with ptr[1] on the next loop.
        */
        if (contraction_flags[(uchar) ptr[1]] &&
            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
        {
          /* Contraction found */
          if (maxcharlen == 1 || min_str + 1 >= min_end)
          {
            /* Both contraction parts don't fit, quit */
            goto fill_max_and_min;
          }
          /* Put contraction head */
          *min_str++= *max_str++= *ptr++;
          maxcharlen--;
        }
      }
      /* Put contraction tail, or a single character */
      *min_str++= *max_str++= *ptr++;    
    }
  }
  *min_length= *max_length = (size_t) (min_str - min_org);
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@ -7938,10 +7938,16 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
  /* Now process contractions */
  if (ncontractions)
  {
-    uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */
+    /*
      8K for weights for basic latin letter pairs,
      plus 256 bytes for "is contraction part" flags.
    */
    uint size= 0x40*0x40*sizeof(uint16) + 256;
    char *contraction_flags;
    if (!(cs->contractions= (uint16*) (*alloc)(size)))
        return 1;
    bzero((void*)cs->contractions, size);
    contraction_flags= ((char*) cs->contractions) + 0x40*0x40;
    for (i=0; i < rc; i++)
    {
      if (rule[i].curr[1])
@ -7967,6 +7973,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
        /* Copy base weight applying primary difference */
        cs->contractions[offsc]= offsb[0] + rule[i].diff[0];
        /* Mark both letters as "is contraction part" */
        contraction_flags[rule[i].curr[0]]= 1;
        contraction_flags[rule[i].curr[1]]= 1;
      }
    }
  }
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@ -1527,6 +1527,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
  char *min_org=min_str;
  char *min_end=min_str+res_length;
  size_t charlen= res_length / cs->mbmaxlen;
  const char *contraction_flags= cs->contractions ?
             ((const char*) cs->contractions) + 0x40*0x40 : NULL;
  for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
        ; ptr+=2, charlen--)
@ -1548,6 +1550,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
    }
    if (ptr[0] == '\0' && ptr[1] == w_many)	/* '%' in SQL */
    {
 fill_max_and_min:
      /*
        Calculate length of keys:
        'a\0\0... is the smallest possible string when we have space expand
@ -1564,6 +1567,38 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
      } while (min_str + 1 < min_end);
      return 0;
    }
    if (contraction_flags && ptr + 3 < end &&
        ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
    {
      /* Contraction head found */
      if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
      {
        /* Contraction head followed by a wildcard, quit */
        goto fill_max_and_min;
      }
      /*
        Check if the second letter can be contraction part,
        and if two letters really produce a contraction.
      */
      if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
          cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
      {
        /* Contraction found */
        if (charlen == 1 || min_str + 2 >= min_end)
        {
          /* Full contraction doesn't fit, quit */
          goto fill_max_and_min;
        }
        /* Put contraction head */
        *min_str++= *max_str++= *ptr++;
        *min_str++= *max_str++= *ptr++;
        charlen--;
      }
    }
    /* Put contraction tail, or a single character */
    *min_str++= *max_str++ = ptr[0];
    *min_str++= *max_str++ = ptr[1];
  }