1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-30 11:22:14 +03:00
Files
mariadb/strings/ctype-mb.c
bar@mysql.com 2496e85b84 Bug#4521: unique key prefix interacts poorly with utf8.
Fix for binary collations for MyISAM and HEAP BTREE.
This patch also changes trailing spaces behaviour for
binary collations. Binary collations now have PAD 
characteristic too.
2004-08-19 15:15:10 +05:00

588 lines
13 KiB
C

/* Copyright (C) 2000 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
#include "m_ctype.h"
#include "m_string.h"
#ifdef USE_MB
void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
register char *end=str+strlen(str); /* BAR TODO: remove strlen() call */
register uchar *map=cs->to_upper;
while (*str)
{
if ((l=my_ismbchar(cs, str,end)))
str+=l;
else
{
*str=(char) map[(uchar)*str];
str++;
}
}
}
void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
register char *end=str+strlen(str);
register uchar *map=cs->to_lower;
while (*str)
{
if ((l=my_ismbchar(cs, str,end)))
str+=l;
else
{
*str=(char) map[(uchar)*str];
str++;
}
}
}
void my_caseup_mb(CHARSET_INFO * cs, char *str, uint length)
{
register uint32 l;
register char *end=str+length;
register uchar *map=cs->to_upper;
while (str<end)
{
if ((l=my_ismbchar(cs, str,end)))
str+=l;
else
{
*str=(char) map[(uchar)*str];
str++;
}
}
}
void my_casedn_mb(CHARSET_INFO * cs, char *str, uint length)
{
register uint32 l;
register char *end=str+length;
register uchar *map=cs->to_lower;
while (str<end)
{
if ((l=my_ismbchar(cs, str,end)))
str+=l;
else
{
*str=(char) map[(uchar)*str];
str++;
}
}
}
int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
{
register uint32 l;
register const char *end=s+strlen(s);
register uchar *map=cs->to_upper;
while (s<end)
{
if ((l=my_ismbchar(cs, s,end)))
{
while (l--)
if (*s++ != *t++)
return 1;
}
else if (my_mbcharlen(cs, *t) > 1)
return 1;
else if (map[(uchar) *s++] != map[(uchar) *t++])
return 1;
}
return *t;
}
/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
*/
#define INC_PTR(cs,A,B) A+=((use_mb_flag && \
my_ismbchar(cs,A,B)) ? my_ismbchar(cs,A,B) : 1)
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
int my_wildcmp_mb(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
int result= -1; /* Not found, using wildcards */
bool use_mb_flag=use_mb(cs);
while (wildstr != wildend)
{
while (*wildstr != w_many && *wildstr != w_one)
{
int l;
if (*wildstr == escape && wildstr+1 != wildend)
wildstr++;
if (use_mb_flag &&
(l = my_ismbchar(cs, wildstr, wildend)))
{
if (str+l > str_end || memcmp(str, wildstr, l) != 0)
return 1;
str += l;
wildstr += l;
}
else
if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
return(1); /* No match */
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
result=1; /* Found an anchor char */
}
if (*wildstr == w_one)
{
do
{
if (str == str_end) /* Skip one char if possible */
return (result);
INC_PTR(cs,str,str_end);
} while (++wildstr < wildend && *wildstr == w_one);
if (wildstr == wildend)
break;
}
if (*wildstr == w_many)
{ /* Found w_many */
uchar cmp;
const char* mb = wildstr;
int mblen=0;
wildstr++;
/* Remove any '%' and '_' from the wild search string */
for (; wildstr != wildend ; wildstr++)
{
if (*wildstr == w_many)
continue;
if (*wildstr == w_one)
{
if (str == str_end)
return (-1);
INC_PTR(cs,str,str_end);
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return(0); /* Ok if w_many is last */
if (str == str_end)
return -1;
if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
cmp= *++wildstr;
mb=wildstr;
LINT_INIT(mblen);
if (use_mb_flag)
mblen = my_ismbchar(cs, wildstr, wildend);
INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
cmp=likeconv(cs,cmp);
do
{
if (use_mb_flag)
{
for (;;)
{
if (str >= str_end)
return -1;
if (mblen)
{
if (str+mblen <= str_end && memcmp(str, mb, mblen) == 0)
{
str += mblen;
break;
}
}
else if (!my_ismbchar(cs, str, str_end) &&
likeconv(cs,*str) == cmp)
{
str++;
break;
}
INC_PTR(cs,str, str_end);
}
}
else
{
while (str != str_end && likeconv(cs,*str) != cmp)
str++;
if (str++ == str_end) return (-1);
}
{
int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
if (tmp <= 0)
return (tmp);
}
} while (str != str_end && wildstr[0] != w_many);
return(-1);
}
}
return (str != str_end ? 1 : 0);
}
uint my_numchars_mb(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e)
{
register uint32 n=0,mblen;
while (b < e)
{
b+= (mblen= my_ismbchar(cs,b,e)) ? mblen : 1;
++n;
}
return n;
}
uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e, uint pos)
{
uint mblen;
const char *b0=b;
while (pos && b<e)
{
b+= (mblen= my_ismbchar(cs,b,e)) ? mblen : 1;
pos--;
}
return pos ? e+2-b0 : b-b0;
}
uint my_well_formed_len_mb(CHARSET_INFO *cs,
const char *b, const char *e, uint pos)
{
my_wc_t wc;
int mblen;
const char *b_start= b;
while (pos)
{
if ((mblen= cs->cset->mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <0)
break;
b+= mblen;
pos--;
}
return b - b_start;
}
uint my_instr_mb(CHARSET_INFO *cs,
const char *b, uint b_length,
const char *s, uint s_length,
my_match_t *match, uint nmatch)
{
register const char *end, *b0;
int res= 0;
if (s_length <= b_length)
{
if (!s_length)
{
if (nmatch)
{
match->beg= 0;
match->end= 0;
match->mblen= 0;
}
return 1; /* Empty string is always found */
}
b0= b;
end= b+b_length-s_length+1;
while (b < end)
{
int mblen;
if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
(unsigned char*) s, s_length, 0))
{
if (nmatch)
{
match[0].beg= 0;
match[0].end= b-b0;
match[0].mblen= res;
if (nmatch > 1)
{
match[1].beg= match[0].end;
match[1].end= match[0].end+s_length;
match[1].mblen= 0; /* Not computed */
}
}
return 2;
}
mblen= (mblen= my_ismbchar(cs, b, end)) ? mblen : 1;
b+= mblen;
b_length-= mblen;
res++;
}
}
return 0;
}
/* BINARY collations handlers for MB charsets */
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, uint slen,
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
uint len=min(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
}
/*
Compare two strings.
SYNOPSIS
my_strnncollsp_mb_bin()
cs Chararacter set
s String to compare
slen Length of 's'
t String to compare
tlen Length of 't'
NOTE
This function is used for character strings with binary collations.
It ignores trailing spaces.
RETURN
A negative number if s < t
A positive number if s > t
0 if strings are equal
*/
static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, uint a_length,
const uchar *b, uint b_length)
{
const uchar *end;
uint length;
end= a + (length= min(a_length, b_length));
while (a < end)
{
if (*a++ != *b++)
return ((int) a[-1] - (int) b[-1]);
}
if (a_length != b_length)
{
int swap= 0;
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
*/
if (a_length < b_length)
{
/* put shorter key in s */
a_length= b_length;
a= b;
swap= -1; /* swap sign of result */
}
for (end= a + a_length-length; a < end ; a++)
{
if (*a != ' ')
return ((int) *a - (int) ' ') ^ swap;
}
}
return 0;
}
static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
uchar * dest, uint len,
const uchar *src,
uint srclen __attribute__((unused)))
{
if (dest != src)
memcpy(dest,src,len= min(len,srclen));
return len;
}
static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t)
{
return strcmp(s,t);
}
static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, uint len,ulong *nr1, ulong *nr2)
{
const uchar *pos = key;
key+= len;
for (; pos < (uchar*) key ; pos++)
{
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
((uint)*pos)) + (nr1[0] << 8);
nr2[0]+=3;
}
}
static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
int result= -1; /* Not found, using wildcards */
bool use_mb_flag=use_mb(cs);
while (wildstr != wildend)
{
while (*wildstr != w_many && *wildstr != w_one)
{
int l;
if (*wildstr == escape && wildstr+1 != wildend)
wildstr++;
if (use_mb_flag &&
(l = my_ismbchar(cs, wildstr, wildend)))
{
if (str+l > str_end || memcmp(str, wildstr, l) != 0)
return 1;
str += l;
wildstr += l;
}
else
if (str == str_end || *wildstr++ != *str++)
return(1); /* No match */
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
result=1; /* Found an anchor char */
}
if (*wildstr == w_one)
{
do
{
if (str == str_end) /* Skip one char if possible */
return (result);
INC_PTR(cs,str,str_end);
} while (++wildstr < wildend && *wildstr == w_one);
if (wildstr == wildend)
break;
}
if (*wildstr == w_many)
{ /* Found w_many */
uchar cmp;
const char* mb = wildstr;
int mblen=0;
wildstr++;
/* Remove any '%' and '_' from the wild search string */
for (; wildstr != wildend ; wildstr++)
{
if (*wildstr == w_many)
continue;
if (*wildstr == w_one)
{
if (str == str_end)
return (-1);
INC_PTR(cs,str,str_end);
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return(0); /* Ok if w_many is last */
if (str == str_end)
return -1;
if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
cmp= *++wildstr;
mb=wildstr;
LINT_INIT(mblen);
if (use_mb_flag)
mblen = my_ismbchar(cs, wildstr, wildend);
INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
do
{
if (use_mb_flag)
{
for (;;)
{
if (str >= str_end)
return -1;
if (mblen)
{
if (str+mblen <= str_end && memcmp(str, mb, mblen) == 0)
{
str += mblen;
break;
}
}
else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
{
str++;
break;
}
INC_PTR(cs,str, str_end);
}
}
else
{
while (str != str_end && *str != cmp)
str++;
if (str++ == str_end) return (-1);
}
{
int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
if (tmp <= 0)
return (tmp);
}
} while (str != str_end && wildstr[0] != w_many);
return(-1);
}
}
return (str != str_end ? 1 : 0);
}
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
{
NULL, /* init */
my_strnncoll_mb_bin,
my_strnncollsp_mb_bin,
my_strnxfrm_mb_bin,
my_like_range_simple,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
my_hash_sort_mb_bin
};
#endif