mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
CHARSET_INFO::instr was extended to return more substring match results:
- offset of the substring beginning
- offset of the substring end
- number of characters (multi-byte compatible)
This commit is contained in:
@ -75,6 +75,12 @@ typedef struct my_uni_idx_st
|
|||||||
uchar *tab;
|
uchar *tab;
|
||||||
} MY_UNI_IDX;
|
} MY_UNI_IDX;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint beg;
|
||||||
|
uint end;
|
||||||
|
uint mblen;
|
||||||
|
} my_match_t;
|
||||||
|
|
||||||
enum my_lex_states
|
enum my_lex_states
|
||||||
{
|
{
|
||||||
@ -116,9 +122,10 @@ typedef struct my_collation_handler_st
|
|||||||
|
|
||||||
int (*strcasecmp)(struct charset_info_st *, const char *, const char *);
|
int (*strcasecmp)(struct charset_info_st *, const char *, const char *);
|
||||||
|
|
||||||
int (*instr)(struct charset_info_st *,
|
uint (*instr)(struct charset_info_st *,
|
||||||
const char *big, uint b_length,
|
const char *big, uint b_length,
|
||||||
const char *small, uint s_length);
|
const char *small, uint s_length,
|
||||||
|
my_match_t *match, uint nmatch);
|
||||||
|
|
||||||
/* Hash calculation */
|
/* Hash calculation */
|
||||||
void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len,
|
void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len,
|
||||||
@ -249,9 +256,10 @@ extern void my_hash_sort_simple(CHARSET_INFO *cs,
|
|||||||
|
|
||||||
extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length);
|
extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length);
|
||||||
|
|
||||||
extern int my_instr_simple(struct charset_info_st *,
|
extern uint my_instr_simple(struct charset_info_st *,
|
||||||
const char *big, uint b_length,
|
const char *big, uint b_length,
|
||||||
const char *small, uint s_length);
|
const char *small, uint s_length,
|
||||||
|
my_match_t *match, uint nmatch);
|
||||||
|
|
||||||
|
|
||||||
/* Functions for 8bit */
|
/* Functions for 8bit */
|
||||||
@ -317,9 +325,10 @@ int my_wildcmp_mb(CHARSET_INFO *,
|
|||||||
int escape, int w_one, int w_many);
|
int escape, int w_one, int w_many);
|
||||||
uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
|
uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
|
||||||
uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
|
uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
|
||||||
int my_instr_mb(struct charset_info_st *,
|
uint my_instr_mb(struct charset_info_st *,
|
||||||
const char *big, uint b_length,
|
const char *big, uint b_length,
|
||||||
const char *small, uint s_length);
|
const char *small, uint s_length,
|
||||||
|
my_match_t *match, uint nmatch);
|
||||||
|
|
||||||
|
|
||||||
extern my_bool my_parse_charset_xml(const char *bug, uint len,
|
extern my_bool my_parse_charset_xml(const char *bug, uint len,
|
||||||
|
@ -1161,7 +1161,7 @@ longlong Item_func_locate::val_int()
|
|||||||
null_value=0;
|
null_value=0;
|
||||||
uint start=0;
|
uint start=0;
|
||||||
uint start0=0;
|
uint start0=0;
|
||||||
int ind;
|
my_match_t match;
|
||||||
|
|
||||||
if (arg_count == 3)
|
if (arg_count == 3)
|
||||||
{
|
{
|
||||||
@ -1175,11 +1175,12 @@ longlong Item_func_locate::val_int()
|
|||||||
if (!b->length()) // Found empty string at start
|
if (!b->length()) // Found empty string at start
|
||||||
return (longlong) (start+1);
|
return (longlong) (start+1);
|
||||||
|
|
||||||
ind= cmp_collation.collation->coll->instr(cmp_collation.collation,
|
if (!cmp_collation.collation->coll->instr(cmp_collation.collation,
|
||||||
a->ptr()+start, a->length()-start,
|
a->ptr()+start, a->length()-start,
|
||||||
b->ptr(), b->length());
|
b->ptr(), b->length(),
|
||||||
|
&match, 1))
|
||||||
return (longlong) (ind >= 0 ? ind + start0 + 1 : ind + 1);
|
return 0;
|
||||||
|
return (longlong) match.mblen + start0 + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -263,16 +263,25 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
int my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
|
uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
const char *big, uint b_length,
|
const char *big, uint b_length,
|
||||||
const char *small, uint s_length)
|
const char *small, uint s_length,
|
||||||
|
my_match_t *match, uint nmatch)
|
||||||
{
|
{
|
||||||
register const uchar *str, *search, *end, *search_end;
|
register const uchar *str, *search, *end, *search_end;
|
||||||
|
|
||||||
if (s_length <= b_length)
|
if (s_length <= b_length)
|
||||||
{
|
{
|
||||||
if (!s_length)
|
if (!s_length)
|
||||||
return 0; /* Empty string is always found */
|
{
|
||||||
|
if (nmatch)
|
||||||
|
{
|
||||||
|
match->beg= 0;
|
||||||
|
match->end= 0;
|
||||||
|
match->mblen= 0;
|
||||||
|
}
|
||||||
|
return 1; /* Empty string is always found */
|
||||||
|
}
|
||||||
|
|
||||||
str= (const uchar*) big;
|
str= (const uchar*) big;
|
||||||
search= (const uchar*) small;
|
search= (const uchar*) small;
|
||||||
@ -293,11 +302,24 @@ skipp:
|
|||||||
if ((*i++) != (*j++))
|
if ((*i++) != (*j++))
|
||||||
goto skipp;
|
goto skipp;
|
||||||
|
|
||||||
return (int) (str- (const uchar*)big) -1;
|
if (nmatch > 0)
|
||||||
|
{
|
||||||
|
match[0].beg= 0;
|
||||||
|
match[0].end= str- (const uchar*)big-1;
|
||||||
|
match[0].mblen= match[0].end;
|
||||||
|
|
||||||
|
if (nmatch > 1)
|
||||||
|
{
|
||||||
|
match[1].beg= match[0].end;
|
||||||
|
match[1].end= match[0].end+s_length;
|
||||||
|
match[1].mblen= match[1].end-match[1].beg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -274,18 +274,28 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
return b-b0;
|
return b-b0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int my_instr_mb(CHARSET_INFO *cs,
|
uint my_instr_mb(CHARSET_INFO *cs,
|
||||||
const char *big, uint b_length,
|
const char *big, uint b_length,
|
||||||
const char *small, uint s_length)
|
const char *small, uint s_length,
|
||||||
|
my_match_t *match, uint nmatch)
|
||||||
{
|
{
|
||||||
register const char *end;
|
register const char *end, *big0;
|
||||||
int res= 0;
|
int res= 0;
|
||||||
|
|
||||||
if (s_length <= b_length)
|
if (s_length <= b_length)
|
||||||
{
|
{
|
||||||
if (!s_length)
|
if (!s_length)
|
||||||
return 0; // Empty string is always found
|
{
|
||||||
|
if (nmatch)
|
||||||
|
{
|
||||||
|
match->beg= 0;
|
||||||
|
match->end= 0;
|
||||||
|
match->mblen= 0;
|
||||||
|
}
|
||||||
|
return 1; // Empty string is always found
|
||||||
|
}
|
||||||
|
|
||||||
|
big0= big;
|
||||||
end= big+b_length-s_length+1;
|
end= big+b_length-s_length+1;
|
||||||
|
|
||||||
while (big < end)
|
while (big < end)
|
||||||
@ -294,15 +304,28 @@ int my_instr_mb(CHARSET_INFO *cs,
|
|||||||
|
|
||||||
if (!cs->coll->strnncoll(cs, (unsigned char*) big, s_length,
|
if (!cs->coll->strnncoll(cs, (unsigned char*) big, s_length,
|
||||||
(unsigned char*) small, s_length))
|
(unsigned char*) small, s_length))
|
||||||
return res;
|
{
|
||||||
|
if (nmatch)
|
||||||
|
{
|
||||||
|
match[0].beg= big0;
|
||||||
|
match[0].end= big-big0;
|
||||||
|
match[0].mblen= res;
|
||||||
|
if (nmatch > 1)
|
||||||
|
{
|
||||||
|
match[1].beg= match[0].end;
|
||||||
|
match[1].end= match[0].end+s_length;
|
||||||
|
match[1].mblen= 0; /* Not computed */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
mblen= (mblen= my_ismbchar(cs, big, end)) ? mblen : 1;
|
mblen= (mblen= my_ismbchar(cs, big, end)) ? mblen : 1;
|
||||||
big+= mblen;
|
big+= mblen;
|
||||||
b_length-= mblen;
|
b_length-= mblen;
|
||||||
res++;
|
res++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* BINARY collations handlers for MB charsets */
|
/* BINARY collations handlers for MB charsets */
|
||||||
|
@ -1030,16 +1030,25 @@ uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int my_instr_simple(CHARSET_INFO *cs,
|
uint my_instr_simple(CHARSET_INFO *cs,
|
||||||
const char *big, uint b_length,
|
const char *big, uint b_length,
|
||||||
const char *small, uint s_length)
|
const char *small, uint s_length,
|
||||||
|
my_match_t *match, uint nmatch)
|
||||||
{
|
{
|
||||||
register const uchar *str, *search, *end, *search_end;
|
register const uchar *str, *search, *end, *search_end;
|
||||||
|
|
||||||
if (s_length <= b_length)
|
if (s_length <= b_length)
|
||||||
{
|
{
|
||||||
if (!s_length)
|
if (!s_length)
|
||||||
return 0; // Empty string is always found
|
{
|
||||||
|
if (nmatch)
|
||||||
|
{
|
||||||
|
match->beg= 0;
|
||||||
|
match->end= 0;
|
||||||
|
match->mblen= 0;
|
||||||
|
}
|
||||||
|
return 1; /* Empty string is always found */
|
||||||
|
}
|
||||||
|
|
||||||
str= (const uchar*) big;
|
str= (const uchar*) big;
|
||||||
search= (const uchar*) small;
|
search= (const uchar*) small;
|
||||||
@ -1060,11 +1069,24 @@ skipp:
|
|||||||
if (cs->sort_order[*i++] != cs->sort_order[*j++])
|
if (cs->sort_order[*i++] != cs->sort_order[*j++])
|
||||||
goto skipp;
|
goto skipp;
|
||||||
|
|
||||||
return (int) (str- (const uchar*)big) -1;
|
if (nmatch > 0)
|
||||||
|
{
|
||||||
|
match[0].beg= 0;
|
||||||
|
match[0].end= str- (const uchar*)big-1;
|
||||||
|
match[0].mblen= match[0].end;
|
||||||
|
|
||||||
|
if (nmatch > 1)
|
||||||
|
{
|
||||||
|
match[1].beg= match[0].end;
|
||||||
|
match[1].end= match[0].end+s_length;
|
||||||
|
match[1].mblen= match[1].end-match[1].beg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user