mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Optimize LIKE with the Turbo Boyer-Moore algorithm
This commit is contained in:
@ -26728,6 +26728,12 @@ In the first statement, the @code{LIKE} value begins with a wildcard
|
|||||||
character. In the second statement, the @code{LIKE} value is not a
|
character. In the second statement, the @code{LIKE} value is not a
|
||||||
constant.
|
constant.
|
||||||
|
|
||||||
|
MySQL 4.0 does another optimization on @code{LIKE}. If you are using
|
||||||
|
@code{... LIKE "%string%"} and @code{string} is longer than 3 characters
|
||||||
|
then MySQL will use the Turbo Boyer-Moore algorithm to initialize the
|
||||||
|
pattern for the string once and then use this pattern to search quickly
|
||||||
|
for the given string.
|
||||||
|
|
||||||
@findex IS NULL, and indexes
|
@findex IS NULL, and indexes
|
||||||
@cindex indexes, and @code{IS NULL}
|
@cindex indexes, and @code{IS NULL}
|
||||||
Searching using @code{column_name IS NULL} will use indexes if column_name
|
Searching using @code{column_name IS NULL} will use indexes if column_name
|
||||||
@ -49310,6 +49316,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
|
|||||||
|
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
|
Use Turbo Boyer-Moore to speed up @code{LIKE "%keyword%"} searches.
|
||||||
|
@item
|
||||||
Fixed bug in @code{DROP DATABASE} with symlink.
|
Fixed bug in @code{DROP DATABASE} with symlink.
|
||||||
@item
|
@item
|
||||||
Fixed crash in @code{REPAIR ... USE_FRM}.
|
Fixed crash in @code{REPAIR ... USE_FRM}.
|
||||||
|
@ -15,4 +15,15 @@ test
|
|||||||
select * from t1 where a like "te_t";
|
select * from t1 where a like "te_t";
|
||||||
a
|
a
|
||||||
test
|
test
|
||||||
|
select * from t1 where a like "%a%";
|
||||||
|
a
|
||||||
|
a
|
||||||
|
abc
|
||||||
|
abcd
|
||||||
|
select * from t1 where a like "%abcd%";
|
||||||
|
a
|
||||||
|
abcd
|
||||||
|
select * from t1 where a like "%abc\d%";
|
||||||
|
a
|
||||||
|
abcd
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
@ -9,4 +9,12 @@ select * from t1 where a like "abc%";
|
|||||||
select * from t1 where a like "ABC%";
|
select * from t1 where a like "ABC%";
|
||||||
select * from t1 where a like "test%";
|
select * from t1 where a like "test%";
|
||||||
select * from t1 where a like "te_t";
|
select * from t1 where a like "te_t";
|
||||||
|
|
||||||
|
#
|
||||||
|
# The following will test the Boyer-Moore code
|
||||||
|
#
|
||||||
|
select * from t1 where a like "%a%";
|
||||||
|
select * from t1 where a like "%abcd%";
|
||||||
|
select * from t1 where a like "%abc\d%";
|
||||||
|
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
@ -1228,23 +1228,23 @@ void Item_func_like::fix_length_and_dec()
|
|||||||
// cmp_type=STRING_RESULT; // For quick select
|
// cmp_type=STRING_RESULT; // For quick select
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
longlong Item_func_like::val_int()
|
longlong Item_func_like::val_int()
|
||||||
{
|
{
|
||||||
String *res,*res2;
|
String* res = args[0]->val_str(&tmp_value1);
|
||||||
res=args[0]->val_str(&tmp_value1);
|
|
||||||
if (args[0]->null_value)
|
if (args[0]->null_value)
|
||||||
{
|
{
|
||||||
null_value=1;
|
null_value=1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
res2=args[1]->val_str(&tmp_value2);
|
String* res2 = args[1]->val_str(&tmp_value2);
|
||||||
if (args[1]->null_value)
|
if (args[1]->null_value)
|
||||||
{
|
{
|
||||||
null_value=1;
|
null_value=1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
null_value=0;
|
null_value=0;
|
||||||
|
if (canDoTurboBM)
|
||||||
|
return turboBM_matches(res->ptr(), res->length()) ? 1 : 0;
|
||||||
if (binary)
|
if (binary)
|
||||||
return wild_compare(*res,*res2,escape) ? 0 : 1;
|
return wild_compare(*res,*res2,escape) ? 0 : 1;
|
||||||
else
|
else
|
||||||
@ -1268,6 +1268,51 @@ Item_func::optimize_type Item_func_like::select_optimize() const
|
|||||||
return OPTIMIZE_NONE;
|
return OPTIMIZE_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
  Item_func_like::fix_fields()

  Resolves the arguments through Item_bool_func2::fix_fields() and then,
  if the pattern argument is constant and has the form '%literal%' (no
  wildcard or escape character inside the literal), precomputes the
  Turbo Boyer-Moore shift tables so that val_int() can call
  turboBM_matches() for every row.

  Returns 1 if Item_bool_func2::fix_fields() failed, 0 otherwise.
*/
bool Item_func_like::fix_fields(THD *thd,struct st_table_list *tlist)
{
  if (Item_bool_func2::fix_fields(thd, tlist))
    return 1;

  /*
    TODO--we could do it for non-const, but we'd have to
    recompute the tables for each row--probably not worth it.
  */
  if (args[1]->const_item() && !(specialflag & SPECIAL_NO_NEW_FUNC))
  {
    String* res2 = args[1]->val_str(&tmp_value2);
    /*
      val_str() may hand back a null pointer when the pattern is NULL.
      Nothing to precompute then; val_int() checks args[1]->null_value
      before it ever looks at canDoTurboBM.
    */
    if (!res2)
      return 0;

    const size_t len = res2->length();
    const char* first = res2->ptr();

    /*
      The pattern must look like '%literal%', and the literal (len minus
      the two wildcards) must be longer than MIN_TURBOBM_PATTERN_LEN for
      the precomputation to be considered worthwhile.
      The length test comes first so that neither end of an empty
      pattern is ever examined.
    */
    if (len > MIN_TURBOBM_PATTERN_LEN + 2 &&
        *first == wild_many &&
        first[len - 1] == wild_many)
    {
      const char* last = first + len - 1;
      const char* tmp = first + 1;
      /* Stops at 'last' at the latest, because *last == wild_many */
      for ( ; *tmp != wild_many && *tmp != wild_one && *tmp != escape; tmp++) ;
      canDoTurboBM = tmp == last;
    }

    if (canDoTurboBM)
    {
      pattern = first + 1;
      pattern_len = (int) (len - 2);
      DBUG_PRINT("TurboBM", ("Initializing pattern: '%s'...", first));

      /* Plain size arithmetic; sizeof(int[n]) with run-time n is not C++ */
      const size_t gs_bytes = sizeof(int) * (pattern_len + 1);
      int* suff = (int*)thd->alloc(gs_bytes);
      bmGs = (int*)thd->alloc(gs_bytes);
      bmBc = (int*)thd->alloc(sizeof(int) * alphabet_size);
      if (!suff || !bmGs || !bmBc)
      {
        /*
          Could not get the tables: leave the optimization off so that
          val_int() falls through to the generic wildcard comparison.
        */
        canDoTurboBM = false;
        return 0;
      }
      turboBM_compute_good_suffix_shifts(suff);
      turboBM_compute_bad_character_shifts();
      DBUG_PRINT("turboBM",("done"));
    }
  }
  return 0;
}
|
||||||
|
|
||||||
#ifdef USE_REGEX
|
#ifdef USE_REGEX
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@ -1307,7 +1352,6 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
longlong Item_func_regex::val_int()
|
longlong Item_func_regex::val_int()
|
||||||
{
|
{
|
||||||
char buff[MAX_FIELD_WIDTH];
|
char buff[MAX_FIELD_WIDTH];
|
||||||
@ -1364,3 +1408,215 @@ Item_func_regex::~Item_func_regex()
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif /* USE_REGEX */
|
#endif /* USE_REGEX */
|
||||||
|
|
||||||
|
|
||||||
|
/*
  likeconv(A) maps a character to the value used when comparing it
  case-insensitively in LIKE.  The argument is converted to uchar before
  use in both variants: passing a negative plain char to toupper() is
  undefined, and a negative array index would be out of bounds.
*/
#ifdef LIKE_CMP_TOUPPER
#define likeconv(A) (uchar) toupper((uchar) (A))
#else
#define likeconv(A) (uchar) my_sort_order[(uchar) (A)]
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/**********************************************************************
|
||||||
|
turboBM_compute_suffixes()
|
||||||
|
Precomputation dependent only on pattern_len.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
void Item_func_like::turboBM_compute_suffixes(int* suff)
|
||||||
|
{
|
||||||
|
const int plm1 = pattern_len - 1;
|
||||||
|
int f = 0;
|
||||||
|
int g = plm1;
|
||||||
|
int* const splm1 = suff + plm1;
|
||||||
|
|
||||||
|
*splm1 = pattern_len;
|
||||||
|
|
||||||
|
if (binary)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = pattern_len - 2; i >= 0; i--)
|
||||||
|
{
|
||||||
|
int tmp = *(splm1 + i - f);
|
||||||
|
if (g < i && tmp < i - g)
|
||||||
|
suff[i] = tmp;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (i < g)
|
||||||
|
g = i; // g = min(i, g)
|
||||||
|
f = i;
|
||||||
|
while (g >= 0 && pattern[g] == pattern[g + plm1 - f])
|
||||||
|
g--;
|
||||||
|
suff[i] = f - g;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = pattern_len - 2; 0 <= i; --i)
|
||||||
|
{
|
||||||
|
int tmp = *(splm1 + i - f);
|
||||||
|
if (g < i && tmp < i - g)
|
||||||
|
suff[i] = tmp;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (i < g)
|
||||||
|
g = i; // g = min(i, g)
|
||||||
|
f = i;
|
||||||
|
while (g >= 0 && likeconv(pattern[g]) == likeconv(pattern[g + plm1 - f]))
|
||||||
|
g--;
|
||||||
|
suff[i] = f - g;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**********************************************************************
|
||||||
|
turboBM_compute_good_suffix_shifts()
|
||||||
|
Precomputation dependent only on pattern_len.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
void Item_func_like::turboBM_compute_good_suffix_shifts(int* suff)
|
||||||
|
{
|
||||||
|
turboBM_compute_suffixes(suff);
|
||||||
|
|
||||||
|
int* end = bmGs + pattern_len;
|
||||||
|
int* k;
|
||||||
|
for (k = bmGs; k < end; k++)
|
||||||
|
*k = pattern_len;
|
||||||
|
|
||||||
|
int tmp;
|
||||||
|
int i;
|
||||||
|
int j = 0;
|
||||||
|
const int plm1 = pattern_len - 1;
|
||||||
|
for (i = plm1; i > -1; i--)
|
||||||
|
{
|
||||||
|
if (suff[i] == i + 1)
|
||||||
|
{
|
||||||
|
for (tmp = plm1 - i; j < tmp; j++)
|
||||||
|
{
|
||||||
|
int* tmp2 = bmGs + j;
|
||||||
|
if (*tmp2 == pattern_len)
|
||||||
|
*tmp2 = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int* tmp2;
|
||||||
|
for (tmp = plm1 - i; j < tmp; j++)
|
||||||
|
{
|
||||||
|
tmp2 = bmGs + j;
|
||||||
|
if (*tmp2 == pattern_len)
|
||||||
|
*tmp2 = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp2 = bmGs + plm1;
|
||||||
|
for (i = 0; i <= pattern_len - 2; i++)
|
||||||
|
*(tmp2 - suff[i]) = plm1 - i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**********************************************************************
|
||||||
|
turboBM_compute_bad_character_shifts()
|
||||||
|
Precomputation dependent on pattern_len.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
void Item_func_like::turboBM_compute_bad_character_shifts()
|
||||||
|
{
|
||||||
|
int* i;
|
||||||
|
int* end = bmBc + alphabet_size;
|
||||||
|
for (i = bmBc; i < end; i++)
|
||||||
|
*i = pattern_len;
|
||||||
|
|
||||||
|
int j;
|
||||||
|
const int plm1 = pattern_len - 1;
|
||||||
|
if (binary)
|
||||||
|
for (j = 0; j < plm1; j++)
|
||||||
|
bmBc[pattern[j]] = plm1 - j;
|
||||||
|
else
|
||||||
|
for (j = 0; j < plm1; j++)
|
||||||
|
bmBc[likeconv(pattern[j])] = plm1 - j;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**********************************************************************
|
||||||
|
turboBM_matches()
|
||||||
|
Search for pattern in text, returns true/false for match/no match
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
bool Item_func_like::turboBM_matches(const char* text, int text_len) const
|
||||||
|
{
|
||||||
|
register int bcShift;
|
||||||
|
register int turboShift;
|
||||||
|
int shift = pattern_len;
|
||||||
|
int j = 0;
|
||||||
|
int u = 0;
|
||||||
|
|
||||||
|
const int plm1 = pattern_len - 1;
|
||||||
|
const int tlmpl = text_len - pattern_len;
|
||||||
|
|
||||||
|
/* Searching */
|
||||||
|
if (binary)
|
||||||
|
{
|
||||||
|
while (j <= tlmpl)
|
||||||
|
{
|
||||||
|
register int i = plm1;
|
||||||
|
while (i >= 0 && pattern[i] == text[i + j])
|
||||||
|
{
|
||||||
|
i--;
|
||||||
|
if (i == plm1 - shift)
|
||||||
|
i -= u;
|
||||||
|
}
|
||||||
|
if (i < 0)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
register const int v = plm1 - i;
|
||||||
|
turboShift = u - v;
|
||||||
|
bcShift = bmBc[text[i + j]] - plm1 + i;
|
||||||
|
shift = max(turboShift, bcShift);
|
||||||
|
shift = max(shift, bmGs[i]);
|
||||||
|
if (shift == bmGs[i])
|
||||||
|
u = min(pattern_len - shift, v);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (turboShift < bcShift)
|
||||||
|
shift = max(shift, u + 1);
|
||||||
|
u = 0;
|
||||||
|
}
|
||||||
|
j += shift;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (j <= tlmpl)
|
||||||
|
{
|
||||||
|
register int i = plm1;
|
||||||
|
while (i >= 0 && likeconv(pattern[i]) == likeconv(text[i + j]))
|
||||||
|
{
|
||||||
|
i--;
|
||||||
|
if (i == plm1 - shift)
|
||||||
|
i -= u;
|
||||||
|
}
|
||||||
|
if (i < 0)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
register const int v = plm1 - i;
|
||||||
|
turboShift = u - v;
|
||||||
|
bcShift = bmBc[likeconv(text[i + j])] - plm1 + i;
|
||||||
|
shift = max(turboShift, bcShift);
|
||||||
|
shift = max(shift, bmGs[i]);
|
||||||
|
if (shift == bmGs[i])
|
||||||
|
u = min(pattern_len - shift, v);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (turboShift < bcShift)
|
||||||
|
shift = max(shift, u + 1);
|
||||||
|
u = 0;
|
||||||
|
}
|
||||||
|
j += shift;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -478,15 +478,40 @@ public:
|
|||||||
class Item_func_like :public Item_bool_func2
|
class Item_func_like :public Item_bool_func2
|
||||||
{
|
{
|
||||||
char escape;
|
char escape;
|
||||||
|
|
||||||
|
// Turbo Boyer-Moore data
|
||||||
|
bool canDoTurboBM; // pattern is '%abcd%' case
|
||||||
|
const char* pattern;
|
||||||
|
int pattern_len;
|
||||||
|
|
||||||
|
// TurboBM buffers, *this is owner
|
||||||
|
int* bmGs; // good suffix shift table, size is pattern_len + 1
|
||||||
|
int* bmBc; // bad character shift table, size is alphabet_size
|
||||||
|
|
||||||
|
void turboBM_compute_suffixes(int* suff);
|
||||||
|
void turboBM_compute_good_suffix_shifts(int* suff);
|
||||||
|
void turboBM_compute_bad_character_shifts();
|
||||||
|
bool turboBM_matches(const char* text, int text_len) const;
|
||||||
|
enum { alphabet_size = 256 };
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Item_func_like(Item *a,Item *b, char* escape_arg) :Item_bool_func2(a,b),escape(*escape_arg)
|
Item_func_like::Item_func_like(Item *a,Item *b, char* escape_arg) :
|
||||||
|
Item_bool_func2(a,b),
|
||||||
|
escape(*escape_arg),
|
||||||
|
canDoTurboBM(false),
|
||||||
|
pattern(0),
|
||||||
|
pattern_len(0),
|
||||||
|
bmGs(0),
|
||||||
|
bmBc(0)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
longlong val_int();
|
longlong val_int();
|
||||||
enum Functype functype() const { return LIKE_FUNC; }
|
enum Functype functype() const { return LIKE_FUNC; }
|
||||||
optimize_type select_optimize() const;
|
optimize_type select_optimize() const;
|
||||||
cond_result eq_cmp_result() const { return COND_TRUE; }
|
cond_result eq_cmp_result() const { return COND_TRUE; }
|
||||||
const char *func_name() const { return "like"; }
|
const char *func_name() const { return "like"; }
|
||||||
void fix_length_and_dec();
|
void fix_length_and_dec();
|
||||||
|
bool fix_fields(THD *thd,struct st_table_list *tlist);
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef USE_REGEX
|
#ifdef USE_REGEX
|
||||||
|
@ -122,6 +122,13 @@ bfill((A)->null_flags,(A)->null_bytes,255);\
|
|||||||
#define TE_INFO_LENGTH 3
|
#define TE_INFO_LENGTH 3
|
||||||
#define MTYP_NOEMPTY_BIT 128
|
#define MTYP_NOEMPTY_BIT 128
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Pattern length that must be exceeded before Turbo Boyer-Moore is used
|
||||||
|
* for SELECT "text" LIKE "%pattern%", excluding the two
|
||||||
|
* wildcards in class Item_func_like.
|
||||||
|
*/
|
||||||
|
#define MIN_TURBOBM_PATTERN_LEN 3
|
||||||
|
|
||||||
/* Include prototypes for unireg */
|
/* Include prototypes for unireg */
|
||||||
|
|
||||||
#include "mysqld_error.h"
|
#include "mysqld_error.h"
|
||||||
|
Reference in New Issue
Block a user