mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Optimize LIKE with the Turbo Boyer-Moore algorithm
This commit is contained in:
@ -26728,6 +26728,12 @@ In the first statement, the @code{LIKE} value begins with a wildcard
|
||||
character. In the second statement, the @code{LIKE} value is not a
|
||||
constant.
|
||||
|
||||
MySQL 4.0 does another optimization on @code{LIKE}. If you are using
|
||||
@code{... LIKE "%string%"} and @code{string} is longer than 3 characters
|
||||
then MySQL will use the Turbo Boyer-Moore algorithm to preprocess the
|
||||
pattern once and then use the preprocessed pattern to quickly search
|
||||
for the given string.
|
||||
|
||||
@findex IS NULL, and indexes
|
||||
@cindex indexes, and @code{IS NULL}
|
||||
Searching using @code{column_name IS NULL} will use indexes if column_name
|
||||
@ -49310,6 +49316,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
Use Turbo Boyer-Moore to speed up @code{LIKE "%keyword%"} searches.
|
||||
@item
|
||||
Fixed bug in @code{DROP DATABASE} with symlink.
|
||||
@item
|
||||
Fixed crash in @code{REPAIR ... USE_FRM}.
|
||||
|
@ -15,4 +15,15 @@ test
|
||||
select * from t1 where a like "te_t";
|
||||
a
|
||||
test
|
||||
select * from t1 where a like "%a%";
|
||||
a
|
||||
a
|
||||
abc
|
||||
abcd
|
||||
select * from t1 where a like "%abcd%";
|
||||
a
|
||||
abcd
|
||||
select * from t1 where a like "%abc\d%";
|
||||
a
|
||||
abcd
|
||||
drop table t1;
|
||||
|
@ -9,4 +9,12 @@ select * from t1 where a like "abc%";
|
||||
select * from t1 where a like "ABC%";
|
||||
select * from t1 where a like "test%";
|
||||
select * from t1 where a like "te_t";
|
||||
|
||||
#
|
||||
# The following will test the Boyer-Moore code
|
||||
#
|
||||
select * from t1 where a like "%a%";
|
||||
select * from t1 where a like "%abcd%";
|
||||
select * from t1 where a like "%abc\d%";
|
||||
|
||||
drop table t1;
|
||||
|
@ -1228,23 +1228,23 @@ void Item_func_like::fix_length_and_dec()
|
||||
// cmp_type=STRING_RESULT; // For quick select
|
||||
}
|
||||
|
||||
|
||||
longlong Item_func_like::val_int()
|
||||
{
|
||||
String *res,*res2;
|
||||
res=args[0]->val_str(&tmp_value1);
|
||||
String* res = args[0]->val_str(&tmp_value1);
|
||||
if (args[0]->null_value)
|
||||
{
|
||||
null_value=1;
|
||||
return 0;
|
||||
}
|
||||
res2=args[1]->val_str(&tmp_value2);
|
||||
String* res2 = args[1]->val_str(&tmp_value2);
|
||||
if (args[1]->null_value)
|
||||
{
|
||||
null_value=1;
|
||||
return 0;
|
||||
}
|
||||
null_value=0;
|
||||
if (canDoTurboBM)
|
||||
return turboBM_matches(res->ptr(), res->length()) ? 1 : 0;
|
||||
if (binary)
|
||||
return wild_compare(*res,*res2,escape) ? 0 : 1;
|
||||
else
|
||||
@ -1268,6 +1268,51 @@ Item_func::optimize_type Item_func_like::select_optimize() const
|
||||
return OPTIMIZE_NONE;
|
||||
}
|
||||
|
||||
/*
  Resolve the arguments through Item_bool_func2::fix_fields() and, when
  the pattern argument is a constant of the form '%literal%', precompute
  the Turbo Boyer-Moore shift tables once so that val_int() can call
  turboBM_matches() instead of the generic wildcard comparison.

  Returns 1 if the base-class fix_fields() fails, 0 otherwise.
*/

bool Item_func_like::fix_fields(THD *thd,struct st_table_list *tlist)
{
  if (Item_bool_func2::fix_fields(thd, tlist))
    return 1;

  /*
    TODO--we could do it for non-const, but we'd have to
    recompute the tables for each row--probably not worth it.
  */
  if (args[1]->const_item() && !(specialflag & SPECIAL_NO_NEW_FUNC))
  {
    String* res2 = args[1]->val_str(&tmp_value2);
    /*
      val_int() checks args[1]->null_value after this same call, so the
      pattern can be NULL; do not dereference a null String here.
    */
    if (!res2)
      return 0;

    const size_t len = res2->length();
    const char* first = res2->ptr();

    /*
      The pattern must look like '%literal%' where the literal part
      (len - 2 characters) is longer than MIN_TURBOBM_PATTERN_LEN and
      contains no wildcard or escape character.
    */
    if (len > MIN_TURBOBM_PATTERN_LEN + 2 && *first == wild_many)
    {
      /* Computed only after the length check so it never points before first */
      const char* last = first + len - 1;
      if (*last == wild_many)
      {
        /* The scan always stops at 'last' at the latest: *last is wild_many */
        const char* tmp = first + 1;
        while (*tmp != wild_many && *tmp != wild_one && *tmp != escape)
          tmp++;
        canDoTurboBM = tmp == last;
      }
    }

    if (canDoTurboBM)
    {
      pattern = first + 1;
      pattern_len = (int) (len - 2);
      DBUG_PRINT("TurboBM", ("Initializing pattern: '%s'...", first));

      const size_t shift_table_bytes = sizeof(int) * (pattern_len + 1);
      int* suff = (int*)thd->alloc(shift_table_bytes);
      bmGs = (int*)thd->alloc(shift_table_bytes);
      bmBc = (int*)thd->alloc(sizeof(int) * alphabet_size);
      if (!suff || !bmGs || !bmBc)
      {
        /*
          NOTE(review): assumes thd->alloc() reports failure with a null
          pointer. Fall back to the generic comparison in that case
          rather than writing through a null table.
        */
        canDoTurboBM = false;
        return 0;
      }

      turboBM_compute_good_suffix_shifts(suff);
      turboBM_compute_bad_character_shifts();
      DBUG_PRINT("turboBM",("done"));
    }
  }
  return 0;
}
|
||||
|
||||
#ifdef USE_REGEX
|
||||
|
||||
bool
|
||||
@ -1307,7 +1352,6 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
longlong Item_func_regex::val_int()
|
||||
{
|
||||
char buff[MAX_FIELD_WIDTH];
|
||||
@ -1364,3 +1408,215 @@ Item_func_regex::~Item_func_regex()
|
||||
}
|
||||
|
||||
#endif /* USE_REGEX */
|
||||
|
||||
|
||||
/*
  likeconv(A): character normalisation applied to both sides of a
  non-binary comparison in the TurboBM code below. Uses toupper() when
  LIKE_CMP_TOUPPER is defined, otherwise a lookup in my_sort_order[];
  the result is cast to uchar in both cases.
*/
#ifdef LIKE_CMP_TOUPPER
|
||||
#define likeconv(A) (uchar) toupper(A)
|
||||
#else
|
||||
#define likeconv(A) (uchar) my_sort_order[(uchar) (A)]
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
turboBM_compute_suffixes()
|
||||
Precomputation dependent only on pattern_len.
|
||||
**********************************************************************/
|
||||
|
||||
void Item_func_like::turboBM_compute_suffixes(int* suff)
|
||||
{
|
||||
const int plm1 = pattern_len - 1;
|
||||
int f = 0;
|
||||
int g = plm1;
|
||||
int* const splm1 = suff + plm1;
|
||||
|
||||
*splm1 = pattern_len;
|
||||
|
||||
if (binary)
|
||||
{
|
||||
int i;
|
||||
for (i = pattern_len - 2; i >= 0; i--)
|
||||
{
|
||||
int tmp = *(splm1 + i - f);
|
||||
if (g < i && tmp < i - g)
|
||||
suff[i] = tmp;
|
||||
else
|
||||
{
|
||||
if (i < g)
|
||||
g = i; // g = min(i, g)
|
||||
f = i;
|
||||
while (g >= 0 && pattern[g] == pattern[g + plm1 - f])
|
||||
g--;
|
||||
suff[i] = f - g;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
for (i = pattern_len - 2; 0 <= i; --i)
|
||||
{
|
||||
int tmp = *(splm1 + i - f);
|
||||
if (g < i && tmp < i - g)
|
||||
suff[i] = tmp;
|
||||
else
|
||||
{
|
||||
if (i < g)
|
||||
g = i; // g = min(i, g)
|
||||
f = i;
|
||||
while (g >= 0 && likeconv(pattern[g]) == likeconv(pattern[g + plm1 - f]))
|
||||
g--;
|
||||
suff[i] = f - g;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
turboBM_compute_good_suffix_shifts()
|
||||
Precomputation dependent only on pattern_len.
|
||||
**********************************************************************/
|
||||
|
||||
void Item_func_like::turboBM_compute_good_suffix_shifts(int* suff)
|
||||
{
|
||||
turboBM_compute_suffixes(suff);
|
||||
|
||||
int* end = bmGs + pattern_len;
|
||||
int* k;
|
||||
for (k = bmGs; k < end; k++)
|
||||
*k = pattern_len;
|
||||
|
||||
int tmp;
|
||||
int i;
|
||||
int j = 0;
|
||||
const int plm1 = pattern_len - 1;
|
||||
for (i = plm1; i > -1; i--)
|
||||
{
|
||||
if (suff[i] == i + 1)
|
||||
{
|
||||
for (tmp = plm1 - i; j < tmp; j++)
|
||||
{
|
||||
int* tmp2 = bmGs + j;
|
||||
if (*tmp2 == pattern_len)
|
||||
*tmp2 = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int* tmp2;
|
||||
for (tmp = plm1 - i; j < tmp; j++)
|
||||
{
|
||||
tmp2 = bmGs + j;
|
||||
if (*tmp2 == pattern_len)
|
||||
*tmp2 = tmp;
|
||||
}
|
||||
|
||||
tmp2 = bmGs + plm1;
|
||||
for (i = 0; i <= pattern_len - 2; i++)
|
||||
*(tmp2 - suff[i]) = plm1 - i;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
turboBM_compute_bad_character_shifts()
|
||||
Precomputation dependent on pattern_len.
|
||||
**********************************************************************/
|
||||
|
||||
void Item_func_like::turboBM_compute_bad_character_shifts()
|
||||
{
|
||||
int* i;
|
||||
int* end = bmBc + alphabet_size;
|
||||
for (i = bmBc; i < end; i++)
|
||||
*i = pattern_len;
|
||||
|
||||
int j;
|
||||
const int plm1 = pattern_len - 1;
|
||||
if (binary)
|
||||
for (j = 0; j < plm1; j++)
|
||||
bmBc[pattern[j]] = plm1 - j;
|
||||
else
|
||||
for (j = 0; j < plm1; j++)
|
||||
bmBc[likeconv(pattern[j])] = plm1 - j;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
turboBM_matches()
|
||||
Search for pattern in text, returns true/false for match/no match
|
||||
**********************************************************************/
|
||||
|
||||
bool Item_func_like::turboBM_matches(const char* text, int text_len) const
|
||||
{
|
||||
register int bcShift;
|
||||
register int turboShift;
|
||||
int shift = pattern_len;
|
||||
int j = 0;
|
||||
int u = 0;
|
||||
|
||||
const int plm1 = pattern_len - 1;
|
||||
const int tlmpl = text_len - pattern_len;
|
||||
|
||||
/* Searching */
|
||||
if (binary)
|
||||
{
|
||||
while (j <= tlmpl)
|
||||
{
|
||||
register int i = plm1;
|
||||
while (i >= 0 && pattern[i] == text[i + j])
|
||||
{
|
||||
i--;
|
||||
if (i == plm1 - shift)
|
||||
i -= u;
|
||||
}
|
||||
if (i < 0)
|
||||
return true;
|
||||
|
||||
register const int v = plm1 - i;
|
||||
turboShift = u - v;
|
||||
bcShift = bmBc[text[i + j]] - plm1 + i;
|
||||
shift = max(turboShift, bcShift);
|
||||
shift = max(shift, bmGs[i]);
|
||||
if (shift == bmGs[i])
|
||||
u = min(pattern_len - shift, v);
|
||||
else
|
||||
{
|
||||
if (turboShift < bcShift)
|
||||
shift = max(shift, u + 1);
|
||||
u = 0;
|
||||
}
|
||||
j += shift;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (j <= tlmpl)
|
||||
{
|
||||
register int i = plm1;
|
||||
while (i >= 0 && likeconv(pattern[i]) == likeconv(text[i + j]))
|
||||
{
|
||||
i--;
|
||||
if (i == plm1 - shift)
|
||||
i -= u;
|
||||
}
|
||||
if (i < 0)
|
||||
return true;
|
||||
|
||||
register const int v = plm1 - i;
|
||||
turboShift = u - v;
|
||||
bcShift = bmBc[likeconv(text[i + j])] - plm1 + i;
|
||||
shift = max(turboShift, bcShift);
|
||||
shift = max(shift, bmGs[i]);
|
||||
if (shift == bmGs[i])
|
||||
u = min(pattern_len - shift, v);
|
||||
else
|
||||
{
|
||||
if (turboShift < bcShift)
|
||||
shift = max(shift, u + 1);
|
||||
u = 0;
|
||||
}
|
||||
j += shift;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -478,15 +478,40 @@ public:
|
||||
class Item_func_like :public Item_bool_func2
|
||||
{
|
||||
char escape;
|
||||
|
||||
// Turbo Boyer-Moore data
|
||||
bool canDoTurboBM; // pattern is '%abcd%' case
|
||||
const char* pattern;
|
||||
int pattern_len;
|
||||
|
||||
// TurboBM buffers, *this is owner
|
||||
int* bmGs; // good suffix shift table, size is pattern_len + 1
|
||||
int* bmBc; // bad character shift table, size is alphabet_size
|
||||
|
||||
void turboBM_compute_suffixes(int* suff);
|
||||
void turboBM_compute_good_suffix_shifts(int* suff);
|
||||
void turboBM_compute_bad_character_shifts();
|
||||
bool turboBM_matches(const char* text, int text_len) const;
|
||||
enum { alphabet_size = 256 };
|
||||
|
||||
public:
|
||||
Item_func_like(Item *a,Item *b, char* escape_arg) :Item_bool_func2(a,b),escape(*escape_arg)
|
||||
Item_func_like::Item_func_like(Item *a,Item *b, char* escape_arg) :
|
||||
Item_bool_func2(a,b),
|
||||
escape(*escape_arg),
|
||||
canDoTurboBM(false),
|
||||
pattern(0),
|
||||
pattern_len(0),
|
||||
bmGs(0),
|
||||
bmBc(0)
|
||||
{}
|
||||
|
||||
longlong val_int();
|
||||
enum Functype functype() const { return LIKE_FUNC; }
|
||||
optimize_type select_optimize() const;
|
||||
cond_result eq_cmp_result() const { return COND_TRUE; }
|
||||
const char *func_name() const { return "like"; }
|
||||
void fix_length_and_dec();
|
||||
bool fix_fields(THD *thd,struct st_table_list *tlist);
|
||||
};
|
||||
|
||||
#ifdef USE_REGEX
|
||||
|
@ -122,6 +122,13 @@ bfill((A)->null_flags,(A)->null_bytes,255);\
|
||||
#define TE_INFO_LENGTH 3
|
||||
#define MTYP_NOEMPTY_BIT 128
|
||||
|
||||
/*
|
||||
* Turbo Boyer-Moore is used for SELECT "text" LIKE "%pattern%"
|
||||
* only when the pattern, excluding the two wildcards, is longer
|
||||
* than this many characters (see class Item_func_like).
|
||||
*/
|
||||
#define MIN_TURBOBM_PATTERN_LEN 3
|
||||
|
||||
/* Include prototypes for unireg */
|
||||
|
||||
#include "mysqld_error.h"
|
||||
|
Reference in New Issue
Block a user