mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
MDEV-34911 Sargable substr(col, 1, n) = str
Make Item_func_eq of the following forms sargable by updating the relevant range analysis methods: (1) substr(col, 1, n) = str; (2) str = substr(col, 1, n); (3) left(col, n) = str; (4) str = left(col, n), where col is an indexed column and str is a constant, inexpensive item of length n. We do this by factoring out Item_func_like::get_mm_leaf() and applying it to a string obtained by escaping str and then appending the wildcard "%" to it. The addition of the two Functype enums, LEFT_FUNC and SUBSTR_FUNC, requires changes in the spider group by handler so that it continues to handle LEFT and SUBSTR correctly. Co-authored-by: Yuchen Pei <ycp@mariadb.com> Co-authored-by: Sergei Petrunia <sergey@mariadb.com>
This commit is contained in:
319
sql/opt_range.cc
319
sql/opt_range.cc
@@ -8983,6 +8983,205 @@ static bool is_field_an_unique_index(Field *field)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Given a string, escape the LIKE pattern characters (%, _, \) with the '\'.
|
||||
|
||||
@detail
|
||||
Currently we fail if the escaped string didn't fit into MAX_FIELD_WIDTH
|
||||
bytes but this is not necessary.
|
||||
*/
|
||||
|
||||
static bool escape_like_characters(String *res)
|
||||
{
|
||||
CHARSET_INFO *cs= res->charset();
|
||||
StringBuffer<MAX_FIELD_WIDTH> tmp2(cs);
|
||||
tmp2.copy(*res);
|
||||
int ret;
|
||||
uchar *src= (uchar *) tmp2.ptr(), *src_end= (uchar *) tmp2.end(),
|
||||
*dst= (uchar *) res->ptr(), *dst_end= dst + MAX_FIELD_WIDTH;
|
||||
my_wc_t wc;
|
||||
while (src < src_end)
|
||||
{
|
||||
/* Advance to the next character */
|
||||
if ((ret= my_ci_mb_wc(cs, &wc, src, src_end)) <= 0)
|
||||
{
|
||||
if (ret == MY_CS_ILSEQ) /* Bad sequence */
|
||||
return true; /* Cannot LIKE optimize */
|
||||
break; /* End of the string */
|
||||
}
|
||||
src+= ret;
|
||||
|
||||
/* If the next char is escape-able in actual LIKE, escape it */
|
||||
if (wc == (my_wc_t) '%' || wc == (my_wc_t) '_' || wc == (my_wc_t) '\\')
|
||||
{
|
||||
if ((ret= my_ci_wc_mb(cs, (my_wc_t) '\\', dst, dst_end)) <= 0)
|
||||
return true; /* No space - no LIKE optimize */
|
||||
dst+= ret;
|
||||
}
|
||||
if ((ret= my_ci_wc_mb(cs, wc, dst, dst_end)) <= 0)
|
||||
return true; /* No space - no LIKE optimize */
|
||||
dst+= ret;
|
||||
}
|
||||
res->length((char *) dst - res->ptr());
|
||||
return false; /* Ok */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Produce SEL_ARG interval for LIKE and prefix match functions.
|
||||
|
||||
@detail
|
||||
This is used for conditions in forms:
|
||||
|
||||
- key_col LIKE 'sargable_pattern'
|
||||
- SUBSTR(key_col, 1, ...) = 'value', or equivalent conditions involving
|
||||
LEFT() instead of SUBSTR() - see with_sargable_substr() for details.
|
||||
|
||||
@param
|
||||
item The comparison item (Item_func_like or Item_func_eq)
|
||||
*/
|
||||
|
||||
static SEL_ARG *
|
||||
get_mm_leaf_for_LIKE(Item_bool_func *item, RANGE_OPT_PARAM *param,
|
||||
Field *field, KEY_PART *key_part,
|
||||
Item_func::Functype type, Item *value)
|
||||
{
|
||||
DBUG_ENTER("get_mm_leaf_for_sargable");
|
||||
DBUG_ASSERT(value);
|
||||
|
||||
if (key_part->image_type != Field::itRAW)
|
||||
DBUG_RETURN(0);
|
||||
|
||||
uint keynr= param->real_keynr[key_part->key];
|
||||
if (param->using_real_indexes &&
|
||||
!field->optimize_range(keynr, key_part->part))
|
||||
DBUG_RETURN(0);
|
||||
|
||||
if (field->result_type() == STRING_RESULT &&
|
||||
field->charset() != item->compare_collation())
|
||||
{
|
||||
/*
|
||||
For equalities where one side is LEFT or SUBSTR
|
||||
param->note_unusable_keys is BITMAP_EXCEPT_ANY_EQUALITY and the
|
||||
following if condition is satisfied. But it will not result in
|
||||
duplicate warnings because the ref optimizer does not cover this
|
||||
case.
|
||||
*/
|
||||
if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
item->func_name_cstring(),
|
||||
item->compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_COLLATION);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
|
||||
String *res;
|
||||
|
||||
if (!(res= value->val_str(&tmp)))
|
||||
DBUG_RETURN(&null_element);
|
||||
|
||||
if (field->cmp_type() != STRING_RESULT ||
|
||||
field->type_handler() == &type_handler_enum ||
|
||||
field->type_handler() == &type_handler_set)
|
||||
{
|
||||
if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
item->func_name_cstring(),
|
||||
item->compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_DATA_TYPE);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
/*
|
||||
TODO:
|
||||
Check if this was a function. This should have be optimized away
|
||||
in the sql_select.cc
|
||||
*/
|
||||
if (res != &tmp)
|
||||
{
|
||||
tmp.copy(*res); // Get own copy
|
||||
res= &tmp;
|
||||
}
|
||||
|
||||
/*
|
||||
If we're handling a predicate in one of these forms:
|
||||
- LEFT(key_col, N) ='string_const'
|
||||
- SUBSTRING(key_col, 1, N)='string_const'
|
||||
|
||||
then we need to:
|
||||
- escape the LIKE pattern characters in the string_const,
|
||||
- make the search pattern to be 'string_const%':
|
||||
*/
|
||||
if (type != Item_func::LIKE_FUNC)
|
||||
{
|
||||
DBUG_ASSERT(type == Item_func::EQ_FUNC);
|
||||
if (escape_like_characters(res))
|
||||
DBUG_RETURN(0); /* Error, no optimization */
|
||||
res->append("%", 1);
|
||||
}
|
||||
|
||||
uint maybe_null= (uint) field->real_maybe_null();
|
||||
size_t field_length= field->pack_length() + maybe_null;
|
||||
size_t offset= maybe_null;
|
||||
size_t length= key_part->store_length;
|
||||
|
||||
if (length != key_part->length + maybe_null)
|
||||
{
|
||||
/* key packed with length prefix */
|
||||
offset+= HA_KEY_BLOB_LENGTH;
|
||||
field_length= length - HA_KEY_BLOB_LENGTH;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (unlikely(length < field_length))
|
||||
{
|
||||
/*
|
||||
This can only happen in a table created with UNIREG where one key
|
||||
overlaps many fields
|
||||
*/
|
||||
length= field_length;
|
||||
}
|
||||
else
|
||||
field_length= length;
|
||||
}
|
||||
length+= offset;
|
||||
uchar *min_str,*max_str;
|
||||
if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
|
||||
DBUG_RETURN(0);
|
||||
max_str= min_str + length;
|
||||
if (maybe_null)
|
||||
max_str[0]= min_str[0]=0;
|
||||
|
||||
size_t min_length, max_length;
|
||||
field_length-= maybe_null;
|
||||
/* If the item is a LIKE, use its escape, otherwise use backslash */
|
||||
int escape= type == Item_func::LIKE_FUNC ?
|
||||
((Item_func_like *) item)->escape : '\\';
|
||||
if (field->charset()->like_range(res->ptr(), res->length(),
|
||||
escape, wild_one, wild_many,
|
||||
field_length,
|
||||
(char*) min_str + offset,
|
||||
(char*) max_str + offset,
|
||||
&min_length, &max_length))
|
||||
DBUG_RETURN(0); // Can't optimize with LIKE
|
||||
|
||||
if (offset != maybe_null) // BLOB or VARCHAR
|
||||
{
|
||||
int2store(min_str + maybe_null, min_length);
|
||||
int2store(max_str + maybe_null, max_length);
|
||||
}
|
||||
SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
|
||||
DBUG_RETURN(tree);
|
||||
}
|
||||
|
||||
|
||||
SEL_TREE *
|
||||
Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
|
||||
Item_func::Functype type, Item *value)
|
||||
@@ -8995,6 +9194,9 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
|
||||
KEY_PART *end = param->key_parts_end;
|
||||
SEL_TREE *tree=0;
|
||||
table_map value_used_tables= 0;
|
||||
bool know_sargable_substr= false;
|
||||
bool sargable_substr; // protected by know_sargable_substr
|
||||
|
||||
if (value &&
|
||||
(value_used_tables= value->used_tables()) &
|
||||
~(param->prev_tables | param->read_tables))
|
||||
@@ -9019,7 +9221,18 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
|
||||
*/
|
||||
MEM_ROOT *tmp_root= param->mem_root;
|
||||
param->thd->mem_root= param->old_root;
|
||||
sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
|
||||
if (!know_sargable_substr)
|
||||
{
|
||||
sargable_substr= with_sargable_substr();
|
||||
know_sargable_substr= true;
|
||||
}
|
||||
if (sargable_substr)
|
||||
{
|
||||
sel_arg= get_mm_leaf_for_LIKE(this, param, key_part->field, key_part,
|
||||
type, value);
|
||||
}
|
||||
else
|
||||
sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
|
||||
param->thd->mem_root= tmp_root;
|
||||
|
||||
if (!sel_arg)
|
||||
@@ -9087,109 +9300,7 @@ Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param,
|
||||
Item_func::Functype type, Item *value)
|
||||
{
|
||||
DBUG_ENTER("Item_func_like::get_mm_leaf");
|
||||
DBUG_ASSERT(value);
|
||||
|
||||
if (key_part->image_type != Field::itRAW)
|
||||
DBUG_RETURN(0);
|
||||
|
||||
uint keynr= param->real_keynr[key_part->key];
|
||||
if (param->using_real_indexes &&
|
||||
!field->optimize_range(keynr, key_part->part))
|
||||
DBUG_RETURN(0);
|
||||
|
||||
if (field->result_type() == STRING_RESULT &&
|
||||
field->charset() != compare_collation())
|
||||
{
|
||||
if (param->note_unusable_keys & BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
func_name_cstring(),
|
||||
compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_COLLATION);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
|
||||
String *res;
|
||||
|
||||
if (!(res= value->val_str(&tmp)))
|
||||
DBUG_RETURN(&null_element);
|
||||
|
||||
if (field->cmp_type() != STRING_RESULT ||
|
||||
field->type_handler() == &type_handler_enum ||
|
||||
field->type_handler() == &type_handler_set)
|
||||
{
|
||||
if (param->note_unusable_keys & BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
func_name_cstring(),
|
||||
compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_DATA_TYPE);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
/*
|
||||
TODO:
|
||||
Check if this was a function. This should have be optimized away
|
||||
in the sql_select.cc
|
||||
*/
|
||||
if (res != &tmp)
|
||||
{
|
||||
tmp.copy(*res); // Get own copy
|
||||
res= &tmp;
|
||||
}
|
||||
|
||||
uint maybe_null= (uint) field->real_maybe_null();
|
||||
size_t field_length= field->pack_length() + maybe_null;
|
||||
size_t offset= maybe_null;
|
||||
size_t length= key_part->store_length;
|
||||
|
||||
if (length != key_part->length + maybe_null)
|
||||
{
|
||||
/* key packed with length prefix */
|
||||
offset+= HA_KEY_BLOB_LENGTH;
|
||||
field_length= length - HA_KEY_BLOB_LENGTH;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (unlikely(length < field_length))
|
||||
{
|
||||
/*
|
||||
This can only happen in a table created with UNIREG where one key
|
||||
overlaps many fields
|
||||
*/
|
||||
length= field_length;
|
||||
}
|
||||
else
|
||||
field_length= length;
|
||||
}
|
||||
length+= offset;
|
||||
uchar *min_str,*max_str;
|
||||
if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
|
||||
DBUG_RETURN(0);
|
||||
max_str= min_str + length;
|
||||
if (maybe_null)
|
||||
max_str[0]= min_str[0]=0;
|
||||
|
||||
size_t min_length, max_length;
|
||||
field_length-= maybe_null;
|
||||
if (field->charset()->like_range(res->ptr(), res->length(),
|
||||
escape, wild_one, wild_many,
|
||||
field_length,
|
||||
(char*) min_str + offset,
|
||||
(char*) max_str + offset,
|
||||
&min_length, &max_length))
|
||||
DBUG_RETURN(0); // Can't optimize with LIKE
|
||||
|
||||
if (offset != maybe_null) // BLOB or VARCHAR
|
||||
{
|
||||
int2store(min_str + maybe_null, min_length);
|
||||
int2store(max_str + maybe_null, max_length);
|
||||
}
|
||||
SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
|
||||
DBUG_RETURN(tree);
|
||||
DBUG_RETURN(get_mm_leaf_for_LIKE(this, param, field, key_part, type, value));
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user