1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-34911 Sargable substr(col, 1, n) = str

Make Item_func_eq of the following forms sargable by updating the relevant range
analysis methods:

1. substr(col, 1, n) = str
2. str = substr(col, 1, n)
3. left(col, n) = str
4. str = left(col, n)

where col is an indexed column and str is a constant, inexpensive item
of length n.

We do this by factoring out Item_func_like::get_mm_leaf() and applying it
to a string obtained by escaping str and then appending the wildcard
"%" to it.

The addition of the two Functype enums, LEFT_FUNC and SUBSTR_FUNC,
requires changes in the spider group by handler to continue handling
LEFT and SUBSTR correctly.

Co-authored-by: Yuchen Pei <ycp@mariadb.com>
Co-authored-by: Sergei Petrunia <sergey@mariadb.com>
This commit is contained in:
Yuchen Pei
2024-11-26 11:28:57 +11:00
parent ae998c22b2
commit e021770667
13 changed files with 673 additions and 107 deletions

View File

@@ -8983,6 +8983,205 @@ static bool is_field_an_unique_index(Field *field)
}
/*
  @brief
    Given a string, escape the LIKE pattern characters (%, _, \) with the '\'.

  @detail
    The escaped text is assembled in a local MAX_FIELD_WIDTH-byte buffer and
    only then copied back into *res. This way the function does not depend
    on how much writable space is available behind res->ptr(), and *res is
    left untouched when the function fails.

    Currently we fail if the escaped string didn't fit into MAX_FIELD_WIDTH
    bytes but this is not necessary.

  @param res  [in/out] The string to escape. On success it is replaced with
              its escaped form (same character set).

  @return
    false  Ok, *res holds the escaped string
    true   Failure: an ill-formed character was met, the escaped text did
           not fit into MAX_FIELD_WIDTH bytes, or copying the result back
           into *res failed. The caller must not use LIKE optimization.
*/
static bool escape_like_characters(String *res)
{
  CHARSET_INFO *cs= res->charset();
  /*
    Output buffer.
    NOTE(review): relies on a freshly constructed StringBuffer exposing its
    MAX_FIELD_WIDTH-byte inline buffer through ptr() - confirm.
  */
  StringBuffer<MAX_FIELD_WIDTH> escaped(cs);
  int ret;
  uchar *src= (uchar *) res->ptr(), *src_end= (uchar *) res->end(),
        *dst= (uchar *) escaped.ptr(), *dst_end= dst + MAX_FIELD_WIDTH;
  my_wc_t wc;

  while (src < src_end)
  {
    /* Advance to the next character */
    if ((ret= my_ci_mb_wc(cs, &wc, src, src_end)) <= 0)
    {
      if (ret == MY_CS_ILSEQ) /* Bad sequence */
        return true;          /* Cannot LIKE optimize */
      break;                  /* End of the string */
    }
    src+= ret;

    /* If the next char is escape-able in actual LIKE, escape it */
    if (wc == (my_wc_t) '%' || wc == (my_wc_t) '_' || wc == (my_wc_t) '\\')
    {
      if ((ret= my_ci_wc_mb(cs, (my_wc_t) '\\', dst, dst_end)) <= 0)
        return true; /* No space - no LIKE optimize */
      dst+= ret;
    }
    if ((ret= my_ci_wc_mb(cs, wc, dst, dst_end)) <= 0)
      return true; /* No space - no LIKE optimize */
    dst+= ret;
  }

  escaped.length((char *) dst - escaped.ptr());
  /* Publish the result; a failed copy means "no LIKE optimize" as well */
  if (res->copy(escaped))
    return true;
  return false; /* Ok */
}
/*
  @brief
    Produce SEL_ARG interval for LIKE and prefix match functions.

  @detail
    This is used for conditions in forms:
    - key_col LIKE 'sargable_pattern'
    - SUBSTR(key_col, 1, ...) = 'value', or equivalent conditions involving
      LEFT() instead of SUBSTR() - see with_sargable_substr() for details.

    For the second form the constant is turned into a LIKE pattern first:
    its LIKE special characters are escaped with a backslash and a trailing
    '%' is appended. After that both forms are handled in the same way: the
    field's character set like_range() fills the min and max key images.

  @param item      The comparison item (Item_func_like or Item_func_eq)
  @param param     Range analysis context; provides the memory root and
                   the key number mapping
  @param field     The indexed field the interval is built for
  @param key_part  The key part that covers the field
  @param type      Item_func::LIKE_FUNC for LIKE; anything else is expected
                   to be Item_func::EQ_FUNC (the SUBSTR/LEFT forms)
  @param value     The pattern (for LIKE) or the string constant (for the
                   SUBSTR/LEFT forms); must not be NULL pointer

  @return
    0              The condition cannot be used for range access here
    &null_element  The value evaluated to SQL NULL
    otherwise      A SEL_ARG allocated on param->mem_root
*/
static SEL_ARG *
get_mm_leaf_for_LIKE(Item_bool_func *item, RANGE_OPT_PARAM *param,
                     Field *field, KEY_PART *key_part,
                     Item_func::Functype type, Item *value)
{
  DBUG_ENTER("get_mm_leaf_for_LIKE");
  DBUG_ASSERT(value);

  if (key_part->image_type != Field::itRAW)
    DBUG_RETURN(0);

  uint keynr= param->real_keynr[key_part->key];
  if (param->using_real_indexes &&
      !field->optimize_range(keynr, key_part->part))
    DBUG_RETURN(0);

  if (field->result_type() == STRING_RESULT &&
      field->charset() != item->compare_collation())
  {
    /*
      For equalities where one side is LEFT or SUBSTR
      param->note_unusable_keys is BITMAP_EXCEPT_ANY_EQUALITY and the
      following if condition is satisfied. But it will not result in
      duplicate warnings because the ref optimizer does not cover this
      case.
    */
    if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
      field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
                                            item->func_name_cstring(),
                                            item->compare_collation(),
                                            value,
                                            Data_type_compatibility::
                                            INCOMPATIBLE_COLLATION);
    DBUG_RETURN(0);
  }

  StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
  String *res;

  if (!(res= value->val_str(&tmp)))
    DBUG_RETURN(&null_element);

  if (field->cmp_type() != STRING_RESULT ||
      field->type_handler() == &type_handler_enum ||
      field->type_handler() == &type_handler_set)
  {
    if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
      field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
                                            item->func_name_cstring(),
                                            item->compare_collation(),
                                            value,
                                            Data_type_compatibility::
                                            INCOMPATIBLE_DATA_TYPE);
    DBUG_RETURN(0);
  }

  /*
    TODO:
    Check if this was a function. This should have been optimized away
    in the sql_select.cc
  */
  if (res != &tmp)
  {
    tmp.copy(*res);                             // Get own copy
    res= &tmp;
  }

  /*
    If we're handling a predicate in one of these forms:
    - LEFT(key_col, N) ='string_const'
    - SUBSTRING(key_col, 1, N)='string_const'
    then we need to:
    - escape the LIKE pattern characters in the string_const,
    - make the search pattern to be 'string_const%':
  */
  if (type != Item_func::LIKE_FUNC)
  {
    DBUG_ASSERT(type == Item_func::EQ_FUNC);
    /*
      Give up if either step fails. In particular, a pattern that lost its
      trailing '%' would describe an exact match rather than a prefix match
      and would produce a too narrow interval.
    */
    if (escape_like_characters(res) || res->append("%", 1))
      DBUG_RETURN(0); /* Error, no optimization */
  }

  uint maybe_null= (uint) field->real_maybe_null();
  size_t field_length= field->pack_length() + maybe_null;
  size_t offset= maybe_null;
  size_t length= key_part->store_length;

  if (length != key_part->length + maybe_null)
  {
    /* key packed with length prefix */
    offset+= HA_KEY_BLOB_LENGTH;
    field_length= length - HA_KEY_BLOB_LENGTH;
  }
  else
  {
    if (unlikely(length < field_length))
    {
      /*
        This can only happen in a table created with UNIREG where one key
        overlaps many fields
      */
      length= field_length;
    }
    else
      field_length= length;
  }
  length+= offset;

  /* One allocation holds both images: min first, max right after it */
  uchar *min_str,*max_str;
  if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
    DBUG_RETURN(0);
  max_str= min_str + length;
  if (maybe_null)
    max_str[0]= min_str[0]=0;

  size_t min_length, max_length;
  field_length-= maybe_null;

  /* If the item is a LIKE, use its escape, otherwise use backslash */
  int escape= type == Item_func::LIKE_FUNC ?
              ((Item_func_like *) item)->escape : '\\';

  if (field->charset()->like_range(res->ptr(), res->length(),
                                   escape, wild_one, wild_many,
                                   field_length,
                                   (char*) min_str + offset,
                                   (char*) max_str + offset,
                                   &min_length, &max_length))
    DBUG_RETURN(0);                             // Can't optimize with LIKE

  if (offset != maybe_null)                     // BLOB or VARCHAR
  {
    int2store(min_str + maybe_null, min_length);
    int2store(max_str + maybe_null, max_length);
  }
  SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
  DBUG_RETURN(tree);
}
SEL_TREE *
Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
Item_func::Functype type, Item *value)
@@ -8995,6 +9194,9 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
KEY_PART *end = param->key_parts_end;
SEL_TREE *tree=0;
table_map value_used_tables= 0;
bool know_sargable_substr= false;
bool sargable_substr; // protected by know_sargable_substr
if (value &&
(value_used_tables= value->used_tables()) &
~(param->prev_tables | param->read_tables))
@@ -9019,7 +9221,18 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
*/
MEM_ROOT *tmp_root= param->mem_root;
param->thd->mem_root= param->old_root;
sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
if (!know_sargable_substr)
{
sargable_substr= with_sargable_substr();
know_sargable_substr= true;
}
if (sargable_substr)
{
sel_arg= get_mm_leaf_for_LIKE(this, param, key_part->field, key_part,
type, value);
}
else
sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
param->thd->mem_root= tmp_root;
if (!sel_arg)
@@ -9087,109 +9300,7 @@ Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param,
Item_func::Functype type, Item *value)
{
DBUG_ENTER("Item_func_like::get_mm_leaf");
DBUG_ASSERT(value);
if (key_part->image_type != Field::itRAW)
DBUG_RETURN(0);
uint keynr= param->real_keynr[key_part->key];
if (param->using_real_indexes &&
!field->optimize_range(keynr, key_part->part))
DBUG_RETURN(0);
if (field->result_type() == STRING_RESULT &&
field->charset() != compare_collation())
{
if (param->note_unusable_keys & BITMAP_LIKE)
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
func_name_cstring(),
compare_collation(),
value,
Data_type_compatibility::
INCOMPATIBLE_COLLATION);
DBUG_RETURN(0);
}
StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
String *res;
if (!(res= value->val_str(&tmp)))
DBUG_RETURN(&null_element);
if (field->cmp_type() != STRING_RESULT ||
field->type_handler() == &type_handler_enum ||
field->type_handler() == &type_handler_set)
{
if (param->note_unusable_keys & BITMAP_LIKE)
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
func_name_cstring(),
compare_collation(),
value,
Data_type_compatibility::
INCOMPATIBLE_DATA_TYPE);
DBUG_RETURN(0);
}
/*
TODO:
Check if this was a function. This should have be optimized away
in the sql_select.cc
*/
if (res != &tmp)
{
tmp.copy(*res); // Get own copy
res= &tmp;
}
uint maybe_null= (uint) field->real_maybe_null();
size_t field_length= field->pack_length() + maybe_null;
size_t offset= maybe_null;
size_t length= key_part->store_length;
if (length != key_part->length + maybe_null)
{
/* key packed with length prefix */
offset+= HA_KEY_BLOB_LENGTH;
field_length= length - HA_KEY_BLOB_LENGTH;
}
else
{
if (unlikely(length < field_length))
{
/*
This can only happen in a table created with UNIREG where one key
overlaps many fields
*/
length= field_length;
}
else
field_length= length;
}
length+= offset;
uchar *min_str,*max_str;
if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
DBUG_RETURN(0);
max_str= min_str + length;
if (maybe_null)
max_str[0]= min_str[0]=0;
size_t min_length, max_length;
field_length-= maybe_null;
if (field->charset()->like_range(res->ptr(), res->length(),
escape, wild_one, wild_many,
field_length,
(char*) min_str + offset,
(char*) max_str + offset,
&min_length, &max_length))
DBUG_RETURN(0); // Can't optimize with LIKE
if (offset != maybe_null) // BLOB or VARCHAR
{
int2store(min_str + maybe_null, min_length);
int2store(max_str + maybe_null, max_length);
}
SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
DBUG_RETURN(tree);
DBUG_RETURN(get_mm_leaf_for_LIKE(this, param, field, key_part, type, value));
}