mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special CHARSET_INFO to create lookup keys. Review input addressed.) Equalities that compare utf8mb{3,4}_general_ci strings, like: WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP) can now be used to construct ref[const] access and can also participate in multiple-equalities. This means that utf8mb3_key_col can be used for key lookups when compared with a utf8mb4 constant, field or expression using the '=' or '<=>' comparison operators. This is enabled with optimizer_switch='cset_narrowing=on'; the flag is OFF by default. IMPLEMENTATION: Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci. This is valid as any utf8mb3 value is also a utf8mb4 value. When making the index lookup value for utf8mb3_key_col, we do "Charset Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are copied as-is, as they can be represented in utf8mb3. Characters that are outside the BMP cannot be represented in utf8mb3 and are replaced with U+FFFD, the "Replacement Character". In utf8mb4_general_ci, the Replacement Character compares as equal to any character that is not in the BMP. Because of this, the constructed lookup value will find all index records that would be considered equal by the original condition (MB3-4-CMP). Approved-by: Monty <monty@mariadb.org>
This commit is contained in:
@ -34,6 +34,8 @@
|
||||
#include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */
|
||||
#include "filesort.h"
|
||||
|
||||
#include "cset_narrowing.h"
|
||||
|
||||
typedef struct st_join_table JOIN_TAB;
|
||||
/* Values in optimize */
|
||||
#define KEY_OPTIMIZE_EXISTS 1U
|
||||
@ -1921,7 +1923,14 @@ public:
|
||||
{
|
||||
enum_check_fields org_count_cuted_fields= thd->count_cuted_fields;
|
||||
Use_relaxed_field_copy urfc(to_field->table->in_use);
|
||||
|
||||
/* If needed, perform CharsetNarrowing for making ref access lookup keys. */
|
||||
Utf8_narrow do_narrow(to_field, do_cset_narrowing);
|
||||
|
||||
store_key_result result= copy_inner();
|
||||
|
||||
do_narrow.stop();
|
||||
|
||||
thd->count_cuted_fields= org_count_cuted_fields;
|
||||
return result;
|
||||
}
|
||||
@ -1931,6 +1940,12 @@ public:
|
||||
uchar *null_ptr;
|
||||
uchar err;
|
||||
|
||||
/*
|
||||
This is set to true if we need to do Charset Narrowing when making a lookup
|
||||
key.
|
||||
*/
|
||||
bool do_cset_narrowing= false;
|
||||
|
||||
virtual enum store_key_result copy_inner()=0;
|
||||
};
|
||||
|
||||
@ -1950,6 +1965,7 @@ class store_key_field: public store_key
|
||||
if (to_field)
|
||||
{
|
||||
copy_field.set(to_field,from_field,0);
|
||||
setup_charset_narrowing();
|
||||
}
|
||||
}
|
||||
|
||||
@ -1960,6 +1976,15 @@ class store_key_field: public store_key
|
||||
{
|
||||
copy_field.set(to_field, fld_item->field, 0);
|
||||
field_name= fld_item->full_name();
|
||||
setup_charset_narrowing();
|
||||
}
|
||||
|
||||
/*
  Setup CharsetNarrowing if necessary.

  Stores in do_cset_narrowing the result of
  Utf8_narrow::should_do_narrowing() for the destination field
  (copy_field.to_field) and the character set of the source field
  (copy_field.from_field).

  This reads copy_field.to_field and dereferences copy_field.from_field,
  so the constructors shown in this diff call it right after
  copy_field.set().
*/
|
||||
void setup_charset_narrowing()
|
||||
{
|
||||
do_cset_narrowing=
|
||||
Utf8_narrow::should_do_narrowing(copy_field.to_field,
|
||||
copy_field.from_field->charset());
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -2000,7 +2025,12 @@ public:
|
||||
:store_key(thd, to_field_arg, ptr,
|
||||
null_ptr_arg ? null_ptr_arg : item_arg->maybe_null() ?
|
||||
&err : (uchar*) 0, length), item(item_arg), use_value(val)
|
||||
{}
|
||||
{
|
||||
/* Setup CharsetNarrowing to be done if necessary */
|
||||
do_cset_narrowing=
|
||||
Utf8_narrow::should_do_narrowing(to_field,
|
||||
item->collation.collation);
|
||||
}
|
||||
/*
  Construct a store_key_item from an existing store_key.

  @param arg       store_key used to initialize the store_key base
  @param new_item  stored as the item member
  @param val       stored as use_value

  NOTE(review): unlike the other store_key_item constructor in this diff,
  this one does not call Utf8_narrow::should_do_narrowing() for new_item;
  presumably do_cset_narrowing is carried over from arg by the base-class
  copy - confirm that this is the intended behavior.
*/
store_key_item(store_key &arg, Item *new_item, bool val)
|
||||
:store_key(arg), item(new_item), use_value(val)
|
||||
{}
|
||||
@ -2388,7 +2418,7 @@ Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
|
||||
extern bool test_if_ref(Item *,
|
||||
Item_field *left_item,Item *right_item);
|
||||
|
||||
/*
  Test bits of the session's optimizer_switch.

  @param thd   session whose variables.optimizer_switch is read
  @param flag  bit mask to test

  @return true if at least one bit of flag is set in
          thd->variables.optimizer_switch, false otherwise
*/
inline bool optimizer_flag(THD *thd, ulonglong flag)
|
||||
inline bool optimizer_flag(const THD *thd, ulonglong flag)
|
||||
{
|
||||
return (thd->variables.optimizer_switch & flag);
|
||||
}
|
||||
|
Reference in New Issue
Block a user