mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
- MDEV-6695 Bad column name for UCS2 string literals
The Item_string constructors called set_name() on the source string, which was wrong because, in the case of UCS2/UTF16/UTF32, the source value might not be a well-formed string (e.g. it might have an incomplete leftmost character). Now set_name() is called on str_value after it has been copied (optionally with left zero-padding) from the source string.
- MDEV-6694 Illegal mix of collation with a PS parameter
Item_param::convert_str_value() did not set the repertoire. This commit introduces a new structure, MY_STRING_METADATA, to collect the character length and repertoire of a string in a single loop instead of two separate loops. It also adds a new class, Item_basic_value::Metadata, as a convenience wrapper around MY_STRING_METADATA, so that the code can be shared between Item_string and Item_param.
This commit is contained in:
@ -735,6 +735,14 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len);
|
|||||||
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
|
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
size_t char_length;
|
||||||
|
uint repertoire;
|
||||||
|
} MY_STRING_METADATA;
|
||||||
|
|
||||||
|
void my_string_metadata_get(MY_STRING_METADATA *metadata,
|
||||||
|
CHARSET_INFO *cs, const char *str, size_t len);
|
||||||
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
|
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
|
||||||
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
|
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
|
||||||
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
|
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
|
||||||
|
@ -5333,5 +5333,12 @@ SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
|
|||||||
PI
|
PI
|
||||||
pi=3.141593
|
pi=3.141593
|
||||||
#
|
#
|
||||||
|
# MDEV-6695 Bad column name for UCS2 string literals
|
||||||
|
#
|
||||||
|
SET NAMES utf8, character_set_connection=ucs2;
|
||||||
|
SELECT 'a','aa';
|
||||||
|
a aa
|
||||||
|
a aa
|
||||||
|
#
|
||||||
# End of 10.0 tests
|
# End of 10.0 tests
|
||||||
#
|
#
|
||||||
|
@ -6008,5 +6008,28 @@ CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001'))
|
|||||||
aa
|
aa
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
#
|
#
|
||||||
|
# MDEV-6694 Illegal mix of collation with a PS parameter
|
||||||
|
#
|
||||||
|
SET NAMES utf8;
|
||||||
|
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
|
||||||
|
INSERT INTO t1 VALUES (1,'a');
|
||||||
|
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
|
||||||
|
CONCAT(b,IF(a,'b','b'))
|
||||||
|
ab
|
||||||
|
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
|
||||||
|
SET @b='b';
|
||||||
|
EXECUTE stmt USING @b,@b;
|
||||||
|
CONCAT(b,IF(a,?,?))
|
||||||
|
ab
|
||||||
|
SET @b='';
|
||||||
|
EXECUTE stmt USING @b,@b;
|
||||||
|
CONCAT(b,IF(a,?,?))
|
||||||
|
a
|
||||||
|
SET @b='я';
|
||||||
|
EXECUTE stmt USING @b,@b;
|
||||||
|
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
|
||||||
|
DEALLOCATE PREPARE stmt;
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
# End of 10.0 tests
|
# End of 10.0 tests
|
||||||
#
|
#
|
||||||
|
@ -902,6 +902,13 @@ DROP TABLE t1;
|
|||||||
--echo #
|
--echo #
|
||||||
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
|
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-6695 Bad column name for UCS2 string literals
|
||||||
|
--echo #
|
||||||
|
SET NAMES utf8, character_set_connection=ucs2;
|
||||||
|
SELECT 'a','aa';
|
||||||
|
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.0 tests
|
--echo # End of 10.0 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
@ -1719,6 +1719,24 @@ SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 X'61')) FROM t1;
|
|||||||
SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1;
|
SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-6694 Illegal mix of collation with a PS parameter
|
||||||
|
--echo #
|
||||||
|
SET NAMES utf8;
|
||||||
|
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
|
||||||
|
INSERT INTO t1 VALUES (1,'a');
|
||||||
|
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
|
||||||
|
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
|
||||||
|
SET @b='b';
|
||||||
|
EXECUTE stmt USING @b,@b;
|
||||||
|
SET @b='';
|
||||||
|
EXECUTE stmt USING @b,@b;
|
||||||
|
SET @b='я';
|
||||||
|
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||||
|
EXECUTE stmt USING @b,@b;
|
||||||
|
DEALLOCATE PREPARE stmt;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.0 tests
|
--echo # End of 10.0 tests
|
||||||
|
47
sql/item.cc
47
sql/item.cc
@ -1073,10 +1073,14 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
|
|||||||
name_length= 0;
|
name_length= 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (cs->ctype)
|
|
||||||
{
|
|
||||||
const char *str_start= str;
|
|
||||||
|
|
||||||
|
const char *str_start= str;
|
||||||
|
if (!cs->ctype || cs->mbminlen > 1)
|
||||||
|
{
|
||||||
|
str+= cs->cset->scan(cs, str, str + length, MY_SEQ_SPACES);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
This will probably need a better implementation in the future:
|
This will probably need a better implementation in the future:
|
||||||
a function in CHARSET_INFO structure.
|
a function in CHARSET_INFO structure.
|
||||||
@ -1086,6 +1090,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
|
|||||||
length--;
|
length--;
|
||||||
str++;
|
str++;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (str != str_start && !is_autogenerated_name)
|
if (str != str_start && !is_autogenerated_name)
|
||||||
{
|
{
|
||||||
char buff[SAFE_NAME_LEN];
|
char buff[SAFE_NAME_LEN];
|
||||||
@ -1101,7 +1106,6 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
|
|||||||
ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
|
ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
|
||||||
buff);
|
buff);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (!my_charset_same(cs, system_charset_info))
|
if (!my_charset_same(cs, system_charset_info))
|
||||||
{
|
{
|
||||||
size_t res_length;
|
size_t res_length;
|
||||||
@ -1269,27 +1273,11 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
|
|||||||
SET @@arg= 1;
|
SET @@arg= 1;
|
||||||
EXECUTE stms USING @arg;
|
EXECUTE stms USING @arg;
|
||||||
|
|
||||||
result_type is STRING_RESULT at prepare time,
|
In the above example result_type is STRING_RESULT at prepare time,
|
||||||
and INT_RESULT at execution time.
|
and INT_RESULT at execution time.
|
||||||
*/
|
*/
|
||||||
if (const_item())
|
return !const_item() || state == NULL_VALUE ?
|
||||||
{
|
this : const_charset_converter(tocs, true);
|
||||||
if (state == NULL_VALUE)
|
|
||||||
return this;
|
|
||||||
uint cnv_errors;
|
|
||||||
String *ostr= val_str(&cnvstr);
|
|
||||||
if (!needs_charset_converter(tocs))
|
|
||||||
return this;
|
|
||||||
cnvitem->copy_value(ostr->ptr(), ostr->length(),
|
|
||||||
ostr->charset(), tocs, &cnv_errors);
|
|
||||||
if (cnv_errors)
|
|
||||||
return NULL;
|
|
||||||
if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin &&
|
|
||||||
!cnvitem->check_well_formed_result(true))
|
|
||||||
return NULL;
|
|
||||||
return cnvitem;
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3175,8 +3163,6 @@ Item_param::Item_param(uint pos_in_query_arg) :
|
|||||||
value is set.
|
value is set.
|
||||||
*/
|
*/
|
||||||
maybe_null= 1;
|
maybe_null= 1;
|
||||||
cnvitem= new Item_string("", 0, &my_charset_bin, DERIVATION_COERCIBLE);
|
|
||||||
cnvstr.set(cnvbuf, sizeof(cnvbuf), &my_charset_bin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3736,18 +3722,14 @@ bool Item_param::convert_str_value(THD *thd)
|
|||||||
str_value.set_charset(value.cs_info.final_character_set_of_str_value);
|
str_value.set_charset(value.cs_info.final_character_set_of_str_value);
|
||||||
/* Here str_value is guaranteed to be in final_character_set_of_str_value */
|
/* Here str_value is guaranteed to be in final_character_set_of_str_value */
|
||||||
|
|
||||||
max_length= str_value.numchars() * str_value.charset()->mbmaxlen;
|
|
||||||
|
|
||||||
/* For the strings converted to numeric form within some functions */
|
|
||||||
decimals= NOT_FIXED_DEC;
|
|
||||||
/*
|
/*
|
||||||
str_value_ptr is returned from val_str(). It must be not alloced
|
str_value_ptr is returned from val_str(). It must be not alloced
|
||||||
to prevent its modification by val_str() invoker.
|
to prevent its modification by val_str() invoker.
|
||||||
*/
|
*/
|
||||||
str_value_ptr.set(str_value.ptr(), str_value.length(),
|
str_value_ptr.set(str_value.ptr(), str_value.length(),
|
||||||
str_value.charset());
|
str_value.charset());
|
||||||
/* Synchronize item charset with value charset */
|
/* Synchronize item charset and length with value charset */
|
||||||
collation.set(str_value.charset(), DERIVATION_COERCIBLE);
|
fix_charset_and_length_from_str_value(DERIVATION_COERCIBLE);
|
||||||
}
|
}
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -3777,7 +3759,8 @@ Item_param::clone_item()
|
|||||||
case STRING_VALUE:
|
case STRING_VALUE:
|
||||||
case LONG_DATA_VALUE:
|
case LONG_DATA_VALUE:
|
||||||
return new Item_string(name, str_value.c_ptr_quick(), str_value.length(),
|
return new Item_string(name, str_value.c_ptr_quick(), str_value.length(),
|
||||||
str_value.charset());
|
str_value.charset(),
|
||||||
|
collation.derivation, collation.repertoire);
|
||||||
case TIME_VALUE:
|
case TIME_VALUE:
|
||||||
break;
|
break;
|
||||||
case NO_VALUE:
|
case NO_VALUE:
|
||||||
|
142
sql/item.h
142
sql/item.h
@ -1694,7 +1694,41 @@ class Item_basic_value :public Item
|
|||||||
value->bin_eq(other) :
|
value->bin_eq(other) :
|
||||||
collation.collation == cs && value->eq(other, collation.collation);
|
collation.collation == cs && value->eq(other, collation.collation);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
// Value metadata, e.g. to make string processing easier
|
||||||
|
class Metadata: private MY_STRING_METADATA
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Metadata(const String *str)
|
||||||
|
{
|
||||||
|
my_string_metadata_get(this, str->charset(), str->ptr(), str->length());
|
||||||
|
}
|
||||||
|
Metadata(const String *str, uint repertoire)
|
||||||
|
{
|
||||||
|
MY_STRING_METADATA::repertoire= repertoire;
|
||||||
|
MY_STRING_METADATA::char_length= str->numchars();
|
||||||
|
}
|
||||||
|
uint repertoire() const { return MY_STRING_METADATA::repertoire; }
|
||||||
|
size_t char_length() const { return MY_STRING_METADATA::char_length; }
|
||||||
|
};
|
||||||
|
void fix_charset_and_length_from_str_value(Derivation dv, Metadata metadata)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
We have to have a different max_length than 'length' here to
|
||||||
|
ensure that we get the right length if we do use the item
|
||||||
|
to create a new table. In this case max_length must be the maximum
|
||||||
|
number of chars for a string of this type because we in Create_field::
|
||||||
|
divide the max_length with mbmaxlen).
|
||||||
|
*/
|
||||||
|
collation.set(str_value.charset(), dv, metadata.repertoire());
|
||||||
|
fix_char_length(metadata.char_length());
|
||||||
|
decimals= NOT_FIXED_DEC;
|
||||||
|
}
|
||||||
|
void fix_charset_and_length_from_str_value(Derivation dv)
|
||||||
|
{
|
||||||
|
fix_charset_and_length_from_str_value(dv, Metadata(&str_value));
|
||||||
|
}
|
||||||
Item_basic_value(): Item() {}
|
Item_basic_value(): Item() {}
|
||||||
/*
|
/*
|
||||||
In the xxx_eq() methods below we need to cast off "const" to
|
In the xxx_eq() methods below we need to cast off "const" to
|
||||||
@ -2374,10 +2408,6 @@ public:
|
|||||||
class Item_param :public Item_basic_value,
|
class Item_param :public Item_basic_value,
|
||||||
private Settable_routine_parameter
|
private Settable_routine_parameter
|
||||||
{
|
{
|
||||||
char cnvbuf[MAX_FIELD_WIDTH];
|
|
||||||
String cnvstr;
|
|
||||||
Item_string *cnvitem;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
enum enum_item_param_state
|
enum enum_item_param_state
|
||||||
{
|
{
|
||||||
@ -2727,40 +2757,16 @@ protected:
|
|||||||
{
|
{
|
||||||
m_cs_specified= cs_specified;
|
m_cs_specified= cs_specified;
|
||||||
}
|
}
|
||||||
|
void fix_from_value(Derivation dv, const Metadata metadata)
|
||||||
public:
|
|
||||||
Item_string(const char *str,uint length,
|
|
||||||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
|
|
||||||
uint repertoire= MY_REPERTOIRE_UNICODE30)
|
|
||||||
: m_cs_specified(FALSE)
|
|
||||||
{
|
{
|
||||||
str_value.set_or_copy_aligned(str, length, cs);
|
fix_charset_and_length_from_str_value(dv, metadata);
|
||||||
collation.set(cs, dv, repertoire);
|
|
||||||
/*
|
|
||||||
We have to have a different max_length than 'length' here to
|
|
||||||
ensure that we get the right length if we do use the item
|
|
||||||
to create a new table. In this case max_length must be the maximum
|
|
||||||
number of chars for a string of this type because we in Create_field::
|
|
||||||
divide the max_length with mbmaxlen).
|
|
||||||
*/
|
|
||||||
max_length= str_value.numchars()*cs->mbmaxlen;
|
|
||||||
set_name(str, length, cs);
|
|
||||||
decimals=NOT_FIXED_DEC;
|
|
||||||
// it is constant => can be used without fix_fields (and frequently used)
|
// it is constant => can be used without fix_fields (and frequently used)
|
||||||
fixed= 1;
|
fixed= 1;
|
||||||
}
|
}
|
||||||
Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
|
void fix_and_set_name_from_value(Derivation dv, const Metadata metadata)
|
||||||
Derivation dv, uint repertoire)
|
|
||||||
:m_cs_specified(false)
|
|
||||||
{
|
{
|
||||||
if (str_value.copy(str, tocs, conv_errors))
|
fix_from_value(dv, metadata);
|
||||||
str_value.set("", 0, tocs); // EOM ?
|
set_name(str_value.ptr(), str_value.length(), str_value.charset());
|
||||||
str_value.mark_as_const();
|
|
||||||
collation.set(tocs, dv, repertoire);
|
|
||||||
fix_char_length(str_value.numchars());
|
|
||||||
set_name(str_value.ptr(), str_value.length(), tocs);
|
|
||||||
decimals= NOT_FIXED_DEC;
|
|
||||||
fixed= 1;
|
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
/* Just create an item and do not fill string representation */
|
/* Just create an item and do not fill string representation */
|
||||||
@ -2769,52 +2775,56 @@ protected:
|
|||||||
{
|
{
|
||||||
collation.set(cs, dv);
|
collation.set(cs, dv);
|
||||||
max_length= 0;
|
max_length= 0;
|
||||||
set_name(NULL, 0, cs);
|
set_name(NULL, 0, system_charset_info);
|
||||||
decimals= NOT_FIXED_DEC;
|
decimals= NOT_FIXED_DEC;
|
||||||
fixed= 1;
|
fixed= 1;
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
Item_string(const char *name_par, const char *str, uint length,
|
// Constructors with the item name set from its value
|
||||||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
|
Item_string(const char *str, uint length, CHARSET_INFO *cs,
|
||||||
uint repertoire= MY_REPERTOIRE_UNICODE30)
|
Derivation dv, uint repertoire)
|
||||||
: m_cs_specified(FALSE)
|
: m_cs_specified(FALSE)
|
||||||
{
|
{
|
||||||
str_value.set_or_copy_aligned(str, length, cs);
|
str_value.set_or_copy_aligned(str, length, cs);
|
||||||
collation.set(cs, dv, repertoire);
|
fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
|
||||||
max_length= str_value.numchars()*cs->mbmaxlen;
|
|
||||||
set_name(name_par, 0, cs);
|
|
||||||
decimals=NOT_FIXED_DEC;
|
|
||||||
// it is constant => can be used without fix_fields (and frequently used)
|
|
||||||
fixed= 1;
|
|
||||||
}
|
}
|
||||||
void copy_value(const char *str, uint32 length, CHARSET_INFO *fromcs,
|
Item_string(const char *str, uint length,
|
||||||
CHARSET_INFO *tocs, uint *cnv_errors)
|
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
|
||||||
|
: m_cs_specified(FALSE)
|
||||||
{
|
{
|
||||||
str_value.copy(str, length, fromcs, tocs, cnv_errors);
|
str_value.set_or_copy_aligned(str, length, cs);
|
||||||
str_value.mark_as_const();
|
fix_and_set_name_from_value(dv, Metadata(&str_value));
|
||||||
collation.set(tocs);
|
}
|
||||||
fix_char_length(str_value.numchars());
|
Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
|
||||||
|
Derivation dv, uint repertoire)
|
||||||
|
:m_cs_specified(false)
|
||||||
|
{
|
||||||
|
if (str_value.copy(str, tocs, conv_errors))
|
||||||
|
str_value.set("", 0, tocs); // EOM ?
|
||||||
|
str_value.mark_as_const();
|
||||||
|
fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
|
||||||
|
}
|
||||||
|
// Constructors with an externally provided item name
|
||||||
|
Item_string(const char *name_par, const char *str, uint length,
|
||||||
|
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
|
||||||
|
:m_cs_specified(false)
|
||||||
|
{
|
||||||
|
str_value.set_or_copy_aligned(str, length, cs);
|
||||||
|
fix_from_value(dv, Metadata(&str_value));
|
||||||
|
set_name(name_par, 0, system_charset_info);
|
||||||
|
}
|
||||||
|
Item_string(const char *name_par, const char *str, uint length,
|
||||||
|
CHARSET_INFO *cs, Derivation dv, uint repertoire)
|
||||||
|
:m_cs_specified(false)
|
||||||
|
{
|
||||||
|
str_value.set_or_copy_aligned(str, length, cs);
|
||||||
|
fix_from_value(dv, Metadata(&str_value, repertoire));
|
||||||
|
set_name(name_par, 0, system_charset_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_value(String *to) const
|
void print_value(String *to) const
|
||||||
{
|
{
|
||||||
str_value.print(to);
|
str_value.print(to);
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
This is used in stored procedures to avoid memory leaks and
|
|
||||||
does a deep copy of its argument.
|
|
||||||
*/
|
|
||||||
void set_str_with_copy(const char *str_arg, uint length_arg)
|
|
||||||
{
|
|
||||||
str_value.copy(str_arg, length_arg, collation.collation);
|
|
||||||
max_length= str_value.numchars() * collation.collation->mbmaxlen;
|
|
||||||
}
|
|
||||||
void set_repertoire_from_value()
|
|
||||||
{
|
|
||||||
collation.repertoire= my_string_repertoire(str_value.charset(),
|
|
||||||
str_value.ptr(),
|
|
||||||
str_value.length());
|
|
||||||
}
|
|
||||||
enum Type type() const { return STRING_ITEM; }
|
enum Type type() const { return STRING_ITEM; }
|
||||||
double val_real();
|
double val_real();
|
||||||
longlong val_int();
|
longlong val_int();
|
||||||
@ -2914,13 +2924,11 @@ public:
|
|||||||
Item_string_with_introducer(const char *str, uint length, CHARSET_INFO *cs)
|
Item_string_with_introducer(const char *str, uint length, CHARSET_INFO *cs)
|
||||||
:Item_string(str, length, cs)
|
:Item_string(str, length, cs)
|
||||||
{
|
{
|
||||||
set_repertoire_from_value();
|
|
||||||
set_cs_specified(true);
|
set_cs_specified(true);
|
||||||
}
|
}
|
||||||
Item_string_with_introducer(const String *str, CHARSET_INFO *tocs)
|
Item_string_with_introducer(const String *str, CHARSET_INFO *tocs)
|
||||||
:Item_string(str->ptr(), str->length(), tocs)
|
:Item_string(str->ptr(), str->length(), tocs)
|
||||||
{
|
{
|
||||||
set_repertoire_from_value();
|
|
||||||
set_cs_specified(true);
|
set_cs_specified(true);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -580,7 +580,7 @@ bool String::append_with_prefill(const char *s,uint32 arg_length,
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 String::numchars()
|
uint32 String::numchars() const
|
||||||
{
|
{
|
||||||
return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
|
return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
|
||||||
}
|
}
|
||||||
|
@ -411,7 +411,7 @@ public:
|
|||||||
friend int stringcmp(const String *a,const String *b);
|
friend int stringcmp(const String *a,const String *b);
|
||||||
friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
|
friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
|
||||||
friend class Field;
|
friend class Field;
|
||||||
uint32 numchars();
|
uint32 numchars() const;
|
||||||
int charpos(longlong i,uint32 offset=0);
|
int charpos(longlong i,uint32 offset=0);
|
||||||
|
|
||||||
int reserve(uint32 space_needed)
|
int reserve(uint32 space_needed)
|
||||||
|
@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint
|
||||||
|
my_string_repertoire_8bit(CHARSET_INFO *cs, const char *str, ulong length)
|
||||||
|
{
|
||||||
|
const char *strend;
|
||||||
|
if ((cs->state & MY_CS_NONASCII) && length > 0)
|
||||||
|
return MY_REPERTOIRE_UNICODE30;
|
||||||
|
for (strend= str + length; str < strend; str++)
|
||||||
|
{
|
||||||
|
if (((uchar) *str) > 0x7F)
|
||||||
|
return MY_REPERTOIRE_UNICODE30;
|
||||||
|
}
|
||||||
|
return MY_REPERTOIRE_ASCII;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
my_string_metadata_init(MY_STRING_METADATA *metadata)
|
||||||
|
{
|
||||||
|
metadata->repertoire= MY_REPERTOIRE_ASCII;
|
||||||
|
metadata->char_length= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
This should probably eventually go as a virtual function into
|
||||||
|
MY_CHARSET_HANDLER or MY_COLLATION_HANDLER.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
my_string_metadata_get_mb(MY_STRING_METADATA *metadata,
|
||||||
|
CHARSET_INFO *cs, const char *str, ulong length)
|
||||||
|
{
|
||||||
|
const char *strend= str + length;
|
||||||
|
for (my_string_metadata_init(metadata) ;
|
||||||
|
str < strend;
|
||||||
|
metadata->char_length++)
|
||||||
|
{
|
||||||
|
my_wc_t wc;
|
||||||
|
int mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) str,
|
||||||
|
(const uchar *) strend);
|
||||||
|
if (mblen > 0) /* Assigned character */
|
||||||
|
{
|
||||||
|
if (wc > 0x7F)
|
||||||
|
metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
|
||||||
|
str+= mblen;
|
||||||
|
}
|
||||||
|
else if (mblen == MY_CS_ILSEQ) /* Bad byte sequence */
|
||||||
|
{
|
||||||
|
metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
|
||||||
|
str++;
|
||||||
|
}
|
||||||
|
else if (mblen > MY_CS_TOOSMALL) /* Unassigned character */
|
||||||
|
{
|
||||||
|
metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
|
||||||
|
str+= (-mblen);
|
||||||
|
}
|
||||||
|
else /* Incomplete character, premature end-of-line */
|
||||||
|
{
|
||||||
|
metadata->repertoire|= MY_REPERTOIRE_EXTENDED; /* Just in case */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Collect string metadata: length in characters and repertoire.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
my_string_metadata_get(MY_STRING_METADATA *metadata,
|
||||||
|
CHARSET_INFO *cs, const char *str, ulong length)
|
||||||
|
{
|
||||||
|
if (cs->mbmaxlen == 1 && !(cs->state & MY_CS_NONASCII))
|
||||||
|
{
|
||||||
|
metadata->char_length= length;
|
||||||
|
metadata->repertoire= my_string_repertoire_8bit(cs, str, length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
my_string_metadata_get_mb(metadata, cs, str, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Check repertoire: detect pure ascii strings
|
Check repertoire: detect pure ascii strings
|
||||||
*/
|
*/
|
||||||
uint
|
uint
|
||||||
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
|
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
|
||||||
{
|
{
|
||||||
const char *strend= str + length;
|
if (cs->mbminlen == 1 && !(cs->state & MY_CS_NONASCII))
|
||||||
if (cs->mbminlen == 1)
|
|
||||||
{
|
{
|
||||||
for ( ; str < strend; str++)
|
return my_string_repertoire_8bit(cs, str, length);
|
||||||
{
|
|
||||||
if (((uchar) *str) > 0x7F)
|
|
||||||
return MY_REPERTOIRE_UNICODE30;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
const char *strend= str + length;
|
||||||
my_wc_t wc;
|
my_wc_t wc;
|
||||||
int chlen;
|
int chlen;
|
||||||
for (;
|
for (;
|
||||||
|
Reference in New Issue
Block a user