mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
Removing some duplicate code in THD::convert_string() & friends
1. Adding THD::convert_string(LEX_CSTRING *to,...) as a wrapper for convert_string(LEX_STRING *to,...), as LEX_CSTRING is now frequently used for conversion purposes. This reduced duplicate code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules in *.yy 2. Adding yet another THD::convert_string() with an extra parameter "bool simple_copy_is_possible". This further reduced the repeated code in the mentioned grammar rules in *.yy 3. Deriving Lex_ident_cli_st from Lex_string_with_metadata_st, as they have very similar functionality. Moving m_quote from Lex_ident_cli_st to Lex_string_with_metadata_st, as m_quote will be used later to optimize string literals anyway (e.g. avoid redundant copying on the tokenizer stage). Adjusting Lex_input_stream::get_text() accordingly. 4. Moving the remainder of the code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules into new methods in THD: - make_text_string_sys() - make_text_string_connection() - make_text_string_filesystem() and changing *.yy to use these new methods. This reduced the amount of similar code in sql_yacc.yy and sql_yacc_ora.yy. 5. Removing duplicate code in Lex_input_stream::body_utf8_append_ident() by reusing THD::make_text_string_sys(). Thanks to #3 and #4. 6. Making THD members charset_is_system_charset, charset_is_collation_connection, charset_is_character_set_filesystem private, as they are not needed externally any more.
This commit is contained in:
@ -3130,8 +3130,10 @@ public:
|
|||||||
is set if a statement accesses a temporary table created through
|
is set if a statement accesses a temporary table created through
|
||||||
CREATE TEMPORARY TABLE.
|
CREATE TEMPORARY TABLE.
|
||||||
*/
|
*/
|
||||||
bool charset_is_system_charset, charset_is_collation_connection;
|
private:
|
||||||
|
bool charset_is_system_charset, charset_is_collation_connection;
|
||||||
bool charset_is_character_set_filesystem;
|
bool charset_is_character_set_filesystem;
|
||||||
|
public:
|
||||||
bool enable_slow_log; /* Enable slow log for current statement */
|
bool enable_slow_log; /* Enable slow log for current statement */
|
||||||
bool abort_on_warning;
|
bool abort_on_warning;
|
||||||
bool got_warning; /* Set on call to push_warning() */
|
bool got_warning; /* Set on call to push_warning() */
|
||||||
@ -3706,6 +3708,25 @@ public:
|
|||||||
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||||
const char *from, size_t from_length,
|
const char *from, size_t from_length,
|
||||||
CHARSET_INFO *from_cs);
|
CHARSET_INFO *from_cs);
|
||||||
|
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
|
||||||
|
const char *from, size_t from_length,
|
||||||
|
CHARSET_INFO *from_cs)
|
||||||
|
{
|
||||||
|
LEX_STRING tmp;
|
||||||
|
bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs);
|
||||||
|
to->str= tmp.str;
|
||||||
|
to->length= tmp.length;
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
|
||||||
|
const LEX_CSTRING *from, CHARSET_INFO *fromcs,
|
||||||
|
bool simple_copy_is_possible)
|
||||||
|
{
|
||||||
|
if (!simple_copy_is_possible)
|
||||||
|
return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
|
||||||
|
*to= *from;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
Convert a string between character sets.
|
Convert a string between character sets.
|
||||||
Uses my_convert_fix(), which uses an mb_wc .. wc_mb loop internally.
|
Uses my_convert_fix(), which uses an mb_wc .. wc_mb loop internally.
|
||||||
@ -3767,6 +3788,24 @@ public:
|
|||||||
Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str);
|
Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str);
|
||||||
Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str,
|
Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str,
|
||||||
CHARSET_INFO *cs);
|
CHARSET_INFO *cs);
|
||||||
|
bool make_text_string_sys(LEX_CSTRING *to,
|
||||||
|
const Lex_string_with_metadata_st *from)
|
||||||
|
{
|
||||||
|
return convert_string(to, system_charset_info,
|
||||||
|
from, charset(), charset_is_system_charset);
|
||||||
|
}
|
||||||
|
bool make_text_string_connection(LEX_CSTRING *to,
|
||||||
|
const Lex_string_with_metadata_st *from)
|
||||||
|
{
|
||||||
|
return convert_string(to, variables.collation_connection,
|
||||||
|
from, charset(), charset_is_collation_connection);
|
||||||
|
}
|
||||||
|
bool make_text_string_filesystem(LEX_CSTRING *to,
|
||||||
|
const Lex_string_with_metadata_st *from)
|
||||||
|
{
|
||||||
|
return convert_string(to, variables.character_set_filesystem,
|
||||||
|
from, charset(), charset_is_character_set_filesystem);
|
||||||
|
}
|
||||||
void add_changed_table(TABLE *table);
|
void add_changed_table(TABLE *table);
|
||||||
void add_changed_table(const char *key, size_t key_length);
|
void add_changed_table(const char *key, size_t key_length);
|
||||||
CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length);
|
CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length);
|
||||||
|
@ -416,32 +416,18 @@ void Lex_input_stream::body_utf8_append(const char *ptr)
|
|||||||
operation.
|
operation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void Lex_input_stream::body_utf8_append_ident(THD *thd,
|
void
|
||||||
const LEX_CSTRING *txt,
|
Lex_input_stream::body_utf8_append_ident(THD *thd,
|
||||||
const char *end_ptr)
|
const Lex_string_with_metadata_st *txt,
|
||||||
|
const char *end_ptr)
|
||||||
{
|
{
|
||||||
if (!m_cpp_utf8_processed_ptr)
|
if (!m_cpp_utf8_processed_ptr)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
LEX_CSTRING utf_txt;
|
LEX_CSTRING utf_txt;
|
||||||
CHARSET_INFO *txt_cs= thd->charset();
|
thd->make_text_string_sys(&utf_txt, txt); // QQ: check return value?
|
||||||
|
|
||||||
if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci))
|
|
||||||
{
|
|
||||||
LEX_STRING to;
|
|
||||||
thd->convert_string(&to,
|
|
||||||
&my_charset_utf8_general_ci,
|
|
||||||
txt->str, (uint) txt->length,
|
|
||||||
txt_cs);
|
|
||||||
utf_txt.str= to.str;
|
|
||||||
utf_txt.length= to.length;
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
utf_txt= *txt;
|
|
||||||
|
|
||||||
/* NOTE: utf_txt.length is in bytes, not in symbols. */
|
/* NOTE: utf_txt.length is in bytes, not in symbols. */
|
||||||
|
|
||||||
memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length);
|
memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length);
|
||||||
m_body_utf8_ptr += utf_txt.length;
|
m_body_utf8_ptr += utf_txt.length;
|
||||||
*m_body_utf8_ptr= 0;
|
*m_body_utf8_ptr= 0;
|
||||||
@ -1043,13 +1029,13 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
|
|||||||
uchar c;
|
uchar c;
|
||||||
uint found_escape=0;
|
uint found_escape=0;
|
||||||
CHARSET_INFO *cs= m_thd->charset();
|
CHARSET_INFO *cs= m_thd->charset();
|
||||||
|
bool is_8bit= false;
|
||||||
|
|
||||||
dst->set_8bit(false);
|
|
||||||
while (! eof())
|
while (! eof())
|
||||||
{
|
{
|
||||||
c= yyGet();
|
c= yyGet();
|
||||||
if (c & 0x80)
|
if (c & 0x80)
|
||||||
dst->set_8bit(true);
|
is_8bit= true;
|
||||||
#ifdef USE_MB
|
#ifdef USE_MB
|
||||||
{
|
{
|
||||||
int l;
|
int l;
|
||||||
@ -1093,23 +1079,24 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
|
|||||||
|
|
||||||
if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1)))
|
if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1)))
|
||||||
{
|
{
|
||||||
dst->str= ""; // Sql_alloc has set error flag
|
dst->set(&empty_clex_str, 0, '\0');
|
||||||
dst->length= 0;
|
return true; // Sql_alloc has set error flag
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
dst->str= to;
|
|
||||||
|
|
||||||
m_cpp_text_start= m_cpp_tok_start + pre_skip;
|
m_cpp_text_start= m_cpp_tok_start + pre_skip;
|
||||||
m_cpp_text_end= get_cpp_ptr() - post_skip;
|
m_cpp_text_end= get_cpp_ptr() - post_skip;
|
||||||
|
|
||||||
if (!found_escape)
|
if (!found_escape)
|
||||||
{
|
{
|
||||||
memcpy(to, str, dst->length= (end - str));
|
size_t len= (end - str);
|
||||||
to[dst->length]= 0;
|
memcpy(to, str, len);
|
||||||
|
to[len]= '\0';
|
||||||
|
dst->set(to, len, is_8bit, '\0');
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
dst->length= unescape(cs, to, str, end, sep);
|
size_t len= unescape(cs, to, str, end, sep);
|
||||||
|
dst->set(to, len, is_8bit, '\0');
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -37,12 +37,16 @@
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
A string with metadata.
|
A string with metadata. Usually points to a string in the client
|
||||||
|
character set, but unlike Lex_ident_cli_st (see below) it does not
|
||||||
|
necessarily point to a query fragment. It can also point to memory
|
||||||
|
of other kinds (e.g. an additional THD allocated memory buffer
|
||||||
|
not overlapping with the current query text).
|
||||||
|
|
||||||
We'll add more flags here eventually, to know if the string has, e.g.:
|
We'll add more flags here eventually, to know if the string has, e.g.:
|
||||||
- multi-byte characters
|
- multi-byte characters
|
||||||
- bad byte sequences
|
- bad byte sequences
|
||||||
- backslash escapes: 'a\nb'
|
- backslash escapes: 'a\nb'
|
||||||
- separator escapes: 'a''b'
|
|
||||||
and reuse the original query fragments instead of making the string
|
and reuse the original query fragments instead of making the string
|
||||||
copy too early, in Lex_input_stream::get_text().
|
copy too early, in Lex_input_stream::get_text().
|
||||||
This will allow to avoid unnecessary copying, as well as
|
This will allow to avoid unnecessary copying, as well as
|
||||||
@ -50,9 +54,30 @@
|
|||||||
*/
|
*/
|
||||||
struct Lex_string_with_metadata_st: public LEX_CSTRING
|
struct Lex_string_with_metadata_st: public LEX_CSTRING
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
bool m_is_8bit; // True if the string has 8bit characters
|
bool m_is_8bit; // True if the string has 8bit characters
|
||||||
|
char m_quote; // Quote character, or 0 if not quoted
|
||||||
public:
|
public:
|
||||||
void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
|
void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
|
||||||
|
void set_metadata(bool is_8bit, char quote)
|
||||||
|
{
|
||||||
|
m_is_8bit= is_8bit;
|
||||||
|
m_quote= quote;
|
||||||
|
}
|
||||||
|
void set(const char *s, size_t len, bool is_8bit, char quote)
|
||||||
|
{
|
||||||
|
str= s;
|
||||||
|
length= len;
|
||||||
|
set_metadata(is_8bit, quote);
|
||||||
|
}
|
||||||
|
void set(const LEX_CSTRING *s, bool is_8bit, char quote)
|
||||||
|
{
|
||||||
|
((LEX_CSTRING &)*this)= *s;
|
||||||
|
set_metadata(is_8bit, quote);
|
||||||
|
}
|
||||||
|
bool is_8bit() const { return m_is_8bit; }
|
||||||
|
bool is_quoted() const { return m_quote != '\0'; }
|
||||||
|
char quote() const { return m_quote; }
|
||||||
// Get string repertoire by the 8-bit flag and the character set
|
// Get string repertoire by the 8-bit flag and the character set
|
||||||
uint repertoire(CHARSET_INFO *cs) const
|
uint repertoire(CHARSET_INFO *cs) const
|
||||||
{
|
{
|
||||||
@ -71,44 +96,27 @@ public:
|
|||||||
Used to store identifiers in the client character set.
|
Used to store identifiers in the client character set.
|
||||||
Points to a query fragment.
|
Points to a query fragment.
|
||||||
*/
|
*/
|
||||||
struct Lex_ident_cli_st: public LEX_CSTRING
|
struct Lex_ident_cli_st: public Lex_string_with_metadata_st
|
||||||
{
|
{
|
||||||
private:
|
|
||||||
bool m_is_8bit;
|
|
||||||
char m_quote;
|
|
||||||
public:
|
public:
|
||||||
void set_keyword(const char *s, size_t len)
|
void set_keyword(const char *s, size_t len)
|
||||||
{
|
{
|
||||||
str= s;
|
set(s, len, false, '\0');
|
||||||
length= len;
|
|
||||||
m_is_8bit= false;
|
|
||||||
m_quote= '\0';
|
|
||||||
}
|
}
|
||||||
void set_ident(const char *s, size_t len, bool is_8bit)
|
void set_ident(const char *s, size_t len, bool is_8bit)
|
||||||
{
|
{
|
||||||
str= s;
|
set(s, len, is_8bit, '\0');
|
||||||
length= len;
|
|
||||||
m_is_8bit= is_8bit;
|
|
||||||
m_quote= '\0';
|
|
||||||
}
|
}
|
||||||
void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote)
|
void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote)
|
||||||
{
|
{
|
||||||
str= s;
|
set(s, len, is_8bit, quote);
|
||||||
length= len;
|
|
||||||
m_is_8bit= is_8bit;
|
|
||||||
m_quote= quote;
|
|
||||||
}
|
}
|
||||||
void set_unquoted(const LEX_CSTRING *s, bool is_8bit)
|
void set_unquoted(const LEX_CSTRING *s, bool is_8bit)
|
||||||
{
|
{
|
||||||
((LEX_CSTRING &)*this)= *s;
|
set(s, is_8bit, '\0');
|
||||||
m_is_8bit= is_8bit;
|
|
||||||
m_quote= '\0';
|
|
||||||
}
|
}
|
||||||
const char *pos() const { return str - is_quoted(); }
|
const char *pos() const { return str - is_quoted(); }
|
||||||
const char *end() const { return str + length + is_quoted(); }
|
const char *end() const { return str + length + is_quoted(); }
|
||||||
bool is_quoted() const { return m_quote != '\0'; }
|
|
||||||
bool is_8bit() const { return m_is_8bit; }
|
|
||||||
char quote() const { return m_quote; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -2453,7 +2461,7 @@ public:
|
|||||||
void body_utf8_append(const char *ptr);
|
void body_utf8_append(const char *ptr);
|
||||||
void body_utf8_append(const char *ptr, const char *end_ptr);
|
void body_utf8_append(const char *ptr, const char *end_ptr);
|
||||||
void body_utf8_append_ident(THD *thd,
|
void body_utf8_append_ident(THD *thd,
|
||||||
const LEX_CSTRING *txt,
|
const Lex_string_with_metadata_st *txt,
|
||||||
const char *end_ptr);
|
const char *end_ptr);
|
||||||
void body_utf8_append_escape(THD *thd,
|
void body_utf8_append_escape(THD *thd,
|
||||||
const LEX_CSTRING *txt,
|
const LEX_CSTRING *txt,
|
||||||
|
@ -15170,57 +15170,26 @@ IDENT_sys:
|
|||||||
TEXT_STRING_sys:
|
TEXT_STRING_sys:
|
||||||
TEXT_STRING
|
TEXT_STRING
|
||||||
{
|
{
|
||||||
if (thd->charset_is_system_charset)
|
if (thd->make_text_string_sys(&$$, &$1))
|
||||||
$$= $1;
|
MYSQL_YYABORT;
|
||||||
else
|
|
||||||
{
|
|
||||||
LEX_STRING to;
|
|
||||||
if (unlikely(thd->convert_string(&to, system_charset_info,
|
|
||||||
$1.str, $1.length,
|
|
||||||
thd->charset())))
|
|
||||||
MYSQL_YYABORT;
|
|
||||||
$$.str= to.str;
|
|
||||||
$$.length= to.length;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
TEXT_STRING_literal:
|
TEXT_STRING_literal:
|
||||||
TEXT_STRING
|
TEXT_STRING
|
||||||
{
|
{
|
||||||
if (thd->charset_is_collation_connection)
|
if (thd->make_text_string_connection(&$$, &$1))
|
||||||
$$= $1;
|
MYSQL_YYABORT;
|
||||||
else
|
|
||||||
{
|
|
||||||
LEX_STRING to;
|
|
||||||
if (unlikely(thd->convert_string(&to,
|
|
||||||
thd->variables.collation_connection,
|
|
||||||
$1.str, $1.length,
|
|
||||||
thd->charset())))
|
|
||||||
MYSQL_YYABORT;
|
|
||||||
$$.str= to.str;
|
|
||||||
$$.length= to.length;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
TEXT_STRING_filesystem:
|
TEXT_STRING_filesystem:
|
||||||
TEXT_STRING
|
TEXT_STRING
|
||||||
{
|
{
|
||||||
if (thd->charset_is_character_set_filesystem)
|
if (thd->make_text_string_filesystem(&$$, &$1))
|
||||||
$$= $1;
|
MYSQL_YYABORT;
|
||||||
else
|
|
||||||
{
|
|
||||||
LEX_STRING to;
|
|
||||||
if (unlikely(thd->convert_string(&to,
|
|
||||||
thd->variables.character_set_filesystem,
|
|
||||||
$1.str, $1.length,
|
|
||||||
thd->charset())))
|
|
||||||
MYSQL_YYABORT;
|
|
||||||
$$.str= to.str;
|
|
||||||
$$.length= to.length;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
;
|
||||||
|
|
||||||
ident_table_alias:
|
ident_table_alias:
|
||||||
IDENT_sys
|
IDENT_sys
|
||||||
|
@ -14919,56 +14919,24 @@ IDENT_sys:
|
|||||||
TEXT_STRING_sys:
|
TEXT_STRING_sys:
|
||||||
TEXT_STRING
|
TEXT_STRING
|
||||||
{
|
{
|
||||||
if (thd->charset_is_system_charset)
|
if (thd->make_text_string_sys(&$$, &$1))
|
||||||
$$= $1;
|
MYSQL_YYABORT;
|
||||||
else
|
|
||||||
{
|
|
||||||
LEX_STRING to;
|
|
||||||
if (unlikely(thd->convert_string(&to, system_charset_info,
|
|
||||||
$1.str, $1.length,
|
|
||||||
thd->charset())))
|
|
||||||
MYSQL_YYABORT;
|
|
||||||
$$.str= to.str;
|
|
||||||
$$.length= to.length;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
TEXT_STRING_literal:
|
TEXT_STRING_literal:
|
||||||
TEXT_STRING
|
TEXT_STRING
|
||||||
{
|
{
|
||||||
if (thd->charset_is_collation_connection)
|
if (thd->make_text_string_connection(&$$, &$1))
|
||||||
$$= $1;
|
MYSQL_YYABORT;
|
||||||
else
|
|
||||||
{
|
|
||||||
LEX_STRING to;
|
|
||||||
if (unlikely(thd->convert_string(&to,
|
|
||||||
thd->variables.collation_connection,
|
|
||||||
$1.str, $1.length,
|
|
||||||
thd->charset())))
|
|
||||||
MYSQL_YYABORT;
|
|
||||||
$$.str= to.str;
|
|
||||||
$$.length= to.length;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
TEXT_STRING_filesystem:
|
TEXT_STRING_filesystem:
|
||||||
TEXT_STRING
|
TEXT_STRING
|
||||||
{
|
{
|
||||||
if (thd->charset_is_character_set_filesystem)
|
if (thd->make_text_string_filesystem(&$$, &$1))
|
||||||
$$= $1;
|
MYSQL_YYABORT;
|
||||||
else
|
|
||||||
{
|
|
||||||
LEX_STRING to;
|
|
||||||
if (unlikely(thd->convert_string(&to,
|
|
||||||
thd->variables.character_set_filesystem,
|
|
||||||
$1.str, $1.length,
|
|
||||||
thd->charset())))
|
|
||||||
MYSQL_YYABORT;
|
|
||||||
$$.str= to.str;
|
|
||||||
$$.length= to.length;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user