1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-27 18:02:13 +03:00

Moving a few static functions in sql_lex.cc to new methods in Lex_input_stream

Reasoning:
- Shorter and clearer code
- Better encapsulation
  (a fair number of Lex_input_stream methods and members were
   moved to the private section)

New methods:

  int lex_token(union YYSTYPE *yylval, THD *thd);
  bool consume_comment(int remaining_recursions_permitted);
  int lex_one_token(union YYSTYPE *yylval, THD *thd);
  int find_keyword(Lex_ident_cli_st *str, uint len, bool function);
  LEX_CSTRING get_token(uint skip, uint length);

Additional changes:

- Removing Lex_input_stream::yylval.
  In the original code it was just an alias
  for the "yylval" passed to lex_one_token().
  This coding style is bug-prone and hard to follow.
  In the new version, "yylval" (or its components) is passed to
  the affected methods as a parameter.
- Moving the code in sql_lex.h up and down between "private" and "public"
  sections (sorry if this made the diff somewhat harder to read)
This commit is contained in:
Alexander Barkov
2018-05-09 00:16:32 +04:00
parent 971268dc14
commit 1d30a23fcc
4 changed files with 302 additions and 284 deletions

View File

@ -2145,6 +2145,16 @@ public:
void reset(char *buff, size_t length);
/**
The main method to scan the next token, with token contraction processing
for LALR(2) resolution, e.g. translate "WITH" followed by "ROLLUP"
to a single token WITH_ROLLUP_SYM.
*/
int lex_token(union YYSTYPE *yylval, THD *thd);
void reduce_digest_token(uint token_left, uint token_right);
private:
/**
Set the echo mode.
@ -2272,15 +2282,6 @@ public:
return m_ptr;
}
/**
Tell whether the query text to parse has been fully consumed.
@return true when m_ptr has reached or passed m_end_of_query,
i.e. there are no more characters to parse
*/
bool eof()
{
  return !(m_ptr < m_end_of_query);
}
/**
End of file indicator for the query text to parse.
@param n number of characters expected
@ -2291,24 +2292,6 @@ public:
return ((m_ptr + n) >= m_end_of_query);
}
/**
Get the raw query buffer.
@return the m_buf member, exposed as a pointer to const char
*/
const char *get_buf()
{
return m_buf;
}
/**
Get the pre-processed query buffer.
@return the m_cpp_buf member, exposed as a pointer to const char
*/
const char *get_cpp_buf()
{
return m_cpp_buf;
}
/**
Get the end of the raw query buffer.
@return the m_end_of_query member, exposed as a pointer to const char
*/
const char *get_end_of_query()
{
return m_end_of_query;
}
/** Mark the stream position as the start of a new token. */
void start_token()
{
@ -2331,6 +2314,61 @@ public:
m_cpp_tok_start= m_cpp_ptr;
}
/**
Get the maximum length of the utf8-body buffer.
The utf8 body can grow because of the character set conversion and escaping.
*/
size_t get_body_utf8_maximum_length(THD *thd);
/**
Get the length of the current token, in the raw buffer.
@return the distance from m_tok_start to m_ptr, minus one
*/
uint yyLength()
{
  /*
    The lexical analyser is assumed to always be 1 character ahead
    of the token; subtracting 1 accounts for that.
  */
  DBUG_ASSERT(m_ptr > m_tok_start);
  uint scanned= (uint) (m_ptr - m_tok_start);
  return scanned - 1;
}
public:
/**
Tell whether lex_token() has already scanned a lookahead token
for LALR(2) resolution.
@return true when lookahead_token is not negative
*/
bool has_lookahead() const
{
  return !(lookahead_token < 0);
}
/**
Tell whether the query text to parse has been fully consumed.
@return true when m_ptr has reached or passed m_end_of_query,
i.e. there are no more characters to parse
*/
bool eof()
{
  return !(m_ptr < m_end_of_query);
}
/**
Get the raw query buffer.
@return the m_buf member, exposed as a pointer to const char
*/
const char *get_buf()
{
return m_buf;
}
/**
Get the pre-processed query buffer.
@return the m_cpp_buf member, exposed as a pointer to const char
*/
const char *get_cpp_buf()
{
return m_cpp_buf;
}
/**
Get the end of the raw query buffer.
@return the m_end_of_query member, exposed as a pointer to const char
*/
const char *get_end_of_query()
{
return m_end_of_query;
}
/** Get the token start position, in the raw buffer. */
const char *get_tok_start()
{
@ -2360,17 +2398,6 @@ public:
return m_ptr;
}
/**
Get the length of the current token, in the raw buffer.
@return the distance from m_tok_start to m_ptr, minus one
*/
uint yyLength()
{
  /*
    The lexical analyser is assumed to always be 1 character ahead
    of the token; subtracting 1 accounts for that.
  */
  DBUG_ASSERT(m_ptr > m_tok_start);
  uint scanned= (uint) (m_ptr - m_tok_start);
  return scanned - 1;
}
/** Get the previous token start position, in the pre-processed buffer. */
const char *get_cpp_start_prev()
{
@ -2434,12 +2461,6 @@ public:
return (size_t) (m_body_utf8_ptr - m_body_utf8);
}
/**
Get the maximum length of the utf8-body buffer.
The utf8 body can grow because of the character set conversion and escaping.
*/
size_t get_body_utf8_maximum_length(THD *thd);
void body_utf8_start(THD *thd, const char *begin_ptr);
void body_utf8_append(const char *ptr);
void body_utf8_append(const char *ptr, const char *end_ptr);
@ -2451,15 +2472,8 @@ public:
CHARSET_INFO *txt_cs,
const char *end_ptr,
my_wc_t sep);
/** Current thread. */
THD *m_thd;
/** Current line number. */
uint yylineno;
/** Interface with bison, value of the last token parsed. */
LEX_YYSTYPE yylval;
private:
/**
LALR(2) resolution, look ahead token.
Value of the next token to return, if any,
@ -2476,15 +2490,20 @@ public:
void add_digest_token(uint token, LEX_YYSTYPE yylval);
void reduce_digest_token(uint token_left, uint token_right);
bool consume_comment(int remaining_recursions_permitted);
int lex_one_token(union YYSTYPE *yylval, THD *thd);
int find_keyword(Lex_ident_cli_st *str, uint len, bool function);
LEX_CSTRING get_token(uint skip, uint length);
int scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str);
int scan_ident_start(THD *thd, Lex_ident_cli_st *str);
int scan_ident_middle(THD *thd, Lex_ident_cli_st *str,
CHARSET_INFO **cs, my_lex_states *);
int scan_ident_delimited(THD *thd, Lex_ident_cli_st *str);
bool get_7bit_or_8bit_ident(THD *thd, uchar *last_char);
private:
/** Current thread. */
THD *m_thd;
/** Pointer to the current position in the raw input stream. */
char *m_ptr;
@ -2570,6 +2589,15 @@ public:
*/
bool multi_statements;
/** Current line number. */
uint yylineno;
/**
Current statement digest instrumentation.
*/
sql_digest_state* m_digest;
private:
/** State of the lexical analyser for comments. */
enum_comment_state in_comment;
enum_comment_state in_comment_saved;
@ -2596,13 +2624,9 @@ public:
NOTE: this member must be used within MYSQLlex() function only.
*/
CHARSET_INFO *m_underscore_cs;
/**
Current statement digest instrumentation.
*/
sql_digest_state* m_digest;
};
/**
Abstract representation of a statement.
This class is an interface between the parser and the runtime.