Moving a few static functions in sql_lex.cc to new methods in Lex_input_stream

Reasoning:
- Shorter and clearer code
- Better encapsulation (a fair number of Lex_input_stream methods and members were moved to the private section)

New methods:
  int lex_token(union YYSTYPE *yylval, THD *thd);
  bool consume_comment(int remaining_recursions_permitted);
  int lex_one_token(union YYSTYPE *yylval, THD *thd);
  int find_keyword(Lex_ident_cli_st *str, uint len, bool function);
  LEX_CSTRING get_token(uint skip, uint length);

Additional changes:
- Removing Lex_input_stream::yylval. In the original code it was just an alias for the "yylval" passed to lex_one_token(). This coding style is bug-prone and hard to follow. In the new version, "yylval" (or its components) is passed to the affected methods as a parameter.
- Moving the code in sql_lex.h up and down between the "private" and "public" sections (sorry if this made the diff somewhat harder to read)
2025-07-27 18:02:13 +03:00 · 2018-05-09 00:16:32 +04:00
parent 971268dc14
commit 1d30a23fcc
4 changed files with 302 additions and 284 deletions
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@ -2145,6 +2145,16 @@ public:

  void reset(char *buff, size_t length);

+  /**
+    The main method to scan the next token, with token contraction processing
+    for LALR(2) resolution, e.g. translate "WITH" followed by "ROLLUP"
+    to a single token WITH_ROLLUP_SYM.
+  */
+  int lex_token(union YYSTYPE *yylval, THD *thd);
+
+  void reduce_digest_token(uint token_left, uint token_right);
+
+private:
  /**
    Set the echo mode.

@ -2272,15 +2282,6 @@ public:
    return m_ptr;
  }

-  /**
-    End of file indicator for the query text to parse.
-    @return true if there are no more characters to parse
-  */
-  bool eof()
-  {
-    return (m_ptr >= m_end_of_query);
-  }
-
  /**
    End of file indicator for the query text to parse.
    @param n number of characters expected
@ -2291,24 +2292,6 @@ public:
    return ((m_ptr + n) >= m_end_of_query);
  }

-  /** Get the raw query buffer. */
-  const char *get_buf()
-  {
-    return m_buf;
-  }
-
-  /** Get the pre-processed query buffer. */
-  const char *get_cpp_buf()
-  {
-    return m_cpp_buf;
-  }
-
-  /** Get the end of the raw query buffer. */
-  const char *get_end_of_query()
-  {
-    return m_end_of_query;
-  }
-
  /** Mark the stream position as the start of a new token. */
  void start_token()
  {
@ -2331,6 +2314,61 @@ public:
    m_cpp_tok_start= m_cpp_ptr;
  }

+  /**
+    Get the maximum length of the utf8-body buffer.
+    The utf8 body can grow because of the character set conversion and escaping.
+  */
+  size_t get_body_utf8_maximum_length(THD *thd);
+
+  /** Get the length of the current token, in the raw buffer. */
+  uint yyLength()
+  {
+    /*
+      The assumption is that the lexical analyser is always 1 character ahead,
+      which the -1 account for.
+    */
+    DBUG_ASSERT(m_ptr > m_tok_start);
+    return (uint) ((m_ptr - m_tok_start) - 1);
+  }
+
+public:
+
+  /**
+    Test if a lookahead token was already scanned by lex_token(),
+    for LALR(2) resolution.
+  */
+  bool has_lookahead() const
+  {
+    return lookahead_token >= 0;
+  }
+
+  /**
+    End of file indicator for the query text to parse.
+    @return true if there are no more characters to parse
+  */
+  bool eof()
+  {
+    return (m_ptr >= m_end_of_query);
+  }
+
+  /** Get the raw query buffer. */
+  const char *get_buf()
+  {
+    return m_buf;
+  }
+
+  /** Get the pre-processed query buffer. */
+  const char *get_cpp_buf()
+  {
+    return m_cpp_buf;
+  }
+
+  /** Get the end of the raw query buffer. */
+  const char *get_end_of_query()
+  {
+    return m_end_of_query;
+  }
+
  /** Get the token start position, in the raw buffer. */
  const char *get_tok_start()
  {
@ -2360,17 +2398,6 @@ public:
    return m_ptr;
  }

-  /** Get the length of the current token, in the raw buffer. */
-  uint yyLength()
-  {
-    /*
-      The assumption is that the lexical analyser is always 1 character ahead,
-      which the -1 account for.
-    */
-    DBUG_ASSERT(m_ptr > m_tok_start);
-    return (uint) ((m_ptr - m_tok_start) - 1);
-  }
-
  /** Get the previus token start position, in the pre-processed buffer. */
  const char *get_cpp_start_prev()
  {
@ -2434,12 +2461,6 @@ public:
    return (size_t) (m_body_utf8_ptr - m_body_utf8);
  }

-  /**
-    Get the maximum length of the utf8-body buffer.
-    The utf8 body can grow because of the character set conversion and escaping.
-  */
-  size_t get_body_utf8_maximum_length(THD *thd);
-
  void body_utf8_start(THD *thd, const char *begin_ptr);
  void body_utf8_append(const char *ptr);
  void body_utf8_append(const char *ptr, const char *end_ptr);
@ -2451,15 +2472,8 @@ public:
                               CHARSET_INFO *txt_cs,
                               const char *end_ptr,
                               my_wc_t sep);
-  /** Current thread. */
-  THD *m_thd;
-
-  /** Current line number. */
-  uint yylineno;
-
-  /** Interface with bison, value of the last token parsed. */
-  LEX_YYSTYPE yylval;

+private:
  /**
    LALR(2) resolution, look ahead token.
    Value of the next token to return, if any,
@ -2476,15 +2490,20 @@ public:

  void add_digest_token(uint token, LEX_YYSTYPE yylval);

-  void reduce_digest_token(uint token_left, uint token_right);
-
+  bool consume_comment(int remaining_recursions_permitted);
+  int lex_one_token(union YYSTYPE *yylval, THD *thd);
+  int find_keyword(Lex_ident_cli_st *str, uint len, bool function);
+  LEX_CSTRING get_token(uint skip, uint length);
  int scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str);
  int scan_ident_start(THD *thd, Lex_ident_cli_st *str);
  int scan_ident_middle(THD *thd, Lex_ident_cli_st *str,
                        CHARSET_INFO **cs, my_lex_states *);
  int scan_ident_delimited(THD *thd, Lex_ident_cli_st *str);
  bool get_7bit_or_8bit_ident(THD *thd, uchar *last_char);
-private:
+
+  /** Current thread. */
+  THD *m_thd;
+
  /** Pointer to the current position in the raw input stream. */
  char *m_ptr;

@ -2570,6 +2589,15 @@ public:
  */
  bool multi_statements;

+  /** Current line number. */
+  uint yylineno;
+
+  /**
+    Current statement digest instrumentation.
+  */
+  sql_digest_state* m_digest;
+
+private:
  /** State of the lexical analyser for comments. */
  enum_comment_state in_comment;
  enum_comment_state in_comment_saved;
@ -2596,13 +2624,9 @@ public:
    NOTE: this member must be used within MYSQLlex() function only.
  */
  CHARSET_INFO *m_underscore_cs;
-
-  /**
-    Current statement digest instrumentation. 
-  */
-  sql_digest_state* m_digest;
 };

+
 /**
  Abstract representation of a statement.
  This class is an interface between the parser and the runtime.