MDEV-33281 Implement optimizer hints

- Using Lex_ident_sys to scan identifiers, like the SQL parser does. This fixes handling of double-quote-delimited and backtick-delimited identifiers, as well as handling of non-ASCII identifiers. Unescaping and converting from the client character set to the system character set is now done using Lex_ident_cli_st and Lex_ident_sys, like it's done in the SQL tokenizer/parser. Adding helper methods to_ident_cli() and to_ident_sys() in Optimizer_hint_parser::Token. - Fixing the hint parser to report a syntax error on empty identifiers: SELECT /*+ BKA(``) */ * FROM t1; - Moving a part of the code from opt_hints_parser.h to opt_hints_parser.cc. Moving these method definitions: - Optimizer_hint_tokenizer::find_keyword() - Optimizer_hint_tokenizer::get_token() to avoid huge pieces of the code in the header file. - A Lex_ident_cli_st cleanup: fixing a few Lex_ident_cli_st methods to return Lex_ident_cli_st & instead of void, to make them easier to use in the caller code. - Fixing the hint parser to display the correct line number. Adding a new data type Lex_comment_st (a combination of LEX_CSTRING and a line number). Using it in sql_yacc.yy. - Getting rid of redundant dependencies on sql_hints_parser.h. Moving void LEX::resolve_optimizer_hints() from sql_lex.h to sql_lex.cc. Adding a class Optimizer_hint_parser_output, deriving from Optimizer_hint_parser::Hint_list. Fixing the hint parser to return a pointer to an allocated instance of Optimizer_hint_parser_output rather than an instance of Optimizer_hint_parser::Hint_list. This allows using a forward declaration of Optimizer_hint_parser_output in sql_lex.h and thus avoids dependencies on sql_hints_parser.h.
2025-08-08 11:22:35 +03:00 · 2024-07-15 14:29:47 +04:00
parent 877e4a386c
commit bd30c796fa
10 changed files with 257 additions and 147 deletions
--- a/sql/opt_hints_parser.cc
+++ b/sql/opt_hints_parser.cc
@@ -33,6 +33,85 @@ Parse_context::Parse_context(THD *thd, st_select_lex *select)
 {}


+Optimizer_hint_tokenizer::TokenID
+Optimizer_hint_tokenizer::find_keyword(const LEX_CSTRING &str)
+{ // Map str to a hint keyword TokenID; anything else is TokenID::tIDENT
+  switch (str.length) // Only try the keywords that have the same length
+  {
+  case 3:
+    if ("BKA"_Lex_ident_column.streq(str)) return TokenID::keyword_BKA;
+    if ("BNL"_Lex_ident_column.streq(str)) return TokenID::keyword_BNL;
+    if ("MRR"_Lex_ident_column.streq(str)) return TokenID::keyword_MRR;
+    break;
+
+  case 6:
+    if ("NO_BKA"_Lex_ident_column.streq(str)) return TokenID::keyword_NO_BKA;
+    if ("NO_BNL"_Lex_ident_column.streq(str)) return TokenID::keyword_NO_BNL;
+    if ("NO_ICP"_Lex_ident_column.streq(str)) return TokenID::keyword_NO_ICP;
+    if ("NO_MRR"_Lex_ident_column.streq(str)) return TokenID::keyword_NO_MRR;
+    break;
+
+  case 7:
+    if ("QB_NAME"_Lex_ident_column.streq(str))
+      return TokenID::keyword_QB_NAME;
+    break;
+
+  case 21:
+    if ("NO_RANGE_OPTIMIZATION"_Lex_ident_column.streq(str))
+      return TokenID::keyword_NO_RANGE_OPTIMIZATION;
+    break;
+  }
+  return TokenID::tIDENT; // No keyword matched: treat as an identifier
+}
+
+
+Optimizer_hint_tokenizer::Token
+Optimizer_hint_tokenizer::get_token(CHARSET_INFO *cs)
+{ // Scan and return the next token at m_ptr; cs is not referenced below
+  get_spaces();
+  if (eof())
+    return Token(Lex_cstring(m_ptr, m_ptr), TokenID::tEOF);
+  const char head= m_ptr[0];
+  if (head == '`' || head=='"') // Backtick- or double-quote-delimited
+  {
+    const Token_with_metadata delimited_ident= get_quoted_string();
+    /*
+      Consider only non-empty quoted strings as identifiers.
+      Table and index names cannot be empty in MariaDB.
+      Let's also disallow empty query block names.
+      Note, table aliases can actually be empty:
+        SELECT ``.a FROM t1 ``;
+      But let's disallow them in hints for simplicity, to handle
+      all identifiers in the same way in the hint parser.
+    */
+    if (delimited_ident.length > 2) // presumably: more than the 2 delimiters
+      return Token(delimited_ident, TokenID::tIDENT);
+    /*
+      If the string is empty, "unget" it to have a good
+      syntax error position in the message text.
+      The point is to include the empty string in the error message:
+        EXPLAIN EXTENDED SELECT ... QB_NAME(``) ...;  -->
+        Optimizer hint syntax error near '``) ...' at line 1
+    */
+    m_ptr-= delimited_ident.length;
+    return Token(Lex_cstring(m_ptr, m_ptr), TokenID::tNULL);
+  }
+  const Token_with_metadata ident= get_ident();
+  if (ident.length) // Extended-char identifiers skip the keyword lookup
+    return Token(ident, ident.m_extended_chars ?
+                 TokenID::tIDENT : find_keyword(ident));
+  if (!get_char(',')) // get_char() evidently returns false on a match
+    return Token(Lex_cstring(m_ptr - 1, 1), TokenID::tCOMMA);
+  if (!get_char('@'))
+    return Token(Lex_cstring(m_ptr - 1, 1), TokenID::tAT);
+  if (!get_char('('))
+    return Token(Lex_cstring(m_ptr - 1, 1), TokenID::tLPAREN);
+  if (!get_char(')'))
+    return Token(Lex_cstring(m_ptr - 1, 1), TokenID::tRPAREN);
+  return Token(Lex_cstring(m_ptr, m_ptr), TokenID::tNULL); // Nothing matched
+}
+
+
 // This method is for debug purposes
 bool Optimizer_hint_parser::parse_token_list(THD *thd)
 {
@@ -52,15 +131,22 @@ bool Optimizer_hint_parser::parse_token_list(THD *thd)
  return true; // Success
 }

-
-void Optimizer_hint_parser::push_warning_syntax_error(THD *thd)
+void Optimizer_hint_parser::push_warning_syntax_error(THD *thd,
+                                                      uint start_lineno)
 {
+  DBUG_ASSERT(m_start <= m_ptr); // Tokenizer position is inside the hint
+  DBUG_ASSERT(m_ptr <= m_end);
  const char *msg= ER_THD(thd, ER_WARN_OPTIMIZER_HINT_SYNTAX_ERROR);
  ErrConvString txt(m_look_ahead_token.str, strlen(m_look_ahead_token.str),
                    thd->variables.character_set_client);
+  /*
+    start_lineno is the line number on which the whole hint started.
+    The reported line adds lineno(), the line of the current tokenizer
+    position inside the hint, so multi-line hints are reported correctly.
+  */
  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                      ER_PARSE_ERROR, ER_THD(thd, ER_PARSE_ERROR),
-                      msg, txt.ptr(), 1);
+                      msg, txt.ptr(), start_lineno + lineno());
 }