MDEV-26278 Add functionality to eliminate derived tables from the query

Elimination of unnecessary tables from SQL queries is already present in MariaDB. But it only works for regular tables and not for derived ones. Imagine we have a view: CREATE VIEW v1 AS SELECT a, b, max(c) AS maxc FROM t1 GROUP BY a, b Due to "GROUP BY a, b" the values of combinations {a, b} are unique, and this fact can be treated as like derived table "v1" has a unique key on fields {a, b}. Suppose we have a SQL query: SELECT t2.* FROM t2 LEFT JOIN v1 ON t2.a=v1.a and t2.b=v1.b 1. Since {v1.a, v1.b} is unique and both these fields are bound to t2, "v1" is functionally dependent on t2. This means every record of "t2" will be either joined with a single record of "v1" or NULL-complemented. 2. No fields of "v1" are present on the SELECT list These two facts allow the server to completely exclude (eliminate) the derived table "v1" from the query.
2025-08-08 11:22:35 +03:00 · 2022-02-17 22:53:37 +07:00
parent 1f0187ff8d
commit 7f0201a2b5
5 changed files with 650 additions and 20 deletions
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -33,6 +33,7 @@
 #include "sql_select.h"
 #include "opt_trace.h"
 #include "my_json_writer.h"
+#include <set>

 /*
  OVERVIEW
@@ -134,6 +135,11 @@
   - Nodes representing unique keys. Unique key has
      = incoming edges from key component value modules
      = outgoing edge to key's table module
+   - Nodes representing unique pseudo-keys for derived tables.
+     Unique pseudo-keys are composed as a result of GROUP BY expressions.
+     Like normal unique keys, they have:
+      = incoming edges from key component value modules
+      = outgoing edge to key's table module
   - Inner side of outer join module. Outer join module has
      = incoming edges from table value modules
      = No outgoing edges. Once we reach it, we know we can eliminate the 
@@ -205,6 +211,7 @@ class Dep_module;
  class Dep_module_expr;
  class Dep_module_goal;
  class Dep_module_key;
+  class Dep_module_pseudo_key;

 class Dep_analysis_context;

@@ -278,6 +285,8 @@ private:
    Dep_module_key *key_dep;
    /* Otherwise, this and advance */
    uint equality_no;
+    /* Or this one and advance */
+    Dep_module_pseudo_key *pseudo_key_dep;
  };
  friend class Dep_analysis_context;
  friend class Field_dependency_recorder; 
@@ -302,12 +311,20 @@ class Dep_value_table : public Dep_value
 {
 public:
  Dep_value_table(TABLE *table_arg) : 
-    table(table_arg), fields(NULL), keys(NULL)
+    table(table_arg), fields(NULL), keys(NULL), pseudo_key(NULL)
  {}
  TABLE *table;  /* Table this object is representing */
  /* Ordered list of fields that belong to this table */
  Dep_value_field *fields;
-  Dep_module_key *keys; /* Ordered list of Unique keys in this table */
+
+  /* Ordered list of Unique keys in this table */
+  Dep_module_key *keys;
+
+  /*
+    Possible unique pseudo-key applicable for this table
+    (only none or a single one is possible)
+  */
+  Dep_module_pseudo_key *pseudo_key;

  /* Iteration over unbound modules that are our dependencies */
  Iterator init_unbound_modules_iter(char *buf);
@@ -443,9 +460,62 @@ private:
 const size_t Dep_module_key::iterator_size= 
  ALIGN_SIZE(sizeof(Dep_module_key::Value_iter));

-const size_t Dep_module::iterator_size=
-  MY_MAX(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);

+/*
+  A unique pseudo-key module for a derived table.
+  For example, a derived table
+  "SELECT a, count(*) from t1 GROUP BY a"
+  has unique values in its first field "a" due to GROUP BY expression
+  so this can be considered as a unique key for this derived table
+*/
+
+class Dep_module_pseudo_key : public Dep_module
+{
+public:
+  Dep_module_pseudo_key(Dep_value_table *table_arg,
+                        std::set<field_index_t>&& field_indexes)
+      : table(table_arg), derived_table_field_indexes(field_indexes)
+  {
+    unbound_args= static_cast<uint>(field_indexes.size());
+  }
+
+  Dep_value_table *table;
+
+  Iterator init_unbound_values_iter(char *buf) override;
+
+  Dep_value *get_next_unbound_value(Dep_analysis_context *dac,
+                                    Iterator iter) override;
+
+  bool covers_field(int field_index);
+
+  static const size_t iterator_size;
+
+private:
+  /*
+    Set of field numbers (indexes) in the derived table's SELECT list
+    which are included in the GROUP BY expression.
+    For example, unique pseudo-key for SQL
+    "SELECT count(*), b, a FROM t1 GROUP BY a, b"
+    will include two elements: {2} and {1}, since "a" and "b" are on the
+    GROUP BY list and also are present on the SELECT list with indexes 2 and 1
+    (numeration starts from 0).
+  */
+  std::set<field_index_t> derived_table_field_indexes;
+
+  class Value_iter
+  {
+  public:
+    Dep_value_table *table;
+  };
+};
+
+const size_t Dep_module_pseudo_key::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_module_pseudo_key::Value_iter));
+
+const size_t Dep_module::iterator_size=
+  MY_MAX(Dep_module_expr::iterator_size,
+         MY_MAX(Dep_module_key::iterator_size,
+                Dep_module_pseudo_key::iterator_size));

 /*
  A module that represents outer join that we're trying to eliminate. If we 
@@ -508,12 +578,17 @@ public:
  */
  MY_BITMAP expr_deps;
  
-  Dep_value_table *create_table_value(TABLE *table);
+  Dep_value_table *create_table_value(TABLE_LIST *table_list);
  Dep_value_field *get_field_value(Field *field);

 #ifndef DBUG_OFF
  void dbug_print_deps();
 #endif 
+
+private:
+  void create_unique_pseudo_key_if_needed(TABLE_LIST *table_list,
+                                          Dep_value_table *tbl_dep);
+  int find_field_in_list(List<Item> &fields_list, Item *field);
 };


@@ -851,7 +926,7 @@ bool check_func_dependency(JOIN *join,
  /* Create Dep_value_table objects for all tables we're trying to eliminate */
  if (oj_tbl)
  {
-    if (!dac.create_table_value(oj_tbl->table))
+    if (!dac.create_table_value(oj_tbl))
      return FALSE; /* purecov: inspected */
  }
  else
@@ -861,7 +936,7 @@ bool check_func_dependency(JOIN *join,
    {
      if (tbl->table && (tbl->table->map & dep_tables))
      {
-        if (!dac.create_table_value(tbl->table))
+        if (!dac.create_table_value(tbl))
          return FALSE; /* purecov: inspected */
      }
    }
@@ -1577,33 +1652,139 @@ void add_module_expr(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
  DESCRIPTION
    Create a Dep_value_table object for the given table. Also create
    Dep_module_key objects for all unique keys in the table.
+    Create a unique pseudo-key if this table is derived and has
+    a GROUP BY expression.

  RETURN
    Created table value object
    NULL if out of memory
 */

-Dep_value_table *Dep_analysis_context::create_table_value(TABLE *table)
+Dep_value_table *
+Dep_analysis_context::create_table_value(TABLE_LIST *table_list)
 {
  Dep_value_table *tbl_dep;
-  if (!(tbl_dep= new Dep_value_table(table)))
+  if (!(tbl_dep= new Dep_value_table(table_list->table)))
    return NULL; /* purecov: inspected */

  Dep_module_key **key_list= &(tbl_dep->keys);
  /* Add dependencies for unique keys */
-  for (uint i=0; i < table->s->keys; i++)
+  for (uint i= 0; i < table_list->table->s->keys; i++)
  {
-    KEY *key= table->key_info + i; 
+    KEY *key= table_list->table->key_info + i;
    if (key->flags & HA_NOSAME)
    {
      Dep_module_key *key_dep;
-      if (!(key_dep= new Dep_module_key(tbl_dep, i, key->user_defined_key_parts)))
+      if (!(key_dep= new Dep_module_key(tbl_dep, i,
+                                        key->user_defined_key_parts)))
        return NULL;
      *key_list= key_dep;
      key_list= &(key_dep->next_table_key);
    }
  }
-  return table_deps[table->tablenr]= tbl_dep;
+
+  create_unique_pseudo_key_if_needed(table_list, tbl_dep);
+  return table_deps[table_list->table->tablenr]= tbl_dep;
+}
+
+
+/*
+  @brief
+    Check if we can create a unique pseudo-key for the passed table.
+    If we can, create a dependency for it
+
+  @detail
+    Currently, pseudo-key is created for the list of GROUP BY columns.
+
+    TODO: also it can be created if the query uses
+     - SELECT DISTINCT
+     - UNION DISTINCT (not UNION ALL)
+*/
+
+void Dep_analysis_context::create_unique_pseudo_key_if_needed(
+    TABLE_LIST *table_list, Dep_value_table *tbl_dep)
+{
+  auto select_unit= table_list->get_unit();
+  SELECT_LEX *first_select= nullptr;
+  if (select_unit)
+  {
+    first_select= select_unit->first_select();
+
+    /*
+      Exclude UNION (ALL) queries from consideration by checking
+      next_select() == nullptr
+    */
+    if (unlikely(select_unit->first_select()->next_select()))
+      first_select= nullptr;
+  }
+
+  /*
+    GROUP BY expression is considered as a unique pseudo-key
+    for the derived table. Add this pseudo key as a dependency
+  */
+  if (first_select && first_select->group_list.elements > 0)
+  {
+    bool valid= true;
+    std::set<field_index_t> exposed_fields_indexes;
+    for (auto cur_group= first_select->group_list.first;
+         cur_group;
+         cur_group= cur_group->next)
+    {
+      auto elem= *(cur_group->item);
+      /*
+        Make sure GROUP BY elements contain only fields
+        and no functions or other expressions
+      */
+      if (elem->type() != Item::FIELD_ITEM)
+      {
+        valid= false;
+        break;
+      }
+      auto field_idx= find_field_in_list(first_select->join->fields_list, elem);
+      if (field_idx == -1)
+      {
+        /*
+          This GROUP BY element is not present in the select list. This is a
+          case like this:
+             (SELECT a FROM t1 GROUP by a,b) as TBL
+          Here, the combination of (a,b) is unique, but the select doesn't
+          include "b". "a" alone is not unique, so TBL doesn't have a unique
+          pseudo-key.
+        */
+        valid= false;
+        break;
+      }
+      exposed_fields_indexes.insert(field_idx);
+    }
+    if (valid)
+    {
+      Dep_module_pseudo_key *pseudo_key;
+      pseudo_key= new Dep_module_pseudo_key(tbl_dep,
+                                            std::move(exposed_fields_indexes));
+      tbl_dep->pseudo_key= pseudo_key;
+    }
+  }
+}
+
+
+/*
+  Iterate the list of fields and look for the given field.
+  Returns the index of the field if it is found on the list
+  and -1 otherwise
+*/
+
+int Dep_analysis_context::find_field_in_list(List<Item> &fields_list,
+                                             Item *field)
+{
+  List_iterator<Item> it(fields_list);
+  int field_idx= 0;
+  while (auto next_field= it++)
+  {
+    if (next_field->eq(field, false))
+      return field_idx;
+    field_idx++;
+  }
+  return -1; /*not found*/
 }


@@ -1746,11 +1927,39 @@ Dep_value* Dep_module_key::get_next_unbound_value(Dep_analysis_context *dac,
 }


+char *Dep_module_pseudo_key::init_unbound_values_iter(char *buf)
+{
+  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
+  iter->table= table;
+  return (char *) iter;
+}
+
+Dep_value *
+Dep_module_pseudo_key::get_next_unbound_value(Dep_analysis_context *dac,
+                                                  Dep_module::Iterator iter)
+{
+  Dep_value *res= ((Value_iter *) iter)->table;
+  ((Value_iter *) iter)->table= NULL;
+  return res;
+}
+
+
+/*
+  Check if column number field_index is covered by the pseudo-key.
+*/
+
+bool Dep_module_pseudo_key::covers_field(int field_index)
+{
+  return derived_table_field_indexes.count(field_index) > 0;
+}
+
+
 Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
 {
  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
  iter->key_dep= table->keys;
  iter->equality_no= 0;
+  iter->pseudo_key_dep= table->pseudo_key;
  return (char*)iter;
 }

@@ -1758,7 +1967,8 @@ Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
 void 
 Dep_value_field::make_unbound_modules_iter_skip_keys(Dep_value::Iterator iter)
 {
-  ((Module_iter*)iter)->key_dep= NULL;
+  ((Module_iter*) iter)->key_dep= NULL;
+  ((Module_iter*) iter)->pseudo_key_dep= NULL;
 }


@@ -1786,6 +1996,16 @@ Dep_module* Dep_value_field::get_next_unbound_module(Dep_analysis_context *dac,
  }
  else 
    di->key_dep= NULL;
+
+  Dep_module_pseudo_key *pseudo_key_dep= di->pseudo_key_dep;
+  if (pseudo_key_dep && !pseudo_key_dep->is_applicable() &&
+      pseudo_key_dep->covers_field(field->field_index))
+  {
+    di->pseudo_key_dep= NULL;
+    return pseudo_key_dep;
+  }
+  else
+    di->pseudo_key_dep= NULL;
  
  /*
    Then walk through [multi]equalities and find those that
@@ -1819,7 +2039,7 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
  TABLE *table;
  /*
    NOTE: there are TABLE_LIST object that have
-    tbl->table!= NULL && tbl->nested_join!=NULL and 
+    tbl->table!= NULL && tbl->nested_join!=NULL and
    tbl->table == tbl->nested_join->join_list->element(..)->table
  */
  if (tbl->nested_join)
@@ -1848,7 +2068,6 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
    tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
 }

-
 #ifndef DBUG_OFF
 /* purecov: begin inspected */
 void Dep_analysis_context::dbug_print_deps()