Semi-join optimizations code cleanup:

- Break down POSITION/advance_sj_state() into four classes representing potential semi-join strategies. - Treat all strategies uniformly (before, DuplicateWeedout was special as it was the catch-all strategy; now we still rely on it to be the catch-all, but are able to function with, e.g., firstmatch=on,duplicate_weedout=off). - Update test results (checked)
2025-07-29 05:21:33 +03:00 · 2011-11-23 04:25:52 +04:00
parent 7f746fbe74
commit 694ce95557
6 changed files with 780 additions and 489 deletions
--- a/mysql-test/r/subselect_sj2.result
+++ b/mysql-test/r/subselect_sj2.result
@ -323,8 +323,8 @@ WHERE Language='English' AND Percentage > 10 AND
 t2.Population > 100000);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	range	Population,Country	Population	4	NULL	1	Using index condition; Rowid-ordered scan; Start temporary
-1	PRIMARY	t2	eq_ref	PRIMARY,Population	PRIMARY	3	test.t1.Country	1	Using where
-1	PRIMARY	t3	eq_ref	PRIMARY,Percentage	PRIMARY	33	test.t1.Country,const	1	Using index condition; Using where; End temporary
+1	PRIMARY	t2	eq_ref	PRIMARY,Population	PRIMARY	3	test.t1.Country	1	Using where; End temporary
+1	PRIMARY	t3	eq_ref	PRIMARY,Percentage	PRIMARY	33	test.t1.Country,const	1	Using index condition; Using where
 set optimizer_switch=@bug35674_save_optimizer_switch;
 DROP TABLE t1,t2,t3;
 CREATE TABLE t1 (
--- a/mysql-test/r/subselect_sj2_jcl6.result
+++ b/mysql-test/r/subselect_sj2_jcl6.result
@ -332,8 +332,8 @@ WHERE Language='English' AND Percentage > 10 AND
 t2.Population > 100000);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	range	Population,Country	Population	4	NULL	1	Using index condition; Rowid-ordered scan; Start temporary
-1	PRIMARY	t2	eq_ref	PRIMARY,Population	PRIMARY	3	test.t1.Country	1	Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
-1	PRIMARY	t3	eq_ref	PRIMARY,Percentage	PRIMARY	33	test.t1.Country,const	1	Using index condition; Using where; End temporary; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
+1	PRIMARY	t2	eq_ref	PRIMARY,Population	PRIMARY	3	test.t1.Country	1	Using where; End temporary; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
+1	PRIMARY	t3	eq_ref	PRIMARY,Percentage	PRIMARY	33	test.t1.Country,const	1	Using index condition; Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
 set optimizer_switch=@bug35674_save_optimizer_switch;
 DROP TABLE t1,t2,t3;
 CREATE TABLE t1 (
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@ -2168,70 +2168,414 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
    See setup_semijoin_dups_elimination() for a description of what kinds of
    join prefixes each strategy can handle.
 */
+bool is_multiple_semi_joins(POSITION *prefix, uint idx, table_map inner_tables)
+{
+  for (int i= (int)idx; i >= 0; i--)
+  {
+    TABLE_LIST *emb_sj_nest;
+    if ((emb_sj_nest= prefix[i].table->emb_sj_nest))
+    {
+      if (inner_tables & emb_sj_nest->sj_inner_tables)
+        return !test(inner_tables == emb_sj_nest->sj_inner_tables);
+    }
+  }
+  return FALSE;
+}

-void advance_sj_state(JOIN *join, table_map remaining_tables, 
-                      const JOIN_TAB *new_join_tab, uint idx, 
+
+void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, 
                      double *current_record_count, double *current_read_time,
                      POSITION *loose_scan_pos)
 {
-  TABLE_LIST *emb_sj_nest;
  POSITION *pos= join->positions + idx;
-  remaining_tables &= ~new_join_tab->table->map;
-  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
+  const JOIN_TAB *new_join_tab= pos->table; 
+  Semi_join_strategy_picker *pickers[]=
+  {
+    &pos->firstmatch_picker,
+    &pos->loosescan_picker,
+    &pos->sjmat_picker,
+    &pos->dups_weedout_picker,
+    NULL,
+  };

-  pos->prefix_cost.convert_from_cost(*current_read_time);
-  pos->prefix_record_count= *current_record_count;
-  pos->sj_strategy= SJ_OPT_NONE;
-  
-  pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
-
-  /* We're performing optimization inside SJ-Materialization nest */
  if (join->emb_sjm_nest)
  {
-    pos->invalidate_firstmatch_prefix();
-    pos->first_loosescan_table= MAX_TABLES; 
-    pos->dupsweedout_tables= 0;
-    pos->sjm_scan_need_tables= 0;
+    /* 
+      We're performing optimization inside SJ-Materialization nest:
+       - there are no other semi-joins inside semi-join nests
+       - attempts to build semi-join strategies here will confuse
+         the optimizer, so bail out.
+    */
    return;
  }

-  /* Initialize the state or copy it from prev. tables */
+  /* 
+    Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
+    LooseScan detector in best_access_path)
+  */
+  remaining_tables &= ~new_join_tab->table->map;
+  pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
+  {
+   /// join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+    join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
+
+    /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
+  ///  if (!(remaining_tables &
+  ///        emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
+  ///    join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+  }
+
+  Semi_join_strategy_picker **strategy;
  if (idx == join->const_tables)
  {
-    pos->invalidate_firstmatch_prefix();
-    pos->first_loosescan_table= MAX_TABLES; 
-    pos->dupsweedout_tables= 0;
-    pos->sjm_scan_need_tables= 0;
-    LINT_INIT(pos->sjm_scan_last_inner);
+    /* First table, initialize pickers */
+    for (strategy= pickers; *strategy != NULL; strategy++)
+      (*strategy)->set_empty();
+    pos->inner_tables_handled_with_other_sjs= 0;
  }
  else
  {
-    // FirstMatch
-    pos->first_firstmatch_table=
-      (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ?
-      MAX_TABLES : pos[-1].first_firstmatch_table;
-    pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
-    pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
-
-    // LooseScan
-    pos->first_loosescan_table=
-      (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
-      MAX_TABLES : pos[-1].first_loosescan_table;
-    pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
-
-    // SJ-Materialization Scan
-    pos->sjm_scan_need_tables=
-      (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
-      0 : pos[-1].sjm_scan_need_tables;
-    pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
-
-    // Duplicate Weedout
-    pos->dupsweedout_tables=      pos[-1].dupsweedout_tables;
-    pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table;
+    for (strategy= pickers; *strategy != NULL; strategy++)
+    {
+      (*strategy)->set_from_prev(pos - 1);
+    }
+    pos->inner_tables_handled_with_other_sjs=
+       pos[-1].inner_tables_handled_with_other_sjs;
  }

-  table_map handled_by_fm_or_ls= 0;
-  /* FirstMatch Strategy */
+  pos->prefix_cost.convert_from_cost(*current_read_time);
+  pos->prefix_record_count= *current_record_count;
+
+  {
+    pos->sj_strategy= SJ_OPT_NONE;
+
+    for (strategy= pickers; *strategy != NULL; strategy++)
+    {
+      table_map handled_fanout;
+      sj_strategy_enum sj_strategy;
+      double rec_count= *current_record_count;
+      double read_time= *current_read_time;
+      if ((*strategy)->check_qep(join, idx, remaining_tables, 
+                                 new_join_tab,
+                                 &rec_count,
+                                 &read_time,
+                                 &handled_fanout,
+                                 &sj_strategy,
+                                 loose_scan_pos))
+      {
+        /*
+          It's possible to use the strategy. Use it, if
+           - it removes semi-join fanout that was not removed before
+           - using it is cheaper than using something else,
+               and {if some other strategy has removed fanout
+               that this strategy is trying to remove, then it
+               did remove the fanout only for one semi-join}
+               This is to avoid a situation when
+                1. strategy X removes fanout for semijoin X,Y
+                2. using strategy Z is cheaper, but it only removes
+                   fanout from semijoin X.
+                3. We have no clue what to do about fanount of semi-join Y.
+        */
+        if ((join->cur_dups_producing_tables & handled_fanout) ||
+            (read_time < *current_read_time && 
+             !(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
+        {
+          /* Mark strategy as used */ 
+          (*strategy)->mark_used();
+          pos->sj_strategy= sj_strategy;
+          *current_read_time= read_time;
+          *current_record_count= rec_count;
+          join->cur_dups_producing_tables &= ~handled_fanout;
+          //TODO: update bitmap of semi-joins that were handled together with
+          // others.
+          if (is_multiple_semi_joins(join->positions, idx, handled_fanout))
+            pos->inner_tables_handled_with_other_sjs |= handled_fanout;
+        }
+        else
+        {
+          /* We decided not to apply the strategy. */
+          (*strategy)->set_empty();
+        }
+      }
+    }
+  }
+
+  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
+  {
+    join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+
+    /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
+    if (!(remaining_tables &
+          emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
+      join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+  }
+
+  pos->prefix_cost.convert_from_cost(*current_read_time);
+  pos->prefix_record_count= *current_record_count;
+}
+
+
+void Sj_materialization_picker::set_from_prev(struct st_position *prev)
+{
+  if (prev->sjmat_picker.is_used)
+    set_empty();
+  else
+  {
+    sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables; 
+    sjm_scan_last_inner=  prev->sjmat_picker.sjm_scan_last_inner;
+  }
+  is_used= FALSE;
+}
+
+
+bool Sj_materialization_picker::check_qep(JOIN *join,
+                                          uint idx,
+                                          table_map remaining_tables, 
+                                          const JOIN_TAB *new_join_tab,
+                                          double *record_count,
+                                          double *read_time,
+                                          table_map *handled_fanout,
+                                          sj_strategy_enum *strategy,
+                                          POSITION *loose_scan_pos)
+{
+  bool sjm_scan;
+  SJ_MATERIALIZATION_INFO *mat_info;
+  if ((mat_info= at_sjmat_pos(join, remaining_tables,
+                              new_join_tab, idx, &sjm_scan)))
+  {
+    if (sjm_scan)
+    {
+      /*
+        We can't yet evaluate this option yet. This is because we can't
+        accout for fanout of sj-inner tables yet:
+
+          ntX  SJM-SCAN(it1 ... itN) | ot1 ... otN  |
+                                     ^(1)           ^(2)
+
+        we're now at position (1). SJM temptable in general has multiple
+        records, so at point (1) we'll get the fanout from sj-inner tables (ie
+        there will be multiple record combinations).
+
+        The final join result will not contain any semi-join produced
+        fanout, i.e. tables within SJM-SCAN(...) will not contribute to
+        the cardinality of the join output.  Extra fanout produced by 
+        SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ...  otN.
+
+        The simple way to model this is to remove SJM-SCAN(...) fanout once
+        we reach the point #2.
+      */
+      sjm_scan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+      sjm_scan_last_inner= idx;
+    }
+    else
+    {
+      /* This is SJ-Materialization with lookups */
+      COST_VECT prefix_cost; 
+      signed int first_tab= (int)idx - mat_info->tables;
+      double prefix_rec_count;
+      if (first_tab < (int)join->const_tables)
+      {
+        prefix_cost.zero();
+        prefix_rec_count= 1.0;
+      }
+      else
+      {
+        prefix_cost= join->positions[first_tab].prefix_cost;
+        prefix_rec_count= join->positions[first_tab].prefix_record_count;
+      }
+
+      double mat_read_time= prefix_cost.total_cost();
+      mat_read_time += mat_info->materialization_cost.total_cost() +
+                       prefix_rec_count * mat_info->lookup_cost.total_cost();
+
+      /*
+        NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
+        elements to join->positions as that makes it hard to return things
+        back when making one step back in join optimization. That's done 
+        after the QEP has been chosen.
+      */
+      *read_time=    mat_read_time;
+      *record_count= prefix_rec_count;
+      *handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables;
+      *strategy= SJ_OPT_MATERIALIZE;
+      return TRUE;
+    }
+  }
+  
+  /* 4.A SJM-Scan second phase check */
+  if (sjm_scan_need_tables && /* Have SJM-Scan prefix */
+      !(sjm_scan_need_tables & remaining_tables))
+  {
+    TABLE_LIST *mat_nest= 
+      join->positions[sjm_scan_last_inner].table->emb_sj_nest;
+    SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
+
+    double prefix_cost;
+    double prefix_rec_count;
+    int first_tab= sjm_scan_last_inner + 1 - mat_info->tables;
+    /* Get the prefix cost */
+    if (first_tab == (int)join->const_tables)
+    {
+      prefix_rec_count= 1.0;
+      prefix_cost= 0.0;
+    }
+    else
+    {
+      prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+      prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+    }
+
+    /* Add materialization cost */
+    prefix_cost += mat_info->materialization_cost.total_cost() +
+                   prefix_rec_count * mat_info->scan_cost.total_cost();
+    prefix_rec_count *= mat_info->rows;
+    
+    uint i;
+    table_map rem_tables= remaining_tables;
+    for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
+      rem_tables |= join->positions[i].table->table->map;
+
+    POSITION curpos, dummy;
+    /* Need to re-run best-access-path as we prefix_rec_count has changed */
+    bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
+    for (i= first_tab + mat_info->tables; i <= idx; i++)
+    {
+      best_access_path(join, join->positions[i].table, rem_tables, i,
+                       disable_jbuf, prefix_rec_count, &curpos, &dummy);
+      prefix_rec_count *= curpos.records_read;
+      prefix_cost += curpos.read_time;
+    }
+
+    *strategy= SJ_OPT_MATERIALIZE_SCAN;
+    *read_time=    prefix_cost;
+    *record_count= prefix_rec_count;
+    *handled_fanout= mat_nest->sj_inner_tables;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+void LooseScan_picker::set_from_prev(struct st_position *prev)
+{
+  if (prev->loosescan_picker.is_used)
+    set_empty();
+  else
+  {
+    first_loosescan_table= prev->loosescan_picker.first_loosescan_table;
+    loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables;
+  }
+  is_used= FALSE;
+}
+
+
+bool LooseScan_picker::check_qep(JOIN *join,
+                                 uint idx,
+                                 table_map remaining_tables, 
+                                 const JOIN_TAB *new_join_tab,
+                                 double *record_count, 
+                                 double *read_time,
+                                 table_map *handled_fanout,
+                                 sj_strategy_enum *strategy,
+                                 struct st_position *loose_scan_pos)
+{
+  POSITION *first= join->positions + first_loosescan_table; 
+  /* 
+    LooseScan strategy can't handle interleaving between tables from the 
+    semi-join that LooseScan is handling and any other tables.
+
+    If we were considering LooseScan for the join prefix (1)
+       and the table we're adding creates an interleaving (2)
+    then 
+       stop considering loose scan
+  */
+  if ((first_loosescan_table != MAX_TABLES) &&   // (1)
+      (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
+      new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
+  {
+    first_loosescan_table= MAX_TABLES;
+  }
+
+  /*
+    If we got an option to use LooseScan for the current table, start
+    considering using LooseScan strategy
+  */
+  if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
+  {
+    first_loosescan_table= idx;
+    loosescan_need_tables=
+      new_join_tab->emb_sj_nest->sj_inner_tables | 
+      new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+      new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+  }
+  
+  if ((first_loosescan_table != MAX_TABLES) && 
+      !(remaining_tables & loosescan_need_tables) &&
+      (new_join_tab->table->map & loosescan_need_tables))
+  {
+    /* 
+      Ok we have LooseScan plan and also have all LooseScan sj-nest's
+      inner tables and outer correlated tables into the prefix.
+    */
+
+    first= join->positions + first_loosescan_table; 
+    uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
+    /* Got a complete LooseScan range. Calculate its cost */
+    /*
+      The same problem as with FirstMatch - we need to save POSITIONs
+      somewhere but reserving space for all cases would require too
+      much space. We will re-calculate POSITION structures later on. 
+    */
+    bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
+    optimize_wo_join_buffering(join, first_loosescan_table, idx,
+                               remaining_tables, 
+                               TRUE,  //first_alt
+                               disable_jbuf ? join->table_count :
+                                 first_loosescan_table + n_tables,
+                               record_count,
+                               read_time);
+    /*
+      We don't yet have any other strategies that could handle this
+      semi-join nest (the other options are Duplicate Elimination or
+      Materialization, which need at least the same set of tables in 
+      the join prefix to be considered) so unconditionally pick the 
+      LooseScan.
+    */
+    *strategy= SJ_OPT_LOOSE_SCAN;
+    *handled_fanout= first->table->emb_sj_nest->sj_inner_tables;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+void Firstmatch_picker::set_from_prev(struct st_position *prev)
+{
+  if (prev->firstmatch_picker.is_used)
+    invalidate_firstmatch_prefix();
+  else
+  {
+    first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table;
+    first_firstmatch_rtbl=  prev->firstmatch_picker.first_firstmatch_rtbl;
+    firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables;
+  }
+  is_used= FALSE;
+}
+
+bool Firstmatch_picker::check_qep(JOIN *join,
+                                  uint idx,
+                                  table_map remaining_tables, 
+                                  const JOIN_TAB *new_join_tab,
+                                  double *record_count,
+                                  double *read_time,
+                                  table_map *handled_fanout,
+                                  sj_strategy_enum *strategy,
+                                  POSITION *loose_scan_pos)
+{
  if (new_join_tab->emb_sj_nest &&
      optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) &&
      !join->outer_join)
@ -2259,298 +2603,102 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
         ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
    {
      /* Start tracking potential FirstMatch range */
-      pos->first_firstmatch_table= idx;
-      pos->firstmatch_need_tables= sj_inner_tables;
-      pos->first_firstmatch_rtbl= remaining_tables;
+      first_firstmatch_table= idx;
+      firstmatch_need_tables= sj_inner_tables;
+      first_firstmatch_rtbl= remaining_tables;
    }

-    if (pos->in_firstmatch_prefix())
+    if (in_firstmatch_prefix())
    {
-      if (outer_corr_tables & pos->first_firstmatch_rtbl)
+      if (outer_corr_tables & first_firstmatch_rtbl)
      {
        /*
          Trying to add an sj-inner table whose sj-nest has an outer correlated 
          table that was not in the prefix. This means FirstMatch can't be used.
        */
-        pos->invalidate_firstmatch_prefix();
+        invalidate_firstmatch_prefix();
      }
      else
      {
        /* Record that we need all of this semi-join's inner tables, too */
-        pos->firstmatch_need_tables|= sj_inner_tables;
+        firstmatch_need_tables|= sj_inner_tables;
      }
    
-      if (pos->in_firstmatch_prefix() && 
-          !(pos->firstmatch_need_tables & remaining_tables))
+      if (in_firstmatch_prefix() && 
+          !(firstmatch_need_tables & remaining_tables))
      {
        /*
          Got a complete FirstMatch range.
            Calculate correct costs and fanout
        */
-        optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx,
+        optimize_wo_join_buffering(join, first_firstmatch_table, idx,
                                   remaining_tables, FALSE, idx,
-                                   current_record_count, 
-                                   current_read_time);
+                                   record_count, 
+                                   read_time);
        /*
-          We don't yet know what are the other strategies, so pick the
-          FirstMatch.
-
          We ought to save the alternate POSITIONs produced by
          optimize_wo_join_buffering but the problem is that providing save
          space uses too much space. Instead, we will re-calculate the
          alternate POSITIONs after we've picked the best QEP.
        */
-        pos->sj_strategy= SJ_OPT_FIRST_MATCH;
-        handled_by_fm_or_ls=  pos->firstmatch_need_tables;
+        *handled_fanout= firstmatch_need_tables;
+        /* *record_count and *read_time were set by the above call */
+        *strategy= SJ_OPT_FIRST_MATCH;
+        return TRUE;
      }
    }
  }
+  return FALSE;
+}

-  /* LooseScan Strategy */
-  {
-    POSITION *first=join->positions+pos->first_loosescan_table; 
-    /* 
-      LooseScan strategy can't handle interleaving between tables from the 
-      semi-join that LooseScan is handling and any other tables.

-      If we were considering LooseScan for the join prefix (1)
-         and the table we're adding creates an interleaving (2)
-      then 
-         stop considering loose scan
-    */
-    if ((pos->first_loosescan_table != MAX_TABLES) &&   // (1)
-        (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
-        new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
-    {
-      pos->first_loosescan_table= MAX_TABLES;
-    }
-
-    /*
-      If we got an option to use LooseScan for the current table, start
-      considering using LooseScan strategy
-    */
-    if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
-    {
-      pos->first_loosescan_table= idx;
-      pos->loosescan_need_tables=
-        new_join_tab->emb_sj_nest->sj_inner_tables | 
-        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
-        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
-    }
-    
-    if ((pos->first_loosescan_table != MAX_TABLES) && 
-        !(remaining_tables & pos->loosescan_need_tables) &&
-        (pos->table->table->map & pos->loosescan_need_tables))
-    {
-      /* 
-        Ok we have LooseScan plan and also have all LooseScan sj-nest's
-        inner tables and outer correlated tables into the prefix.
-      */
-
-      first=join->positions + pos->first_loosescan_table; 
-      uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
-      /* Got a complete LooseScan range. Calculate its cost */
-      /*
-        The same problem as with FirstMatch - we need to save POSITIONs
-        somewhere but reserving space for all cases would require too
-        much space. We will re-calculate POSITION structures later on. 
-      */
-      optimize_wo_join_buffering(join, pos->first_loosescan_table, idx,
-                                 remaining_tables, 
-                                 TRUE,  //first_alt
-                                 disable_jbuf ? join->table_count :
-                                   pos->first_loosescan_table + n_tables,
-                                 current_record_count,
-                                 current_read_time);
-      /*
-        We don't yet have any other strategies that could handle this
-        semi-join nest (the other options are Duplicate Elimination or
-        Materialization, which need at least the same set of tables in 
-        the join prefix to be considered) so unconditionally pick the 
-        LooseScan.
-      */
-      pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
-      handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables;
-    }
-  }
-
-  /* 
-    Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
-    LooseScan detector in best_access_path)
-  */
-  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
-  {
-    join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
-    join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
-
-    /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
-    if (!(remaining_tables &
-          emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
-      join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
-  }
-  join->cur_dups_producing_tables &= ~handled_by_fm_or_ls;
-
-  /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */
-  bool sjm_scan;
-  SJ_MATERIALIZATION_INFO *mat_info;
-  if ((mat_info= at_sjmat_pos(join, remaining_tables,
-                              new_join_tab, idx, &sjm_scan)))
-  {
-    if (sjm_scan)
-    {
-      /*
-        We can't yet evaluate this option yet. This is because we can't
-        accout for fanout of sj-inner tables yet:
-
-          ntX  SJM-SCAN(it1 ... itN) | ot1 ... otN  |
-                                     ^(1)           ^(2)
-
-        we're now at position (1). SJM temptable in general has multiple
-        records, so at point (1) we'll get the fanout from sj-inner tables (ie
-        there will be multiple record combinations).
-
-        The final join result will not contain any semi-join produced
-        fanout, i.e. tables within SJM-SCAN(...) will not contribute to
-        the cardinality of the join output.  Extra fanout produced by 
-        SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ...  otN.
-
-        The simple way to model this is to remove SJM-SCAN(...) fanout once
-        we reach the point #2.
-      */
-      pos->sjm_scan_need_tables=
-        new_join_tab->emb_sj_nest->sj_inner_tables | 
-        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
-        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
-      pos->sjm_scan_last_inner= idx;
-    }
+void Duplicate_weedout_picker::set_from_prev(POSITION *prev)
+{
+  if (prev->dups_weedout_picker.is_used)
+    set_empty();
  else
  {
-      /* This is SJ-Materialization with lookups */
-      COST_VECT prefix_cost; 
-      signed int first_tab= (int)idx - mat_info->tables;
-      double prefix_rec_count;
-      if (first_tab < (int)join->const_tables)
-      {
-        prefix_cost.zero();
-        prefix_rec_count= 1.0;
-      }
-      else
-      {
-        prefix_cost= join->positions[first_tab].prefix_cost;
-        prefix_rec_count= join->positions[first_tab].prefix_record_count;
+    dupsweedout_tables=      prev->dups_weedout_picker.dupsweedout_tables;
+    first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table;
  }
+  is_used= FALSE;
+}

-      double mat_read_time= prefix_cost.total_cost();
-      mat_read_time += mat_info->materialization_cost.total_cost() +
-                       prefix_rec_count * mat_info->lookup_cost.total_cost();

-      if (mat_read_time < *current_read_time || join->cur_dups_producing_tables)
-      {
-        /*
-          NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
-          elements to join->positions as that makes it hard to return things
-          back when making one step back in join optimization. That's done 
-          after the QEP has been chosen.
-        */
-        pos->sj_strategy= SJ_OPT_MATERIALIZE;
-        *current_read_time=    mat_read_time;
-        *current_record_count= prefix_rec_count;
-        join->cur_dups_producing_tables&=
-          ~new_join_tab->emb_sj_nest->sj_inner_tables;
-      }
-    }
-  }
-  
-  /* 4.A SJM-Scan second phase check */
-  if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */
-      !(pos->sjm_scan_need_tables & remaining_tables))
-  {
-    TABLE_LIST *mat_nest= 
-      join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
-    SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
-
-    double prefix_cost;
-    double prefix_rec_count;
-    int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables;
-    /* Get the prefix cost */
-    if (first_tab == (int)join->const_tables)
-    {
-      prefix_rec_count= 1.0;
-      prefix_cost= 0.0;
-    }
-    else
-    {
-      prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
-      prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
-    }
-
-    /* Add materialization cost */
-    prefix_cost += mat_info->materialization_cost.total_cost() +
-                   prefix_rec_count * mat_info->scan_cost.total_cost();
-    prefix_rec_count *= mat_info->rows;
-    
-    uint i;
-    table_map rem_tables= remaining_tables;
-    for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
-      rem_tables |= join->positions[i].table->table->map;
-
-    POSITION curpos, dummy;
-    /* Need to re-run best-access-path as we prefix_rec_count has changed */
-    for (i= first_tab + mat_info->tables; i <= idx; i++)
-    {
-      best_access_path(join, join->positions[i].table, rem_tables, i,
-                       disable_jbuf, prefix_rec_count, &curpos, &dummy);
-      prefix_rec_count *= curpos.records_read;
-      prefix_cost += curpos.read_time;
-    }
-
-    /*
-      Use the strategy if 
-       * it is cheaper then what we've had, or
-       * we haven't picked any other semi-join strategy yet
-      In the second case, we pick this strategy unconditionally because
-      comparing cost without semi-join duplicate removal with cost with
-      duplicate removal is not an apples-to-apples comparison.
-    */
-    if (prefix_cost < *current_read_time || join->cur_dups_producing_tables)
-    {
-      pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
-      *current_read_time=    prefix_cost;
-      *current_record_count= prefix_rec_count;
-      join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables;
-
-    }
-  }
-
-  /* 5. Duplicate Weedout strategy handler */
-  {
-    /* 
-       Duplicate weedout can be applied after all ON-correlated and 
-       correlated 
-    */
+bool Duplicate_weedout_picker::check_qep(JOIN *join,
+                                         uint idx,
+                                         table_map remaining_tables, 
+                                         const JOIN_TAB *new_join_tab,
+                                         double *record_count,
+                                         double *read_time,
+                                         table_map *handled_fanout,
+                                         sj_strategy_enum *strategy,
+                                         POSITION *loose_scan_pos
+                                         )
+{
  TABLE_LIST *nest;
  if ((nest= new_join_tab->emb_sj_nest))
  {
-      if (!pos->dupsweedout_tables)
-        pos->first_dupsweedout_table= idx;
+    if (!dupsweedout_tables)
+      first_dupsweedout_table= idx;

-      pos->dupsweedout_tables |= nest->sj_inner_tables |
+    dupsweedout_tables |= nest->sj_inner_tables |
                          nest->nested_join->sj_depends_on |
                          nest->nested_join->sj_corr_tables;
  }
  
-    if (pos->dupsweedout_tables)
+  if (dupsweedout_tables)
  {
    /* we're in the process of constructing a DuplicateWeedout range */
    TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding;
    /* and we've entered an inner side of an outer join*/
    if (emb && emb->on_expr)
-        pos->dupsweedout_tables |= emb->nested_join->used_tables;
+      dupsweedout_tables |= emb->nested_join->used_tables;
  }
  
-    if (pos->dupsweedout_tables && 
-        !(remaining_tables &
-          ~new_join_tab->table->map & pos->dupsweedout_tables))
+  /* If this is the last table that we need for DuplicateWeedout range */
+  if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map &
+                              dupsweedout_tables))
  {
    /*
      Ok, reached a state where we could put a dups weedout point.
@ -2566,7 +2714,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
      We need to calculate the cost in case #2 also because we need to make
      choice between this join order and others.
    */
-      uint first_tab= pos->first_dupsweedout_table;
+    uint first_tab= first_dupsweedout_table;
    double dups_cost;
    double prefix_rec_count;
    double sj_inner_fanout= 1.0;
@ -2586,7 +2734,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
    }
    
    table_map dups_removed_fanout= 0;
-      for (uint j= pos->first_dupsweedout_table; j <= idx; j++)
+    for (uint j= first_dupsweedout_table; j <= idx; j++)
    {
      POSITION *p= join->positions + j;
      dups_cost += p->read_time;
@ -2623,23 +2771,13 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
                             one_lookup_cost;
    dups_cost += write_cost + full_lookup_cost;
    
-      /*
-        Use the strategy if 
-         * it is cheaper then what we've had, or
-         * we haven't picked any other semi-join strategy yet
-        The second part is necessary because this strategy is the last one
-        to consider (it needs "the most" tables in the prefix) and we can't
-        leave duplicate-producing tables not handled by any strategy.
-      */
-      if (dups_cost < *current_read_time || join->cur_dups_producing_tables)
-      {
-        pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT;
-        *current_read_time= dups_cost;
-        *current_record_count= prefix_rec_count * sj_outer_fanout;
-        join->cur_dups_producing_tables &= ~dups_removed_fanout;
-      }
-    }
+    *read_time= dups_cost;
+    *record_count= prefix_rec_count * sj_outer_fanout;
+    *handled_fanout= dups_removed_fanout;
+    *strategy= SJ_OPT_DUPS_WEEDOUT;
+    return TRUE;
  }
+  return FALSE;
 }


@ -2836,11 +2974,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
    }
    else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
    {
-      POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner;
+      POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner;
      SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info;
      sjm->is_used= TRUE;
      sjm->is_sj_scan= TRUE;
-      first= pos->sjm_scan_last_inner - sjm->tables + 1;
+      first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1;
      memcpy(join->best_positions + first, 
             sjm->positions, sizeof(POSITION) * sjm->tables);
      join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
@ -2878,7 +3016,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
 
    if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
    {
-      first= pos->first_firstmatch_table;
+      first= pos->firstmatch_picker.first_firstmatch_table;
      join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
      join->best_positions[first].n_sj_tables= tablenr - first + 1;
      POSITION dummy; // For loose scan paths
@ -2911,7 +3049,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)

    if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) 
    {
-      first= pos->first_loosescan_table;
+      first= pos->loosescan_picker.first_loosescan_table;
      POSITION *first_pos= join->best_positions + first;
      POSITION loose_scan_pos; // For loose scan paths
      double record_count= (first== join->const_tables)? 1.0: 
@ -2950,7 +3088,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
        Duplicate Weedout starting at
        pos->dups_weedout_picker.first_dupsweedout_table, ending at this table.
      */
-      first= pos->first_dupsweedout_table;
+      first= pos->dups_weedout_picker.first_dupsweedout_table;
      join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
      join->best_positions[first].n_sj_tables= tablenr - first + 1;
    }
@ -3893,8 +4031,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,

        /* Calculate key length */
        keylen= 0;
-        keyno= pos->loosescan_key;
-        for (uint kp=0; kp < pos->loosescan_parts; kp++)
+        keyno= pos->loosescan_picker.loosescan_key;
+        for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
          keylen += tab->table->key_info[keyno].key_part[kp].store_length;

        tab->loosescan_key_len= keylen;
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@ -263,8 +263,8 @@ public:
    {
      pos->records_read=    best_loose_scan_records;
      pos->key=             best_loose_scan_start_key;
-      pos->loosescan_key=   best_loose_scan_key;
-      pos->loosescan_parts= best_max_loose_keypart + 1;
+      pos->loosescan_picker.loosescan_key=   best_loose_scan_key;
+      pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1;
      pos->use_join_buffer= FALSE;
      pos->table=           tab;
      // todo need ref_depend_map ?
@ -277,8 +277,7 @@ public:
 };


-void advance_sj_state(JOIN *join, const table_map remaining_tables, 
-                      const JOIN_TAB *new_join_tab, uint idx, 
+void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx, 
                      double *current_record_count, double *current_read_time,
                      POSITION *loose_scan_pos);
 void restore_prev_sj_state(const table_map remaining_tables, 
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const
 static bool find_best(JOIN *join,table_map rest_tables,uint index,
 		      double record_count,double read_time);
 static uint cache_record_length(JOIN *join,uint index);
-static bool get_best_combination(JOIN *join);
+bool get_best_combination(JOIN *join);
 static store_key *get_store_key(THD *thd,
 				KEYUSE *keyuse, table_map used_tables,
 				KEY_PART_INFO *key_part, uchar *key_buff,
@ -4883,7 +4883,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
  join->positions[idx].records_read=1.0;	/* This is a const table */
  join->positions[idx].ref_depend_map= 0;

-  join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
+//  join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
  join->positions[idx].sj_strategy= SJ_OPT_NONE;
  join->positions[idx].use_join_buffer= FALSE;

@ -5533,7 +5533,7 @@ best_access_path(JOIN      *join,
  pos->key=          best_key;
  pos->table=        s;
  pos->ref_depend_map= best_ref_depends_map;
-  pos->loosescan_key= MAX_KEY;
+  pos->loosescan_picker.loosescan_key= MAX_KEY;
  pos->use_join_buffer= best_uses_jbuf;
   
  loose_scan_opt.save_to_position(s, loose_scan_pos);
@ -5840,7 +5840,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
    /* compute the cost of the new plan extended with 's' */
    record_count*= join->positions[idx].records_read;
    read_time+=    join->positions[idx].read_time;
-    advance_sj_state(join, join_tables, s, idx, &record_count, &read_time,
+    advance_sj_state(join, join_tables, idx, &record_count, &read_time,
                     &loose_scan_pos);

    join_tables&= ~(s->table->map);
@ -6356,7 +6356,7 @@ best_extension_by_limited_search(JOIN      *join,
      current_record_count= record_count * position->records_read;
      current_read_time=    read_time + position->read_time;

-      advance_sj_state(join, remaining_tables, s, idx, &current_record_count,
+      advance_sj_state(join, remaining_tables, idx, &current_record_count,
                       &current_read_time, &loose_scan_pos);

      /* Expand only partial plans with lower cost than the best QEP so far */
@ -6513,7 +6513,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
      */
      double current_record_count=record_count*records;
      double current_read_time=read_time+best;
-      advance_sj_state(join, rest_tables, s, idx, &current_record_count, 
+      advance_sj_state(join, rest_tables, idx, &current_record_count, 
                       &current_read_time, &loose_scan_pos);

      if (best_record_count > current_record_count ||
@ -7013,7 +7013,7 @@ static Item * const null_ptr= NULL;
    TRUE   Out of memory
 */

-static bool
+bool
 get_best_combination(JOIN *join)
 {
  uint tablenr;
@ -7091,13 +7091,6 @@ get_best_combination(JOIN *join)
    
    *j= *join->best_positions[tablenr].table;

-#if 0
-/* SJ-Materialization is represented with join tab ranges */
-    if (j->sj_strategy == SJ_OPT_MATERIALIZE || 
-        j->sj_strategy == SJ_OPT_MATERIALIZE)
-      j->sj_strategy= SJ_OPT_NONE;  
-#endif
-
    j->bush_root_tab= sjm_nest_root;

    form=join->table[tablenr]=j->table;
@ -7120,7 +7113,7 @@ get_best_combination(JOIN *join)
        (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN))
    {
      j->type=JT_ALL;
-      j->index= join->best_positions[tablenr].loosescan_key;
+      j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
      if (tablenr != join->const_tables)
 	join->full_join=1;
    }
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@ -158,6 +158,17 @@ enum enum_nested_loop_state
 };


+/*
+  Possible sj_strategy values: identify the semi-join strategy that has been
+  picked. This enum is the type of both POSITION::sj_strategy and
+  JOIN_TAB::sj_strategy; SJ_OPT_NONE means that no strategy is set.
+*/
+enum sj_strategy_enum
+{
+  SJ_OPT_NONE=0,
+  SJ_OPT_DUPS_WEEDOUT=1,
+  SJ_OPT_LOOSE_SCAN  =2,
+  SJ_OPT_FIRST_MATCH =3,
+  SJ_OPT_MATERIALIZE =4,
+  SJ_OPT_MATERIALIZE_SCAN=5
+};
+
 /* Values for JOIN_TAB::packed_info */
 #define TAB_INFO_HAVE_VALUE 1
 #define TAB_INFO_USING_INDEX 2
@ -374,7 +385,7 @@ typedef struct st_join_table {
    POSITION::sj_strategy field. This field is set up by the
    fix_semijoin_strategies_for_picked_join_order.
  */
-  uint sj_strategy;
+  enum sj_strategy_enum sj_strategy;

  uint n_sj_tables;

@ -496,66 +507,126 @@ enum_nested_loop_state
 end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 		bool end_of_records);

+/* Semi-join strategy pickers (per-strategy optimization state) */

-/**
-  Information about a position of table within a join order. Used in join
-  optimization.
+
+struct st_position;
+/*
+  Interface shared by the classes that keep the optimization state of one
+  semi-join strategy: Duplicate_weedout_picker, Firstmatch_picker,
+  LooseScan_picker and Sj_materialization_picker. POSITION holds one object
+  of each of these classes.
+*/
+class Semi_join_strategy_picker
+{
+public:
+  /* Called when starting to build a new join prefix */
+  virtual void set_empty() = 0;
+
+  /* 
+    Update internal state after another table has been added to the join
+    prefix
+  */
+  virtual void set_from_prev(struct st_position *prev) = 0;
+  /*
+    Check whether the strategy can be applied to the join prefix whose last
+    table is new_join_tab, placed at position idx.
+
+    NOTE(review): Duplicate_weedout_picker::check_qep() is the only
+    implementation visible next to this declaration. It returns TRUE after
+    setting *record_count, *read_time, *handled_fanout and *strategy, and
+    FALSE otherwise. The other pickers presumably follow the same contract -
+    confirm against their implementations.
+  */
+  virtual bool check_qep(JOIN *join,
+                         uint idx,
+                         table_map remaining_tables, 
+                         const JOIN_TAB *new_join_tab,
+                         double *record_count,
+                         double *read_time,
+                         table_map *handled_fanout,
+                         sj_strategy_enum *strategy,
+                         struct st_position *loose_scan_pos) = 0;
+  /* The picker classes implement this by setting their is_used flag */
+  virtual void mark_used() = 0;
+
+  virtual ~Semi_join_strategy_picker() {} 
+};
+
+
+/*
+  Duplicate Weedout strategy optimization state
 */
-typedef struct st_position
+
+class Duplicate_weedout_picker : public Semi_join_strategy_picker
+{
+  /*
+    The first table that the strategy will need to handle. set_empty()
+    resets it to MAX_TABLES.
+  */
+  uint  first_dupsweedout_table;
+
+  /*
+    Tables that we will need to have in the prefix to do the weedout step
+    (all inner and all outer that the involved semi-joins are correlated with)
+  */
+  table_map dupsweedout_tables;
+  
+  /* FALSE after set_empty(), TRUE after mark_used() */
+  bool is_used;
+public:
+  /* Start a new join prefix: no DuplicateWeedout range is being built */
+  void set_empty()
+  {
+    dupsweedout_tables= 0;
+    first_dupsweedout_table= MAX_TABLES;
+    is_used= FALSE;
+  }
+  void set_from_prev(struct st_position *prev);
+  
+  /*
+    Check whether a DuplicateWeedout range can end at the table new_join_tab
+    that is put at position idx of the join prefix.
+
+    If it can, the function stores the cost of the prefix in *read_time, its
+    output cardinality in *record_count, the map of tables whose fanout is
+    removed by the weedout in *handled_fanout and SJ_OPT_DUPS_WEEDOUT in
+    *strategy, and returns TRUE. Otherwise it returns FALSE.
+  */
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *strategy,
+                 struct st_position *loose_scan_pos);
+
+  void mark_used() { is_used= TRUE; }
+  /* Needs first_dupsweedout_table to locate the start of the weedout range */
+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class Firstmatch_picker : public Semi_join_strategy_picker
 {
  /*
-    The "fanout": number of output rows that will be produced (after
-    pushed down selection condition is applied) per each row combination of
-    previous tables.
+    Index of the first inner table that we intend to handle with this
+    strategy. MAX_TABLES means that we are not inside a FirstMatch prefix
+    (see in_firstmatch_prefix() and invalidate_firstmatch_prefix()).
  */
-  double records_read;
-
+  uint first_firstmatch_table;
  /*
-    Cost accessing the table in course of the entire complete join execution,
-    i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times 
-    number the access method will be invoked.
+    Tables that were not in the join prefix when we've started considering 
+    FirstMatch strategy.
  */
-  double read_time;
-  JOIN_TAB *table;
-
+  table_map first_firstmatch_rtbl;
  /* 
-    NULL  -  'index' or 'range' or 'index_merge' or 'ALL' access is used.
-    Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
+    Tables that need to be in the prefix before we can calculate the cost
+    of using FirstMatch strategy.
   */
-  KEYUSE *key;
+  table_map firstmatch_need_tables;

-  /* If ref-based access is used: bitmap of tables this table depends on  */
-  table_map ref_depend_map;
+  bool is_used; /* FALSE after set_empty(), TRUE after mark_used() */

-  bool use_join_buffer; 
+  bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
+  void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
+public:
+  void set_empty()
+  {
+    invalidate_firstmatch_prefix();
+    is_used= FALSE;
+  }
+
+  void set_from_prev(struct st_position *prev);
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *strategy,
+                 struct st_position *loose_scan_pos);
+
+  void mark_used() { is_used= TRUE; }
+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};


-  /* These form a stack of partial join order costs and output sizes */
-  COST_VECT prefix_cost;
-  double    prefix_record_count;
-
-  /*
-    Current optimization state: Semi-join strategy to be used for this
-    and preceding join tables.
-    
-    Join optimizer sets this for the *last* join_tab in the
-    duplicate-generating range. That is, in order to interpret this field, 
-    one needs to traverse join->[best_]positions array from right to left.
-    When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
-    field (depending on the strategy) tells how many preceding positions 
-    this applies to. The values of covered_preceding_positions->sj_strategy
-    must be ignored.
-  */
-  uint sj_strategy;
-  /*
-    Valid only after fix_semijoin_strategies_for_picked_join_order() call:
-    if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
-    are covered by the specified semi-join strategy
-  */
-  uint n_sj_tables;
-
-/* LooseScan strategy members */
-
+class LooseScan_picker : public Semi_join_strategy_picker
+{
  /* The first (i.e. driving) table we're doing loose scan for */
  uint        first_loosescan_table;
  /* 
@ -573,36 +644,46 @@ typedef struct st_position
  uint loosescan_key;  // final (one for strategy instance )
  uint loosescan_parts; /* Number of keyparts to be kept distinct */
  
-/* FirstMatch strategy */
-  /*
-    Index of the first inner table that we intend to handle with this
-    strategy
-  */
-  uint first_firstmatch_table;
-  /*
-    Tables that were not in the join prefix when we've started considering 
-    FirstMatch strategy.
-  */
-  table_map first_firstmatch_rtbl;
-  /* 
-    Tables that need to be in the prefix before we can calculate the cost
-    of using FirstMatch strategy.
-   */
-  table_map firstmatch_need_tables;
+  bool is_used;
+public:
+  void set_empty()
+  {
+    first_loosescan_table= MAX_TABLES; 
+    is_used= FALSE;
+  }

-  bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
-  void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
+  void set_from_prev(struct st_position *prev);
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *strategy,
+                 struct st_position *loose_scan_pos);
+  void mark_used() { is_used= TRUE; }

-/* Duplicate Weedout strategy */
-  /* The first table that the strategy will need to handle */
-  uint  first_dupsweedout_table;
-  /*
-    Tables that we will need to have in the prefix to do the weedout step
-    (all inner and all outer that the involved semi-joins are correlated with)
-  */
-  table_map dupsweedout_tables;
+  friend class Loose_scan_opt;
+  friend void best_access_path(JOIN      *join,
+                               JOIN_TAB  *s,
+                               table_map remaining_tables,
+                               uint      idx,
+                               bool      disable_jbuf,
+                               double    record_count,
+                               struct st_position *pos,
+                               struct st_position *loose_scan_pos);
+  friend bool get_best_combination(JOIN *join);
+  friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
+                                             uint no_jbuf_after);
+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class Sj_materialization_picker : public Semi_join_strategy_picker
+{
+  bool is_used;

-/* SJ-Materialization-Scan strategy */
  /* The last inner table (valid once we're after it) */
  uint      sjm_scan_last_inner;
  /*
@ -612,9 +693,101 @@ typedef struct st_position
  */
  table_map sjm_scan_need_tables;

-  table_map prefix_dups_producing_tables;
-} POSITION;
+public:
+  void set_empty()
+  {
+    sjm_scan_need_tables= 0;
+    LINT_INIT(sjm_scan_last_inner);
+    is_used= FALSE;
+  }
+  void set_from_prev(struct st_position *prev);
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *strategy,
+                 struct st_position *loose_scan_pos);
+  void mark_used() { is_used= TRUE; }

+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+/**
+  Information about a position of table within a join order. Used in join
+  optimization.
+*/
+typedef struct st_position
+{
+  /* The table that's put into join order */
+  JOIN_TAB *table;
+
+  /*
+    The "fanout": number of output rows that will be produced (after
+    pushed down selection condition is applied) per each row combination of
+    previous tables.
+  */
+  double records_read;
+
+  /* 
+    Cost accessing the table in course of the entire complete join execution,
+    i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times 
+    number the access method will be invoked.
+  */
+  double read_time;
+
+  /* Cumulative cost and record count for the join prefix */
+  COST_VECT prefix_cost;
+  double    prefix_record_count;
+
+  /*
+    NULL  -  'index' or 'range' or 'index_merge' or 'ALL' access is used.
+    Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
+  */
+  KEYUSE *key;
+
+  /* If ref-based access is used: bitmap of tables this table depends on  */
+  table_map ref_depend_map;
+ 
+  /*
+    TRUE <=> join buffering will be used. At the moment this is based on 
+    *very* imprecise guesses made in best_access_path(). 
+  */
+  bool use_join_buffer;
+ 
+  /*
+    Current optimization state: Semi-join strategy to be used for this
+    and preceding join tables.
+    
+    Join optimizer sets this for the *last* join_tab in the
+    duplicate-generating range. That is, in order to interpret this field, 
+    one needs to traverse join->[best_]positions array from right to left.
+    When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
+    field (depending on the strategy) tells how many preceding positions 
+    this applies to. The values of covered_preceding_positions->sj_strategy
+    must be ignored.
+
+    The strategy-dependent fields are members of the *_picker objects at the
+    end of this struct, e.g. dups_weedout_picker.first_dupsweedout_table or
+    firstmatch_picker.first_firstmatch_table.
+  */
+  enum sj_strategy_enum sj_strategy;
+  
+  /*
+    Valid only after fix_semijoin_strategies_for_picked_join_order() call:
+    if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
+    are covered by the specified semi-join strategy
+  */
+  uint n_sj_tables;
+
+  table_map prefix_dups_producing_tables; /* NOTE(review): undocumented;
+    presumably the duplicate-producing tables of the join prefix - confirm */
+
+  table_map inner_tables_handled_with_other_sjs; /* NOTE(review):
+    undocumented, and no use of it is visible next to this declaration */
+  /* Optimization state of each semi-join strategy for this position */
+  Duplicate_weedout_picker  dups_weedout_picker;
+  Firstmatch_picker         firstmatch_picker;
+  LooseScan_picker          loosescan_picker;
+  Sj_materialization_picker sjmat_picker;
+} POSITION;

 typedef struct st_rollup
 {
@ -626,18 +799,6 @@ typedef struct st_rollup
 } ROLLUP;


-#define SJ_OPT_NONE 0
-#define SJ_OPT_DUPS_WEEDOUT 1
-#define SJ_OPT_LOOSE_SCAN   2
-#define SJ_OPT_FIRST_MATCH  3
-#define SJ_OPT_MATERIALIZE  4
-#define SJ_OPT_MATERIALIZE_SCAN  5
-
-inline bool sj_is_materialize_strategy(uint strategy)
-{
-  return strategy >= SJ_OPT_MATERIALIZE;
-}
-
 class JOIN_TAB_RANGE: public Sql_alloc
 {
 public: