MCOL-4617 Move in-to-exists predicate creation and injection into the engine.

We earlier leveraged the server functionality provided by Item_in_subselect::create_in_to_exists_cond and Item_in_subselect::inject_in_to_exists_cond to create and inject the in-to-exists predicate into an IN subquery's JOIN struct. With this patch, we leave the IN subquery's JOIN unaltered and instead directly perform this predicate creation and injection into ColumnStore's select execution plan.
2025-07-29 08:21:15 +03:00 · 2021-03-26 15:41:01 +00:00
parent 374103220c
commit f167a6e505
6 changed files with 1373 additions and 186 deletions
--- a/dbcon/mysql/ha_in_sub.cpp
+++ b/dbcon/mysql/ha_in_sub.cpp
@ -41,6 +41,7 @@ using namespace std;
 #include "simplefilter.h"
 #include "predicateoperator.h"
 #include "rowcolumn.h"
+#include "constantcolumn.h"
 using namespace execplan;

 #include "errorids.h"
@ -101,6 +102,17 @@ InSub::InSub(const InSub& rhs) :
 InSub::~InSub()
 {}

+inline void setCorrelatedFlag(execplan::ReturnedColumn* col, gp_walk_info* gwip)
+{
+    ConstantColumn* cc = dynamic_cast<ConstantColumn*>(col);
+
+    if (!cc)
+    {
+        col->joinInfo(col->joinInfo() | JOIN_CORRELATED | JOIN_NULLMATCH_CANDIDATE);
+        gwip->subQuery->correlated(true);
+    }
+}
+
 /** MySQL transform (NOT) IN subquery to (NOT) EXIST
 *
 */
@ -125,10 +137,9 @@ execplan::ParseTree* InSub::transform()
    delete rhs;
    ReturnedColumn* lhs = fGwip.rcWorkStack.top();
    fGwip.rcWorkStack.pop();
-    delete lhs;

    fSub = (Item_subselect*)(fFunc->arguments()[1]);
-    idbassert(fSub && fFunc);
+    idbassert(fSub);

    SCSEP csep (new CalpontSelectExecutionPlan());
    csep->sessionID(fGwip.sessionid);
@ -140,6 +151,24 @@ execplan::ParseTree* InSub::transform()
    gwi.thd = fGwip.thd;
    gwi.subQuery = this;

+    // The below 2 fields are used later on in buildInToExistsFilter()
+    gwi.inSubQueryLHS = lhs;
+    gwi.inSubQueryLHSItem = fFunc->arguments()[0];
+
+    RowColumn* rlhs = dynamic_cast<RowColumn*>(lhs);
+
+    if (rlhs)
+    {
+        for (auto& col : rlhs->columnVec())
+        {
+            setCorrelatedFlag(col.get(), &gwi);
+        }
+    }
+    else
+    {
+        setCorrelatedFlag(lhs, &gwi);
+    }
+
    // @4827 merge table list to gwi in case there is FROM sub to be referenced
    // in the FROM sub
    gwi.derivedTbCnt = fGwip.derivedTbList.size();
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@ -1677,6 +1677,180 @@ bool buildRowColumnFilter(gp_walk_info* gwip, RowColumn* rhs, RowColumn* lhs, It
    return true;
 }

+void checkOuterTableColumn(gp_walk_info* gwip,
+    const CalpontSystemCatalog::TableAliasName& tan,
+    execplan::ReturnedColumn* col)
+{
+    set<CalpontSystemCatalog::TableAliasName>::const_iterator it;
+
+    bool notInner = true;
+
+    for (it = gwip->innerTables.begin(); it != gwip->innerTables.end(); ++it)
+    {
+        if (tan.alias == it->alias && tan.view == it->view)
+            notInner = false;
+    }
+
+    if (notInner)
+    {
+        col->returnAll(true);
+        IDEBUG( cerr << "setting returnAll on " << tan << endl);
+    }
+}
+
+bool buildEqualityPredicate(execplan::ReturnedColumn* lhs,
+    execplan::ReturnedColumn* rhs,
+    gp_walk_info* gwip,
+    boost::shared_ptr<Operator>& sop,
+    const Item_func::Functype& funcType,
+    const vector<Item*>& itemList,
+    bool isInSubs)
+{
+    cal_connection_info* ci = reinterpret_cast<cal_connection_info*>(get_fe_conn_info_ptr());
+
+    // push the column that is associated with the correlated column to the returned
+    // column list, so the materialized view have the complete projection list.
+    // e.g. tout.c1 in (select tin.c1 from tin where tin.c2=tout.c2);
+    // the projetion list of subquery will have tin.c1, tin.c2.
+    ReturnedColumn* correlatedCol = nullptr;
+    ReturnedColumn* localCol = nullptr;
+
+    if (rhs->joinInfo() & JOIN_CORRELATED)
+    {
+        correlatedCol = rhs;
+        localCol = lhs;
+    }
+    else if (lhs->joinInfo() & JOIN_CORRELATED)
+    {
+        correlatedCol = lhs;
+        localCol = rhs;
+    }
+
+    if (correlatedCol && localCol)
+    {
+        ConstantColumn* cc = dynamic_cast<ConstantColumn*>(localCol);
+
+        if ((!cc || (cc && funcType == Item_func::EQ_FUNC)) &&
+                !(localCol->joinInfo() & JOIN_CORRELATED))
+        {
+            if (isInSubs)
+                localCol->sequence(0);
+            else
+                localCol->sequence(gwip->returnedCols.size());
+
+            localCol->expressionId(ci->expressionId++);
+            ReturnedColumn* rc = localCol->clone();
+            rc->colSource(rc->colSource() | CORRELATED_JOIN);
+            gwip->additionalRetCols.push_back(SRCP(rc));
+            gwip->localCols.push_back(localCol);
+
+            if (rc->hasWindowFunc() && !isInSubs)
+                gwip->windowFuncList.push_back(rc);
+        }
+
+        // push the correlated join partner to the group by list only when there's aggregate
+        // and we don't push aggregate column to the group by
+        // @bug4756. mysql does not always give correct information about whether there is
+        // aggregate on the SELECT list. Need to figure that by ourselves and then decide
+        // to add the group by or not.
+        if (gwip->subQuery)
+        {
+            if (!localCol->hasAggregate() && !localCol->hasWindowFunc())
+                gwip->subGroupByCols.push_back(SRCP(localCol->clone()));
+        }
+
+        if (sop->op() == OP_EQ)
+        {
+            if (gwip->subSelectType == CalpontSelectExecutionPlan::IN_SUBS ||
+                    gwip->subSelectType == CalpontSelectExecutionPlan::EXISTS_SUBS)
+                correlatedCol->joinInfo(correlatedCol->joinInfo() | JOIN_SEMI);
+            else if (gwip->subSelectType == CalpontSelectExecutionPlan::NOT_IN_SUBS ||
+                     gwip->subSelectType == CalpontSelectExecutionPlan::NOT_EXISTS_SUBS)
+                correlatedCol->joinInfo(correlatedCol->joinInfo() | JOIN_ANTI);
+        }
+    }
+
+    SimpleFilter* sf = new SimpleFilter();
+    sf->timeZone(gwip->thd->variables.time_zone->get_name()->ptr());
+
+    //@bug 2101 for when there are only constants in a delete or update where clause (eg "where 5 < 6").
+    //There will be no field column and it will get here only if the comparison is true.
+    if (gwip->columnMap.empty() &&
+            ((current_thd->lex->sql_command == SQLCOM_UPDATE) ||
+             (current_thd->lex->sql_command == SQLCOM_UPDATE_MULTI) ||
+             (current_thd->lex->sql_command == SQLCOM_DELETE) ||
+             (current_thd->lex->sql_command == SQLCOM_DELETE_MULTI)))
+    {
+        IDEBUG( cerr << "deleted func with 2 const columns" << endl );
+        delete rhs;
+        delete lhs;
+        return false;
+    }
+
+    // handle noop (only for table mode)
+    if (rhs->data() == "noop" || lhs->data() == "noop")
+    {
+        sop.reset(new Operator("noop"));
+    }
+    else
+    {
+        for (uint32_t i = 0; i < itemList.size(); i++)
+        {
+            if (isPredicateFunction(itemList[i], gwip))
+            {
+                gwip->fatalParseError = true;
+                gwip->parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_SUB_EXPRESSION);
+            }
+        }
+    }
+
+    sf->op(sop);
+    sf->lhs(lhs);
+    sf->rhs(rhs);
+    sop->setOpType(lhs->resultType(), rhs->resultType());
+    sop->resultType(sop->operationType());
+
+    if (sop->op() == OP_EQ)
+    {
+        CalpontSystemCatalog::TableAliasName tan_lhs;
+        CalpontSystemCatalog::TableAliasName tan_rhs;
+        bool outerjoin = (rhs->singleTable(tan_rhs) && lhs->singleTable(tan_lhs));
+
+        // @bug 1632. Alias should be taken account to the identity of tables for selfjoin to work
+        if (outerjoin && tan_lhs != tan_rhs) // join
+        {
+            if (!gwip->condPush) // vtable
+            {
+                if (!gwip->innerTables.empty())
+                {
+                    checkOuterTableColumn(gwip, tan_lhs, lhs);
+                    checkOuterTableColumn(gwip, tan_rhs, rhs);
+                }
+
+                if (funcType == Item_func::EQ_FUNC)
+                {
+                    gwip->equiCondSFList.push_back(sf);
+                }
+
+                ParseTree* ptp = new ParseTree(sf);
+                gwip->ptWorkStack.push(ptp);
+            }
+        }
+        else
+        {
+            ParseTree* ptp = new ParseTree(sf);
+            gwip->ptWorkStack.push(ptp);
+        }
+    }
+    else
+    {
+        ParseTree* ptp = new ParseTree(sf);
+        gwip->ptWorkStack.push(ptp);
+    }
+
+    return true;
+}
+
 bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip)
 {
    boost::shared_ptr<Operator> sop(new PredicateOperator(ifp->func_name()));
@ -1694,9 +1868,6 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip)
    if (get_fe_conn_info_ptr() == NULL)
        set_fe_conn_info_ptr((void*)new cal_connection_info());

-    cal_connection_info* ci = reinterpret_cast<cal_connection_info*>(get_fe_conn_info_ptr());
-
-
    if (ifp->functype() == Item_func::BETWEEN)
    {
        idbassert (gwip->rcWorkStack.size() >= 3);
@ -2126,174 +2297,14 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip)
        RowColumn* rlhs = dynamic_cast<RowColumn*>(lhs);

        if (rrhs && rlhs)
-        {
            return buildRowColumnFilter(gwip, rrhs, rlhs, ifp);
-        }

-        // push the column that is associated with the correlated column to the returned
-        // column list, so the materialized view have the complete projection list.
-        // e.g. tout.c1 in (select tin.c1 from tin where tin.c2=tout.c2);
-        // the projetion list of subquery will have tin.c1, tin.c2.
-        ReturnedColumn* correlatedCol = NULL;
-        ReturnedColumn* localCol = NULL;
+        vector<Item*> itemList;

-        if (rhs->joinInfo() & JOIN_CORRELATED)
-        {
-            correlatedCol = rhs;
-            localCol = lhs;
-        }
-        else if (lhs->joinInfo() & JOIN_CORRELATED)
-        {
-            correlatedCol = lhs;
-            localCol = rhs;
-        }
-
-        if (correlatedCol && localCol)
-        {
-            ConstantColumn* cc = dynamic_cast<ConstantColumn*>(localCol);
-
-            if ((!cc || (cc && ifp->functype() == Item_func::EQ_FUNC)) &&
-                    !(localCol->joinInfo() & JOIN_CORRELATED))
-            {
-                localCol->sequence(gwip->returnedCols.size());
-                localCol->expressionId(ci->expressionId++);
-                ReturnedColumn* rc = localCol->clone();
-                rc->colSource(rc->colSource() | CORRELATED_JOIN);
-                gwip->additionalRetCols.push_back(SRCP(rc));
-                gwip->localCols.push_back(localCol);
-
-                if (rc->hasWindowFunc())
-                    gwip->windowFuncList.push_back(rc);
-            }
-
-            // push the correlated join partner to the group by list only when there's aggregate
-            // and we don't push aggregate column to the group by
-            // @bug4756. mysql does not always give correct information about whether there is
-            // aggregate on the SELECT list. Need to figure that by ourselves and then decide
-            // to add the group by or not.
-            if (gwip->subQuery)
-            {
-                if (!localCol->hasAggregate() && !localCol->hasWindowFunc())
-                    gwip->subGroupByCols.push_back(SRCP(localCol->clone()));
-            }
-
-            if (sop->op() == OP_EQ)
-            {
-                if (gwip->subSelectType == CalpontSelectExecutionPlan::IN_SUBS ||
-                        gwip->subSelectType == CalpontSelectExecutionPlan::EXISTS_SUBS)
-                    correlatedCol->joinInfo(correlatedCol->joinInfo() | JOIN_SEMI);
-                else if (gwip->subSelectType == CalpontSelectExecutionPlan::NOT_IN_SUBS ||
-                         gwip->subSelectType == CalpontSelectExecutionPlan::NOT_EXISTS_SUBS)
-                    correlatedCol->joinInfo(correlatedCol->joinInfo() | JOIN_ANTI);
-            }
-        }
-
-        SimpleFilter* sf = new SimpleFilter();
-        sf->timeZone(gwip->thd->variables.time_zone->get_name()->ptr());
-
-        //@bug 2101 for when there are only constants in a delete or update where clause (eg "where 5 < 6").
-        //There will be no field column and it will get here only if the comparison is true.
-        if (gwip->columnMap.empty() &&
-                ((current_thd->lex->sql_command == SQLCOM_UPDATE) ||
-                 (current_thd->lex->sql_command == SQLCOM_UPDATE_MULTI) ||
-                 (current_thd->lex->sql_command == SQLCOM_DELETE) ||
-                 (current_thd->lex->sql_command == SQLCOM_DELETE_MULTI)))
-        {
-            IDEBUG( cerr << "deleted func with 2 const columns" << endl );
-            delete rhs;
-            delete lhs;
-            return false;
-        }
-
-        // handle noop (only for table mode)
-        if (rhs->data() == "noop" || lhs->data() == "noop")
-        {
-            sop.reset(new Operator("noop"));
-        }
-        else
-        {
        for (uint32_t i = 0; i < ifp->argument_count(); i++)
-            {
-                if (isPredicateFunction(ifp->arguments()[i], gwip))
-                {
-                    gwip->fatalParseError = true;
-                    gwip->parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_SUB_EXPRESSION);
-                }
-            }
-        }
+            itemList.push_back(ifp->arguments()[i]);

-        sf->op(sop);
-        sf->lhs(lhs);
-        sf->rhs(rhs);
-        sop->setOpType(lhs->resultType(), rhs->resultType());
-        sop->resultType(sop->operationType());
-
-        if (sop->op() == OP_EQ)
-        {
-            CalpontSystemCatalog::TableAliasName tan_lhs;
-            CalpontSystemCatalog::TableAliasName tan_rhs;
-            set<CalpontSystemCatalog::TableAliasName>::const_iterator it;
-            bool outerjoin = (rhs->singleTable(tan_rhs) && lhs->singleTable(tan_lhs));
-
-            // @bug 1632. Alias should be taken account to the identity of tables for selfjoin to work
-            if (outerjoin && tan_lhs != tan_rhs) // join
-            {
-                if (!gwip->condPush) // vtable
-                {
-                    if (!gwip->innerTables.empty())
-                    {
-                        bool notInner = true;
-
-                        for (it = gwip->innerTables.begin(); it != gwip->innerTables.end(); ++it)
-                        {
-                            if (tan_lhs.alias == it->alias && tan_lhs.view == it->view)
-                                notInner = false;
-                        }
-
-                        if (notInner)
-                        {
-                            lhs->returnAll(true);
-                            IDEBUG( cerr << "setting returnAll on " << tan_lhs << endl);
-                        }
-                    }
-
-                    if (!gwip->innerTables.empty())
-                    {
-                        bool notInner = true;
-
-                        for (it = gwip->innerTables.begin(); it != gwip->innerTables.end(); ++it)
-                        {
-                            if (tan_rhs.alias == it->alias && tan_rhs.view == it->view)
-                                notInner = false;
-                        }
-
-                        if (notInner)
-                        {
-                            rhs->returnAll(true);
-                            IDEBUG( cerr << "setting returnAll on " << tan_rhs << endl );
-                        }
-                    }
-
-                    if (ifp->functype() == Item_func::EQ_FUNC)
-                    {
-                        gwip->equiCondSFList.push_back(sf);
-                    }
-
-                    ParseTree* ptp = new ParseTree(sf);
-                    gwip->ptWorkStack.push(ptp);
-                }
-            }
-            else
-            {
-                ParseTree* ptp = new ParseTree(sf);
-                gwip->ptWorkStack.push(ptp);
-            }
-        }
-        else
-        {
-            ParseTree* ptp = new ParseTree(sf);
-            gwip->ptWorkStack.push(ptp);
-        }
+        return buildEqualityPredicate(lhs, rhs, gwip, sop, ifp->functype(), itemList);
    }

    return true;
@ -6881,6 +6892,125 @@ int processLimitAndOffset(
    return 0;
 }

+/*@brief Create in-to-exists predicate for an IN subquery   */
+/***********************************************************
+ * DESCRIPTION:
+ * This function processes the lhs and rhs of an IN predicate
+ * for a query such as:
+ * select col1 from t1 where col2 in (select col2' from t2);
+ * here, lhs is col2 and rhs is the in subquery "select col2' from t2".
+ * It creates a new predicate of the form "col2=col2'" which then later
+ * gets injected into the execution plan of the subquery.
+ * If lhs is of type Item::ROW_ITEM instead, such as:
+ * select col1 from t1 where (col2,col3) in (select col2',col3' from t2);
+ * the function builds an "and" filter of the form "col2=col2' and col3=col3'".
+ * RETURNS
+ *  none
+ ***********************************************************/
+void buildInToExistsFilter(gp_walk_info& gwi, SELECT_LEX& select_lex)
+{
+    RowColumn* rlhs = dynamic_cast<RowColumn*>(gwi.inSubQueryLHS);
+
+    size_t additionalRetColsBefore = gwi.additionalRetCols.size();
+
+    if (rlhs)
+    {
+        idbassert(gwi.inSubQueryLHSItem->type() == Item::ROW_ITEM);
+
+        Item_row* row = (Item_row*)gwi.inSubQueryLHSItem;
+
+        idbassert(!rlhs->columnVec().empty() &&
+            (rlhs->columnVec().size() == gwi.returnedCols.size()) &&
+            row->cols() && (row->cols() == select_lex.item_list.elements) &&
+            (row->cols() == gwi.returnedCols.size()));
+
+        List_iterator_fast<Item> it(select_lex.item_list);
+        Item* item;
+
+        int i = 0;
+
+        ParseTree* rowFilter = nullptr;
+
+        while ((item = it++))
+        {
+            boost::shared_ptr<Operator> sop(new PredicateOperator("="));
+            vector<Item*> itemList = {row->element_index(i), item};
+            ReturnedColumn* rhs = gwi.returnedCols[i]->clone();
+
+            buildEqualityPredicate(rlhs->columnVec()[i]->clone(), rhs, &gwi, sop,
+                Item_func::EQ_FUNC, itemList, true);
+
+            if (gwi.fatalParseError)
+            {
+                delete rlhs;
+                return;
+            }
+
+            ParseTree* tmpFilter = nullptr;
+
+            if (!gwi.ptWorkStack.empty())
+            {
+                tmpFilter = gwi.ptWorkStack.top();
+                gwi.ptWorkStack.pop();
+            }
+
+            if (i == 0 && tmpFilter)
+            {
+                rowFilter = tmpFilter;
+            }
+            else if (i != 0 && tmpFilter && rowFilter)
+            {
+                ParseTree* ptp = new ParseTree(new LogicOperator("and"));
+                ptp->left(rowFilter);
+                ptp->right(tmpFilter);
+                rowFilter = ptp;
+            }
+
+            i++;
+        }
+
+        delete rlhs;
+
+        if (rowFilter)
+            gwi.ptWorkStack.push(rowFilter);
+    }
+    else
+    {
+        idbassert((gwi.returnedCols.size() == 1) &&
+            (select_lex.item_list.elements == 1));
+
+        boost::shared_ptr<Operator> sop(new PredicateOperator("="));
+        vector<Item*> itemList = {gwi.inSubQueryLHSItem,
+            select_lex.item_list.head()};
+        ReturnedColumn* rhs = gwi.returnedCols[0]->clone();
+        buildEqualityPredicate(gwi.inSubQueryLHS, rhs, &gwi, sop,
+            Item_func::EQ_FUNC, itemList, true);
+
+        if (gwi.fatalParseError)
+            return;
+    }
+
+    size_t additionalRetColsAdded = gwi.additionalRetCols.size() -
+        additionalRetColsBefore;
+
+    if (gwi.returnedCols.size() &&
+        (gwi.returnedCols.size() == additionalRetColsAdded))
+    {
+        for (size_t i = 0; i < gwi.returnedCols.size(); i++)
+        {
+            gwi.returnedCols[i]->expressionId(
+                gwi.additionalRetCols[additionalRetColsBefore + i]->expressionId());
+            gwi.returnedCols[i]->colSource(
+                gwi.additionalRetCols[additionalRetColsBefore + i]->colSource());
+        }
+
+        // Delete the duplicate copy of the returned cols
+        auto iter = gwi.additionalRetCols.begin();
+        std::advance(iter, additionalRetColsBefore);
+        gwi.additionalRetCols.erase(iter, gwi.additionalRetCols.end());
+    }
+}
+
 /*@brief  Translates SELECT_LEX into CSEP                  */
 /***********************************************************
 * DESCRIPTION:
@ -7476,6 +7606,51 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex,
        }
    }

+    // MCOL-4617 If this is an IN subquery, then create the in-to-exists
+    // predicate and inject it into the csep
+    if (gwi.subQuery &&
+        gwi.subSelectType == CalpontSelectExecutionPlan::IN_SUBS &&
+        gwi.inSubQueryLHS && gwi.inSubQueryLHSItem)
+    {
+        // create the predicate
+        buildInToExistsFilter(gwi, select_lex);
+
+        if (gwi.fatalParseError)
+        {
+            setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi);
+            return ER_INTERNAL_ERROR;
+        }
+
+        // now inject the created predicate
+        if (!gwi.ptWorkStack.empty())
+        {
+            ParseTree* inToExistsFilter = gwi.ptWorkStack.top();
+            gwi.ptWorkStack.pop();
+
+            if (havingFilter)
+            {
+                ParseTree* ptp = new ParseTree(new LogicOperator("and"));
+                ptp->left(havingFilter);
+                ptp->right(inToExistsFilter);
+                havingFilter = ptp;
+            }
+            else
+            {
+                if (csep->filters())
+                {
+                    ParseTree* ptp = new ParseTree(new LogicOperator("and"));
+                    ptp->left(csep->filters());
+                    ptp->right(inToExistsFilter);
+                    csep->filters(ptp);
+                }
+                else
+                {
+                    csep->filters(inToExistsFilter);
+                }
+            }
+        }
+    }
+
    // for post process expressions on the select list
    // error out post process for union and sub select unit
    if (isUnion || gwi.subSelectType != CalpontSelectExecutionPlan::MAIN_SELECT)
--- a/dbcon/mysql/ha_mcs_impl_if.h
+++ b/dbcon/mysql/ha_mcs_impl_if.h
@ -158,6 +158,12 @@ struct gp_walk_info

    bool isGroupByHandler;

+    // MCOL-4617 The below 2 fields are used for in-to-exists
+    // predicate creation and injection. See usage in InSub::transform()
+    // and buildInToExistsFilter()
+    execplan::ReturnedColumn* inSubQueryLHS;
+    Item* inSubQueryLHSItem;
+
    gp_walk_info() : sessionid(0),
        fatalParseError(false),
        condPush(false),
@ -177,7 +183,9 @@ struct gp_walk_info
        inCaseStmt(false),
        cs_vtable_is_update_with_derive(false),
        cs_vtable_impossible_where_on_union(false),
-        isGroupByHandler(false)
+        isGroupByHandler(false),
+        inSubQueryLHS(nullptr),
+        inSubQueryLHSItem(nullptr)
    {}

    ~gp_walk_info() {}
@ -385,6 +393,13 @@ execplan::ParseTree* setDerivedFilter(THD* thd, execplan::ParseTree*& n,
                                      std::map<std::string, execplan::ParseTree*>& obj,
                                      execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList);
 void derivedTableOptimization(THD* thd, execplan::SCSEP& csep);
+bool buildEqualityPredicate(execplan::ReturnedColumn* lhs,
+    execplan::ReturnedColumn* rhs,
+    gp_walk_info* gwip,
+    boost::shared_ptr<execplan::Operator>& sop,
+    const Item_func::Functype& funcType,
+    const std::vector<Item*>& itemList,
+    bool isInSubs = false);

 #ifdef DEBUG_WALK_COND
 void debug_walk(const Item* item, void* arg);
--- a/dbcon/mysql/ha_mcs_pushdown.cpp
+++ b/dbcon/mysql/ha_mcs_pushdown.cpp
@ -834,13 +834,6 @@ create_columnstore_select_handler(THD* thd, SELECT_LEX* select_lex)
                if (isPS)
                {
                    sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
-
-                    if (in_subselect_rewrite(sel))
-                    {
-                        unsupported_feature = true;
-                        handler->err_msg.assign("create_columnstore_select_handler(): \
-                            Internal error occured in in_subselect_rewrite()");
-                    }
                }

                select_lex->update_used_tables();
@ -860,15 +853,6 @@ create_columnstore_select_handler(THD* thd, SELECT_LEX* select_lex)
 #endif
            join->conds = conds;
        }
-
-        // MCOL-3747 IN-TO-EXISTS rewrite inside MDB didn't add
-        // an equi-JOIN condition.
-        if (!unsupported_feature && !isPS && in_subselect_rewrite(select_lex))
-        {
-            unsupported_feature = true;
-            handler->err_msg.assign("create_columnstore_select_handler(): \
-                Internal error occured in in_subselect_rewrite()");
-        }
    }

    // We shouldn't raise error now so set an error to raise it later in init_SH.
--- a/mysql-test/columnstore/basic/r/mcol_4617.result
+++ b/mysql-test/columnstore/basic/r/mcol_4617.result
@ -0,0 +1,648 @@
+set default_storage_engine=columnstore;
+drop database if exists test_mcol4617;
+create database test_mcol4617;
+use test_mcol4617;
+create table cs1 (a int);
+insert into cs1 values (1), (2), (3), (4), (null);
+create table cs2 (b int, c int);
+insert into cs2 values (1, 100), (1, 101), (2, 200),
+(3, 300), (3, 301), (3, 302), (null, null);
+select * from cs1 where a in (select b from cs2);
+a
+1
+2
+3
+select * from cs1 where a in (select c from cs2);
+a
+select * from cs1 where (a+a) in (select (b+b) from cs2);
+a
+1
+2
+3
+select * from cs1 where (a+1) in (select b from cs2);
+a
+1
+2
+select * from cs1 where hex(a*10) in (select hex(b*10) from cs2);
+a
+1
+2
+3
+select * from cs1 where a in (select b from cs2 where cs1.a=cs2.c-299);
+a
+3
+select * from cs1 where a is not null and a in (select b from cs2);
+a
+1
+2
+3
+select * from cs1 where a in (select b from cs2) and a is null;
+a
+select * from cs1 where a in (select 2 from cs2) and a in (select b from cs2);
+a
+2
+select * from cs1 where a in (1,3) and a in (select b from cs2);
+a
+1
+3
+select * from cs1 where a in (select b from cs2 where b in (select a from cs1));
+a
+1
+2
+3
+select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a in (2,3)));
+a
+2
+3
+select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a not in (2,3)));
+a
+1
+select * from cs1 where a in (select b from cs2 where b=3);
+a
+3
+select * from cs1 where a in (select b from cs2 where b=3 or c=200);
+a
+2
+3
+select * from cs1 where a in (select b from cs2 where b is not null);
+a
+1
+2
+3
+select * from cs1 where a in (select b from cs2 group by b);
+a
+1
+2
+3
+select * from cs1 where a in (select count(*) from cs2 group by b);
+a
+1
+2
+3
+select * from cs1 where a in (select count(*) from cs2 group by b having count(*) < 3);
+a
+1
+2
+select * from cs1 where a in (select count(*) from cs2 where b <> 2 group by b having count(*) < 3);
+a
+2
+select * from cs1 where a in (select count(c) over (partition by b) from cs2);
+a
+1
+2
+3
+select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b is null);
+a
+1
+select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b <> 2);
+a
+2
+3
+select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b);
+a
+1
+2
+3
+select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3);
+a
+1
+2
+select * from cs1 where a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c);
+a
+1
+2
+3
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) order by 1,2,3;
+a	b	c
+1	1	100
+1	1	101
+2	2	200
+3	3	300
+3	3	301
+3	3	302
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) and cs1.a=1;
+a	b	c
+1	1	100
+1	1	101
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3;
+a	b	c
+1	1	100
+1	1	101
+select * from cs1 where a not in (select b from cs2);
+a
+select * from cs1 where a not in (select c from cs2);
+a
+select * from cs1 where (a+a) not in (select (b+b) from cs2);
+a
+select * from cs1 where (a+1) not in (select b from cs2);
+a
+select * from cs1 where a is not null and a not in (select b from cs2);
+a
+select * from cs1 where a not in (select b from cs2) and a is null;
+a
+select * from cs1 where a not in (select 2 from cs2) and a not in (select b from cs2);
+a
+select * from cs1 where a in (1,3) and a not in (select b from cs2);
+a
+select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1));
+a
+1
+2
+3
+4
+NULL
+select * from cs1 where a not in (select b from cs2 where b=3 or c=200);
+a
+1
+4
+select * from cs1 where a not in (select b from cs2 where b is not null);
+a
+4
+select * from cs1 where a not in (select b from cs2 group by b);
+a
+select * from cs1 where a not in (select count(*) from cs2 group by b);
+a
+4
+select * from cs1 where a not in (select count(*) from cs2 group by b having count(*) < 3);
+a
+3
+4
+select * from cs1 where a not in (select count(*) from cs2 where b <> 2 group by b having count(*) < 3);
+a
+1
+3
+4
+select * from cs1 where a not in (select count(c) over (partition by b) from cs2);
+a
+4
+select * from cs1 where a not in (select count(*) over (partition by b) from cs2 where b <> 2);
+a
+1
+4
+select * from cs1 where a not in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b);
+a
+4
+select * from cs1 where a not in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3);
+a
+3
+4
+select * from cs1 where a not in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c);
+a
+4
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2);
+a	b	c
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2) and cs1.a=1;
+a	b	c
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3;
+a	b	c
+2	2	200
+3	3	300
+3	3	301
+3	3	302
+select * from cs1 where a not in (select b from cs2 where b is not null);
+a
+4
+select * from cs1 where hex(a*10) not in (select hex(b*10) from cs2 where b is not null);
+a
+4
+select * from cs1 where a is not null and a not in (select b from cs2 where b is not null);
+a
+4
+select * from cs1 where a not in (select b from cs2 where b is not null) and a is null;
+a
+select * from cs1 where a not in (select 2 from cs2) and a not in (select b from cs2 where b is not null);
+a
+4
+select * from cs1 where a in (1,3) and a not in (select b from cs2 where b is not null);
+a
+select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1 where a is not null) and b is not null);
+a
+1
+2
+3
+4
+NULL
+select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1 where a not in (2,3) and a is not null) and b is not null);
+a
+1
+4
+select * from cs1 where a not in (select b from cs2 where b is not null group by b);
+a
+4
+select * from cs1 where a not in (select count(*) from cs2 where b is not null group by b);
+a
+4
+select * from cs1 where a not in (select count(*) from cs2 where b is not null group by b having count(*) < 3);
+a
+3
+4
+select * from cs1 where a not in (select count(c) over (partition by b) from cs2 where b is not null);
+a
+4
+select * from cs1 where a not in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b is not null);
+a
+4
+select * from cs1 where a not in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c where t1.b is not null);
+a
+4
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2 where b is not null);
+a	b	c
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2 where b is not null) and cs1.a=1;
+a	b	c
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select t1.b from (select b from cs2 where b is not null) t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3;
+a	b	c
+2	2	200
+3	3	300
+3	3	301
+3	3	302
+select * from cs1 where a in (select b from cs2 where b is null);
+a
+select * from cs1 where a in (select b from cs2 where b is not null);
+a
+1
+2
+3
+select * from cs1 where a not in (select b from cs2 where b is null);
+a
+drop table cs1;
+create table cs1 (a int, d int);
+insert into cs1 values (1,100), (2,201), (3,302), (4,4000), (null,null);
+select * from cs1 where (a,d) in (select b,c from cs2);
+a	d
+1	100
+3	302
+select * from cs1 where ((a+a),(d+d)) in (select (b+b),(c+c) from cs2);
+a	d
+1	100
+3	302
+select * from cs1 where ((a+1),(d+1)) in (select b,c from cs2);
+a	d
+select * from cs1 where (hex(a*10),hex(d*10)) in (select hex(b*10),hex(c*10) from cs2);
+a	d
+1	100
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2 where cs1.a=cs2.c-299);
+a	d
+3	302
+select * from cs1 where a is not null and (a,d) in (select b,c from cs2);
+a	d
+1	100
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2) and a is null;
+a	d
+select * from cs1 where (a,d) in (select 2,201 from cs2) and (a,d) in (select b,c from cs2);
+a	d
+select * from cs1 where a in (1,3) and (a,d) in (select b,c from cs2);
+a	d
+1	100
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2 where (b,c) in (select a,d from cs1));
+a	d
+1	100
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2 where (b,c) in (select a,d from cs1 where a in (2,3)));
+a	d
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2 where (b,c) in (select a,d from cs1 where a not in (2,3)));
+a	d
+1	100
+select * from cs1 where (a,d) in (select b,c from cs2 where b=3);
+a	d
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2 where b=3 or c=200);
+a	d
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2 where b is not null);
+a	d
+1	100
+3	302
+select * from cs1 where (a,d) in (select b,c from cs2 group by b,c);
+a	d
+1	100
+3	302
+select * from cs1 where (a,d) in (select count(*),c from cs2 group by c);
+a	d
+1	100
+select * from cs1 where (a,d) in (select count(*),c from cs2 group by c having count(*) < 3);
+a	d
+1	100
+select * from cs1 where (a,d) in (select count(*),c from cs2 where b <> 2 group by c having count(*) < 3);
+a	d
+1	100
+select * from cs1 where (a,d) in (select count(c) over (partition by b), (count(b) over (partition by c))*100 from cs2);
+a	d
+1	100
+select * from cs1 where (a,d) in (select count(*) over (partition by b), (count(*) over (partition by c))*100 from cs2 where b is null);
+a	d
+1	100
+select * from cs1 where (a,d) in (select count(*) over (partition by b), (count(*) over (partition by c))*100 from cs2 where b <> 2);
+a	d
+select * from cs1 where (a,d) in (select t1.b,t1.c from cs2 t1, cs2 t2 where t1.b=t2.b);
+a	d
+1	100
+3	302
+select * from cs1 where (a,d) in (select t1.b,t1.c from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3);
+a	d
+1	100
+select * from cs1 where (a,d) in (select t1.b,t1.c from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c);
+a	d
+1	100
+3	302
+select * from cs1 join cs2 on cs1.a=cs2.b and (cs1.a,cs1.d) in (select b,c from cs2) order by 1,2,3,4;
+a	d	b	c
+1	100	1	100
+1	100	1	101
+3	302	3	300
+3	302	3	301
+3	302	3	302
+select * from cs1 join cs2 on cs1.a=cs2.b and (cs1.a,cs1.d) in (select b,c from cs2) and cs1.a=1 order by 1,2,3,4;
+a	d	b	c
+1	100	1	100
+1	100	1	101
+select * from cs1 join cs2 on cs1.a=cs2.b and (cs1.a,cs1.d) in (select t1.b,t1.c from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1);
+a	d	b	c
+1	100	1	100
+1	100	1	101
+drop table cs1;
+create table cs1 (a int);
+insert into cs1 values (1), (2), (3), (4), (null);
+prepare stmt from "select * from cs1 where a in (select b from cs2)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select c from cs2)";
+execute stmt;
+a
+execute stmt;
+a
+prepare stmt from "select * from cs1 where (a+a) in (select (b+b) from cs2)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where (a+1) in (select b from cs2)";
+execute stmt;
+a
+1
+2
+execute stmt;
+a
+1
+2
+prepare stmt from "select * from cs1 where hex(a*10) in (select hex(b*10) from cs2)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2 where cs1.a=cs2.c-299)";
+execute stmt;
+a
+3
+execute stmt;
+a
+3
+prepare stmt from "select * from cs1 where a is not null and a in (select b from cs2)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2) and a is null";
+execute stmt;
+a
+execute stmt;
+a
+prepare stmt from "select * from cs1 where a in (select 2 from cs2) and a in (select b from cs2)";
+execute stmt;
+a
+2
+execute stmt;
+a
+2
+prepare stmt from "select * from cs1 where a in (1,3) and a in (select b from cs2)";
+execute stmt;
+a
+1
+3
+execute stmt;
+a
+1
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b in (select a from cs1))";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a in (2,3)))";
+execute stmt;
+a
+2
+3
+execute stmt;
+a
+2
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a not in (2,3)))";
+execute stmt;
+a
+1
+execute stmt;
+a
+1
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b=3)";
+execute stmt;
+a
+3
+execute stmt;
+a
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b=3 or c=200)";
+execute stmt;
+a
+2
+3
+execute stmt;
+a
+2
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b is not null)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select b from cs2 group by b)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select count(*) from cs2 group by b)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select count(*) from cs2 group by b having count(*) < 3)";
+execute stmt;
+a
+1
+2
+execute stmt;
+a
+1
+2
+prepare stmt from "select * from cs1 where a in (select count(*) from cs2 where b <> 2 group by b having count(*) < 3)";
+execute stmt;
+a
+2
+execute stmt;
+a
+2
+prepare stmt from "select * from cs1 where a in (select count(c) over (partition by b) from cs2)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b is null)";
+execute stmt;
+a
+1
+execute stmt;
+a
+1
+prepare stmt from "select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b <> 2)";
+execute stmt;
+a
+2
+3
+execute stmt;
+a
+2
+3
+prepare stmt from "select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3)";
+execute stmt;
+a
+1
+2
+execute stmt;
+a
+1
+2
+prepare stmt from "select * from cs1 where a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c)";
+execute stmt;
+a
+1
+2
+3
+execute stmt;
+a
+1
+2
+3
+prepare stmt from "select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) order by 1,2,3";
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+2	2	200
+3	3	300
+3	3	301
+3	3	302
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+2	2	200
+3	3	300
+3	3	301
+3	3	302
+prepare stmt from "select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) and cs1.a=1 order by 1,2,3";
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+prepare stmt from "select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3";
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+execute stmt;
+a	b	c
+1	1	100
+1	1	101
+drop database test_mcol4617;
--- a/mysql-test/columnstore/basic/t/mcol_4617.test
+++ b/mysql-test/columnstore/basic/t/mcol_4617.test
@ -0,0 +1,336 @@
+#
+# Test IN subquery
+#
+# Note that some queries in the tests below are commented out
+# due to differing behaviour from InnoDB. MCOL-4641 tracks this
+# issue. Uncomment the queries again once the issue is fixed.
+#
+
+--source ../include/have_columnstore.inc
+
+set default_storage_engine=columnstore;
+
+--disable_warnings
+drop database if exists test_mcol4617;
+--enable_warnings
+
+create database test_mcol4617;
+use test_mcol4617;
+
+create table cs1 (a int);
+insert into cs1 values (1), (2), (3), (4), (null);
+
+create table cs2 (b int, c int);
+insert into cs2 values (1, 100), (1, 101), (2, 200),
+(3, 300), (3, 301), (3, 302), (null, null);
+
+# Single column case
+## IN subquery
+### Basic tests
+select * from cs1 where a in (select b from cs2);
+select * from cs1 where a in (select c from cs2);
+select * from cs1 where (a+a) in (select (b+b) from cs2);
+select * from cs1 where (a+1) in (select b from cs2);
+select * from cs1 where hex(a*10) in (select hex(b*10) from cs2);
+
+### Correlated IN subquery
+select * from cs1 where a in (select b from cs2 where cs1.a=cs2.c-299);
+
+### Outer query containing additional WHERE predicates
+select * from cs1 where a is not null and a in (select b from cs2);
+select * from cs1 where a in (select b from cs2) and a is null;
+select * from cs1 where a in (select 2 from cs2) and a in (select b from cs2);
+select * from cs1 where a in (1,3) and a in (select b from cs2);
+
+### Nested IN predicates
+select * from cs1 where a in (select b from cs2 where b in (select a from cs1));
+select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a in (2,3)));
+select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a not in (2,3)));
+
+### WHERE predicates in the IN subquery
+select * from cs1 where a in (select b from cs2 where b=3);
+select * from cs1 where a in (select b from cs2 where b=3 or c=200);
+select * from cs1 where a in (select b from cs2 where b is not null);
+
+### GROUP BY and HAVING predicates in the IN subquery
+select * from cs1 where a in (select b from cs2 group by b);
+select * from cs1 where a in (select count(*) from cs2 group by b);
+select * from cs1 where a in (select count(*) from cs2 group by b having count(*) < 3);
+select * from cs1 where a in (select count(*) from cs2 where b <> 2 group by b having count(*) < 3);
+
+### Window functions in the SELECT clause of IN subquery
+select * from cs1 where a in (select count(c) over (partition by b) from cs2);
+select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b is null);
+select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b <> 2);
+
+### IN subquery containing joins
+select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b);
+select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3);
+select * from cs1 where a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c);
+
+### Outer query containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) order by 1,2,3;
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) and cs1.a=1;
+
+### Both IN subquery and outer queries containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3;
+
+## NOT IN subquery
+### Basic tests
+select * from cs1 where a not in (select b from cs2);
+select * from cs1 where a not in (select c from cs2);
+select * from cs1 where (a+a) not in (select (b+b) from cs2);
+select * from cs1 where (a+1) not in (select b from cs2);
+#select * from cs1 where hex(a*10) not in (select hex(b*10) from cs2);
+
+### Outer query containing additional WHERE predicates
+select * from cs1 where a is not null and a not in (select b from cs2);
+select * from cs1 where a not in (select b from cs2) and a is null;
+select * from cs1 where a not in (select 2 from cs2) and a not in (select b from cs2);
+select * from cs1 where a in (1,3) and a not in (select b from cs2);
+
+### Nested IN predicates
+select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1));
+#select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1 where a in (2,3)));
+#select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1 where a not in (2,3)));
+
+### WHERE predicates in the IN subquery
+#select * from cs1 where a not in (select b from cs2 where b=3);
+select * from cs1 where a not in (select b from cs2 where b=3 or c=200);
+select * from cs1 where a not in (select b from cs2 where b is not null);
+
+### GROUP BY and HAVING predicates in the IN subquery
+select * from cs1 where a not in (select b from cs2 group by b);
+select * from cs1 where a not in (select count(*) from cs2 group by b);
+select * from cs1 where a not in (select count(*) from cs2 group by b having count(*) < 3);
+select * from cs1 where a not in (select count(*) from cs2 where b <> 2 group by b having count(*) < 3);
+
+### Window functions in the SELECT clause of IN subquery
+select * from cs1 where a not in (select count(c) over (partition by b) from cs2);
+#select * from cs1 where a not in (select count(*) over (partition by b) from cs2 where b is null);
+select * from cs1 where a not in (select count(*) over (partition by b) from cs2 where b <> 2);
+
+### IN subquery containing joins
+select * from cs1 where a not in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b);
+select * from cs1 where a not in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3);
+select * from cs1 where a not in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c);
+
+### Outer query containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2);
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2) and cs1.a=1;
+
+### Both IN subquery and outer queries containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3;
+
+## NOT IN subquery without NULLs
+### Basic tests
+select * from cs1 where a not in (select b from cs2 where b is not null);
+#select * from cs1 where a not in (select c from cs2 where b is not null);
+#select * from cs1 where (a+a) not in (select (b+b) from cs2 where b is not null);
+#select * from cs1 where (a+1) not in (select b from cs2 where b is not null);
+select * from cs1 where hex(a*10) not in (select hex(b*10) from cs2 where b is not null);
+
+### Outer query containing additional WHERE predicates
+select * from cs1 where a is not null and a not in (select b from cs2 where b is not null);
+select * from cs1 where a not in (select b from cs2 where b is not null) and a is null;
+select * from cs1 where a not in (select 2 from cs2) and a not in (select b from cs2 where b is not null);
+select * from cs1 where a in (1,3) and a not in (select b from cs2 where b is not null);
+
+### Nested IN predicates
+select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1 where a is not null) and b is not null);
+#select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1 where a in (2,3) and a is not null) and b is not null);
+select * from cs1 where a not in (select b from cs2 where b not in (select a from cs1 where a not in (2,3) and a is not null) and b is not null);
+
+### GROUP BY and HAVING predicates in the IN subquery
+select * from cs1 where a not in (select b from cs2 where b is not null group by b);
+select * from cs1 where a not in (select count(*) from cs2 where b is not null group by b);
+select * from cs1 where a not in (select count(*) from cs2 where b is not null group by b having count(*) < 3);
+
+### Window functions in the SELECT clause of IN subquery
+select * from cs1 where a not in (select count(c) over (partition by b) from cs2 where b is not null);
+
+### IN subquery containing joins
+select * from cs1 where a not in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b is not null);
+select * from cs1 where a not in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c where t1.b is not null);
+
+### Outer query containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2 where b is not null);
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select b from cs2 where b is not null) and cs1.a=1;
+
+### Both IN subquery and outer queries containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a not in (select t1.b from (select b from cs2 where b is not null) t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3;
+
+# Special cases involving NULLs
+select * from cs1 where a in (select b from cs2 where b is null);
+select * from cs1 where a in (select b from cs2 where b is not null);
+select * from cs1 where a not in (select b from cs2 where b is null);
+
+# Row column case
+drop table cs1;
+create table cs1 (a int, d int);
+insert into cs1 values (1,100), (2,201), (3,302), (4,4000), (null,null);
+
+## IN subquery
+### Basic tests
+select * from cs1 where (a,d) in (select b,c from cs2);
+select * from cs1 where ((a+a),(d+d)) in (select (b+b),(c+c) from cs2);
+select * from cs1 where ((a+1),(d+1)) in (select b,c from cs2);
+select * from cs1 where (hex(a*10),hex(d*10)) in (select hex(b*10),hex(c*10) from cs2);
+
+### Correlated IN subquery
+select * from cs1 where (a,d) in (select b,c from cs2 where cs1.a=cs2.c-299);
+
+### Outer query containing additional WHERE predicates
+select * from cs1 where a is not null and (a,d) in (select b,c from cs2);
+select * from cs1 where (a,d) in (select b,c from cs2) and a is null;
+select * from cs1 where (a,d) in (select 2,201 from cs2) and (a,d) in (select b,c from cs2);
+select * from cs1 where a in (1,3) and (a,d) in (select b,c from cs2);
+
+### Nested IN predicates
+select * from cs1 where (a,d) in (select b,c from cs2 where (b,c) in (select a,d from cs1));
+select * from cs1 where (a,d) in (select b,c from cs2 where (b,c) in (select a,d from cs1 where a in (2,3)));
+select * from cs1 where (a,d) in (select b,c from cs2 where (b,c) in (select a,d from cs1 where a not in (2,3)));
+
+### WHERE predicates in the IN subquery
+select * from cs1 where (a,d) in (select b,c from cs2 where b=3);
+select * from cs1 where (a,d) in (select b,c from cs2 where b=3 or c=200);
+select * from cs1 where (a,d) in (select b,c from cs2 where b is not null);
+
+### GROUP BY and HAVING predicates in the IN subquery
+select * from cs1 where (a,d) in (select b,c from cs2 group by b,c);
+select * from cs1 where (a,d) in (select count(*),c from cs2 group by c);
+select * from cs1 where (a,d) in (select count(*),c from cs2 group by c having count(*) < 3);
+select * from cs1 where (a,d) in (select count(*),c from cs2 where b <> 2 group by c having count(*) < 3);
+
+### Window functions in the SELECT clause of IN subquery
+select * from cs1 where (a,d) in (select count(c) over (partition by b), (count(b) over (partition by c))*100 from cs2);
+select * from cs1 where (a,d) in (select count(*) over (partition by b), (count(*) over (partition by c))*100 from cs2 where b is null);
+select * from cs1 where (a,d) in (select count(*) over (partition by b), (count(*) over (partition by c))*100 from cs2 where b <> 2);
+
+### IN subquery containing joins
+select * from cs1 where (a,d) in (select t1.b,t1.c from cs2 t1, cs2 t2 where t1.b=t2.b);
+select * from cs1 where (a,d) in (select t1.b,t1.c from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3);
+select * from cs1 where (a,d) in (select t1.b,t1.c from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c);
+
+### Outer query containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and (cs1.a,cs1.d) in (select b,c from cs2) order by 1,2,3,4;
+select * from cs1 join cs2 on cs1.a=cs2.b and (cs1.a,cs1.d) in (select b,c from cs2) and cs1.a=1 order by 1,2,3,4;
+
+### Both IN subquery and outer queries containing joins
+select * from cs1 join cs2 on cs1.a=cs2.b and (cs1.a,cs1.d) in (select t1.b,t1.c from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1);
+
+# Prepared statement tests
+drop table cs1;
+create table cs1 (a int);
+insert into cs1 values (1), (2), (3), (4), (null);
+
+### Basic tests
+prepare stmt from "select * from cs1 where a in (select b from cs2)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select c from cs2)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where (a+a) in (select (b+b) from cs2)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where (a+1) in (select b from cs2)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where hex(a*10) in (select hex(b*10) from cs2)";
+execute stmt;
+execute stmt;
+
+### Correlated IN subquery
+prepare stmt from "select * from cs1 where a in (select b from cs2 where cs1.a=cs2.c-299)";
+execute stmt;
+execute stmt;
+
+### Outer query containing additional WHERE predicates
+prepare stmt from "select * from cs1 where a is not null and a in (select b from cs2)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select b from cs2) and a is null";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select 2 from cs2) and a in (select b from cs2)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (1,3) and a in (select b from cs2)";
+execute stmt;
+execute stmt;
+
+### Nested IN predicates
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b in (select a from cs1))";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a in (2,3)))";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b in (select a from cs1 where a not in (2,3)))";
+execute stmt;
+execute stmt;
+
+### WHERE predicates in the IN subquery
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b=3)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b=3 or c=200)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select b from cs2 where b is not null)";
+execute stmt;
+execute stmt;
+
+### GROUP BY and HAVING predicates in the IN subquery
+prepare stmt from "select * from cs1 where a in (select b from cs2 group by b)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select count(*) from cs2 group by b)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select count(*) from cs2 group by b having count(*) < 3)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select count(*) from cs2 where b <> 2 group by b having count(*) < 3)";
+execute stmt;
+execute stmt;
+
+### Window functions in the SELECT clause of IN subquery
+prepare stmt from "select * from cs1 where a in (select count(c) over (partition by b) from cs2)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b is null)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select count(*) over (partition by b) from cs2 where b <> 2)";
+execute stmt;
+execute stmt;
+
+### IN subquery containing joins
+prepare stmt from "select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select t1.b from cs2 t1, cs2 t2 where t1.b=t2.b and t1.b <> 3)";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 where a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.c=t2.c)";
+execute stmt;
+execute stmt;
+
+### Outer query containing joins
+prepare stmt from "select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) order by 1,2,3";
+execute stmt;
+execute stmt;
+prepare stmt from "select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select b from cs2) and cs1.a=1 order by 1,2,3";
+execute stmt;
+execute stmt;
+
+### Both IN subquery and outer queries containing joins
+prepare stmt from "select * from cs1 join cs2 on cs1.a=cs2.b and cs1.a in (select t1.b from cs2 t1 join cs2 t2 on t1.b=t2.b and t1.b=1) order by 1,2,3";
+execute stmt;
+execute stmt;
+execute stmt;
+execute stmt;
+
+drop database test_mcol4617;