mariadb-columnstore-engine/dbcon/joblist/joblistfactory.cpp

/* Copyright (C) 2014 InfiniDB, Inc.
   Copyright (C) 2019 MariaDB Corporation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA. */

//   $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $

#include <iostream>
#include <stack>
#include <iterator>
#include <algorithm>
//#define NDEBUG
#include <cassert>
#include <vector>
#include <set>
#include <map>
#include <limits>
using namespace std;

#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/uuid/uuid_io.hpp>
using namespace boost;

#include "joblistfactory.h"

#include "calpontexecutionplan.h"
#include "calpontselectexecutionplan.h"
#include "calpontsystemcatalog.h"
#include "dbrm.h"
#include "filter.h"
#include "simplefilter.h"
#include "constantfilter.h"
#include "existsfilter.h"
#include "selectfilter.h"
#include "returnedcolumn.h"
#include "aggregatecolumn.h"
#include "windowfunctioncolumn.h"
#include "arithmeticcolumn.h"
#include "constantcolumn.h"
#include "functioncolumn.h"
#include "groupconcatcolumn.h"
#include "pseudocolumn.h"
#include "simplecolumn.h"
#include "rowcolumn.h"
#include "treenodeimpl.h"
#include "udafcolumn.h"
using namespace execplan;

#include "configcpp.h"
using namespace config;

#include "messagelog.h"
using namespace logging;

#include "elementtype.h"
#include "joblist.h"
#include "jobstep.h"
#include "primitivestep.h"
#include "jl_logger.h"
#include "jlf_execplantojoblist.h"
#include "rowaggregation.h"
#include "tuplehashjoin.h"
#include "tupleunion.h"
#include "expressionstep.h"
#include "tupleconstantstep.h"
#include "tuplehavingstep.h"
#include "windowfunctionstep.h"

#include "jlf_common.h"
#include "jlf_graphics.h"
#include "jlf_subquery.h"
#include "jlf_tuplejoblist.h"

#include "rowgroup.h"
using namespace rowgroup;

#include "mcsv1_udaf.h"

namespace
{
using namespace joblist;


void projectSimpleColumn(const SimpleColumn* sc, JobStepVector& jsv, JobInfo& jobInfo)
{
    if (sc == NULL)
        throw logic_error("projectSimpleColumn: sc is null");

    CalpontSystemCatalog::OID oid = sc->oid();
    CalpontSystemCatalog::OID tbl_oid = tableOid(sc, jobInfo.csc);
    string alias(extractTableAlias(sc));
    string view(sc->viewName());
    CalpontSystemCatalog::OID dictOid = 0;
    CalpontSystemCatalog::ColType ct;
    pColStep* pcs = NULL;
    pDictionaryStep* pds = NULL;
    bool tokenOnly = false;
    TupleInfo ti;

    if (!sc->schemaName().empty())
    {
        SJSTEP sjstep;

//      always tuples after release 3.0
//		if (!jobInfo.tryTuples)
//			jobInfo.tables.insert(make_table(sc->schemaName(), sc->tableName()));

//		if (jobInfo.trace)
//			cout << "doProject Emit pCol for SimpleColumn " << oid << endl;

        const PseudoColumn* pc = dynamic_cast<const PseudoColumn*>(sc);
        ct = sc->colType();

//XXX use this before connector sets colType in sc correctly.
//    type of pseudo column is set by connector
        if (sc->isColumnStore() && !pc)
            ct = jobInfo.csc->colType(sc->oid());

//X
        if (pc == NULL)
            pcs = new pColStep(oid, tbl_oid, ct, jobInfo);
        else
            pcs = new PseudoColStep(oid, tbl_oid, pc->pseudoType(), ct, jobInfo);

        pcs->alias(alias);
        pcs->view(view);
        pcs->name(sc->columnName());
        pcs->cardinality(sc->cardinality());
        //pcs->setOrderRids(true);

        sjstep.reset(pcs);
        jsv.push_back(sjstep);

        dictOid = isDictCol(ct);
        ti = setTupleInfo(ct, oid, jobInfo, tbl_oid, sc, alias);
        pcs->tupleId(ti.key);

        if (dictOid > 0 && jobInfo.hasAggregation)
        {
            map<uint32_t, bool>::iterator it =
                jobInfo.tokenOnly.find(getTupleKey(jobInfo, sc));

            if (it != jobInfo.tokenOnly.end())
                tokenOnly = it->second;
        }

        if (dictOid > 0 && !tokenOnly)
        {
            //This is a double-step step
//			if (jobInfo.trace)
//				cout << "doProject Emit pGetSignature for SimpleColumn " << dictOid << endl;

            pds = new pDictionaryStep(dictOid, tbl_oid, ct, jobInfo);
            jobInfo.keyInfo->dictOidToColOid[dictOid] = oid;
            pds->alias(alias);
            pds->view(view);
            pds->name(sc->columnName());
            pds->cardinality(sc->cardinality());
            //pds->setOrderRids(true);

            //Associate these two linked steps
            JobStepAssociation outJs;
            AnyDataListSPtr spdl1(new AnyDataList());
            RowGroupDL* dl1 = new RowGroupDL(1, jobInfo.fifoSize);
            spdl1->rowGroupDL(dl1);
            dl1->OID(oid);

            // not a tokenOnly column
            setTupleInfo(ct, dictOid, jobInfo, tbl_oid, sc, alias);
            jobInfo.tokenOnly[getTupleKey(jobInfo, sc)] = false;
            outJs.outAdd(spdl1);

            pcs->outputAssociation(outJs);
            pds->inputAssociation(outJs);

            sjstep.reset(pds);
            jsv.push_back(sjstep);

            oid = dictOid; // dictionary column
            ti = setTupleInfo(ct, oid, jobInfo, tbl_oid, sc, alias);
            pds->tupleId(ti.key);
            jobInfo.keyInfo->dictKeyMap[pcs->tupleId()] = ti.key;
        }
    }
    else // must be vtable mode
    {
        oid = (tbl_oid + 1) + sc->colPosition();
        ct = jobInfo.vtableColTypes[UniqId(oid, alias, "", "")];
        ti = setTupleInfo(ct, oid, jobInfo, tbl_oid, sc, alias);
    }

    if (dictOid > 0 && tokenOnly)
    {
        // scale is not used by string columns
        // borrow it to indicate token is used in projection, not the real string.
        ti.scale = 8;
    }

    jobInfo.pjColList.push_back(ti);
}

const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo)
{
    JobStepVector jsv;
    SJSTEP sjstep;

    for (unsigned i = 0; i < retCols.size(); i++)
    {
        const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(retCols[i].get());
        const WindowFunctionColumn* wc = NULL;

        if (sc != NULL)
        {
            projectSimpleColumn(sc, jsv, jobInfo);
        }
        else if ((wc = dynamic_cast<const WindowFunctionColumn*>(retCols[i].get())) != NULL)
        {
            //put place hold column in projection list
            uint64_t eid = wc->expressionId();
            CalpontSystemCatalog::ColType ct = wc->resultType();
            TupleInfo ti(setExpTupleInfo(ct, eid, retCols[i].get()->alias(), jobInfo));
            jobInfo.pjColList.push_back(ti);
        }
        else
        {
            const ArithmeticColumn* ac = NULL;
            const FunctionColumn* fc = NULL;
            const ConstantColumn* cc = NULL;
            uint64_t eid = -1;
            CalpontSystemCatalog::ColType ct;
            ExpressionStep* es = new ExpressionStep(jobInfo);
            es->expression(retCols[i], jobInfo);
            sjstep.reset(es);

            if ((ac = dynamic_cast<const ArithmeticColumn*>(retCols[i].get())) != NULL)
            {
                eid = ac->expressionId();
                ct = ac->resultType();
            }
            else if ((fc = dynamic_cast<const FunctionColumn*>(retCols[i].get())) != NULL)
            {
                eid = fc->expressionId();
                ct = fc->resultType();
            }
            else if ((cc = dynamic_cast<const ConstantColumn*>(retCols[i].get())) != NULL)
            {
                eid = cc->expressionId();
                ct = cc->resultType();
            }
            else
            {
                std::ostringstream errmsg;
                errmsg << "doProject: unhandled returned column: " << typeid(*retCols[i]).name();
                cerr << boldStart << errmsg.str() << boldStop << endl;
                throw logic_error(errmsg.str());
            }

            // set expression tuple Info
            TupleInfo ti(setExpTupleInfo(ct, eid, retCols[i].get()->alias(), jobInfo));
            uint32_t key = ti.key;

            if (retCols[i]->windowfunctionColumnList().size() > 0)
                jobInfo.expressionVec.push_back(key);
            else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key)
                     == jobInfo.expressionVec.end())
            {
                jobInfo.returnedExpressions.push_back(sjstep);
            }

            //put place hold column in projection list
            jobInfo.pjColList.push_back(ti);
        }
    }

    return jsv;
}

void checkHavingClause(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    TupleHavingStep* ths = new  TupleHavingStep(jobInfo);
    ths->expressionFilter(csep->having(), jobInfo);
    jobInfo.havingStep.reset(ths);

    // simple columns in select clause
    set<UniqId> scInSelect;

    for (RetColsVector::iterator		i  = jobInfo.nonConstCols.begin();
            i != jobInfo.nonConstCols.end();
            i++)
    {
        SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->get());

        if (sc != NULL)
        {
            if (sc->schemaName().empty())
                sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());

            scInSelect.insert(UniqId(sc));
        }
    }

    // simple columns in gruop by clause
    set<UniqId> scInGroupBy;

    for (RetColsVector::iterator  i  = csep->groupByCols().begin();
            i != csep->groupByCols().end();
            i++)
    {
        SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->get());

        if (sc != NULL)
        {
            if (sc->schemaName().empty() && sc->oid() == 0)
            {
                if (sc->colPosition() == -1)
                {
                    // from select subquery
                    SRCP ss = csep->returnedCols()[sc->orderPos()];
                    (*i) = ss;
                }
                else
                {
                    sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());
                }
            }

            scInGroupBy.insert(UniqId(sc));
        }
    }

    bool  aggInHaving = false;
    const vector<ReturnedColumn*>& columns = ths->columns();

    for (vector<ReturnedColumn*>::const_iterator i = columns.begin(); i != columns.end(); i++)
    {
        // evaluate aggregate columns in having
        AggregateColumn* agc = dynamic_cast<AggregateColumn*>(*i);

        if (agc)
        {
            addAggregateColumn(agc, -1, jobInfo.nonConstCols, jobInfo);
            aggInHaving = true;
        }
        else
        {
            // simple columns used in having and in group by clause must be in rowgroup
            SimpleColumn* sc = dynamic_cast<SimpleColumn*>(*i);

            if (sc != NULL)
            {
                if (sc->schemaName().empty())
                    sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());

                UniqId scId(sc);

                if (scInGroupBy.find(scId) != scInGroupBy.end() &&
                        scInSelect.find(scId)  == scInSelect.end())
                {
                    jobInfo.nonConstCols.push_back(SRCP(sc->clone()));
                }
            }
        }
    }

    if (aggInHaving == false)
    {
        // treated the same as where clause if no aggregate column in having.
        jobInfo.havingStep.reset();

        // parse the having expression
        ParseTree* filters = csep->having();

        if (filters != 0)
        {
            JLF_ExecPlanToJobList::walkTree(filters, jobInfo);
        }

        if (!jobInfo.stack.empty())
        {
            idbassert(jobInfo.stack.size() == 1);
            jobInfo.havingStepVec = jobInfo.stack.top();
            jobInfo.stack.pop();
        }
    }
}

void preProcessFunctionOnAggregation(const vector<SimpleColumn*>& scs,
                                     const vector<AggregateColumn*>& aggs,
                                     const vector<WindowFunctionColumn*>& wcs,
                                     JobInfo& jobInfo)
{
    // append the simple columns if not already projected
    set<UniqId> scProjected;

    for (RetColsVector::iterator i  = jobInfo.projectionCols.begin();
            i != jobInfo.projectionCols.end();
            i++)
    {
        SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->get());

        if (sc != NULL)
        {
            if (sc->schemaName().empty())
                sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());

            scProjected.insert(UniqId(sc));
        }
    }

    for (vector<SimpleColumn*>::const_iterator i = scs.begin(); i != scs.end(); i++)
    {
        if (scProjected.find(UniqId(*i)) == scProjected.end())
        {
            jobInfo.projectionCols.push_back(SRCP((*i)->clone()));
            scProjected.insert(UniqId(*i));
        }
    }

    // append the aggregate columns in arithmetic/function column to the projection list
    for (vector<AggregateColumn*>::const_iterator i = aggs.begin(); i != aggs.end(); i++)
    {
        addAggregateColumn(*i, -1, jobInfo.projectionCols, jobInfo);
        if (wcs.size() > 0)
        {
            jobInfo.nonConstDelCols.push_back(SRCP((*i)->clone()));
        }
    }
}

void checkReturnedColumns(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    for (uint64_t i = 0; i < jobInfo.deliveredCols.size(); i++)
    {
        if (NULL == dynamic_cast<const ConstantColumn*>(jobInfo.deliveredCols[i].get()))
            jobInfo.nonConstCols.push_back(jobInfo.deliveredCols[i]);
    }

    // save the original delivered non constant columns
    jobInfo.nonConstDelCols = jobInfo.nonConstCols;

    if (jobInfo.nonConstCols.size() != jobInfo.deliveredCols.size())
    {
        jobInfo.constantCol = CONST_COL_EXIST;

        // bug 2531, all constant column.
        if (jobInfo.nonConstCols.size() == 0)
        {
            if (csep->columnMap().size() > 0)
                jobInfo.nonConstCols.push_back((*(csep->columnMap().begin())).second);
            else
                jobInfo.constantCol = CONST_COL_ONLY;
        }
    }

    for (uint64_t i = 0; i < jobInfo.nonConstCols.size(); i++)
    {
        AggregateColumn* agc = dynamic_cast<AggregateColumn*>(jobInfo.nonConstCols[i].get());

        if (agc)
            addAggregateColumn(agc, i, jobInfo.nonConstCols, jobInfo);
    }

    if (csep->having() != NULL)
        checkHavingClause(csep, jobInfo);

    jobInfo.projectionCols = jobInfo.nonConstCols;

    for (uint64_t i = 0; i < jobInfo.nonConstCols.size(); i++)
    {
        const ArithmeticColumn* ac =
            dynamic_cast<const ArithmeticColumn*>(jobInfo.nonConstCols[i].get());
        const FunctionColumn* fc =
            dynamic_cast<const FunctionColumn*>(jobInfo.nonConstCols[i].get());

        if (ac != NULL && ac->aggColumnList().size() > 0)
        {
            jobInfo.nonConstCols[i]->outputIndex(i);
            preProcessFunctionOnAggregation(ac->simpleColumnList(), ac->aggColumnList(), ac->windowfunctionColumnList(), jobInfo);
        }
        else if (fc != NULL && fc->aggColumnList().size() > 0)
        {
            jobInfo.nonConstCols[i]->outputIndex(i);
            preProcessFunctionOnAggregation(fc->simpleColumnList(), fc->aggColumnList(), fc->windowfunctionColumnList(), jobInfo);
        }
    }
}

/*
This function is to get a unique non-constant column list for grouping.
After sub-query is supported, GROUP BY column can be a column from SELECT or FROM sub-queries,
which has empty schema name, and 0 oid (if SELECT).  In order to distinguish these columns,
data member fSequence is used to indicate the column position in FROM sub-query's select list,
the table OID for sub-query vtable is assumed to CNX_VTABLE_ID, the column OIDs for that vtable
is caculated based on this table OID and column position.
The data member fOrderPos is used to indicate the column position in the outer select clause,
this value is set to -1 if the column is not selected (implicit group by). For select sub-query,
the fSequence is not set, so orderPos is used to locate the column.
*/
void checkGroupByCols(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    // order by columns may be not in the select and [group by] clause
    const CalpontSelectExecutionPlan::OrderByColumnList& orderByCols = csep->orderByCols();

    for (uint64_t i = 0; i < orderByCols.size(); i++)
    {
        if (orderByCols[i]->orderPos() == (uint64_t)(-1))
        {
            // @bug 4531, skip window functions, should be already added.
            if (dynamic_cast<WindowFunctionColumn*>(orderByCols[i].get()) != NULL ||
                    orderByCols[i]->windowfunctionColumnList().size() > 0)
                continue;

            jobInfo.deliveredCols.push_back(orderByCols[i]);

            // @bug 3025
            // Append the non-aggregate orderby column to group by, if there is group by clause.
            // Duplicates will be removed by next if block.
            if (csep->groupByCols().size() > 0)
            {
                // Not an aggregate column and not an expression of aggregation.
                if (dynamic_cast<AggregateColumn*>(orderByCols[i].get()) == NULL &&
                        orderByCols[i]->aggColumnList().empty())
                    csep->groupByCols().push_back(orderByCols[i]);
            }
        }
    }

    if (csep->groupByCols().size() > 0)
    {
        set<UniqId> colInGroupBy;
        RetColsVector uniqGbCols;

        for (RetColsVector::iterator  i  = csep->groupByCols().begin();
                i != csep->groupByCols().end();
                i++)
        {
            // skip constant columns
            if (dynamic_cast<ConstantColumn*>(i->get()) != NULL)
                continue;

            ReturnedColumn* rc = i->get();
            SimpleColumn* sc = dynamic_cast<SimpleColumn*>(rc);

            bool selectSubquery = false;

            if (sc && sc->schemaName().empty() && sc->oid() == 0)
            {
                if (sc->colPosition() == -1)
                {
                    // from select subquery
                    // sc->orderPos() should NOT be -1 because it is a SELECT sub-query.
                    SRCP ss = csep->returnedCols()[sc->orderPos()];
                    (*i) = ss;
                    selectSubquery = true;

                    // At this point whatever sc pointed to is invalid
                    // update the rc and sc
                    rc = ss.get();
                    sc = dynamic_cast<SimpleColumn*>(rc);
                }
                else
                {
                    sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());
                }
            }

            UniqId col;

            if (sc)
                col = UniqId(sc);
            else
                col = UniqId(rc->expressionId(), rc->alias(), "", "");

            if (colInGroupBy.find(col) == colInGroupBy.end() || selectSubquery)
            {
                colInGroupBy.insert(col);
                uniqGbCols.push_back(*i);
            }
        }

        if (csep->groupByCols().size() != uniqGbCols.size())
            (csep)->groupByCols(uniqGbCols);
    }
}

void checkAggregation(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    checkGroupByCols(csep, jobInfo);
    checkReturnedColumns(csep, jobInfo);
    RetColsVector& retCols = jobInfo.projectionCols;

    jobInfo.hasDistinct = csep->distinct();

    // DISTINCT with window functions must be done in tupleannexstep
    if (csep->distinct() == true && jobInfo.windowDels.size() == 0)
    {
        jobInfo.hasAggregation = true;
    }
    else if (csep->groupByCols().size() > 0)
    {
        // groupby without aggregate functions is supported.
        jobInfo.hasAggregation = true;
    }
    else
    {
        for (uint64_t i = 0; i < retCols.size(); i++)
        {
            if (dynamic_cast<AggregateColumn*>(retCols[i].get()) != NULL)
            {
                jobInfo.hasAggregation = true;
                break;
            }
        }
    }
}

void updateAggregateColType(AggregateColumn* ac, const SRCP& srcp, int op, JobInfo& jobInfo)
{
    CalpontSystemCatalog::ColType ct;
    const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(srcp.get());
    const ArithmeticColumn* ar = NULL;
    const FunctionColumn* fc = NULL;

    if (sc != NULL)
        ct = sc->resultType();
    else if ((ar = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
        ct = ar->resultType();
    else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
        ct = fc->resultType();

    if (op == AggregateColumn::STDDEV_POP || op == AggregateColumn::STDDEV_SAMP ||
        op == AggregateColumn::VAR_POP    || op == AggregateColumn::VAR_SAMP)
    {
        ct.colWidth = sizeof(double);
        ct.colDataType = CalpontSystemCatalog::DOUBLE;
        ct.scale = 0;
        ct.precision = -1;
    }
    else if (op == AggregateColumn::UDAF)
    {
        UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(ac);

        if (udafc)
        {
            mcsv1sdk::mcsv1Context& udafContext = udafc->getContext();
            ct.colDataType = udafContext.getResultType();
            ct.colWidth = udafContext.getColWidth();
            ct.scale = udafContext.getScale();
            ct.precision = udafContext.getPrecision();
        }
        else
        {
            ct = ac->resultType();
        }
    }
    else
    {
        ct = ac->resultType();
    }

    ac->resultType(ct);

    // update the original if this aggregate column is cloned from function on aggregation
    pair<multimap<ReturnedColumn*, ReturnedColumn*>::iterator,
         multimap<ReturnedColumn*, ReturnedColumn*>::iterator> range =
             jobInfo.cloneAggregateColMap.equal_range(ac);

    for (multimap<ReturnedColumn*, ReturnedColumn*>::iterator i = range.first; i != range.second; ++i)
        (i->second)->resultType(ct);
}


const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    vector<uint32_t> projectKeys;    // projected column keys   -- unique
    RetColsVector pcv;               // projected column vector -- may have duplicates

    // add the groupby cols in the front part of the project column vector (pcv)
    const CalpontSelectExecutionPlan::GroupByColumnList& groupByCols = csep->groupByCols();
    uint64_t lastGroupByPos = 0;

    for (uint64_t i = 0; i < groupByCols.size(); i++)
    {
        pcv.push_back(groupByCols[i]);
        lastGroupByPos++;

        const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(groupByCols[i].get());
        const ArithmeticColumn* ac = NULL;
        const FunctionColumn* fc = NULL;

        if (sc != NULL)
        {
            CalpontSystemCatalog::OID gbOid = sc->oid();
            CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
            CalpontSystemCatalog::OID dictOid = 0;
            CalpontSystemCatalog::ColType ct;
            string alias(extractTableAlias(sc));
            string view(sc->viewName());

            if (!sc->schemaName().empty())
            {
                ct = sc->colType();

//XXX use this before connector sets colType in sc correctly.
                if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
                    ct = jobInfo.csc->colType(sc->oid());

//X
                dictOid = isDictCol(ct);
            }
            else
            {
                gbOid = (tblOid + 1) + sc->colPosition();
                ct = jobInfo.vtableColTypes[UniqId(gbOid, alias, "", "")];
            }

            // As of bug3695, make sure varbinary is not used in group by.
            if (ct.colDataType == CalpontSystemCatalog::VARBINARY)
                throw runtime_error ("VARBINARY in group by is not supported.");

            TupleInfo ti(setTupleInfo(ct, gbOid, jobInfo, tblOid, sc, alias));
            uint32_t tupleKey = ti.key;

            if (find(projectKeys.begin(), projectKeys.end(), tupleKey) == projectKeys.end())
                projectKeys.push_back(tupleKey);

            // for dictionary columns, replace the token oid with string oid
            if (dictOid > 0)
            {
                jobInfo.tokenOnly[tupleKey] = false;
                ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
                jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
                tupleKey = ti.key;
            }

            jobInfo.groupByColVec.push_back(tupleKey);
        }
        else if ((ac = dynamic_cast<const ArithmeticColumn*>(groupByCols[i].get())) != NULL)
        {
            uint64_t eid = ac->expressionId();
            CalpontSystemCatalog::ColType ct = ac->resultType();
            TupleInfo ti(setExpTupleInfo(ct, eid, ac->alias(), jobInfo));
            uint32_t tupleKey = ti.key;
            jobInfo.groupByColVec.push_back(tupleKey);

            if (find(projectKeys.begin(), projectKeys.end(), tupleKey) == projectKeys.end())
                projectKeys.push_back(tupleKey);
        }
        else if ((fc = dynamic_cast<const FunctionColumn*>(groupByCols[i].get())) != NULL)
        {
            uint64_t eid = fc->expressionId();
            CalpontSystemCatalog::ColType ct = fc->resultType();
            TupleInfo ti(setExpTupleInfo(ct, eid, fc->alias(), jobInfo));
            uint32_t tupleKey = ti.key;
            jobInfo.groupByColVec.push_back(tupleKey);

            if (find(projectKeys.begin(), projectKeys.end(), tupleKey) == projectKeys.end())
                projectKeys.push_back(tupleKey);
        }
        else
        {
            std::ostringstream errmsg;
            errmsg  << "doAggProject: unsupported group by column: "
                    << typeid(*groupByCols[i]).name();
            cerr    << boldStart << errmsg.str() << boldStop << endl;
            throw logic_error(errmsg.str());
        }
    }

    // process the returned columns
    RetColsVector& retCols = jobInfo.projectionCols;
    SRCP srcp;

    for (uint64_t i = 0; i < retCols.size(); i++)
    {
        GroupConcatColumn* gcc = dynamic_cast<GroupConcatColumn*>(retCols[i].get());

        if (gcc != NULL)
        {
            srcp = gcc->aggParms()[0];
            const RowColumn* rcp = dynamic_cast<const RowColumn*>(srcp.get());

            const vector<SRCP>& cols = rcp->columnVec();

            for (vector<SRCP>::const_iterator j = cols.begin(); j != cols.end(); j++)
            {
                if (dynamic_cast<const ConstantColumn*>(j->get()) == NULL)
                    retCols.push_back(*j);
            }

            vector<SRCP>& orderCols = gcc->orderCols();

            for (vector<SRCP>::iterator k = orderCols.begin(); k != orderCols.end(); k++)
            {
                if (dynamic_cast<const ConstantColumn*>(k->get()) == NULL)
                    retCols.push_back(*k);
            }

            continue;
        }

#if 0
        // MCOL-1201 Add support for multi-parameter UDAnF
        UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(retCols[i].get());

        if (udafc != NULL)
        {
            srcp = udafc->aggParms()[0];
            const RowColumn* rcp = dynamic_cast<const RowColumn*>(srcp.get());

            const vector<SRCP>& cols = rcp->columnVec();

            for (vector<SRCP>::const_iterator j = cols.begin(); j != cols.end(); j++)
            {
                srcp = *j;

                if (dynamic_cast<const ConstantColumn*>(srcp.get()) == NULL)
                    retCols.push_back(srcp);

                // Do we need this?
                const ArithmeticColumn* ac = dynamic_cast<const ArithmeticColumn*>(srcp.get());
                const FunctionColumn* fc = dynamic_cast<const FunctionColumn*>(srcp.get());

                if (ac != NULL || fc != NULL)
                {
                    // bug 3728, make a dummy expression step for each expression.
                    scoped_ptr<ExpressionStep> es(new ExpressionStep(jobInfo));
                    es->expression(srcp, jobInfo);
                }
            }

            continue;
        }

#endif
        srcp = retCols[i];
        const AggregateColumn* ag = dynamic_cast<const AggregateColumn*>(retCols[i].get());

        // bug 3728 Make a dummy expression for srcp if it is an
        // expression. This is needed to fill in some stuff.
        // Note that es.expression does nothing if the item is not an expression.
        if (ag == NULL)
        {
            // Not an aggregate. Make a dummy expression for the item
            ExpressionStep es;
            es.expression(srcp, jobInfo);
        }
        else
        {
            // MCOL-1201 multi-argument aggregate. make a dummy expression
            // step for each argument that is an expression.
            for (uint32_t i = 0; i < ag->aggParms().size(); ++i)
            {
                srcp = ag->aggParms()[i];
                ExpressionStep es;
                es.expression(srcp, jobInfo);
            }
        }
    }

    map<uint32_t, CalpontSystemCatalog::OID> dictMap; // bug 1853, the tupleKey - dictoid map

    for (uint64_t i = 0; i < retCols.size(); i++)
    {
        srcp = retCols[i];
        const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(srcp.get());
        AggregateColumn* aggc = dynamic_cast<AggregateColumn*>(srcp.get());
        bool doDistinct = (csep->distinct() && csep->groupByCols().empty());
    //    Use this instead of the above line to mimic MariaDB's sql_mode = 'ONLY_FULL_GROUP_BY'
    //    bool doDistinct = (csep->distinct() &&
    //                       csep->groupByCols().empty() &&
    //                       !jobInfo.hasAggregation);
        uint32_t tupleKey = -1;
        string alias;
        string view;

        // returned column could be groupby column, a simplecoulumn not an aggregatecolumn
        int op = 0;
        CalpontSystemCatalog::OID dictOid = 0;
        CalpontSystemCatalog::ColType ct, aggCt;

        if (aggc)
        {
            GroupConcatColumn* gcc = dynamic_cast<GroupConcatColumn*>(retCols[i].get());

            if (gcc != NULL)
            {
                jobInfo.groupConcatCols.push_back(retCols[i]);

                uint64_t eid = gcc->expressionId();
                ct = gcc->resultType();
                TupleInfo ti(setExpTupleInfo(ct, eid, gcc->alias(), jobInfo));
                tupleKey = ti.key;
                jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp()));
                // not a tokenOnly column. Mark all the columns involved
                srcp = gcc->aggParms()[0];
                const RowColumn* rowCol = dynamic_cast<const RowColumn*>(srcp.get());

                if (rowCol)
                {
                    const std::vector<SRCP>& cols = rowCol->columnVec();

                    for (vector<SRCP>::const_iterator j = cols.begin(); j != cols.end(); j++)
                    {
                        sc = dynamic_cast<const SimpleColumn*>(j->get());

                        if (sc)
                        {
                            CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
                            alias = extractTableAlias(sc);
                            ct = sc->colType();
                            TupleInfo ti(setTupleInfo(ct, sc->oid(), jobInfo, tblOid, sc, alias));
                            jobInfo.tokenOnly[ti.key] = false;
                        }
                    }
                }

                continue;
            }
            else
            {
                // Aggregate column not group concat
                AggParms& aggParms = aggc->aggParms();

                for (uint32_t parm = 0; parm < aggParms.size(); ++parm)
                {
                    // Only do the optimization of converting to count(*) if
                    // there is only one parameter.
                    if (aggParms.size() == 1 && aggc->constCol().get() != NULL)
                    {
                        // replace the aggregate on constant with a count(*)
                        SRCP clone;
                        UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(aggc);

                        if (udafc)
                        {
                            clone.reset(new UDAFColumn(*udafc, aggc->sessionID()));
                        }
                        else
                        {
                            clone.reset(new AggregateColumn(*aggc, aggc->sessionID()));
                        }

                        jobInfo.constAggregate.insert(make_pair(i, clone));
                        aggc->aggOp(AggregateColumn::COUNT_ASTERISK);
                        aggc->distinct(false);
                    }

                    srcp = aggParms[parm];
                    sc = dynamic_cast<const SimpleColumn*>(srcp.get());

                    if (parm == 0)
                    {
                        op = aggc->aggOp();
                    }
                    else
                    {
                        op = AggregateColumn::MULTI_PARM;
                    }

                    doDistinct = aggc->distinct();

                    if (aggParms.size() == 1)
                    {
                        // Set the col type based on the single parm.
                        // Changing col type based on a parm if multiple parms
                        // doesn't really make sense.
                        if (op != AggregateColumn::SUM && op != AggregateColumn::DISTINCT_SUM &&
                            op != AggregateColumn::AVG && op != AggregateColumn::DISTINCT_AVG)
                        {
                            updateAggregateColType(aggc, srcp, op, jobInfo);
                        }
                    }

                    aggCt = aggc->resultType();

                    // As of bug3695, make sure varbinary is not used in aggregation.
                    // TODO: allow for UDAF
                    if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY)
                        throw runtime_error ("VARBINARY in aggregate function is not supported.");

                    // Project the parm columns or expressions
                    if (sc != NULL)
                    {
                        CalpontSystemCatalog::OID retOid = sc->oid();
                        CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
                        alias = extractTableAlias(sc);
                        view = sc->viewName();

                        if (!sc->schemaName().empty())
                        {
                            ct = sc->colType();

                            //XXX use this before connector sets colType in sc correctly.
                            if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
                                ct = jobInfo.csc->colType(sc->oid());

                            //X
                            dictOid = isDictCol(ct);
                        }
                        else
                        {
                            retOid = (tblOid + 1) + sc->colPosition();
                            ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")];
                        }

                        TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias));
                        tupleKey = ti.key;

                        // this is a string column
                        if (dictOid > 0)
                        {
                            map<uint32_t, bool>::iterator findit = jobInfo.tokenOnly.find(tupleKey);

                            // if the column has never seen, and the op is count: possible need count only.
                            if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op)
                            {
                                if (findit == jobInfo.tokenOnly.end())
                                    jobInfo.tokenOnly[tupleKey] = true;
                            }
                            // if aggregate other than count, token is not enough.
                            else if (op != 0 || doDistinct)
                            {
                                jobInfo.tokenOnly[tupleKey] = false;
                            }

                            findit = jobInfo.tokenOnly.find(tupleKey);

                            if (!(findit != jobInfo.tokenOnly.end() && findit->second == true))
                            {
                                dictMap[tupleKey] = dictOid;
                                jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid;
                                ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
                                jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
                            }
                        }
                    }
                    else
                    {
                        const ArithmeticColumn* ac = NULL;
                        const FunctionColumn* fc = NULL;
                        const WindowFunctionColumn* wc = NULL;
                        bool hasAggCols = false;
                        bool hasWndCols = false;

                        if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
                        {
                            if (ac->aggColumnList().size() > 0)
                                hasAggCols = true;
                            if (ac->windowfunctionColumnList().size() > 0)
                                hasWndCols = true;
                        }
                        else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
                        {
                            if (fc->aggColumnList().size() > 0)
                                hasAggCols = true;
                            if (fc->windowfunctionColumnList().size() > 0)
                                hasWndCols = true;
                        }
                        else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
                        {
                            std::ostringstream errmsg;
                            errmsg << "Invalid aggregate function nesting.";
                            cerr << boldStart << errmsg.str() << boldStop << endl;
                            throw logic_error(errmsg.str());
                        }
                        else if (dynamic_cast<const ConstantColumn*>(srcp.get()) != NULL)
                        {
                        }
                        else if ((wc = dynamic_cast<const WindowFunctionColumn*>(srcp.get())) == NULL)
                        {
                            std::ostringstream errmsg;
                            errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name();
                            cerr << boldStart << errmsg.str() << boldStop << endl;
                            throw logic_error(errmsg.str());
                        }

                        uint64_t eid = srcp.get()->expressionId();
                        ct = srcp.get()->resultType();
                        TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo));
                        tupleKey = ti.key;

                        if (hasAggCols && !hasWndCols)
                            jobInfo.expressionVec.push_back(tupleKey);
                    }

                    // add to project list
                    vector<uint32_t>::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey);

                    if (keyIt == projectKeys.end())
                    {
                        RetColsVector::iterator it = pcv.end();

                        if (doDistinct)
                            it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp);
                        else
                            it = pcv.insert(pcv.end(), srcp);

                        projectKeys.insert(projectKeys.begin() + std::distance(pcv.begin(), it), tupleKey);
                    }
                    else if (doDistinct) // @bug4250, move forward distinct column if necessary.
                    {
                        uint32_t pos = std::distance(projectKeys.begin(), keyIt);

                        if (pos >= lastGroupByPos)
                        {
                            pcv[pos] = pcv[lastGroupByPos];
                            pcv[lastGroupByPos] = srcp;
                            projectKeys[pos] = projectKeys[lastGroupByPos];
                            projectKeys[lastGroupByPos] = tupleKey;
                            lastGroupByPos++;
                        }
                    }

                    if (doDistinct && dictOid > 0)
                        tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey];

                    // remember the columns to be returned
                    jobInfo.returnedColVec.push_back(make_pair(tupleKey, op));

                   // bug 1499 distinct processing, save unique distinct columns
                    if (doDistinct &&
                            (jobInfo.distinctColVec.end() ==
                             find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey)))
                    {
                        jobInfo.distinctColVec.push_back(tupleKey);
                    }
                }
            }
        }
        else
        {
            // Not an Aggregate
            // simple column selected
            if (sc != NULL)
            {
                // one column only need project once
                CalpontSystemCatalog::OID retOid = sc->oid();
                CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
                alias = extractTableAlias(sc);
                view = sc->viewName();

                if (!sc->schemaName().empty())
                {
                    ct = sc->colType();

                    //XXX use this before connector sets colType in sc correctly.
                    if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
                        ct = jobInfo.csc->colType(sc->oid());

                    //X
                    dictOid = isDictCol(ct);
                }
                else
                {
                    retOid = (tblOid + 1) + sc->colPosition();
                    ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")];
                }

                TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias));
                tupleKey = ti.key;

                // this is a string column
                if (dictOid > 0)
                {
                    map<uint32_t, bool>::iterator findit = jobInfo.tokenOnly.find(tupleKey);

                    // if the column has never seen, and the op is count: possible need count only.
                    if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op)
                    {
                        if (findit == jobInfo.tokenOnly.end())
                            jobInfo.tokenOnly[tupleKey] = true;
                    }
                    // if aggregate other than count, token is not enough.
                    else if (op != 0 || doDistinct)
                    {
                        jobInfo.tokenOnly[tupleKey] = false;
                    }

                    findit = jobInfo.tokenOnly.find(tupleKey);

                    if (!(findit != jobInfo.tokenOnly.end() && findit->second == true))
                    {
                        dictMap[tupleKey] = dictOid;
                        jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid;
                        ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
                        jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
                    }
                }
            }
            else
            {
                const ArithmeticColumn* ac = NULL;
                const FunctionColumn* fc = NULL;
                const WindowFunctionColumn* wc = NULL;
                bool hasAggCols = false;
                bool hasWndCols = false;

                if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
                {
                    if (ac->aggColumnList().size() > 0)
                        hasAggCols = true;
                    if (ac->windowfunctionColumnList().size() > 0)
                        hasWndCols = true;
                }
                else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
                {
                    if (fc->aggColumnList().size() > 0)
                        hasAggCols = true;
                    if (fc->windowfunctionColumnList().size() > 0)
                        hasWndCols = true;
                }
                else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
                {
                    std::ostringstream errmsg;
                    errmsg << "Invalid aggregate function nesting.";
                    cerr << boldStart << errmsg.str() << boldStop << endl;
                    throw logic_error(errmsg.str());
                }
                else if (dynamic_cast<const ConstantColumn*>(srcp.get()) != NULL)
                {
                }
                else if ((wc = dynamic_cast<const WindowFunctionColumn*>(srcp.get())) == NULL)
                {
                    std::ostringstream errmsg;
                    errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name();
                    cerr << boldStart << errmsg.str() << boldStop << endl;
                    throw logic_error(errmsg.str());
                }

                uint64_t eid = srcp.get()->expressionId();
                ct = srcp.get()->resultType();
                TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo));
                tupleKey = ti.key;

                if (hasAggCols && !hasWndCols)
                    jobInfo.expressionVec.push_back(tupleKey);
            }

            // add to project list
            vector<uint32_t>::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey);

            if (keyIt == projectKeys.end())
            {
                RetColsVector::iterator it = pcv.end();

                if (doDistinct)
                    it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp);
                else
                    it = pcv.insert(pcv.end(), srcp);

                projectKeys.insert(projectKeys.begin() + std::distance(pcv.begin(), it), tupleKey);
            }
            else if (doDistinct) // @bug4250, move forward distinct column if necessary.
            {
                uint32_t pos = std::distance(projectKeys.begin(), keyIt);

                if (pos >= lastGroupByPos)
                {
                    pcv[pos] = pcv[lastGroupByPos];
                    pcv[lastGroupByPos] = srcp;
                    projectKeys[pos] = projectKeys[lastGroupByPos];
                    projectKeys[lastGroupByPos] = tupleKey;
                    lastGroupByPos++;
                }
            }

            if (doDistinct && dictOid > 0)
                tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey];

            // remember the columns to be returned
            jobInfo.returnedColVec.push_back(make_pair(tupleKey, op));

            // bug 1499 distinct processing, save unique distinct columns
            if (doDistinct &&
                    (jobInfo.distinctColVec.end() ==
                     find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey)))
            {
                jobInfo.distinctColVec.push_back(tupleKey);
            }
        }
    }


    // for dictionary columns not count only, replace the token oid with string oid
    for (vector<pair<uint32_t, int> >::iterator it = jobInfo.returnedColVec.begin();
            it != jobInfo.returnedColVec.end(); it++)
    {
        // if the column is a dictionary column and not count only
        bool tokenOnly = false;
        map<uint32_t, bool>::iterator i = jobInfo.tokenOnly.find(it->first);

        if (i != jobInfo.tokenOnly.end())
            tokenOnly = i->second;

        if (dictMap.find(it->first) != dictMap.end() && !tokenOnly)
        {
            uint32_t tupleKey = jobInfo.keyInfo->dictKeyMap[it->first];
            int op = it->second;
            *it = make_pair(tupleKey, op);
        }
    }

    return doProject(pcv, jobInfo);
}


template <typename T>
class Uniqer : public unary_function<typename T::value_type, void>
{
private:
    typedef typename T::mapped_type Mt_;
    class Pred : public unary_function<const Mt_, bool>
    {
    public:
        Pred(const Mt_& retCol) : fRetCol(retCol) { }
        bool operator()(const Mt_ rc) const
        {
            return fRetCol->sameColumn(rc.get());
        }
    private:
        const Mt_& fRetCol;
    };
public:
    void operator()(typename T::value_type mapItem)
    {
        Pred pred(mapItem.second);
        RetColsVector::iterator iter;
        iter = find_if(fRetColsVec.begin(), fRetColsVec.end(), pred);

        if (iter == fRetColsVec.end())
        {
            //Add this ReturnedColumn
            fRetColsVec.push_back(mapItem.second);
        }
    }
    RetColsVector fRetColsVec;
};

uint16_t numberSteps(JobStepVector& steps, uint16_t stepNo, uint32_t flags)
{
    JobStepVector::iterator iter = steps.begin();
    JobStepVector::iterator end = steps.end();

    while (iter != end)
    {
        // don't number the delimiters
        //if (dynamic_cast<OrDelimiter*>(iter->get()) != NULL)
        //{
        //	++iter;
        //	continue;
        //}

        JobStep* pJobStep = iter->get();
        pJobStep->stepId(stepNo);
        pJobStep->setTraceFlags(flags);
        stepNo++;
        ++iter;
    }

    return stepNo;
}

void changePcolStepToPcolScan(JobStepVector::iterator& it, JobStepVector::iterator& end)
{
    // make sure no pseudo column is a scan column
    idbassert(dynamic_cast<PseudoColStep*>(it->get()) == NULL);

    pColStep* colStep = dynamic_cast<pColStep*>(it->get());
    pColScanStep* scanStep = 0;

    //Might be a pDictionaryScan step
    if (colStep)
    {
        scanStep = new pColScanStep(*colStep);
    }
    else
    {
        //If we have a pDictionaryScan-pColStep duo, then change the pColStep
        if (typeid(*(it->get())) == typeid(pDictionaryScan) &&
                std::distance(it, end) > 1 &&
                typeid(*((it + 1)->get())) == typeid(pColStep))
        {
            ++it;
            colStep = dynamic_cast<pColStep*>(it->get());
            scanStep = new pColScanStep(*colStep);
        }
    }

    if (scanStep)
    {
        it->reset(scanStep);
    }
}

// optimize filter order
//   perform none string filters first because string filter joins the tokens.
void optimizeFilterOrder(JobStepVector& qsv)
{
    // move all none string filters
    uint64_t pdsPos = 0;

//	int64_t  orbranch = 0;
    for (; pdsPos < qsv.size(); ++pdsPos)
    {
        // skip the or branches
//		OrDelimiterLhs* lhs = dynamic_cast<OrDelimiterLhs*>(qsv[pdsPos].get());
//		if (lhs != NULL)
//		{
//			orbranch++;
//			continue;
//		}
//
//		if (orbranch > 0)
//		{
//			UnionStep* us = dynamic_cast<UnionStep*>(qsv[pdsPos].get());
//			if (us)
//				orbranch--;
//		}
//		else
        {
            pDictionaryScan* pds = dynamic_cast<pDictionaryScan*>(qsv[pdsPos].get());

            if (pds)
                break;
        }
    }

    // no pDictionaryScan step
    if (pdsPos >= qsv.size())
        return;

    // get the filter steps that are not in or branches
    vector<uint64_t> pcolIdVec;
    JobStepVector pcolStepVec;

//	orbranch = 0;
    for (uint64_t i = pdsPos; i < qsv.size(); ++i)
    {
//		OrDelimiterLhs* lhs = dynamic_cast<OrDelimiterLhs*>(qsv[pdsPos].get());
//		if (lhs != NULL)
//		{
//			orbranch++;
//			continue;
//		}

//		if (orbranch > 0)
//		{
//			UnionStep* us = dynamic_cast<UnionStep*>(qsv[pdsPos].get());
//			if (us)
//				orbranch--;
//		}
//		else
        {
            pColStep* pcol = dynamic_cast<pColStep*>(qsv[i].get());

            if (pcol != NULL && pcol->filterCount() > 0)
                pcolIdVec.push_back(i);
        }
    }

    for (vector<uint64_t>::reverse_iterator r = pcolIdVec.rbegin(); r < pcolIdVec.rend(); ++r)
    {
        pcolStepVec.push_back(qsv[*r]);
        qsv.erase(qsv.begin() + (*r));
    }

    qsv.insert(qsv.begin() + pdsPos, pcolStepVec.rbegin(), pcolStepVec.rend());
}

void exceptionHandler(JobList* joblist, const JobInfo& jobInfo, const string& logMsg,
                      logging::LOG_TYPE logLevel = LOG_TYPE_ERROR)
{
    cerr << "### JobListFactory ses:" << jobInfo.sessionId << " caught: " << logMsg << endl;
    Message::Args args;
    args.add(logMsg);
    jobInfo.logger->logMessage(logLevel, LogMakeJobList, args,
                               LoggingID(5, jobInfo.sessionId, jobInfo.txnId, 0));
    // dummy delivery map, workaround for (qb == 2) in main.cpp
    DeliveredTableMap dtm;
    SJSTEP dummyStep;
    dtm[0] = dummyStep;
    joblist->addDelivery(dtm);
}


void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
                        JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps)
{
    ParseTree* filters = csep->filters();
    jobInfo.deliveredCols = csep->returnedCols();

    if (filters != 0)
    {
        JLF_ExecPlanToJobList::walkTree(filters, jobInfo);
    }

    if (jobInfo.trace)
        cout << endl << "Stack: " << endl;

    if (!jobInfo.stack.empty())
    {
        idbassert(jobInfo.stack.size() == 1);
        querySteps = jobInfo.stack.top();
        jobInfo.stack.pop();

        // do some filter order optimization
        optimizeFilterOrder(querySteps);
    }

    if (jobInfo.selectAndFromSubs.size() > 0)
    {
        querySteps.insert(querySteps.begin(),
                          jobInfo.selectAndFromSubs.begin(), jobInfo.selectAndFromSubs.end());
    }

    // bug4531, window function support
    WindowFunctionStep::checkWindowFunction(csep, jobInfo);

    // bug3391, move forward the aggregation check for no aggregate having clause.
    checkAggregation(csep, jobInfo);

    // include filters in having clause, if any.
    if (jobInfo.havingStepVec.size() > 0)
        querySteps.insert(querySteps.begin(),
                          jobInfo.havingStepVec.begin(), jobInfo.havingStepVec.end());

    //Need to change the leading pColStep to a pColScanStep
    //Keep a list of the (table OIDs,alias) that we've already processed for @bug 598 self-join
    set<uint32_t> seenTableIds;

    //Stack of seenTables to make sure the left-hand side and right-hand have the same content
    stack<set<uint32_t> > seenTableStack;

    if (!querySteps.empty())
    {
        JobStepVector::iterator iter = querySteps.begin();
        JobStepVector::iterator end = querySteps.end();

        for (; iter != end; ++iter)
        {
            idbassert(iter->get());

            // As of bug3695, make sure varbinary is not used in filters.
            if (typeid(*(iter->get())) == typeid(pColStep))
            {
                // only pcolsteps, no pcolscan yet.
                pColStep* pcol = dynamic_cast<pColStep*>(iter->get());

                if (pcol->colType().colDataType == CalpontSystemCatalog::VARBINARY)
                {
                    if (pcol->filterCount() != 1)
                        throw runtime_error ("VARBINARY in filter or function is not supported.");

                    // error out if the filter is not "is null" or "is not null"
                    // should block "= null" and "!= null" ???
                    messageqcpp::ByteStream filter = pcol->filterString();
                    uint8_t op = 0;
                    filter >> op;
                    bool nullOp = (op == COMPARE_EQ || op == COMPARE_NE || op == COMPARE_NIL);
                    filter >> op;  // skip roundFlag
                    uint64_t value = 0;
                    filter >> value;
                    nullOp = nullOp && (value == 0xfffffffffffffffeULL);

                    if (nullOp == false)
                        throw runtime_error ("VARBINARY in filter or function is not supported.");
                }
            }

//			// save the current seentable for right-hand side
//			if (typeid(*(iter->get())) == typeid(OrDelimiterLhs))
//			{
//				seenTableStack.push(seenTableIds);
//				continue;
//			}
//
//			// restore the seentable
//			else if (typeid(*(iter->get())) == typeid(OrDelimiterRhs))
//			{
//				seenTableIds = seenTableStack.top();
//				seenTableStack.pop();
//				continue;
//			}

            if (typeid(*(iter->get())) == typeid(pColStep))
            {
                pColStep* colStep = dynamic_cast<pColStep*>(iter->get());
                string alias(colStep->alias());
                string view(colStep->view());
                //If this is the first time we've seen this table or alias
                uint32_t tableId = 0;
                tableId = getTableKey(jobInfo, colStep->tupleId());

                if (seenTableIds.find(tableId) == seenTableIds.end())
                    changePcolStepToPcolScan(iter, end);

                //Mark this OID as seen
                seenTableIds.insert(tableId);
            }
        }
    }

    //build the project steps
    if (jobInfo.deliveredCols.empty())
    {
        throw logic_error("No delivery column.");
    }

    // if any aggregate columns
    if (jobInfo.hasAggregation == true)
    {
        projectSteps = doAggProject(csep, jobInfo);
    }
    else
    {
        projectSteps = doProject(jobInfo.nonConstCols, jobInfo);
    }

    // bug3736, have jobInfo include the column map info.
    const CalpontSelectExecutionPlan::ColumnMap& retCols = csep->columnMap();
    CalpontSelectExecutionPlan::ColumnMap::const_iterator i = retCols.begin();

    for (; i != retCols.end(); i++)
    {
        SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->second.get());

        if (sc && !sc->schemaName().empty())
        {
            CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
            CalpontSystemCatalog::ColType ct = sc->colType();

//XXX use this before connector sets colType in sc correctly.
            if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
                ct = jobInfo.csc->colType(sc->oid());

//X

            string alias(extractTableAlias(sc));
            TupleInfo ti(setTupleInfo(ct, sc->oid(), jobInfo, tblOid, sc, alias));
            uint32_t colKey = ti.key;
            uint32_t tblKey = getTableKey(jobInfo, colKey);
            jobInfo.columnMap[tblKey].push_back(colKey);

            if (jobInfo.tableColMap.find(tblKey) == jobInfo.tableColMap.end())
                jobInfo.tableColMap[tblKey] = i->second;
        }
    }

    // special case, select without a table, like: select 1;
    if (jobInfo.constantCol == CONST_COL_ONLY)
        return;

    //If there are no filters (select * from table;) then add one simple scan
    //TODO: more work here...
    // @bug 497 fix. populate a map of tableoid for querysteps. tablescan
    // cols whose table does not belong to the map
    typedef set<uint32_t> tableIDMap_t;
    tableIDMap_t tableIDMap;
    JobStepVector::iterator qsiter = querySteps.begin();
    JobStepVector::iterator qsend = querySteps.end();
    uint32_t tableId = 0;

    while (qsiter != qsend)
    {
        JobStep* js = qsiter->get();

        if (js->tupleId() != (uint64_t) - 1)
            tableId = getTableKey(jobInfo, js->tupleId());

        tableIDMap.insert(tableId);
        ++qsiter;
    }

    JobStepVector::iterator jsiter = projectSteps.begin();
    JobStepVector::iterator jsend = projectSteps.end();

    while (jsiter != jsend)
    {
        JobStep* js = jsiter->get();

        if (js->tupleId() != (uint64_t) - 1)
            tableId = getTableKey(jobInfo, js->tupleId());
        else
            tableId = getTableKey(jobInfo, js);

        if (typeid(*(jsiter->get())) == typeid(pColStep) &&
                tableIDMap.find(tableId) == tableIDMap.end())
        {
            SJSTEP step0 = *jsiter;
            pColStep* colStep = dynamic_cast<pColStep*>(step0.get());
            pColScanStep* scanStep = new pColScanStep(*colStep);
            //clear out any output association so we get a nice, new one during association
            scanStep->outputAssociation(JobStepAssociation());
            step0.reset(scanStep);
            querySteps.push_back(step0);
            js = step0.get();
            tableId = getTableKey(jobInfo, js->tupleId());
            tableIDMap.insert(tableId);
        }

        ++jsiter;
    }
}


// v-table mode
void makeVtableModeSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
                         JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps)
{
    // special case for outer query order by limit -- return all
    if (jobInfo.subId == 0 && csep->hasOrderBy() && !csep->specHandlerProcessed())
    {
        jobInfo.limitCount = (uint64_t) - 1;
    }
    // support order by and limit in sub-query/union or
    // GROUP BY handler processed outer query order
    else if (csep->orderByCols().size() > 0)
    {
        addOrderByAndLimit(csep, jobInfo);
    }
    // limit without order by in any query
    else
    {
        jobInfo.limitStart = csep->limitStart();
        jobInfo.limitCount = csep->limitNum();
    }

    // Bug 2123.  Added overrideLargeSideEstimate parm below.  True if the query was written
    // with a hint telling us to skip the estimatation process for determining the large side
    // table and instead use the table order in the from clause.
    associateTupleJobSteps(querySteps, projectSteps, deliverySteps,
                           jobInfo, csep->overrideLargeSideEstimate());
    uint16_t stepNo = jobInfo.subId * 10000;
    numberSteps(querySteps, stepNo, jobInfo.traceFlags);
//	SJSTEP ds = deliverySteps.begin()->second;
    idbassert(deliverySteps.begin()->second.get());
//	ds->stepId(stepNo);
//	ds->setTraceFlags(jobInfo.traceFlags);
}

}

namespace joblist
{

void makeJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
                  JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps)
{
    // v-table mode, switch to tuple methods and return the tuple joblist.
    //@Bug 1958 Build table list only for tryTuples.
    const CalpontSelectExecutionPlan::SelectList& fromSubquery = csep->derivedTableList();
    int i = 0;

    for (CalpontSelectExecutionPlan::TableList::const_iterator it = csep->tableList().begin();
            it != csep->tableList().end();
            it++)
    {
        CalpontSystemCatalog::OID oid;

        if (it->schema.empty())
            oid = doFromSubquery(fromSubquery[i++].get(), it->alias, it->view, jobInfo);
        else if (it->fisColumnStore)
            oid = jobInfo.csc->tableRID(*it).objnum;
        else
            oid = 0;

        uint32_t tableUid = makeTableKey(jobInfo, oid, it->table, it->alias, it->schema, it->view);
        jobInfo.tableList.push_back(tableUid);
    }

    // add select suqueries
    preprocessSelectSubquery(csep, jobInfo);

    // semi-join may appear in having clause
    if (csep->having() != NULL)
        preprocessHavingClause(csep, jobInfo);

    // parse plan and make jobstep list
    parseExecutionPlan(csep, jobInfo, querySteps, projectSteps, deliverySteps);
    makeVtableModeSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);
}

void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
                       JobStepVector& querySteps, JobStepVector&, DeliveredTableMap& deliverySteps)
{
    CalpontSelectExecutionPlan::SelectList& selectVec = csep->unionVec();
    uint8_t distinctUnionNum = csep->distinctUnionNum();
    RetColsVector unionRetCols = csep->returnedCols();
    JobStepVector unionFeeders;

    for (CalpontSelectExecutionPlan::SelectList::iterator cit = selectVec.begin();
            cit != selectVec.end();
            cit++)
    {
        // @bug4848, enhance and unify limit handling.
        SJSTEP sub = doUnionSub(cit->get(), jobInfo);
        querySteps.push_back(sub);
        unionFeeders.push_back(sub);
    }

    jobInfo.deliveredCols = unionRetCols;
    SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo));
    querySteps.push_back(unionStep);
    uint16_t stepNo = jobInfo.subId * 10000;
    numberSteps(querySteps, stepNo, jobInfo.traceFlags);
    deliverySteps[execplan::CNX_VTABLE_ID] = unionStep;
}


}

namespace
{

SJLP makeJobList_(
    CalpontExecutionPlan* cplan,
    ResourceManager* rm,
    bool isExeMgr,
    unsigned& errCode, string& emsg)
{
    CalpontSelectExecutionPlan* csep = dynamic_cast<CalpontSelectExecutionPlan*>(cplan);
    boost::shared_ptr<CalpontSystemCatalog> csc = CalpontSystemCatalog::makeCalpontSystemCatalog(csep->sessionID());

    static config::Config* sysConfig = config::Config::makeConfig();
    int pmsConfigured = atoi(sysConfig->getConfig("PrimitiveServers", "Count").c_str());

    // We have to go ahead and create JobList now so we can store the joblist's
    // projectTableOID pointer in JobInfo for use during jobstep creation.
    SErrorInfo errorInfo(new ErrorInfo());
    boost::shared_ptr<TupleKeyInfo> keyInfo(new TupleKeyInfo);
    boost::shared_ptr<int> subCount(new int);
    *subCount = 0;
    JobList* jl = new TupleJobList(isExeMgr);
    jl->setPMsConfigured(pmsConfigured);
    jl->priority(csep->priority());
    jl->errorInfo(errorInfo);
    rm->setTraceFlags(csep->traceFlags());

    //Stuff a util struct with some stuff we always need
    JobInfo jobInfo(rm);
    jobInfo.sessionId = csep->sessionID();
    jobInfo.txnId = csep->txnID();
    jobInfo.verId = csep->verID();
    jobInfo.statementId = csep->statementID();
    jobInfo.queryType = csep->queryType();
    jobInfo.csc = csc;
    //TODO: clean up the vestiges of the bool trace
    jobInfo.trace = csep->traceOn();
    jobInfo.traceFlags = csep->traceFlags();
    jobInfo.isExeMgr = isExeMgr;
//	jobInfo.tryTuples = tryTuples; // always tuples after release 3.0
    jobInfo.stringScanThreshold = csep->stringScanThreshold();
    jobInfo.errorInfo = errorInfo;
    jobInfo.keyInfo = keyInfo;
    jobInfo.subCount = subCount;
    jobInfo.projectingTableOID = jl->projectingTableOIDPtr();
    jobInfo.jobListPtr = jl;
    jobInfo.stringTableThreshold = csep->stringTableThreshold();
    jobInfo.localQuery = csep->localQuery();
    jobInfo.uuid = csep->uuid();
    jobInfo.timeZone = csep->timeZone();

    /* disk-based join vars */
    jobInfo.smallSideLimit = csep->djsSmallSideLimit();
    jobInfo.largeSideLimit = csep->djsLargeSideLimit();
    jobInfo.partitionSize = csep->djsPartitionSize();
    jobInfo.umMemLimit.reset(new int64_t);
    *(jobInfo.umMemLimit) = csep->umMemLimit();
    jobInfo.isDML = csep->isDML();

    jobInfo.smallSideUsage.reset(new int64_t);
    *jobInfo.smallSideUsage = 0;

    // set fifoSize to 1 for CalpontSystemCatalog query
    if (csep->sessionID() & 0x80000000)
        jobInfo.fifoSize = 1;
    else if (csep->traceOn())
        cout << (*csep) << endl;

    try
    {
        JobStepVector querySteps;
        JobStepVector projectSteps;
        DeliveredTableMap deliverySteps;

        if (csep->unionVec().size() == 0)
            makeJobSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);
        else
            makeUnionJobSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);

        uint16_t stepNo = numberSteps(querySteps, 0, jobInfo.traceFlags);
        stepNo = numberSteps(projectSteps, stepNo, jobInfo.traceFlags);

        struct timeval stTime;

        if (jobInfo.trace)
        {
            ostringstream oss;
            oss << endl;
            oss << endl << "job parms: " << endl;
            oss << "maxBuckets = " << jobInfo.maxBuckets << ", maxElems = " << jobInfo.maxElems <<
                ", flushInterval = " << jobInfo.flushInterval <<
                ", fifoSize = " << jobInfo.fifoSize <<
                ", ScanLimit/Threshold = " << jobInfo.scanLbidReqLimit << "/" <<
                jobInfo.scanLbidReqThreshold << endl;
            oss << "UUID: " << jobInfo.uuid << endl;
            oss << endl << "job filter steps: " << endl;
            ostream_iterator<JobStepVector::value_type> oIter(oss, "\n");
            copy(querySteps.begin(), querySteps.end(), oIter);
            oss << endl << "job project steps: " << endl;
            copy(projectSteps.begin(), projectSteps.end(), oIter);
            oss << endl << "job delivery steps: " << endl;
            DeliveredTableMap::iterator dsi = deliverySteps.begin();

            while (dsi != deliverySteps.end())
            {
                oss << dynamic_cast<const JobStep*>(dsi->second.get()) << endl;
                ++dsi;
            }

            oss << endl;
            gettimeofday(&stTime, 0);

            struct tm tmbuf;
#ifdef _MSC_VER
            errno_t p = 0;
            time_t t = stTime.tv_sec;
            p = localtime_s(&tmbuf, &t);

            if (p != 0)
                memset(&tmbuf, 0, sizeof(tmbuf));

#else
            localtime_r(&stTime.tv_sec, &tmbuf);
#endif
            ostringstream tms;
            tms << setfill('0')
                << setw(4) << (tmbuf.tm_year + 1900)
                << setw(2) << (tmbuf.tm_mon + 1)
                << setw(2) << (tmbuf.tm_mday)
                << setw(2) << (tmbuf.tm_hour)
                << setw(2) << (tmbuf.tm_min)
                << setw(2) << (tmbuf.tm_sec)
                << setw(6) << (stTime.tv_usec);
            string tmstr(tms.str());
            string jsrname("jobstep." + tmstr + ".dot");
            ofstream dotFile(jsrname.c_str());
            jlf_graphics::writeDotCmds(dotFile, querySteps, projectSteps);

            char timestamp[80];
#ifdef _MSC_VER
            t = stTime.tv_sec;
            p = ctime_s(timestamp, 80, &t);

            if (p != 0)
                strcpy(timestamp, "UNKNOWN");

#else
            ctime_r((const time_t*)&stTime.tv_sec, timestamp);
#endif
            oss << "runtime updates: start at " << timestamp;
            cout << oss.str();
            Message::Args args;
            args.add(oss.str());
            jobInfo.logger->logMessage(LOG_TYPE_DEBUG, LogSQLTrace, args,
                                       LoggingID(5, jobInfo.sessionId, jobInfo.txnId, 0));
            cout << flush;
        }
        else
        {
            gettimeofday(&stTime, 0);
        }

        // Finish initializing the JobList object
        jl->addQuery(querySteps);
        jl->addProject(projectSteps);
        jl->addDelivery(deliverySteps);

        dynamic_cast<TupleJobList*>(jl)->setDeliveryFlag(true);
    }
    catch (IDBExcept& iex)
    {
        jobInfo.errorInfo->errCode = iex.errorCode();
        errCode = iex.errorCode();
        exceptionHandler(jl, jobInfo, iex.what(), LOG_TYPE_DEBUG);
        emsg = iex.what();
        goto bailout;
    }
    catch (const std::exception& ex)
    {
        jobInfo.errorInfo->errCode = makeJobListErr;
        errCode = makeJobListErr;
        exceptionHandler(jl, jobInfo, ex.what());
        emsg = ex.what();
        goto bailout;
    }
    catch (...)
    {
        jobInfo.errorInfo->errCode = makeJobListErr;
        errCode = makeJobListErr;
        exceptionHandler(jl, jobInfo, "an exception");
        emsg = "An unknown internal joblist error";
        goto bailout;
    }

    goto done;

bailout:
    delete jl;
    jl = 0;

    if (emsg.empty())
        emsg = "An unknown internal joblist error";

done:
    SJLP jlp(jl);
    return jlp;
}

}

namespace joblist
{

/* static */
SJLP JobListFactory::makeJobList(
    CalpontExecutionPlan* cplan,
    ResourceManager* rm,
    bool tryTuple,
    bool isExeMgr)
{
    SJLP ret;
    string emsg;
    unsigned errCode = 0;

    ret = makeJobList_(cplan, rm, isExeMgr, errCode, emsg);

    if (!ret)
    {
        ret.reset(new TupleJobList(isExeMgr));
        SErrorInfo errorInfo(new ErrorInfo);
        errorInfo->errCode = errCode;
        errorInfo->errMsg  = emsg;
        ret->errorInfo(errorInfo);
    }

    return ret;
}

}
// vim:ts=4 sw=4: