mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-18 21:44:02 +03:00
501 lines
16 KiB
C++
501 lines
16 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
   Copyright (C) 2019 MariaDB Corporation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA. */
|
|
|
|
// $Id: jlf_common.h 9702 2013-07-17 19:08:07Z xlou $
|
|
|
|
/** @file jlf_common.h
|
|
*
|
|
*/
|
|
#pragma once
|
|
|
|
#include <map>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
|
|
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <boost/uuid/uuid.hpp>
|
|
|
|
#include "calpontexecutionplan.h"
|
|
#include "calpontselectexecutionplan.h"
|
|
#include "calpontsystemcatalog.h"
|
|
#include "simplecolumn.h"
|
|
|
|
#include "dbrm.h"
|
|
|
|
#include "joblist.h"
|
|
#include "jobstep.h"
|
|
#include "groupconcat.h"
|
|
#include "jsonarrayagg.h"
|
|
#include "jl_logger.h"
|
|
|
|
#include "resourcemanager.h"
|
|
#include "rowgroup.h"
|
|
#include "../../primitives/primproc/primitiveserverthreadpools.h"
|
|
|
|
// Forward declarations of execplan classes that this header only refers to
// by pointer or reference.
namespace execplan
{
class SimpleColumn;
class AggregateColumn;
}  // namespace execplan
|
|
|
|
namespace joblist
|
|
{
|
|
// Terminal escape sequences used to emphasise error messages printed to the screen.
const std::string boldStart("\033[0;1m");
const std::string boldStop("\033[0;39m");

// Values stored in JobInfo::constantCol.
const int8_t CONST_COL_NONE(0);
const int8_t CONST_COL_EXIST(1);
const int8_t CONST_COL_ONLY(2);

// All expressions are treated as belonging to a "virtual" table EXPRESSION,
// identified by the pair (CNX_EXP_TABLE_ID, expression).
// CNX_EXP_TABLE_ID (999) is never used for a user table or column, so it
// cannot conflict with anything referenced in a query.
const int32_t CNX_EXP_TABLE_ID(999);
|
|
|
|
struct TupleInfo
|
|
{
|
|
explicit TupleInfo(uint32_t w = 0, uint32_t o = 0, uint32_t k = -1, uint32_t t = -1, uint32_t s = 0,
|
|
uint32_t p = 0,
|
|
execplan::CalpontSystemCatalog::ColDataType dt = execplan::CalpontSystemCatalog::BIT,
|
|
uint32_t csn = 8)
|
|
: width(w), oid(o), key(k), tkey(t), scale(s), precision(p), dtype(dt), csNum(csn)
|
|
{
|
|
}
|
|
~TupleInfo() = default;
|
|
|
|
uint32_t width;
|
|
uint32_t oid;
|
|
uint32_t key;
|
|
uint32_t tkey;
|
|
uint32_t scale;
|
|
uint32_t precision;
|
|
execplan::CalpontSystemCatalog::ColDataType dtype;
|
|
uint32_t csNum; // For collations
|
|
};
|
|
|
|
// This struct holds information about `FunctionColumn`.
struct FunctionColumnInfo
{
  // Function argument.
  uint64_t associatedColumnOid;
  // Function name.
  std::string functionName;

  // @param colOid    OID of the column the function is applied to.
  // @param funcName  name of the function; taken by value and moved into the
  //                  member so an rvalue argument is never copied and an
  //                  lvalue argument is copied exactly once.
  FunctionColumnInfo(uint64_t colOid, std::string funcName)
   : associatedColumnOid(colOid), functionName(std::move(funcName))
  {
  }
};
|
|
|
|
// for compound join
|
|
struct JoinData
|
|
{
|
|
int64_t fJoinId;
|
|
std::vector<uint32_t> fLeftKeys;
|
|
std::vector<uint32_t> fRightKeys;
|
|
std::vector<JoinType> fTypes; // joblisttypes.h: INNER, LEFTOUTER, RIGHTOUTER
|
|
bool fTypeless;
|
|
|
|
JoinData() : fJoinId(-1), fTypeless(false)
|
|
{
|
|
}
|
|
};
|
|
|
|
typedef std::stack<JobStepVector> JobStepVectorStack;
|
|
typedef std::map<execplan::CalpontSystemCatalog::OID, execplan::CalpontSystemCatalog::OID> DictOidToColOidMap;
|
|
typedef std::vector<TupleInfo> TupleInfoVector;
|
|
typedef std::map<uint32_t, TupleInfo> TupleInfoMap;
|
|
|
|
// for subquery support
|
|
struct UniqId
|
|
{
|
|
int fId; // OID for real table, sequence # for subquery
|
|
// std::string fName; // name (table alias + [column name, if column])
|
|
std::string fTable; // table name (table alias)
|
|
std::string fSchema; // schema name
|
|
std::string fView; // view name
|
|
uint32_t fPseudo; // pseudo type
|
|
// uint64_t fEngine; // InfiniDB == 0
|
|
uint64_t fSubId; // subquery ID
|
|
|
|
UniqId() : fId(-1), fSubId(-1)
|
|
{
|
|
}
|
|
UniqId(int i, const std::string& t, const std::string& s, const std::string& v, uint32_t pi = 0,
|
|
uint64_t l = -1)
|
|
: fId(i), fTable(t), fSchema(s), fView(v), fPseudo(pi), fSubId(l)
|
|
{
|
|
}
|
|
explicit UniqId(const execplan::SimpleColumn* sc);
|
|
UniqId(int o, const execplan::SimpleColumn* sc);
|
|
|
|
std::string toString() const;
|
|
};
|
|
bool operator<(const struct UniqId& x, const struct UniqId& y);
|
|
bool operator==(const struct UniqId& x, const struct UniqId& y);
|
|
typedef std::map<UniqId, uint32_t> TupleKeyMap;
|
|
|
|
// typedef vector<SRCP> RetColsVector;
|
|
typedef execplan::CalpontSelectExecutionPlan::ReturnedColumnList RetColsVector;
|
|
|
|
// join data between table pairs
|
|
typedef std::map<std::pair<uint32_t, uint32_t>, JoinData> TableJoinMap;
|
|
|
|
struct TupleKeyInfo
|
|
{
|
|
uint32_t nextKey;
|
|
TupleKeyMap tupleKeyMap;
|
|
std::vector<UniqId> tupleKeyVec;
|
|
std::vector<std::string> tupleKeyToName;
|
|
std::vector<bool> crossEngine;
|
|
|
|
// TODO: better organize these structs
|
|
std::map<uint32_t, execplan::CalpontSystemCatalog::OID> tupleKeyToTableOid;
|
|
std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> colType;
|
|
std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> token2DictTypeMap;
|
|
std::map<uint32_t, std::string> keyName;
|
|
std::map<uint32_t, uint32_t> colKeyToTblKey;
|
|
std::map<uint32_t, uint32_t> dictKeyMap; // map token key to dictionary key
|
|
DictOidToColOidMap dictOidToColOid; // map dictionary OID to column OID
|
|
std::map<uint32_t, uint32_t> pseudoType; // key to pseudo column type
|
|
std::set<uint32_t> functionJoinKeys; // key used in function join
|
|
TupleInfoMap tupleInfoMap;
|
|
|
|
TupleKeyInfo() : nextKey(0)
|
|
{
|
|
}
|
|
};
|
|
|
|
//------------------------------------------------------------------------------
|
|
/** @brief This struct maintains state for the query processing
|
|
*
|
|
*/
|
|
//------------------------------------------------------------------------------
|
|
struct JobInfo
|
|
{
|
|
explicit JobInfo(ResourceManager* r)
|
|
: rm(r)
|
|
, sessionId(0)
|
|
, txnId(0)
|
|
, statementId(0)
|
|
, maxBuckets(rm->getHjMaxBuckets())
|
|
, maxElems(rm->getHjMaxElems())
|
|
, flushInterval(rm->getJLFlushInterval())
|
|
, fifoSize(rm->getJlFifoSize())
|
|
, logger(new Logger())
|
|
, traceFlags(0)
|
|
, projectingTableOID(nullptr)
|
|
, isExeMgr(false)
|
|
, trace(false)
|
|
, tryTuples(false)
|
|
, constantCol(CONST_COL_NONE)
|
|
, hasDistinct(false)
|
|
, hasAggregation(false)
|
|
, hasRollup(false)
|
|
, limitStart(0)
|
|
, limitCount(-1)
|
|
, joinNum(0)
|
|
, joinNumInView(0)
|
|
, subLevel(0)
|
|
, subNum(0)
|
|
, subId(0)
|
|
, pJobInfo(nullptr)
|
|
, constantFalse(false)
|
|
, cntStarPos(-1)
|
|
, stringScanThreshold(1)
|
|
, wfqLimitStart(0)
|
|
, wfqLimitCount(-1)
|
|
, djsForceRun(false)
|
|
, timeZone(0)
|
|
, maxPmJoinResultCount(1048576)
|
|
{
|
|
}
|
|
ResourceManager* rm;
|
|
uint32_t sessionId;
|
|
uint32_t txnId;
|
|
BRM::QueryContext verId;
|
|
uint32_t statementId;
|
|
std::string queryType;
|
|
boost::shared_ptr<execplan::CalpontSystemCatalog> csc;
|
|
int maxBuckets;
|
|
uint64_t maxElems;
|
|
JobStepVectorStack stack;
|
|
uint32_t flushInterval;
|
|
uint32_t fifoSize;
|
|
SPJL logger;
|
|
uint32_t traceFlags;
|
|
SErrorInfo errorInfo;
|
|
execplan::CalpontSystemCatalog::OID* projectingTableOID; // DeliveryWSDLs get a reference to this
|
|
bool isExeMgr;
|
|
bool trace;
|
|
bool tryTuples;
|
|
int8_t constantCol;
|
|
TupleInfoVector pjColList;
|
|
|
|
// aggregation
|
|
bool hasDistinct;
|
|
bool hasAggregation;
|
|
bool hasRollup;
|
|
std::vector<uint32_t> groupByColVec;
|
|
std::vector<uint32_t> distinctColVec;
|
|
std::vector<uint32_t> expressionVec;
|
|
std::vector<std::pair<uint32_t, int> > returnedColVec;
|
|
|
|
// order by and limit
|
|
std::vector<std::pair<uint32_t, bool> > orderByColVec;
|
|
uint64_t limitStart;
|
|
uint64_t limitCount;
|
|
uint32_t orderByThreads;
|
|
|
|
// tupleInfo
|
|
boost::shared_ptr<TupleKeyInfo> keyInfo;
|
|
|
|
// skip dictionary step if the real string is not necessary to projected.
|
|
// In most case, the string is used for return or comparison, so default is false.
|
|
// when setting to false, no need to check: false overwrites true;
|
|
// When setting to true, need check: true cannot overwrite false.
|
|
std::map<uint32_t, bool> tokenOnly;
|
|
|
|
// unique ID list of the tables in from clause
|
|
std::vector<uint32_t> tableList;
|
|
|
|
// table join map
|
|
TableJoinMap tableJoinMap;
|
|
|
|
// for expression
|
|
JobStepVector crossTableExpressions;
|
|
JobStepVector returnedExpressions;
|
|
|
|
// @bug3683, function join
|
|
std::vector<JobStep*> functionJoins; // store expressions can be converted to joins
|
|
|
|
// for function on aggregation
|
|
RetColsVector deliveredCols; // columns to be sent to connector
|
|
RetColsVector nonConstCols; // none constant columns
|
|
RetColsVector nonConstDelCols; // delivered none constant columns
|
|
RetColsVector projectionCols; // columns for projection
|
|
std::multimap<execplan::ReturnedColumn*, execplan::ReturnedColumn*> cloneAggregateColMap;
|
|
std::vector<std::pair<int, int> > aggEidIndexList;
|
|
|
|
// table pairs with incompatible join which is treated as expression
|
|
std::map<uint32_t, uint32_t> incompatibleJoinMap;
|
|
|
|
// bug 1573 & 3391, having
|
|
SJSTEP havingStep;
|
|
JobStepVector havingStepVec;
|
|
|
|
// bug 2634, 5311 and 5374, outjoin and predicates
|
|
std::set<uint32_t> outerOnTable;
|
|
// MCOL-4715.
|
|
std::set<uint32_t> innerOnTable;
|
|
std::set<uint32_t> tableHasIsNull;
|
|
JobStepVector outerJoinExpressions;
|
|
|
|
// bug 3759, join in order
|
|
// mixed outer join
|
|
std::map<int, uint64_t> tableSize;
|
|
int64_t joinNum;
|
|
// MCOL-5061, MCOL-334.
|
|
int64_t joinNumInView;
|
|
|
|
// for subquery
|
|
boost::shared_ptr<int> subCount; // # of subqueries in the query statement
|
|
int subLevel; // subquery level
|
|
int subNum; // # of subqueries @ level n
|
|
int subId; // id of current subquery
|
|
JobInfo* pJobInfo; // jobinfo of outer query
|
|
bool constantFalse; // has constant false filter
|
|
std::string subAlias; // unique alias to identify the subquery
|
|
JobStepVector correlateSteps;
|
|
JobStepVector selectAndFromSubs;
|
|
std::set<uint64_t> returnColSet;
|
|
std::map<UniqId, execplan::CalpontSystemCatalog::ColType> vtableColTypes;
|
|
|
|
// step to process orderby, limit and fill in constants
|
|
SJSTEP annexStep;
|
|
|
|
// @bug3475, aggregate constant column <position, aggregate column>
|
|
std::map<uint64_t, execplan::SRCP> constAggregate;
|
|
int64_t cntStarPos; // position of count(*)
|
|
|
|
// @bug3321, dictionary scan setting, HWM = stringScanThreshold -1
|
|
uint64_t stringScanThreshold;
|
|
|
|
// @bug3362, group_concat
|
|
RetColsVector groupConcatCols;
|
|
GroupConcatInfo groupConcatInfo;
|
|
|
|
// @bug3736, column map
|
|
std::map<uint32_t, std::vector<uint32_t> > columnMap;
|
|
|
|
// @bug3438, joblist for trace/stats
|
|
JobList* jobListPtr; // just reference, NOT delete by JobInfo
|
|
|
|
// WORKAROUND for join FE limitation (join Id to expression tables map)
|
|
std::map<uint32_t, std::set<uint32_t> > joinFeTableMap;
|
|
|
|
uint32_t stringTableThreshold;
|
|
|
|
// @bug4531, Window Function support
|
|
RetColsVector windowCols;
|
|
RetColsVector windowExps;
|
|
RetColsVector windowDels;
|
|
std::set<uint64_t> windowSet;
|
|
RetColsVector wfqOrderby;
|
|
uint64_t wfqLimitStart;
|
|
uint64_t wfqLimitCount;
|
|
// workaround for expression of windowfunction in IN/EXISTS sub-query
|
|
// std::map<uint32_t, RetColsVector> exprWinfuncListMap;
|
|
|
|
// Flag to tell us we are in local PM only query mode
|
|
uint32_t localQuery;
|
|
|
|
boost::uuids::uuid uuid;
|
|
|
|
// @bug4021, column map for all pseudo column queries
|
|
std::map<uint64_t, execplan::SRCP> tableColMap;
|
|
std::set<uint64_t> pseudoColTable;
|
|
|
|
/* Disk-based join vars */
|
|
boost::shared_ptr<int64_t> smallSideUsage;
|
|
boost::shared_ptr<int64_t> umMemLimit;
|
|
int64_t smallSideLimit; // need to get these from a session var in execplan
|
|
int64_t largeSideLimit;
|
|
uint64_t partitionSize;
|
|
uint32_t djsMaxPartitionTreeDepth;
|
|
bool djsForceRun;
|
|
bool isDML;
|
|
long timeZone;
|
|
uint32_t maxPmJoinResultCount;
|
|
|
|
// This is for tracking any dynamically allocated ParseTree objects
|
|
// in simpleScalarFilterToParseTree() for later deletion in
|
|
// ~csep() or csep.unserialize()
|
|
std::vector<execplan::ParseTree*> dynamicParseTreeVec;
|
|
|
|
PrimitiveServerThreadPools primitiveServerThreadPools;
|
|
// Represents a `join edges` and `join id` to be restored in `join order` part.
|
|
std::map<std::pair<uint32_t, uint32_t>, int64_t> joinEdgesToRestore;
|
|
// Represents a pair of `table` to be on a large side and weight associated with that table.
|
|
std::unordered_map<uint32_t, int64_t> tablesForLargeSide;
|
|
// Represents a pair of `tupleId` and `FunctionColumnInfo`.
|
|
std::unordered_map<uint32_t, FunctionColumnInfo> functionColumnMap;
|
|
|
|
private:
|
|
// defaults okay
|
|
// JobInfo(const JobInfo& rhs);
|
|
// JobInfo& operator=(const JobInfo& rhs);
|
|
};
|
|
|
|
//------------------------------------------------------------------------------
|
|
// namespace scoped functions
|
|
//------------------------------------------------------------------------------
|
|
|
|
/** @brief Returns the table alias for the specified column
|
|
*
|
|
*/
|
|
std::string extractTableAlias(const execplan::SimpleColumn* sc);
|
|
|
|
/** @brief Returns the table alias for the specified column
|
|
*
|
|
*/
|
|
std::string extractTableAlias(const execplan::SSC& sc);
|
|
|
|
/** @brief Returns OID associated with colType if it is a dictionary column
|
|
*
|
|
*/
|
|
execplan::CalpontSystemCatalog::OID isDictCol(const execplan::CalpontSystemCatalog::ColType& colType);
|
|
|
|
/** @brief Determines if colType is a character column
|
|
*
|
|
*/
|
|
bool isCharCol(const execplan::CalpontSystemCatalog::ColType& colType);
|
|
|
|
/** @brief Returns OID associated with a table
|
|
*
|
|
*/
|
|
execplan::CalpontSystemCatalog::OID tableOid(const execplan::SimpleColumn* sc,
|
|
boost::shared_ptr<execplan::CalpontSystemCatalog> cat);
|
|
|
|
/** @brief Returns the unique ID to be used in tupleInfo
|
|
*
|
|
*/
|
|
uint32_t getTupleKey(JobInfo& jobInfo, const execplan::SimpleColumn* sc, bool add = false);
|
|
uint32_t getTableKey(const JobInfo& jobInfo, execplan::CalpontSystemCatalog::OID tableOid,
|
|
const std::string& alias, const std::string& schema, const std::string& view);
|
|
uint32_t getTupleKey(JobInfo& jobInfo, const execplan::SRCP& srcp, bool add = false);
|
|
uint32_t getTableKey(const JobInfo& jobInfo, uint32_t cid);
|
|
uint32_t getTableKey(JobInfo& jobInfo, JobStep* js);
|
|
|
|
void updateTableKey(uint32_t cid, uint32_t tid, JobInfo& jobInfo);
|
|
|
|
uint32_t getExpTupleKey(const JobInfo& jobInfo, uint64_t eid, bool cr = false);
|
|
|
|
uint32_t makeTableKey(JobInfo& jobInfo, const execplan::SimpleColumn* sc);
|
|
uint32_t makeTableKey(JobInfo& jobInfo, execplan::CalpontSystemCatalog::OID tableOid,
|
|
const std::string& tbl_name, const std::string& tbl_alias, const std::string& sch_name,
|
|
const std::string& vw_name, uint64_t engine = 0);
|
|
|
|
/** @brief Returns the tupleInfo associate with the (table, column) key pair
|
|
*
|
|
*/
|
|
TupleInfo getTupleInfo(uint32_t columnKey, const JobInfo& jobInfo);
|
|
|
|
/** @brief set tuple info for simple column
|
|
*
|
|
*/
|
|
TupleInfo setTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct,
|
|
execplan::CalpontSystemCatalog::OID col_oid, JobInfo& jobInfo,
|
|
execplan::CalpontSystemCatalog::OID tbl_oid, const execplan::SimpleColumn* sc,
|
|
const std::string& alias);
|
|
|
|
/** @brief set tuple info for expressions
|
|
*
|
|
*/
|
|
TupleInfo setExpTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct, uint64_t expressionId,
|
|
const std::string& alias, JobInfo& jobInfo, bool rc = false);
|
|
|
|
TupleInfo setExpTupleInfo(const execplan::ReturnedColumn* rc, JobInfo& jobInfo);
|
|
|
|
/** @brief add an aggregate column info
|
|
*
|
|
*/
|
|
void addAggregateColumn(execplan::ReturnedColumn*, int, RetColsVector&, JobInfo&);
|
|
|
|
void makeJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps,
|
|
JobStepVector& projectSteps, DeliveredTableMap& deliverySteps);
|
|
|
|
void makeUnionJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
|
|
JobStepVector& querySteps, JobStepVector&, DeliveredTableMap& deliverySteps);
|
|
|
|
void updateDerivedColumn(JobInfo&, execplan::SimpleColumn*, execplan::CalpontSystemCatalog::ColType&);
|
|
|
|
bool filterWithDictionary(execplan::CalpontSystemCatalog::OID dictOid, uint64_t n);
|
|
|
|
bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColType& ct1,
|
|
const execplan::CalpontSystemCatalog::ColType& ct2, bool forJoin = true);
|
|
bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColDataType& dt1, uint32_t scale1,
|
|
const execplan::CalpontSystemCatalog::ColDataType& dt2, uint32_t scale2,
|
|
bool forJoin = true);
|
|
|
|
} // namespace joblist
|