You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-09-11 08:50:45 +03:00
The most important fix here addresses a possible buffer overrun in the DATEFORMAT() function: a "%W" format, repeated enough times, would overflow the 256-byte result buffer. We now use an ostringstream to construct the result, so this is safe. Changes in the date/time projection functions also led me to fix differences between our behavior and the server's; the new, better behavior is reflected in the changed test results. In addition, the TRUNCATE() and ROUND() functions used incorrect logic when computing the decimal "shift."
502 lines
16 KiB
C++
502 lines
16 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
Copyright (C) 2019 MariaDB Corporation
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
// $Id: jlf_common.h 9702 2013-07-17 19:08:07Z xlou $
|
|
|
|
/** @file jlf_common.h
|
|
*
|
|
*/
|
|
#pragma once
|
|
|
|
#include <map>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/shared_ptr.hpp>
#include <boost/uuid/uuid.hpp>

#include "calpontexecutionplan.h"
#include "calpontselectexecutionplan.h"
#include "calpontsystemcatalog.h"
#include "simplecolumn.h"

#include "dbrm.h"

#include "joblist.h"
#include "jobstep.h"
#include "groupconcat.h"
#include "jsonarrayagg.h"
#include "jl_logger.h"

#include "resourcemanager.h"
#include "rowgroup.h"
#include "../../primitives/primproc/primitiveserverthreadpools.h"
|
|
|
|
// Forward declarations of execplan classes.
namespace execplan
{
class SimpleColumn;
class AggregateColumn;
}  // namespace execplan
|
|
|
|
namespace joblist
|
|
{
|
|
// Terminal escape sequences that bracket emphasised text when error messages
// are written to the screen.
const std::string boldStart("\033[0;1m");
const std::string boldStop("\033[0;39m");
|
|
|
|
// Values stored in JobInfo::constantCol.
// NOTE(review): judging by the names these mean "no constant columns",
// "some constant columns" and "only constant columns" -- confirm with callers.
const int8_t CONST_COL_NONE(0);
const int8_t CONST_COL_EXIST(1);
const int8_t CONST_COL_ONLY(2);
|
|
|
|
// Expressions are treated as if they belonged to a "virtual" table EXPRESSION,
// identified by the pair (CNX_EXP_TABLE_ID, expression).
// CNX_EXP_TABLE_ID (999) is not used for any user table or column, so there is
// no conflict with objects referenced in queries.
const int32_t CNX_EXP_TABLE_ID(999);
|
|
|
|
struct TupleInfo
|
|
{
|
|
TupleInfo(uint32_t w = 0, uint32_t o = 0, uint32_t k = -1, uint32_t t = -1, uint32_t s = 0, uint32_t p = 0,
|
|
execplan::CalpontSystemCatalog::ColDataType dt = execplan::CalpontSystemCatalog::BIT,
|
|
uint32_t csn = 8)
|
|
: width(w), oid(o), key(k), tkey(t), scale(s), precision(p), dtype(dt), csNum(csn)
|
|
{
|
|
}
|
|
~TupleInfo()
|
|
{
|
|
}
|
|
|
|
uint32_t width;
|
|
uint32_t oid;
|
|
uint32_t key;
|
|
uint32_t tkey;
|
|
uint32_t scale;
|
|
uint32_t precision;
|
|
execplan::CalpontSystemCatalog::ColDataType dtype;
|
|
uint32_t csNum; // For collations
|
|
};
|
|
|
|
// This struct holds information about a `FunctionColumn`: the name of the
// function and the OID of the column used as its argument.
struct FunctionColumnInfo
{
  // Function argument.
  uint64_t associatedColumnOid;
  // Function name.
  std::string functionName;

  // `funcName` is taken by value and then moved into the member, so an rvalue
  // argument is not copied at all and an lvalue argument is copied only once
  // (the caller's string is left untouched).
  FunctionColumnInfo(uint64_t colOid, std::string funcName)
   : associatedColumnOid(colOid), functionName(std::move(funcName))
  {
  }
};
|
|
|
|
// for compound join
|
|
struct JoinData
|
|
{
|
|
int64_t fJoinId;
|
|
std::vector<uint32_t> fLeftKeys;
|
|
std::vector<uint32_t> fRightKeys;
|
|
std::vector<JoinType> fTypes; // joblisttypes.h: INNER, LEFTOUTER, RIGHTOUTER
|
|
bool fTypeless;
|
|
|
|
JoinData() : fJoinId(-1), fTypeless(false)
|
|
{
|
|
}
|
|
};
|
|
|
|
typedef std::stack<JobStepVector> JobStepVectorStack;
|
|
typedef std::map<execplan::CalpontSystemCatalog::OID, execplan::CalpontSystemCatalog::OID> DictOidToColOidMap;
|
|
typedef std::vector<TupleInfo> TupleInfoVector;
|
|
typedef std::map<uint32_t, TupleInfo> TupleInfoMap;
|
|
|
|
// for subquery support
|
|
struct UniqId
|
|
{
|
|
int fId; // OID for real table, sequence # for subquery
|
|
// std::string fName; // name (table alias + [column name, if column])
|
|
std::string fTable; // table name (table alias)
|
|
std::string fSchema; // schema name
|
|
std::string fView; // view name
|
|
uint32_t fPseudo; // pseudo type
|
|
// uint64_t fEngine; // InfiniDB == 0
|
|
uint64_t fSubId; // subquery ID
|
|
|
|
UniqId() : fId(-1), fSubId(-1)
|
|
{
|
|
}
|
|
UniqId(int i, const std::string& t, const std::string& s, const std::string& v, uint32_t pi = 0,
|
|
uint64_t l = -1)
|
|
: fId(i), fTable(t), fSchema(s), fView(v), fPseudo(pi), fSubId(l)
|
|
{
|
|
}
|
|
UniqId(const execplan::SimpleColumn* sc);
|
|
UniqId(int o, const execplan::SimpleColumn* sc);
|
|
|
|
std::string toString() const;
|
|
};
|
|
bool operator<(const struct UniqId& x, const struct UniqId& y);
|
|
bool operator==(const struct UniqId& x, const struct UniqId& y);
|
|
typedef std::map<UniqId, uint32_t> TupleKeyMap;
|
|
|
|
// typedef vector<SRCP> RetColsVector;
|
|
typedef execplan::CalpontSelectExecutionPlan::ReturnedColumnList RetColsVector;
|
|
|
|
// join data between table pairs
|
|
typedef std::map<std::pair<uint32_t, uint32_t>, JoinData> TableJoinMap;
|
|
|
|
struct TupleKeyInfo
|
|
{
|
|
uint32_t nextKey;
|
|
TupleKeyMap tupleKeyMap;
|
|
std::vector<UniqId> tupleKeyVec;
|
|
std::vector<std::string> tupleKeyToName;
|
|
std::vector<bool> crossEngine;
|
|
|
|
// TODO: better organize these structs
|
|
std::map<uint32_t, execplan::CalpontSystemCatalog::OID> tupleKeyToTableOid;
|
|
std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> colType;
|
|
std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> token2DictTypeMap;
|
|
std::map<uint32_t, std::string> keyName;
|
|
std::map<uint32_t, uint32_t> colKeyToTblKey;
|
|
std::map<uint32_t, uint32_t> dictKeyMap; // map token key to dictionary key
|
|
DictOidToColOidMap dictOidToColOid; // map dictionary OID to column OID
|
|
std::map<uint32_t, uint32_t> pseudoType; // key to pseudo column type
|
|
std::set<uint32_t> functionJoinKeys; // key used in function join
|
|
TupleInfoMap tupleInfoMap;
|
|
|
|
TupleKeyInfo() : nextKey(0)
|
|
{
|
|
}
|
|
};
|
|
|
|
//------------------------------------------------------------------------------
|
|
/** @brief This struct maintains state for the query processing
|
|
*
|
|
*/
|
|
//------------------------------------------------------------------------------
|
|
struct JobInfo
|
|
{
|
|
JobInfo(ResourceManager* r)
|
|
: rm(r)
|
|
, sessionId(0)
|
|
, txnId(0)
|
|
, statementId(0)
|
|
, maxBuckets(rm->getHjMaxBuckets())
|
|
, maxElems(rm->getHjMaxElems())
|
|
, flushInterval(rm->getJLFlushInterval())
|
|
, fifoSize(rm->getJlFifoSize())
|
|
, logger(new Logger())
|
|
, traceFlags(0)
|
|
, projectingTableOID(0)
|
|
, isExeMgr(false)
|
|
, trace(false)
|
|
, tryTuples(false)
|
|
, constantCol(CONST_COL_NONE)
|
|
, hasDistinct(false)
|
|
, hasAggregation(false)
|
|
, hasRollup(false)
|
|
, limitStart(0)
|
|
, limitCount(-1)
|
|
, joinNum(0)
|
|
, joinNumInView(0)
|
|
, subLevel(0)
|
|
, subNum(0)
|
|
, subId(0)
|
|
, pJobInfo(NULL)
|
|
, constantFalse(false)
|
|
, cntStarPos(-1)
|
|
, stringScanThreshold(1)
|
|
, wfqLimitStart(0)
|
|
, wfqLimitCount(-1)
|
|
, djsForceRun(false)
|
|
, timeZone(0)
|
|
, maxPmJoinResultCount(1048576)
|
|
{
|
|
}
|
|
ResourceManager* rm;
|
|
uint32_t sessionId;
|
|
uint32_t txnId;
|
|
BRM::QueryContext verId;
|
|
uint32_t statementId;
|
|
std::string queryType;
|
|
boost::shared_ptr<execplan::CalpontSystemCatalog> csc;
|
|
int maxBuckets;
|
|
uint64_t maxElems;
|
|
JobStepVectorStack stack;
|
|
uint32_t flushInterval;
|
|
uint32_t fifoSize;
|
|
SPJL logger;
|
|
uint32_t traceFlags;
|
|
SErrorInfo errorInfo;
|
|
execplan::CalpontSystemCatalog::OID* projectingTableOID; // DeliveryWSDLs get a reference to this
|
|
bool isExeMgr;
|
|
bool trace;
|
|
bool tryTuples;
|
|
int8_t constantCol;
|
|
TupleInfoVector pjColList;
|
|
|
|
// aggregation
|
|
bool hasDistinct;
|
|
bool hasAggregation;
|
|
bool hasRollup;
|
|
std::vector<uint32_t> groupByColVec;
|
|
std::vector<uint32_t> distinctColVec;
|
|
std::vector<uint32_t> expressionVec;
|
|
std::vector<std::pair<uint32_t, int> > returnedColVec;
|
|
|
|
// order by and limit
|
|
std::vector<std::pair<uint32_t, bool> > orderByColVec;
|
|
uint64_t limitStart;
|
|
uint64_t limitCount;
|
|
uint32_t orderByThreads;
|
|
|
|
// tupleInfo
|
|
boost::shared_ptr<TupleKeyInfo> keyInfo;
|
|
|
|
// skip dictionary step if the real string is not necessary to projected.
|
|
// In most case, the string is used for return or comparison, so default is false.
|
|
// when setting to false, no need to check: false overwrites true;
|
|
// When setting to true, need check: true cannot overwrite false.
|
|
std::map<uint32_t, bool> tokenOnly;
|
|
|
|
// unique ID list of the tables in from clause
|
|
std::vector<uint32_t> tableList;
|
|
|
|
// table join map
|
|
TableJoinMap tableJoinMap;
|
|
|
|
// for expression
|
|
JobStepVector crossTableExpressions;
|
|
JobStepVector returnedExpressions;
|
|
|
|
// @bug3683, function join
|
|
std::vector<JobStep*> functionJoins; // store expressions can be converted to joins
|
|
|
|
// for function on aggregation
|
|
RetColsVector deliveredCols; // columns to be sent to connector
|
|
RetColsVector nonConstCols; // none constant columns
|
|
RetColsVector nonConstDelCols; // delivered none constant columns
|
|
RetColsVector projectionCols; // columns for projection
|
|
std::multimap<execplan::ReturnedColumn*, execplan::ReturnedColumn*> cloneAggregateColMap;
|
|
std::vector<std::pair<int, int> > aggEidIndexList;
|
|
|
|
// table pairs with incompatible join which is treated as expression
|
|
std::map<uint32_t, uint32_t> incompatibleJoinMap;
|
|
|
|
// bug 1573 & 3391, having
|
|
SJSTEP havingStep;
|
|
JobStepVector havingStepVec;
|
|
|
|
// bug 2634, 5311 and 5374, outjoin and predicates
|
|
std::set<uint32_t> outerOnTable;
|
|
// MCOL-4715.
|
|
std::set<uint32_t> innerOnTable;
|
|
std::set<uint32_t> tableHasIsNull;
|
|
JobStepVector outerJoinExpressions;
|
|
|
|
// bug 3759, join in order
|
|
// mixed outer join
|
|
std::map<int, uint64_t> tableSize;
|
|
int64_t joinNum;
|
|
// MCOL-5061, MCOL-334.
|
|
int64_t joinNumInView;
|
|
|
|
// for subquery
|
|
boost::shared_ptr<int> subCount; // # of subqueries in the query statement
|
|
int subLevel; // subquery level
|
|
int subNum; // # of subqueries @ level n
|
|
int subId; // id of current subquery
|
|
JobInfo* pJobInfo; // jobinfo of outer query
|
|
bool constantFalse; // has constant false filter
|
|
std::string subAlias; // unique alias to identify the subquery
|
|
JobStepVector correlateSteps;
|
|
JobStepVector selectAndFromSubs;
|
|
std::set<uint64_t> returnColSet;
|
|
std::map<UniqId, execplan::CalpontSystemCatalog::ColType> vtableColTypes;
|
|
|
|
// step to process orderby, limit and fill in constants
|
|
SJSTEP annexStep;
|
|
|
|
// @bug3475, aggregate constant column <position, aggregate column>
|
|
std::map<uint64_t, execplan::SRCP> constAggregate;
|
|
int64_t cntStarPos; // position of count(*)
|
|
|
|
// @bug3321, dictionary scan setting, HWM = stringScanThreshold -1
|
|
uint64_t stringScanThreshold;
|
|
|
|
// @bug3362, group_concat
|
|
RetColsVector groupConcatCols;
|
|
GroupConcatInfo groupConcatInfo;
|
|
|
|
// @bug3736, column map
|
|
std::map<uint32_t, std::vector<uint32_t> > columnMap;
|
|
|
|
// @bug3438, joblist for trace/stats
|
|
JobList* jobListPtr; // just reference, NOT delete by JobInfo
|
|
|
|
// WORKAROUND for join FE limitation (join Id to expression tables map)
|
|
std::map<uint32_t, std::set<uint32_t> > joinFeTableMap;
|
|
|
|
uint32_t stringTableThreshold;
|
|
|
|
// @bug4531, Window Function support
|
|
RetColsVector windowCols;
|
|
RetColsVector windowExps;
|
|
RetColsVector windowDels;
|
|
std::set<uint64_t> windowSet;
|
|
RetColsVector wfqOrderby;
|
|
uint64_t wfqLimitStart;
|
|
uint64_t wfqLimitCount;
|
|
// workaround for expression of windowfunction in IN/EXISTS sub-query
|
|
// std::map<uint32_t, RetColsVector> exprWinfuncListMap;
|
|
|
|
// Flag to tell us we are in local PM only query mode
|
|
uint32_t localQuery;
|
|
|
|
boost::uuids::uuid uuid;
|
|
|
|
// @bug4021, column map for all pseudo column queries
|
|
std::map<uint64_t, execplan::SRCP> tableColMap;
|
|
std::set<uint64_t> pseudoColTable;
|
|
|
|
/* Disk-based join vars */
|
|
boost::shared_ptr<int64_t> smallSideUsage;
|
|
boost::shared_ptr<int64_t> umMemLimit;
|
|
int64_t smallSideLimit; // need to get these from a session var in execplan
|
|
int64_t largeSideLimit;
|
|
uint64_t partitionSize;
|
|
uint32_t djsMaxPartitionTreeDepth;
|
|
bool djsForceRun;
|
|
bool isDML;
|
|
long timeZone;
|
|
uint32_t maxPmJoinResultCount;
|
|
|
|
// This is for tracking any dynamically allocated ParseTree objects
|
|
// in simpleScalarFilterToParseTree() for later deletion in
|
|
// ~csep() or csep.unserialize()
|
|
std::vector<execplan::ParseTree*> dynamicParseTreeVec;
|
|
|
|
PrimitiveServerThreadPools primitiveServerThreadPools;
|
|
// Represents a `join edges` and `join id` to be restored in `join order` part.
|
|
std::map<std::pair<uint32_t, uint32_t>, int64_t> joinEdgesToRestore;
|
|
// Represents a pair of `table` to be on a large side and weight associated with that table.
|
|
std::unordered_map<uint32_t, int64_t> tablesForLargeSide;
|
|
// Represents a pair of `tupleId` and `FunctionColumnInfo`.
|
|
std::unordered_map<uint32_t, FunctionColumnInfo> functionColumnMap;
|
|
|
|
private:
|
|
// defaults okay
|
|
// JobInfo(const JobInfo& rhs);
|
|
// JobInfo& operator=(const JobInfo& rhs);
|
|
};
|
|
|
|
//------------------------------------------------------------------------------
|
|
// namespace scoped functions
|
|
//------------------------------------------------------------------------------
|
|
|
|
/** @brief Returns the table alias for the specified column
|
|
*
|
|
*/
|
|
std::string extractTableAlias(const execplan::SimpleColumn* sc);
|
|
|
|
/** @brief Returns the table alias for the specified column
|
|
*
|
|
*/
|
|
std::string extractTableAlias(const execplan::SSC& sc);
|
|
|
|
/** @brief Returns OID associated with colType if it is a dictionary column
|
|
*
|
|
*/
|
|
execplan::CalpontSystemCatalog::OID isDictCol(const execplan::CalpontSystemCatalog::ColType& colType);
|
|
|
|
/** @brief Determines if colType is a character column
|
|
*
|
|
*/
|
|
bool isCharCol(const execplan::CalpontSystemCatalog::ColType& colType);
|
|
|
|
/** @brief Returns OID associated with a table
|
|
*
|
|
*/
|
|
execplan::CalpontSystemCatalog::OID tableOid(const execplan::SimpleColumn* sc,
|
|
boost::shared_ptr<execplan::CalpontSystemCatalog> cat);
|
|
|
|
/** @brief Returns the unique ID to be used in tupleInfo
|
|
*
|
|
*/
|
|
uint32_t getTupleKey(JobInfo& jobInfo, const execplan::SimpleColumn* sc, bool add = false);
|
|
uint32_t getTableKey(const JobInfo& jobInfo, execplan::CalpontSystemCatalog::OID tableOid,
|
|
const std::string& alias, const std::string& schema, const std::string& view);
|
|
uint32_t getTupleKey(JobInfo& jobInfo, const execplan::SRCP& srcp, bool add = false);
|
|
uint32_t getTableKey(const JobInfo& jobInfo, uint32_t cid);
|
|
uint32_t getTableKey(JobInfo& jobInfo, JobStep* js);
|
|
|
|
void updateTableKey(uint32_t cid, uint32_t tid, JobInfo& jobInfo);
|
|
|
|
uint32_t getExpTupleKey(const JobInfo& jobInfo, uint64_t eid, bool cr = false);
|
|
|
|
uint32_t makeTableKey(JobInfo& jobInfo, const execplan::SimpleColumn* sc);
|
|
uint32_t makeTableKey(JobInfo& jobInfo, execplan::CalpontSystemCatalog::OID tableOid,
|
|
const std::string& tbl_name, const std::string& tbl_alias, const std::string& sch_name,
|
|
const std::string& vw_name, uint64_t engine = 0);
|
|
|
|
/** @brief Returns the tupleInfo associate with the (table, column) key pair
|
|
*
|
|
*/
|
|
TupleInfo getTupleInfo(uint32_t columnKey, const JobInfo& jobInfo);
|
|
|
|
/** @brief set tuple info for simple column
|
|
*
|
|
*/
|
|
TupleInfo setTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct,
|
|
execplan::CalpontSystemCatalog::OID col_oid, JobInfo& jobInfo,
|
|
execplan::CalpontSystemCatalog::OID tbl_oid, const execplan::SimpleColumn* sc,
|
|
const std::string& alias);
|
|
|
|
/** @brief set tuple info for expressions
|
|
*
|
|
*/
|
|
TupleInfo setExpTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct, uint64_t expressionId,
|
|
const std::string& alias, JobInfo& jobInfo, bool rc = false);
|
|
|
|
TupleInfo setExpTupleInfo(const execplan::ReturnedColumn* rc, JobInfo& jobInfo);
|
|
|
|
/** @brief add an aggregate column info
|
|
*
|
|
*/
|
|
void addAggregateColumn(execplan::ReturnedColumn*, int, RetColsVector&, JobInfo&);
|
|
|
|
void makeJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps,
|
|
JobStepVector& projectSteps, DeliveredTableMap& deliverySteps);
|
|
|
|
void makeUnionJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
|
|
JobStepVector& querySteps, JobStepVector&, DeliveredTableMap& deliverySteps);
|
|
|
|
void updateDerivedColumn(JobInfo&, execplan::SimpleColumn*, execplan::CalpontSystemCatalog::ColType&);
|
|
|
|
bool filterWithDictionary(execplan::CalpontSystemCatalog::OID dictOid, uint64_t n);
|
|
|
|
bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColType& ct1,
|
|
const execplan::CalpontSystemCatalog::ColType& ct2, bool forJoin = true);
|
|
bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColDataType& dt1, uint32_t scale1,
|
|
const execplan::CalpontSystemCatalog::ColDataType& dt2, uint32_t scale2,
|
|
bool forJoin = true);
|
|
|
|
} // namespace joblist
|