1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-09-11 08:50:45 +03:00
Files
mariadb-columnstore-engine/dbcon/joblist/jlf_common.h
Serguey Zefirov 39a976c39a fix(ubsan): MCOL-5844 - iron out UBSAN reports
The most important fix here is for a possible buffer overrun in the
DATEFORMAT() function. A "%W" format, repeated enough times, would
overflow the 256-byte result buffer. Now we use ostringstream to
construct the result, which is safe.

Changes in the date/time projection functions made me fix a difference between
our behavior and the server's. The new, better behavior is reflected in changes
to the tests' results.

Also, there was incorrect logic in TRUNCATE() and ROUND() functions in
computing the decimal "shift."
2024-12-10 20:30:58 +04:00

502 lines
16 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2019 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
// $Id: jlf_common.h 9702 2013-07-17 19:08:07Z xlou $
/** @file jlf_common.h
*
*/
#pragma once
#include <map>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/shared_ptr.hpp>
#include <boost/uuid/uuid.hpp>

#include "calpontexecutionplan.h"
#include "calpontselectexecutionplan.h"
#include "calpontsystemcatalog.h"
#include "simplecolumn.h"
#include "dbrm.h"
#include "joblist.h"
#include "jobstep.h"
#include "groupconcat.h"
#include "jsonarrayagg.h"
#include "jl_logger.h"
#include "resourcemanager.h"
#include "rowgroup.h"
#include "../../primitives/primproc/primitiveserverthreadpools.h"
// Forward declarations of execplan types that this header only names through
// pointers or references.
namespace execplan
{
class SimpleColumn;
class AggregateColumn;
}  // namespace execplan
namespace joblist
{
// ANSI escape sequences used when printing error messages to the screen.
const std::string boldStart = "\033[0;1m";
const std::string boldStop = "\033[0;39m";

// Possible values for the constant-column state (see JobInfo::constantCol).
constexpr int8_t CONST_COL_NONE = 0;
constexpr int8_t CONST_COL_EXIST = 1;
constexpr int8_t CONST_COL_ONLY = 2;

// Pretend all expressions belong to the "virtual" table EXPRESSION, i.e. (CNX_EXP_TABLE_ID, expression).
// CNX_EXP_TABLE_ID (999) is never used for a user table or column, so there is no conflict in queries.
constexpr int32_t CNX_EXP_TABLE_ID = 999;
struct TupleInfo
{
TupleInfo(uint32_t w = 0, uint32_t o = 0, uint32_t k = -1, uint32_t t = -1, uint32_t s = 0, uint32_t p = 0,
execplan::CalpontSystemCatalog::ColDataType dt = execplan::CalpontSystemCatalog::BIT,
uint32_t csn = 8)
: width(w), oid(o), key(k), tkey(t), scale(s), precision(p), dtype(dt), csNum(csn)
{
}
~TupleInfo()
{
}
uint32_t width;
uint32_t oid;
uint32_t key;
uint32_t tkey;
uint32_t scale;
uint32_t precision;
execplan::CalpontSystemCatalog::ColDataType dtype;
uint32_t csNum; // For collations
};
// This struct holds information about `FunctionColumn`.
struct FunctionColumnInfo
{
  // Function argument.
  uint64_t associatedColumnOid;
  // Function name.
  std::string functionName;

  // `funcName` is taken by value, so it is moved (not copied a second time)
  // into the member.  Callers passing an lvalue keep their string intact;
  // callers passing a temporary pay for no copy at all.
  FunctionColumnInfo(uint64_t colOid, std::string funcName)
   : associatedColumnOid(colOid), functionName(std::move(funcName))
  {
  }
};
// Join description used for compound joins: the key columns on each side, the
// join type per key, and the id of the join.
struct JoinData
{
  int64_t fJoinId = -1;  // -1 until an id is assigned
  std::vector<uint32_t> fLeftKeys;
  std::vector<uint32_t> fRightKeys;
  std::vector<JoinType> fTypes;  // joblisttypes.h: INNER, LEFTOUTER, RIGHTOUTER
  bool fTypeless = false;

  JoinData() = default;
};
// Stack of job step vectors.
using JobStepVectorStack = std::stack<JobStepVector>;
// Dictionary OID -> column OID.
using DictOidToColOidMap =
    std::map<execplan::CalpontSystemCatalog::OID, execplan::CalpontSystemCatalog::OID>;
// Sequence of column descriptors.
using TupleInfoVector = std::vector<TupleInfo>;
// Column descriptors looked up by a 32-bit key.
using TupleInfoMap = std::map<uint32_t, TupleInfo>;
// for subquery support
/** @brief Identifies a table or column (real or subquery-derived) by id,
 *         alias, schema, view, pseudo type and subquery id.
 *
 * Instances are ordered and compared by the free `operator<` / `operator==`
 * declared after this struct, and serve as keys of `std::map` containers.
 */
struct UniqId
{
  int fId;  // OID for real table, sequence # for subquery
  // std::string fName;  // name (table alias + [column name, if column])
  std::string fTable;   // table name (table alias)
  std::string fSchema;  // schema name
  std::string fView;    // view name
  uint32_t fPseudo;     // pseudo type
  // uint64_t fEngine;   // InfiniDB == 0
  uint64_t fSubId;  // subquery ID

  // fPseudo is set to 0 here, matching the default of the `pi` argument of the
  // value constructor below.  Leaving it indeterminate would make any copy or
  // comparison of a default-constructed UniqId read an uninitialized value.
  UniqId() : fId(-1), fPseudo(0), fSubId(-1)
  {
  }

  /** @param i  id
   *  @param t  table name (alias)
   *  @param s  schema name
   *  @param v  view name
   *  @param pi pseudo type, 0 by default
   *  @param l  subquery id, uint64_t(-1) by default
   */
  UniqId(int i, const std::string& t, const std::string& s, const std::string& v, uint32_t pi = 0,
         uint64_t l = -1)
   : fId(i), fTable(t), fSchema(s), fView(v), fPseudo(pi), fSubId(l)
  {
  }

  // Built from a simple column; both are defined out of line.
  UniqId(const execplan::SimpleColumn* sc);
  UniqId(int o, const execplan::SimpleColumn* sc);

  std::string toString() const;
};
bool operator<(const struct UniqId& x, const struct UniqId& y);
bool operator==(const struct UniqId& x, const struct UniqId& y);
// UniqId -> 32-bit tuple key
using TupleKeyMap = std::map<UniqId, uint32_t>;
// typedef vector<SRCP> RetColsVector;
using RetColsVector = execplan::CalpontSelectExecutionPlan::ReturnedColumnList;
// join data between table pairs
using TableJoinMap = std::map<std::pair<uint32_t, uint32_t>, JoinData>;
/** @brief Bookkeeping for tuple keys: the key counter plus the lookup tables
 *         that are indexed by key.
 */
struct TupleKeyInfo
{
  uint32_t nextKey = 0;  // starts at 0
  TupleKeyMap tupleKeyMap;
  std::vector<UniqId> tupleKeyVec;
  std::vector<std::string> tupleKeyToName;
  std::vector<bool> crossEngine;

  // TODO: better organize these structs
  std::map<uint32_t, execplan::CalpontSystemCatalog::OID> tupleKeyToTableOid;
  std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> colType;
  std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> token2DictTypeMap;
  std::map<uint32_t, std::string> keyName;
  std::map<uint32_t, uint32_t> colKeyToTblKey;
  std::map<uint32_t, uint32_t> dictKeyMap;  // map token key to dictionary key
  DictOidToColOidMap dictOidToColOid;       // map dictionary OID to column OID
  std::map<uint32_t, uint32_t> pseudoType;  // key to pseudo column type
  std::set<uint32_t> functionJoinKeys;      // key used in function join
  TupleInfoMap tupleInfoMap;

  TupleKeyInfo() = default;
};
//------------------------------------------------------------------------------
/** @brief This struct maintains state for the query processing
 *
 * The constructor reads several limits from the ResourceManager it is given,
 * so `r` must not be null.  Every scalar and raw-pointer member is given a
 * defined value at construction.  JobInfo relies on the compiler-generated
 * copy operations, and copying an object with indeterminate members
 * (notably `bool isDML`) is undefined behaviour.
 */
//------------------------------------------------------------------------------
struct JobInfo
{
  JobInfo(ResourceManager* r)
   : rm(r)
   , sessionId(0)
   , txnId(0)
   , statementId(0)
   , maxBuckets(rm->getHjMaxBuckets())
   , maxElems(rm->getHjMaxElems())
   , flushInterval(rm->getJLFlushInterval())
   , fifoSize(rm->getJlFifoSize())
   , logger(new Logger())
   , traceFlags(0)
   , projectingTableOID(nullptr)
   , isExeMgr(false)
   , trace(false)
   , tryTuples(false)
   , constantCol(CONST_COL_NONE)
   , hasDistinct(false)
   , hasAggregation(false)
   , hasRollup(false)
   , limitStart(0)
   , limitCount(-1)
   , orderByThreads(0)
   , joinNum(0)
   , joinNumInView(0)
   , subLevel(0)
   , subNum(0)
   , subId(0)
   , pJobInfo(nullptr)
   , constantFalse(false)
   , cntStarPos(-1)
   , stringScanThreshold(1)
   , jobListPtr(nullptr)
   , stringTableThreshold(0)
   , wfqLimitStart(0)
   , wfqLimitCount(-1)
   , localQuery(0)
   , uuid()
   , smallSideLimit(0)
   , largeSideLimit(0)
   , partitionSize(0)
   , djsMaxPartitionTreeDepth(0)
   , djsForceRun(false)
   , isDML(false)
   , timeZone(0)
   , maxPmJoinResultCount(1048576)
  {
  }

  ResourceManager* rm;
  uint32_t sessionId;
  uint32_t txnId;
  BRM::QueryContext verId;
  uint32_t statementId;
  std::string queryType;
  boost::shared_ptr<execplan::CalpontSystemCatalog> csc;
  int maxBuckets;
  uint64_t maxElems;
  JobStepVectorStack stack;
  uint32_t flushInterval;
  uint32_t fifoSize;
  SPJL logger;
  uint32_t traceFlags;
  SErrorInfo errorInfo;
  execplan::CalpontSystemCatalog::OID* projectingTableOID;  // DeliveryWSDLs get a reference to this
  bool isExeMgr;
  bool trace;
  bool tryTuples;
  int8_t constantCol;
  TupleInfoVector pjColList;

  // aggregation
  bool hasDistinct;
  bool hasAggregation;
  bool hasRollup;
  std::vector<uint32_t> groupByColVec;
  std::vector<uint32_t> distinctColVec;
  std::vector<uint32_t> expressionVec;
  std::vector<std::pair<uint32_t, int> > returnedColVec;

  // order by and limit
  std::vector<std::pair<uint32_t, bool> > orderByColVec;
  uint64_t limitStart;
  uint64_t limitCount;  // uint64_t(-1) by default
  uint32_t orderByThreads;

  // tupleInfo
  boost::shared_ptr<TupleKeyInfo> keyInfo;

  // Skip the dictionary step if the real string does not need to be projected.
  // In most cases the string is used for return or comparison, so the default is false.
  // When setting to false, no need to check: false overwrites true.
  // When setting to true, a check is needed: true cannot overwrite false.
  std::map<uint32_t, bool> tokenOnly;

  // unique ID list of the tables in from clause
  std::vector<uint32_t> tableList;

  // table join map
  TableJoinMap tableJoinMap;

  // for expression
  JobStepVector crossTableExpressions;
  JobStepVector returnedExpressions;

  // @bug3683, function join
  std::vector<JobStep*> functionJoins;  // store expressions can be converted to joins

  // for function on aggregation
  RetColsVector deliveredCols;    // columns to be sent to connector
  RetColsVector nonConstCols;     // non-constant columns
  RetColsVector nonConstDelCols;  // delivered non-constant columns
  RetColsVector projectionCols;   // columns for projection
  std::multimap<execplan::ReturnedColumn*, execplan::ReturnedColumn*> cloneAggregateColMap;
  std::vector<std::pair<int, int> > aggEidIndexList;

  // table pairs with incompatible join which is treated as expression
  std::map<uint32_t, uint32_t> incompatibleJoinMap;

  // bug 1573 & 3391, having
  SJSTEP havingStep;
  JobStepVector havingStepVec;

  // bug 2634, 5311 and 5374, outjoin and predicates
  std::set<uint32_t> outerOnTable;
  // MCOL-4715.
  std::set<uint32_t> innerOnTable;
  std::set<uint32_t> tableHasIsNull;
  JobStepVector outerJoinExpressions;

  // bug 3759, join in order
  // mixed outer join
  std::map<int, uint64_t> tableSize;
  int64_t joinNum;
  // MCOL-5061, MCOL-334.
  int64_t joinNumInView;

  // for subquery
  boost::shared_ptr<int> subCount;  // # of subqueries in the query statement
  int subLevel;                     // subquery level
  int subNum;                       // # of subqueries @ level n
  int subId;                        // id of current subquery
  JobInfo* pJobInfo;                // jobinfo of outer query
  bool constantFalse;               // has constant false filter
  std::string subAlias;             // unique alias to identify the subquery
  JobStepVector correlateSteps;
  JobStepVector selectAndFromSubs;
  std::set<uint64_t> returnColSet;
  std::map<UniqId, execplan::CalpontSystemCatalog::ColType> vtableColTypes;

  // step to process orderby, limit and fill in constants
  SJSTEP annexStep;

  // @bug3475, aggregate constant column <position, aggregate column>
  std::map<uint64_t, execplan::SRCP> constAggregate;
  int64_t cntStarPos;  // position of count(*)

  // @bug3321, dictionary scan setting, HWM = stringScanThreshold -1
  uint64_t stringScanThreshold;

  // @bug3362, group_concat
  RetColsVector groupConcatCols;
  GroupConcatInfo groupConcatInfo;

  // @bug3736, column map
  std::map<uint32_t, std::vector<uint32_t> > columnMap;

  // @bug3438, joblist for trace/stats
  JobList* jobListPtr;  // just reference, NOT delete by JobInfo

  // WORKAROUND for join FE limitation (join Id to expression tables map)
  std::map<uint32_t, std::set<uint32_t> > joinFeTableMap;
  uint32_t stringTableThreshold;

  // @bug4531, Window Function support
  RetColsVector windowCols;
  RetColsVector windowExps;
  RetColsVector windowDels;
  std::set<uint64_t> windowSet;
  RetColsVector wfqOrderby;
  uint64_t wfqLimitStart;
  uint64_t wfqLimitCount;  // uint64_t(-1) by default
  // workaround for expression of windowfunction in IN/EXISTS sub-query
  // std::map<uint32_t, RetColsVector> exprWinfuncListMap;

  // Flag to tell us we are in local PM only query mode
  uint32_t localQuery;
  boost::uuids::uuid uuid;

  // @bug4021, column map for all pseudo column queries
  std::map<uint64_t, execplan::SRCP> tableColMap;
  std::set<uint64_t> pseudoColTable;

  /* Disk-based join vars */
  boost::shared_ptr<int64_t> smallSideUsage;
  boost::shared_ptr<int64_t> umMemLimit;
  int64_t smallSideLimit;  // need to get these from a session var in execplan
  int64_t largeSideLimit;
  uint64_t partitionSize;
  uint32_t djsMaxPartitionTreeDepth;
  bool djsForceRun;
  bool isDML;
  long timeZone;
  uint32_t maxPmJoinResultCount;

  // This is for tracking any dynamically allocated ParseTree objects
  // in simpleScalarFilterToParseTree() for later deletion in
  // ~csep() or csep.unserialize()
  std::vector<execplan::ParseTree*> dynamicParseTreeVec;
  PrimitiveServerThreadPools primitiveServerThreadPools;

  // Represents a `join edges` and `join id` to be restored in `join order` part.
  std::map<std::pair<uint32_t, uint32_t>, int64_t> joinEdgesToRestore;
  // Represents a pair of `table` to be on a large side and weight associated with that table.
  std::unordered_map<uint32_t, int64_t> tablesForLargeSide;
  // Represents a pair of `tupleId` and `FunctionColumnInfo`.
  std::unordered_map<uint32_t, FunctionColumnInfo> functionColumnMap;

 private:
  // defaults okay
  // JobInfo(const JobInfo& rhs);
  // JobInfo& operator=(const JobInfo& rhs);
};
//------------------------------------------------------------------------------
// namespace scoped functions
//------------------------------------------------------------------------------
/** @brief Returns the table alias for the specified column
*
* @param sc simple column, passed as a raw pointer.
*/
std::string extractTableAlias(const execplan::SimpleColumn* sc);
/** @brief Returns the table alias for the specified column
*
* Overload taking an execplan::SSC instead of a raw pointer.
* NOTE(review): SSC is presumably a shared pointer to SimpleColumn — confirm.
*/
std::string extractTableAlias(const execplan::SSC& sc);
/** @brief Returns OID associated with colType if it is a dictionary column
*
* NOTE(review): the value returned for a non-dictionary column is not visible
* from this header — confirm against the implementation before testing it.
*/
execplan::CalpontSystemCatalog::OID isDictCol(const execplan::CalpontSystemCatalog::ColType& colType);
/** @brief Determines if colType is a character column
*
* @return true when colType is a character column.
*/
bool isCharCol(const execplan::CalpontSystemCatalog::ColType& colType);
/** @brief Returns OID associated with a table
*
* @param sc  simple column naming the table.
* @param cat system catalog handle (shared pointer, passed by value).
*/
execplan::CalpontSystemCatalog::OID tableOid(const execplan::SimpleColumn* sc,
boost::shared_ptr<execplan::CalpontSystemCatalog> cat);
/** @brief Returns the unique ID to be used in tupleInfo
*
* The declarations below are the tuple-key / table-key helpers; all are
* defined out of line.  The getTupleKey() overloads accept either a raw
* SimpleColumn pointer or an execplan::SRCP.
* NOTE(review): `add` (default false) presumably asks for the key to be
* created when it does not exist yet — confirm against the implementation.
*/
uint32_t getTupleKey(JobInfo& jobInfo, const execplan::SimpleColumn* sc, bool add = false);
// Table key looked up by table OID, alias, schema and view; jobInfo is not modified (const reference).
uint32_t getTableKey(const JobInfo& jobInfo, execplan::CalpontSystemCatalog::OID tableOid,
const std::string& alias, const std::string& schema, const std::string& view);
uint32_t getTupleKey(JobInfo& jobInfo, const execplan::SRCP& srcp, bool add = false);
// Table key for the 32-bit id `cid`.
// NOTE(review): `cid` is presumably a column key — confirm.
uint32_t getTableKey(const JobInfo& jobInfo, uint32_t cid);
// Table key for a job step; takes jobInfo by non-const reference.
uint32_t getTableKey(JobInfo& jobInfo, JobStep* js);
// NOTE(review): presumably re-associates key `cid` with table key `tid` in jobInfo — confirm.
void updateTableKey(uint32_t cid, uint32_t tid, JobInfo& jobInfo);
// Tuple key for the expression with id `eid`.
// NOTE(review): the meaning of `cr` (default false) is not visible from this header.
uint32_t getExpTupleKey(const JobInfo& jobInfo, uint64_t eid, bool cr = false);
// makeTableKey(): one overload takes the table from a simple column, the other
// takes its attributes explicitly; `engine` defaults to 0.
uint32_t makeTableKey(JobInfo& jobInfo, const execplan::SimpleColumn* sc);
uint32_t makeTableKey(JobInfo& jobInfo, execplan::CalpontSystemCatalog::OID tableOid,
const std::string& tbl_name, const std::string& tbl_alias, const std::string& sch_name,
const std::string& vw_name, uint64_t engine = 0);
/** @brief Returns the tupleInfo associated with the (table, column) key pair
*
* @param columnKey key of the column.
* @param jobInfo   query state to look the key up in (not modified).
* @return the TupleInfo, by value.
*/
TupleInfo getTupleInfo(uint32_t columnKey, const JobInfo& jobInfo);
/** @brief set tuple info for simple column
*
* @param ct      column type.
* @param col_oid OID of the column.
* @param jobInfo query state; taken by non-const reference.
* @param tbl_oid OID of the table.
* @param sc      the simple column.
* @param alias   alias string.
* @return a TupleInfo, by value.
*/
TupleInfo setTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct,
execplan::CalpontSystemCatalog::OID col_oid, JobInfo& jobInfo,
execplan::CalpontSystemCatalog::OID tbl_oid, const execplan::SimpleColumn* sc,
const std::string& alias);
/** @brief set tuple info for expressions
*
* Two overloads: one takes the column type, expression id and alias
* explicitly, the other takes a ReturnedColumn.
* NOTE(review): the meaning of the bool `rc` (default false) in the first
* overload is not visible from this header.
*/
TupleInfo setExpTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct, uint64_t expressionId,
const std::string& alias, JobInfo& jobInfo, bool rc = false);
TupleInfo setExpTupleInfo(const execplan::ReturnedColumn* rc, JobInfo& jobInfo);
/** @brief add an aggregate column info
*
* NOTE(review): parameters are unnamed in this declaration; the role of the
* int and of the RetColsVector must be checked in the implementation.
*/
void addAggregateColumn(execplan::ReturnedColumn*, int, RetColsVector&, JobInfo&);
// Job step construction from an execution plan.  Results are returned through
// the non-const reference parameters (querySteps, projectSteps, deliverySteps).
void makeJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps,
JobStepVector& projectSteps, DeliveredTableMap& deliverySteps);
// Same shape as makeJobSteps(), for plans containing a union (per the function name).
// The fourth parameter is unnamed here.
void makeUnionJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
JobStepVector& querySteps, JobStepVector&, DeliveredTableMap& deliverySteps);
// NOTE(review): presumably refreshes the column type of a column coming from a
// derived table; the ColType is passed by non-const reference — confirm.
void updateDerivedColumn(JobInfo&, execplan::SimpleColumn*, execplan::CalpontSystemCatalog::ColType&);
// NOTE(review): decides something about filtering through the dictionary
// `dictOid`; the meaning of `n` is not visible from this header.
bool filterWithDictionary(execplan::CalpontSystemCatalog::OID dictOid, uint64_t n);
// Type compatibility check, given either full ColType objects or
// (data type, scale) pairs.  `forJoin` defaults to true in both overloads.
bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColType& ct1,
const execplan::CalpontSystemCatalog::ColType& ct2, bool forJoin = true);
bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColDataType& dt1, uint32_t scale1,
const execplan::CalpontSystemCatalog::ColDataType& dt2, uint32_t scale2,
bool forJoin = true);
} // namespace joblist