1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

[MCOL-4709] Disk-based aggregation

* Introduce multigeneration aggregation

* Do not save unused part of RGDatas to disk
* Add IO error explanation (strerror)

* Reduce memory usage while aggregating
* introduce in-memory generations for better memory utilization

* Try to limit the qty of buckets at a low limit

* Refactor disk aggregation a bit
* pass calculated hash into RowAggregation
* try to keep some RGData with free space in memory

* do not dump more than half of rowgroups to disk if generations are
  allowed, instead start a new generation
* for each thread shift the first processed bucket at each iteration,
  so the generations start more evenly

* Unify temp data location

* Explicitly create temp subdirectories
  whether disk aggregation/join are enabled or not
This commit is contained in:
Alexey Antipovsky
2021-01-15 18:52:13 +03:00
parent 3537c0d635
commit 475104e4d3
24 changed files with 5932 additions and 906 deletions

2454
utils/common/robin_hood.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@
MA 02110-1301, USA. */
#include <sys/prctl.h>
#include "threadnaming.h"
namespace utils
{
@ -23,4 +24,11 @@ namespace utils
{
prctl(PR_SET_NAME, threadName, 0, 0, 0);
}
/** @brief Return the name of the calling thread.
 *
 * Queries the kernel with prctl(PR_GET_NAME). The kernel writes at most
 * 16 bytes (including the terminating NUL) into the buffer.
 *
 * @returns the thread name, or an empty string if prctl() fails
 */
std::string getThreadName()
{
    // Zero-initialised so the contents are well defined even when prctl()
    // fails and leaves the buffer untouched.
    char buf[32] = {};

    if (prctl(PR_GET_NAME, buf, 0, 0, 0) != 0)
    {
        return std::string();
    }

    return std::string(buf);
}
} // end of namespace

View File

@ -17,8 +17,11 @@
#ifndef H_SETTHREADNAME
#define H_SETTHREADNAME
#include <string>
namespace utils
{
void setThreadName(const char *threadName);
std::string getThreadName();
} // end of namespace
#endif

View File

@ -59,6 +59,9 @@ namespace fs = boost::filesystem;
#include "installdir.h"
#ifdef _MSC_VER
#include "idbregistry.h"
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "bytestream.h"
@ -673,6 +676,24 @@ const vector<string> Config::enumSection(const string& section)
return fParser.enumSection(fDoc, section);
}
/** @brief Build the temporary directory path for the given purpose.
 *
 * The base directory is taken from SystemConfig/SystemTempFileDir; when that
 * setting is empty, /tmp/columnstore_tmp_files is used. A purpose-specific
 * subdirectory ("joins/" or "aggregates/") is appended.
 *
 * @param what purpose the directory is needed for
 * @returns the directory path with a trailing slash, or an empty string for
 *          an unknown purpose
 */
std::string Config::getTempFileDir(Config::TempDirPurpose what)
{
    std::string baseDir = getConfig("SystemConfig", "SystemTempFileDir");

    if (baseDir.empty())
        baseDir = "/tmp/columnstore_tmp_files";

    baseDir += "/";

    if (what == TempDirPurpose::Joins)
        return baseDir + "joins/";

    if (what == TempDirPurpose::Aggregates)
        return baseDir + "aggregates/";

    // NOTREACHED
    return {};
}
} //namespace config
// vim:ts=4 sw=4:

View File

@ -203,6 +203,14 @@ public:
*/
EXPORT const std::vector<std::string> enumSection(const std::string& section);
/** @brief Purposes for which getTempFileDir() can provide a temporary
 *         directory; each one selects its own subdirectory.
 */
enum class TempDirPurpose
{
Joins, ///< disk joins (the "joins/" subdirectory)
Aggregates ///< disk-based aggregation (the "aggregates/" subdirectory)
};
/** @brief Return the temporary directory path for the specified purpose
 *
 * The base directory is read from SystemConfig/SystemTempFileDir and falls
 * back to /tmp/columnstore_tmp_files when that setting is empty.
 *
 * @param what purpose selecting the subdirectory ("joins/" or "aggregates/")
 * @returns directory path ending with a trailing slash
 */
EXPORT std::string getTempFileDir(TempDirPurpose what);
protected:
/** @brief parse the XML file
*

View File

@ -129,7 +129,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) :
// Instead, each will double in size, giving a capacity of 8GB -> 16 -> 32, and so on.
// bucketCount = jp.bucketCount;
bucketCount = 2;
filenamePrefix = startup::StartUp::tmpDir();
config::Config* config = config::Config::makeConfig();
filenamePrefix = config->getTempFileDir(config::Config::TempDirPurpose::Joins);
filenamePrefix += "/Columnstore-join-data-";

View File

@ -100,6 +100,10 @@
2053 ERR_FUNC_OUT_OF_RANGE_RESULT The result is out of range for function %1% using value(s): %2% %3%
2054 ERR_DISKAGG_ERROR Unknown error while aggregation.
2055 ERR_DISKAGG_TOO_BIG Not enough memory to make disk-based aggregation. Raise TotalUmMemory if possible.
2056 ERR_DISKAGG_FILEIO_ERROR There was an IO error during a disk-based aggregation: %1%
# Sub-query errors
3001 ERR_NON_SUPPORT_SUB_QUERY_TYPE This subquery type is not supported yet.
3002 ERR_MORE_THAN_1_ROW Subquery returns more than 1 row.

View File

@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} )
########### next target ###############
set(rowgroup_LIB_SRCS rowaggregation.cpp rowgroup.cpp)
set(rowgroup_LIB_SRCS rowaggregation.cpp rowgroup.cpp rowstorage.cpp)
#librowgroup_la_CXXFLAGS = $(march_flags) $(AM_CXXFLAGS)

File diff suppressed because it is too large Load Diff

View File

@ -30,7 +30,8 @@
*/
#include <cstring>
#include <stdint.h>
#include <cstdint>
#include <utility>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
@ -54,6 +55,9 @@
#include "constantcolumn.h"
#include "resourcemanager.h"
#include "rowstorage.h"
// To do: move code that depends on joblist to a proper subsystem.
namespace joblist
{
@ -63,17 +67,6 @@ class ResourceManager;
namespace rowgroup
{
struct RowPosition
{
uint64_t group: 48;
uint64_t row: 16;
static const uint64_t MSB = 0x800000000000ULL; //48th bit is set
inline RowPosition(uint64_t g, uint64_t r) : group(g), row(r) { }
inline RowPosition() { }
};
/** @brief Enumerates aggregate functions supported by RowAggregation
*/
enum RowAggFunctionType
@ -143,9 +136,9 @@ struct RowAggGroupByCol
* outputColIndex argument should be omitted if this GroupBy
* column is not to be included in the output.
*/
RowAggGroupByCol(int32_t inputColIndex, int32_t outputColIndex = -1) :
explicit RowAggGroupByCol(int32_t inputColIndex, int32_t outputColIndex = -1) :
fInputColumnIndex(inputColIndex), fOutputColumnIndex(outputColIndex) {}
~RowAggGroupByCol() {}
~RowAggGroupByCol() = default;
uint32_t fInputColumnIndex;
uint32_t fOutputColumnIndex;
@ -184,7 +177,7 @@ struct RowAggFunctionCol
int32_t inputColIndex, int32_t outputColIndex, int32_t auxColIndex = -1) :
fAggFunction(aggFunction), fStatsFunction(stats), fInputColumnIndex(inputColIndex),
fOutputColumnIndex(outputColIndex), fAuxColumnIndex(auxColIndex) {}
virtual ~RowAggFunctionCol() {}
virtual ~RowAggFunctionCol() = default;
virtual void serialize(messageqcpp::ByteStream& bs) const;
virtual void deserialize(messageqcpp::ByteStream& bs);
@ -237,10 +230,10 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol
bInterrupted(false)
{}
virtual ~RowUDAFFunctionCol() {}
~RowUDAFFunctionCol() override = default;
virtual void serialize(messageqcpp::ByteStream& bs) const;
virtual void deserialize(messageqcpp::ByteStream& bs);
void serialize(messageqcpp::ByteStream& bs) const override;
void deserialize(messageqcpp::ByteStream& bs) override;
mcsv1sdk::mcsv1Context fUDAFContext; // The UDAF context
bool bInterrupted; // Shared by all the threads
@ -312,104 +305,18 @@ struct ConstantAggData
ConstantAggData() : fOp(ROWAGG_FUNCT_UNDEFINE), fIsNull(false)
{}
ConstantAggData(const std::string& v, RowAggFunctionType f, bool n) :
fConstValue(v), fOp(f), fIsNull(n)
ConstantAggData(std::string v, RowAggFunctionType f, bool n) :
fConstValue(std::move(v)), fOp(f), fIsNull(n)
{}
ConstantAggData(const std::string& v, const std::string u, RowAggFunctionType f, bool n) :
fConstValue(v), fUDAFName(u), fOp(f), fIsNull(n)
ConstantAggData(std::string v, std::string u, RowAggFunctionType f, bool n) :
fConstValue(std::move(v)), fUDAFName(std::move(u)), fOp(f), fIsNull(n)
{}
};
typedef boost::shared_ptr<RowAggGroupByCol> SP_ROWAGG_GRPBY_t;
typedef boost::shared_ptr<RowAggFunctionCol> SP_ROWAGG_FUNC_t;
class RowAggregation;
class AggHasher
{
public:
AggHasher(const Row& row, Row** tRow, uint32_t keyCount, RowAggregation* ra);
inline uint64_t operator()(const RowPosition& p) const;
private:
explicit AggHasher();
RowAggregation* agg;
Row** tmpRow;
mutable Row r;
uint32_t lastKeyCol;
};
class AggComparator
{
public:
AggComparator(const Row& row, Row** tRow, uint32_t keyCount, RowAggregation* ra);
inline bool operator()(const RowPosition&, const RowPosition&) const;
private:
explicit AggComparator();
RowAggregation* agg;
Row** tmpRow;
mutable Row r1, r2;
uint32_t lastKeyCol;
};
class KeyStorage
{
public:
KeyStorage(const RowGroup& keyRG, Row** tRow);
inline RowPosition addKey();
inline uint64_t getMemUsage();
private:
Row row;
Row** tmpRow;
RowGroup rg;
std::vector<RGData> storage;
uint64_t memUsage;
friend class ExternalKeyEq;
friend class ExternalKeyHasher;
};
class ExternalKeyHasher
{
public:
ExternalKeyHasher(const RowGroup& keyRG, KeyStorage* ks, uint32_t keyColCount, Row** tRow);
inline uint64_t operator()(const RowPosition& pos) const;
private:
mutable Row row;
mutable Row** tmpRow;
uint32_t lastKeyCol;
KeyStorage* ks;
};
class ExternalKeyEq
{
public:
ExternalKeyEq(const RowGroup& keyRG, KeyStorage* ks, uint32_t keyColCount, Row** tRow);
inline bool operator()(const RowPosition& pos1, const RowPosition& pos2) const;
private:
mutable Row row1, row2;
mutable Row** tmpRow;
uint32_t lastKeyCol;
KeyStorage* ks;
};
typedef std::tr1::unordered_set<RowPosition, AggHasher, AggComparator, utils::STLPoolAllocator<RowPosition> >
RowAggMap_t;
#if defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ < 5)
typedef std::tr1::unordered_map<RowPosition, RowPosition, ExternalKeyHasher, ExternalKeyEq,
utils::STLPoolAllocator<std::pair<const RowPosition, RowPosition> > > ExtKeyMap_t;
#else
typedef std::tr1::unordered_map<RowPosition, RowPosition, ExternalKeyHasher, ExternalKeyEq,
utils::STLPoolAllocator<std::pair<RowPosition, RowPosition> > > ExtKeyMap_t;
#endif
struct GroupConcat
{
// GROUP_CONCAT(DISTINCT col1, 'const', col2 ORDER BY col3 desc SEPARATOR 'sep')
@ -427,7 +334,7 @@ struct GroupConcat
boost::shared_ptr<int64_t> fSessionMemLimit;
std::string fTimeZone;
GroupConcat() : fRm(NULL) {}
GroupConcat() : fRm(nullptr) {}
};
typedef boost::shared_ptr<GroupConcat> SP_GroupConcat;
@ -436,7 +343,7 @@ typedef boost::shared_ptr<GroupConcat> SP_GroupConcat;
class GroupConcatAg
{
public:
GroupConcatAg(SP_GroupConcat&);
explicit GroupConcatAg(SP_GroupConcat&);
virtual ~GroupConcatAg();
virtual void initialize() {};
@ -446,7 +353,7 @@ public:
void getResult(uint8_t*) {};
uint8_t* getResult()
{
return NULL;
return nullptr;
}
protected:
@ -478,12 +385,14 @@ public:
*/
RowAggregation();
RowAggregation(const std::vector<SP_ROWAGG_GRPBY_t>& rowAggGroupByCols,
const std::vector<SP_ROWAGG_FUNC_t>& rowAggFunctionCols);
const std::vector<SP_ROWAGG_FUNC_t>& rowAggFunctionCols,
joblist::ResourceManager* rm = nullptr,
boost::shared_ptr<int64_t> sessMemLimit = {});
RowAggregation(const RowAggregation& rhs);
/** @brief RowAggregation default destructor
*/
virtual ~RowAggregation();
~RowAggregation() override;
/** @brief clone this object for multi-thread use
*/
@ -551,28 +460,19 @@ public:
* @parm pRowGroupIn(in) RowGroup to be added to aggregation.
*/
virtual void addRowGroup(const RowGroup* pRowGroupIn);
virtual void addRowGroup(const RowGroup* pRowGroupIn, std::vector<Row::Pointer>& inRows);
virtual void addRowGroup(const RowGroup* pRowGroupIn, std::vector<std::pair<Row::Pointer, uint64_t>>& inRows);
/** @brief Serialize RowAggregation object into a ByteStream.
*
* @parm bs(out) BytesStream that is to be written to.
*/
void serialize(messageqcpp::ByteStream& bs) const;
void serialize(messageqcpp::ByteStream& bs) const override;
/** @brief Unserialize RowAggregation object from a ByteStream.
*
* @parm bs(in) BytesStream that is to be read from.
*/
void deserialize(messageqcpp::ByteStream& bs);
/** @brief set the memory limit for RowAggregation
*
* @parm limit(in) memory limit for both Map and secondary RowGroups
*/
void setMaxMemory(uint64_t limit)
{
fMaxMemory = limit;
}
void deserialize(messageqcpp::ByteStream& bs) override;
/** @brief load result set into byte stream
*
@ -594,18 +494,12 @@ public:
return fRowGroupOut;
}
RowAggMap_t* mapPtr()
{
return fAggMapPtr;
}
std::vector<RGData*>& resultDataVec()
{
return fResultDataVec;
}
void append(RowAggregation* other);
virtual void aggregateRow(Row& row,
const uint64_t* hash = nullptr,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr);
inline uint32_t aggMapKeyLength()
inline uint32_t aggMapKeyLength() const
{
return fAggMapKeyCount;
}
@ -623,6 +517,16 @@ public:
return &fRGContextColl;
}
/** @brief Finalize the underlying row storage.
 *
 * Hands RowAggStorage::finalize() a callback that forwards each row to
 * mergeEntries(), together with fRow as the output row.
 */
void finalAggregation()
{
    auto mergeRow = [this](Row& row) { mergeEntries(row); };
    fRowAggStorage->finalize(mergeRow, fRow);
}
/** @brief Give up ownership of the current RGData.
 *
 * @returns the RGData previously held in fCurRGData (may be null);
 *          fCurRGData is left empty
 */
std::unique_ptr<RGData> moveCurrentRGData()
{
    std::unique_ptr<RGData> detached(std::move(fCurRGData));
    return detached;
}
protected:
virtual void initialize();
virtual void initMapData(const Row& row);
@ -630,10 +534,12 @@ protected:
virtual void updateEntry(const Row& row,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr);
void mergeEntries(const Row& row);
virtual void doMinMax(const Row&, int64_t, int64_t, int);
virtual void doSum(const Row&, int64_t, int64_t, int);
virtual void doAvg(const Row&, int64_t, int64_t, int64_t);
virtual void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false);
virtual void doStatistics(const Row&, int64_t, int64_t, int64_t);
void mergeStatistics(const Row&, uint64_t colOut, uint64_t colAux);
virtual void doBitOp(const Row&, int64_t, int64_t, int);
virtual void doUDAF(const Row&,
int64_t,
@ -647,12 +553,6 @@ protected:
return true;
}
virtual bool newRowGroup();
virtual void clearAggMap()
{
if (fAggMapPtr) fAggMapPtr->clear();
}
void resetUDAF(RowUDAFFunctionCol* rowUDAF);
void resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColIdx);
@ -673,24 +573,19 @@ protected:
inline void updateStringMinMax(std::string val1, std::string val2, int64_t col, int func);
std::vector<SP_ROWAGG_GRPBY_t> fGroupByCols;
std::vector<SP_ROWAGG_FUNC_t> fFunctionCols;
RowAggMap_t* fAggMapPtr;
uint32_t fAggMapKeyCount; // the number of columns that make up the key
RowGroup fRowGroupIn;
RowGroup* fRowGroupOut;
// for when the group by & distinct keys are not stored in the output rows
rowgroup::RowGroup fKeyRG;
Row fRow;
Row fNullRow;
Row* tmpRow; // used by the hashers & eq functors
boost::scoped_array<uint8_t> fNullRowData;
std::vector<RGData*> fResultDataVec;
uint64_t fTotalRowCount;
uint64_t fMaxTotalRowCount;
uint64_t fMaxMemory;
RGData* fPrimaryRowData;
std::vector<boost::shared_ptr<RGData> > fSecondaryRowDataVec;
std::unique_ptr<RowAggStorage> fRowAggStorage;
// for support PM aggregation after PM hashjoin
std::vector<RowGroup>* fSmallSideRGs;
@ -700,28 +595,19 @@ protected:
uint32_t fSmallSideCount;
boost::scoped_array<Row> rowSmalls;
// for hashmap
boost::shared_ptr<utils::STLPoolAllocator<RowPosition> > fAlloc;
// for 8k poc
RowGroup fEmptyRowGroup;
RGData fEmptyRowData;
Row fEmptyRow;
boost::scoped_ptr<AggHasher> fHasher;
boost::scoped_ptr<AggComparator> fEq;
bool fKeyOnHeap = false;
std::string fTimeZone;
//TODO: try to get rid of these friend decl's. AggHasher & Comparator
//need access to rowgroup storage holding the rows to hash & ==.
friend class AggHasher;
friend class AggComparator;
// We need a separate copy for each thread.
mcsv1sdk::mcsv1Context fRGContext;
std::vector<mcsv1sdk::mcsv1Context> fRGContextColl;
// These are handy for testing the actual type of static_any for UDAF
static const static_any::any& charTypeId;
static const static_any::any& scharTypeId;
@ -742,6 +628,10 @@ protected:
// For UDAF along with with multiple distinct columns
std::vector<SP_ROWAGG_FUNC_t>* fOrigFunctionCols;
joblist::ResourceManager* fRm = nullptr;
boost::shared_ptr<int64_t> fSessionMemLimit;
std::unique_ptr<RGData> fCurRGData;
};
//------------------------------------------------------------------------------
@ -764,11 +654,11 @@ public:
/** @brief RowAggregationUM default destructor
*/
~RowAggregationUM();
~RowAggregationUM() override;
/** @brief Denotes end of data insertion following multiple calls to addRowGroup().
*/
void endOfInput();
void endOfInput() override;
/** @brief Finializes the result set before sending back to the front end.
*/
@ -805,7 +695,7 @@ public:
{
return fRm;
}
inline virtual RowAggregationUM* clone() const
inline RowAggregationUM* clone() const override
{
return new RowAggregationUM (*this);
}
@ -832,22 +722,18 @@ public:
return fGroupConcat;
}
void aggregateRow(Row&,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr) override;
virtual void aggReset();
void aggReset() override;
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut);
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut) override;
protected:
// virtual methods from base
void initialize() override;
void attachGroupConcatAg() override;
void updateEntry(const Row& row,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr) override;
void aggregateRowWithRemap(Row&,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr);
void attachGroupConcatAg();
bool countSpecial(const RowGroup* pRG)
bool countSpecial(const RowGroup* pRG) override
{
fRow.setIntField<8>(
fRow.getIntField<8>(
@ -856,8 +742,6 @@ protected:
return true;
}
bool newRowGroup();
// calculate the average after all rows received. UM only function.
void calculateAvgColumns();
@ -889,7 +773,6 @@ protected:
virtual void setGroupConcatString();
bool fHasAvg;
bool fKeyOnHeap;
bool fHasStatsFunc;
bool fHasUDAF;
@ -902,8 +785,6 @@ protected:
* the memory from rm in that order. */
uint64_t fTotalMemUsage;
joblist::ResourceManager* fRm;
// @bug3475, aggregate(constant), sum(0), count(null), etc
std::vector<ConstantAggData> fConstantAggregate;
@ -912,18 +793,8 @@ protected:
std::vector<SP_GroupConcatAg> fGroupConcatAg;
std::vector<SP_ROWAGG_FUNC_t> fFunctionColGc;
// for when the group by & distinct keys are not stored in the output rows
rowgroup::RowGroup fKeyRG;
boost::scoped_ptr<ExternalKeyEq> fExtEq;
boost::scoped_ptr<ExternalKeyHasher> fExtHash;
boost::scoped_ptr<KeyStorage> fKeyStore;
boost::scoped_ptr<utils::STLPoolAllocator<std::pair<RowPosition, RowPosition> > > fExtKeyMapAlloc;
boost::scoped_ptr<ExtKeyMap_t> fExtKeyMap;
boost::shared_ptr<int64_t> fSessionMemLimit;
private:
uint64_t fLastMemUsage;
uint32_t fNextRGIndex;
};
@ -951,8 +822,8 @@ public:
/** @brief RowAggregationUMP2 default destructor
*/
~RowAggregationUMP2();
inline virtual RowAggregationUMP2* clone() const
~RowAggregationUMP2() override;
inline RowAggregationUMP2* clone() const override
{
return new RowAggregationUMP2 (*this);
}
@ -961,17 +832,17 @@ protected:
// virtual methods from base
void updateEntry(const Row& row,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr) override;
void doAvg(const Row&, int64_t, int64_t, int64_t);
void doStatistics(const Row&, int64_t, int64_t, int64_t);
void doGroupConcat(const Row&, int64_t, int64_t);
void doBitOp(const Row&, int64_t, int64_t, int);
void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false) override;
void doStatistics(const Row&, int64_t, int64_t, int64_t) override;
void doGroupConcat(const Row&, int64_t, int64_t) override;
void doBitOp(const Row&, int64_t, int64_t, int) override;
void doUDAF(const Row&,
int64_t,
int64_t,
int64_t,
uint64_t& funcColsIdx,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr) override;
bool countSpecial(const RowGroup* pRG)
bool countSpecial(const RowGroup* pRG) override
{
return false;
}
@ -1002,18 +873,18 @@ public:
/** @brief RowAggregationDistinct default destructor
*/
~RowAggregationDistinct();
~RowAggregationDistinct() override;
/** @brief Add an aggregator for pre-DISTINCT aggregation
*/
void addAggregator(const boost::shared_ptr<RowAggregation>& agg, const RowGroup& rg);
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut);
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut) override;
virtual void doDistinctAggregation();
virtual void doDistinctAggregation_rowVec(std::vector<Row::Pointer>& inRows);
void addRowGroup(const RowGroup* pRowGroupIn);
void addRowGroup(const RowGroup* pRowGroupIn, std::vector<Row::Pointer>& inRows);
virtual void doDistinctAggregation_rowVec(std::vector<std::pair<Row::Pointer, uint64_t>>& inRows);
void addRowGroup(const RowGroup* pRowGroupIn) override;
void addRowGroup(const RowGroup* pRowGroupIn, std::vector<std::pair<Row::Pointer, uint64_t>>& inRows) override;
// multi-threade debug
boost::shared_ptr<RowAggregation>& aggregator()
@ -1022,7 +893,7 @@ public:
}
void aggregator(boost::shared_ptr<RowAggregation> aggregator)
{
fAggregator = aggregator;
fAggregator = std::move(aggregator);
}
RowGroup& rowGroupDist()
{
@ -1032,7 +903,7 @@ public:
{
fRowGroupDist = rowGroupDist;
}
inline virtual RowAggregationDistinct* clone() const
inline RowAggregationDistinct* clone() const override
{
return new RowAggregationDistinct (*this);
}
@ -1067,20 +938,20 @@ public:
/** @brief RowAggregationSubDistinct default destructor
*/
~RowAggregationSubDistinct();
~RowAggregationSubDistinct() override;
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut);
void addRowGroup(const RowGroup* pRowGroupIn);
inline virtual RowAggregationSubDistinct* clone() const
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut) override;
void addRowGroup(const RowGroup* pRowGroupIn) override;
inline RowAggregationSubDistinct* clone() const override
{
return new RowAggregationSubDistinct (*this);
}
void addRowGroup(const RowGroup* pRowGroupIn, std::vector<Row::Pointer>& inRow);
void addRowGroup(const RowGroup* pRowGroupIn, std::vector<std::pair<Row::Pointer, uint64_t>>& inRow) override;
protected:
// virtual methods from RowAggregationUM
void doGroupConcat(const Row&, int64_t, int64_t);
void doGroupConcat(const Row&, int64_t, int64_t) override;
// for groupby columns and the aggregated distinct column
Row fDistRow;
@ -1108,7 +979,7 @@ public:
/** @brief RowAggregationMultiDistinct default destructor
*/
~RowAggregationMultiDistinct();
~RowAggregationMultiDistinct() override;
/** @brief Add sub aggregators
*/
@ -1116,21 +987,21 @@ public:
const RowGroup& rg,
const std::vector<SP_ROWAGG_FUNC_t>& funct);
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut);
void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut) override;
using RowAggregationDistinct::addRowGroup;
void addRowGroup(const RowGroup* pRowGroupIn);
void addRowGroup(const RowGroup* pRowGroupIn) override;
using RowAggregationDistinct::doDistinctAggregation;
virtual void doDistinctAggregation();
void doDistinctAggregation() override;
using RowAggregationDistinct::doDistinctAggregation_rowVec;
virtual void doDistinctAggregation_rowVec(std::vector<std::vector<Row::Pointer> >& inRows);
virtual void doDistinctAggregation_rowVec(std::vector<std::vector<std::pair<Row::Pointer, uint64_t>> >& inRows);
inline virtual RowAggregationMultiDistinct* clone() const
inline RowAggregationMultiDistinct* clone() const override
{
return new RowAggregationMultiDistinct (*this);
}
void addRowGroup(const RowGroup* pRowGroupIn, std::vector<std::vector<Row::Pointer> >& inRows);
void addRowGroup(const RowGroup* pRowGroupIn, std::vector<std::vector<std::pair<Row::Pointer, uint64_t>>>& inRows);
std::vector<boost::shared_ptr<RowAggregationUM> >& subAggregators()
{

View File

@ -32,6 +32,7 @@
using namespace std;
#include <boost/shared_array.hpp>
#include <numeric>
using namespace boost;
#include "bytestream.h"
@ -405,6 +406,7 @@ RGData::RGData(const RowGroup& rg, uint32_t rowCount)
*/
memset(rowData.get(), 0, rg.getDataSize(rowCount)); // XXXPAT: make valgrind happy temporarily
#endif
memset(rowData.get(), 0, rg.getDataSize(rowCount)); // XXXPAT: make valgrind happy temporarily
}
RGData::RGData(const RowGroup& rg)
@ -481,7 +483,7 @@ void RGData::serialize(ByteStream& bs, uint32_t amount) const
bs << (uint8_t) 0;
}
void RGData::deserialize(ByteStream& bs, bool hasLenField)
void RGData::deserialize(ByteStream& bs, uint32_t defAmount)
{
uint32_t amount, sig;
uint8_t* buf;
@ -493,7 +495,7 @@ void RGData::deserialize(ByteStream& bs, bool hasLenField)
{
bs >> sig;
bs >> amount;
rowData.reset(new uint8_t[amount]);
rowData.reset(new uint8_t[std::max(amount, defAmount)]);
buf = bs.buf();
memcpy(rowData.get(), buf, amount);
bs.advance(amount);
@ -577,12 +579,13 @@ Row& Row::operator=(const Row& r)
return *this;
}
string Row::toString() const
string Row::toString(uint32_t rownum) const
{
ostringstream os;
uint32_t i;
//os << getRid() << ": ";
os << "[" << std::setw(5) << rownum << std::setw(0) << "]: ";
os << (int) useStringTable << ": ";
for (i = 0; i < columnCount; i++)
@ -1447,7 +1450,7 @@ uint32_t RowGroup::getColumnCount() const
return columnCount;
}
string RowGroup::toString() const
string RowGroup::toString(const std::vector<uint64_t>& used) const
{
ostringstream os;
ostream_iterator<int> oIter1(os, "\t");
@ -1479,6 +1482,8 @@ string RowGroup::toString() const
os << "uses a string table\n";
else
os << "doesn't use a string table\n";
if (!used.empty())
os << "sparse\n";
//os << "strings = " << hex << (int64_t) strings << "\n";
//os << "data = " << (int64_t) data << "\n" << dec;
@ -1488,14 +1493,25 @@ string RowGroup::toString() const
initRow(&r);
getRow(0, &r);
os << "rowcount = " << getRowCount() << endl;
if (!used.empty())
{
uint64_t cnt = std::accumulate(used.begin(), used.end(), 0ULL,
[](uint64_t a, uint64_t bits) {
return a + __builtin_popcountll(bits);
});
os << "sparse row count = " << cnt << endl;
}
os << "base rid = " << getBaseRid() << endl;
os << "status = " << getStatus() << endl;
os << "dbroot = " << getDBRoot() << endl;
os << "row data...\n";
for (uint32_t i = 0; i < getRowCount(); i++)
uint32_t max_cnt = used.empty() ? getRowCount() : (used.size() * 64);
for (uint32_t i = 0; i < max_cnt; i++)
{
os << r.toString() << endl;
if (!used.empty() && !(used[i/64] & (1ULL << (i%64))))
continue;
os << r.toString(i) << endl;
r.nextRow();
}
}

View File

@ -270,7 +270,7 @@ public:
// the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
// inline data with a length field. Once that's converted to string table format, that
// option can go away.
void deserialize(messageqcpp::ByteStream&, bool hasLengthField = false); // returns the # of bytes read
void deserialize(messageqcpp::ByteStream&, uint32_t amount = 0); // returns the # of bytes read
inline uint64_t getStringTableMemUsage();
void clear();
@ -531,7 +531,7 @@ public:
template<typename T>
inline void copyBinaryField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
std::string toString() const;
std::string toString(uint32_t rownum = 0) const;
std::string toCSV() const;
/* These fcns are used only in joins. The RID doesn't matter on the side that
@ -1537,7 +1537,7 @@ public:
RGData duplicate(); // returns a copy of the attached RGData
std::string toString() const;
std::string toString(const std::vector<uint64_t>& used = {}) const;
/** operator+=
*

File diff suppressed because it is too large Load Diff

366
utils/rowgroup/rowstorage.h Normal file
View File

@ -0,0 +1,366 @@
/* Copyright (C) 2021 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#ifndef ROWSTORAGE_H
#define ROWSTORAGE_H
#include "rowgroup.h"
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <memory>
#include <string>
#include <vector>
namespace rowgroup
{
uint32_t calcNumberOfBuckets(ssize_t availMem,
uint32_t numOfThreads,
uint32_t numOfBuckets,
uint32_t groupsPerThread,
uint32_t inRowSize,
uint32_t outRowSize,
bool enabledDiskAggr);
class MemManager;
class RowPosHashStorage;
using RowPosHashStoragePtr = std::unique_ptr<RowPosHashStorage>;
class RowGroupStorage;
uint64_t hashRow(const rowgroup::Row& r, std::size_t lastCol);
/** @brief Storage of aggregation result rows addressed by aggregation key.
 *
 * getTargetRow() finds or creates the result row for an input row; dump()
 * can move some RGDatas to disk to release memory.
 */
class RowAggStorage
{
public:
    /** @brief Construct a storage with a dedicated row group for the keys.
     *
     * NOTE(review): the constructor is defined out of line; the parameter
     * summaries below are inferred from the names — confirm against the
     * implementation.
     *
     * @param tmpDir(in) directory for temporary dump files
     * @param rowGroupOut(in) row group describing the result rows
     * @param keysRowGroup(in) row group describing the key rows
     * @param keyCount(in) number of key columns
     * @param rm(in) resource manager, may be nullptr
     * @param sessLimit(in) session memory limit, may be empty
     * @param enabledDiskAgg(in) whether disk-based aggregation is enabled
     * @param allowGenerations(in) whether aggregation generations are allowed
     */
    RowAggStorage(const std::string& tmpDir,
                  RowGroup* rowGroupOut,
                  RowGroup* keysRowGroup,
                  uint32_t keyCount,
                  joblist::ResourceManager* rm = nullptr,
                  boost::shared_ptr<int64_t> sessLimit = {},
                  bool enabledDiskAgg = false,
                  bool allowGenerations = false);

    /** @brief Convenience constructor that uses rowGroupOut for the keys too.
     *
     * Delegates to the constructor above, passing rowGroupOut as both the
     * output and the keys row group.
     */
    RowAggStorage(const std::string& tmpDir,
                  RowGroup* rowGroupOut,
                  uint32_t keyCount,
                  joblist::ResourceManager* rm = nullptr,
                  boost::shared_ptr<int64_t> sessLimit = {},
                  bool enabledDiskAgg = false,
                  bool allowGenerations = false)
        : RowAggStorage(tmpDir, rowGroupOut, rowGroupOut, keyCount,
                        rm, std::move(sessLimit),
                        enabledDiskAgg, allowGenerations)
    {}

    ~RowAggStorage();

    /** @brief Maximum number of rows depending on the aggregation mode.
     *
     * @param enabledDiskAgg(in) whether disk-based aggregation is enabled
     * @returns 8192 when disk aggregation is enabled, 256 otherwise
     */
    static uint16_t getMaxRows(bool enabledDiskAgg)
    {
        return (enabledDiskAgg ? 8192 : 256);
    }

    static size_t getBucketSize();

    /** @brief Find or create resulting row.
     *
     *  Create "aggregation key" row if necessary.
     *  NB! Using getTargetRow() after append() is UB!
     *
     * @param row(in)   input row
     * @param rowOut()  row to aggregate data from input row
     *
     * @returns true if new row created, false otherwise
     */
    bool getTargetRow(const Row& row, Row& rowOut);

    /** @brief Same as above, taking the hash of the input row as row_hash. */
    bool getTargetRow(const Row& row, uint64_t row_hash, Row& rowOut);

    /** @brief Dump some RGDatas to disk and release memory for further use.
     */
    void dump();

    /** @brief Append RGData from other RowAggStorage and clear it.
     *
     *  NB! Any operation except getNextRGData() or append() is UB!
     *
     * @param other(in) donor storage
     */
    void append(RowAggStorage& other);

    /** @brief Remove last RGData from internal RGData storage and return it.
     *
     * @returns pointer to the next RGData or nullptr if empty
     */
    std::unique_ptr<RGData> getNextRGData();

    /** @brief Finalize the storage.
     *
     * NOTE(review): presumably merges the rows of previously dumped
     * generations into the current data by invoking mergeFunc per row —
     * confirm against the implementation.
     *
     * @param mergeFunc(in) callback invoked with a row to be merged
     * @param rowOut        row object used for the merge target
     */
    void finalize(std::function<void(Row &)> mergeFunc, Row &rowOut);

    /** @brief Calculate maximum size of hash assuming 80% fullness.
     *
     * @param elems(in) number of elements
     * @returns calculated size
     */
    inline static size_t calcMaxSize(size_t elems) noexcept
    {
        // Multiply first while that cannot overflow, otherwise divide first.
        if (LIKELY(elems <= std::numeric_limits<size_t>::max() / 100))
            return elems * 80 / 100;

        return (elems / 100) * 80;
    }

    /** @brief Number of elements plus an overflow buffer of at most 255.
     *
     * @param elems(in)   number of elements
     * @param maxSize(in) maximum size as returned by calcMaxSize()
     * @returns elems + min(maxSize, 255)
     */
    inline static size_t calcSizeWithBuffer(size_t elems, size_t maxSize) noexcept
    {
        // Explicit template argument: size_t is not `unsigned long` on every
        // platform, so a 0xFFUL literal would break type deduction there.
        return elems + std::min<size_t>(maxSize, 0xFF);
    }

    /** @brief Same as above with maxSize derived from elems. */
    inline static size_t calcSizeWithBuffer(size_t elems) noexcept
    {
        return calcSizeWithBuffer(elems, calcMaxSize(elems));
    }

private:
    struct Data;

    /** @brief Create new RowAggStorage with the same params and load dumped data
     *
     * @param gen(in) generation number
     * @return pointer to a new RowAggStorage
     */
    RowAggStorage* clone(uint16_t gen) const;

    /** @brief Free any internal data
     */
    void freeData();

    /** @brief Move internal data & row position inside [insIdx, startIdx] up by 1.
     *
     * @param startIdx(in) last element's index to move
     * @param insIdx(in)   first element's index to move
     */
    void shiftUp(size_t startIdx, size_t insIdx);

    /** @brief Find best position of row and save its hash.
     *
     * @param row(in)   input row
     * @param info(out) info data
     * @param idx(out)  index computed from row hash
     * @param hash(out) row hash value
     */
    void rowToIdx(const Row& row, uint32_t& info, size_t& idx, uint64_t& hash) const;
    void rowToIdx(const Row& row, uint32_t& info, size_t& idx, uint64_t& hash, const Data* curData) const;

    /** @brief Find best position using precomputed hash
     *
     * @param h(in)       row hash
     * @param info(out)   info data
     * @param idx(out)    index
     * @param curData(in) hash data whose mask and info settings are used
     */
    inline void rowHashToIdx(uint64_t h, uint32_t& info, size_t& idx, const Data* curData) const
    {
        // The low INIT_INFO_BITS bits of the hash seed the info value,
        // the remaining bits (masked) select the index.
        info = curData->fInfoInc + static_cast<uint32_t>((h & INFO_MASK) >> curData->fInfoHashShift);
        idx = (h >> INIT_INFO_BITS) & curData->fMask;
    }

    /** @brief Same as above, operating on the current data (fCurData). */
    inline void rowHashToIdx(uint64_t h, uint32_t& info, size_t& idx) const
    {
        rowHashToIdx(h, info, idx, fCurData);
    }

    /** @brief Iterate over internal info until info with less-or-equal distance
     *         from the best position was found.
     *
     * @param info(in,out) info data
     * @param idx(in,out)  index
     * @param curData(in)  hash data to iterate over
     */
    inline void nextWhileLess(uint32_t& info, size_t& idx, const Data* curData) const noexcept
    {
        while (info < curData->fInfo[idx])
        {
            next(info, idx, curData);
        }
    }

    /** @brief Same as above, operating on the current data (fCurData). */
    inline void nextWhileLess(uint32_t& info, size_t& idx) const noexcept
    {
        nextWhileLess(info, idx, fCurData);
    }

    /** @brief Get next index and corresponding info
     *
     * @param info(in,out) info data, increased by curData->fInfoInc
     * @param idx(in,out)  index, increased by one
     * @param curData(in)  hash data providing the info increment
     */
    inline void next(uint32_t& info, size_t& idx, const Data* curData) const noexcept
    {
        ++(idx);
        info += curData->fInfoInc;
    }

    /** @brief Same as above, operating on the current data (fCurData). */
    inline void next(uint32_t& info, size_t& idx) const noexcept
    {
        next(info, idx, fCurData);
    }

    /** @brief Get index and info of the next non-empty entry
     *
     * Scans the info bytes of the current data starting at idx.
     * NOTE(review): the loop has no bound check, so it presumably relies on
     * a non-zero sentinel after the last entry — confirm in initData().
     *
     * @param info(out)    info byte of the entry found
     * @param idx(in,out)  start index on input, index of the entry on output
     */
    inline void nextExisting(uint32_t& info, size_t& idx) const noexcept
    {
        uint64_t data;

        // Look at eight info bytes at a time and skip words that are all zero.
        while (true)
        {
            memcpy(&data, fCurData->fInfo + idx, sizeof(data));

            if (data != 0)
                break;

            idx += sizeof(data);
        }

        // Offset of the first non-zero byte inside the word:
        // index of the first set bit divided by the number of bits per byte.
#if BYTE_ORDER == BIG_ENDIAN
        idx += __builtin_clzll(data) / CHAR_BIT;
#else
        idx += __builtin_ctzll(data) / CHAR_BIT;
#endif
        info = fCurData->fInfo[idx];
    }

    /** @brief Increase internal data size if needed
     */
    void increaseSize();

    /** @brief Increase distance capacity of info removing 1 bit of the hash.
     *
     * @returns success
     */
    bool tryIncreaseInfo();

    /** @brief Reserve space for number of elements (power of two)
     *
     *  This function re-inserts all data
     *
     * @param elems(in) new size
     */
    void rehashPowerOfTwo(size_t elems);

    /** @brief Move elements from old one into rehashed data.
     *
     *  It's mostly the same algo as in getTargetRow(), but returns nothing
     *  and skips some checks because it's guaranteed that there are no dups.
     *
     * @param oldIdx(in)    index of "old" data
     * @param oldHashes(in) old storage of row positions and hashes
     */
    void insertSwap(size_t oldIdx, RowPosHashStorage* oldHashes);

    /** @brief (Re)Initialize internal data of specified size.
     *
     * @param elems(in)     number of elements
     * @param oldHashes(in) previous storage of row positions and hashes
     */
    void initData(size_t elems, const RowPosHashStorage* oldHashes);

    /** @brief Calculate memory size of info data
     *
     * @param elems(in) number of elements
     * @returns size in bytes (one byte per element plus sizeof(uint64_t))
     */
    inline static size_t calcBytes(size_t elems) noexcept
    {
        return elems + sizeof(uint64_t);
    }

    /** @brief Reserve place sufficient for elems
     *
     * @param elems(in) number of elements
     */
    void reserve(size_t elems);

    /** @brief Start new aggregation generation
     *
     *  Dump all the data on disk, including internal info data, positions & row
     *  hashes, and the rowgroups themselves.
     */
    void startNewGeneration();

    /** @brief Save internal info data on disk */
    void dumpInternalData() const;

    /** @brief Load previously dumped data from disk
     *
     * @param gen(in) generation number
     */
    void loadGeneration(uint16_t gen);

    /** @brief Load previously dumped data into the tmp storage */
    void loadGeneration(uint16_t gen, size_t& size, size_t& mask, size_t& maxSize, uint32_t& infoInc, uint32_t& infoHashShift, uint8_t*& info);

    /** @brief Remove temporary data files */
    void cleanup();
    void cleanup(uint16_t gen);

    /** @brief Remove all temporary data files */
    void cleanupAll() noexcept;

    std::string makeDumpFilename(int32_t gen = -1) const;

private:
    static constexpr size_t INIT_SIZE{sizeof(uint64_t)};
    static constexpr uint32_t INIT_INFO_BITS{5};
    static constexpr uint8_t INIT_INFO_INC{1U << INIT_INFO_BITS};
    static constexpr size_t INFO_MASK{INIT_INFO_INC - 1U};
    static constexpr uint8_t INIT_INFO_HASH_SHIFT{0};
    static constexpr uint16_t MAX_INMEMORY_GENS{4};

    /** @brief Hash data of a single generation. */
    struct Data
    {
        RowPosHashStoragePtr fHashes;
        uint8_t *fInfo{nullptr};
        size_t fSize{0};
        size_t fMask{0};
        size_t fMaxSize{0};
        uint32_t fInfoInc{INIT_INFO_INC};
        uint32_t fInfoHashShift{INIT_INFO_HASH_SHIFT};
    };

    std::vector<std::unique_ptr<Data>> fGens;
    // Pointers and scalars get default initialisers so they never hold
    // indeterminate values; the constructor may still override them.
    Data* fCurData{nullptr};
    uint32_t fMaxRows{0};
    const bool fExtKeys;

    std::unique_ptr<RowGroupStorage> fStorage;
    RowGroupStorage* fKeysStorage{nullptr};
    uint32_t fLastKeyCol{0};

    uint16_t fGeneration{0};
    void* fUniqId{nullptr};

    Row fKeyRow;

    std::unique_ptr<MemManager> fMM;
    uint32_t fNumOfInputRGPerThread{0};
    bool fAggregated = true;
    bool fAllowGenerations{false};
    bool fEnabledDiskAggregation{false};
    std::string fTmpDir;
    bool fInitialized{false};
    rowgroup::RowGroup* fRowGroupOut{nullptr};
    rowgroup::RowGroup* fKeysRowGroup{nullptr};
};
} // namespace rowgroup
#endif // ROWSTORAGE_H