1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00

fix(allocator,perf): performance degradation caused by lack of STLPoolAllocator replaced by CountingAllocator

This commit is contained in:
=
2025-04-24 13:45:33 +00:00
committed by drrtuy
parent 6fb18b457c
commit 671b7301f3
7 changed files with 182 additions and 92 deletions

View File

@@ -40,42 +40,63 @@ namespace joiner
{
constexpr const size_t DEFAULT_BUCKET_COUNT = 10;
template <typename HashTable>
std::unique_ptr<HashTable> makeHashMap(size_t bucketCount, ResourceManager* resourceManager)
{
// auto alloc = resourceManager->getAllocator<T>();
// return std::unique_ptr<T>(new T(bucketCount, TupleJoiner::hasher(), typename T::key_equal(), alloc));
return std::unique_ptr<HashTable>(new HashTable(bucketCount, TupleJoiner::hasher(),
typename HashTable::key_equal(),
utils::STLPoolAllocator<typename HashTable::value_type>(resourceManager)));
}
void TupleJoiner::initRowsVector()
{
// auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
// rows.reset(new RowPointersVec(alloc));
rows.reset(new RowPointersVec(resourceManager_->getAllocator<rowgroup::Row::Pointer>()));
}
void TupleJoiner::initHashMaps(uint32_t& smallJoinColumn)
{
if (typelessJoin)
{
for (size_t i = 0; i < bucketCount; i++)
{
auto alloc = resourceManager_->getAllocator<pair<const TypelessData, Row::Pointer>>();
ht.emplace_back(std::unique_ptr<typelesshash_t>(
new typelesshash_t(DEFAULT_BUCKET_COUNT, hasher(), typelesshash_t::key_equal(), alloc)));
// auto alloc = resourceManager_->getAllocator<pair<const TypelessData, Row::Pointer>>();
// ht.emplace_back(std::unique_ptr<typelesshash_t>(
// new typelesshash_t(DEFAULT_BUCKET_COUNT, hasher(), typelesshash_t::key_equal(), alloc)));
ht.emplace_back(makeHashMap<typelesshash_t>(DEFAULT_BUCKET_COUNT, resourceManager_));
}
}
else if (smallRG.getColTypes()[smallJoinColumn] == CalpontSystemCatalog::LONGDOUBLE)
{
for (size_t i = 0; i < bucketCount; i++)
{
auto alloc = resourceManager_->getAllocator<pair<const long double, Row::Pointer>>();
ld.emplace_back(std::unique_ptr<ldhash_t>(
new ldhash_t(DEFAULT_BUCKET_COUNT, hasher(), ldhash_t::key_equal(), alloc)));
// auto alloc = resourceManager_->getAllocator<pair<const long double, Row::Pointer>>();
// ld.emplace_back(std::unique_ptr<ldhash_t>(
// new ldhash_t(DEFAULT_BUCKET_COUNT, hasher(), ldhash_t::key_equal(), alloc)));
ld.emplace_back(makeHashMap<ldhash_t>(DEFAULT_BUCKET_COUNT, resourceManager_));
}
}
else if (smallRG.usesStringTable())
{
for (size_t i = 0; i < bucketCount; i++)
{
auto alloc = resourceManager_->getAllocator<pair<const int64_t, Row::Pointer>>();
sth.emplace_back(std::unique_ptr<sthash_t>(
new sthash_t(DEFAULT_BUCKET_COUNT, hasher(), sthash_t::key_equal(), alloc)));
// auto alloc = resourceManager_->getAllocator<pair<const int64_t, Row::Pointer>>();
// sth.emplace_back(std::unique_ptr<sthash_t>(
// new sthash_t(DEFAULT_BUCKET_COUNT, hasher(), sthash_t::key_equal(), alloc)));
sth.emplace_back(makeHashMap<sthash_t>(DEFAULT_BUCKET_COUNT, resourceManager_));
}
}
else
{
for (size_t i = 0; i < bucketCount; i++)
{
auto alloc = resourceManager_->getAllocator<pair<const int64_t, uint8_t*>>();
h.emplace_back(
std::unique_ptr<hash_t>(new hash_t(DEFAULT_BUCKET_COUNT, hasher(), hash_t::key_equal(), alloc)));
// auto alloc = resourceManager_->getAllocator<pair<const int64_t, uint8_t*>>();
// h.emplace_back(
// std::unique_ptr<hash_t>(new hash_t(DEFAULT_BUCKET_COUNT, hasher(), hash_t::key_equal(), alloc)));
h.emplace_back(makeHashMap<hash_t>(DEFAULT_BUCKET_COUNT, resourceManager_));
}
}
}
@@ -99,10 +120,9 @@ TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::R
, _convertToDiskJoin(false)
, resourceManager_(rm)
{
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
rows.reset(new RowPointersVec(alloc));
initRowsVector();
getBucketCount();
m_bucketLocks.reset(new boost::mutex[bucketCount]);
initHashMaps(smallJoinColumn);
@@ -161,7 +181,8 @@ TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::R
// Typeless joiner ctor
TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::RowGroup& largeInput,
const vector<uint32_t>& smallJoinColumns, const vector<uint32_t>& largeJoinColumns,
JoinType jt, threadpool::ThreadPool* jsThreadPool, joblist::ResourceManager* rm, const uint64_t numCores)
JoinType jt, threadpool::ThreadPool* jsThreadPool, joblist::ResourceManager* rm,
const uint64_t numCores)
: smallRG(smallInput)
, largeRG(largeInput)
, joinAlg(INSERTING)
@@ -180,9 +201,7 @@ TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::R
{
uint i;
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
rows.reset(new RowPointersVec(alloc));
initRowsVector();
getBucketCount();
uint32_t unused = 0;
@@ -870,8 +889,7 @@ void TupleJoiner::setInUM()
#ifdef TJ_DEBUG
cout << "done\n";
#endif
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
rows.reset(new RowPointersVec(alloc));
initRowsVector();
if (typelessJoin)
{
@@ -904,10 +922,7 @@ void TupleJoiner::setInUM(vector<RGData>& rgs)
if (joinAlg == UM)
return;
{ // don't need rows anymore, free the mem
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
rows.reset(new RowPointersVec(alloc));
}
initRowsVector();
joinAlg = UM;
size = rgs.size();
@@ -918,7 +933,8 @@ void TupleJoiner::setInUM(vector<RGData>& rgs)
i = 0;
for (size_t firstRow = 0; i < (uint)numCores && firstRow < size; i++, firstRow += chunkSize)
jobs[i] = jobstepThreadPool->invoke(
[this, firstRow, chunkSize, size, i, &rgs] {
[this, firstRow, chunkSize, size, i, &rgs]
{
this->umJoinConvert(i, rgs, firstRow, (firstRow + chunkSize < size ? firstRow + chunkSize : size));
});
@@ -1291,7 +1307,7 @@ class WideDecimalKeyConverter
if (value > AT(std::numeric_limits<T>::max()) || value < AT(std::numeric_limits<T>::min()))
return true;
convertedValue = (uint64_t) static_cast<T>(value);
convertedValue = (uint64_t)static_cast<T>(value);
return false;
}
// As of MCS 6.x there is an asumption MCS can't join having
@@ -1801,9 +1817,7 @@ void TupleJoiner::clearData()
// This loop calls dtors and deallocates mem.
clearHashMaps();
initHashMaps(smallKeyColumns[0]);
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
rows.reset(new RowPointersVec(alloc));
initRowsVector();
finished = false;
}