You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
feat(): Replacing STLPoolAllocator with CountingAllocator for in-memory joins
This commit is contained in:
@ -58,43 +58,34 @@ TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::R
|
||||
{
|
||||
uint i;
|
||||
|
||||
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
|
||||
rows.reset(new RowPointersVec(alloc));
|
||||
|
||||
getBucketCount();
|
||||
m_bucketLocks.reset(new boost::mutex[bucketCount]);
|
||||
|
||||
if (smallRG.getColTypes()[smallJoinColumn] == CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
ld.reset(new boost::scoped_ptr<ldhash_t>[bucketCount]);
|
||||
// _pool.reset(new boost::shared_ptr<PoolAllocator>[bucketCount]);
|
||||
for (i = 0; i < bucketCount; i++)
|
||||
{
|
||||
// STLPoolAllocator<pair<const long double, Row::Pointer>> alloc(resourceManager_);
|
||||
// _pool[i] = alloc.getPoolAllocator();
|
||||
auto alloc = resourceManager_->getAllocator<pair<const long double, Row::Pointer>>();
|
||||
ld[i].reset(new ldhash_t(10, hasher(), ldhash_t::key_equal(), alloc));
|
||||
ld.emplace_back(std::unique_ptr<ldhash_t>(new ldhash_t(10, hasher(), ldhash_t::key_equal(), alloc)));
|
||||
}
|
||||
}
|
||||
else if (smallRG.usesStringTable())
|
||||
{
|
||||
sth.reset(new boost::scoped_ptr<sthash_t>[bucketCount]);
|
||||
_pool.reset(new boost::shared_ptr<PoolAllocator>[bucketCount]);
|
||||
for (i = 0; i < bucketCount; i++)
|
||||
{
|
||||
// STLPoolAllocator<pair<const int64_t, Row::Pointer>> alloc(resourceManager_);
|
||||
// _pool[i] = alloc.getPoolAllocator();
|
||||
auto alloc = resourceManager_->getAllocator<pair<const int64_t, Row::Pointer>>();
|
||||
sth[i].reset(new sthash_t(10, hasher(), sthash_t::key_equal(), alloc));
|
||||
sth.emplace_back(std::unique_ptr<sthash_t>(new sthash_t(10, hasher(), sthash_t::key_equal(), alloc)));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
h.reset(new boost::scoped_ptr<hash_t>[bucketCount]);
|
||||
_pool.reset(new boost::shared_ptr<PoolAllocator>[bucketCount]);
|
||||
for (i = 0; i < bucketCount; i++)
|
||||
{
|
||||
// STLPoolAllocator<pair<const int64_t, uint8_t*>> alloc(resourceManager_);
|
||||
// _pool[i] = alloc.getPoolAllocator();
|
||||
auto alloc = resourceManager_->getAllocator<pair<const int64_t, uint8_t*>>();
|
||||
h[i].reset(new hash_t(10, hasher(), hash_t::key_equal(), alloc));
|
||||
h.emplace_back(std::unique_ptr<hash_t>(new hash_t(10, hasher(), hash_t::key_equal(), alloc)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,13 +140,6 @@ TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::R
|
||||
nullValueForJoinColumn = smallNullRow.getSignedNullValue(smallJoinColumn);
|
||||
}
|
||||
|
||||
// TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::RowGroup& largeInput,
|
||||
// const vector<uint32_t>& smallJoinColumns, const vector<uint32_t>& largeJoinColumns,
|
||||
// JoinType jt, threadpool::ThreadPool* jsThreadPool)
|
||||
// : TupleJoiner(smallInput, largeInput, smallJoinColumns, largeJoinColumns, jt, jsThreadPool, nullptr)
|
||||
// {
|
||||
// }
|
||||
|
||||
// Typeless joiner ctor
|
||||
TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::RowGroup& largeInput,
|
||||
const vector<uint32_t>& smallJoinColumns, const vector<uint32_t>& largeJoinColumns,
|
||||
@ -180,14 +164,11 @@ TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::R
|
||||
|
||||
getBucketCount();
|
||||
|
||||
_pool.reset(new boost::shared_ptr<PoolAllocator>[bucketCount]);
|
||||
ht.reset(new boost::scoped_ptr<typelesshash_t>[bucketCount]);
|
||||
for (i = 0; i < bucketCount; i++)
|
||||
{
|
||||
// STLPoolAllocator<pair<const TypelessData, Row::Pointer>> alloc(resourceManager_);
|
||||
// _pool[i] = alloc.getPoolAllocator();
|
||||
auto alloc = resourceManager_->getAllocator<pair<const TypelessData, Row::Pointer>>();
|
||||
ht[i].reset(new typelesshash_t(10, hasher(), typelesshash_t::key_equal(), alloc));
|
||||
ht.emplace_back(std::unique_ptr<typelesshash_t>(
|
||||
new typelesshash_t(10, hasher(), typelesshash_t::key_equal(), alloc)));
|
||||
}
|
||||
m_bucketLocks.reset(new boost::mutex[bucketCount]);
|
||||
|
||||
@ -239,11 +220,10 @@ TupleJoiner::TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::R
|
||||
|
||||
// note, 'numcores' is implied by tuplehashjoin on calls to insertRGData().
|
||||
// TODO: make it explicit to avoid future confusion.
|
||||
storedKeyAlloc.reset(new FixedAllocator[numCores]);
|
||||
for (i = 0; i < (uint)numCores; i++)
|
||||
{
|
||||
auto alloc = resourceManager_->getAllocator<utils::FixedAllocatorBufType>();
|
||||
storedKeyAlloc[i] = FixedAllocator(alloc, keyLength);
|
||||
storedKeyAlloc.emplace_back(FixedAllocator(alloc, keyLength));
|
||||
}
|
||||
}
|
||||
|
||||
@ -279,7 +259,7 @@ void TupleJoiner::getBucketCount()
|
||||
}
|
||||
|
||||
template <typename buckets_t, typename hash_table_t>
|
||||
void TupleJoiner::bucketsToTables(buckets_t* buckets, hash_table_t* tables)
|
||||
void TupleJoiner::bucketsToTables(buckets_t* buckets, hash_table_t& tables)
|
||||
{
|
||||
uint i;
|
||||
|
||||
@ -301,7 +281,7 @@ void TupleJoiner::bucketsToTables(buckets_t* buckets, hash_table_t* tables)
|
||||
}
|
||||
tables[i]->insert(buckets[i].begin(), buckets[i].end());
|
||||
}
|
||||
|
||||
|
||||
wasProductive = true;
|
||||
buckets[i].clear();
|
||||
}
|
||||
@ -326,7 +306,7 @@ void TupleJoiner::um_insertTypeless(uint threadID, uint rowCount, Row& r)
|
||||
uint bucket = bucketPicker((char*)td[i].data, td[i].len, bpSeed) & bucketMask;
|
||||
v[bucket].emplace_back(pair<TypelessData, Row::Pointer>(td[i], r.getPointer()));
|
||||
}
|
||||
bucketsToTables(&v[0], ht.get());
|
||||
bucketsToTables(&v[0], ht);
|
||||
}
|
||||
|
||||
void TupleJoiner::um_insertLongDouble(uint rowCount, Row& r)
|
||||
@ -345,7 +325,7 @@ void TupleJoiner::um_insertLongDouble(uint rowCount, Row& r)
|
||||
else
|
||||
v[bucket].emplace_back(pair<long double, Row::Pointer>(smallKey, r.getPointer()));
|
||||
}
|
||||
bucketsToTables(&v[0], ld.get());
|
||||
bucketsToTables(&v[0], ld);
|
||||
}
|
||||
|
||||
void TupleJoiner::um_insertInlineRows(uint rowCount, Row& r)
|
||||
@ -367,7 +347,7 @@ void TupleJoiner::um_insertInlineRows(uint rowCount, Row& r)
|
||||
else
|
||||
v[bucket].emplace_back(pair<int64_t, uint8_t*>(smallKey, r.getData()));
|
||||
}
|
||||
bucketsToTables(&v[0], h.get());
|
||||
bucketsToTables(&v[0], h);
|
||||
}
|
||||
|
||||
void TupleJoiner::um_insertStringTable(uint rowCount, Row& r)
|
||||
@ -389,7 +369,7 @@ void TupleJoiner::um_insertStringTable(uint rowCount, Row& r)
|
||||
else
|
||||
v[bucket].emplace_back(pair<int64_t, Row::Pointer>(smallKey, r.getPointer()));
|
||||
}
|
||||
bucketsToTables(&v[0], sth.get());
|
||||
bucketsToTables(&v[0], sth);
|
||||
}
|
||||
|
||||
void TupleJoiner::insertRGData(RowGroup& rg, uint threadID)
|
||||
@ -409,7 +389,7 @@ void TupleJoiner::insertRGData(RowGroup& rg, uint threadID)
|
||||
r.zeroRid();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
rg.getRow(0, &r);
|
||||
|
||||
if (joinAlg == UM)
|
||||
@ -427,7 +407,7 @@ void TupleJoiner::insertRGData(RowGroup& rg, uint threadID)
|
||||
{
|
||||
// while in PM-join mode, inserting is single-threaded
|
||||
for (i = 0; i < rowCount; i++, r.nextRow())
|
||||
rows.push_back(r.getPointer());
|
||||
rows->push_back(r.getPointer());
|
||||
}
|
||||
}
|
||||
|
||||
@ -492,7 +472,7 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
|
||||
}
|
||||
}
|
||||
else
|
||||
rows.push_back(r.getPointer());
|
||||
rows->push_back(r.getPointer());
|
||||
}
|
||||
|
||||
void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uint32_t threadID,
|
||||
@ -508,8 +488,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
uint32_t size = v.size();
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
if (v[i] < rows.size())
|
||||
matches->push_back(rows[v[i]]);
|
||||
if (v[i] < rows->size())
|
||||
matches->push_back((*rows)[v[i]]);
|
||||
|
||||
if (UNLIKELY((semiJoin() || antiJoin()) && matches->size() == 0))
|
||||
matches->push_back(smallNullRow.getPointer());
|
||||
@ -536,7 +516,7 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->push_back(range.first->second);
|
||||
}
|
||||
else if (largeSideRow.getColType(largeKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE && ld)
|
||||
else if (largeSideRow.getColType(largeKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE && !ld.empty())
|
||||
{
|
||||
// This is a compare of two long double
|
||||
long double largeKey;
|
||||
@ -572,7 +552,7 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
largeKey = largeSideRow.getIntField(largeKeyColumns[0]);
|
||||
}
|
||||
|
||||
if (ld)
|
||||
if (!ld.empty())
|
||||
{
|
||||
// Compare against long double
|
||||
long double ldKey = largeKey;
|
||||
@ -619,7 +599,7 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
|
||||
if (UNLIKELY(inUM() && (joinType & MATCHNULLS) && !isNull && !typelessJoin))
|
||||
{
|
||||
if (largeRG.getColType(largeKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE && ld)
|
||||
if (largeRG.getColType(largeKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE && !ld.empty())
|
||||
{
|
||||
uint bucket = bucketPicker((char*)&(joblist::LONGDOUBLENULL), sizeof(joblist::LONGDOUBLENULL), bpSeed) &
|
||||
bucketMask;
|
||||
@ -749,7 +729,7 @@ void TupleJoiner::doneInserting()
|
||||
for (i = 0; i < rowCount; i++)
|
||||
{
|
||||
if (joinAlg == PM)
|
||||
smallRow.setPointer(rows[pmpos++]);
|
||||
smallRow.setPointer((*rows)[pmpos++]);
|
||||
else if (typelessJoin)
|
||||
{
|
||||
while (thit == ht[bucket]->end())
|
||||
@ -839,14 +819,13 @@ void TupleJoiner::umJoinConvert(size_t begin, size_t end)
|
||||
|
||||
while (begin < end)
|
||||
{
|
||||
smallRow.setPointer(rows[begin++]);
|
||||
smallRow.setPointer((*rows)[begin++]);
|
||||
insert(smallRow);
|
||||
}
|
||||
}
|
||||
|
||||
void TupleJoiner::setInUM()
|
||||
{
|
||||
vector<Row::Pointer> empty;
|
||||
Row smallRow;
|
||||
uint32_t i, size;
|
||||
|
||||
@ -854,7 +833,7 @@ void TupleJoiner::setInUM()
|
||||
return;
|
||||
|
||||
joinAlg = UM;
|
||||
size = rows.size();
|
||||
size = rows->size();
|
||||
size_t chunkSize =
|
||||
((size / numCores) + 1 < 50000 ? 50000
|
||||
: (size / numCores) + 1); // don't start a thread to process < 50k rows
|
||||
@ -872,17 +851,15 @@ void TupleJoiner::setInUM()
|
||||
#ifdef TJ_DEBUG
|
||||
cout << "done\n";
|
||||
#endif
|
||||
rows.swap(empty);
|
||||
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
|
||||
rows.reset(new RowPointersVec(alloc));
|
||||
|
||||
if (typelessJoin)
|
||||
{
|
||||
tmpKeyAlloc.reset(new FixedAllocator[threadCount]);
|
||||
|
||||
for (i = 0; i < threadCount; i++)
|
||||
{
|
||||
auto alloc = resourceManager_->getAllocator<utils::FixedAllocatorBufType>();
|
||||
tmpKeyAlloc[i] = FixedAllocator(alloc, keyLength, true);
|
||||
|
||||
tmpKeyAlloc.emplace_back(FixedAllocator(alloc, keyLength, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -909,8 +886,8 @@ void TupleJoiner::setInUM(vector<RGData>& rgs)
|
||||
return;
|
||||
|
||||
{ // don't need rows anymore, free the mem
|
||||
vector<Row::Pointer> empty;
|
||||
rows.swap(empty);
|
||||
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
|
||||
rows.reset(new RowPointersVec(alloc));
|
||||
}
|
||||
|
||||
joinAlg = UM;
|
||||
@ -935,12 +912,10 @@ void TupleJoiner::setInUM(vector<RGData>& rgs)
|
||||
|
||||
if (typelessJoin)
|
||||
{
|
||||
tmpKeyAlloc.reset(new FixedAllocator[threadCount]);
|
||||
|
||||
for (i = 0; i < threadCount; i++)
|
||||
{
|
||||
auto alloc = resourceManager_->getAllocator<utils::FixedAllocatorBufType>();
|
||||
tmpKeyAlloc[i] = FixedAllocator(alloc, keyLength, true);
|
||||
tmpKeyAlloc.emplace_back(FixedAllocator(alloc, keyLength, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -958,9 +933,9 @@ void TupleJoiner::markMatches(uint32_t threadID, uint32_t rowCount)
|
||||
for (i = 0; i < rowCount; i++)
|
||||
for (j = 0; j < matches[i].size(); j++)
|
||||
{
|
||||
if (matches[i][j] < rows.size())
|
||||
if (matches[i][j] < rows->size())
|
||||
{
|
||||
smallRow[threadID].setPointer(rows[matches[i][j]]);
|
||||
smallRow[threadID].setPointer((*rows)[matches[i][j]]);
|
||||
smallRow[threadID].markRow();
|
||||
}
|
||||
}
|
||||
@ -994,12 +969,10 @@ void TupleJoiner::setThreadCount(uint32_t cnt)
|
||||
|
||||
if (typelessJoin)
|
||||
{
|
||||
tmpKeyAlloc.reset(new FixedAllocator[threadCount]);
|
||||
|
||||
for (uint32_t i = 0; i < threadCount; i++)
|
||||
{
|
||||
auto alloc = resourceManager_->getAllocator<utils::FixedAllocatorBufType>();
|
||||
tmpKeyAlloc[i] = FixedAllocator(alloc, keyLength, true);
|
||||
tmpKeyAlloc.emplace_back(FixedAllocator(alloc, keyLength, true));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1023,14 +996,14 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
|
||||
{
|
||||
uint32_t i, size;
|
||||
|
||||
size = rows.size();
|
||||
size = rows->size();
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
smallR.setPointer(rows[i]);
|
||||
smallR.setPointer((*rows)[i]);
|
||||
|
||||
if (!smallR.isMarked())
|
||||
out->push_back(rows[i]);
|
||||
out->push_back((*rows)[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1090,28 +1063,6 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t TupleJoiner::getMemUsage() const
|
||||
{
|
||||
if (inUM() && typelessJoin)
|
||||
{
|
||||
size_t ret = 0;
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
ret += _pool[i]->getMemUsage();
|
||||
for (int i = 0; i < numCores; i++)
|
||||
ret += storedKeyAlloc[i].getMemUsage();
|
||||
return ret;
|
||||
}
|
||||
else if (inUM())
|
||||
{
|
||||
size_t ret = 0;
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
ret += _pool[i]->getMemUsage();
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
return (rows.size() * sizeof(Row::Pointer));
|
||||
}
|
||||
|
||||
void TupleJoiner::setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> pt)
|
||||
{
|
||||
fe = pt;
|
||||
@ -1250,7 +1201,7 @@ size_t TupleJoiner::size() const
|
||||
return ret;
|
||||
}
|
||||
|
||||
return rows.size();
|
||||
return rows->size();
|
||||
}
|
||||
|
||||
class TypelessDataStringEncoder
|
||||
@ -1821,20 +1772,9 @@ void TupleJoiner::setTableName(const string& tname)
|
||||
|
||||
void TupleJoiner::clearData()
|
||||
{
|
||||
_pool.reset(new boost::shared_ptr<utils::PoolAllocator>[bucketCount]);
|
||||
if (typelessJoin)
|
||||
ht.reset(new boost::scoped_ptr<typelesshash_t>[bucketCount]);
|
||||
else if (smallRG.getColTypes()[smallKeyColumns[0]] == CalpontSystemCatalog::LONGDOUBLE)
|
||||
ld.reset(new boost::scoped_ptr<ldhash_t>[bucketCount]);
|
||||
else if (smallRG.usesStringTable())
|
||||
sth.reset(new boost::scoped_ptr<sthash_t>[bucketCount]);
|
||||
else
|
||||
h.reset(new boost::scoped_ptr<hash_t>[bucketCount]);
|
||||
|
||||
// This loop calls dtors and deallocates mem.
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
{
|
||||
// STLPoolAllocator<pair<const TypelessData, Row::Pointer>> alloc(resourceManager_);
|
||||
// _pool[i] = alloc.getPoolAllocator();
|
||||
auto alloc = resourceManager_->getAllocator<pair<const TypelessData, Row::Pointer>>();
|
||||
if (typelessJoin)
|
||||
ht[i].reset(new typelesshash_t(10, hasher(), typelesshash_t::key_equal(), alloc));
|
||||
@ -1843,11 +1783,13 @@ void TupleJoiner::clearData()
|
||||
else if (smallRG.usesStringTable())
|
||||
sth[i].reset(new sthash_t(10, hasher(), sthash_t::key_equal(), alloc));
|
||||
else
|
||||
{
|
||||
h[i].reset(new hash_t(10, hasher(), hash_t::key_equal(), alloc));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<rowgroup::Row::Pointer> empty;
|
||||
rows.swap(empty);
|
||||
auto alloc = resourceManager_->getAllocator<rowgroup::Row::Pointer>();
|
||||
rows.reset(new RowPointersVec(alloc));
|
||||
finished = false;
|
||||
}
|
||||
|
||||
@ -1910,11 +1852,10 @@ std::shared_ptr<TupleJoiner> TupleJoiner::copyForDiskJoin()
|
||||
|
||||
if (typelessJoin)
|
||||
{
|
||||
ret->storedKeyAlloc.reset(new FixedAllocator[numCores]);
|
||||
for (int i = 0; i < numCores; i++)
|
||||
{
|
||||
auto alloc = resourceManager_->getAllocator<utils::FixedAllocatorBufType>();
|
||||
storedKeyAlloc[i] = FixedAllocator(alloc, keyLength);
|
||||
storedKeyAlloc.emplace_back(FixedAllocator(alloc, keyLength));
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user