You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Fix/mcol 5787 rgdata buffer max size dev (#3325)
* fix(rowgroup): RGData now uses uint64_t counter for the fixed sizes columns data buf. The buffer can utilize > 4GB RAM that is necessary for PM side join. RGData ctor uses uint32_t allocating data buffer. This fact causes implicit heap overflow. * feat(bytestream,serdes): BS buffer size type is uint64_t This necessary to handle 64bit RGData, that comes as a separate patch. The pair of patches would allow to have PM joins when SmallSide size > 4GB. * feat(bytestream,serdes): Distribute BS buf size data type change to avoid implicit data type narrowing * feat(rowgroup): this returns bits lost during cherry-pick. The bits lost caused the first RGData::serialize to crash a process
This commit is contained in:
@ -31,7 +31,6 @@
|
||||
#include <iterator>
|
||||
using namespace std;
|
||||
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "bytestream.h"
|
||||
@ -49,8 +48,6 @@ namespace rowgroup
|
||||
{
|
||||
using cscType = execplan::CalpontSystemCatalog::ColDataType;
|
||||
|
||||
|
||||
|
||||
StringStore::~StringStore()
|
||||
{
|
||||
#if 0
|
||||
@ -302,47 +299,27 @@ void UserDataStore::deserialize(ByteStream& bs)
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
RGData::RGData(const RowGroup& rg, uint32_t rowCount)
|
||||
{
|
||||
// cout << "rgdata++ = " << __sync_add_and_fetch(&rgDataCount, 1) << endl;
|
||||
rowData.reset(new uint8_t[rg.getDataSize(rowCount)]);
|
||||
RGDataSizeType s = rg.getDataSize(rowCount);
|
||||
rowData.reset(new uint8_t[s]);
|
||||
|
||||
if (rg.usesStringTable() && rowCount > 0)
|
||||
strings.reset(new StringStore());
|
||||
|
||||
userDataStore.reset();
|
||||
|
||||
|
||||
#ifdef VALGRIND
|
||||
/* In a PM-join, we can serialize entire tables; not every value has been
|
||||
* filled in yet. Need to look into that. Valgrind complains that
|
||||
* those bytes are uninitialized, this suppresses that error.
|
||||
*/
|
||||
memset(rowData.get(), 0, rg.getDataSize(rowCount)); // XXXPAT: make valgrind happy temporarily
|
||||
#endif
|
||||
memset(rowData.get(), 0, rg.getDataSize(rowCount)); // XXXPAT: make valgrind happy temporarily
|
||||
columnCount = rg.getColumnCount();
|
||||
rowSize = rg.getRowSize();
|
||||
}
|
||||
|
||||
RGData::RGData(const RowGroup& rg)
|
||||
{
|
||||
// cout << "rgdata++ = " << __sync_add_and_fetch(&rgDataCount, 1) << endl;
|
||||
rowData.reset(new uint8_t[rg.getMaxDataSize()]);
|
||||
|
||||
if (rg.usesStringTable())
|
||||
strings.reset(new StringStore());
|
||||
|
||||
userDataStore.reset();
|
||||
|
||||
#ifdef VALGRIND
|
||||
/* In a PM-join, we can serialize entire tables; not every value has been
|
||||
* filled in yet. Need to look into that. Valgrind complains that
|
||||
* those bytes are uninitialized, this suppresses that error.
|
||||
*/
|
||||
memset(rowData.get(), 0, rg.getMaxDataSize());
|
||||
#endif
|
||||
columnCount = rg.getColumnCount();
|
||||
rowSize = rg.getRowSize();
|
||||
}
|
||||
@ -356,14 +333,6 @@ void RGData::reinit(const RowGroup& rg, uint32_t rowCount)
|
||||
strings.reset(new StringStore());
|
||||
else
|
||||
strings.reset();
|
||||
|
||||
#ifdef VALGRIND
|
||||
/* In a PM-join, we can serialize entire tables; not every value has been
|
||||
* filled in yet. Need to look into that. Valgrind complains that
|
||||
* those bytes are uninitialized, this suppresses that error.
|
||||
*/
|
||||
memset(rowData.get(), 0, rg.getDataSize(rowCount));
|
||||
#endif
|
||||
columnCount = rg.getColumnCount();
|
||||
rowSize = rg.getRowSize();
|
||||
}
|
||||
@ -373,11 +342,11 @@ void RGData::reinit(const RowGroup& rg)
|
||||
reinit(rg, 8192);
|
||||
}
|
||||
|
||||
void RGData::serialize(ByteStream& bs, uint32_t amount) const
|
||||
void RGData::serialize(ByteStream& bs, RGDataSizeType amount) const
|
||||
{
|
||||
// cout << "serializing!\n";
|
||||
bs << (uint32_t)RGDATA_SIG;
|
||||
bs << (uint32_t)amount;
|
||||
bs << amount;
|
||||
bs << columnCount;
|
||||
bs << rowSize;
|
||||
bs.append(rowData.get(), amount);
|
||||
@ -399,9 +368,10 @@ void RGData::serialize(ByteStream& bs, uint32_t amount) const
|
||||
bs << (uint8_t)0;
|
||||
}
|
||||
|
||||
void RGData::deserialize(ByteStream& bs, uint32_t defAmount)
|
||||
void RGData::deserialize(ByteStream& bs, RGDataSizeType defAmount)
|
||||
{
|
||||
uint32_t amount, sig;
|
||||
uint32_t sig;
|
||||
RGDataSizeType amount;
|
||||
uint8_t* buf;
|
||||
uint8_t tmp8;
|
||||
|
||||
@ -642,7 +612,7 @@ string Row::toCSV() const
|
||||
|
||||
void Row::setToNull(uint32_t colIndex)
|
||||
{
|
||||
setNullMark(colIndex, true); // mark as null.
|
||||
setNullMark(colIndex, true); // mark as null.
|
||||
switch (types[colIndex])
|
||||
{
|
||||
case CalpontSystemCatalog::TINYINT: data[offsets[colIndex]] = joblist::TINYINTNULL; break;
|
||||
@ -665,11 +635,11 @@ void Row::setToNull(uint32_t colIndex)
|
||||
*((int32_t*)&data[offsets[colIndex]]) = static_cast<int32_t>(joblist::DATENULL);
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::BIGINT:
|
||||
if (precision[colIndex] != MagicPrecisionForCountAgg)
|
||||
*((uint64_t*)&data[offsets[colIndex]]) = joblist::BIGINTNULL;
|
||||
else // work around for count() in outer join result.
|
||||
*((uint64_t*)&data[offsets[colIndex]]) = 0;
|
||||
case CalpontSystemCatalog::BIGINT:
|
||||
if (precision[colIndex] != MagicPrecisionForCountAgg)
|
||||
*((uint64_t*)&data[offsets[colIndex]]) = joblist::BIGINTNULL;
|
||||
else // work around for count() in outer join result.
|
||||
*((uint64_t*)&data[offsets[colIndex]]) = 0;
|
||||
|
||||
break;
|
||||
|
||||
@ -680,9 +650,13 @@ void Row::setToNull(uint32_t colIndex)
|
||||
*((long double*)&data[offsets[colIndex]]) = joblist::LONGDOUBLENULL;
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::DATETIME: *((uint64_t*)&data[offsets[colIndex]]) = joblist::DATETIMENULL; break;
|
||||
case CalpontSystemCatalog::DATETIME:
|
||||
*((uint64_t*)&data[offsets[colIndex]]) = joblist::DATETIMENULL;
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::TIMESTAMP: *((uint64_t*)&data[offsets[colIndex]]) = joblist::TIMESTAMPNULL; break;
|
||||
case CalpontSystemCatalog::TIMESTAMP:
|
||||
*((uint64_t*)&data[offsets[colIndex]]) = joblist::TIMESTAMPNULL;
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::TIME: *((uint64_t*)&data[offsets[colIndex]]) = joblist::TIMENULL; break;
|
||||
|
||||
@ -716,9 +690,7 @@ void Row::setToNull(uint32_t colIndex)
|
||||
case 7:
|
||||
case 8: *((uint64_t*)&data[offsets[colIndex]]) = joblist::CHAR8NULL; break;
|
||||
|
||||
default:
|
||||
setNullMark(colIndex, true);
|
||||
break;
|
||||
default: setNullMark(colIndex, true); break;
|
||||
}
|
||||
|
||||
break;
|
||||
@ -751,7 +723,9 @@ void Row::setToNull(uint32_t colIndex)
|
||||
|
||||
case CalpontSystemCatalog::UTINYINT: data[offsets[colIndex]] = joblist::UTINYINTNULL; break;
|
||||
|
||||
case CalpontSystemCatalog::USMALLINT: *((uint16_t*)&data[offsets[colIndex]]) = joblist::USMALLINTNULL; break;
|
||||
case CalpontSystemCatalog::USMALLINT:
|
||||
*((uint16_t*)&data[offsets[colIndex]]) = joblist::USMALLINTNULL;
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::UMEDINT:
|
||||
case CalpontSystemCatalog::UINT: *((uint32_t*)&data[offsets[colIndex]]) = joblist::UINTNULL; break;
|
||||
@ -760,8 +734,8 @@ void Row::setToNull(uint32_t colIndex)
|
||||
|
||||
default:
|
||||
ostringstream os;
|
||||
os << "Row::initToNull(): got bad column type (" << types[colIndex] << "). Width=" << getColumnWidth(colIndex)
|
||||
<< endl;
|
||||
os << "Row::initToNull(): got bad column type (" << types[colIndex]
|
||||
<< "). Width=" << getColumnWidth(colIndex) << endl;
|
||||
os << toString();
|
||||
throw logic_error(os.str());
|
||||
}
|
||||
@ -870,8 +844,8 @@ bool Row::isNullValue(uint32_t colIndex) const
|
||||
return strings->isNullValue(offset);
|
||||
}
|
||||
|
||||
// if (data[offsets[colIndex]] == 0) // empty string
|
||||
// return true;
|
||||
// if (data[offsets[colIndex]] == 0) // empty string
|
||||
// return true;
|
||||
|
||||
switch (len)
|
||||
{
|
||||
@ -1126,7 +1100,6 @@ RowGroup::RowGroup(const RowGroup& r)
|
||||
offsets = &stOffsets[0];
|
||||
else if (!useStringTable && !oldOffsets.empty())
|
||||
offsets = &oldOffsets[0];
|
||||
|
||||
}
|
||||
|
||||
RowGroup& RowGroup::operator=(const RowGroup& r)
|
||||
@ -1241,27 +1214,28 @@ void RowGroup::serializeRGData(ByteStream& bs) const
|
||||
rgData->serialize(bs, getDataSize());
|
||||
}
|
||||
|
||||
uint32_t RowGroup::getDataSize() const
|
||||
RGDataSizeType RowGroup::getDataSize() const
|
||||
{
|
||||
return getDataSize(getRowCount());
|
||||
}
|
||||
|
||||
uint32_t RowGroup::getDataSize(uint64_t n) const
|
||||
RGDataSizeType RowGroup::getDataSize(uint64_t n) const
|
||||
{
|
||||
return headerSize + (n * getRowSize());
|
||||
return headerSize + (n * static_cast<RGDataSizeType>(getRowSize()));
|
||||
}
|
||||
|
||||
uint32_t RowGroup::getMaxDataSize() const
|
||||
RGDataSizeType RowGroup::getMaxDataSize() const
|
||||
{
|
||||
return headerSize + (8192 * getRowSize());
|
||||
return headerSize + (static_cast<RGDataSizeType>(rgCommonSize) * static_cast<RGDataSizeType>(getRowSize()));
|
||||
}
|
||||
|
||||
uint32_t RowGroup::getMaxDataSizeWithStrings() const
|
||||
RGDataSizeType RowGroup::getMaxDataSizeWithStrings() const
|
||||
{
|
||||
return headerSize + (8192 * (oldOffsets[columnCount] + columnCount));
|
||||
return headerSize +
|
||||
(static_cast<RGDataSizeType>(rgCommonSize) * static_cast<RGDataSizeType>(getRowSizeWithStrings()));
|
||||
}
|
||||
|
||||
uint32_t RowGroup::getEmptySize() const
|
||||
RGDataSizeType RowGroup::getEmptySize() const
|
||||
{
|
||||
return headerSize;
|
||||
}
|
||||
@ -1331,9 +1305,8 @@ string RowGroup::toString(const std::vector<uint64_t>& used) const
|
||||
os << "rowcount = " << getRowCount() << endl;
|
||||
if (!used.empty())
|
||||
{
|
||||
uint64_t cnt =
|
||||
std::accumulate(used.begin(), used.end(), 0ULL,
|
||||
[](uint64_t a, uint64_t bits) { return a + __builtin_popcountll(bits); });
|
||||
uint64_t cnt = std::accumulate(used.begin(), used.end(), 0ULL, [](uint64_t a, uint64_t bits)
|
||||
{ return a + __builtin_popcountll(bits); });
|
||||
os << "sparse row count = " << cnt << endl;
|
||||
}
|
||||
os << "base rid = " << getBaseRid() << endl;
|
||||
|
@ -62,6 +62,7 @@
|
||||
namespace rowgroup
|
||||
{
|
||||
const int16_t rgCommonSize = 8192;
|
||||
using RGDataSizeType = uint64_t;
|
||||
|
||||
/*
|
||||
The RowGroup family of classes encapsulate the data moved through the
|
||||
@ -270,14 +271,14 @@ class RGData
|
||||
|
||||
|
||||
// amount should be the # returned by RowGroup::getDataSize()
|
||||
void serialize(messageqcpp::ByteStream&, uint32_t amount) const;
|
||||
void serialize(messageqcpp::ByteStream&, RGDataSizeType amount) const;
|
||||
|
||||
// the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
|
||||
// inline data with a length field. Once that's converted to string table format, that
|
||||
// option can go away.
|
||||
void deserialize(messageqcpp::ByteStream&, uint32_t amount = 0); // returns the # of bytes read
|
||||
void deserialize(messageqcpp::ByteStream&, RGDataSizeType amount = 0); // returns the # of bytes read
|
||||
|
||||
inline uint64_t getStringTableMemUsage();
|
||||
inline RGDataSizeType getStringTableMemUsage();
|
||||
void clear();
|
||||
void reinit(const RowGroup& rg);
|
||||
void reinit(const RowGroup& rg, uint32_t rowCount);
|
||||
@ -1499,15 +1500,15 @@ class RowGroup : public messageqcpp::Serializeable
|
||||
uint32_t getDBRoot() const;
|
||||
void setDBRoot(uint32_t);
|
||||
|
||||
uint32_t getDataSize() const;
|
||||
uint32_t getDataSize(uint64_t n) const;
|
||||
uint32_t getMaxDataSize() const;
|
||||
uint32_t getMaxDataSizeWithStrings() const;
|
||||
uint32_t getEmptySize() const;
|
||||
RGDataSizeType getDataSize() const;
|
||||
RGDataSizeType getDataSize(uint64_t n) const;
|
||||
RGDataSizeType getMaxDataSize() const;
|
||||
RGDataSizeType getMaxDataSizeWithStrings() const;
|
||||
RGDataSizeType getEmptySize() const;
|
||||
|
||||
// this returns the size of the row data with the string table
|
||||
inline uint64_t getSizeWithStrings() const;
|
||||
inline uint64_t getSizeWithStrings(uint64_t n) const;
|
||||
inline RGDataSizeType getSizeWithStrings() const;
|
||||
inline RGDataSizeType getSizeWithStrings(uint64_t n) const;
|
||||
|
||||
// sets the row count to 0 and the baseRid to something
|
||||
// effectively initializing whatever chunk of memory
|
||||
@ -1628,11 +1629,11 @@ class RowGroup : public messageqcpp::Serializeable
|
||||
uint32_t sTableThreshold = 20;
|
||||
std::shared_ptr<bool[]> forceInline;
|
||||
|
||||
static const uint32_t headerSize = 18;
|
||||
static const uint32_t rowCountOffset = 0;
|
||||
static const uint32_t baseRidOffset = 4;
|
||||
static const uint32_t statusOffset = 12;
|
||||
static const uint32_t dbRootOffset = 14;
|
||||
static const uint64_t headerSize = 18;
|
||||
static const uint64_t rowCountOffset = 0;
|
||||
static const uint64_t baseRidOffset = 4;
|
||||
static const uint64_t statusOffset = 12;
|
||||
static const uint64_t dbRootOffset = 14;
|
||||
};
|
||||
|
||||
inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
|
||||
@ -1778,7 +1779,7 @@ inline uint32_t RowGroup::getRowSizeWithStrings() const
|
||||
return oldOffsets[columnCount] + columnCount;
|
||||
}
|
||||
|
||||
inline uint64_t RowGroup::getSizeWithStrings(uint64_t n) const
|
||||
inline RGDataSizeType RowGroup::getSizeWithStrings(uint64_t n) const
|
||||
{
|
||||
if (strings == nullptr)
|
||||
return getDataSize(n);
|
||||
|
@ -689,6 +689,7 @@ class RowGroupStorage
|
||||
if (fRGDatas[rgid])
|
||||
{
|
||||
fRowGroupOut->setData(fRGDatas[rgid].get());
|
||||
// An implicit s2u type cast.
|
||||
int64_t memSz = fRowGroupOut->getSizeWithStrings(fMaxRows);
|
||||
if (!fMM->acquire(memSz))
|
||||
{
|
||||
@ -965,7 +966,7 @@ class RowGroupStorage
|
||||
|
||||
while (rgid >= fRGDatas.size())
|
||||
{
|
||||
int64_t memSz = fRowGroupOut->getSizeWithStrings(fMaxRows);
|
||||
auto memSz = fRowGroupOut->getSizeWithStrings(fMaxRows);
|
||||
if (!fMM->acquire(memSz))
|
||||
{
|
||||
throw logging::IDBExcept(
|
||||
|
Reference in New Issue
Block a user