1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

Fix/mcol 5787 rgdata buffer max size dev (#3325)

* fix(rowgroup): RGData now uses a uint64_t counter for the fixed-size columns data buffer.
	The buffer can utilize > 4GB of RAM, which is necessary for a PM-side join.
	The RGData ctor uses uint32_t when allocating the data buffer,
	which causes an implicit heap overflow.

* feat(bytestream,serdes): BS buffer size type is uint64_t
	This is necessary to handle 64-bit RGData, which comes as
	a separate patch. The pair of patches allows
	PM joins when the SmallSide size is > 4GB.

* feat(bytestream,serdes): Distribute BS buf size data type change to avoid implicit data type narrowing

* feat(rowgroup): this restores bits lost during cherry-pick. The lost bits caused the first RGData::serialize call to crash the process
This commit is contained in:
drrtuy
2024-11-09 19:44:02 +00:00
committed by GitHub
parent 842a3c8a40
commit 8ae5a3da40
28 changed files with 1130 additions and 231 deletions

View File

@ -1,6 +1,6 @@
/*
Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2019 MariaDB Corporation
Copyright (C) 2019-2024 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@ -46,7 +46,7 @@ namespace messageqcpp
/* Copies only the data left to be read */
void ByteStream::doCopy(const ByteStream& rhs)
{
uint32_t rlen = rhs.length();
BSSizeType rlen = rhs.length();
if (fMaxLen < rlen)
{
@ -94,7 +94,7 @@ ByteStream& ByteStream::operator=(const ByteStream& rhs)
return *this;
}
ByteStream::ByteStream(uint32_t initSize) : fBuf(0), fCurInPtr(0), fCurOutPtr(0), fMaxLen(0)
ByteStream::ByteStream(BSSizeType initSize) : fBuf(0), fCurInPtr(0), fCurOutPtr(0), fMaxLen(0)
{
if (initSize > 0)
growBuf(initSize);
@ -102,13 +102,13 @@ ByteStream::ByteStream(uint32_t initSize) : fBuf(0), fCurInPtr(0), fCurOutPtr(0)
void ByteStream::add(const uint8_t b)
{
if (fBuf == 0 || (static_cast<uint32_t>(fCurInPtr - fBuf) == fMaxLen + ISSOverhead))
if (fBuf == 0 || (static_cast<BSSizeType>(fCurInPtr - fBuf) == fMaxLen + ISSOverhead))
growBuf();
*fCurInPtr++ = b;
}
void ByteStream::growBuf(uint32_t toSize)
void ByteStream::growBuf(BSSizeType toSize)
{
if (fBuf == 0)
{
@ -138,8 +138,8 @@ void ByteStream::growBuf(uint32_t toSize)
toSize = std::max(toSize, fMaxLen * 2);
uint8_t* t = new uint8_t[toSize + ISSOverhead];
uint32_t curOutOff = fCurOutPtr - fBuf;
uint32_t curInOff = fCurInPtr - fBuf;
BSSizeType curOutOff = fCurOutPtr - fBuf;
BSSizeType curInOff = fCurInPtr - fBuf;
memcpy(t, fBuf, fCurInPtr - fBuf);
#ifdef ZERO_ON_NEW
memset(t + (fCurInPtr - fBuf), 0, (toSize + ISSOverhead) - (fCurInPtr - fBuf));
@ -169,7 +169,7 @@ void ByteStream::setLongStrings(const std::vector<std::shared_ptr<uint8_t[]>>& o
ByteStream& ByteStream::operator<<(const int8_t b)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 1U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(b) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
*((int8_t*)fCurInPtr) = b;
@ -187,7 +187,7 @@ ByteStream& ByteStream::operator<<(const uint8_t b)
ByteStream& ByteStream::operator<<(const int16_t d)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 2U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(d) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
*((int16_t*)fCurInPtr) = d;
@ -198,7 +198,7 @@ ByteStream& ByteStream::operator<<(const int16_t d)
ByteStream& ByteStream::operator<<(const uint16_t d)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 2U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(d) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
*((uint16_t*)fCurInPtr) = d;
@ -209,7 +209,7 @@ ByteStream& ByteStream::operator<<(const uint16_t d)
ByteStream& ByteStream::operator<<(const int32_t q)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 4U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(q) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
*((int32_t*)fCurInPtr) = q;
@ -220,7 +220,7 @@ ByteStream& ByteStream::operator<<(const int32_t q)
ByteStream& ByteStream::operator<<(const uint32_t q)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 4U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(q) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
*((uint32_t*)fCurInPtr) = q;
@ -231,7 +231,7 @@ ByteStream& ByteStream::operator<<(const uint32_t q)
ByteStream& ByteStream::operator<<(const int64_t o)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 8U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(o) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
*((int64_t*)fCurInPtr) = o;
@ -242,7 +242,7 @@ ByteStream& ByteStream::operator<<(const int64_t o)
ByteStream& ByteStream::operator<<(const uint64_t o)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 8U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(o) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
*((uint64_t*)fCurInPtr) = o;
@ -251,20 +251,20 @@ ByteStream& ByteStream::operator<<(const uint64_t o)
return *this;
}
ByteStream& ByteStream::operator<<(const uint128_t& o)
ByteStream& ByteStream::operator<<(const uint128_t& h)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 16U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(h) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
datatypes::TSInt128::storeUnaligned(fCurInPtr, o);
datatypes::TSInt128::storeUnaligned(fCurInPtr, h);
fCurInPtr += 16;
return *this;
}
ByteStream& ByteStream::operator<<(const int128_t& o)
ByteStream& ByteStream::operator<<(const int128_t& h)
{
if (fBuf == 0 || (fCurInPtr - fBuf + 16U > fMaxLen + ISSOverhead))
if (fBuf == 0 || (fCurInPtr - fBuf + sizeof(h) > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
datatypes::TSInt128::storeUnaligned(fCurInPtr, o);
datatypes::TSInt128::storeUnaligned(fCurInPtr, h);
fCurInPtr += 16;
return *this;
}
@ -475,18 +475,18 @@ void ByteStream::peek(uint64_t& o) const
o = *((uint64_t*)fCurOutPtr);
}
void ByteStream::peek(uint128_t& o) const
void ByteStream::peek(uint128_t& h) const
{
if (length() < 16)
throw underflow_error("ByteStream>uint128_t: not enough data in stream to fill datatype");
datatypes::TSInt128::assignPtrPtr(&o, fCurOutPtr);
datatypes::TSInt128::assignPtrPtr(&h, fCurOutPtr);
}
void ByteStream::peek(int128_t& o) const
void ByteStream::peek(int128_t& h) const
{
if (length() < 16)
throw underflow_error("ByteStream>int128_t: not enough data in stream to fill datatype");
datatypes::TSInt128::assignPtrPtr(&o, fCurOutPtr);
datatypes::TSInt128::assignPtrPtr(&h, fCurOutPtr);
}
void ByteStream::peek(string& s) const
@ -519,7 +519,7 @@ void ByteStream::peek(string& s) const
throw logging::ProtocolError("expected a string");
// we know len >= 0 by now...
if (length() < static_cast<uint32_t>(len + 4))
if (length() < static_cast<BSSizeType>(len + 4))
{
#if DEBUG_DUMP_STRINGS_LESS_THAN > 0
cerr << "bs: wanted " << len + 4 << " bytes, but there are only " << length() << " remaining" << endl;
@ -531,13 +531,13 @@ void ByteStream::peek(string& s) const
s.assign((char*)&fCurOutPtr[4], len);
}
void ByteStream::load(const uint8_t* bp, uint32_t len)
void ByteStream::load(const uint8_t* bp, BSSizeType len)
{
// Do all the stuff that could throw an exception first
if (bp == 0 && len != 0)
throw invalid_argument("ByteStream::load: bp cannot equal 0 when len is not equal to 0");
uint32_t newMaxLen = (len + BlockSize - 1) / BlockSize * BlockSize;
BSSizeType newMaxLen = (len + BlockSize - 1) / BlockSize * BlockSize;
if (len > fMaxLen)
{
@ -551,7 +551,7 @@ void ByteStream::load(const uint8_t* bp, uint32_t len)
fCurInPtr = fBuf + len + ISSOverhead;
}
void ByteStream::append(const uint8_t* bp, uint32_t len)
void ByteStream::append(const uint8_t* bp, BSSizeType len)
{
if (len == 0)
return;
@ -559,7 +559,7 @@ void ByteStream::append(const uint8_t* bp, uint32_t len)
if (bp == 0)
throw invalid_argument("ByteStream::append: bp cannot equal 0 when len is not equal to 0");
uint32_t newSize = static_cast<uint32_t>(fCurInPtr - fBuf + len);
BSSizeType newSize = static_cast<BSSizeType>(fCurInPtr - fBuf + len);
if (fBuf == 0 || (newSize > fMaxLen))
growBuf(newSize);
@ -635,7 +635,7 @@ void ByteStream::serialize(ByteStream& bs) const
void ByteStream::deserialize(ByteStream& bs)
{
uint32_t len;
BSSizeType len;
restart();
bs >> len;
@ -643,9 +643,9 @@ void ByteStream::deserialize(ByteStream& bs)
bs.advance(len);
}
void ByteStream::needAtLeast(size_t amount)
void ByteStream::needAtLeast(BSSizeType amount)
{
size_t currentSpace;
BSSizeType currentSpace;
currentSpace = fMaxLen - (fCurInPtr - (fBuf + ISSOverhead));
@ -656,7 +656,7 @@ void ByteStream::needAtLeast(size_t amount)
ByteStream& ByteStream::operator<<(const ByteStream& bs)
{
uint32_t len = bs.length();
BSSizeType len = bs.length();
*this << len;
@ -668,20 +668,20 @@ ByteStream& ByteStream::operator<<(const ByteStream& bs)
ByteStream& ByteStream::operator>>(ByteStream& bs)
{
peek(bs);
fCurOutPtr += 4 + bs.length();
fCurOutPtr += sizeof(BSSizeType) + bs.length();
return *this;
}
void ByteStream::peek(ByteStream& bs) const
{
uint32_t len;
BSSizeType len;
peek(len);
if (length() < len)
throw underflow_error("ByteStream>ByteStream: not enough data in stream to fill datatype");
bs.load(&fCurOutPtr[4], len);
bs.load(&fCurOutPtr[sizeof(len)], len);
}
ByteStream& ByteStream::operator<<(const uuid& u)
@ -707,7 +707,7 @@ void ByteStream::peek(uuid& u) const
ByteStream& ByteStream::operator<<(const float f)
{
int sz = sizeof(float);
const constexpr BSSizeType sz = sizeof(float);
if (fBuf == 0 || (fCurInPtr - fBuf + sz > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
@ -719,7 +719,7 @@ ByteStream& ByteStream::operator<<(const float f)
}
ByteStream& ByteStream::operator<<(const double d)
{
int sz = sizeof(double);
const constexpr BSSizeType sz = sizeof(double);
if (fBuf == 0 || (fCurInPtr - fBuf + sz > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);
@ -731,7 +731,7 @@ ByteStream& ByteStream::operator<<(const double d)
}
ByteStream& ByteStream::operator<<(const long double d)
{
int sz = sizeof(long double);
const constexpr BSSizeType sz = sizeof(long double);
if (fBuf == 0 || (fCurInPtr - fBuf + sz > fMaxLen + ISSOverhead))
growBuf(fMaxLen + BlockSize);

View File

@ -45,6 +45,7 @@ class ByteStreamTestSuite;
namespace messageqcpp
{
typedef boost::shared_ptr<ByteStream> SBS;
using BSSizeType = uint64_t;
/**
* @brief A class to marshall bytes as a stream
@ -76,11 +77,11 @@ class ByteStream : public Serializeable
/**
* default ctor
*/
EXPORT explicit ByteStream(uint32_t initSize = 8192); // multiples of pagesize are best
EXPORT explicit ByteStream(BSSizeType initSize = 8192); // multiples of pagesize are best
/**
* ctor with a uint8_t array and len initializer
*/
inline ByteStream(const uint8_t* bp, const uint32_t len);
inline ByteStream(const uint8_t* bp, const BSSizeType len);
/**
* copy ctor
*/
@ -337,12 +338,12 @@ class ByteStream : public Serializeable
/**
* load the stream from an array. Clears out any previous data.
*/
EXPORT void load(const uint8_t* bp, uint32_t len);
EXPORT void load(const uint8_t* bp, BSSizeType len);
/**
* append bytes to the end of the stream.
*/
EXPORT void append(const uint8_t* bp, uint32_t len);
EXPORT void append(const uint8_t* bp, BSSizeType len);
/**
* equality check on buffer contents.
@ -378,19 +379,19 @@ class ByteStream : public Serializeable
* advance the output ptr without having to extract bytes
* @warning be careful advancing near 4GB!
*/
inline void advance(uint32_t amt);
inline void advance(BSSizeType amt);
/**
* returns the length of the queue (in bytes)
* @warning do not attempt to make a ByteStream bigger than 4GB!
*/
inline uint32_t length() const;
inline BSSizeType length() const;
inline bool empty() const;
/**
* returns the length of the queue, including header overhead (in bytes)
*/
inline uint32_t lengthWithHdrOverhead() const;
inline BSSizeType lengthWithHdrOverhead() const;
/**
* clears the stream. Releases any current stream and sets all pointers to 0. The state of the object
@ -422,7 +423,7 @@ class ByteStream : public Serializeable
/**
* Get the allocated size of the buffer.
*/
inline uint32_t getBufferSize() const;
inline BSSizeType getBufferSize() const;
/**
* Serializeable interface
@ -437,10 +438,10 @@ class ByteStream : public Serializeable
/**
* memory allocation chunk size
*/
EXPORT static const uint32_t BlockSize = 4096;
EXPORT static const BSSizeType BlockSize = 4096;
/** size of the space we want in front of the data */
EXPORT static const uint32_t ISSOverhead =
EXPORT static const BSSizeType ISSOverhead =
3 * sizeof(uint32_t); // space for the BS magic & length & number of long strings.
// Methods to get and set `long strings`.
@ -458,7 +459,7 @@ class ByteStream : public Serializeable
/**
* adds another BlockSize bytes to the internal buffer
*/
void growBuf(uint32_t toSize = 0);
void growBuf(BSSizeType toSize = 0);
/**
* handles member copying from one ByteStream to another
*/
@ -476,9 +477,8 @@ class ByteStream : public Serializeable
uint8_t* fBuf; /// the start of the allocated buffer
uint8_t* fCurInPtr; // the point in fBuf where data is inserted next
uint8_t* fCurOutPtr; // the point in fBuf where data is extracted from next
uint32_t fMaxLen; // how big fBuf is currently
// Stores `long strings`.
std::vector<std::shared_ptr<uint8_t[]>> longStrings;
BSSizeType fMaxLen; // how big fBuf is currently
std::vector<std::shared_ptr<uint8_t[]>> longStrings; // Stores `long strings`.
};
template <int W, typename T = void>
@ -527,7 +527,7 @@ static const uint8_t BS_BLOB = 9;
static const uint8_t BS_SERIALIZABLE = 10;
static const uint8_t BS_UUID = 11;
inline ByteStream::ByteStream(const uint8_t* bp, const uint32_t len) : fBuf(0), fMaxLen(0)
inline ByteStream::ByteStream(const uint8_t* bp, const BSSizeType len) : fBuf(0), fMaxLen(0)
{
load(bp, len);
}
@ -544,15 +544,15 @@ inline uint8_t* ByteStream::buf()
{
return fCurOutPtr;
}
inline uint32_t ByteStream::length() const
inline BSSizeType ByteStream::length() const
{
return (uint32_t)(fCurInPtr - fCurOutPtr);
return static_cast<BSSizeType>(fCurInPtr - fCurOutPtr);
}
inline bool ByteStream::empty() const
{
return (length() == 0);
}
inline uint32_t ByteStream::lengthWithHdrOverhead() const
inline BSSizeType ByteStream::lengthWithHdrOverhead() const
{
return (length() + ISSOverhead);
}
@ -570,7 +570,7 @@ inline void ByteStream::rewind()
{
fCurOutPtr = fBuf + ISSOverhead;
}
inline void ByteStream::advance(uint32_t adv)
inline void ByteStream::advance(BSSizeType adv)
{
// fCurOutPtr is always >= fBuf, so fCurOutPtr - fBuf is >= 0, and this difference is always <= 32 bits
// there is an edge condition not detected here: if fCurOutPtr - fBuf is nearly 4GB and you try to
@ -619,7 +619,7 @@ inline ByteStream& ByteStream::operator=(const SBS& rhs)
return *this;
}
inline uint32_t ByteStream::getBufferSize() const
inline BSSizeType ByteStream::getBufferSize() const
{
return fMaxLen;
}
@ -738,12 +738,6 @@ void deserializeSet(ByteStream& bs, std::set<T>& s)
s.insert(tmp);
}
}
/*
template<>
struct ByteStream::_ByteStreamType<1, ByteStream::byte>>
{
typedef ByteStream::byte type;
}*/
} // namespace messageqcpp

View File

@ -20,6 +20,7 @@
*
*
***********************************************************************/
#include "bytestream.h"
#include "mcsconfig.h"
#include <stdexcept>
@ -117,7 +118,7 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
{
size_t len = msg.length();
BSSizeType len = msg.length();
if (useCompression && (len > 512))
{
@ -126,6 +127,9 @@ void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
alg->compress((char*)msg.buf(), len, (char*)smsg.getInputPtr() + HEADER_SIZE, &outLen);
// Save original len.
// !!!
// !!! Reducing BS size type from 64bit down to 32 and potentially losing data.
// !!!
*(uint32_t*)smsg.getInputPtr() = len;
smsg.advanceInputPtr(outLen + HEADER_SIZE);

View File

@ -572,6 +572,9 @@ void InetStreamSocket::write(SBS msg, Stats* stats)
void InetStreamSocket::do_write(const ByteStream& msg, uint32_t whichMagic, Stats* stats) const
{
// !!!
// !!! Reducing BS size type from 64bit down to 32 and potentially losing data.
// !!!
uint32_t msglen = msg.length();
uint32_t magic = whichMagic;
uint32_t* realBuf;