1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

Fixes of bugs from ASAN warnings, part one (#2796)

This commit is contained in:
Leonid Fedorov
2023-03-30 18:29:04 +03:00
committed by GitHub
parent a1d20d82d5
commit 2f153184c3
71 changed files with 591 additions and 2038 deletions

View File

@ -50,19 +50,7 @@ namespace rowgroup
{
using cscType = execplan::CalpontSystemCatalog::ColDataType;
StringStore::StringStore() : empty(true), fUseStoreStringMutex(false)
{
}
StringStore::StringStore(const StringStore&)
{
throw logic_error("Don't call StringStore copy ctor");
}
StringStore& StringStore::operator=(const StringStore&)
{
throw logic_error("Don't call StringStore operator=");
}
StringStore::~StringStore()
{
@ -86,10 +74,10 @@ StringStore::~StringStore()
uint64_t StringStore::storeString(const uint8_t* data, uint32_t len)
{
MemChunk* lastMC = NULL;
MemChunk* lastMC = nullptr;
uint64_t ret = 0;
empty = false; // At least a NULL is being stored.
empty = false; // At least a nullptr is being stored.
// Sometimes the caller actually wants "" to be returned....... argggghhhh......
// if (len == 0)
@ -121,7 +109,7 @@ uint64_t StringStore::storeString(const uint8_t* data, uint32_t len)
}
else
{
if ((lastMC == NULL) || (lastMC->capacity - lastMC->currentSize < (len + 4)))
if ((lastMC == nullptr) || (lastMC->capacity - lastMC->currentSize < (len + 4)))
{
// mem usage debugging
// if (lastMC)
@ -215,20 +203,12 @@ void StringStore::clear()
empty = true;
}
UserDataStore::UserDataStore() : fUseUserDataMutex(false)
{
}
UserDataStore::~UserDataStore()
{
}
uint32_t UserDataStore::storeUserData(mcsv1sdk::mcsv1Context& context,
boost::shared_ptr<mcsv1sdk::UserData> data, uint32_t len)
{
uint32_t ret = 0;
if (len == 0 || data == NULL)
if (len == 0 || data == nullptr)
{
return numeric_limits<uint32_t>::max();
}
@ -305,7 +285,7 @@ void UserDataStore::deserialize(ByteStream& bs)
}
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
mcsv1sdk::UserData* userData = NULL;
mcsv1sdk::UserData* userData = nullptr;
rc = funcIter->second->createUserData(userData, vStoreData[i].length);
if (rc != mcsv1sdk::mcsv1_UDAF::SUCCESS)
@ -323,10 +303,6 @@ void UserDataStore::deserialize(ByteStream& bs)
return;
}
RGData::RGData()
{
// cout << "rgdata++ = " << __sync_add_and_fetch(&rgDataCount, 1) << endl;
}
RGData::RGData(const RowGroup& rg, uint32_t rowCount)
{
@ -336,6 +312,9 @@ RGData::RGData(const RowGroup& rg, uint32_t rowCount)
if (rg.usesStringTable() && rowCount > 0)
strings.reset(new StringStore());
userDataStore.reset();
#ifdef VALGRIND
/* In a PM-join, we can serialize entire tables; not every value has been
* filled in yet. Need to look into that. Valgrind complains that
@ -354,6 +333,8 @@ RGData::RGData(const RowGroup& rg)
if (rg.usesStringTable())
strings.reset(new StringStore());
userDataStore.reset();
#ifdef VALGRIND
/* In a PM-join, we can serialize entire tables; not every value has been
* filled in yet. Need to look into that. Valgrind complains that
@ -366,6 +347,7 @@ RGData::RGData(const RowGroup& rg)
void RGData::reinit(const RowGroup& rg, uint32_t rowCount)
{
rowData.reset(new uint8_t[rg.getDataSize(rowCount)]);
userDataStore.reset();
if (rg.usesStringTable())
strings.reset(new StringStore());
@ -386,16 +368,6 @@ void RGData::reinit(const RowGroup& rg)
reinit(rg, 8192);
}
RGData::RGData(const RGData& r) : rowData(r.rowData), strings(r.strings), userDataStore(r.userDataStore)
{
// cout << "rgdata++ = " << __sync_add_and_fetch(&rgDataCount, 1) << endl;
}
RGData::~RGData()
{
// cout << "rgdata-- = " << __sync_sub_and_fetch(&rgDataCount, 1) << endl;
}
void RGData::serialize(ByteStream& bs, uint32_t amount) const
{
// cout << "serializing!\n";
@ -465,6 +437,7 @@ void RGData::clear()
{
rowData.reset();
strings.reset();
userDataStore.reset();
}
// UserDataStore is only used for UDAF.
@ -479,10 +452,6 @@ UserDataStore* RGData::getUserDataStore()
return userDataStore.get();
}
Row::Row() : data(NULL), strings(NULL), userDataStore(NULL)
{
}
Row::Row(const Row& r)
: columnCount(r.columnCount)
, baseRid(r.baseRid)
@ -502,11 +471,7 @@ Row::Row(const Row& r)
, hasLongStringField(r.hasLongStringField)
, sTableThreshold(r.sTableThreshold)
, forceInline(r.forceInline)
, userDataStore(NULL)
{
}
Row::~Row()
, userDataStore(nullptr)
{
}
@ -1037,7 +1002,7 @@ bool Row::equals(const Row& r2, uint32_t lastCol) const
const CHARSET_INFO* Row::getCharset(uint32_t col) const
{
if (charsets[col] == NULL)
if (charsets[col] == nullptr)
{
const_cast<CHARSET_INFO**>(charsets)[col] = &datatypes::Charset(charsetNumbers[col]).getCharset();
}
@ -1045,14 +1010,6 @@ const CHARSET_INFO* Row::getCharset(uint32_t col) const
}
RowGroup::RowGroup()
: columnCount(0)
, data(NULL)
, rgData(NULL)
, strings(NULL)
, useStringTable(true)
, hasCollation(false)
, hasLongStringField(false)
, sTableThreshold(20)
{
// 1024 is too generous to waste.
oldOffsets.reserve(10);
@ -1071,7 +1028,7 @@ RowGroup::RowGroup(uint32_t colCount, const vector<uint32_t>& positions, const v
const vector<uint32_t>& cprecision, uint32_t stringTableThreshold, bool stringTable,
const vector<bool>& forceInlineData)
: columnCount(colCount)
, data(NULL)
, data(nullptr)
, oldOffsets(positions)
, oids(roids)
, keys(tkeys)
@ -1079,8 +1036,8 @@ RowGroup::RowGroup(uint32_t colCount, const vector<uint32_t>& positions, const v
, charsetNumbers(csNumbers)
, scale(cscale)
, precision(cprecision)
, rgData(NULL)
, strings(NULL)
, rgData(nullptr)
, strings(nullptr)
, sTableThreshold(stringTableThreshold)
{
uint32_t i;
@ -1121,8 +1078,8 @@ RowGroup::RowGroup(uint32_t colCount, const vector<uint32_t>& positions, const v
useStringTable = (stringTable && hasLongStringField);
offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
// Set all the charsets to NULL for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
// Set all the charsets to nullptr for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), nullptr);
}
RowGroup::RowGroup(const RowGroup& r)
@ -1189,14 +1146,6 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
}
RowGroup::RowGroup(ByteStream& bs)
: columnCount(0)
, data(nullptr)
, rgData(nullptr)
, strings(nullptr)
, useStringTable(true)
, hasCollation(false)
, hasLongStringField(false)
, sTableThreshold(20)
{
this->deserialize(bs);
}
@ -1267,22 +1216,13 @@ void RowGroup::deserialize(ByteStream& bs)
else if (!useStringTable && !oldOffsets.empty())
offsets = &oldOffsets[0];
// Set all the charsets to NULL for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
// Set all the charsets to nullptr for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), nullptr);
}
void RowGroup::serializeRGData(ByteStream& bs) const
{
// cout << "****** serializing\n" << toString() << en
// if (useStringTable || !hasLongStringField)
rgData->serialize(bs, getDataSize());
// else {
// uint64_t size;
// RGData *compressed = convertToStringTable(&size);
// compressed->serialize(bs, size);
// if (compressed != rgData)
// delete compressed;
// }
}
uint32_t RowGroup::getDataSize() const
@ -1367,7 +1307,7 @@ string RowGroup::toString(const std::vector<uint64_t>& used) const
// os << "strings = " << hex << (int64_t) strings << "\n";
// os << "data = " << (int64_t) data << "\n" << dec;
if (data != NULL)
if (data != nullptr)
{
Row r;
initRow(&r);
@ -1589,7 +1529,7 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList&
const CHARSET_INFO* RowGroup::getCharset(uint32_t col)
{
if (charsets[col] == NULL)
if (charsets[col] == nullptr)
{
charsets[col] = &datatypes::Charset(charsetNumbers[col]).getCharset();
}

View File

@ -129,7 +129,11 @@ inline T derefFromTwoVectorPtrs(const std::vector<T>* outer, const std::vector<T
class StringStore
{
public:
StringStore();
StringStore() = default;
StringStore(const StringStore&) = delete;
StringStore(StringStore&&) = delete;
StringStore& operator=(const StringStore&) = delete;
StringStore& operator=(StringStore&&) = delete;
virtual ~StringStore();
inline std::string getString(uint64_t offset) const;
@ -171,17 +175,14 @@ class StringStore
private:
std::string empty_str;
StringStore(const StringStore&);
StringStore& operator=(const StringStore&);
static constexpr const uint32_t CHUNK_SIZE = 64 * 1024; // allocators like powers of 2
std::vector<boost::shared_array<uint8_t>> mem;
// To store strings > 64KB (BLOB/TEXT)
std::vector<boost::shared_array<uint8_t>> longStrings;
bool empty;
bool fUseStoreStringMutex; //@bug6065, make StringStore::storeString() thread safe
bool empty = true;
bool fUseStoreStringMutex = false; //@bug6065, make StringStore::storeString() thread safe
boost::mutex fMutex;
};
@ -208,8 +209,13 @@ class UserDataStore
};
public:
UserDataStore();
virtual ~UserDataStore();
UserDataStore() = default;
virtual ~UserDataStore() = default;
UserDataStore(const UserDataStore&) = delete;
UserDataStore(UserDataStore&&) = delete;
UserDataStore& operator=(const UserDataStore&) = delete;
UserDataStore& operator=(UserDataStore&&) = delete;
void serialize(messageqcpp::ByteStream&) const;
void deserialize(messageqcpp::ByteStream&);
@ -231,12 +237,10 @@ class UserDataStore
boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t offset) const;
private:
UserDataStore(const UserDataStore&);
UserDataStore& operator=(const UserDataStore&);
std::vector<StoreData> vStoreData;
bool fUseUserDataMutex;
bool fUseUserDataMutex = false;
boost::mutex fMutex;
};
@ -248,13 +252,16 @@ class Row;
class RGData
{
public:
RGData(); // useless unless followed by an = or a deserialize operation
RGData() = default; // useless unless followed by an = or a deserialize operation
RGData(const RowGroup& rg, uint32_t rowCount); // allocates memory for rowData
explicit RGData(const RowGroup& rg);
RGData(const RGData&);
virtual ~RGData();
RGData& operator=(const RGData&) = default;
RGData& operator=(RGData&&) = default;
RGData(const RGData&) = default;
RGData(RGData&&) = default;
virtual ~RGData() = default;
inline RGData& operator=(const RGData&);
// amount should be the # returned by RowGroup::getDataSize()
void serialize(messageqcpp::ByteStream&, uint32_t amount) const;
@ -268,7 +275,7 @@ class RGData
void clear();
void reinit(const RowGroup& rg);
void reinit(const RowGroup& rg, uint32_t rowCount);
inline void setStringStore(boost::shared_ptr<StringStore>& ss)
inline void setStringStore(std::shared_ptr<StringStore>& ss)
{
strings = ss;
}
@ -301,18 +308,21 @@ class RGData
return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
}
boost::shared_array<uint8_t> rowData;
boost::shared_ptr<StringStore> strings;
boost::shared_ptr<UserDataStore> userDataStore;
bool hasRowData() const
{
return !!rowData;
}
private:
// boost::shared_array<uint8_t> rowData;
// boost::shared_ptr<StringStore> strings;
std::shared_ptr<uint8_t[]> rowData;
std::shared_ptr<StringStore> strings;
std::shared_ptr<UserDataStore> userDataStore;
// Need sig to support backward compat. RGData can deserialize both forms.
static const uint32_t RGDATA_SIG = 0xffffffff; // won't happen for 'old' Rowgroup data
friend class RowGroup;
friend class RowGroupStorage;
};
class Row
@ -320,28 +330,26 @@ class Row
public:
struct Pointer
{
inline Pointer() : data(NULL), strings(NULL), userDataStore(NULL)
{
}
inline Pointer() = default;
// Pointer(uint8_t*) implicitly makes old code compatible with the string table impl;
inline Pointer(uint8_t* d) : data(d), strings(NULL), userDataStore(NULL)
inline Pointer(uint8_t* d) : data(d)
{
}
inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s), userDataStore(NULL)
inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s)
{
}
inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) : data(d), strings(s), userDataStore(u)
{
}
uint8_t* data;
StringStore* strings;
UserDataStore* userDataStore;
uint8_t* data = nullptr;
StringStore* strings = nullptr;
UserDataStore* userDataStore = nullptr;
};
Row();
Row() = default;
Row(const Row&);
~Row();
~Row() = default;
Row& operator=(const Row&);
bool operator==(const Row&) const;
@ -491,7 +499,7 @@ class Row
template <typename T>
inline void setBinaryField_offset(const T* value, uint32_t width, uint32_t colIndex);
// support VARBINARY
// Add 2-byte length at the CHARSET_INFO*beginning of the field. NULL and zero length field are
// Add a 2-byte length at the beginning of the field. NULL and zero-length fields are
// treated the same; one of the length bits could be used to distinguish these two cases.
inline std::string getVarBinaryStringField(uint32_t colIndex) const;
inline void setVarBinaryField(const std::string& val, uint32_t colIndex);
@ -579,31 +587,32 @@ class Row
const CHARSET_INFO* getCharset(uint32_t col) const;
private:
uint32_t columnCount;
uint64_t baseRid;
private:
inline bool inStringTable(uint32_t col) const;
private:
uint32_t columnCount = 0;
uint64_t baseRid = 0;
// Note, the mem behind these pointer fields is owned by RowGroup not Row
uint32_t* oldOffsets;
uint32_t* stOffsets;
uint32_t* offsets;
uint32_t* colWidths;
execplan::CalpontSystemCatalog::ColDataType* types;
uint32_t* charsetNumbers;
CHARSET_INFO** charsets;
uint8_t* data;
uint32_t* scale;
uint32_t* precision;
uint32_t* oldOffsets = nullptr;
uint32_t* stOffsets = nullptr;
uint32_t* offsets = nullptr;
uint32_t* colWidths = nullptr;
execplan::CalpontSystemCatalog::ColDataType* types = nullptr;
uint32_t* charsetNumbers = nullptr;
CHARSET_INFO** charsets = nullptr;
uint8_t* data = nullptr;
uint32_t* scale = nullptr;
uint32_t* precision = nullptr;
StringStore* strings;
bool useStringTable;
bool hasCollation;
bool hasLongStringField;
uint32_t sTableThreshold;
StringStore* strings = nullptr;
bool useStringTable = true;
bool hasCollation = false;
bool hasLongStringField = false;
uint32_t sTableThreshold = 20;
boost::shared_array<bool> forceInline;
inline bool inStringTable(uint32_t col) const;
UserDataStore* userDataStore; // For UDAF
UserDataStore* userDataStore = nullptr; // For UDAF
friend class RowGroup;
};
@ -1478,9 +1487,6 @@ class RowGroup : public messageqcpp::Serializeable
inline bool usesStringTable() const;
inline void setUseStringTable(bool);
// RGData *convertToInlineData(uint64_t *size = NULL) const; // caller manages the memory returned by
// this void convertToInlineDataInPlace(); RGData *convertToStringTable(uint64_t *size = NULL)
// const; void convertToStringTableInPlace();
void serializeRGData(messageqcpp::ByteStream&) const;
inline uint32_t getStringTableThreshold() const;
@ -1516,17 +1522,17 @@ class RowGroup : public messageqcpp::Serializeable
const uint16_t& blockNum);
inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);
inline void setStringStore(boost::shared_ptr<StringStore>);
inline void setStringStore(std::shared_ptr<StringStore>);
const CHARSET_INFO* getCharset(uint32_t col);
private:
uint32_t columnCount;
uint8_t* data;
uint32_t columnCount = 0;
uint8_t* data = nullptr;
std::vector<uint32_t> oldOffsets; // inline data offsets
std::vector<uint32_t> stOffsets; // string table offsets
uint32_t* offsets; // offsets either points to oldOffsets or stOffsets
uint32_t* offsets = nullptr; // offsets either points to oldOffsets or stOffsets
std::vector<uint32_t> colWidths;
// oids: the real oid of the column, may have duplicates with alias.
// This oid is necessary for front-end to decide the real column width.
@ -1544,12 +1550,12 @@ class RowGroup : public messageqcpp::Serializeable
std::vector<uint32_t> precision;
// string table impl
RGData* rgData;
StringStore* strings; // note, strings and data belong to rgData
bool useStringTable;
bool hasCollation;
bool hasLongStringField;
uint32_t sTableThreshold;
RGData* rgData = nullptr;
StringStore* strings = nullptr; // note, strings and data belong to rgData
bool useStringTable = true;
bool hasCollation = false;
bool hasLongStringField = false;
uint32_t sTableThreshold = 20;
boost::shared_array<bool> forceInline;
static const uint32_t headerSize = 18;
@ -1586,7 +1592,7 @@ every row, they're a measurable performance penalty */
inline uint32_t RowGroup::getRowCount() const
{
// idbassert(data);
// if (!data) throw std::logic_error("RowGroup::getRowCount(): data is NULL!");
// if (!data) throw std::logic_error("RowGroup::getRowCount(): data is nullptr!");
return *((uint32_t*)&data[rowCountOffset]);
}
@ -1617,8 +1623,8 @@ inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
inline void RowGroup::setData(uint8_t* d)
{
data = d;
strings = NULL;
rgData = NULL;
strings = nullptr;
rgData = nullptr;
setUseStringTable(false);
}
@ -1652,7 +1658,7 @@ inline void RowGroup::setUseStringTable(bool b)
offsets = &oldOffsets[0];
if (!useStringTable)
strings = NULL;
strings = nullptr;
}
inline uint64_t RowGroup::getBaseRid() const
@ -1712,7 +1718,7 @@ inline uint32_t RowGroup::getRowSizeWithStrings() const
inline uint64_t RowGroup::getSizeWithStrings(uint64_t n) const
{
if (strings == NULL)
if (strings == nullptr)
return getDataSize(n);
else
return getDataSize(n) + strings->getSize();
@ -1836,7 +1842,7 @@ inline uint32_t RowGroup::getStringTableThreshold() const
return sTableThreshold;
}
inline void RowGroup::setStringStore(boost::shared_ptr<StringStore> ss)
inline void RowGroup::setStringStore(std::shared_ptr<StringStore> ss)
{
if (useStringTable)
{
@ -2031,7 +2037,7 @@ inline bool StringStore::isNullValue(uint64_t off) const
if (off == std::numeric_limits<uint64_t>::max())
return true;
// Long strings won't be NULL
// Long strings are never NULL
if (off & 0x8000000000000000)
return false;
@ -2054,7 +2060,7 @@ inline bool StringStore::isNullValue(uint64_t off) const
if ((offset + length) > mc->currentSize)
return true;
if (mc->data[offset + 4] == 0) // "" = NULL string for some reason...
if (mc->data[offset + 4] == 0) // "" = nullptr string for some reason...
return true;
return (memcmp(&mc->data[offset + 4], joblist::CPNULLSTRMARK.c_str(), 8) == 0);
}
@ -2121,14 +2127,6 @@ inline uint64_t StringStore::getSize() const
return ret;
}
inline RGData& RGData::operator=(const RGData& r)
{
rowData = r.rowData;
strings = r.strings;
userDataStore = r.userDataStore;
return *this;
}
inline void RGData::getRow(uint32_t num, Row* row)
{
uint32_t size = row->getSize();