You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
MCOL-3536 Collation
This commit is contained in:
@ -108,7 +108,7 @@ public:
|
||||
inline bool isEmpty() const;
|
||||
inline uint64_t getSize() const;
|
||||
inline bool isNullValue(uint64_t offset) const;
|
||||
inline bool equals(const std::string& str, uint64_t offset) const;
|
||||
bool equals(const std::string& str, uint64_t offset, CHARSET_INFO* cs) const;
|
||||
|
||||
void clear();
|
||||
|
||||
@ -325,7 +325,7 @@ public:
|
||||
inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
|
||||
inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
|
||||
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
|
||||
|
||||
|
||||
// this returns true if the type is not CHAR or VARCHAR
|
||||
inline bool isCharType(uint32_t colIndex) const;
|
||||
inline bool isUnsigned(uint32_t colIndex) const;
|
||||
@ -338,7 +338,7 @@ public:
|
||||
inline int64_t getIntField(uint32_t colIndex) const;
|
||||
template<int len> inline bool equals(uint64_t val, uint32_t colIndex) const;
|
||||
inline bool equals(long double val, uint32_t colIndex) const;
|
||||
inline bool equals(const std::string& val, uint32_t colIndex) const;
|
||||
bool equals(const std::string& val, uint32_t colIndex) const;
|
||||
|
||||
inline double getDoubleField(uint32_t colIndex) const;
|
||||
inline float getFloatField(uint32_t colIndex) const;
|
||||
@ -387,7 +387,7 @@ public:
|
||||
inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
|
||||
|
||||
// support VARBINARY
|
||||
// Add 2-byte length at the beginning of the field. NULL and zero length field are
|
||||
// Add 2-byte length at the CHARSET_INFO*beginning of the field. NULL and zero length field are
|
||||
// treated the same, could use one of the length bit to distinguish these two cases.
|
||||
inline std::string getVarBinaryStringField(uint32_t colIndex) const;
|
||||
inline void setVarBinaryField(const std::string& val, uint32_t colIndex);
|
||||
@ -449,14 +449,17 @@ public:
|
||||
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
||||
inline uint64_t hash() const; // generates a hash for all cols
|
||||
|
||||
inline bool equals(const Row&, const std::vector<uint32_t>& keyColumns) const;
|
||||
inline bool equals(const Row&, uint32_t lastCol) const;
|
||||
bool equals(const Row&, const std::vector<uint32_t>& keyColumns) const;
|
||||
bool equals(const Row&, uint32_t lastCol) const;
|
||||
inline bool equals(const Row&) const;
|
||||
|
||||
inline void setUserDataStore(UserDataStore* u)
|
||||
{
|
||||
userDataStore = u;
|
||||
}
|
||||
|
||||
const CHARSET_INFO* getCharset(uint32_t col) const;
|
||||
|
||||
private:
|
||||
uint32_t columnCount;
|
||||
uint64_t baseRid;
|
||||
@ -468,12 +471,14 @@ private:
|
||||
uint32_t* colWidths;
|
||||
execplan::CalpontSystemCatalog::ColDataType* types;
|
||||
uint32_t* charsetNumbers;
|
||||
CHARSET_INFO** charsets;
|
||||
uint8_t* data;
|
||||
uint32_t* scale;
|
||||
uint32_t* precision;
|
||||
|
||||
StringStore* strings;
|
||||
bool useStringTable;
|
||||
bool hasStrings;
|
||||
bool hasLongStringField;
|
||||
uint32_t sTableThreshold;
|
||||
boost::shared_array<bool> forceInline;
|
||||
@ -634,18 +639,6 @@ inline bool Row::equals(long double val, uint32_t colIndex) const
|
||||
{
|
||||
return *((long double*) &data[offsets[colIndex]]) == val;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const std::string& val, uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
uint64_t offset = *((uint64_t*) &data[offsets[colIndex]]);
|
||||
return strings->equals(val, offset);
|
||||
}
|
||||
else
|
||||
return (strncmp(val.c_str(), (char*) &data[offsets[colIndex]], getColumnWidth(colIndex)) == 0);
|
||||
}
|
||||
|
||||
template<int len>
|
||||
inline uint64_t Row::getUintField(uint32_t colIndex) const
|
||||
{
|
||||
@ -1182,69 +1175,6 @@ inline uint64_t Row::hash(uint32_t lastCol) const
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const Row& r2, const std::vector<uint32_t>& keyCols) const
|
||||
{
|
||||
for (uint32_t i = 0; i < keyCols.size(); i++)
|
||||
{
|
||||
const uint32_t& col = keyCols[i];
|
||||
|
||||
if (!isLongString(col))
|
||||
{
|
||||
if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
if (getLongDoubleField(i) != r2.getLongDoubleField(i))
|
||||
return false;
|
||||
}
|
||||
else if (getUintField(col) != r2.getUintField(col))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getStringLength(col) != r2.getStringLength(col))
|
||||
return false;
|
||||
|
||||
if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col)))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const Row& r2, uint32_t lastCol) const
|
||||
{
|
||||
// This check fires with empty r2 only.
|
||||
if (lastCol >= columnCount)
|
||||
return true;
|
||||
|
||||
if (!useStringTable && !r2.useStringTable)
|
||||
return !(memcmp(&data[offsets[0]], &r2.data[offsets[0]], offsets[lastCol + 1] - offsets[0]));
|
||||
|
||||
for (uint32_t i = 0; i <= lastCol; i++)
|
||||
if (!isLongString(i))
|
||||
{
|
||||
if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
if (getLongDoubleField(i) != r2.getLongDoubleField(i))
|
||||
return false;
|
||||
}
|
||||
else if (getUintField(i) != r2.getUintField(i))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t len = getStringLength(i);
|
||||
|
||||
if (len != r2.getStringLength(i))
|
||||
return false;
|
||||
|
||||
if (memcmp(getStringPointer(i), r2.getStringPointer(i), len))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const Row& r2) const
|
||||
{
|
||||
return equals(r2, columnCount - 1);
|
||||
@ -1414,7 +1344,7 @@ public:
|
||||
|
||||
inline void setStringStore(boost::shared_ptr<StringStore>);
|
||||
|
||||
CHARSET_INFO* getCharset(uint32_t col);
|
||||
const CHARSET_INFO* getCharset(uint32_t col);
|
||||
|
||||
private:
|
||||
uint32_t columnCount;
|
||||
@ -1443,6 +1373,7 @@ private:
|
||||
RGData* rgData;
|
||||
StringStore* strings; // note, strings and data belong to rgData
|
||||
bool useStringTable;
|
||||
bool hasStrings;
|
||||
bool hasLongStringField;
|
||||
uint32_t sTableThreshold;
|
||||
boost::shared_array<bool> forceInline;
|
||||
@ -1569,6 +1500,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const
|
||||
r->colWidths = (uint32_t*) &colWidths[0];
|
||||
r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
|
||||
r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
|
||||
r->charsets = (CHARSET_INFO**) & (charsets[0]);
|
||||
r->scale = (uint32_t*) & (scale[0]);
|
||||
r->precision = (uint32_t*) & (precision[0]);
|
||||
}
|
||||
@ -1591,6 +1523,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const
|
||||
r->hasLongStringField = hasLongStringField;
|
||||
r->sTableThreshold = sTableThreshold;
|
||||
r->forceInline = forceInline;
|
||||
r->hasStrings = hasStrings;
|
||||
}
|
||||
|
||||
inline uint32_t RowGroup::getRowSize() const
|
||||
@ -1935,45 +1868,6 @@ inline bool StringStore::isNullValue(uint64_t off) const
|
||||
return (memcmp(&mc->data[offset+4], joblist::CPNULLSTRMARK.c_str(), 8) == 0);
|
||||
}
|
||||
|
||||
inline bool StringStore::equals(const std::string& str, uint64_t off) const
|
||||
{
|
||||
uint32_t length;
|
||||
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return str == joblist::CPNULLSTRMARK;
|
||||
|
||||
MemChunk* mc;
|
||||
|
||||
if (off & 0x8000000000000000)
|
||||
{
|
||||
if (longStrings.size() <= (off & ~0x8000000000000000))
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) longStrings[off & ~0x8000000000000000].get();
|
||||
|
||||
memcpy(&length, mc->data, 4);
|
||||
|
||||
// Not sure if this check it needed, but adds safety
|
||||
if (length > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (strncmp(str.c_str(), (const char*) mc->data + 4, length) == 0);
|
||||
}
|
||||
|
||||
uint32_t chunk = off / CHUNK_SIZE;
|
||||
uint32_t offset = off % CHUNK_SIZE;
|
||||
|
||||
if (mem.size() <= chunk)
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) mem[chunk].get();
|
||||
memcpy(&length, &mc->data[offset], 4);
|
||||
|
||||
if ((offset + length) > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (strncmp(str.c_str(), (const char*) &mc->data[offset] + 4, length) == 0);
|
||||
}
|
||||
inline uint32_t StringStore::getStringLength(uint64_t off)
|
||||
{
|
||||
uint32_t length;
|
||||
|
Reference in New Issue
Block a user