1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-3536 collation

This commit is contained in:
David Hall
2020-05-26 12:42:11 -05:00
parent 11ba12f6ea
commit 06e50e0926
47 changed files with 516 additions and 535 deletions

View File

@ -24,6 +24,10 @@
* is the primary class.
*/
#include <mariadb.h>
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
#include <my_sys.h>
#include <unistd.h>
#include <sstream>
#include <stdexcept>
@ -384,36 +388,16 @@ inline void RowAggregation::updateFloatMinMax(float val1, float val2, int64_t co
fRow.setFloatField(val1, col);
}
#define STRCOLL_ENH__
void RowAggregation::updateStringMinMax(string val1, string val2, int64_t col, int func)
{
if (isNull(fRowGroupOut, fRow, col))
CHARSET_INFO* cs = fRowGroupIn.getCharset(col);
int tmp = cs->strnncoll(val1.c_str(), val1.length(), val2.c_str(), val2.length());
if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) ||
(tmp > 0 && func == rowgroup::ROWAGG_MAX))
{
fRow.setStringField(val1, col);
}
#ifdef STRCOLL_ENH__
else
{
int tmp = utf8::idb_strcoll(val1.c_str(), val2.c_str());
if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) ||
(tmp > 0 && func == rowgroup::ROWAGG_MAX))
{
fRow.setStringField(val1, col);
}
}
#else
else if (minMax(val1, val2, func))
{
fRow.setStringField(val1, col);
}
#endif
}
//------------------------------------------------------------------------------

View File

@ -53,6 +53,9 @@
#include "mcsv1_udaf.h"
#include "constantcolumn.h"
// Because including my_sys.h in a Columnstore header causes too many conflicts
struct charset_info_st;
typedef const struct charset_info_st CHARSET_INFO;
// To do: move code that depends on joblist to a proper subsystem.
namespace joblist
{
@ -706,7 +709,7 @@ protected:
// We need a separate copy for each thread.
mcsv1sdk::mcsv1Context fRGContext;
// These are handy for testing the actual type of static_any for UDAF
static const static_any::any& charTypeId;
static const static_any::any& scharTypeId;

View File

@ -505,8 +505,8 @@ Row::Row() : data(NULL), strings(NULL), userDataStore(NULL) { }
Row::Row(const Row& r) : columnCount(r.columnCount), baseRid(r.baseRid),
oldOffsets(r.oldOffsets), stOffsets(r.stOffsets),
offsets(r.offsets), colWidths(r.colWidths), types(r.types), data(r.data),
scale(r.scale), precision(r.precision), strings(r.strings),
offsets(r.offsets), colWidths(r.colWidths), types(r.types), charsetNumbers(r.charsetNumbers),
data(r.data), scale(r.scale), precision(r.precision), strings(r.strings),
useStringTable(r.useStringTable), hasLongStringField(r.hasLongStringField),
sTableThreshold(r.sTableThreshold), forceInline(r.forceInline), userDataStore(NULL)
{ }
@ -522,6 +522,7 @@ Row& Row::operator=(const Row& r)
offsets = r.offsets;
colWidths = r.colWidths;
types = r.types;
charsetNumbers = r.charsetNumbers;
data = r.data;
scale = r.scale;
precision = r.precision;
@ -1006,6 +1007,7 @@ RowGroup::RowGroup(uint32_t colCount,
const vector<uint32_t>& roids,
const vector<uint32_t>& tkeys,
const vector<CalpontSystemCatalog::ColDataType>& colTypes,
const vector<uint32_t>& csNumbers,
const vector<uint32_t>& cscale,
const vector<uint32_t>& cprecision,
uint32_t stringTableThreshold,
@ -1013,7 +1015,7 @@ RowGroup::RowGroup(uint32_t colCount,
const vector<bool>& forceInlineData
) :
columnCount(colCount), data(NULL), oldOffsets(positions), oids(roids), keys(tkeys),
types(colTypes), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL),
types(colTypes), charsetNumbers(csNumbers), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL),
sTableThreshold(stringTableThreshold)
{
uint32_t i;
@ -1047,12 +1049,16 @@ RowGroup::RowGroup(uint32_t colCount,
useStringTable = (stringTable && hasLongStringField);
offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
// Set all the charsets to NULL for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
}
RowGroup::RowGroup(const RowGroup& r) :
columnCount(r.columnCount), data(r.data), oldOffsets(r.oldOffsets),
stOffsets(r.stOffsets), colWidths(r.colWidths),
oids(r.oids), keys(r.keys), types(r.types), scale(r.scale), precision(r.precision),
oids(r.oids), keys(r.keys), types(r.types), charsetNumbers(r.charsetNumbers),
charsets(r.charsets), scale(r.scale), precision(r.precision),
rgData(r.rgData), strings(r.strings), useStringTable(r.useStringTable),
hasLongStringField(r.hasLongStringField), sTableThreshold(r.sTableThreshold),
forceInline(r.forceInline)
@ -1076,6 +1082,8 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
oids = r.oids;
keys = r.keys;
types = r.types;
charsetNumbers = r.charsetNumbers;
charsets = r.charsets;
data = r.data;
scale = r.scale;
precision = r.precision;
@ -1120,6 +1128,7 @@ void RowGroup::serialize(ByteStream& bs) const
serializeInlineVector<uint32_t>(bs, oids);
serializeInlineVector<uint32_t>(bs, keys);
serializeInlineVector<CalpontSystemCatalog::ColDataType>(bs, types);
serializeInlineVector<uint32_t>(bs, charsetNumbers);
serializeInlineVector<uint32_t>(bs, scale);
serializeInlineVector<uint32_t>(bs, precision);
bs << (uint8_t) useStringTable;
@ -1139,6 +1148,7 @@ void RowGroup::deserialize(ByteStream& bs)
deserializeInlineVector<uint32_t>(bs, oids);
deserializeInlineVector<uint32_t>(bs, keys);
deserializeInlineVector<CalpontSystemCatalog::ColDataType>(bs, types);
deserializeInlineVector<uint32_t>(bs, charsetNumbers);
deserializeInlineVector<uint32_t>(bs, scale);
deserializeInlineVector<uint32_t>(bs, precision);
bs >> tmp8;
@ -1156,6 +1166,10 @@ void RowGroup::deserialize(ByteStream& bs)
offsets = &stOffsets[0];
else if (!useStringTable && !oldOffsets.empty())
offsets = &oldOffsets[0];
// Set all the charsets to NULL for jit initialization.
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
}
void RowGroup::serializeRGData(ByteStream& bs) const
@ -1467,6 +1481,15 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList&
}
}
/** @brief Return the collation object for a column, resolving it on first use.
 *
 * The charsets vector serves as a per-column cache: an entry that is still
 * NULL is filled from charsetNumbers[col] through get_charset() and is then
 * reused by later calls for the same column.
 *
 * @param col Column index; used unchecked to index both charsets and
 *            charsetNumbers.
 * @return The cached CHARSET_INFO pointer for the column, i.e. whatever
 *         get_charset() produced for that column's charset number.
 */
CHARSET_INFO* RowGroup::getCharset(uint32_t col)
{
    CHARSET_INFO*& cached = charsets[col];

    // Already resolved by an earlier call: hand back the cached pointer.
    if (cached != NULL)
        return cached;

    cached = get_charset(charsetNumbers[col], MYF(MY_WME));
    return cached;
}
void RowGroup::setDBRoot(uint32_t dbroot)
{
*((uint32_t*) &data[dbRootOffset]) = dbroot;

View File

@ -58,6 +58,11 @@
#include "../winport/winport.h"
// Because including my_sys.h in a Columnstore header causes too many conflicts
struct charset_info_st;
typedef const struct charset_info_st CHARSET_INFO;
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
namespace rowgroup
@ -319,6 +324,7 @@ public:
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
// this returns true if the type is not CHAR or VARCHAR
inline bool isCharType(uint32_t colIndex) const;
@ -461,6 +467,7 @@ private:
uint32_t* offsets;
uint32_t* colWidths;
execplan::CalpontSystemCatalog::ColDataType* types;
uint32_t* charsetNumbers;
uint8_t* data;
uint32_t* scale;
uint32_t* precision;
@ -569,6 +576,11 @@ inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() con
return types;
}
/** @brief Return the charset/collation lookup number recorded for a column.
 *  @param col Column index; it is not range-checked.
 *  @return The entry of the charsetNumbers array at position col.
 */
inline uint32_t Row::getCharsetNumber(uint32_t col) const
{
    const uint32_t csNumber = charsetNumbers[col];
    return csNumber;
}
inline bool Row::isCharType(uint32_t colIndex) const
{
return execplan::isCharType(types[colIndex]);
@ -1268,6 +1280,7 @@ public:
@param coids An array of oids for each column.
@param tkeys An array of unique id for each column.
@param colTypes An array of COLTYPEs for each column.
@param charsetNumbers An array of the lookup numbers for the charset/collation object of each column.
@param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
@param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
*/
@ -1277,6 +1290,7 @@ public:
const std::vector<uint32_t>& cOids,
const std::vector<uint32_t>& tkeys,
const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
const std::vector<uint32_t>& charsetNumbers,
const std::vector<uint32_t>& scale,
const std::vector<uint32_t>& precision,
uint32_t stringTableThreshold,
@ -1284,7 +1298,7 @@ public:
const std::vector<bool>& forceInlineData = std::vector<bool>()
);
/** @brief The copiers. It copies metadata, not the row data */
/** @brief The copiers. It copies metadata, not the row data */
RowGroup(const RowGroup&);
/** @brief Assignment operator. It copies metadata, not the row data */
@ -1338,6 +1352,8 @@ public:
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
inline const std::vector<uint32_t>& getCharsetNumbers() const;
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
inline boost::shared_array<bool>& getForceInline();
static inline uint32_t getHeaderSize()
{
@ -1397,6 +1413,8 @@ public:
uint16_t* blockNum);
inline void setStringStore(boost::shared_ptr<StringStore>);
CHARSET_INFO* getCharset(uint32_t col);
private:
uint32_t columnCount;
@ -1413,8 +1431,11 @@ private:
// Used to map the projected column and rowgroup index
std::vector<uint32_t> keys;
std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
// DECIMAL support. For non-decimal fields, the values are 0.
// For string collation
std::vector<uint32_t> charsetNumbers;
std::vector<CHARSET_INFO*> charsets;
// DECIMAL support. For non-decimal fields, the values are 0.
std::vector<uint32_t> scale;
std::vector<uint32_t> precision;
@ -1547,6 +1568,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const
{
r->colWidths = (uint32_t*) &colWidths[0];
r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
r->scale = (uint32_t*) & (scale[0]);
r->precision = (uint32_t*) & (precision[0]);
}
@ -1649,6 +1671,16 @@ inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getCo
return types;
}
inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
{
return charsetNumbers;
}
/** @brief Return the charset/collation lookup number recorded for a column.
 *  @param colIndex Column index; it is not range-checked.
 *  @return The entry of the charsetNumbers vector at position colIndex.
 */
inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
{
    const uint32_t csNumber = charsetNumbers[colIndex];
    return csNumber;
}
inline const std::vector<uint32_t>& RowGroup::getScale() const
{
return scale;