You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-3536 collation
This commit is contained in:
@ -24,6 +24,10 @@
|
||||
* is the primary class.
|
||||
*/
|
||||
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
@ -384,36 +388,16 @@ inline void RowAggregation::updateFloatMinMax(float val1, float val2, int64_t co
|
||||
fRow.setFloatField(val1, col);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define STRCOLL_ENH__
|
||||
|
||||
/** @brief Keep the collation-aware MIN or MAX string in the output row.
 *
 * If the output field at col is still NULL, val1 is stored unconditionally.
 * Otherwise val1 and val2 are compared with the column's collation and val1
 * is stored only when it wins for the requested aggregate.
 *
 * @param val1 Candidate value; written to column col when it wins.
 * @param val2 Value val1 is compared against (presumably the value currently
 *             held in the output row -- confirm against the caller).
 * @param col  Column index used for the NULL test, the charset lookup and the store.
 * @param func rowgroup::ROWAGG_MIN or rowgroup::ROWAGG_MAX; any other value
 *             leaves a non-NULL output field untouched.
 */
void RowAggregation::updateStringMinMax(string val1, string val2, int64_t col, int func)
{
    // Output field not set yet: the candidate wins without any comparison.
    if (isNull(fRowGroupOut, fRow, col))
    {
        fRow.setStringField(val1, col);
        return;
    }

    // NOTE(review): col indexes the output row everywhere else in this function,
    // but the charset is looked up in fRowGroupIn -- confirm the column layouts match.
    // NOTE(review): cs is dereferenced unchecked -- confirm getCharset() cannot return NULL here.
    CHARSET_INFO* cs = fRowGroupIn.getCharset(col);
    int tmp = cs->strnncoll(val1.c_str(), val1.length(), val2.c_str(), val2.length());

    if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) ||
            (tmp > 0 && func == rowgroup::ROWAGG_MAX))
    {
        fRow.setStringField(val1, col);
    }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -53,6 +53,9 @@
|
||||
#include "mcsv1_udaf.h"
|
||||
#include "constantcolumn.h"
|
||||
|
||||
// Forward-declare CHARSET_INFO here, because including my_sys.h in a Columnstore header causes too many conflicts
|
||||
struct charset_info_st;
|
||||
typedef const struct charset_info_st CHARSET_INFO;
|
||||
// To do: move code that depends on joblist to a proper subsystem.
|
||||
namespace joblist
|
||||
{
|
||||
@ -706,7 +709,7 @@ protected:
|
||||
|
||||
// We need a separate copy for each thread.
|
||||
mcsv1sdk::mcsv1Context fRGContext;
|
||||
|
||||
|
||||
// These are handy for testing the actual type of static_any for UDAF
|
||||
static const static_any::any& charTypeId;
|
||||
static const static_any::any& scharTypeId;
|
||||
|
@ -505,8 +505,8 @@ Row::Row() : data(NULL), strings(NULL), userDataStore(NULL) { }
|
||||
|
||||
/** @brief Copy constructor.
 *
 * Copies every metadata member of r, including the charsetNumbers pointer,
 * as well as the data and strings members. userDataStore is not copied;
 * it is reset to NULL in the new Row.
 */
Row::Row(const Row& r) : columnCount(r.columnCount), baseRid(r.baseRid),
    oldOffsets(r.oldOffsets), stOffsets(r.stOffsets),
    offsets(r.offsets), colWidths(r.colWidths), types(r.types), charsetNumbers(r.charsetNumbers),
    data(r.data), scale(r.scale), precision(r.precision), strings(r.strings),
    useStringTable(r.useStringTable), hasLongStringField(r.hasLongStringField),
    sTableThreshold(r.sTableThreshold), forceInline(r.forceInline), userDataStore(NULL)
{ }
|
||||
@ -522,6 +522,7 @@ Row& Row::operator=(const Row& r)
|
||||
offsets = r.offsets;
|
||||
colWidths = r.colWidths;
|
||||
types = r.types;
|
||||
charsetNumbers = r.charsetNumbers;
|
||||
data = r.data;
|
||||
scale = r.scale;
|
||||
precision = r.precision;
|
||||
@ -1006,6 +1007,7 @@ RowGroup::RowGroup(uint32_t colCount,
|
||||
const vector<uint32_t>& roids,
|
||||
const vector<uint32_t>& tkeys,
|
||||
const vector<CalpontSystemCatalog::ColDataType>& colTypes,
|
||||
const vector<uint32_t>& csNumbers,
|
||||
const vector<uint32_t>& cscale,
|
||||
const vector<uint32_t>& cprecision,
|
||||
uint32_t stringTableThreshold,
|
||||
@ -1013,7 +1015,7 @@ RowGroup::RowGroup(uint32_t colCount,
|
||||
const vector<bool>& forceInlineData
|
||||
) :
|
||||
columnCount(colCount), data(NULL), oldOffsets(positions), oids(roids), keys(tkeys),
|
||||
types(colTypes), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL),
|
||||
types(colTypes), charsetNumbers(csNumbers), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL),
|
||||
sTableThreshold(stringTableThreshold)
|
||||
{
|
||||
uint32_t i;
|
||||
@ -1047,12 +1049,16 @@ RowGroup::RowGroup(uint32_t colCount,
|
||||
|
||||
useStringTable = (stringTable && hasLongStringField);
|
||||
offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
|
||||
|
||||
// Set all the charsets to NULL for jit initialization.
|
||||
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
|
||||
}
|
||||
|
||||
RowGroup::RowGroup(const RowGroup& r) :
|
||||
columnCount(r.columnCount), data(r.data), oldOffsets(r.oldOffsets),
|
||||
stOffsets(r.stOffsets), colWidths(r.colWidths),
|
||||
oids(r.oids), keys(r.keys), types(r.types), scale(r.scale), precision(r.precision),
|
||||
oids(r.oids), keys(r.keys), types(r.types), charsetNumbers(r.charsetNumbers),
|
||||
charsets(r.charsets), scale(r.scale), precision(r.precision),
|
||||
rgData(r.rgData), strings(r.strings), useStringTable(r.useStringTable),
|
||||
hasLongStringField(r.hasLongStringField), sTableThreshold(r.sTableThreshold),
|
||||
forceInline(r.forceInline)
|
||||
@ -1076,6 +1082,8 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
|
||||
oids = r.oids;
|
||||
keys = r.keys;
|
||||
types = r.types;
|
||||
charsetNumbers = r.charsetNumbers;
|
||||
charsets = r.charsets;
|
||||
data = r.data;
|
||||
scale = r.scale;
|
||||
precision = r.precision;
|
||||
@ -1120,6 +1128,7 @@ void RowGroup::serialize(ByteStream& bs) const
|
||||
serializeInlineVector<uint32_t>(bs, oids);
|
||||
serializeInlineVector<uint32_t>(bs, keys);
|
||||
serializeInlineVector<CalpontSystemCatalog::ColDataType>(bs, types);
|
||||
serializeInlineVector<uint32_t>(bs, charsetNumbers);
|
||||
serializeInlineVector<uint32_t>(bs, scale);
|
||||
serializeInlineVector<uint32_t>(bs, precision);
|
||||
bs << (uint8_t) useStringTable;
|
||||
@ -1139,6 +1148,7 @@ void RowGroup::deserialize(ByteStream& bs)
|
||||
deserializeInlineVector<uint32_t>(bs, oids);
|
||||
deserializeInlineVector<uint32_t>(bs, keys);
|
||||
deserializeInlineVector<CalpontSystemCatalog::ColDataType>(bs, types);
|
||||
deserializeInlineVector<uint32_t>(bs, charsetNumbers);
|
||||
deserializeInlineVector<uint32_t>(bs, scale);
|
||||
deserializeInlineVector<uint32_t>(bs, precision);
|
||||
bs >> tmp8;
|
||||
@ -1156,6 +1166,10 @@ void RowGroup::deserialize(ByteStream& bs)
|
||||
offsets = &stOffsets[0];
|
||||
else if (!useStringTable && !oldOffsets.empty())
|
||||
offsets = &oldOffsets[0];
|
||||
|
||||
// Set all the charsets to NULL for jit initialization.
|
||||
charsets.insert(charsets.begin(), charsetNumbers.size(), NULL);
|
||||
|
||||
}
|
||||
|
||||
void RowGroup::serializeRGData(ByteStream& bs) const
|
||||
@ -1467,6 +1481,15 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList&
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Return the CHARSET_INFO for a column, resolving it on first use.
 *
 * The cached entry for col is filled from get_charset(), using the column's
 * charset number, the first time it is found to be NULL. Later calls return
 * the cached pointer. If get_charset() yields NULL the entry stays NULL and
 * the lookup is attempted again on the next call.
 *
 * @param col Column index (not range-checked).
 */
CHARSET_INFO* RowGroup::getCharset(uint32_t col)
{
    CHARSET_INFO*& cached = charsets[col];

    if (cached != NULL)
        return cached;

    cached = get_charset(charsetNumbers[col], MYF(MY_WME));
    return cached;
}
|
||||
|
||||
void RowGroup::setDBRoot(uint32_t dbroot)
|
||||
{
|
||||
*((uint32_t*) &data[dbRootOffset]) = dbroot;
|
||||
|
@ -58,6 +58,11 @@
|
||||
|
||||
#include "../winport/winport.h"
|
||||
|
||||
// Forward-declare CHARSET_INFO here, because including my_sys.h in a Columnstore header causes too many conflicts
|
||||
struct charset_info_st;
|
||||
typedef const struct charset_info_st CHARSET_INFO;
|
||||
|
||||
|
||||
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
|
||||
|
||||
namespace rowgroup
|
||||
@ -319,6 +324,7 @@ public:
|
||||
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
|
||||
inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
|
||||
inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
|
||||
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
|
||||
|
||||
// this returns true if the column's type is a character type (see execplan::isCharType)
|
||||
inline bool isCharType(uint32_t colIndex) const;
|
||||
@ -461,6 +467,7 @@ private:
|
||||
uint32_t* offsets;
|
||||
uint32_t* colWidths;
|
||||
execplan::CalpontSystemCatalog::ColDataType* types;
|
||||
uint32_t* charsetNumbers;
|
||||
uint8_t* data;
|
||||
uint32_t* scale;
|
||||
uint32_t* precision;
|
||||
@ -569,6 +576,11 @@ inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() con
|
||||
return types;
|
||||
}
|
||||
|
||||
inline uint32_t Row::getCharsetNumber(uint32_t col) const
|
||||
{
|
||||
return charsetNumbers[col];
|
||||
}
|
||||
|
||||
inline bool Row::isCharType(uint32_t colIndex) const
|
||||
{
|
||||
return execplan::isCharType(types[colIndex]);
|
||||
@ -1268,6 +1280,7 @@ public:
|
||||
@param coids An array of oids for each column.
|
||||
@param tkeys An array of unique id for each column.
|
||||
@param colTypes An array of COLTYPEs for each column.
|
||||
@param charsetNumbers An array of the charset/collation lookup numbers for each column.
|
||||
@param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
|
||||
@param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
|
||||
*/
|
||||
@ -1277,6 +1290,7 @@ public:
|
||||
const std::vector<uint32_t>& cOids,
|
||||
const std::vector<uint32_t>& tkeys,
|
||||
const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
|
||||
const std::vector<uint32_t>& charsetNumbers,
|
||||
const std::vector<uint32_t>& scale,
|
||||
const std::vector<uint32_t>& precision,
|
||||
uint32_t stringTableThreshold,
|
||||
@ -1284,7 +1298,7 @@ public:
|
||||
const std::vector<bool>& forceInlineData = std::vector<bool>()
|
||||
);
|
||||
|
||||
/** @brief The copiers. It copies metadata, not the row data */
|
||||
/** @brief The copiers. It copies metadata, not the row data */
|
||||
RowGroup(const RowGroup&);
|
||||
|
||||
/** @brief Assignment operator. It copies metadata, not the row data */
|
||||
@ -1338,6 +1352,8 @@ public:
|
||||
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
|
||||
inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
|
||||
inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
|
||||
inline const std::vector<uint32_t>& getCharsetNumbers() const;
|
||||
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
|
||||
inline boost::shared_array<bool>& getForceInline();
|
||||
static inline uint32_t getHeaderSize()
|
||||
{
|
||||
@ -1397,6 +1413,8 @@ public:
|
||||
uint16_t* blockNum);
|
||||
|
||||
inline void setStringStore(boost::shared_ptr<StringStore>);
|
||||
|
||||
CHARSET_INFO* getCharset(uint32_t col);
|
||||
|
||||
private:
|
||||
uint32_t columnCount;
|
||||
@ -1413,8 +1431,11 @@ private:
|
||||
// Used to map the projected column and rowgroup index
|
||||
std::vector<uint32_t> keys;
|
||||
std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
|
||||
|
||||
// DECIMAL support. For non-decimal fields, the values are 0.
|
||||
// For string collation
|
||||
std::vector<uint32_t> charsetNumbers;
|
||||
std::vector<CHARSET_INFO*> charsets;
|
||||
|
||||
// DECIMAL support. For non-decimal fields, the values are 0.
|
||||
std::vector<uint32_t> scale;
|
||||
std::vector<uint32_t> precision;
|
||||
|
||||
@ -1547,6 +1568,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const
|
||||
{
|
||||
r->colWidths = (uint32_t*) &colWidths[0];
|
||||
r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
|
||||
r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
|
||||
r->scale = (uint32_t*) & (scale[0]);
|
||||
r->precision = (uint32_t*) & (precision[0]);
|
||||
}
|
||||
@ -1649,6 +1671,16 @@ inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getCo
|
||||
return types;
|
||||
}
|
||||
|
||||
inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
|
||||
{
|
||||
return charsetNumbers;
|
||||
}
|
||||
|
||||
inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
|
||||
{
|
||||
return charsetNumbers[colIndex];
|
||||
}
|
||||
|
||||
inline const std::vector<uint32_t>& RowGroup::getScale() const
|
||||
{
|
||||
return scale;
|
||||
|
Reference in New Issue
Block a user