You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-3536 Collation
This commit is contained in:
@ -121,8 +121,6 @@ public:
|
||||
private:
|
||||
template <typename result_t>
|
||||
inline bool numericCompare(result_t op1, result_t op2);
|
||||
inline bool strCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber);
|
||||
inline bool strTrimCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber);
|
||||
|
||||
const CHARSET_INFO* cs;
|
||||
};
|
||||
@ -160,68 +158,6 @@ inline bool PredicateOperator::numericCompare(result_t op1, result_t op2)
|
||||
}
|
||||
}
|
||||
|
||||
inline bool PredicateOperator::strCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber)
|
||||
{
|
||||
switch (fOp)
|
||||
{
|
||||
case OP_EQ:
|
||||
return utf8::mcs_strcoll(op1, op2, charsetNumber) == 0;
|
||||
|
||||
case OP_NE:
|
||||
return utf8::mcs_strcoll(op1, op2, charsetNumber) != 0;
|
||||
|
||||
case OP_GT:
|
||||
return utf8::mcs_strcoll(op1, op2, charsetNumber) > 0;
|
||||
|
||||
case OP_GE:
|
||||
return utf8::mcs_strcoll(op1, op2, charsetNumber) >= 0;
|
||||
|
||||
case OP_LT:
|
||||
return utf8::mcs_strcoll(op1, op2, charsetNumber) < 0;
|
||||
|
||||
case OP_LE:
|
||||
return utf8::mcs_strcoll(op1, op2, charsetNumber) <= 0;
|
||||
|
||||
default:
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Non support predicate operation: " << fOp;
|
||||
throw logging::InvalidOperationExcept(oss.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline bool PredicateOperator::strTrimCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber)
|
||||
{
|
||||
switch (fOp)
|
||||
{
|
||||
case OP_EQ:
|
||||
return utf8::mcs_strcollsp(op1, op2, charsetNumber) == 0;
|
||||
|
||||
case OP_NE:
|
||||
return utf8::mcs_strcollsp(op1, op2, charsetNumber) != 0;
|
||||
|
||||
case OP_GT:
|
||||
return utf8::mcs_strcollsp(op1, op2, charsetNumber) > 0;
|
||||
|
||||
case OP_GE:
|
||||
return utf8::mcs_strcollsp(op1, op2, charsetNumber) >= 0;
|
||||
|
||||
case OP_LT:
|
||||
return utf8::mcs_strcollsp(op1, op2, charsetNumber) < 0;
|
||||
|
||||
case OP_LE:
|
||||
return utf8::mcs_strcollsp(op1, op2, charsetNumber) <= 0;
|
||||
|
||||
default:
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Non support predicate operation: " << fOp;
|
||||
throw logging::InvalidOperationExcept(oss.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const PredicateOperator& rhs);
|
||||
}
|
||||
|
||||
|
@ -397,9 +397,11 @@ void ExpressionStep::populateColumnInfo(SimpleColumn* sc, JobInfo& jobInfo)
|
||||
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
// if (dynamic_cast<PseudoColumn*>(sc) == NULL)
|
||||
// ct = jobInfo.csc->colType(sc->oid());
|
||||
|
||||
if (dynamic_cast<PseudoColumn*>(sc) == NULL)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
if (ct.scale == 0) // keep passed original ct for decimal type
|
||||
sc->resultType(ct); // update from mysql type to calpont type
|
||||
@ -526,7 +528,10 @@ void ExpressionStep::updateInputIndex(map<uint32_t, uint32_t>& indexMap, const J
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (dynamic_cast<PseudoColumn*>(sc) == NULL)
|
||||
{
|
||||
ct = jobInfo.csc->colType(oid);
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
|
||||
//X
|
||||
dictOid = joblist::isDictCol(ct);
|
||||
|
@ -650,11 +650,15 @@ const JobStepVector doColFilter(const SimpleColumn* sc1, const SimpleColumn* sc2
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (!sc1->schemaName().empty() && sc1->isColumnStore() && !pc1)
|
||||
{
|
||||
ct1 = jobInfo.csc->colType(sc1->oid());
|
||||
|
||||
ct1.charsetNumber =sc1->colType().charsetNumber;
|
||||
}
|
||||
if (!sc2->schemaName().empty() && sc2->isColumnStore() && !pc2)
|
||||
{
|
||||
ct2 = jobInfo.csc->colType(sc2->oid());
|
||||
|
||||
ct2.charsetNumber =sc2->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
int8_t op = op2num(sop);
|
||||
|
||||
@ -1075,11 +1079,15 @@ const JobStepVector doJoin(
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (!sc1->schemaName().empty() && sc1->isColumnStore() && !pc1)
|
||||
{
|
||||
ct1 = jobInfo.csc->colType(sc1->oid());
|
||||
|
||||
ct1.charsetNumber =sc1->colType().charsetNumber;
|
||||
}
|
||||
if (!sc2->schemaName().empty() && sc2->isColumnStore() && !pc2)
|
||||
{
|
||||
ct2 = jobInfo.csc->colType(sc2->oid());
|
||||
|
||||
ct2.charsetNumber =sc2->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
uint64_t joinInfo = sc1->joinInfo() | sc2->joinInfo();
|
||||
|
||||
@ -1342,8 +1350,10 @@ const JobStepVector doSemiJoin(const SimpleColumn* sc, const ReturnedColumn* rc,
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (!sc->schemaName().empty() && sc->isColumnStore() && !pc1)
|
||||
{
|
||||
ct1 = jobInfo.csc->colType(sc->oid());
|
||||
|
||||
ct1.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
JobStepVector jsv;
|
||||
SJSTEP step;
|
||||
@ -1651,7 +1661,10 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo)
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (!sc->schemaName().empty() && sc->isColumnStore() && !pc)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
|
||||
// Because, on a filter, we want to compare ignoring trailing spaces in many cases
|
||||
@ -2730,7 +2743,10 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo)
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (!sc->schemaName().empty() && sc->isColumnStore() && !pc)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
|
||||
//X
|
||||
CalpontSystemCatalog::OID tbOID = tableOid(sc.get(), jobInfo.csc);
|
||||
@ -3008,8 +3024,10 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo)
|
||||
CalpontSystemCatalog::ColType ct = sc->colType();
|
||||
|
||||
if (!sc->schemaName().empty() && sc->isColumnStore() && !pc)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
TupleInfo ti(setTupleInfo(ct, sc->oid(), jobInfo, tblOid, sc.get(), alias));
|
||||
//X TupleInfo ti(setTupleInfo(sc->colType(), sc->oid(), jobInfo, tblOid, sc.get(), alias));
|
||||
pcs->tupleId(ti.key);
|
||||
|
@ -800,8 +800,10 @@ void addOrderByAndLimit(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (sc->isColumnStore() && !(dynamic_cast<PseudoColumn*>(sc)))
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
dictOid = isDictCol(ct);
|
||||
}
|
||||
|
@ -128,8 +128,10 @@ void projectSimpleColumn(const SimpleColumn* sc, JobStepVector& jsv, JobInfo& jo
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (sc->isColumnStore() && !pc)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
if (pc == NULL)
|
||||
pcs = new pColStep(oid, tbl_oid, ct, jobInfo);
|
||||
@ -717,8 +719,10 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
|
||||
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
//X
|
||||
dictOid = isDictCol(ct);
|
||||
}
|
||||
@ -1007,7 +1011,10 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
|
||||
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
|
||||
//X
|
||||
dictOid = isDictCol(ct);
|
||||
@ -1160,7 +1167,10 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
|
||||
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
|
||||
//X
|
||||
dictOid = isDictCol(ct);
|
||||
@ -1646,7 +1656,10 @@ void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
|
||||
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
|
||||
//X
|
||||
|
||||
|
@ -95,7 +95,10 @@ uint64_t getColumnIndex(const SRCP& c, const map<uint64_t, uint64_t>& m, JobInfo
|
||||
//XXX use this before connector sets colType in sc correctly.
|
||||
// type of pseudo column is set by connector
|
||||
if (!(dynamic_cast<const PseudoColumn*>(sc)))
|
||||
{
|
||||
ct = jobInfo.csc->colType(sc->oid());
|
||||
ct.charsetNumber =sc->colType().charsetNumber;
|
||||
}
|
||||
|
||||
//X
|
||||
CalpontSystemCatalog::OID dictOid = isDictCol(ct);
|
||||
|
@ -73,7 +73,6 @@
|
||||
#include "liboamcpp.h"
|
||||
#include "crashtrace.h"
|
||||
#include "utils_utf8.h"
|
||||
#include "mcsconfig.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
@ -344,7 +344,45 @@ void UserDataStore::deserialize(ByteStream& bs)
|
||||
return;
|
||||
}
|
||||
|
||||
//uint32_t rgDataCount = 0;
|
||||
inline bool StringStore::equals(const std::string& str, uint64_t off, CHARSET_INFO* cs) const
|
||||
{
|
||||
uint32_t length;
|
||||
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return str == joblist::CPNULLSTRMARK;
|
||||
|
||||
MemChunk* mc;
|
||||
|
||||
if (off & 0x8000000000000000)
|
||||
{
|
||||
if (longStrings.size() <= (off & ~0x8000000000000000))
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) longStrings[off & ~0x8000000000000000].get();
|
||||
|
||||
memcpy(&length, mc->data, 4);
|
||||
|
||||
// Not sure if this check it needed, but adds safety
|
||||
if (length > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (cs->strnncoll(str.c_str(), str.length(), (const char*)mc->data+4, length) == 0);
|
||||
}
|
||||
|
||||
uint32_t chunk = off / CHUNK_SIZE;
|
||||
uint32_t offset = off % CHUNK_SIZE;
|
||||
|
||||
if (mem.size() <= chunk)
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) mem[chunk].get();
|
||||
memcpy(&length, &mc->data[offset], 4);
|
||||
|
||||
if ((offset + length) > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (cs->strnncoll(str.c_str(), str.length(), (const char*)&mc->data[offset]+4, length) == 0);
|
||||
}
|
||||
|
||||
RGData::RGData()
|
||||
{
|
||||
@ -505,9 +543,10 @@ Row::Row() : data(NULL), strings(NULL), userDataStore(NULL) { }
|
||||
|
||||
// Copy constructor. Every descriptor pointer, flag and the data pointer are
// copied member-for-member from r (the pointed-to arrays are not duplicated);
// userDataStore is set to NULL rather than taken from r.
// Each member appears exactly once, in declaration order.
Row::Row(const Row& r) : columnCount(r.columnCount), baseRid(r.baseRid),
    oldOffsets(r.oldOffsets), stOffsets(r.stOffsets),
    offsets(r.offsets), colWidths(r.colWidths), types(r.types),
    charsetNumbers(r.charsetNumbers), charsets(r.charsets),
    data(r.data), scale(r.scale), precision(r.precision), strings(r.strings),
    useStringTable(r.useStringTable), hasStrings(r.hasStrings), hasLongStringField(r.hasLongStringField),
    sTableThreshold(r.sTableThreshold), forceInline(r.forceInline), userDataStore(NULL)
{ }
|
||||
|
||||
@ -523,11 +562,13 @@ Row& Row::operator=(const Row& r)
|
||||
colWidths = r.colWidths;
|
||||
types = r.types;
|
||||
charsetNumbers = r.charsetNumbers;
|
||||
charsets = r.charsets;
|
||||
data = r.data;
|
||||
scale = r.scale;
|
||||
precision = r.precision;
|
||||
strings = r.strings;
|
||||
useStringTable = r.useStringTable;
|
||||
hasStrings = r.hasStrings;
|
||||
hasLongStringField = r.hasLongStringField;
|
||||
sTableThreshold = r.sTableThreshold;
|
||||
forceInline = r.forceInline;
|
||||
@ -990,6 +1031,128 @@ int64_t Row::getSignedNullValue(uint32_t colIndex) const
|
||||
return utils::getSignedNullValue(types[colIndex], getColumnWidth(colIndex));
|
||||
}
|
||||
|
||||
bool Row::equals(const std::string& val, uint32_t col) const
|
||||
{
|
||||
const CHARSET_INFO* cs = getCharset(col);
|
||||
if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::BLOB))
|
||||
{
|
||||
if (getStringLength(col) != val.length())
|
||||
return false;
|
||||
|
||||
if (memcmp(getStringPointer(col), val.c_str(), val.length()))
|
||||
return false;
|
||||
}
|
||||
else if (inStringTable(col))
|
||||
{
|
||||
uint64_t offset = *((uint64_t*) &data[offsets[col]]);
|
||||
return strings->equals(val, offset, cs);
|
||||
}
|
||||
else
|
||||
{
|
||||
return (cs->strnncollsp(val.c_str(), val.length(), (char*)&data[offsets[col]], getColumnWidth(col)) == 0);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Row::equals(const Row& r2, const std::vector<uint32_t>& keyCols) const
|
||||
{
|
||||
for (uint32_t i = 0; i < keyCols.size(); i++)
|
||||
{
|
||||
const uint32_t& col = keyCols[i];
|
||||
|
||||
if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::VARCHAR ||
|
||||
getColType(col) == execplan::CalpontSystemCatalog::CHAR ||
|
||||
getColType(col) == execplan::CalpontSystemCatalog::TEXT))
|
||||
{
|
||||
CHARSET_INFO* cs = getCharset(col);
|
||||
if (cs->strnncollsp(getStringPointer(col), getStringLength(col),
|
||||
r2.getStringPointer(col), r2.getStringLength(col)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::BLOB))
|
||||
{
|
||||
if (getStringLength(col) != r2.getStringLength(col))
|
||||
return false;
|
||||
|
||||
if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col)))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getColType(col) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
if (getLongDoubleField(col) != r2.getLongDoubleField(col))
|
||||
return false;
|
||||
}
|
||||
else if (getUintField(col) != r2.getUintField(col))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Row::equals(const Row& r2, uint32_t lastCol) const
|
||||
{
|
||||
// This check fires with empty r2 only.
|
||||
if (lastCol >= columnCount)
|
||||
return true;
|
||||
|
||||
// If there are no strings in the row, then we can just memcmp the whole row.
|
||||
// hasStrings is true if there is any column of type CHAR, VARCHAR or TEXT
|
||||
// useStringTable is true if any field declared > max inline field size, including BLOB
|
||||
// For memcmp to be correct, both must be false.
|
||||
if (!hasStrings && !useStringTable && !r2.hasStrings && !r2.useStringTable)
|
||||
return !(memcmp(&data[offsets[0]], &r2.data[offsets[0]], offsets[lastCol + 1] - offsets[0]));
|
||||
|
||||
// There are strings involved, so we need to check each column
|
||||
// because binary equality is not equality for many charsets/collations
|
||||
for (uint32_t col = 0; col <= lastCol; col++)
|
||||
{
|
||||
if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::VARCHAR ||
|
||||
getColType(col) == execplan::CalpontSystemCatalog::CHAR ||
|
||||
getColType(col) == execplan::CalpontSystemCatalog::TEXT))
|
||||
{
|
||||
CHARSET_INFO* cs = getCharset(col);
|
||||
if (cs->strnncollsp(getStringPointer(col), getStringLength(col),
|
||||
r2.getStringPointer(col), r2.getStringLength(col)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::BLOB))
|
||||
{
|
||||
if (getStringLength(col) != r2.getStringLength(col))
|
||||
return false;
|
||||
|
||||
if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col)))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getColType(col) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
if (getLongDoubleField(col) != r2.getLongDoubleField(col))
|
||||
return false;
|
||||
}
|
||||
else if (getUintField(col) != r2.getUintField(col))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the CHARSET_INFO for column `col`. The pointer is looked up with
// get_charset() from charsetNumbers[col] the first time it is requested and
// stored in charsets[col] for later calls.
const CHARSET_INFO* Row::getCharset(uint32_t col) const
{
    if (charsets[col] != NULL)
        return charsets[col];

    // First request for this column: resolve and remember the charset.
    const_cast<CHARSET_INFO**>(charsets)[col] = get_charset(charsetNumbers[col], MYF(MY_WME));
    return charsets[col];
}
|
||||
|
||||
RowGroup::RowGroup() : columnCount(0), data(NULL), rgData(NULL), strings(NULL),
|
||||
useStringTable(true), hasLongStringField(false), sTableThreshold(20)
|
||||
{
|
||||
@ -1045,6 +1208,15 @@ RowGroup::RowGroup(uint32_t colCount,
|
||||
}
|
||||
else
|
||||
stOffsets[i + 1] = stOffsets[i] + colWidths[i];
|
||||
|
||||
execplan::CalpontSystemCatalog::ColDataType type = types[i];
|
||||
if (type == execplan::CalpontSystemCatalog::CHAR ||
|
||||
type == execplan::CalpontSystemCatalog::VARCHAR ||
|
||||
type == execplan::CalpontSystemCatalog::TEXT)
|
||||
{
|
||||
hasStrings = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
useStringTable = (stringTable && hasLongStringField);
|
||||
@ -1090,6 +1262,7 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
|
||||
rgData = r.rgData;
|
||||
strings = r.strings;
|
||||
useStringTable = r.useStringTable;
|
||||
hasStrings = r.hasStrings;
|
||||
hasLongStringField = r.hasLongStringField;
|
||||
sTableThreshold = r.sTableThreshold;
|
||||
forceInline = r.forceInline;
|
||||
@ -1481,7 +1654,7 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList&
|
||||
}
|
||||
}
|
||||
|
||||
CHARSET_INFO* RowGroup::getCharset(uint32_t col)
|
||||
const CHARSET_INFO* RowGroup::getCharset(uint32_t col)
|
||||
{
|
||||
if (charsets[col] == NULL)
|
||||
{
|
||||
|
@ -108,7 +108,7 @@ public:
|
||||
inline bool isEmpty() const;
|
||||
inline uint64_t getSize() const;
|
||||
inline bool isNullValue(uint64_t offset) const;
|
||||
inline bool equals(const std::string& str, uint64_t offset) const;
|
||||
bool equals(const std::string& str, uint64_t offset, CHARSET_INFO* cs) const;
|
||||
|
||||
void clear();
|
||||
|
||||
@ -338,7 +338,7 @@ public:
|
||||
inline int64_t getIntField(uint32_t colIndex) const;
|
||||
template<int len> inline bool equals(uint64_t val, uint32_t colIndex) const;
|
||||
inline bool equals(long double val, uint32_t colIndex) const;
|
||||
inline bool equals(const std::string& val, uint32_t colIndex) const;
|
||||
bool equals(const std::string& val, uint32_t colIndex) const;
|
||||
|
||||
inline double getDoubleField(uint32_t colIndex) const;
|
||||
inline float getFloatField(uint32_t colIndex) const;
|
||||
@ -387,7 +387,7 @@ public:
|
||||
inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
|
||||
|
||||
// support VARBINARY
|
||||
// Add 2-byte length at the beginning of the field. NULL and zero length field are
|
||||
// Add 2-byte length at the beginning of the field. NULL and zero length field are
|
||||
// treated the same, could use one of the length bit to distinguish these two cases.
|
||||
inline std::string getVarBinaryStringField(uint32_t colIndex) const;
|
||||
inline void setVarBinaryField(const std::string& val, uint32_t colIndex);
|
||||
@ -449,14 +449,17 @@ public:
|
||||
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
||||
inline uint64_t hash() const; // generates a hash for all cols
|
||||
|
||||
inline bool equals(const Row&, const std::vector<uint32_t>& keyColumns) const;
|
||||
inline bool equals(const Row&, uint32_t lastCol) const;
|
||||
bool equals(const Row&, const std::vector<uint32_t>& keyColumns) const;
|
||||
bool equals(const Row&, uint32_t lastCol) const;
|
||||
inline bool equals(const Row&) const;
|
||||
|
||||
// Stores the given UserDataStore pointer in this row.
inline void setUserDataStore(UserDataStore* u) { this->userDataStore = u; }
|
||||
|
||||
const CHARSET_INFO* getCharset(uint32_t col) const;
|
||||
|
||||
private:
|
||||
uint32_t columnCount;
|
||||
uint64_t baseRid;
|
||||
@ -468,12 +471,14 @@ private:
|
||||
uint32_t* colWidths;
|
||||
execplan::CalpontSystemCatalog::ColDataType* types;
|
||||
uint32_t* charsetNumbers;
|
||||
CHARSET_INFO** charsets;
|
||||
uint8_t* data;
|
||||
uint32_t* scale;
|
||||
uint32_t* precision;
|
||||
|
||||
StringStore* strings;
|
||||
bool useStringTable;
|
||||
bool hasStrings;
|
||||
bool hasLongStringField;
|
||||
uint32_t sTableThreshold;
|
||||
boost::shared_array<bool> forceInline;
|
||||
@ -634,18 +639,6 @@ inline bool Row::equals(long double val, uint32_t colIndex) const
|
||||
{
|
||||
return *((long double*) &data[offsets[colIndex]]) == val;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const std::string& val, uint32_t colIndex) const
|
||||
{
|
||||
if (inStringTable(colIndex))
|
||||
{
|
||||
uint64_t offset = *((uint64_t*) &data[offsets[colIndex]]);
|
||||
return strings->equals(val, offset);
|
||||
}
|
||||
else
|
||||
return (strncmp(val.c_str(), (char*) &data[offsets[colIndex]], getColumnWidth(colIndex)) == 0);
|
||||
}
|
||||
|
||||
template<int len>
|
||||
inline uint64_t Row::getUintField(uint32_t colIndex) const
|
||||
{
|
||||
@ -1182,69 +1175,6 @@ inline uint64_t Row::hash(uint32_t lastCol) const
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const Row& r2, const std::vector<uint32_t>& keyCols) const
|
||||
{
|
||||
for (uint32_t i = 0; i < keyCols.size(); i++)
|
||||
{
|
||||
const uint32_t& col = keyCols[i];
|
||||
|
||||
if (!isLongString(col))
|
||||
{
|
||||
if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
if (getLongDoubleField(i) != r2.getLongDoubleField(i))
|
||||
return false;
|
||||
}
|
||||
else if (getUintField(col) != r2.getUintField(col))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getStringLength(col) != r2.getStringLength(col))
|
||||
return false;
|
||||
|
||||
if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col)))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const Row& r2, uint32_t lastCol) const
|
||||
{
|
||||
// This check fires with empty r2 only.
|
||||
if (lastCol >= columnCount)
|
||||
return true;
|
||||
|
||||
if (!useStringTable && !r2.useStringTable)
|
||||
return !(memcmp(&data[offsets[0]], &r2.data[offsets[0]], offsets[lastCol + 1] - offsets[0]));
|
||||
|
||||
for (uint32_t i = 0; i <= lastCol; i++)
|
||||
if (!isLongString(i))
|
||||
{
|
||||
if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
if (getLongDoubleField(i) != r2.getLongDoubleField(i))
|
||||
return false;
|
||||
}
|
||||
else if (getUintField(i) != r2.getUintField(i))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t len = getStringLength(i);
|
||||
|
||||
if (len != r2.getStringLength(i))
|
||||
return false;
|
||||
|
||||
if (memcmp(getStringPointer(i), r2.getStringPointer(i), len))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool Row::equals(const Row& r2) const
|
||||
{
|
||||
return equals(r2, columnCount - 1);
|
||||
@ -1414,7 +1344,7 @@ public:
|
||||
|
||||
inline void setStringStore(boost::shared_ptr<StringStore>);
|
||||
|
||||
CHARSET_INFO* getCharset(uint32_t col);
|
||||
const CHARSET_INFO* getCharset(uint32_t col);
|
||||
|
||||
private:
|
||||
uint32_t columnCount;
|
||||
@ -1443,6 +1373,7 @@ private:
|
||||
RGData* rgData;
|
||||
StringStore* strings; // note, strings and data belong to rgData
|
||||
bool useStringTable;
|
||||
bool hasStrings;
|
||||
bool hasLongStringField;
|
||||
uint32_t sTableThreshold;
|
||||
boost::shared_array<bool> forceInline;
|
||||
@ -1569,6 +1500,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const
|
||||
r->colWidths = (uint32_t*) &colWidths[0];
|
||||
r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
|
||||
r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
|
||||
r->charsets = (CHARSET_INFO**) & (charsets[0]);
|
||||
r->scale = (uint32_t*) & (scale[0]);
|
||||
r->precision = (uint32_t*) & (precision[0]);
|
||||
}
|
||||
@ -1591,6 +1523,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const
|
||||
r->hasLongStringField = hasLongStringField;
|
||||
r->sTableThreshold = sTableThreshold;
|
||||
r->forceInline = forceInline;
|
||||
r->hasStrings = hasStrings;
|
||||
}
|
||||
|
||||
inline uint32_t RowGroup::getRowSize() const
|
||||
@ -1935,45 +1868,6 @@ inline bool StringStore::isNullValue(uint64_t off) const
|
||||
return (memcmp(&mc->data[offset+4], joblist::CPNULLSTRMARK.c_str(), 8) == 0);
|
||||
}
|
||||
|
||||
inline bool StringStore::equals(const std::string& str, uint64_t off) const
|
||||
{
|
||||
uint32_t length;
|
||||
|
||||
if (off == std::numeric_limits<uint64_t>::max())
|
||||
return str == joblist::CPNULLSTRMARK;
|
||||
|
||||
MemChunk* mc;
|
||||
|
||||
if (off & 0x8000000000000000)
|
||||
{
|
||||
if (longStrings.size() <= (off & ~0x8000000000000000))
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) longStrings[off & ~0x8000000000000000].get();
|
||||
|
||||
memcpy(&length, mc->data, 4);
|
||||
|
||||
// Not sure if this check it needed, but adds safety
|
||||
if (length > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (strncmp(str.c_str(), (const char*) mc->data + 4, length) == 0);
|
||||
}
|
||||
|
||||
uint32_t chunk = off / CHUNK_SIZE;
|
||||
uint32_t offset = off % CHUNK_SIZE;
|
||||
|
||||
if (mem.size() <= chunk)
|
||||
return false;
|
||||
|
||||
mc = (MemChunk*) mem[chunk].get();
|
||||
memcpy(&length, &mc->data[offset], 4);
|
||||
|
||||
if ((offset + length) > mc->currentSize)
|
||||
return false;
|
||||
|
||||
return (strncmp(str.c_str(), (const char*) &mc->data[offset] + 4, length) == 0);
|
||||
}
|
||||
inline uint32_t StringStore::getStringLength(uint64_t off)
|
||||
{
|
||||
uint32_t length;
|
||||
|
@ -301,16 +301,11 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2)
|
||||
int len2 = l->row2().getStringLength(fSpec.fIndex);
|
||||
const char* s1 = (const char*)l->row1().getStringPointer(fSpec.fIndex);
|
||||
const char* s2 = (const char*)l->row2().getStringPointer(fSpec.fIndex);
|
||||
// For Japanese, coll.compare() may not be as correct as strncmp
|
||||
// if (JPcodePoint)
|
||||
{
|
||||
// ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2));
|
||||
}
|
||||
// else
|
||||
{
|
||||
const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
|
||||
ret = fSpec.fAsc * coll.compare(s1, s1+len1, s2, s2+len2);
|
||||
}
|
||||
|
||||
if (!cs)
|
||||
cs = l->rowGroup()->getCharset(fSpec.fIndex);
|
||||
|
||||
ret = fSpec.fAsc * cs->strnncollsp(s1, len1, s2, len2);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -80,7 +80,6 @@ struct IdbSortSpec
|
||||
// TODO There are three ordering specs since 10.2
|
||||
int fAsc; // <ordering specification> ::= ASC | DESC
|
||||
int fNf; // <null ordering> ::= NULLS FIRST | NULLS LAST
|
||||
std::string fLocale;
|
||||
|
||||
IdbSortSpec() : fIndex(-1), fAsc(1), fNf(1) {}
|
||||
IdbSortSpec(int i, bool b) : fIndex(i), fAsc(b ? 1 : -1), fNf(fAsc) {}
|
||||
@ -93,39 +92,7 @@ struct IdbSortSpec
|
||||
class Compare
|
||||
{
|
||||
public:
|
||||
Compare(const IdbSortSpec& spec) : fSpec(spec)
|
||||
{
|
||||
// Save off the current Locale in case something goes wrong.
|
||||
std::string curLocale = setlocale(LC_COLLATE, NULL);
|
||||
if (spec.fLocale.length() > 0)
|
||||
{
|
||||
fLocale = spec.fLocale;
|
||||
}
|
||||
else
|
||||
{
|
||||
fLocale = curLocale;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
std::locale localloc(fLocale.c_str());
|
||||
loc = localloc;
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
fLocale = curLocale;
|
||||
std::locale localloc(fLocale.c_str());
|
||||
loc = localloc;
|
||||
}
|
||||
if (fLocale.find("ja_JP") != std::string::npos)
|
||||
{
|
||||
JPcodePoint = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
JPcodePoint = false;
|
||||
}
|
||||
}
|
||||
Compare(const IdbSortSpec& spec) : fSpec(spec) {}
|
||||
virtual ~Compare() {}
|
||||
|
||||
virtual int operator()(IdbCompare*, rowgroup::Row::Pointer, rowgroup::Row::Pointer) = 0;
|
||||
@ -137,9 +104,6 @@ public:
|
||||
|
||||
protected:
|
||||
IdbSortSpec fSpec;
|
||||
std::string fLocale;
|
||||
std::locale loc;
|
||||
bool JPcodePoint; // code point ordering (Japanese UTF) flag
|
||||
};
|
||||
|
||||
// Comparators for signed types
|
||||
@ -283,9 +247,11 @@ public:
|
||||
class StringCompare : public Compare
|
||||
{
|
||||
public:
|
||||
StringCompare(const IdbSortSpec& spec) : Compare(spec) {}
|
||||
StringCompare(const IdbSortSpec& spec) : Compare(spec), cs(NULL) {}
|
||||
|
||||
int operator()(IdbCompare*, rowgroup::Row::Pointer, rowgroup::Row::Pointer);
|
||||
|
||||
CHARSET_INFO* cs;
|
||||
};
|
||||
|
||||
// End of comparators for variable sized types
|
||||
@ -324,6 +290,10 @@ public:
|
||||
return fRow2;
|
||||
}
|
||||
|
||||
// Returns a pointer to the row group member of this object.
rowgroup::RowGroup* rowGroup() { return &this->fRowGroup; }
|
||||
protected:
|
||||
rowgroup::RowGroup fRowGroup;
|
||||
rowgroup::Row fRow1;
|
||||
|
Reference in New Issue
Block a user