diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index b8f1f7079..fcd722e99 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -121,8 +121,6 @@ public: private: template inline bool numericCompare(result_t op1, result_t op2); - inline bool strCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber); - inline bool strTrimCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber); const CHARSET_INFO* cs; }; @@ -160,68 +158,6 @@ inline bool PredicateOperator::numericCompare(result_t op1, result_t op2) } } -inline bool PredicateOperator::strCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber) -{ - switch (fOp) - { - case OP_EQ: - return utf8::mcs_strcoll(op1, op2, charsetNumber) == 0; - - case OP_NE: - return utf8::mcs_strcoll(op1, op2, charsetNumber) != 0; - - case OP_GT: - return utf8::mcs_strcoll(op1, op2, charsetNumber) > 0; - - case OP_GE: - return utf8::mcs_strcoll(op1, op2, charsetNumber) >= 0; - - case OP_LT: - return utf8::mcs_strcoll(op1, op2, charsetNumber) < 0; - - case OP_LE: - return utf8::mcs_strcoll(op1, op2, charsetNumber) <= 0; - - default: - { - std::ostringstream oss; - oss << "Non support predicate operation: " << fOp; - throw logging::InvalidOperationExcept(oss.str()); - } - } -} - -inline bool PredicateOperator::strTrimCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber) -{ - switch (fOp) - { - case OP_EQ: - return utf8::mcs_strcollsp(op1, op2, charsetNumber) == 0; - - case OP_NE: - return utf8::mcs_strcollsp(op1, op2, charsetNumber) != 0; - - case OP_GT: - return utf8::mcs_strcollsp(op1, op2, charsetNumber) > 0; - - case OP_GE: - return utf8::mcs_strcollsp(op1, op2, charsetNumber) >= 0; - - case OP_LT: - return utf8::mcs_strcollsp(op1, op2, charsetNumber) < 0; - - case OP_LE: - return utf8::mcs_strcollsp(op1, op2, charsetNumber) <= 0; - - default: - { - std::ostringstream 
oss; - oss << "Non support predicate operation: " << fOp; - throw logging::InvalidOperationExcept(oss.str()); - } - } -} - std::ostream& operator<<(std::ostream& os, const PredicateOperator& rhs); } diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index dab4089d3..9e949196d 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -397,9 +397,11 @@ void ExpressionStep::populateColumnInfo(SimpleColumn* sc, JobInfo& jobInfo) //XXX use this before connector sets colType in sc correctly. // type of pseudo column is set by connector -// if (dynamic_cast(sc) == NULL) -// ct = jobInfo.csc->colType(sc->oid()); - + if (dynamic_cast(sc) == NULL) + { + ct = jobInfo.csc->colType(sc->oid()); + ct.charsetNumber =sc->colType().charsetNumber; + } //X if (ct.scale == 0) // keep passed original ct for decimal type sc->resultType(ct); // update from mysql type to calpont type @@ -526,7 +528,10 @@ void ExpressionStep::updateInputIndex(map& indexMap, const J //XXX use this before connector sets colType in sc correctly. // type of pseudo column is set by connector if (dynamic_cast(sc) == NULL) + { ct = jobInfo.csc->colType(oid); + ct.charsetNumber =sc->colType().charsetNumber; + } //X dictOid = joblist::isDictCol(ct); diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index 8ce36a478..35f4110fa 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -650,11 +650,15 @@ const JobStepVector doColFilter(const SimpleColumn* sc1, const SimpleColumn* sc2 //XXX use this before connector sets colType in sc correctly. 
// type of pseudo column is set by connector if (!sc1->schemaName().empty() && sc1->isColumnStore() && !pc1) + { ct1 = jobInfo.csc->colType(sc1->oid()); - + ct1.charsetNumber =sc1->colType().charsetNumber; + } if (!sc2->schemaName().empty() && sc2->isColumnStore() && !pc2) + { ct2 = jobInfo.csc->colType(sc2->oid()); - + ct2.charsetNumber =sc2->colType().charsetNumber; + } //X int8_t op = op2num(sop); @@ -1075,11 +1079,15 @@ const JobStepVector doJoin( //XXX use this before connector sets colType in sc correctly. // type of pseudo column is set by connector if (!sc1->schemaName().empty() && sc1->isColumnStore() && !pc1) + { ct1 = jobInfo.csc->colType(sc1->oid()); - + ct1.charsetNumber =sc1->colType().charsetNumber; + } if (!sc2->schemaName().empty() && sc2->isColumnStore() && !pc2) + { ct2 = jobInfo.csc->colType(sc2->oid()); - + ct2.charsetNumber =sc2->colType().charsetNumber; + } //X uint64_t joinInfo = sc1->joinInfo() | sc2->joinInfo(); @@ -1342,8 +1350,10 @@ const JobStepVector doSemiJoin(const SimpleColumn* sc, const ReturnedColumn* rc, //XXX use this before connector sets colType in sc correctly. // type of pseudo column is set by connector if (!sc->schemaName().empty() && sc->isColumnStore() && !pc1) + { ct1 = jobInfo.csc->colType(sc->oid()); - + ct1.charsetNumber =sc->colType().charsetNumber; + } //X JobStepVector jsv; SJSTEP step; @@ -1651,7 +1661,10 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) //XXX use this before connector sets colType in sc correctly. // type of pseudo column is set by connector if (!sc->schemaName().empty() && sc->isColumnStore() && !pc) + { ct = jobInfo.csc->colType(sc->oid()); + ct.charsetNumber =sc->colType().charsetNumber; + } //X // Because, on a filter, we want to compare ignoring trailing spaces in many cases @@ -2730,7 +2743,10 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) //XXX use this before connector sets colType in sc correctly. 
// type of pseudo column is set by connector if (!sc->schemaName().empty() && sc->isColumnStore() && !pc) + { ct = jobInfo.csc->colType(sc->oid()); + ct.charsetNumber =sc->colType().charsetNumber; + } //X CalpontSystemCatalog::OID tbOID = tableOid(sc.get(), jobInfo.csc); @@ -3008,8 +3024,10 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) CalpontSystemCatalog::ColType ct = sc->colType(); if (!sc->schemaName().empty() && sc->isColumnStore() && !pc) + { ct = jobInfo.csc->colType(sc->oid()); - + ct.charsetNumber =sc->colType().charsetNumber; + } TupleInfo ti(setTupleInfo(ct, sc->oid(), jobInfo, tblOid, sc.get(), alias)); //X TupleInfo ti(setTupleInfo(sc->colType(), sc->oid(), jobInfo, tblOid, sc.get(), alias)); pcs->tupleId(ti.key); diff --git a/dbcon/joblist/jlf_subquery.cpp b/dbcon/joblist/jlf_subquery.cpp index 11179eee2..008bdc758 100644 --- a/dbcon/joblist/jlf_subquery.cpp +++ b/dbcon/joblist/jlf_subquery.cpp @@ -800,8 +800,10 @@ void addOrderByAndLimit(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) //XXX use this before connector sets colType in sc correctly. // type of pseudo column is set by connector if (sc->isColumnStore() && !(dynamic_cast(sc))) + { ct = jobInfo.csc->colType(sc->oid()); - + ct.charsetNumber =sc->colType().charsetNumber; + } //X dictOid = isDictCol(ct); } diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 392387d4b..d762d912c 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -128,8 +128,10 @@ void projectSimpleColumn(const SimpleColumn* sc, JobStepVector& jsv, JobInfo& jo //XXX use this before connector sets colType in sc correctly. 
// type of pseudo column is set by connector if (sc->isColumnStore() && !pc) + { ct = jobInfo.csc->colType(sc->oid()); - + ct.charsetNumber =sc->colType().charsetNumber; + } //X if (pc == NULL) pcs = new pColStep(oid, tbl_oid, ct, jobInfo); @@ -717,8 +719,10 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo //XXX use this before connector sets colType in sc correctly. if (sc->isColumnStore() && dynamic_cast(sc) == NULL) + { ct = jobInfo.csc->colType(sc->oid()); - + ct.charsetNumber =sc->colType().charsetNumber; + } //X dictOid = isDictCol(ct); } @@ -1007,7 +1011,10 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo //XXX use this before connector sets colType in sc correctly. if (sc->isColumnStore() && dynamic_cast(sc) == NULL) + { ct = jobInfo.csc->colType(sc->oid()); + ct.charsetNumber =sc->colType().charsetNumber; + } //X dictOid = isDictCol(ct); @@ -1160,7 +1167,10 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo //XXX use this before connector sets colType in sc correctly. if (sc->isColumnStore() && dynamic_cast(sc) == NULL) + { ct = jobInfo.csc->colType(sc->oid()); + ct.charsetNumber =sc->colType().charsetNumber; + } //X dictOid = isDictCol(ct); @@ -1646,7 +1656,10 @@ void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, //XXX use this before connector sets colType in sc correctly. if (sc->isColumnStore() && dynamic_cast(sc) == NULL) + { ct = jobInfo.csc->colType(sc->oid()); + ct.charsetNumber =sc->colType().charsetNumber; + } //X diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 6a76579a0..1da9ef60b 100755 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -95,7 +95,10 @@ uint64_t getColumnIndex(const SRCP& c, const map& m, JobInfo //XXX use this before connector sets colType in sc correctly. 
// type of pseudo column is set by connector if (!(dynamic_cast(sc))) + { ct = jobInfo.csc->colType(sc->oid()); + ct.charsetNumber =sc->colType().charsetNumber; + } //X CalpontSystemCatalog::OID dictOid = isDictCol(ct); diff --git a/exemgr/main.cpp b/exemgr/main.cpp index dec1c15ad..10854ad43 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -73,7 +73,6 @@ #include "liboamcpp.h" #include "crashtrace.h" #include "utils_utf8.h" -#include "mcsconfig.h" #include #include diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index 049106102..d98ad2802 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -344,7 +344,45 @@ void UserDataStore::deserialize(ByteStream& bs) return; } -//uint32_t rgDataCount = 0; +inline bool StringStore::equals(const std::string& str, uint64_t off, CHARSET_INFO* cs) const +{ + uint32_t length; + + if (off == std::numeric_limits::max()) + return str == joblist::CPNULLSTRMARK; + + MemChunk* mc; + + if (off & 0x8000000000000000) + { + if (longStrings.size() <= (off & ~0x8000000000000000)) + return false; + + mc = (MemChunk*) longStrings[off & ~0x8000000000000000].get(); + + memcpy(&length, mc->data, 4); + + // Not sure if this check is needed, but adds safety + if (length > mc->currentSize) + return false; + + return (cs->strnncoll(str.c_str(), str.length(), (const char*)mc->data+4, length) == 0); + } + + uint32_t chunk = off / CHUNK_SIZE; + uint32_t offset = off % CHUNK_SIZE; + + if (mem.size() <= chunk) + return false; + + mc = (MemChunk*) mem[chunk].get(); + memcpy(&length, &mc->data[offset], 4); + + if ((offset + length) > mc->currentSize) + return false; + + return (cs->strnncoll(str.c_str(), str.length(), (const char*)&mc->data[offset]+4, length) == 0); +} RGData::RGData() { @@ -505,9 +543,10 @@ Row::Row() : data(NULL), strings(NULL), userDataStore(NULL) { } Row::Row(const Row& r) : columnCount(r.columnCount), baseRid(r.baseRid), oldOffsets(r.oldOffsets), stOffsets(r.stOffsets), - 
offsets(r.offsets), colWidths(r.colWidths), types(r.types), charsetNumbers(r.charsetNumbers), + offsets(r.offsets), colWidths(r.colWidths), types(r.types), + charsetNumbers(r.charsetNumbers), charsets(r.charsets), data(r.data), scale(r.scale), precision(r.precision), strings(r.strings), - useStringTable(r.useStringTable), hasLongStringField(r.hasLongStringField), + useStringTable(r.useStringTable), hasStrings(r.hasStrings), hasLongStringField(r.hasLongStringField), sTableThreshold(r.sTableThreshold), forceInline(r.forceInline), userDataStore(NULL) { } @@ -523,11 +562,13 @@ Row& Row::operator=(const Row& r) colWidths = r.colWidths; types = r.types; charsetNumbers = r.charsetNumbers; + charsets = r.charsets; data = r.data; scale = r.scale; precision = r.precision; strings = r.strings; useStringTable = r.useStringTable; + hasStrings = r.hasStrings; hasLongStringField = r.hasLongStringField; sTableThreshold = r.sTableThreshold; forceInline = r.forceInline; @@ -990,6 +1031,128 @@ int64_t Row::getSignedNullValue(uint32_t colIndex) const return utils::getSignedNullValue(types[colIndex], getColumnWidth(colIndex)); } +bool Row::equals(const std::string& val, uint32_t col) const +{ + const CHARSET_INFO* cs = getCharset(col); + if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::BLOB)) + { + if (getStringLength(col) != val.length()) + return false; + + if (memcmp(getStringPointer(col), val.c_str(), val.length())) + return false; + } + else if (inStringTable(col)) + { + uint64_t offset = *((uint64_t*) &data[offsets[col]]); + return strings->equals(val, offset, cs); + } + else + { + return (cs->strnncollsp(val.c_str(), val.length(), (char*)&data[offsets[col]], getColumnWidth(col)) == 0); + } + return true; +} + +bool Row::equals(const Row& r2, const std::vector& keyCols) const +{ + for (uint32_t i = 0; i < keyCols.size(); i++) + { + const uint32_t& col = keyCols[i]; + + if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::VARCHAR || + getColType(col) == 
execplan::CalpontSystemCatalog::CHAR || + getColType(col) == execplan::CalpontSystemCatalog::TEXT)) + { + CHARSET_INFO* cs = getCharset(col); + if (cs->strnncollsp(getStringPointer(col), getStringLength(col), + r2.getStringPointer(col), r2.getStringLength(col))) + { + return false; + } + } + else if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::BLOB)) + { + if (getStringLength(col) != r2.getStringLength(col)) + return false; + + if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col))) + return false; + } + else + { + if (getColType(col) == execplan::CalpontSystemCatalog::LONGDOUBLE) + { + if (getLongDoubleField(col) != r2.getLongDoubleField(col)) + return false; + } + else if (getUintField(col) != r2.getUintField(col)) + return false; + } + } + + return true; +} + +bool Row::equals(const Row& r2, uint32_t lastCol) const +{ + // This check fires with empty r2 only. + if (lastCol >= columnCount) + return true; + + // If there are no strings in the row, then we can just memcmp the whole row. + // hasStrings is true if there is any column of type CHAR, VARCHAR or TEXT + // useStringTable is true if any field declared > max inline field size, including BLOB + // For memcmp to be correct, both must be false. 
+ if (!hasStrings && !useStringTable && !r2.hasStrings && !r2.useStringTable) + return !(memcmp(&data[offsets[0]], &r2.data[offsets[0]], offsets[lastCol + 1] - offsets[0])); + + // There are strings involved, so we need to check each column + // because binary equality is not equality for many charsets/collations + for (uint32_t col = 0; col <= lastCol; col++) + { + if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::VARCHAR || + getColType(col) == execplan::CalpontSystemCatalog::CHAR || + getColType(col) == execplan::CalpontSystemCatalog::TEXT)) + { + CHARSET_INFO* cs = getCharset(col); + if (cs->strnncollsp(getStringPointer(col), getStringLength(col), + r2.getStringPointer(col), r2.getStringLength(col))) + { + return false; + } + } + else if (UNLIKELY(getColType(col) == execplan::CalpontSystemCatalog::BLOB)) + { + if (getStringLength(col) != r2.getStringLength(col)) + return false; + + if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col))) + return false; + } + else + { + if (getColType(col) == execplan::CalpontSystemCatalog::LONGDOUBLE) + { + if (getLongDoubleField(col) != r2.getLongDoubleField(col)) + return false; + } + else if (getUintField(col) != r2.getUintField(col)) + return false; + } + } + return true; +} + +const CHARSET_INFO* Row::getCharset(uint32_t col) const +{ + if (charsets[col] == NULL) + { + const_cast(charsets)[col] = get_charset(charsetNumbers[col], MYF(MY_WME)); + } + return charsets[col]; +} + RowGroup::RowGroup() : columnCount(0), data(NULL), rgData(NULL), strings(NULL), useStringTable(true), hasLongStringField(false), sTableThreshold(20) { @@ -1045,6 +1208,15 @@ RowGroup::RowGroup(uint32_t colCount, } else stOffsets[i + 1] = stOffsets[i] + colWidths[i]; + + execplan::CalpontSystemCatalog::ColDataType type = types[i]; + if (type == execplan::CalpontSystemCatalog::CHAR || + type == execplan::CalpontSystemCatalog::VARCHAR || + type == execplan::CalpontSystemCatalog::TEXT) + { + hasStrings = true; + 
break; + } } useStringTable = (stringTable && hasLongStringField); @@ -1090,6 +1262,7 @@ RowGroup& RowGroup::operator=(const RowGroup& r) rgData = r.rgData; strings = r.strings; useStringTable = r.useStringTable; + hasStrings = r.hasStrings; hasLongStringField = r.hasLongStringField; sTableThreshold = r.sTableThreshold; forceInline = r.forceInline; @@ -1481,7 +1654,7 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& } } -CHARSET_INFO* RowGroup::getCharset(uint32_t col) +const CHARSET_INFO* RowGroup::getCharset(uint32_t col) { if (charsets[col] == NULL) { diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 2ba350abb..7a82463c1 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -108,7 +108,7 @@ public: inline bool isEmpty() const; inline uint64_t getSize() const; inline bool isNullValue(uint64_t offset) const; - inline bool equals(const std::string& str, uint64_t offset) const; + bool equals(const std::string& str, uint64_t offset, CHARSET_INFO* cs) const; void clear(); @@ -325,7 +325,7 @@ public: inline execplan::CalpontSystemCatalog::ColDataType* getColTypes(); inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const; inline uint32_t getCharsetNumber(uint32_t colIndex) const; - + // this returns true if the type is not CHAR or VARCHAR inline bool isCharType(uint32_t colIndex) const; inline bool isUnsigned(uint32_t colIndex) const; @@ -338,7 +338,7 @@ public: inline int64_t getIntField(uint32_t colIndex) const; template inline bool equals(uint64_t val, uint32_t colIndex) const; inline bool equals(long double val, uint32_t colIndex) const; - inline bool equals(const std::string& val, uint32_t colIndex) const; + bool equals(const std::string& val, uint32_t colIndex) const; inline double getDoubleField(uint32_t colIndex) const; inline float getFloatField(uint32_t colIndex) const; @@ -387,7 +387,7 @@ public: inline void setStringField(const uint8_t*, uint32_t len, uint32_t 
colIndex); // support VARBINARY - // Add 2-byte length at the beginning of the field. NULL and zero length field are + // Add 2-byte length at the beginning of the field. NULL and zero length field are // treated the same, could use one of the length bit to distinguish these two cases. inline std::string getVarBinaryStringField(uint32_t colIndex) const; inline void setVarBinaryField(const std::string& val, uint32_t colIndex); @@ -449,14 +449,17 @@ public: inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol] inline uint64_t hash() const; // generates a hash for all cols - inline bool equals(const Row&, const std::vector& keyColumns) const; - inline bool equals(const Row&, uint32_t lastCol) const; + bool equals(const Row&, const std::vector& keyColumns) const; + bool equals(const Row&, uint32_t lastCol) const; inline bool equals(const Row&) const; inline void setUserDataStore(UserDataStore* u) { userDataStore = u; } + + const CHARSET_INFO* getCharset(uint32_t col) const; + private: uint32_t columnCount; uint64_t baseRid; @@ -468,12 +471,14 @@ private: uint32_t* colWidths; execplan::CalpontSystemCatalog::ColDataType* types; uint32_t* charsetNumbers; + CHARSET_INFO** charsets; uint8_t* data; uint32_t* scale; uint32_t* precision; StringStore* strings; bool useStringTable; + bool hasStrings; bool hasLongStringField; uint32_t sTableThreshold; boost::shared_array forceInline; @@ -634,18 +639,6 @@ inline bool Row::equals(long double val, uint32_t colIndex) const { return *((long double*) &data[offsets[colIndex]]) == val; } - -inline bool Row::equals(const std::string& val, uint32_t colIndex) const -{ - if (inStringTable(colIndex)) - { - uint64_t offset = *((uint64_t*) &data[offsets[colIndex]]); - return strings->equals(val, offset); - } - else - return (strncmp(val.c_str(), (char*) &data[offsets[colIndex]], getColumnWidth(colIndex)) == 0); -} - template inline uint64_t Row::getUintField(uint32_t colIndex) const { @@ -1182,69 
+1175,6 @@ inline uint64_t Row::hash(uint32_t lastCol) const return ret; } -inline bool Row::equals(const Row& r2, const std::vector& keyCols) const -{ - for (uint32_t i = 0; i < keyCols.size(); i++) - { - const uint32_t& col = keyCols[i]; - - if (!isLongString(col)) - { - if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE) - { - if (getLongDoubleField(i) != r2.getLongDoubleField(i)) - return false; - } - else if (getUintField(col) != r2.getUintField(col)) - return false; - } - else - { - if (getStringLength(col) != r2.getStringLength(col)) - return false; - - if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col))) - return false; - } - } - - return true; -} - -inline bool Row::equals(const Row& r2, uint32_t lastCol) const -{ - // This check fires with empty r2 only. - if (lastCol >= columnCount) - return true; - - if (!useStringTable && !r2.useStringTable) - return !(memcmp(&data[offsets[0]], &r2.data[offsets[0]], offsets[lastCol + 1] - offsets[0])); - - for (uint32_t i = 0; i <= lastCol; i++) - if (!isLongString(i)) - { - if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE) - { - if (getLongDoubleField(i) != r2.getLongDoubleField(i)) - return false; - } - else if (getUintField(i) != r2.getUintField(i)) - return false; - } - else - { - uint32_t len = getStringLength(i); - - if (len != r2.getStringLength(i)) - return false; - - if (memcmp(getStringPointer(i), r2.getStringPointer(i), len)) - return false; - } - - return true; -} - inline bool Row::equals(const Row& r2) const { return equals(r2, columnCount - 1); @@ -1414,7 +1344,7 @@ public: inline void setStringStore(boost::shared_ptr); - CHARSET_INFO* getCharset(uint32_t col); + const CHARSET_INFO* getCharset(uint32_t col); private: uint32_t columnCount; @@ -1443,6 +1373,7 @@ private: RGData* rgData; StringStore* strings; // note, strings and data belong to rgData bool useStringTable; + bool hasStrings; bool hasLongStringField; uint32_t sTableThreshold; 
boost::shared_array forceInline; @@ -1569,6 +1500,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const r->colWidths = (uint32_t*) &colWidths[0]; r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]); r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]); + r->charsets = (CHARSET_INFO**) & (charsets[0]); r->scale = (uint32_t*) & (scale[0]); r->precision = (uint32_t*) & (precision[0]); } @@ -1591,6 +1523,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const r->hasLongStringField = hasLongStringField; r->sTableThreshold = sTableThreshold; r->forceInline = forceInline; + r->hasStrings = hasStrings; } inline uint32_t RowGroup::getRowSize() const @@ -1935,45 +1868,6 @@ inline bool StringStore::isNullValue(uint64_t off) const return (memcmp(&mc->data[offset+4], joblist::CPNULLSTRMARK.c_str(), 8) == 0); } -inline bool StringStore::equals(const std::string& str, uint64_t off) const -{ - uint32_t length; - - if (off == std::numeric_limits::max()) - return str == joblist::CPNULLSTRMARK; - - MemChunk* mc; - - if (off & 0x8000000000000000) - { - if (longStrings.size() <= (off & ~0x8000000000000000)) - return false; - - mc = (MemChunk*) longStrings[off & ~0x8000000000000000].get(); - - memcpy(&length, mc->data, 4); - - // Not sure if this check it needed, but adds safety - if (length > mc->currentSize) - return false; - - return (strncmp(str.c_str(), (const char*) mc->data + 4, length) == 0); - } - - uint32_t chunk = off / CHUNK_SIZE; - uint32_t offset = off % CHUNK_SIZE; - - if (mem.size() <= chunk) - return false; - - mc = (MemChunk*) mem[chunk].get(); - memcpy(&length, &mc->data[offset], 4); - - if ((offset + length) > mc->currentSize) - return false; - - return (strncmp(str.c_str(), (const char*) &mc->data[offset] + 4, length) == 0); -} inline uint32_t StringStore::getStringLength(uint64_t off) { uint32_t length; diff --git a/utils/windowfunction/idborderby.cpp b/utils/windowfunction/idborderby.cpp index bc9ac16dd..cbd7a2028 100644 --- 
a/utils/windowfunction/idborderby.cpp +++ b/utils/windowfunction/idborderby.cpp @@ -301,16 +301,11 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2) int len2 = l->row2().getStringLength(fSpec.fIndex); const char* s1 = (const char*)l->row1().getStringPointer(fSpec.fIndex); const char* s2 = (const char*)l->row2().getStringPointer(fSpec.fIndex); - // For Japanese, coll.compare() may not be as correct as strncmp -// if (JPcodePoint) - { -// ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2)); - } -// else - { - const std::collate& coll = std::use_facet >(loc); - ret = fSpec.fAsc * coll.compare(s1, s1+len1, s2, s2+len2); - } + + if (!cs) + cs = l->rowGroup()->getCharset(fSpec.fIndex); + + ret = fSpec.fAsc * cs->strnncollsp(s1, len1, s2, len2); } return ret; diff --git a/utils/windowfunction/idborderby.h b/utils/windowfunction/idborderby.h index e26ba1aa3..dc7ccef28 100644 --- a/utils/windowfunction/idborderby.h +++ b/utils/windowfunction/idborderby.h @@ -80,7 +80,6 @@ struct IdbSortSpec // TODO There are three ordering specs since 10.2 int fAsc; // ::= ASC | DESC int fNf; // ::= NULLS FIRST | NULLS LAST - std::string fLocale; IdbSortSpec() : fIndex(-1), fAsc(1), fNf(1) {} IdbSortSpec(int i, bool b) : fIndex(i), fAsc(b ? 1 : -1), fNf(fAsc) {} @@ -93,39 +92,7 @@ struct IdbSortSpec class Compare { public: - Compare(const IdbSortSpec& spec) : fSpec(spec) - { - // Save off the current Locale in case something goes wrong. - std::string curLocale = setlocale(LC_COLLATE, NULL); - if (spec.fLocale.length() > 0) - { - fLocale = spec.fLocale; - } - else - { - fLocale = curLocale; - } - - try - { - std::locale localloc(fLocale.c_str()); - loc = localloc; - } - catch(...) 
- { - fLocale = curLocale; - std::locale localloc(fLocale.c_str()); - loc = localloc; - } - if (fLocale.find("ja_JP") != std::string::npos) - { - JPcodePoint = true; - } - else - { - JPcodePoint = false; - } - } + Compare(const IdbSortSpec& spec) : fSpec(spec) {} virtual ~Compare() {} virtual int operator()(IdbCompare*, rowgroup::Row::Pointer, rowgroup::Row::Pointer) = 0; @@ -137,9 +104,6 @@ public: protected: IdbSortSpec fSpec; - std::string fLocale; - std::locale loc; - bool JPcodePoint; // code point ordering (Japanese UTF) flag }; // Comparators for signed types @@ -283,9 +247,11 @@ public: class StringCompare : public Compare { public: - StringCompare(const IdbSortSpec& spec) : Compare(spec) {} + StringCompare(const IdbSortSpec& spec) : Compare(spec), cs(NULL) {} int operator()(IdbCompare*, rowgroup::Row::Pointer, rowgroup::Row::Pointer); + + CHARSET_INFO* cs; }; // End of comparators for variable sized types @@ -324,6 +290,10 @@ public: return fRow2; } + rowgroup::RowGroup* rowGroup() + { + return &fRowGroup; + } protected: rowgroup::RowGroup fRowGroup; rowgroup::Row fRow1;