diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d1f7ef29..8191f653a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,6 +249,12 @@ ELSE() SET (MARIADB_CLIENT_LIBS -L${SERVER_BUILD_INCLUDE_DIR}/../libmariadb/libmariadb/ libmariadb.so) ENDIF() +IF (INSTALL_LAYOUT) + SET (MARIADB_STRING_LIBS dbug strings mysys) +ELSE() + SET (MARIADB_STRING_LIBS -L${SERVER_BUILD_INCLUDE_DIR}/../strings/ libstrings.a -L${SERVER_BUILD_INCLUDE_DIR}/../mysys/ libmysys.a -L${SERVER_BUILD_INCLUDE_DIR}/../dbug/ libdbug.a) +ENDIF() + #SET (ENGINE_UTILS_BOOSTIDB_INCLUDE "{CMAKE_CURRENT_SOURCE_DIR}/utils/boost_idb") SET (ENGINE_UTILS_MESSAGEQCPP_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/messageqcpp") SET (ENGINE_WE_SHARED_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/writeengine/shared") diff --git a/dbcon/execplan/CMakeLists.txt b/dbcon/execplan/CMakeLists.txt index a038384cc..a0293e8f8 100755 --- a/dbcon/execplan/CMakeLists.txt +++ b/dbcon/execplan/CMakeLists.txt @@ -46,7 +46,7 @@ set(execplan_LIB_SRCS add_library(execplan SHARED ${execplan_LIB_SRCS}) -target_link_libraries(execplan ${NETSNMP_LIBRARIES}) +target_link_libraries(execplan ${NETSNMP_LIBRARIES} ${MARIADB_STRING_LIBS}) install(TARGETS execplan DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-libs) diff --git a/dbcon/execplan/calpontsystemcatalog.cpp b/dbcon/execplan/calpontsystemcatalog.cpp index aa369e005..f5f15a2ce 100644 --- a/dbcon/execplan/calpontsystemcatalog.cpp +++ b/dbcon/execplan/calpontsystemcatalog.cpp @@ -21,6 +21,10 @@ * * ***********************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include @@ -6084,6 +6088,41 @@ void CalpontSystemCatalog::checkSysCatVer() } } +CalpontSystemCatalog::ColType::ColType() : + colWidth(0), + constraintType(NO_CONSTRAINT), + colDataType(MEDINT), + defaultValue(""), + colPosition(-1), + scale(0), + precision(-1), + compressionType(NO_COMPRESSION), + 
columnOID(0), + autoincrement(0), + nextvalue(0) +{ + charsetNumber = default_charset_info->number; +} + +CalpontSystemCatalog::ColType::ColType(const ColType& rhs) +{ + colWidth = rhs.colWidth; + constraintType = rhs.constraintType; + colDataType = rhs.colDataType; + ddn = rhs.ddn; + defaultValue = rhs.defaultValue; + colPosition = rhs.colPosition; + scale = rhs.scale; + precision = rhs.precision; + compressionType = rhs.compressionType; + columnOID = rhs.columnOID; + autoincrement = rhs.autoincrement; + nextvalue = rhs.nextvalue; + // A copy must keep the source column's charset, not reset it to the server default. + charsetNumber = rhs.charsetNumber; +} + + const string CalpontSystemCatalog::ColType::toString() const { ostringstream output; diff --git a/dbcon/execplan/calpontsystemcatalog.h b/dbcon/execplan/calpontsystemcatalog.h index 81b6c2218..dc9f1fc13 100644 --- a/dbcon/execplan/calpontsystemcatalog.h +++ b/dbcon/execplan/calpontsystemcatalog.h @@ -279,8 +279,7 @@ public: */ struct ColType { - ColType() : colWidth(0), constraintType(NO_CONSTRAINT), colDataType(MEDINT), defaultValue(""), colPosition(-1), scale(0), precision(-1), compressionType(NO_COMPRESSION), columnOID(0), - autoincrement(0), nextvalue(0) { } + ColType(); int32_t colWidth; ConstraintType constraintType; ColDataType colDataType; @@ -293,23 +292,10 @@ public: OID columnOID; bool autoincrement; //set to true if SYSCOLUMN autoincrement is �y� uint64_t nextvalue; //next autoincrement value + uint32_t charsetNumber; - ColType(const ColType& rhs) - { - colWidth = rhs.colWidth; - constraintType = rhs.constraintType; - colDataType = rhs.colDataType; - ddn = rhs.ddn; - defaultValue = rhs.defaultValue; - colPosition = rhs.colPosition; - scale = rhs.scale; - precision = rhs.precision; - compressionType = rhs.compressionType; - columnOID = rhs.columnOID; - autoincrement = rhs.autoincrement; - nextvalue = rhs.nextvalue; + ColType(const ColType& rhs); - } // for F&E use. 
only serialize necessary info for now void serialize (messageqcpp::ByteStream& b) const { @@ -318,6 +304,7 @@ public: b << (uint32_t)scale; b << (uint32_t)precision; b << (uint32_t)compressionType; + b << charsetNumber; } void unserialize (messageqcpp::ByteStream& b) @@ -329,6 +316,7 @@ public: b >> (uint32_t&)scale; b >> (uint32_t&)precision; b >> (uint32_t&)compressionType; + b >> charsetNumber; } const std::string toString() const; diff --git a/dbcon/execplan/predicateoperator.cpp b/dbcon/execplan/predicateoperator.cpp index efd748c99..370db3dd3 100644 --- a/dbcon/execplan/predicateoperator.cpp +++ b/dbcon/execplan/predicateoperator.cpp @@ -21,6 +21,11 @@ * * ***********************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include +#include + #include #include "bytestream.h" @@ -32,8 +37,6 @@ using namespace oam; using namespace std; -bool futf8 = true; - namespace { @@ -54,62 +57,21 @@ namespace execplan /** * Constructors/Destructors */ -PredicateOperator::PredicateOperator() +PredicateOperator::PredicateOperator() : + cs(NULL) { - Oam oam; - // get and set locale language - string systemLang = "C"; - - try - { - oam.getSystemConfig("SystemLang", systemLang); - } - catch (...) - {} - - if ( systemLang != "en_US.UTF-8" && - systemLang.find("UTF") != string::npos ) - futf8 = true; } -PredicateOperator::PredicateOperator(const string& operatorName) +PredicateOperator::PredicateOperator(const string& operatorName) : + cs(NULL) { - Oam oam; - // get and set locale language - string systemLang = "C"; - - try - { - oam.getSystemConfig("SystemLang", systemLang); - } - catch (...) 
- {} - - if ( systemLang != "en_US.UTF-8" && - systemLang.find("UTF") != string::npos ) - futf8 = true; - data(operatorName); } PredicateOperator::PredicateOperator(const PredicateOperator& rhs) : Operator(rhs) { - Oam oam; - // get and set locale language - string systemLang = "C"; - - try - { - oam.getSystemConfig("SystemLang", systemLang); - } - catch (...) - {} - - if ( systemLang != "en_US.UTF-8" && - systemLang.find("UTF") != string::npos ) - futf8 = true; - data(rhs.data()); + cs = rhs.getCharset(); } PredicateOperator:: ~PredicateOperator() @@ -180,6 +142,7 @@ bool PredicateOperator::operator!=(const TreeNode* t) const //FIXME: VARBINARY??? void PredicateOperator::setOpType(Type& l, Type& r) { + fOperationType = l; // Default to left side. Modify as needed. if ( l.colDataType == execplan::CalpontSystemCatalog::DATETIME || l.colDataType == execplan::CalpontSystemCatalog::TIME || l.colDataType == execplan::CalpontSystemCatalog::TIMESTAMP || @@ -189,7 +152,7 @@ void PredicateOperator::setOpType(Type& l, Type& r) { case execplan::CalpontSystemCatalog::CHAR: case execplan::CalpontSystemCatalog::VARCHAR: - fOperationType = l; + fOperationType.charsetNumber = r.charsetNumber; break; case execplan::CalpontSystemCatalog::DATETIME: @@ -349,20 +312,25 @@ void PredicateOperator::setOpType(Type& l, Type& r) ( (r.colDataType == execplan::CalpontSystemCatalog::CHAR && r.colWidth <= 8) || (r.colDataType == execplan::CalpontSystemCatalog::VARCHAR && r.colWidth < 8) ) ) { - if ( futf8 ) + switch (fOperationType.charsetNumber) { - fOperationType.colDataType = execplan::CalpontSystemCatalog::VARCHAR; - fOperationType.colWidth = 255; - } - else - { - fOperationType.colDataType = execplan::CalpontSystemCatalog::BIGINT; - fOperationType.scale = 0; - fOperationType.colWidth = 8; - - // @bug3532, char[] as network order int for fast comparison. 
- l.colDataType = execplan::CalpontSystemCatalog::STRINT; - r.colDataType = execplan::CalpontSystemCatalog::STRINT; + case 8: // latin1_swedish_ci + case 9: // latin2_general_ci + case 11: // ascii_general_ci + case 47: // latin1_bin + case 48: // latin1_general_ci + case 49: // latin1_general_cs + case 65: // ascii_bin + case 77: // latin2_bin + fOperationType.colDataType = execplan::CalpontSystemCatalog::BIGINT; // char[] as network order int for fast comparison. + fOperationType.scale = 0; + fOperationType.colWidth = 8; + l.colDataType = execplan::CalpontSystemCatalog::STRINT; + r.colDataType = execplan::CalpontSystemCatalog::STRINT; + break; // single-byte charsets must not fall through to the VARCHAR default + default: + fOperationType.colDataType = execplan::CalpontSystemCatalog::VARCHAR; + fOperationType.colWidth = 255; } } else @@ -382,6 +350,409 @@ void PredicateOperator::setOpType(Type& l, Type& r) fOperationType.colDataType = execplan::CalpontSystemCatalog::DOUBLE; fOperationType.colWidth = 8; } + + cs = get_charset(fOperationType.charsetNumber, MYF(MY_WME)); +} + +bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedColumn* lop, ReturnedColumn* rop) +{ + // like operator. both sides are string. + if (fOp == OP_LIKE || fOp == OP_NOTLIKE) + { + SP_CNX_Regex regex = rop->regex(); + + // Ugh. The strings returned by getStrVal have null padding out to the col width. boost::regex + // considers these nulls significant, but they're not in the pattern, so we need to strip + // them off... + const std::string& v = lop->getStrVal(row, isNull); +// char* c = (char*)alloca(v.length() + 1); +// memcpy(c, v.c_str(), v.length()); +// c[v.length()] = 0; +// std::string vv(c); + + if (regex) + { +#ifdef POSIX_REGEX + bool ret = regexec(regex.get(), v.c_str(), 0, NULL, 0) == 0; +#else + bool ret = boost::regex_match(v.c_str(), *regex); +#endif + return (((fOp == OP_LIKE) ? 
ret : !ret) && !isNull); + } + else + { +#ifdef POSIX_REGEX + regex_t regex; + std::string str = dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull)); + regcomp(&regex, str.c_str(), REG_NOSUB | REG_EXTENDED); + bool ret = regexec(&regex, v.c_str(), 0, NULL, 0) == 0; + regfree(&regex); +#else + boost::regex regex(dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull))); + bool ret = boost::regex_match(v.c_str(), regex); +#endif + return (((fOp == OP_LIKE) ? ret : !ret) && !isNull); + } + } + + // fOpType should have already been set on the connector during parsing + switch (fOperationType.colDataType) + { + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + { + if (fOp == OP_ISNULL) + { + lop->getIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + int64_t val1 = lop->getIntVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, rop->getIntVal(row, isNull)) && !isNull; + } + + case execplan::CalpontSystemCatalog::UBIGINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + { + if (fOp == OP_ISNULL) + { + lop->getUintVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getUintVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + uint64_t val1 = lop->getUintVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, rop->getUintVal(row, isNull)) && !isNull; + } + + case 
execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + if (fOp == OP_ISNULL) + { + lop->getDoubleVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getDoubleVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + double val1 = lop->getDoubleVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, rop->getDoubleVal(row, isNull)) && !isNull; + } + + case execplan::CalpontSystemCatalog::LONGDOUBLE: + { + if (fOp == OP_ISNULL) + { + lop->getLongDoubleVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getLongDoubleVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + long double val1 = lop->getLongDoubleVal(row, isNull); + if (isNull) + return false; + + long double val2 = rop->getLongDoubleVal(row, isNull); + if (isNull) + return false; + + // In many case, rounding error will prevent an eq compare to work + // In these cases, use the largest scale of the two items. + if (fOp == execplan::OP_EQ) + { + // In case a val is a representation of a very large integer, + // we won't want to just multiply by scale, as it may move + // significant digits out of scope. 
So we break them apart + // and compare each separately + int64_t scale = std::max(lop->resultType().scale, rop->resultType().scale); + if (scale) + { + long double intpart1; + long double fract1 = modfl(val1, &intpart1); + long double intpart2; + long double fract2 = modfl(val2, &intpart2); + if (numericCompare(intpart1, intpart2)) + { + double factor = pow(10.0, (double)scale); + fract1 = roundl(fract1 * factor); + fract2 = roundl(fract2 * factor); + return numericCompare(fract1, fract2); + } + else + { + return false; + } + } + } + return numericCompare(val1, val2); + } + + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + if (fOp == OP_ISNULL) + { + lop->getDecimalVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getDecimalVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + IDB_Decimal val1 = lop->getDecimalVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, rop->getDecimalVal(row, isNull)) && !isNull; + } + + case execplan::CalpontSystemCatalog::DATE: + { + if (fOp == OP_ISNULL) + { + lop->getDateIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getDateIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + int64_t val1 = lop->getDateIntVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, (int64_t)rop->getDateIntVal(row, isNull)) && !isNull; + } + + case execplan::CalpontSystemCatalog::DATETIME: + { + if (fOp == OP_ISNULL) + { + lop->getDatetimeIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getDatetimeIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + int64_t val1 = 
lop->getDatetimeIntVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, rop->getDatetimeIntVal(row, isNull)) && !isNull; + } + + case execplan::CalpontSystemCatalog::TIMESTAMP: + { + if (fOp == OP_ISNULL) + { + lop->getTimestampIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getTimestampIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + int64_t val1 = lop->getTimestampIntVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, rop->getTimestampIntVal(row, isNull)) && !isNull; + } + + case execplan::CalpontSystemCatalog::TIME: + { + if (fOp == OP_ISNULL) + { + lop->getTimeIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getTimeIntVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + int64_t val1 = lop->getTimeIntVal(row, isNull); + + if (isNull) + return false; + + return numericCompare(val1, rop->getTimeIntVal(row, isNull)) && !isNull; + } + + + + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::TEXT: + { + if (fOp == OP_ISNULL) + { + lop->getStrVal(row, isNull); + bool ret = isNull; + isNull = false; + return ret; + } + + if (fOp == OP_ISNOTNULL) + { + lop->getStrVal(row, isNull); + bool ret = isNull; + isNull = false; + return !ret; + } + + if (isNull) + return false; + + const std::string& val1 = lop->getStrVal(row, isNull); + if (isNull) + return false; + const std::string& val2 = rop->getStrVal(row, isNull); + + // Return the collation-aware result; without a return this case fell through to VARBINARY/BLOB and always yielded false. + return strTrimCompare(val1, val2, fOperationType.charsetNumber) && !isNull; +// return strCompare(val1, rop->getStrVal(row, isNull)) && !isNull; + + } + + 
//FIXME: ??? + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::BLOB: + return false; + break; + + default: + { + std::ostringstream oss; + oss << "invalid predicate operation type: " << fOperationType.colDataType; + throw logging::InvalidOperationExcept(oss.str()); + } + } + + return false; } } // namespace diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index bf51cbeed..b8f1f7079 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -25,6 +25,7 @@ #ifndef PREDICATEOPERATOR_H #define PREDICATEOPERATOR_H + #include #include #if defined(_MSC_VER) @@ -38,10 +39,13 @@ #include #include +#include "utils_utf8.h" #include "expressionparser.h" #include "returnedcolumn.h" #include "dataconvert.h" -#include "utils_utf8.h" + +struct charset_info_st; +typedef const struct charset_info_st CHARSET_INFO; namespace messageqcpp { @@ -104,419 +108,25 @@ public: */ bool operator!=(const PredicateOperator& t) const; + const CHARSET_INFO* getCharset() const + { + return cs; + } /*********************************************************** * F&E framework * ***********************************************************/ - inline virtual bool getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedColumn* lop, ReturnedColumn* rop); + virtual bool getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedColumn* lop, ReturnedColumn* rop); void setOpType(Type& l, Type& r); private: template inline bool numericCompare(result_t op1, result_t op2); - inline bool strCompare(const std::string& op1, const std::string& op2); - // MCOL-1559 - inline bool strTrimCompare(const std::string& op1, const std::string& op2); + inline bool strCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber); + inline bool strTrimCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber); + + const CHARSET_INFO* cs; }; -inline bool 
PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedColumn* lop, ReturnedColumn* rop) -{ - // like operator. both sides are string. - if (fOp == OP_LIKE || fOp == OP_NOTLIKE) - { - SP_CNX_Regex regex = rop->regex(); - - // Ugh. The strings returned by getStrVal have null padding out to the col width. boost::regex - // considers these nulls significant, but they're not in the pattern, so we need to strip - // them off... - const std::string& v = lop->getStrVal(row, isNull); -// char* c = (char*)alloca(v.length() + 1); -// memcpy(c, v.c_str(), v.length()); -// c[v.length()] = 0; -// std::string vv(c); - - if (regex) - { -#ifdef POSIX_REGEX - bool ret = regexec(regex.get(), v.c_str(), 0, NULL, 0) == 0; -#else - bool ret = boost::regex_match(v.c_str(), *regex); -#endif - return (((fOp == OP_LIKE) ? ret : !ret) && !isNull); - } - else - { -#ifdef POSIX_REGEX - regex_t regex; - std::string str = dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull)); - regcomp(&regex, str.c_str(), REG_NOSUB | REG_EXTENDED); - bool ret = regexec(&regex, v.c_str(), 0, NULL, 0) == 0; - regfree(&regex); -#else - boost::regex regex(dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull))); - bool ret = boost::regex_match(v.c_str(), regex); -#endif - return (((fOp == OP_LIKE) ? 
ret : !ret) && !isNull); - } - } - - // fOpType should have already been set on the connector during parsing - switch (fOperationType.colDataType) - { - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - { - if (fOp == OP_ISNULL) - { - lop->getIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - int64_t val1 = lop->getIntVal(row, isNull); - - if (isNull) - return false; - - return numericCompare(val1, rop->getIntVal(row, isNull)) && !isNull; - } - - case execplan::CalpontSystemCatalog::UBIGINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - { - if (fOp == OP_ISNULL) - { - lop->getUintVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getUintVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - uint64_t val1 = lop->getUintVal(row, isNull); - - if (isNull) - return false; - - return numericCompare(val1, rop->getUintVal(row, isNull)) && !isNull; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - if (fOp == OP_ISNULL) - { - lop->getDoubleVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getDoubleVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - double val1 = 
lop->getDoubleVal(row, isNull); - - if (isNull) - return false; - - return numericCompare(val1, rop->getDoubleVal(row, isNull)) && !isNull; - } - - case execplan::CalpontSystemCatalog::LONGDOUBLE: - { - if (fOp == OP_ISNULL) - { - lop->getLongDoubleVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getLongDoubleVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - long double val1 = lop->getLongDoubleVal(row, isNull); - if (isNull) - return false; - - long double val2 = rop->getLongDoubleVal(row, isNull); - if (isNull) - return false; - - // In many case, rounding error will prevent an eq compare to work - // In these cases, use the largest scale of the two items. - if (fOp == execplan::OP_EQ) - { - // In case a val is a representation of a very large integer, - // we won't want to just multiply by scale, as it may move - // significant digits out of scope. So we break them apart - // and compare each separately - int64_t scale = std::max(lop->resultType().scale, rop->resultType().scale); - if (scale) - { - long double intpart1; - long double fract1 = modfl(val1, &intpart1); - long double intpart2; - long double fract2 = modfl(val2, &intpart2); - if (numericCompare(intpart1, intpart2)) - { - double factor = pow(10.0, (double)scale); - fract1 = roundl(fract1 * factor); - fract2 = roundl(fract2 * factor); - return numericCompare(fract1, fract2); - } - else - { - return false; - } - } - } - return numericCompare(val1, val2); - } - - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - if (fOp == OP_ISNULL) - { - lop->getDecimalVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getDecimalVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - IDB_Decimal val1 = lop->getDecimalVal(row, 
isNull); - - if (isNull) - return false; - - return numericCompare(val1, rop->getDecimalVal(row, isNull)) && !isNull; - } - - case execplan::CalpontSystemCatalog::DATE: - { - if (fOp == OP_ISNULL) - { - lop->getDateIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getDateIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - int64_t val1 = lop->getDateIntVal(row, isNull); - - if (isNull) - return false; - - return numericCompare(val1, (int64_t)rop->getDateIntVal(row, isNull)) && !isNull; - } - - case execplan::CalpontSystemCatalog::DATETIME: - { - if (fOp == OP_ISNULL) - { - lop->getDatetimeIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getDatetimeIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - int64_t val1 = lop->getDatetimeIntVal(row, isNull); - - if (isNull) - return false; - - return numericCompare(val1, rop->getDatetimeIntVal(row, isNull)) && !isNull; - } - - case execplan::CalpontSystemCatalog::TIMESTAMP: - { - if (fOp == OP_ISNULL) - { - lop->getTimestampIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getTimestampIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - int64_t val1 = lop->getTimestampIntVal(row, isNull); - - if (isNull) - return false; - - return numericCompare(val1, rop->getTimestampIntVal(row, isNull)) && !isNull; - } - - case execplan::CalpontSystemCatalog::TIME: - { - if (fOp == OP_ISNULL) - { - lop->getTimeIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getTimeIntVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - int64_t 
val1 = lop->getTimeIntVal(row, isNull); - - if (isNull) - return false; - - return numericCompare(val1, rop->getTimeIntVal(row, isNull)) && !isNull; - } - - - - case execplan::CalpontSystemCatalog::VARCHAR: - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::TEXT: - { - if (fOp == OP_ISNULL) - { - lop->getStrVal(row, isNull); - bool ret = isNull; - isNull = false; - return ret; - } - - if (fOp == OP_ISNOTNULL) - { - lop->getStrVal(row, isNull); - bool ret = isNull; - isNull = false; - return !ret; - } - - if (isNull) - return false; - - const std::string& val1 = lop->getStrVal(row, isNull); - if (isNull) - return false; - - return strTrimCompare(val1, rop->getStrVal(row, isNull)) && !isNull; -// return strCompare(val1, rop->getStrVal(row, isNull)) && !isNull; - - } - - //FIXME: ??? - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::BLOB: - return false; - break; - - default: - { - std::ostringstream oss; - oss << "invalid predicate operation type: " << fOperationType.colDataType; - throw logging::InvalidOperationExcept(oss.str()); - } - } - - return false; -} - template inline bool PredicateOperator::numericCompare(result_t op1, result_t op2) @@ -550,27 +160,27 @@ inline bool PredicateOperator::numericCompare(result_t op1, result_t op2) } } -inline bool PredicateOperator::strCompare(const std::string& op1, const std::string& op2) +inline bool PredicateOperator::strCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber) { switch (fOp) { case OP_EQ: - return funcexp::utf8::idb_strcoll(op1.c_str(), op2.c_str()) == 0; + return utf8::mcs_strcoll(op1, op2, charsetNumber) == 0; case OP_NE: - return funcexp::utf8::idb_strcoll(op1.c_str(), op2.c_str()) != 0; + return utf8::mcs_strcoll(op1, op2, charsetNumber) != 0; case OP_GT: - return funcexp::utf8::idb_strcoll(op1.c_str(), op2.c_str()) > 0; + return utf8::mcs_strcoll(op1, op2, charsetNumber) > 0; case OP_GE: - return 
funcexp::utf8::idb_strcoll(op1.c_str(), op2.c_str()) >= 0; + return utf8::mcs_strcoll(op1, op2, charsetNumber) >= 0; case OP_LT: - return funcexp::utf8::idb_strcoll(op1.c_str(), op2.c_str()) < 0; + return utf8::mcs_strcoll(op1, op2, charsetNumber) < 0; case OP_LE: - return funcexp::utf8::idb_strcoll(op1.c_str(), op2.c_str()) <= 0; + return utf8::mcs_strcoll(op1, op2, charsetNumber) <= 0; default: { @@ -581,27 +191,27 @@ inline bool PredicateOperator::strCompare(const std::string& op1, const std::str } } -inline bool PredicateOperator::strTrimCompare(const std::string& op1, const std::string& op2) +inline bool PredicateOperator::strTrimCompare(const std::string& op1, const std::string& op2, uint32_t charsetNumber) { switch (fOp) { case OP_EQ: - return funcexp::utf8::idb_strtrimcoll(op1, op2) == 0; + return utf8::mcs_strcollsp(op1, op2, charsetNumber) == 0; case OP_NE: - return funcexp::utf8::idb_strtrimcoll(op1, op2) != 0; + return utf8::mcs_strcollsp(op1, op2, charsetNumber) != 0; case OP_GT: - return funcexp::utf8::idb_strtrimcoll(op1, op2) > 0; + return utf8::mcs_strcollsp(op1, op2, charsetNumber) > 0; case OP_GE: - return funcexp::utf8::idb_strtrimcoll(op1, op2) >= 0; + return utf8::mcs_strcollsp(op1, op2, charsetNumber) >= 0; case OP_LT: - return funcexp::utf8::idb_strtrimcoll(op1, op2) < 0; + return utf8::mcs_strcollsp(op1, op2, charsetNumber) < 0; case OP_LE: - return funcexp::utf8::idb_strtrimcoll(op1, op2) <= 0; + return utf8::mcs_strcollsp(op1, op2, charsetNumber) <= 0; default: { diff --git a/dbcon/execplan/treenode.h b/dbcon/execplan/treenode.h index ca1196b27..6a85d1ee3 100644 --- a/dbcon/execplan/treenode.h +++ b/dbcon/execplan/treenode.h @@ -451,6 +451,15 @@ public: return fRegex; } + uint32_t charsetNumber() const + { + return fResultType.charsetNumber; + } + void charsetNumber(uint32_t cnum) + { + fResultType.charsetNumber = cnum; + } + protected: Result fResult; execplan::CalpontSystemCatalog::ColType fResultType; // mapped from mysql data type 
diff --git a/dbcon/joblist/pdictionaryscan.cpp b/dbcon/joblist/pdictionaryscan.cpp index 39bb561d0..8cf910dd2 100644 --- a/dbcon/joblist/pdictionaryscan.cpp +++ b/dbcon/joblist/pdictionaryscan.cpp @@ -506,6 +506,7 @@ void pDictionaryScan::sendAPrimitiveMessage( hdr.NVALS = fFilterCount; hdr.Count = msgLbidCount; hdr.CompType = colType.ddn.compressionType; + hdr.charsetNumber = colType.charsetNumber; idbassert(hdr.Count > 0); if (isEquality) @@ -628,7 +629,8 @@ void pDictionaryScan::receivePrimitiveMessages() if (fOid >= 3000 && traceOn() && dlTimes.FirstReadTime().tv_sec == 0) dlTimes.setFirstReadTime(); - if (fOid >= 3000 && traceOn()) dlTimes.setLastReadTime(); + if (fOid >= 3000 && traceOn()) + dlTimes.setLastReadTime(); if (bs->length() == 0) { diff --git a/dbcon/joblist/primitivemsg.h b/dbcon/joblist/primitivemsg.h index 6b959d014..f2cad3e76 100644 --- a/dbcon/joblist/primitivemsg.h +++ b/dbcon/joblist/primitivemsg.h @@ -560,6 +560,7 @@ struct TokenByScanRequestHeader uint16_t flags; uint32_t Pad2; uint16_t Count; + uint32_t charsetNumber; }; // what follows is NVALS DataValues. // compatibility with Ron's stuff. 
diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 6c438744c..e0d46abb6 100755 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -18,6 +18,11 @@ //#define DEBUG_WALK_COND #include +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include +#include + #include #include #include @@ -3406,6 +3411,8 @@ ReturnedColumn* buildReturnedColumn( if (rc && item->name.length) rc->alias(item->name.str); + if (rc) // rc may be NULL, as the alias guard above already assumes + rc->charsetNumber(item->collation.collation->number); return rc; } @@ -4080,6 +4087,7 @@ ReturnedColumn* buildFunctionColumn( fc->operationType(functor->operationType(funcParms, fc->resultType())); fc->expressionId(ci->expressionId++); + fc->charsetNumber(ifp->collation.collation->number); } else if (ifp->type() == Item::COND_ITEM || ifp->functype() == Item_func::EQ_FUNC || @@ -4348,6 +4356,7 @@ ConstantColumn* buildDecimalColumn(Item* item, gp_walk_info& gwi) columnstore_decimal.precision = idp->max_length - idp->decimals; ConstantColumn* cc = new ConstantColumn(valStr, columnstore_decimal); cc->timeZone(gwi.thd->variables.time_zone->get_name()->ptr()); + cc->charsetNumber(idp->collation.collation->number); return cc; } @@ -4467,8 +4476,8 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) default: sc = new SimpleColumn(ifp->db_name.str, bestTableName(ifp), ifp->field_name.str, columnStore, gwi.sessionid); } - sc->resultType(ct); + sc->charsetNumber(ifp->collation.collation->number); string tbname(ifp->table_name.str); if (isInformationSchema) @@ -5079,6 +5088,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) return NULL; } + ac->charsetNumber(item->collation.collation->number); return ac; } diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index f70715118..d7b112d0e 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -938,6 +938,8 @@ ReturnedColumn* 
buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n if (item->full_name()) ac->alias(item->full_name()); + ac->charsetNumber(item->collation.collation->number); + // put ac on windowFuncList gwi.windowFuncList.push_back(ac); return ac; diff --git a/dbcon/mysql/sm.cpp b/dbcon/mysql/sm.cpp index b1a1dad63..5acef3805 100644 --- a/dbcon/mysql/sm.cpp +++ b/dbcon/mysql/sm.cpp @@ -20,7 +20,8 @@ * ***********************************************************************/ -#include + +#include #include #include #include diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index 5cc663fc2..d9563d786 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -97,9 +97,8 @@ void added_a_pm(int) int main(int argc, char* argv[]) { - // get and set locale language - string systemLang = "C"; - systemLang = funcexp::utf8::idb_setlocale(); + // Set locale language + utf8::idb_setlocale(); // This is unset due to the way we start it program_invocation_short_name = const_cast("DDLProc"); diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index 09e5b9128..4f67ef3ba 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -511,13 +511,10 @@ int8_t setupCwd() int main(int argc, char* argv[]) { - // get and set locale language - string systemLang = "C"; - BRM::DBRM dbrm; Oam oam; - //BUG 5362 - systemLang = funcexp::utf8::idb_setlocale(); + // Set locale language + utf8::idb_setlocale(); // This is unset due to the way we start it program_invocation_short_name = const_cast("DMLProc"); diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 3fa3a1c79..af37a6244 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -1433,9 +1433,8 @@ void cleanTempDir() int main(int argc, char* argv[]) { - // get and set locale language - std::string systemLang = "C"; - systemLang = funcexp::utf8::idb_setlocale(); + // Set locale language + utf8::idb_setlocale(); // This is unset due to the way we start it program_invocation_short_name = const_cast("ExeMgr"); diff --git 
a/oam/etc/Columnstore.xml b/oam/etc/Columnstore.xml index e1445deb2..84922e166 100644 --- a/oam/etc/Columnstore.xml +++ b/oam/etc/Columnstore.xml @@ -228,7 +228,6 @@ 8620 - C columnstore-1 pm1 unassigned diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp index 003fd3816..87eb6638f 100644 --- a/primitives/linux-port/dictionary.cpp +++ b/primitives/linux-port/dictionary.cpp @@ -19,6 +19,11 @@ * $Id: dictionary.cpp 2122 2013-07-08 16:33:50Z bpaul $ */ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include +#include + #include #include #include @@ -34,7 +39,6 @@ using namespace std; #include "dataconvert.h" #include -using namespace funcexp; using namespace logging; const char* nullString = " "; // this is not NULL to preempt segfaults. @@ -144,6 +148,8 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, niceBlock = reinterpret_cast(block); offsets = reinterpret_cast(&niceBlock[10]); niceInput = reinterpret_cast(h); + + const CHARSET_INFO* cs = get_charset(h->charsetNumber, MYF(MY_WME)); // if LIKE is an operator, compile regexp's in advance. 
if ((h->NVALS > 0 && h->COP1 & COMPARE_LIKE) || @@ -182,9 +188,6 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, argIndex = 0; args = reinterpret_cast(&niceInput[argsOffset]); - string sig_utf8; - string arg_utf8; - if (eqFilter) { // MCOL-1246 Trim whitespace before match @@ -199,41 +202,34 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, goto no_store; } - // BUG 5110: If it is utf, we need to create utf strings to compare - if (utf8) + if (h->COP1 & COMPARE_LIKE) { - sig_utf8 = string(sig, siglen); - arg_utf8 = string(args->data, args->len); + p_DataValue dv; + + dv.len = siglen; + dv.data = (uint8_t*) sig; + cmpResult = isLike(&dv, ®ex[argIndex]); + + if (h->COP1 & COMPARE_NOT) + cmpResult = !cmpResult; + } + else + { + if (utf8) + { + tmp = cs->strnncoll(sig, siglen, args->data, args->len); + } + else + { + tmp = strncmp(sig, args->data, std::min(siglen, args->len)); + } + cmpResult = compare(tmp, h->COP1, siglen, args->len); } switch (h->NVALS) { case 1: { - if (h->COP1 & COMPARE_LIKE) - { - p_DataValue dv; - - dv.len = siglen; - dv.data = (uint8_t*) sig; - cmpResult = isLike(&dv, ®ex[argIndex]); - - if (h->COP1 & COMPARE_NOT) - cmpResult = !cmpResult; - } - else - { - if (utf8) - { - tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str()); - cmpResult = compare(tmp, h->COP1, siglen, args->len); - } - else - { - tmp = strncmp(sig, args->data, std::min(siglen, args->len)); - cmpResult = compare(tmp, h->COP1, siglen, args->len); - } - } if (cmpResult) goto store; @@ -243,32 +239,6 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, case 2: { - if (h->COP1 & COMPARE_LIKE) - { - p_DataValue dv; - - dv.len = siglen; - dv.data = (uint8_t*) sig; - cmpResult = isLike(&dv, ®ex[argIndex]); - - if (h->COP1 & COMPARE_NOT) - cmpResult = !cmpResult; - } - - else - { - if (utf8) - { - tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str()); - cmpResult = compare(tmp, h->COP1, siglen, 
args->len); - } - else - { - tmp = strncmp(sig, args->data, std::min(siglen, args->len)); - cmpResult = compare(tmp, h->COP1, siglen, args->len); - } - } - if (!cmpResult && h->BOP == BOP_AND) goto no_store; @@ -295,15 +265,13 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, { if (utf8) { - arg_utf8 = string(args->data, args->len); - tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str()); - cmpResult = compare(tmp, h->COP2, siglen, args->len); + tmp = cs->strnncoll(sig, siglen, args->data, args->len); } else { tmp = strncmp(sig, args->data, std::min(siglen, args->len)); - cmpResult = compare(tmp, h->COP2, siglen, args->len); } + cmpResult = compare(tmp, h->COP2, siglen, args->len); } if (cmpResult) @@ -332,14 +300,13 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, { if (utf8) { - tmp = utf8::idb_strcoll(sig_utf8.c_str(), arg_utf8.c_str()); - cmpResult = compare(tmp, h->COP2, siglen, args->len); + tmp = cs->strnncoll(sig, siglen, args->data, args->len); } else { tmp = strncmp(sig, args->data, std::min(siglen, args->len)); - cmpResult = compare(tmp, h->COP1, siglen, args->len); } + cmpResult = compare(tmp, h->COP2, siglen, args->len); } if (!cmpResult && h->BOP == BOP_AND) @@ -351,11 +318,6 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, argsOffset += sizeof(uint16_t) + args->len; argIndex++; args = (DataValue*) &niceInput[argsOffset]; - - if ( utf8) - { - arg_utf8 = string(args->data, args->len); - } } if (i == h->NVALS && cmpResult) @@ -566,112 +528,6 @@ again: dict_OffsetIndex++; } - -void PrimitiveProcessor::p_AggregateSignature(const AggregateSignatureRequestHeader* in, - AggregateSignatureResultHeader* out, unsigned outSize, unsigned* written, bool utf8) -{ - - uint8_t* niceOutput; // h cast to a byte-indexed type - int cmp; - char cMin[BLOCK_SIZE], cMax[BLOCK_SIZE]; - int cMinLen, cMaxLen; - p_DataValue sigptr; - - DataValue* min; - DataValue* max; - - { - void *outp = 
static_cast(out); - memcpy(outp, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); - } - out->ism.Command = DICT_AGGREGATE_RESULTS; - niceOutput = reinterpret_cast(out); - - // The first sig is the min and the max. - out->Count = 0; - dict_OffsetIndex = 0; - nextSig(in->NVALS, in->tokens, &sigptr); - - if (sigptr.len == -1) - return; - - out->Count++; - memcpy(cMin, sigptr.data, sigptr.len); - memcpy(cMax, sigptr.data, sigptr.len); - cMinLen = cMaxLen = sigptr.len; - - for (nextSig(in->NVALS, in->tokens, &sigptr); sigptr.len != -1; - nextSig(in->NVALS, in->tokens, &sigptr), out->Count++) - { - string sig_utf8; - - if (utf8) - { - string cMin_utf8(cMin, cMinLen); - string tmpString((char*)sigptr.data, sigptr.len); - sig_utf8 = tmpString; - cmp = utf8::idb_strcoll(cMin_utf8.c_str(), sig_utf8.c_str()); - } - else - { - cmp = strncmp(cMin, (char*)sigptr.data, std::min(cMinLen, sigptr.len)); - } - - if (cmp > 0) - { - memcpy(cMin, sigptr.data, sigptr.len); - cMinLen = sigptr.len; - } - - if (utf8) - { - string cMax_utf8(cMax, cMaxLen); - cmp = utf8::idb_strcoll(cMax_utf8.c_str(), sig_utf8.c_str()); - } - else - { - cmp = strncmp(cMax, (char*)sigptr.data, std::min(cMaxLen, sigptr.len)); - } - - if (cmp < 0) - { - memcpy(cMax, sigptr.data, sigptr.len); - cMaxLen = sigptr.len; - } - } - - //we now have the results, stuff them into the output buffer -#ifdef PRIM_DEBUG - unsigned size = sizeof(AggregateSignatureResultHeader) + cMaxLen + cMinLen - + sizeof(uint16_t) * 2; - - if (outSize < size) - { - MessageLog logger(LoggingID(28)); - logging::Message::Args marker; - Message msg(35); - - marker.add(11); - msg.format(marker); - logger.logErrorMessage(msg); - - throw length_error("PrimitiveProcessor::p_AggregateSignature(): output buffer is too small"); - } - -#endif - - min = reinterpret_cast - (&niceOutput[sizeof(AggregateSignatureResultHeader)]); - max = reinterpret_cast - (&niceOutput[sizeof(AggregateSignatureResultHeader) + cMinLen + sizeof(uint16_t)]); - min->len = 
cMinLen; - max->len = cMaxLen; - memcpy(min->data, cMin, cMinLen); - memcpy(max->data, cMax, cMaxLen); - *written = sizeof(AggregateSignatureResultHeader) + cMaxLen + cMinLen - + sizeof(uint16_t) * 2; -} - const char backslash = '\\'; inline bool PrimitiveProcessor::isEscapedChar(char c) diff --git a/primitives/linux-port/primitiveprocessor.h b/primitives/linux-port/primitiveprocessor.h index 366e90daf..677d0538e 100644 --- a/primitives/linux-port/primitiveprocessor.h +++ b/primitives/linux-port/primitiveprocessor.h @@ -227,19 +227,6 @@ public: */ void p_IdxList(const IndexListHeader* rqst, IndexListHeader* rslt, int mode = 1); - /** @brief The p_AggregateSignature primitive processor. - * - * The p_AggregateSignature primitive processor. It operates on a dictionary - * block and assumes the continuation pointer is not used. - * @param in The input parameters - * @param out A pointer to a buffer where the result will be written. - * @param outSize The size of the output buffer in bytes. - * @param written (out parameter) A pointer to 1 int, which will contain the - * number of bytes written to out. - */ - void p_AggregateSignature(const AggregateSignatureRequestHeader* in, - AggregateSignatureResultHeader* out, unsigned outSize, unsigned* written, bool utf8); - /** @brief The p_Col primitive processor. * * The p_Col primitive processor. It operates on a column block specified using setBlockPtr(). 
diff --git a/primitives/primproc/dictstep.cpp b/primitives/primproc/dictstep.cpp index abd99ada3..107500d05 100644 --- a/primitives/primproc/dictstep.cpp +++ b/primitives/primproc/dictstep.cpp @@ -44,7 +44,6 @@ namespace primitiveprocessor { extern uint32_t dictBufferSize; -extern bool utf8; DictStep::DictStep() : Command(DICT_STEP), strValues(NULL), filterCount(0), bufferSize(0) @@ -174,7 +173,8 @@ void DictStep::issuePrimitive(bool isFilter) } bpp->pp.setLikeFilter(likeFilter); - bpp->pp.p_Dictionary(primMsg, &result, utf8, isFilter, eqFilter, eqOp); + // MCOL-3536 We shouldn't need to pass in utf8 -- maybe?? + bpp->pp.p_Dictionary(primMsg, &result, true, isFilter, eqFilter, eqOp); } void DictStep::copyResultToTmpSpace(OrderedToken* ot) diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp index 13abe2a04..26ae03ffa 100644 --- a/primitives/primproc/primitiveserver.cpp +++ b/primitives/primproc/primitiveserver.cpp @@ -160,8 +160,6 @@ std::map djLock; // djLock synchronizes destroy and j volatile int32_t asyncCounter; const int asyncMax = 20; // current number of asynchronous loads -extern bool utf8; - struct preFetchCond { //uint64_t lbid; @@ -1156,6 +1154,7 @@ int DictScanJob::operator()() PrimitiveProcessor pproc(gDebugLevel); TokenByScanResultHeader* output; QueryContext verInfo; + bool bUtf8; try { @@ -1167,6 +1166,25 @@ int DictScanJob::operator()() *fByteStream >> verInfo; cmd = (TokenByScanRequestHeader*) fByteStream->buf(); + // If charset is one of those that can be representedby standard ascii, + // we can get a performance improvement by using strcmp rather than + // the full charset compare system. 
+ switch (cmd->charsetNumber) + { + case 8: // latin1_swedish_ci + case 9: // latin2_general_ci + case 11: // ascii_general_ci + case 47: // latin1_bin + case 48: // latin1_general_ci + case 49: // latin1_general_cs + case 65: // ascii_bin + case 77: // latin2_bin + bUtf8 = false; + break; + default: + bUtf8 = true; + } + session = cmd->Hdr.SessionID; uniqueId = cmd->Hdr.UniqueID; runCount = cmd->Count; @@ -1211,7 +1229,8 @@ int DictScanJob::operator()() fLBIDTraceOn, session); pproc.setBlockPtr((int*) data); - pproc.p_TokenByScan(cmd, output, output_buf_size, utf8, eqFilter); + // MCOL-3536 We shouldn't need to pass in utf8 -- maybe?? + pproc.p_TokenByScan(cmd, output, output_buf_size, bUtf8, eqFilter); if (wasBlockInCache) output->CacheIO++; diff --git a/primitives/primproc/primproc.cpp b/primitives/primproc/primproc.cpp index 201e76fa7..04f9552c6 100644 --- a/primitives/primproc/primproc.cpp +++ b/primitives/primproc/primproc.cpp @@ -89,18 +89,15 @@ extern uint32_t lowPriorityThreads; extern int directIOFlag; extern int noVB; - DebugLevel gDebugLevel; Logger* mlp; -string systemLang; -bool utf8 = false; bool isDebug( const DebugLevel level ) { return level <= gDebugLevel; } -} +} //namespace primitiveprocessor namespace { @@ -316,13 +313,6 @@ void* waitForSIGUSR1(void* p) int main(int argc, char* argv[]) { - // get and set locale language - systemLang = funcexp::utf8::idb_setlocale(); - - if ( systemLang != "en_US.UTF-8" && - systemLang.find("UTF") != string::npos ) - utf8 = true; - // This is unset due to the way we start it program_invocation_short_name = const_cast("PrimProc"); diff --git a/procmgr/main.cpp b/procmgr/main.cpp index f1f7bfb47..79a806a7a 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -101,10 +101,32 @@ int main(int argc, char** argv) #ifndef _MSC_VER setuid(0); // set effective ID to root; ignore return status #endif - // get and set locale language - string systemLang = "C"; - - setlocale(LC_ALL, systemLang.c_str()); + // Set locale 
language + const char* pLoc = utf8::idb_setlocale(); + try + { + logging::LoggingID lid(17); // ProcessManager + logging::MessageLog ml(lid); + logging::Message msg(1); + logging::Message::Args args; + if (pLoc) + { + // Log one line + args.add("Set locale to "); + args.add(pLoc); + msg.format( args ); + } + else + { + args.add("Failed to set locale "); + msg.format( args ); + } + ml.logErrorMessage(msg); + } + catch (...) + { + // Ignoring for time being. + } idbdatafile::IDBPolicy::configIDBPolicy(); diff --git a/procmon/main.cpp b/procmon/main.cpp index 9112d3962..34b4ad656 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -26,7 +26,7 @@ namespace bi = boost::interprocess; #include "installdir.h" #include "IDBPolicy.h" - +#include "utils_utf8.h" #include "crashtrace.h" using namespace std; @@ -177,10 +177,8 @@ int main(int argc, char** argv) if (p && *p) USER = p; - // get and set locale language - string systemLang = "C"; - - setlocale(LC_ALL, systemLang.c_str()); + // Set locale language + utf8::idb_setlocale(); //get tmp log directory tmpLogDir = startup::StartUp::tmpDir(); diff --git a/tools/configMgt/autoConfigure.cpp b/tools/configMgt/autoConfigure.cpp index 2f30a14bd..d48399893 100644 --- a/tools/configMgt/autoConfigure.cpp +++ b/tools/configMgt/autoConfigure.cpp @@ -372,23 +372,6 @@ int main(int argc, char* argv[]) catch (...) { } - //setup System Language - string systemLang = "C"; - - try - { - systemLang = sysConfigOld->getConfig(SystemSection, "SystemLang"); - } - catch (...) - { } - - try - { - sysConfigNew->setConfig(SystemSection, "SystemLang", systemLang); - } - catch (...) 
- {} - //setup HA IP Address string HA_IPadd; diff --git a/tools/pingproc/pingproc.cpp b/tools/pingproc/pingproc.cpp index f1dd7b8bc..e417a2baa 100644 --- a/tools/pingproc/pingproc.cpp +++ b/tools/pingproc/pingproc.cpp @@ -466,6 +466,7 @@ const ByteStream formatDictionaryScanMsg(const uint64_t lbid, hdr.COP2 = oidOp.COP2(); hdr.NVALS = oidOp.FilterCount(); hdr.Count = count; + hdr.charsetNumber = oidOp.ColumnType().charsetNumber; idbassert(hdr.Count > 0); primMsg.load((const uint8_t*) &hdr.ism, sizeof(ISMPacketHeader)); diff --git a/utils/common/CMakeLists.txt b/utils/common/CMakeLists.txt index 17dac6385..98405a032 100644 --- a/utils/common/CMakeLists.txt +++ b/utils/common/CMakeLists.txt @@ -9,7 +9,8 @@ set(common_LIB_SRCS cgroupconfigurator.cpp MonitorProcMem.cpp nullvaluemanip.cpp - threadnaming.cpp) + threadnaming.cpp + utils_utf8.cpp) add_library(common SHARED ${common_LIB_SRCS}) diff --git a/utils/funcexp/func_between.cpp b/utils/funcexp/func_between.cpp index d742c2d16..92abb080f 100644 --- a/utils/funcexp/func_between.cpp +++ b/utils/funcexp/func_between.cpp @@ -39,10 +39,6 @@ using namespace execplan; #include "errorids.h" using namespace logging; -#include "utils_utf8.h" -using namespace funcexp; - - namespace { template diff --git a/utils/funcexp/funcexp.cpp b/utils/funcexp/funcexp.cpp index 5f15ec991..79039c923 100644 --- a/utils/funcexp/funcexp.cpp +++ b/utils/funcexp/funcexp.cpp @@ -44,13 +44,6 @@ using namespace joblist; namespace funcexp { -namespace utf8 -{ -// A global loc object so we don't construct one at every compare -std::locale loc; - -bool JPcodePoint = false; // extern-ed in utils_utf8.h -} /* static */ FuncExp* FuncExp::fInstance = 0; diff --git a/utils/funcexp/funchelpers.h b/utils/funcexp/funchelpers.h index 013b81b0d..8f017d544 100644 --- a/utils/funcexp/funchelpers.h +++ b/utils/funcexp/funchelpers.h @@ -41,8 +41,9 @@ #include "intervalcolumn.h" #include "treenode.h" +#ifndef ULONGLONG_MAX #define ULONGLONG_MAX ulonglong_max - 
+#endif namespace funcexp { namespace helpers diff --git a/utils/idbdatafile/IDBPolicy.cpp b/utils/idbdatafile/IDBPolicy.cpp index b88c918a5..9d2766fcf 100644 --- a/utils/idbdatafile/IDBPolicy.cpp +++ b/utils/idbdatafile/IDBPolicy.cpp @@ -122,9 +122,9 @@ bool IDBPolicy::isLocalFile( const std::string& path ) { boost::filesystem::path filepath( path ); #ifdef _MSC_VER - size_t strmblen = funcexp::utf8::idb_wcstombs(0, filepath.extension().c_str(), 0) + 1; + size_t strmblen = utf8::idb_wcstombs(0, filepath.extension().c_str(), 0) + 1; char* outbuf = (char*)alloca(strmblen * sizeof(char)); - strmblen = funcexp::utf8::idb_wcstombs(outbuf, filepath.extension().c_str(), strmblen); + strmblen = utf8::idb_wcstombs(outbuf, filepath.extension().c_str(), strmblen); string fileExt(outbuf, strmblen); #else //string fileExt = filepath.extension().c_str(); diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 71f43652c..faa1dbad1 100755 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -49,18 +49,15 @@ #include "funcexp.h" #include "rowaggregation.h" #include "calpontsystemcatalog.h" -//#include "utils_utf8.h" +#include "utils_utf8.h" //..comment out NDEBUG to enable assertions, uncomment NDEBUG to disable //#define NDEBUG -#include "funcexp/utils_utf8.h" - using namespace std; using namespace boost; using namespace dataconvert; - // inlines of RowAggregation that used only in this file namespace { @@ -401,7 +398,7 @@ void RowAggregation::updateStringMinMax(string val1, string val2, int64_t col, i #ifdef STRCOLL_ENH__ else { - int tmp = funcexp::utf8::idb_strcoll(val1.c_str(), val2.c_str()); + int tmp = utf8::idb_strcoll(val1.c_str(), val2.c_str()); if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) || (tmp > 0 && func == rowgroup::ROWAGG_MAX)) diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index a8bae1086..31702fb43 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ 
-26,10 +26,11 @@ // Author: Patrick LeBlanc , (C) 2008 // -#include +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + //#define NDEBUG -#include -#include #include #include using namespace std; @@ -43,7 +44,6 @@ using namespace messageqcpp; #include "calpontsystemcatalog.h" using namespace execplan; -#include "joblisttypes.h" #include "nullvaluemanip.h" #include "rowgroup.h" diff --git a/utils/windowfunction/idborderby.cpp b/utils/windowfunction/idborderby.cpp index 8009492b3..bc9ac16dd 100644 --- a/utils/windowfunction/idborderby.cpp +++ b/utils/windowfunction/idborderby.cpp @@ -19,12 +19,18 @@ // $Id: idborderby.cpp 3932 2013-06-25 16:08:10Z xlou $ +#include +#include #include #include #include #include using namespace std; +#include "objectreader.h" +#include "calpontselectexecutionplan.h" +#include "rowgroup.h" + #include using namespace boost; @@ -45,6 +51,8 @@ using namespace rowgroup; #include "joblisttypes.h" +// See agg_arg_charsets in sql_type.h to see conversion rules for +// items that have different char sets namespace ordering { int TinyIntCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2) @@ -294,11 +302,11 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2) const char* s1 = (const char*)l->row1().getStringPointer(fSpec.fIndex); const char* s2 = (const char*)l->row2().getStringPointer(fSpec.fIndex); // For Japanese, coll.compare() may not be as correct as strncmp - if (JPcodePoint) +// if (JPcodePoint) { - ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2)); +// ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2)); } - else +// else { const std::collate& coll = std::use_facet >(loc); ret = fSpec.fAsc * coll.compare(s1, s1+len1, s2, s2+len2); diff --git a/versioning/BRM/masternode.cpp b/versioning/BRM/masternode.cpp index a43c41479..bd28649db 100644 --- a/versioning/BRM/masternode.cpp +++ b/versioning/BRM/masternode.cpp @@ -102,9 +102,8 @@ void reload(int 
num) int main(int argc, char** argv) { - // get and set locale language - BUG 5362 - string systemLang = "C"; - systemLang = funcexp::utf8::idb_setlocale(); + // Set locale language + utf8::idb_setlocale(); BRM::logInit ( BRM::SubSystemLogId_controllerNode ); diff --git a/versioning/BRM/slavenode.cpp b/versioning/BRM/slavenode.cpp index 7c0bf2638..5a981634c 100644 --- a/versioning/BRM/slavenode.cpp +++ b/versioning/BRM/slavenode.cpp @@ -78,10 +78,8 @@ void reset(int sig) int main(int argc, char** argv) { - - // get and set locale language - BUG 5362 - string systemLang = "C"; - systemLang = funcexp::utf8::idb_setlocale(); + // Set locale language + utf8::idb_setlocale(); BRM::logInit ( BRM::SubSystemLogId_workerNode ); diff --git a/writeengine/bulk/cpimport.cpp b/writeengine/bulk/cpimport.cpp index 4f596ba7a..6242c40a5 100644 --- a/writeengine/bulk/cpimport.cpp +++ b/writeengine/bulk/cpimport.cpp @@ -1091,9 +1091,8 @@ int main(int argc, char** argv) #endif setupSignalHandlers(); - // Set up LOCALE - BUG 5362 - std::string systemLang("C"); - systemLang = funcexp::utf8::idb_setlocale(); + // Set locale language + utf8::idb_setlocale(); // Initialize singleton instance of syslogging if (argc > 0) @@ -1377,7 +1376,7 @@ int main(int argc, char** argv) //-------------------------------------------------------------------------- task = TASK_LOAD_JOBFILE; rc = curJob.loadJobInfo( sFileName.string(), bUseTempJobFile, - systemLang, argc, argv, bLogInfo2ToConsole, bValidateColumnList ); + argc, argv, bLogInfo2ToConsole, bValidateColumnList ); if ( rc != NO_ERROR ) { diff --git a/writeengine/bulk/we_bulkload.cpp b/writeengine/bulk/we_bulkload.cpp index 9642e19fc..9c97fc5c0 100644 --- a/writeengine/bulk/we_bulkload.cpp +++ b/writeengine/bulk/we_bulkload.cpp @@ -231,7 +231,6 @@ int BulkLoad::setAlternateImportDir(const std::string& loadDir, // PARAMETERS: // fullName - full filename for job description file // bUseTempJobFile - are we using a temporary job XML file -// 
systemLang-SystemLang setting used to set locale. // argc - command line arg count // argv - command line arguments // bLogInfo2ToConsole - Log info2 msgs to the console @@ -244,7 +243,6 @@ int BulkLoad::setAlternateImportDir(const std::string& loadDir, int BulkLoad::loadJobInfo( const string& fullName, bool bUseTempJobFile, - const string& systemLang, int argc, char** argv, bool bLogInfo2ToConsole, @@ -285,13 +283,8 @@ int BulkLoad::loadJobInfo( else fLog.setLogFileName(logFile.c_str(), errlogFile.c_str(), (int)bLogInfo2ToConsole); - std::ostringstream ossLocale; - ossLocale << "Locale is : " << systemLang; - if (!(disableConsoleOutput())) { - fLog.logMsg( ossLocale.str(), MSGLVL_INFO2 ); - if (!BulkLoad::disableConsoleOutput()) cout << "Log file for this job: " << logFile << std::endl; diff --git a/writeengine/bulk/we_bulkload.h b/writeengine/bulk/we_bulkload.h index 7ea707177..682567b1b 100644 --- a/writeengine/bulk/we_bulkload.h +++ b/writeengine/bulk/we_bulkload.h @@ -78,7 +78,6 @@ public: */ EXPORT int loadJobInfo( const std::string& fullFileName, bool bUseTempJobFile, - const std::string& systemLang, int argc, char** argv, bool bLogInfo2ToConsole, diff --git a/writeengine/bulk/we_bulkloadbuffer.cpp b/writeengine/bulk/we_bulkloadbuffer.cpp index 60a0a457e..069aa0c64 100644 --- a/writeengine/bulk/we_bulkloadbuffer.cpp +++ b/writeengine/bulk/we_bulkloadbuffer.cpp @@ -561,7 +561,7 @@ void BulkLoadBuffer::convert(char* field, int fieldLength, // on disk (e.g. 5 for a varchar(5) instead of 8). 
if (fieldLength > column.definedWidth) { - uint8_t truncate_point = funcexp::utf8::utf8_truncate_point(field, column.definedWidth); + uint8_t truncate_point = utf8::utf8_truncate_point(field, column.definedWidth); memcpy( charTmpBuf, field, column.definedWidth - truncate_point ); bufStats.satCount++; } diff --git a/writeengine/dictionary/we_dctnry.cpp b/writeengine/dictionary/we_dctnry.cpp index e8b5e03c8..df87a9789 100644 --- a/writeengine/dictionary/we_dctnry.cpp +++ b/writeengine/dictionary/we_dctnry.cpp @@ -862,7 +862,7 @@ int Dctnry::insertDctnry(const char* buf, // @Bug 2565: Truncate any strings longer than schema's column width if (curSig.size > m_colWidth) { - uint8_t truncate_point = funcexp::utf8::utf8_truncate_point((const char*)curSig.signature, m_colWidth); + uint8_t truncate_point = utf8::utf8_truncate_point((const char*)curSig.signature, m_colWidth); curSig.size = m_colWidth - truncate_point; ++truncCount; } diff --git a/writeengine/server/we_server.cpp b/writeengine/server/we_server.cpp index 826b2dc73..d0cc3d889 100644 --- a/writeengine/server/we_server.cpp +++ b/writeengine/server/we_server.cpp @@ -101,15 +101,12 @@ int setupResources() int main(int argc, char** argv) { - // get and set locale language - string systemLang = "C"; - systemLang = funcexp::utf8::idb_setlocale(); + // Set locale language + utf8::idb_setlocale(); // This is unset due to the way we start it program_invocation_short_name = const_cast("WriteEngineServ"); - printf ("Locale is : %s\n", systemLang.c_str() ); - int gDebug = 0; int c; while ((c = getopt(argc, argv, "d")) != EOF)