From 57152cf88188d2fdf4f9b22e4839a7c5a552ae2e Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 23 May 2019 10:42:25 -0500 Subject: [PATCH 01/26] MCOL-1559 experimental --- dbcon/execplan/predicateoperator.h | 43 ++++++++++++++--- utils/funcexp/utils_utf8.h | 75 ++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 7 deletions(-) diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index 08f0c40cf..b35680ffc 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -114,6 +114,8 @@ private: template inline bool numericCompare(result_t op1, result_t op2); inline bool strCompare(const std::string& op1, const std::string& op2); + // MCOL-1559 + inline bool strTrimCompare(const std::string& op1, const std::string& op2); }; inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedColumn* lop, ReturnedColumn* rop) @@ -457,20 +459,16 @@ inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, Retu return !ret; } - // MCOL-1559 - std::string val1 = lop->getStrVal(row, isNull); if (isNull) return false; - std::string val2 = rop->getStrVal(row, isNull); + const std::string& val1 = lop->getStrVal(row, isNull); if (isNull) return false; - boost::trim_right_if(val1, boost::is_any_of(" ")); - boost::trim_right_if(val2, boost::is_any_of(" ")); + return strTrimCompare(val1, rop->getStrVal(row, isNull)) && !isNull; - return strCompare(val1, val2); - } + } //FIXME: ??? case execplan::CalpontSystemCatalog::VARBINARY: @@ -553,6 +551,37 @@ inline bool PredicateOperator::strCompare(const std::string& op1, const std::str } } +inline bool PredicateOperator::strTrimCompare(const std::string& op1, const std::string& op2) +{ + switch (fOp) + { + case OP_EQ: + return funcexp::utf8::idb_strtrimcoll(op1, op2) == 0; + + case OP_NE: + return funcexp::utf8::idb_strtrimcoll(op1, op2) != 0; + + case OP_GT: + return funcexp::utf8::idb_strtrimcoll(op1, op2) > 0; + + case OP_GE: + return funcexp::utf8::idb_strtrimcoll(op1, op2) >= 0; + + case OP_LT: + return funcexp::utf8::idb_strtrimcoll(op1, op2) < 0; + + case OP_LE: + return funcexp::utf8::idb_strtrimcoll(op1, op2) <= 0; + + default: + { + std::ostringstream oss; + oss << "Non support predicate operation: " << fOp; + throw logging::InvalidOperationExcept(oss.str()); + } + } +} + std::ostream& operator<<(std::ostream& os, const PredicateOperator& rhs); } diff --git a/utils/funcexp/utils_utf8.h b/utils/funcexp/utils_utf8.h index 6f5cb26a8..878f7aee7 100644 --- a/utils/funcexp/utils_utf8.h +++ b/utils/funcexp/utils_utf8.h @@ -130,6 +130,81 @@ int idb_strcoll(const char* str1, const char* str2) return strcoll(str1, str2); } +// MCOL-1559 Add a trimmed version of strcoll +// We want to compare str1 and str2 ignoring any trailing whitespace +// without making a copy of the strings (performance hit). +// I can't find any library that does this while paying attention to +// Locale. string::compare can be used to compare substrings, but +// it's a byte by byte compare. +// We find the last real character, and if a space is following, we +// temporarily replace it with a NULL, do the compare, and restore the +// original value to that spot. +// WARNING: This is not thread safe. It temporarily modifies the +// strings and assumes it is free to do so. +inline +int idb_strtrimcoll(const std::string& str1, const std::string& str2) +{ + const std::string whitespaces (" \t\f\v\n\r"); + int rtn = 0; + char orig1; + char orig2; + char* s1 = NULL; + char* s2 = NULL; + + // Set found1 to the first whitespace char in str1 + std::size_t found1 = str1.find_last_not_of(whitespaces); + if (found1 == std::string::npos) // Either the string is empty or all whitespace. + { + if (strlen(str1) > 0) // Is all whitespace + found1 = 0; // First whitespace position + } + else + { + if (strlen(str1) > found1+1) + ++found1; // move to the first whitespace position + else + found1 = std::string::npos; // No trailing whitespace + } + // Save the value at found1 and set to NULL + if (found1 != std::string::npos) + { + s1 = &const_cast(str1)[found1]; + orig1 = *s1; + *s1 = 0; + } + + // Set found2 to the first whitespace char in str2 + std::size_t found2 = str2.find_last_not_of(whitespaces); + if (found2 == std::string::npos) // Either the string is empty or all whitespace. + { + if (strlen(str2) > 0) // Is all whitespace + found2 = 0; // First whitespace position + } + else + { + if (strlen(str2) > found2+1) + ++found2; // move to the first whitespace position + else + found2 = std::string::npos; // No trailing whitespace + } + // Save the value at found2 and set to NULL + if (found2 != std::string::npos) + { + s2 = &const_cast(str2)[found2]; + orig2 = *s2; + *s2 = 0; + } + + // Compare the trimmed strings + rtn = idb_strcoll(str1.c_str(), str2.c_str()); + + // Restore the whitespace + if (s1) + *s1 = orig1; + if (s2) + *s2 = orig2; + return rtn; +} // BUG 5241 // Infinidb specific mbstowcs(). This will handle both windows and unix platforms From 474039b6cf07fc193678ee9bc48c6136bff9085a Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 27 Jun 2019 10:18:10 -0500 Subject: [PATCH 02/26] MCOL-1559 trim strings before compare on equal operator --- dbcon/execplan/predicateoperator.h | 1 + dbcon/joblist/jlf_execplantojoblist.cpp | 9 ++- primitives/linux-port/dictionary.cpp | 3 +- utils/funcexp/utils_utf8.h | 94 ++++++++----------------- 4 files changed, 38 insertions(+), 69 deletions(-) diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index e597f57ec..8b3a51fec 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -466,6 +466,7 @@ inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, Retu return false; return strTrimCompare(val1, rop->getStrVal(row, isNull)) && !isNull; +// return strCompare(val1, rop->getStrVal(row, isNull)) && !isNull; } diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index f3782c9d5..a95dd38cc 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1635,6 +1635,8 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) } string constval(cc->constval()); + // Because, on a filter, we want to compare ignoring trailing spaces +// boost::algorithm::trim_right(constval); CalpontSystemCatalog::OID dictOid = 0; @@ -2770,7 +2772,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) cop = COMPARE_NIL; string value = cc->constval(); - + // Because, on a filter, we want to compare ignoring trailing spaces +// boost::algorithm::trim_right(value); pds->addFilter(cop, value); } @@ -2853,7 +2856,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) cop = COMPARE_NIL; string value = cc->constval(); - + // Because, on a filter, we want to compare ignoring trailing spaces +// boost::algorithm::trim_right(value); pds->addFilter(cop, value); } @@ -2960,7 +2964,6 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) int64_t value = 0; string constval = cc->constval(); - // @bug 1151 string longer than colwidth of char/varchar. uint8_t rf = 0; bool isNull = ConstantColumn::NULLDATA == cc->type(); diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp index e5a334436..49ca3cff2 100644 --- a/primitives/linux-port/dictionary.cpp +++ b/primitives/linux-port/dictionary.cpp @@ -188,7 +188,8 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, { // MCOL-1246 Trim whitespace before match string strData(sig, siglen); - boost::trim_right_if(strData, boost::is_any_of(" ")); +// boost::trim_right_if(strData, boost::is_any_of(" ")); + boost::algorithm::trim_right(strData); bool gotIt = eqFilter->find(strData) != eqFilter->end(); if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE && diff --git a/utils/funcexp/utils_utf8.h b/utils/funcexp/utils_utf8.h index 878f7aee7..03035957e 100644 --- a/utils/funcexp/utils_utf8.h +++ b/utils/funcexp/utils_utf8.h @@ -75,6 +75,8 @@ std::string idb_setlocale() } char* pLoc = setlocale(LC_ALL, systemLang.c_str()); + // MCOL-1559 also set the C++ locale + std::locale::global(std::locale(pLoc)); if (pLoc == NULL) { @@ -131,78 +133,40 @@ int idb_strcoll(const char* str1, const char* str2) } // MCOL-1559 Add a trimmed version of strcoll -// We want to compare str1 and str2 ignoring any trailing whitespace -// without making a copy of the strings (performance hit). -// I can't find any library that does this while paying attention to -// Locale. string::compare can be used to compare substrings, but -// it's a byte by byte compare. -// We find the last real character, and if a space is following, we -// temporarily replace it with a NULL, do the compare, and restore the -// original value to that spot. -// WARNING: This is not thread safe. It temporarily modifies the -// strings and assumes it is free to do so. +// The intent here is to make no copy of the original strings and +// not modify them, so we can't use trim to deal with the spaces. inline int idb_strtrimcoll(const std::string& str1, const std::string& str2) { - const std::string whitespaces (" \t\f\v\n\r"); - int rtn = 0; - char orig1; - char orig2; - char* s1 = NULL; - char* s2 = NULL; - - // Set found1 to the first whitespace char in str1 + const std::string whitespaces (" "); + const char* s1 = str1.c_str(); + const char* s2 = str2.c_str(); + // Set found1 to the last non-whitespace char in str1 std::size_t found1 = str1.find_last_not_of(whitespaces); - if (found1 == std::string::npos) // Either the string is empty or all whitespace. - { - if (strlen(str1) > 0) // Is all whitespace - found1 = 0; // First whitespace position - } - else - { - if (strlen(str1) > found1+1) - ++found1; // move to the first whitespace position - else - found1 = std::string::npos; // No trailing whitespace - } - // Save the value at found1 and set to NULL - if (found1 != std::string::npos) - { - s1 = &const_cast(str1)[found1]; - orig1 = *s1; - *s1 = 0; - } - // Set found2 to the first whitespace char in str2 std::size_t found2 = str2.find_last_not_of(whitespaces); - if (found2 == std::string::npos) // Either the string is empty or all whitespace. - { - if (strlen(str2) > 0) // Is all whitespace - found2 = 0; // First whitespace position - } - else - { - if (strlen(str2) > found2+1) - ++found2; // move to the first whitespace position - else - found2 = std::string::npos; // No trailing whitespace - } - // Save the value at found2 and set to NULL - if (found2 != std::string::npos) - { - s2 = &const_cast(str2)[found2]; - orig2 = *s2; - *s2 = 0; - } - - // Compare the trimmed strings - rtn = idb_strcoll(str1.c_str(), str2.c_str()); - // Restore the whitespace - if (s1) - *s1 = orig1; - if (s2) - *s2 = orig2; + // Are both strings empty or all whitespace? + if (found1 == std::string::npos && found2 == std::string::npos) + { + return 0; // they match + } + // If str1 is empty or all spaces + if (found1 == std::string::npos && found2 != std::string::npos) + { + return -1; + } + // If str2 is empty or all spaces + if (found1 != std::string::npos && found2 == std::string::npos) + { + return 1; + } + + // Compare the (trimmed) strings + std::locale loc; + const std::collate& coll = std::use_facet >(loc); + int rtn = coll.compare(s1, s1+found1+1, s2, s2+found2+1); +// return coll.compare(s1, s1+found1, s2, s2+found2); return rtn; } From 59931768390d446da26754f7f53f8d00525b54a5 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 3 Jul 2019 11:18:23 -0500 Subject: [PATCH 03/26] MCOL-1559 add in trim for compare --- dbcon/joblist/jlf_execplantojoblist.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index a95dd38cc..92a25464c 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1636,7 +1636,7 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) string constval(cc->constval()); // Because, on a filter, we want to compare ignoring trailing spaces -// boost::algorithm::trim_right(constval); + boost::algorithm::trim_right(constval); CalpontSystemCatalog::OID dictOid = 0; @@ -2773,7 +2773,7 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) string value = cc->constval(); // Because, on a filter, we want to compare ignoring trailing spaces -// boost::algorithm::trim_right(value); + boost::algorithm::trim_right(value); pds->addFilter(cop, value); } @@ -2857,7 +2857,7 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) string value = cc->constval(); // Because, on a filter, we want to compare ignoring trailing spaces -// boost::algorithm::trim_right(value); + boost::algorithm::trim_right(value); pds->addFilter(cop, value); } From b3df052b9f3aa7a0623d0e40303d0598ec404d59 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 5 Jul 2019 12:53:01 -0500 Subject: [PATCH 04/26] MCOL-1559 only trim on spaces, not on tabs. Don't ask me why that is. --- dbcon/joblist/jlf_execplantojoblist.cpp | 6 +++--- primitives/linux-port/dictionary.cpp | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index 92a25464c..21458fa24 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1636,7 +1636,7 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) string constval(cc->constval()); // Because, on a filter, we want to compare ignoring trailing spaces - boost::algorithm::trim_right(constval); + boost::algorithm::trim_right_if(constval, boost::is_any_of(" ")); CalpontSystemCatalog::OID dictOid = 0; @@ -2773,7 +2773,7 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) string value = cc->constval(); // Because, on a filter, we want to compare ignoring trailing spaces - boost::algorithm::trim_right(value); + boost::algorithm::trim_right_if(value, boost::is_any_of(" ")); pds->addFilter(cop, value); } @@ -2857,7 +2857,7 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) string value = cc->constval(); // Because, on a filter, we want to compare ignoring trailing spaces - boost::algorithm::trim_right(value); + boost::algorithm::trim_right_if(value, boost::is_any_of(" ")); pds->addFilter(cop, value); } diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp index 49ca3cff2..e5a334436 100644 --- a/primitives/linux-port/dictionary.cpp +++ b/primitives/linux-port/dictionary.cpp @@ -188,8 +188,7 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, { // MCOL-1246 Trim whitespace before match string strData(sig, siglen); -// boost::trim_right_if(strData, boost::is_any_of(" ")); - boost::algorithm::trim_right(strData); + boost::trim_right_if(strData, boost::is_any_of(" ")); bool gotIt = eqFilter->find(strData) != eqFilter->end(); if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE && From cd79a42ec124347ce32c0e6bf80584f90b23ce3b Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 16 Jul 2019 17:14:17 -0500 Subject: [PATCH 05/26] MCOL-3404 Back out experimental changes from MCOL-3343 --- dbcon/joblist/joblistfactory.cpp | 9 ++------- utils/rowgroup/rowgroup.h | 3 ++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 66245a8e2..152358bf7 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -399,7 +399,6 @@ void checkHavingClause(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) void preProcessFunctionOnAggregation(const vector& scs, const vector& aggs, - const vector& wcs, JobInfo& jobInfo) { // append the simple columns if not already projected @@ -433,10 +432,6 @@ void preProcessFunctionOnAggregation(const vector& scs, for (vector::const_iterator i = aggs.begin(); i != aggs.end(); i++) { addAggregateColumn(*i, -1, jobInfo.projectionCols, jobInfo); - if (wcs.size() > 0) - { - jobInfo.nonConstDelCols.push_back(SRCP((*i)->clone())); - } } } @@ -488,12 +483,12 @@ void checkReturnedColumns(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) if (ac != NULL && ac->aggColumnList().size() > 0) { jobInfo.nonConstCols[i]->outputIndex(i); - preProcessFunctionOnAggregation(ac->simpleColumnList(), ac->aggColumnList(), ac->windowfunctionColumnList(), jobInfo); + preProcessFunctionOnAggregation(ac->simpleColumnList(), ac->aggColumnList(), jobInfo); } else if (fc != NULL && fc->aggColumnList().size() > 0) { jobInfo.nonConstCols[i]->outputIndex(i); - preProcessFunctionOnAggregation(fc->simpleColumnList(), fc->aggColumnList(), fc->windowfunctionColumnList(), jobInfo); + preProcessFunctionOnAggregation(fc->simpleColumnList(), fc->aggColumnList(), jobInfo); } } } diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index b91e0f0ef..2334d22bc 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -1772,7 +1772,8 @@ inline void copyRow(const Row& in, Row* out, uint32_t colCount) { if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY || in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB || - in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT)) + in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT || + in.getColTypes()[i] == execplan::CalpontSystemCatalog::CLOB)) out->setVarBinaryField(in.getVarBinaryStringField(i), i); else if (UNLIKELY(in.isLongString(i))) //out->setStringField(in.getStringField(i), i); From 05726a9da2b3aaa7ca8cbf608ccca4b7652c7e08 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 22 Jul 2019 17:51:26 -0500 Subject: [PATCH 06/26] MCOL-1559 Allow for the special case where LIKE is used on CHAR fields --- dbcon/joblist/jlf_execplantojoblist.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index 21458fa24..d73e79ff9 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1635,9 +1635,6 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) } string constval(cc->constval()); - // Because, on a filter, we want to compare ignoring trailing spaces - boost::algorithm::trim_right_if(constval, boost::is_any_of(" ")); - CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct = sc->colType(); @@ -1648,6 +1645,13 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) if (!sc->schemaName().empty() && sc->isInfiniDB() && !pc) ct = jobInfo.csc->colType(sc->oid()); + // Because, on a filter, we want to compare ignoring trailing spaces in many cases + // MaraiDB Server compares without trim for LIKE against CHAR. + if (ct.colDataType != execplan::CalpontSystemCatalog::CHAR || + sf->op()->op() != execplan::OP_LIKE) + { + boost::algorithm::trim_right_if(constval, boost::is_any_of(" ")); + } //X //@bug 339 nulls are not stored in dictionary if ((dictOid = isDictCol(ct)) > 0 && ConstantColumn::NULLDATA != cc->type()) From 78eb20ef4e86c5abc374294b1f10a10ae2ce5354 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 24 Jul 2019 12:59:59 -0500 Subject: [PATCH 07/26] MCOL-1559 remove the #define POSIX_REGEX and thus the use of regexec. Fix up the code changing #ifdef _MSC_VER to #ifdef POSIX_REGEX, where it applies to regexec. --- dbcon/execplan/constantcolumn.cpp | 14 ++++++------- dbcon/execplan/predicateoperator.h | 24 +++++++++++----------- dbcon/execplan/treenode.h | 6 +++--- dbcon/joblist/jlf_execplantojoblist.cpp | 6 ++---- primitives/linux-port/primitiveprocessor.h | 6 +++--- utils/funcexp/utils_utf8.h | 1 - 6 files changed, 27 insertions(+), 30 deletions(-) diff --git a/dbcon/execplan/constantcolumn.cpp b/dbcon/execplan/constantcolumn.cpp index 3a816b114..786610767 100644 --- a/dbcon/execplan/constantcolumn.cpp +++ b/dbcon/execplan/constantcolumn.cpp @@ -205,11 +205,11 @@ ConstantColumn::ConstantColumn( const ConstantColumn& rhs): if (fRegex.get() != NULL) { fRegex.reset(new CNX_Regex()); -#ifdef _MSC_VER - *fRegex = dataconvert::DataConvert::constructRegexp(fResult.strVal); -#else +#ifdef POSIX_REGEX string str = dataconvert::DataConvert::constructRegexp(fResult.strVal); regcomp(fRegex.get(), str.c_str(), REG_NOSUB | REG_EXTENDED); +#else + *fRegex = dataconvert::DataConvert::constructRegexp(fResult.strVal); #endif } } @@ -256,7 +256,7 @@ ConstantColumn::ConstantColumn(const uint64_t val, TYPE type) : ConstantColumn::~ConstantColumn() { -#ifndef _MSC_VER +#ifdef POSIX_REGEX if (fRegex.get() != NULL) regfree(fRegex.get()); @@ -394,11 +394,11 @@ void ConstantColumn::constructRegex() { //fRegex = new regex_t(); fRegex.reset(new CNX_Regex()); -#ifdef _MSC_VER - *fRegex = dataconvert::DataConvert::constructRegexp(fResult.strVal); -#else +#ifdef POSIX_REGEX string str = dataconvert::DataConvert::constructRegexp(fResult.strVal); regcomp(fRegex.get(), str.c_str(), REG_NOSUB | REG_EXTENDED); +#else + *fRegex = dataconvert::DataConvert::constructRegexp(fResult.strVal); #endif } diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index 8b3a51fec..8e19aa1b7 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -128,31 +128,31 @@ inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, Retu // considers these nulls significant, but they're not in the pattern, so we need to strip // them off... const std::string& v = lop->getStrVal(row, isNull); - char* c = (char*)alloca(v.length() + 1); - memcpy(c, v.c_str(), v.length()); - c[v.length()] = 0; - std::string vv(c); +// char* c = (char*)alloca(v.length() + 1); +// memcpy(c, v.c_str(), v.length()); +// c[v.length()] = 0; +// std::string vv(c); if (regex) { -#ifdef _MSC_VER - bool ret = boost::regex_match(vv, *regex); +#ifdef POSIX_REGEX + bool ret = regexec(regex.get(), v.c_str(), 0, NULL, 0) == 0; #else - bool ret = regexec(regex.get(), vv.c_str(), 0, NULL, 0) == 0; + bool ret = boost::regex_match(v.c_str(), *regex); #endif return (((fOp == OP_LIKE) ? ret : !ret) && !isNull); } else { -#ifdef _MSC_VER - boost::regex regex(dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull))); - bool ret = boost::regex_match(vv, regex); -#else +#ifdef POSIX_REGEX regex_t regex; std::string str = dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull)); regcomp(®ex, str.c_str(), REG_NOSUB | REG_EXTENDED); - bool ret = regexec(®ex, vv.c_str(), 0, NULL, 0) == 0; + bool ret = regexec(®ex, v.c_str(), 0, NULL, 0) == 0; regfree(®ex); +#else + boost::regex regex(dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull))); + bool ret = boost::regex_match(v.c_str(), regex); #endif return (((fOp == OP_LIKE) ? ret : !ret) && !isNull); } diff --git a/dbcon/execplan/treenode.h b/dbcon/execplan/treenode.h index 6a49fa16f..42518db86 100644 --- a/dbcon/execplan/treenode.h +++ b/dbcon/execplan/treenode.h @@ -167,10 +167,10 @@ typedef IDB_Decimal CNX_Decimal; * @brief IDB_Regex struct * */ -#ifdef _MSC_VER -typedef boost::regex IDB_Regex; -#else +#ifdef POSIX_REGEX typedef regex_t IDB_Regex; +#else +typedef boost::regex IDB_Regex; #endif typedef IDB_Regex CNX_Regex; diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index d73e79ff9..f15f919db 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1644,15 +1644,13 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) // type of pseudo column is set by connector if (!sc->schemaName().empty() && sc->isInfiniDB() && !pc) ct = jobInfo.csc->colType(sc->oid()); +//X // Because, on a filter, we want to compare ignoring trailing spaces in many cases - // MaraiDB Server compares without trim for LIKE against CHAR. - if (ct.colDataType != execplan::CalpontSystemCatalog::CHAR || - sf->op()->op() != execplan::OP_LIKE) + if (sf->op()->op() != execplan::OP_LIKE) { boost::algorithm::trim_right_if(constval, boost::is_any_of(" ")); } -//X //@bug 339 nulls are not stored in dictionary if ((dictOid = isDictCol(ct)) > 0 && ConstantColumn::NULLDATA != cc->type()) { diff --git a/primitives/linux-port/primitiveprocessor.h b/primitives/linux-port/primitiveprocessor.h index 366e90daf..a02f88b29 100644 --- a/primitives/linux-port/primitiveprocessor.h +++ b/primitives/linux-port/primitiveprocessor.h @@ -33,9 +33,9 @@ #include #endif -#ifdef __linux__ -#define POSIX_REGEX -#endif +//#ifdef __linux__ +//#define POSIX_REGEX +//#endif #ifdef POSIX_REGEX #include diff --git a/utils/funcexp/utils_utf8.h b/utils/funcexp/utils_utf8.h index 03035957e..9356ec737 100644 --- a/utils/funcexp/utils_utf8.h +++ b/utils/funcexp/utils_utf8.h @@ -166,7 +166,6 @@ int idb_strtrimcoll(const std::string& str1, const std::string& str2) std::locale loc; const std::collate& coll = std::use_facet >(loc); int rtn = coll.compare(s1, s1+found1+1, s2, s2+found2+1); -// return coll.compare(s1, s1+found1, s2, s2+found2); return rtn; } From 5266d7dc61e6cf140850367d6c681925712f8c19 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 30 Jul 2019 15:46:49 -0500 Subject: [PATCH 08/26] MCOL-1559 don't use facet compare if no trailing whitspcae --- utils/funcexp/utils_utf8.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/utils/funcexp/utils_utf8.h b/utils/funcexp/utils_utf8.h index 9356ec737..51eb63872 100644 --- a/utils/funcexp/utils_utf8.h +++ b/utils/funcexp/utils_utf8.h @@ -54,6 +54,10 @@ extern bool JPcodePoint; // code point ordering (Japanese UTF) flag, used in id const int MAX_UTF8_BYTES_PER_CHAR = 4; +// A global loc object so we don't construct one at every compare +static std::locale loc; +// Is there a way to construct a global reference to a facet? +// const std::collate& coll = std::use_facet >(loc); //Infinidb version of strlocale BUG 5362 //set System Locale "C" by default @@ -117,6 +121,9 @@ std::string idb_setlocale() if (systemLang.find("ja_JP") != std::string::npos) JPcodePoint = true; + std::locale localloc; + loc = localloc; + return systemLang; } @@ -138,7 +145,7 @@ int idb_strcoll(const char* str1, const char* str2) inline int idb_strtrimcoll(const std::string& str1, const std::string& str2) { - const std::string whitespaces (" "); + static const std::string whitespaces (" "); const char* s1 = str1.c_str(); const char* s2 = str2.c_str(); // Set found1 to the last non-whitespace char in str1 @@ -152,20 +159,28 @@ int idb_strtrimcoll(const std::string& str1, const std::string& str2) return 0; // they match } // If str1 is empty or all spaces - if (found1 == std::string::npos && found2 != std::string::npos) + if (found1 == std::string::npos) { return -1; } // If str2 is empty or all spaces - if (found1 != std::string::npos && found2 == std::string::npos) + if (found2 == std::string::npos) { return 1; } + // found1 and found2 point to the character that is not a space. + // compare wants it to point to one past. + found1 += 1; + found2 += 1; + // If no trimming needs doing, then strcoll is faster + if (found1 == str1.size() && found2 == str2.size()) + { + return idb_strcoll(s1, s2); + } // Compare the (trimmed) strings - std::locale loc; const std::collate& coll = std::use_facet >(loc); - int rtn = coll.compare(s1, s1+found1+1, s2, s2+found2+1); + int rtn = coll.compare(s1, s1+found1, s2, s2+found2); return rtn; } From 765d1d38d4055d5009246adba6ffa7cd47bd025e Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 31 Jul 2019 13:58:50 -0500 Subject: [PATCH 09/26] MCOL-174 Handle quoted numerics --- dbcon/mysql/ha_calpont_execplan.cpp | 2 +- utils/funcexp/func_bitwise.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 0fdf1e86b..a557cdad3 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -2899,7 +2899,7 @@ ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupp String val, *str = item->val_str(&val); string valStr; valStr.assign(str->ptr(), str->length()); - rc = new ConstantColumn(valStr); + rc = new ConstantColumn(valStr, ConstantColumn::NUM); break; } diff --git a/utils/funcexp/func_bitwise.cpp b/utils/funcexp/func_bitwise.cpp index 752c28561..bed4f5dec 100644 --- a/utils/funcexp/func_bitwise.cpp +++ b/utils/funcexp/func_bitwise.cpp @@ -93,10 +93,6 @@ bool getUIntValFromParm( { isNull = true; } - else - { - value = 0; - } } break; From 81e745256bf2d5a199720c7ed07c3e006ae13e63 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 31 Jul 2019 14:49:31 -0500 Subject: [PATCH 10/26] MCOL-174 Replace custom helpers::power() with standard pow(). helpers::power() breaks with #dec > 9 --- utils/funcexp/func_bitwise.cpp | 6 +++--- utils/funcexp/func_cast.cpp | 10 ++++++---- utils/funcexp/func_char.cpp | 5 +++-- utils/funcexp/func_elt.cpp | 5 +++-- utils/funcexp/func_makedate.cpp | 10 ++++++---- utils/funcexp/func_maketime.cpp | 15 +++++++++------ utils/funcexp/func_mod.cpp | 14 +++++++------- utils/funcexp/func_period_diff.cpp | 4 ++-- 8 files changed, 39 insertions(+), 30 deletions(-) diff --git a/utils/funcexp/func_bitwise.cpp b/utils/funcexp/func_bitwise.cpp index bed4f5dec..9f5295901 100644 --- a/utils/funcexp/func_bitwise.cpp +++ b/utils/funcexp/func_bitwise.cpp @@ -106,9 +106,9 @@ bool getUIntValFromParm( { d.value = 0; } - - int64_t tmpval = d.value / helpers::power(d.scale); - int lefto = (d.value - tmpval * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + int64_t tmpval = d.value / pow(10.0, dscale); + int lefto = (d.value - tmpval * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( tmpval >= 0 && lefto > 4 ) tmpval++; diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp index 97c5db075..8ccf73e0e 100644 --- a/utils/funcexp/func_cast.cpp +++ b/utils/funcexp/func_cast.cpp @@ -180,8 +180,9 @@ int64_t Func_cast_signed::getIntVal(Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - int64_t value = d.value / helpers::power(d.scale); - int lefto = (d.value - value * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + int64_t value = d.value / pow(10.0, dscale); + int lefto = (d.value - value * pow(10.0, dscale)) / pow(dscale - 1); if ( value >= 0 && lefto > 4 ) value++; @@ -328,14 +329,15 @@ uint64_t Func_cast_unsigned::getUintVal(Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); + double dscale = d.scale; if (d.value < 0) { return 0; } - uint64_t value = d.value / helpers::power(d.scale); - int lefto = (d.value - value * helpers::power(d.scale)) / helpers::power(d.scale - 1); + uint64_t value = d.value / pow(10.0, dscale); + int lefto = (d.value - value * pow(10.0, dscale)) / pow(dscale - 1); if ( value >= 0 && lefto > 4 ) value++; diff --git a/utils/funcexp/func_char.cpp b/utils/funcexp/func_char.cpp index bc54fe5c0..f6774452c 100644 --- a/utils/funcexp/func_char.cpp +++ b/utils/funcexp/func_char.cpp @@ -156,9 +156,10 @@ string Func_char::getStrVal(Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); + double dscale = d.scale; // get decimal and round up - int value = d.value / helpers::power(d.scale); - int lefto = (d.value - value * helpers::power(d.scale)) / helpers::power(d.scale - 1); + int value = d.value / pow(10.0, dscale); + int lefto = (d.value - value * pow(10.0, dscale)) / pow(dscale - 1); if ( lefto > 4 ) value++; diff --git a/utils/funcexp/func_elt.cpp b/utils/funcexp/func_elt.cpp index d34dafa78..faa8a234a 100644 --- a/utils/funcexp/func_elt.cpp +++ b/utils/funcexp/func_elt.cpp @@ -71,8 +71,9 @@ string Func_elt::getStrVal(rowgroup::Row& row, case CalpontSystemCatalog::DECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - number = d.value / helpers::power(d.scale); - int lefto = (d.value - number * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + number = d.value / pow(10.0, dscale); + int lefto = (d.value - number * pow(10.0, dscale)) / pow(dscale - 1); if ( number >= 0 && lefto > 4 ) number++; diff --git a/utils/funcexp/func_makedate.cpp b/utils/funcexp/func_makedate.cpp index 5d013728f..6b4537bb4 100644 --- a/utils/funcexp/func_makedate.cpp +++ b/utils/funcexp/func_makedate.cpp @@ -68,8 +68,9 @@ uint64_t makedate(rowgroup::Row& row, case CalpontSystemCatalog::DECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - year = d.value / helpers::power(d.scale); - int lefto = (d.value - year * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + year = d.value / pow(10.0, dscale); + int lefto = (d.value - year * pow(10.0, dscale)) / pow(dscale - 1); if ( year >= 0 && lefto > 4 ) year++; @@ -127,8 +128,9 @@ uint64_t makedate(rowgroup::Row& row, case CalpontSystemCatalog::DECIMAL: { IDB_Decimal d = parm[1]->data()->getDecimalVal(row, isNull); - int64_t tmp = d.value / helpers::power(d.scale); - int lefto = (d.value - tmp * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + int64_t tmp = d.value / pow(10.0, dscale); + int lefto = (d.value - tmp * pow(10.0, dscale)) / pow(dscale - 1); if ( tmp >= 0 && lefto > 4 ) tmp++; diff --git a/utils/funcexp/func_maketime.cpp b/utils/funcexp/func_maketime.cpp index 694e995a7..8b19ae557 100644 --- a/utils/funcexp/func_maketime.cpp +++ b/utils/funcexp/func_maketime.cpp @@ -74,8 +74,9 @@ string Func_maketime::getStrVal(rowgroup::Row& row, case CalpontSystemCatalog::DECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - hour = d.value / helpers::power(d.scale); - int lefto = (d.value - hour * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + hour = d.value / pow(10.0, dscale); + int lefto = (d.value - hour * pow(10.0, dscale)) / pow(dscale - 1); if ( hour >= 0 && lefto > 4 ) hour++; @@ -113,8 +114,9 @@ string Func_maketime::getStrVal(rowgroup::Row& row, case CalpontSystemCatalog::DECIMAL: { IDB_Decimal d = parm[1]->data()->getDecimalVal(row, isNull); - min = d.value / helpers::power(d.scale); - int lefto = (d.value - min * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + min = d.value / pow(10.0, dscale); + int lefto = (d.value - min * pow(10.0, dscale)) / pow(dscale - 1); if ( min >= 0 && lefto > 4 ) min++; @@ -158,8 +160,9 @@ string Func_maketime::getStrVal(rowgroup::Row& row, case CalpontSystemCatalog::DECIMAL: { IDB_Decimal d = parm[2]->data()->getDecimalVal(row, isNull); - sec = d.value / helpers::power(d.scale); - int lefto = (d.value - sec * helpers::power(d.scale)) / helpers::power(d.scale - 1); + double dscale = d.scale; + sec = d.value / pow(10.0, dscale); + int lefto = (d.value - sec * pow(10.0, dscale)) / pow(dscale - 1); if ( sec >= 0 && lefto > 4 ) sec++; diff --git a/utils/funcexp/func_mod.cpp b/utils/funcexp/func_mod.cpp index 4cbdf23af..f9b51eb28 100644 --- a/utils/funcexp/func_mod.cpp +++ b/utils/funcexp/func_mod.cpp @@ -74,10 +74,10 @@ IDB_Decimal Func_mod::getDecimalVal(Row& row, } IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - int64_t value = d.value / helpers::power(d.scale); - int lefto = d.value % helpers::power(d.scale); + int64_t value = d.value / pow(10.0, d.scale); + int lefto = d.value % pow(10.0, d.scale); - int64_t mod = (value % div) * helpers::power(d.scale) + lefto; + int64_t mod = (value % div) * pow(10.0, d.scale) + lefto; retValue.value = mod; retValue.scale = d.scale; @@ -164,7 +164,7 @@ double Func_mod::getDoubleVal(Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - int64_t value = d.value / helpers::power(d.scale); + int64_t value = d.value / pow(10.0, d.scale); mod = value % div; } @@ -268,7 +268,7 @@ long double Func_mod::getLongDoubleVal(Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - int64_t value = d.value / helpers::power(d.scale); + int64_t value = d.value / pow(10.0, d.scale); mod = value % div; } @@ -375,7 +375,7 @@ int64_t Func_mod::getIntVal(Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - int64_t value = d.value / helpers::power(d.scale); + int64_t value = d.value / pow(10.0, d.scale); mod = value % div; } @@ -473,7 +473,7 @@ uint64_t Func_mod::getUIntVal(Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - int64_t value = d.value / helpers::power(d.scale); + int64_t value = d.value / pow(10.0, d.scale); mod = value % div; } diff --git a/utils/funcexp/func_period_diff.cpp b/utils/funcexp/func_period_diff.cpp index faf15dbb4..27c24da83 100644 --- a/utils/funcexp/func_period_diff.cpp +++ b/utils/funcexp/func_period_diff.cpp @@ -85,7 +85,7 @@ int64_t Func_period_diff::getIntVal(rowgroup::Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); - period1 = d.value / helpers::power(d.scale); + period1 = d.value / pow(10.0, d.scale); break; } @@ -133,7 +133,7 @@ int64_t Func_period_diff::getIntVal(rowgroup::Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { IDB_Decimal d = parm[1]->data()->getDecimalVal(row, isNull); - period2 = d.value / helpers::power(d.scale); + period2 = d.value / pow(10.0, d.scale); break; } From e768a6c5ce555c730f3b45c0028bfdaa50299ed9 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 31 Jul 2019 15:03:12 -0500 Subject: [PATCH 11/26] MCOL-179 Don't round before divide for DIV. Mimic InnoDB behavior. --- utils/funcexp/func_div.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/utils/funcexp/func_div.cpp b/utils/funcexp/func_div.cpp index 327f5e71e..b82cfaa4b 100644 --- a/utils/funcexp/func_div.cpp +++ b/utils/funcexp/func_div.cpp @@ -51,6 +51,16 @@ int64_t Func_div::getIntVal(rowgroup::Row& row, { double val1 = parm[0]->data()->getDoubleVal(row, isNull); double val2 = parm[1]->data()->getDoubleVal(row, isNull); + + if (val2 == 0 || val2 == NAN) + { + isNull = true; + return 0; + } + // MCOL-179 InnoDB doesn't round or convert to int before dividing. + return static_cast(val1 / val2); + +#if 0 int64_t int_val2 = (int64_t)(val2 > 0 ? val2 + 0.5 : val2 - 0.5); if (int_val2 == 0) @@ -69,6 +79,7 @@ int64_t Func_div::getIntVal(rowgroup::Row& row, } return int_val1 / int_val2; +#endif } From e9a4412346dbb32fd5881f01680995bd6f5b005a Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 1 Aug 2019 18:20:13 +0100 Subject: [PATCH 12/26] MCOL-2219 Fix space handling in DDL parser Allow non-alphanumeric character after space in column names. --- dbcon/ddlpackage/ddl.l | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index bb65715da..62ced333b 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -65,7 +65,7 @@ double_quote \" grave_accent ` comment ("--"{non_newline}*) -extended_ident_cont [A-Za-z\200-\377_0-9\$#,()\[\].;\:\+\-\*\/\%\^\<\>\=!&|@\\] +extended_ident_cont [ A-Za-z\200-\377_0-9\$#,()\[\].;\:\+\-\*\/\%\^\<\>\=!&|@\\] self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] whitespace ({space}+|{comment}) @@ -190,8 +190,6 @@ BOOLEAN {return BOOLEAN;} \n { lineno++;} -{column_ident_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES); return IDENT;} - {whitespace} { /* ignore */ } From 82f5a985a0d6f8066e387b3dbe07a9d26b17e067 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 12 Aug 2019 08:50:47 +0100 Subject: [PATCH 13/26] MCOL-2219 Remove unused lexer patterns --- dbcon/ddlpackage/ddl.l | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 62ced333b..f04ede15e 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -75,10 +75,8 @@ ident_cont [A-Za-z\200-\377_0-9\$] identifier {ident_start}{ident_cont}* extended_identifier {ident_start}{extended_ident_cont}* /* fully qualified names regexes */ -ident_w_spaces {identifier}\x20* identifier_quoted {grave_accent}{extended_identifier}{grave_accent} identifier_double_quoted {double_quote}{extended_identifier}{double_quote} -column_ident_quoted {grave_accent}{ident_w_spaces}+{grave_accent} integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) From fd373dfbfb8a6025318b6ef4d30861b445d5b693 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 13 Aug 2019 15:20:28 -0500 Subject: [PATCH 14/26] MCOL-3419 Get rid of std::locale::global() from MCOL-1559. --- utils/funcexp/funcexp.cpp | 3 +++ utils/funcexp/utils_utf8.h | 9 ++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/utils/funcexp/funcexp.cpp b/utils/funcexp/funcexp.cpp index d933474f7..b0f7680d8 100644 --- a/utils/funcexp/funcexp.cpp +++ b/utils/funcexp/funcexp.cpp @@ -46,6 +46,9 @@ namespace funcexp { namespace utf8 { +// A global loc object so we don't construct one at every compare +std::locale loc; + bool JPcodePoint = false; // extern-ed in utils_utf8.h } diff --git a/utils/funcexp/utils_utf8.h b/utils/funcexp/utils_utf8.h index 51eb63872..2bcce9b31 100644 --- a/utils/funcexp/utils_utf8.h +++ b/utils/funcexp/utils_utf8.h @@ -36,7 +36,6 @@ #include #include - #include "alarmmanager.h" using namespace alarmmanager; @@ -55,7 +54,7 @@ extern bool JPcodePoint; // code point ordering (Japanese UTF) flag, used in id const int MAX_UTF8_BYTES_PER_CHAR = 4; // A global loc object so we don't construct one at every compare -static std::locale loc; +extern std::locale loc; // Is there a way to construct a global reference to a facet? // const std::collate& coll = std::use_facet >(loc); @@ -79,8 +78,6 @@ std::string idb_setlocale() } char* pLoc = setlocale(LC_ALL, systemLang.c_str()); - // MCOL-1559 also set the C++ locale - std::locale::global(std::locale(pLoc)); if (pLoc == NULL) { @@ -121,7 +118,8 @@ std::string idb_setlocale() if (systemLang.find("ja_JP") != std::string::npos) JPcodePoint = true; - std::locale localloc; + // MCOL-1559 Save off the locale to save runtime cpus + std::locale localloc(systemLang.c_str()); loc = localloc; return systemLang; @@ -148,6 +146,7 @@ int idb_strtrimcoll(const std::string& str1, const std::string& str2) static const std::string whitespaces (" "); const char* s1 = str1.c_str(); const char* s2 = str2.c_str(); + // Set found1 to the last non-whitespace char in str1 std::size_t found1 = str1.find_last_not_of(whitespaces); // Set found2 to the first whitespace char in str2 From ac4902c22f23d8ebcacfbb00ada41204b36413b9 Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Fri, 19 Jul 2019 09:54:54 -0500 Subject: [PATCH 15/26] Tentative fix for procmon d/ling DBRM files into data1. That screws things up on shared filesystems, and storagemanager, which has sort-of a shared filesystem view. --- procmgr/processmanager.cpp | 12 ++++++------ procmon/processmonitor.cpp | 38 +++++++++++++++++++++++++++++++------- procmon/processmonitor.h | 2 +- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index f05a7dcbf..f32a6f51b 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -9210,7 +9210,7 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa } catch (...) { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknow exception", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); returnStatus = oam::API_FAILURE; } @@ -9281,7 +9281,7 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa } catch (...) { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknow exception", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); returnStatus = oam::API_FAILURE; } @@ -9316,7 +9316,7 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa } catch (...) { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknow exception", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); pthread_mutex_unlock(&THREAD_LOCK); return oam::API_FAILURE; } @@ -9391,7 +9391,7 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa } catch (...) { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknow exception", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); pthread_mutex_unlock(&THREAD_LOCK); return oam::API_FAILURE; } @@ -9411,7 +9411,7 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa } catch (...) { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknow exception", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); pthread_mutex_unlock(&THREAD_LOCK); return oam::API_FAILURE; } @@ -9428,7 +9428,7 @@ int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleNa } catch (...) { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknow exception", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); returnStatus = oam::API_FAILURE; } diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index f22be1b87..5380a32f2 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -22,6 +22,10 @@ ***************************************************************************/ #include +#include +#include +#include +#include #include "columnstoreversion.h" #include "IDBDataFile.h" @@ -40,6 +44,7 @@ using namespace logging; using namespace config; using namespace idbdatafile; +namespace bf = boost::filesystem; extern string systemOAM; extern string dm_server; @@ -505,7 +510,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO break; } - processList::iterator listPtr; processList* aPtr = config.monitoredListPtr(); listPtr = aPtr->begin(); @@ -2526,7 +2530,7 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, system(cmd.c_str()); // if Non Parent OAM Module, get the dbmr data from Parent OAM Module - if ( !gOAMParentModuleFlag && !HDFS ) + if ( !gOAMParentModuleFlag && !HDFS && DBRootStorageType != "storagemanager") { //create temp dbrm directory @@ -2551,18 +2555,24 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, // system(cmd.c_str()); // go request files from parent OAM module - if ( getDBRMdata() != oam::API_SUCCESS ) + if ( getDBRMdata(&DBRMDir) != oam::API_SUCCESS ) { log.writeLog(__LINE__, "Error: getDBRMdata failed", LOG_TYPE_ERROR); sendAlarm("DBRM", DBRM_LOAD_DATA_ERROR, SET); return oam::API_MINOR_FAILURE; } + // DBRMDir might have changed, so need to change DBRMroot + bf::path tmp(DBRMroot); + tmp = tmp.filename(); + DBRMroot = (bf::path(DBRMDir) / tmp).string(); + sendAlarm("DBRM", DBRM_LOAD_DATA_ERROR, CLEAR); // change DBRMroot to temp DBRMDir path // DBRMroot = tempDBRMDir + "/BRM_saves"; } + // // run the 'load_brm' script first if files exist // @@ -2634,9 +2644,15 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, // now delete the dbrm data from local disk if ( !gOAMParentModuleFlag && !HDFS && DataRedundancyConfig == "n") { + IDBFileSystem &fs = IDBPolicy::getFs(DBRMDir); + fs.remove(DBRMDir.c_str()); + log.writeLog(__LINE__, "removed downloaded DBRM files at " + DBRMDir, LOG_TYPE_DEBUG); + + #if 0 string cmd = "rm -f " + DBRMDir + "/*"; system(cmd.c_str()); log.writeLog(__LINE__, "removed DBRM file with command: " + cmd, LOG_TYPE_DEBUG); + #endif } } else @@ -4267,7 +4283,7 @@ int ProcessMonitor::processRestarted( std::string processName, bool manual) * * ******************************************************************************************/ -int ProcessMonitor::getDBRMdata() +int ProcessMonitor::getDBRMdata(string *path) { MonitorLog log; @@ -4350,6 +4366,11 @@ int ProcessMonitor::getDBRMdata() bool journalFile = false; + boost::uuids::uuid u = boost::uuids::random_generator()(); + bf::path pTmp = bf::path(*path) / boost::uuids::to_string(u); + *path = pTmp.string(); + log.writeLog(__LINE__, "Downloading DBRM files to " + *path, LOG_TYPE_DEBUG); + for ( int i = 0 ; i < numFiles ; i ++ ) { string fileName; @@ -4391,10 +4412,14 @@ int ProcessMonitor::getDBRMdata() // fileName = temp1; // } + bf::path pFilename(fileName); + pFilename = pTmp / pFilename.filename(); + const char *cFilename = pFilename.string().c_str(); + boost::scoped_ptr out(IDBDataFile::open( - IDBPolicy::getType(fileName.c_str(), + IDBPolicy::getType(cFilename, IDBPolicy::WRITEENG), - fileName.c_str(), "w", 0)); + cFilename, "w", 0)); // read file data try @@ -6279,7 +6304,6 @@ int ProcessMonitor::checkDataMount() return API_SUCCESS; } - //go unmount disk NOT assigned to this pm unmountExtraDBroots(); diff --git a/procmon/processmonitor.h b/procmon/processmonitor.h index e8c458006..6203fa218 100644 --- a/procmon/processmonitor.h +++ b/procmon/processmonitor.h @@ -478,7 +478,7 @@ public: */ int updateConfigFile(messageqcpp::ByteStream msg); - int getDBRMdata(); + int getDBRMdata(std::string *path); /** *@brief Send Msg to Process Monitor From 7f4d060dc3da313001ccf9ffb0f8bd3629b9dc53 Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Fri, 16 Aug 2019 11:54:29 -0500 Subject: [PATCH 16/26] Removed some storage-manager specific stuff from a cherry-picked commit. --- procmon/processmonitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 5380a32f2..06a3acf50 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -2530,7 +2530,7 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, system(cmd.c_str()); // if Non Parent OAM Module, get the dbmr data from Parent OAM Module - if ( !gOAMParentModuleFlag && !HDFS && DBRootStorageType != "storagemanager") + if ( !gOAMParentModuleFlag && !HDFS) { //create temp dbrm directory From 5f497a0517556ebc514c634ff945b77e1a326dba Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Fri, 16 Aug 2019 13:11:37 -0500 Subject: [PATCH 17/26] Create dir to download BRM data to. --- procmon/processmonitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 06a3acf50..bab23fde1 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -4368,6 +4368,7 @@ int ProcessMonitor::getDBRMdata(string *path) boost::uuids::uuid u = boost::uuids::random_generator()(); bf::path pTmp = bf::path(*path) / boost::uuids::to_string(u); + bf::create_directories(pTmp); *path = pTmp.string(); log.writeLog(__LINE__, "Downloading DBRM files to " + *path, LOG_TYPE_DEBUG); From 26a0768b88d6d030b9926d9dd46392ad8903f3cc Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Fri, 16 Aug 2019 13:19:13 -0500 Subject: [PATCH 18/26] Include the right header file... --- procmon/processmonitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index bab23fde1..f1504561f 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -22,7 +22,7 @@ ***************************************************************************/ #include -#include +#include #include #include #include From 95fcc3dcbbf65570844c64a99d6c1113f001da1d Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 20 Aug 2019 09:50:43 -0500 Subject: [PATCH 19/26] MCOL-3423 Don't move decimal for LONG DOUBLE. Clear long double extra bits after copy, not before. --- utils/rowgroup/rowgroup.h | 5 +++-- utils/windowfunction/wf_stats.cpp | 7 +++++-- utils/windowfunction/wf_sum_avg.cpp | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 2334d22bc..1e5df447b 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -1036,12 +1036,13 @@ inline void Row::setFloatField(float val, uint32_t colIndex) inline void Row::setLongDoubleField(long double val, uint32_t colIndex) { + uint8_t* p = &data[offsets[colIndex]]; + *((long double*)p) = val; if (sizeof(long double) == 16) { // zero out the unused portion as there may be garbage there. - *((uint64_t*)&val+1) &= 0x000000000000FFFFULL; + *((uint64_t*)p+1) &= 0x000000000000FFFFULL; } - *((long double*) &data[offsets[colIndex]]) = val; } inline void Row::setVarBinaryField(const std::string& val, uint32_t colIndex) diff --git a/utils/windowfunction/wf_stats.cpp b/utils/windowfunction/wf_stats.cpp index db4b107ee..8b06eda61 100644 --- a/utils/windowfunction/wf_stats.cpp +++ b/utils/windowfunction/wf_stats.cpp @@ -140,6 +140,7 @@ void WF_stats::resetData() template void WF_stats::operator()(int64_t b, int64_t e, int64_t c) { + CDT cdt; if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) @@ -163,7 +164,7 @@ void WF_stats::operator()(int64_t b, int64_t e, int64_t c) continue; T valIn; - getValue(colIn, valIn); + getValue(colIn, valIn, &cdt); long double val = (long double) valIn; fSum1 += val; @@ -177,7 +178,9 @@ void WF_stats::operator()(int64_t b, int64_t e, int64_t c) int scale = fRow.getScale(colIn); long double factor = pow(10.0, scale); - if (scale != 0) // adjust the scale if necessary + // adjust the scale if necessary + if (scale != 0 && + cdt != CalpontSystemCatalog::LONGDOUBLE) { fSum1 /= factor; fSum2 /= factor * factor; diff --git a/utils/windowfunction/wf_sum_avg.cpp b/utils/windowfunction/wf_sum_avg.cpp index 4632496df..d0d5f46f7 100644 --- a/utils/windowfunction/wf_sum_avg.cpp +++ b/utils/windowfunction/wf_sum_avg.cpp @@ -264,13 +264,15 @@ void WF_sum_avg::operator()(int64_t b, int64_t e, int64_t c) continue; T valIn; - getValue(colIn, valIn); + CDT cdt; + getValue(colIn, valIn, &cdt); // checkSumLimit(fSum, valIn); if ((!fDistinct) || (fSet.find(valIn) == fSet.end())) { long double val = valIn; - if (scale) + if (scale && + cdt != CalpontSystemCatalog::LONGDOUBLE) { val /= pow(10.0, scale); } From 608a042065030dd6430e8a568ffc7e84f6864619 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 20 Aug 2019 16:02:39 -0500 Subject: [PATCH 20/26] MCOL-3423 revert PR #806, which reverted ostensibly no longer needed code from MCOL-3343. That code is needed. --- dbcon/joblist/joblistfactory.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 152358bf7..66245a8e2 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -399,6 +399,7 @@ void checkHavingClause(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) void preProcessFunctionOnAggregation(const vector& scs, const vector& aggs, + const vector& wcs, JobInfo& jobInfo) { // append the simple columns if not already projected @@ -432,6 +433,10 @@ void preProcessFunctionOnAggregation(const vector& scs, for (vector::const_iterator i = aggs.begin(); i != aggs.end(); i++) { addAggregateColumn(*i, -1, jobInfo.projectionCols, jobInfo); + if (wcs.size() > 0) + { + jobInfo.nonConstDelCols.push_back(SRCP((*i)->clone())); + } } } @@ -483,12 +488,12 @@ void checkReturnedColumns(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) if (ac != NULL && ac->aggColumnList().size() > 0) { jobInfo.nonConstCols[i]->outputIndex(i); - preProcessFunctionOnAggregation(ac->simpleColumnList(), ac->aggColumnList(), jobInfo); + preProcessFunctionOnAggregation(ac->simpleColumnList(), ac->aggColumnList(), ac->windowfunctionColumnList(), jobInfo); } else if (fc != NULL && fc->aggColumnList().size() > 0) { jobInfo.nonConstCols[i]->outputIndex(i); - preProcessFunctionOnAggregation(fc->simpleColumnList(), fc->aggColumnList(), jobInfo); + preProcessFunctionOnAggregation(fc->simpleColumnList(), fc->aggColumnList(), fc->windowfunctionColumnList(), jobInfo); } } } From 881410d39780a33743f209e0d05925938af22fec Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 22 Aug 2019 16:39:02 -0500 Subject: [PATCH 21/26] MCOL-174 fix compile errors --- dbcon/execplan/predicateoperator.h | 1 + utils/funcexp/func_cast.cpp | 4 ++-- utils/funcexp/func_char.cpp | 2 +- utils/funcexp/func_elt.cpp | 2 +- utils/funcexp/func_makedate.cpp | 4 ++-- utils/funcexp/func_maketime.cpp | 6 +++--- utils/funcexp/func_mod.cpp | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index 8e19aa1b7..e2caf209e 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -35,6 +35,7 @@ #include #endif #include +#include #include #include "expressionparser.h" diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp index 8ccf73e0e..35e417ff8 100644 --- a/utils/funcexp/func_cast.cpp +++ b/utils/funcexp/func_cast.cpp @@ -182,7 +182,7 @@ int64_t Func_cast_signed::getIntVal(Row& row, IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); double dscale = d.scale; int64_t value = d.value / pow(10.0, dscale); - int lefto = (d.value - value * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - value * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( value >= 0 && lefto > 4 ) value++; @@ -337,7 +337,7 @@ uint64_t Func_cast_unsigned::getUintVal(Row& row, } uint64_t value = d.value / pow(10.0, dscale); - int lefto = (d.value - value * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - value * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( value >= 0 && lefto > 4 ) value++; diff --git a/utils/funcexp/func_char.cpp b/utils/funcexp/func_char.cpp index f6774452c..a04403a00 100644 --- a/utils/funcexp/func_char.cpp +++ b/utils/funcexp/func_char.cpp @@ -159,7 +159,7 @@ string Func_char::getStrVal(Row& row, double dscale = d.scale; // get decimal and round up int value = d.value / pow(10.0, dscale); - int lefto = (d.value - value * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - value * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( lefto > 4 ) value++; diff --git a/utils/funcexp/func_elt.cpp b/utils/funcexp/func_elt.cpp index faa8a234a..99c86af75 100644 --- a/utils/funcexp/func_elt.cpp +++ b/utils/funcexp/func_elt.cpp @@ -73,7 +73,7 @@ string Func_elt::getStrVal(rowgroup::Row& row, IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); double dscale = d.scale; number = d.value / pow(10.0, dscale); - int lefto = (d.value - number * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - number * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( number >= 0 && lefto > 4 ) number++; diff --git a/utils/funcexp/func_makedate.cpp b/utils/funcexp/func_makedate.cpp index 6b4537bb4..8c8c50abf 100644 --- a/utils/funcexp/func_makedate.cpp +++ b/utils/funcexp/func_makedate.cpp @@ -70,7 +70,7 @@ uint64_t makedate(rowgroup::Row& row, IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); double dscale = d.scale; year = d.value / pow(10.0, dscale); - int lefto = (d.value - year * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - year * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( year >= 0 && lefto > 4 ) year++; @@ -130,7 +130,7 @@ uint64_t makedate(rowgroup::Row& row, IDB_Decimal d = parm[1]->data()->getDecimalVal(row, isNull); double dscale = d.scale; int64_t tmp = d.value / pow(10.0, dscale); - int lefto = (d.value - tmp * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - tmp * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( tmp >= 0 && lefto > 4 ) tmp++; diff --git a/utils/funcexp/func_maketime.cpp b/utils/funcexp/func_maketime.cpp index 8b19ae557..b43fed4ab 100644 --- a/utils/funcexp/func_maketime.cpp +++ b/utils/funcexp/func_maketime.cpp @@ -76,7 +76,7 @@ string Func_maketime::getStrVal(rowgroup::Row& row, IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); double dscale = d.scale; hour = d.value / pow(10.0, dscale); - int lefto = (d.value - hour * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - hour * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( hour >= 0 && lefto > 4 ) hour++; @@ -116,7 +116,7 @@ string Func_maketime::getStrVal(rowgroup::Row& row, IDB_Decimal d = parm[1]->data()->getDecimalVal(row, isNull); double dscale = d.scale; min = d.value / pow(10.0, dscale); - int lefto = (d.value - min * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - min * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( min >= 0 && lefto > 4 ) min++; @@ -162,7 +162,7 @@ string Func_maketime::getStrVal(rowgroup::Row& row, IDB_Decimal d = parm[2]->data()->getDecimalVal(row, isNull); double dscale = d.scale; sec = d.value / pow(10.0, dscale); - int lefto = (d.value - sec * pow(10.0, dscale)) / pow(dscale - 1); + int lefto = (d.value - sec * pow(10.0, dscale)) / pow(10.0, dscale - 1); if ( sec >= 0 && lefto > 4 ) sec++; diff --git a/utils/funcexp/func_mod.cpp b/utils/funcexp/func_mod.cpp index f9b51eb28..a4000af67 100644 --- a/utils/funcexp/func_mod.cpp +++ b/utils/funcexp/func_mod.cpp @@ -75,7 +75,7 @@ IDB_Decimal Func_mod::getDecimalVal(Row& row, IDB_Decimal d = parm[0]->data()->getDecimalVal(row, isNull); int64_t value = d.value / pow(10.0, d.scale); - int lefto = d.value % pow(10.0, d.scale); + int lefto = d.value % (int)pow(10.0, d.scale); int64_t mod = (value % div) * pow(10.0, d.scale) + lefto; From 5c8ff4a1ebc590e4a1023432bdf09213ff324c02 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 29 Aug 2019 09:44:17 -0500 Subject: [PATCH 22/26] MCOL-3435 push an implicit group by for Window Functions in same query as aggregates. --- dbcon/joblist/joblistfactory.cpp | 2 +- dbcon/joblist/windowfunctionstep.cpp | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 66245a8e2..b34215433 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -1525,7 +1525,7 @@ void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, // bug4531, window function support WindowFunctionStep::checkWindowFunction(csep, jobInfo); - // bug3391, move forward the aggregation check for no aggregte having clause. + // bug3391, move forward the aggregation check for no aggregate having clause. checkAggregation(csep, jobInfo); // include filters in having clause, if any. diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 1a603a98e..a04a4fcb2 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -477,6 +477,17 @@ void WindowFunctionStep::checkWindowFunction(CalpontSelectExecutionPlan* csep, J colSet.insert(key); } + // MCOL-3435 We haven't yet checked for aggregate, but we need to know + bool hasAggregation = false; + for (uint64_t i = 0; i < jobInfo.deliveredCols.size(); i++) + { + if (dynamic_cast(jobInfo.deliveredCols[i].get()) != NULL) + { + hasAggregation = true; + break; + } + } + // add non-duplicate auxiliary columns RetColsVector colsInAf; @@ -499,10 +510,17 @@ void WindowFunctionStep::checkWindowFunction(CalpontSelectExecutionPlan* csep, J if (colSet.find(key) == colSet.end()) { jobInfo.deliveredCols.push_back(*j); -// MCOL-3343 Enable this if we decide to allow Window Functions to run with -// aggregates with no group by. MariaDB allows this. Nobody else in the world does. -// There will be more work to get it to function if we try this. -// jobInfo.windowSet.insert(getTupleKey(jobInfo, *j, true)); + // MCOL-3435 Allow Window Functions to run with aggregates with + // no group by by inserting a group by for window parameters. + if (hasAggregation) + { + uint32_t tupleKey = getTupleKey(jobInfo, *j, true); + if (find(jobInfo.groupByColVec.begin(), jobInfo.groupByColVec.end(), tupleKey) + == jobInfo.groupByColVec.end()) + { + jobInfo.groupByColVec.push_back(tupleKey); + } + } } colSet.insert(key); From eae773d122a6d7d039b07e92b1cef8c7393fde88 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 11 Sep 2019 12:28:07 -0500 Subject: [PATCH 23/26] MCOL-3492 Don't do DISTINCT as aggregate in the presence of Window Functions --- dbcon/joblist/joblistfactory.cpp | 27 ++-- dbcon/joblist/tupleaggregatestep.cpp | 179 +++++++-------------------- 2 files changed, 62 insertions(+), 144 deletions(-) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index b34215433..520e365ef 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -604,7 +604,8 @@ void checkAggregation(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) jobInfo.hasDistinct = csep->distinct(); - if (csep->distinct() == true) + // DISTINCT with window functions must be done in tupleannexstep + if (csep->distinct() == true && jobInfo.windowDels.size() == 0) { jobInfo.hasAggregation = true; } @@ -878,6 +879,10 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo const SimpleColumn* sc = dynamic_cast(srcp.get()); AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); + // Use this instead of the above line to mimic MariaDB's sql_mode = 'ONLY_FULL_GROUP_BY' + // bool doDistinct = (csep->distinct() && + // csep->groupByCols().empty() && + // !jobInfo.hasAggregation); uint32_t tupleKey = -1; string alias; string view; @@ -1126,9 +1131,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo // remember the columns to be returned jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == + // bug 1499 distinct processing, save unique distinct columns that aren't Window columns + if (doDistinct + && (jobInfo.distinctColVec.end() == find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) { jobInfo.distinctColVec.push_back(tupleKey); @@ -1279,13 +1284,13 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo // remember the columns to be returned jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); - } + // bug 1499 distinct processing, save unique distinct columns that aren't Window columns + if (doDistinct + && (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 62f084412..c28de49c0 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -837,6 +837,7 @@ const string TupleAggregateStep::toString() const SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) { SJSTEP spjs; + TupleDeliveryStep* tds = dynamic_cast(step.get()); TupleBPS* tbps = dynamic_cast(step.get()); TupleHashJoinStep* thjs = dynamic_cast(step.get()); SubAdapterStep* sas = dynamic_cast(step.get()); @@ -914,171 +915,83 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) // preprocess the columns used by group_concat jobInfo.groupConcatInfo.prepGroupConcat(jobInfo); - bool doGroupConcat = false; + bool doUMOnly = jobInfo.groupConcatInfo.columns().size() > 0 +// || jobInfo.windowSet.size() > 0 + || sas + || ces; + + rgs.push_back(tds->getDeliveredRowGroup()); + + // get rowgroup and aggregator + // For TupleHashJoin, we prepare for both PM and UM only aggregation + if (doUMOnly || thjs) + { + if (distinctAgg == true) + prep1PhaseDistinctAggregate(jobInfo, rgs, aggs); + else + prep1PhaseAggregate(jobInfo, rgs, aggs); + + // TODO: fix this + if (doUMOnly) + rgs.push_back(rgs[0]); + } + + if (!doUMOnly) + { + if (distinctAgg == true) + prep2PhasesDistinctAggregate(jobInfo, rgs, aggs); + else + prep2PhasesAggregate(jobInfo, rgs, aggs); + } if (tbps != NULL) { - // get rowgroup and aggregator - rgs.push_back(tbps->getDeliveredRowGroup()); - - if (jobInfo.groupConcatInfo.columns().size() == 0) - { - if (distinctAgg == true) - prep2PhasesDistinctAggregate(jobInfo, rgs, aggs); - else - prep2PhasesAggregate(jobInfo, rgs, aggs); - } - else - { - if (distinctAgg == true) - prep1PhaseDistinctAggregate(jobInfo, rgs, aggs); - else - prep1PhaseAggregate(jobInfo, rgs, aggs); - - // TODO: fix this - rgs.push_back(rgs[0]); - doGroupConcat = true; - } - - // make sure connected by a RowGroupDL - JobStepAssociation tbpsJsa; - AnyDataListSPtr spdl(new AnyDataList()); - RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); - dl->OID(execplan::CNX_VTABLE_ID); - spdl->rowGroupDL(dl); - tbpsJsa.outAdd(spdl); - // create delivery step aggUM = dynamic_pointer_cast(aggs[0]); spjs.reset(new TupleAggregateStep(aggUM, rgs[1], rgs[2], jobInfo)); - spjs->inputAssociation(tbpsJsa); - // step id?? - spjs->stepId(step->stepId() + 1); - - // set the PM/UM side aggregate structs - tbps->outputAssociation(tbpsJsa); - - if (doGroupConcat) + if (doUMOnly) dynamic_cast(spjs.get())->umOnly(true); else tbps->setAggregateStep(aggs[1], rgs[2]); } else if (thjs != NULL) { - // default to UM aggregation - rgs.push_back(thjs->getDeliveredRowGroup()); - - if (distinctAgg == true) - prep1PhaseDistinctAggregate(jobInfo, rgs, aggs); - else - prep1PhaseAggregate(jobInfo, rgs, aggs); - - // also prepare for PM aggregation - // rowgroups -- 0-proj, 1-um, [2-phase case: 2-um, 3-pm] - // aggregators -- 0-um, [2-phase case: 1-um, 2-pm] - if (jobInfo.groupConcatInfo.columns().size() == 0) - { - if (distinctAgg == true) - prep2PhasesDistinctAggregate(jobInfo, rgs, aggs); - else - prep2PhasesAggregate(jobInfo, rgs, aggs); - } - else - { - // TODO: fix this - rgs.push_back(rgs[0]); - doGroupConcat = true; - } - - // make sure connected by a RowGroupDL - JobStepAssociation thjsJsa; - AnyDataListSPtr spdl(new AnyDataList()); - RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); - dl->OID(execplan::CNX_VTABLE_ID); - spdl->rowGroupDL(dl); - thjsJsa.outAdd(spdl); - // create delivery step aggUM = dynamic_pointer_cast(aggs[0]); spjs.reset(new TupleAggregateStep(aggUM, rgs[1], rgs[0], jobInfo)); - spjs->inputAssociation(thjsJsa); - if (doGroupConcat) + if (doUMOnly) dynamic_cast(spjs.get())->umOnly(true); else dynamic_cast(spjs.get())->savePmHJData(aggs[1], aggs[2], rgs[3]); - - // step id?? - spjs->stepId(step->stepId() + 1); - // set input side - thjs->outputAssociation(thjsJsa); thjs->deliveryStep(spjs); } - else if (sas != NULL) + else { - // UM aggregation - // rowgroups -- 0-proj, 1-um - // aggregators -- 0-um - rgs.push_back(sas->getDeliveredRowGroup()); - - if (distinctAgg == true) - prep1PhaseDistinctAggregate(jobInfo, rgs, aggs); - else - prep1PhaseAggregate(jobInfo, rgs, aggs); - - // make sure connected by a RowGroupDL - JobStepAssociation sasJsa; - AnyDataListSPtr spdl(new AnyDataList()); - RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); - dl->OID(execplan::CNX_VTABLE_ID); - spdl->rowGroupDL(dl); - sasJsa.outAdd(spdl); - - // create delivery step aggUM = dynamic_pointer_cast(aggs[0]); spjs.reset(new TupleAggregateStep(aggUM, rgs[1], rgs[0], jobInfo)); - spjs->inputAssociation(sasJsa); - - // step id?? - spjs->stepId(step->stepId() + 1); - - // set input side - sas->outputAssociation(sasJsa); } - else if (ces != NULL) - { - // UM aggregation - // rowgroups -- 0-proj, 1-um - // aggregators -- 0-um - rgs.push_back(ces->getDeliveredRowGroup()); - if (distinctAgg == true) - prep1PhaseDistinctAggregate(jobInfo, rgs, aggs); - else - prep1PhaseAggregate(jobInfo, rgs, aggs); + // Setup the input JobstepAssoctiation -- the mechanism + // whereby the previous step feeds data to this step. + // Otherwise, we need to create one and hook to the + // previous step as well as this aggregate step. + spjs->stepId(step->stepId() + 1); - // make sure connected by a RowGroupDL - JobStepAssociation cesJsa; - AnyDataListSPtr spdl(new AnyDataList()); - RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); - dl->OID(execplan::CNX_VTABLE_ID); - spdl->rowGroupDL(dl); - cesJsa.outAdd(spdl); + JobStepAssociation jsa; + AnyDataListSPtr spdl(new AnyDataList()); + RowGroupDL* dl = new RowGroupDL(1, jobInfo.fifoSize); + dl->OID(execplan::CNX_VTABLE_ID); + spdl->rowGroupDL(dl); + jsa.outAdd(spdl); - // create delivery step - aggUM = dynamic_pointer_cast(aggs[0]); - spjs.reset(new TupleAggregateStep(aggUM, rgs[1], rgs[0], jobInfo)); - spjs->inputAssociation(cesJsa); + spjs->inputAssociation(jsa); // Aggregate input - // step id?? - spjs->stepId(step->stepId() + 1); - - // set input side - ces->outputAssociation(cesJsa); - } + //Previous step output + step->outputAssociation(jsa); // add the aggregate on constants if (constAggDataVec.size() > 0) From f238a09ee0d12717cb168db69abc015fd4b0c176 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 12 Sep 2019 09:50:39 -0500 Subject: [PATCH 24/26] MCOL-3435 Don't put nested aggregate into implied group by. Use the csep group by instead of groupByColumnVec. --- dbcon/joblist/windowfunctionstep.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index a04a4fcb2..0c987919a 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -514,11 +514,24 @@ void WindowFunctionStep::checkWindowFunction(CalpontSelectExecutionPlan* csep, J // no group by by inserting a group by for window parameters. if (hasAggregation) { - uint32_t tupleKey = getTupleKey(jobInfo, *j, true); - if (find(jobInfo.groupByColVec.begin(), jobInfo.groupByColVec.end(), tupleKey) - == jobInfo.groupByColVec.end()) + // If an argument is an AggregateColumn, don't group by it. + if (dynamic_cast(j->get()) == NULL) { - jobInfo.groupByColVec.push_back(tupleKey); + bool bFound = false; + for (std::vector::iterator igpc = csep->groupByCols().begin(); + igpc < csep->groupByCols().end(); + ++igpc) + { + if (*igpc->get() == *j->get()) + { + bFound = true; + break; + } + } + if (!bFound) + { + csep->groupByCols().push_back(*j); + } } } } From a18ab529971b27ef3e9adb4f6e0464ad1a357c83 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 12 Sep 2019 10:41:37 -0500 Subject: [PATCH 25/26] MCOL-3492 Format cleanup --- dbcon/joblist/joblistfactory.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 520e365ef..d8febe971 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -1131,9 +1131,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo // remember the columns to be returned jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - // bug 1499 distinct processing, save unique distinct columns that aren't Window columns - if (doDistinct - && (jobInfo.distinctColVec.end() == + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) { jobInfo.distinctColVec.push_back(tupleKey); @@ -1284,13 +1284,13 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo // remember the columns to be returned jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - // bug 1499 distinct processing, save unique distinct columns that aren't Window columns - if (doDistinct - && (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); - } + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } From 4e92e75274c30622529341a33c7287421b7d9048 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 24 Sep 2019 15:20:27 +0100 Subject: [PATCH 26/26] Fix merge conflict issue --- procmon/processmonitor.cpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index a35d784dc..b6053d2fe 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -2616,11 +2616,6 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, tmp = tmp.filename(); DBRMroot = (bf::path(DBRMDir) / tmp).string(); - // DBRMDir might have changed, so need to change DBRMroot - bf::path tmp(DBRMroot); - tmp = tmp.filename(); - DBRMroot = (bf::path(DBRMDir) / tmp).string(); - sendAlarm("DBRM", DBRM_LOAD_DATA_ERROR, CLEAR); // change DBRMroot to temp DBRMDir path // DBRMroot = tempDBRMDir + "/BRM_saves"; @@ -4425,13 +4420,6 @@ int ProcessMonitor::getDBRMdata(string *path) bf::create_directories(pTmp); *path = pTmp.string(); log.writeLog(__LINE__, "Downloading DBRM files to " + *path, LOG_TYPE_DEBUG); - - boost::uuids::uuid u = boost::uuids::random_generator()(); - bf::path pTmp = bf::path(*path) / boost::uuids::to_string(u); - bf::create_directories(pTmp); - *path = pTmp.string(); - log.writeLog(__LINE__, "Downloading DBRM files to " + *path, LOG_TYPE_DEBUG); - for ( int i = 0 ; i < numFiles ; i ++ ) { string fileName; @@ -4476,10 +4464,6 @@ int ProcessMonitor::getDBRMdata(string *path) pFilename = pTmp / pFilename.filename(); const char *cFilename = pFilename.string().c_str(); - bf::path pFilename(fileName); - pFilename = pTmp / pFilename.filename(); - const char *cFilename = pFilename.string().c_str(); - boost::scoped_ptr out(IDBDataFile::open( IDBPolicy::getType(cFilename, IDBPolicy::WRITEENG),