diff --git a/.gitignore b/.gitignore index c7aa228d3..aba662bbf 100644 --- a/.gitignore +++ b/.gitignore @@ -105,4 +105,4 @@ install_manifest_platform.txt install_manifest_storage-engine.txt _CPack_Packages columnstoreversion.h - +.idea/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dd1f6ede..adee980ba 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,8 +111,8 @@ endif() INCLUDE(check_compiler_flag.cmake) -MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) -MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) +MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall -fno-tree-vectorize -D_GLIBCXX_ASSERTIONS -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) +MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -D_GLIBCXX_ASSERTIONS -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) # enable security hardening features, like most distributions do # in our benchmarks that costs about ~1% of performance, depending on the load @@ -122,15 +122,22 @@ ELSE() SET(security_default ON) ENDIF() OPTION(SECURITY_HARDENED "Use security-enhancing compiler features (stack protector, relro, etc)" ${security_default}) +OPTION(SECURITY_HARDENED_NEW "Use new security-enhancing compilier features" OFF) IF(SECURITY_HARDENED) # security-enhancing flags MY_CHECK_AND_SET_COMPILER_FLAG("-pie -fPIC") MY_CHECK_AND_SET_COMPILER_FLAG("-Wl,-z,relro,-z,now") MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-protector --param=ssp-buffer-size=4") MY_CHECK_AND_SET_COMPILER_FLAG("-D_FORTIFY_SOURCE=2" RELEASE RELWITHDEBINFO) + MY_CHECK_AND_SET_COMPILER_FLAG("-fexceptions") + IF(SECURITY_HARDENED_NEW) + MY_CHECK_AND_SET_COMPILER_FLAG("-mcet 
-fcf-protection") + MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-protector-strong") + MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-clash-protection") + ENDIF() ENDIF() -SET (ENGINE_LDFLAGS "-Wl,--no-as-needed -Wl,--add-needed") +SET (ENGINE_LDFLAGS "-Wl,--no-as-needed -Wl,--add-needed") FIND_PACKAGE(Boost 1.53.0 REQUIRED COMPONENTS system filesystem thread regex date_time) diff --git a/README b/README index 0fc563153..8da8e257d 100644 --- a/README +++ b/README @@ -9,3 +9,4 @@ Additional features will be pushed in future releases. A few things to notice: - Do not use pre-releases on production systems. - The building of the ColumnStore engine needs a special build environment. We're working on making it available for everyone to build. + diff --git a/cpackEngineRPM.cmake b/cpackEngineRPM.cmake index 71d5e1f26..1a3527086 100644 --- a/cpackEngineRPM.cmake +++ b/cpackEngineRPM.cmake @@ -180,7 +180,6 @@ SET(CPACK_RPM_platform_USER_FILELIST "/usr/local/mariadb/columnstore/bin/resourceReport.sh" "/usr/local/mariadb/columnstore/bin/hadoopReport.sh" "/usr/local/mariadb/columnstore/bin/alarmReport.sh" -"/usr/local/mariadb/columnstore/bin/amazonInstaller" "/usr/local/mariadb/columnstore/bin/remote_command_verify.sh" "/usr/local/mariadb/columnstore/bin/disable-rep-columnstore.sh" "/usr/local/mariadb/columnstore/bin/columnstore.service" @@ -216,6 +215,9 @@ SET(CPACK_RPM_platform_USER_FILELIST "/usr/local/mariadb/columnstore/bin/os_detect.sh" "/usr/local/mariadb/columnstore/bin/columnstoreClusterTester.sh" "/usr/local/mariadb/columnstore/bin/mariadb-command-line.sh" +"/usr/local/mariadb/columnstore/bin/quick_installer_single_server.sh" +"/usr/local/mariadb/columnstore/bin/quick_installer_multi_server.sh" +"/usr/local/mariadb/columnstore/bin/quick_installer_amazon.sh" ${ignored}) SET(CPACK_RPM_libs_USER_FILELIST diff --git a/dbcon/ddlpackage/CMakeLists.txt b/dbcon/ddlpackage/CMakeLists.txt index f4ccd5594..30bc97124 100644 --- a/dbcon/ddlpackage/CMakeLists.txt +++ 
b/dbcon/ddlpackage/CMakeLists.txt @@ -1,4 +1,3 @@ - INCLUDE_DIRECTORIES( ${ENGINE_COMMON_INCLUDES} ) ADD_CUSTOM_COMMAND( @@ -13,7 +12,7 @@ ADD_CUSTOM_TARGET(ddl-lexer DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ddl-scan.cpp) ADD_CUSTOM_TARGET(ddl-parser DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ddl-gram.cpp) # Parser puts extra info to stderr. INCLUDE(../../check_compiler_flag.cmake) -MY_CHECK_AND_SET_COMPILER_FLAG("-DYYDEBUG" DEBUG) +MY_CHECK_AND_SET_COMPILER_FLAG("-DYYDEBUG=1" DEBUG) ########### next target ############### diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index fd5ee6900..179e5c14b 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -32,7 +32,10 @@ #endif using namespace ddlpackage; -typedef enum { NOOP, STRIP_QUOTES, STRIP_QUOTES_FQ } copy_action_t; +typedef enum { NOOP, STRIP_QUOTES } copy_action_t; +#if YYDEBUG == 0 +int ddldebug = 0; +#endif int lineno = 1; void ddlerror(struct pass_to_bison* x, char const *s); @@ -75,8 +78,6 @@ extended_identifier {ident_start}{extended_ident_cont}* fq_identifier {identifier}\.{identifier} identifier_quoted {grave_accent}{extended_identifier}{grave_accent} identifier_double_quoted {double_quote}{extended_identifier}{double_quote} -fq_quoted ({identifier_quoted}|{extended_identifier})\.({identifier_quoted}|{identifier}) -fq_double_quoted ({identifier_double_quoted}|{extended_identifier})\.({identifier_double_quoted}|{identifier}) integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) @@ -90,10 +91,7 @@ realfail2 ({integer}|{decimal})[Ee][-+] {identifier_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return IDENT; } -{identifier_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return IDENT; } -{fq_identifier} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return FQ_IDENT; } -{fq_quoted} { ddlget_lval(yyscanner)->str = 
scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } -{fq_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } +{identifier_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return DQ_IDENT; } ACTION {return ACTION;} ADD {return ADD;} @@ -125,7 +123,7 @@ CREATE {return CREATE;} CURRENT_USER {return CURRENT_USER;} DATE {ddlget_lval(yyscanner)->str=strdup("date"); return DATE;} DATETIME {return DATETIME;} -TIME {return TIME;} +TIME {ddlget_lval(yyscanner)->str=strdup("time"); return TIME;} DECIMAL {return DECIMAL;} DEC {return DECIMAL;} DEFAULT {return DEFAULT;} @@ -193,14 +191,14 @@ LONGTEXT {return LONGTEXT;} /* ignore */ } -{identifier} {ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return IDENT;} +{identifier} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return DQ_IDENT;} {self} { return ddlget_text(yyscanner)[0]; } {grave_accent} { - /* ignore */ + return ddlget_text(yyscanner)[0]; } "/*" { BEGIN(c_comment); } @@ -282,7 +280,6 @@ char* scanner_copy (char *str, yyscan_t yyscanner, copy_action_t action) char* result; char* nv = strdup(str); result = nv; - // free strduped memory later to prevent possible memory leak if(nv) ((scan_data*)ddlget_extra(yyscanner))->valbuf.push_back(nv); @@ -292,57 +289,6 @@ char* scanner_copy (char *str, yyscan_t yyscanner, copy_action_t action) nv[strlen(str) - 1] = '\0'; result = nv + 1; } - else if (action == STRIP_QUOTES_FQ) - { - bool move_left = false; - bool move_right = false; - char* left = nv; - char* tmp_first = nv; - // MCOL-1384 Loop through all comas in this quoted fq id - // looking for $quote_sign.$quote_sign sequence. 
- char* fq_delimiter; - int tmp_pos = 0; - while((fq_delimiter = strchr(tmp_first, '.')) != NULL) - { - if( (*(fq_delimiter -1) == '`' && *(fq_delimiter + 1) == '`') || - (*(fq_delimiter -1) == '"' && *(fq_delimiter + 1) == '"') ) - { - tmp_pos += fq_delimiter - tmp_first; - break; - } - tmp_first = fq_delimiter; - } - - char* fq_delimiter_orig = str + tmp_pos; - char* right = fq_delimiter + 1; - char* right_orig = fq_delimiter_orig + 1; - // MCOL-1384 Strip quotes from the left part. - if(*left == '"' || *left == '`') - { - result = left + 1; - *(fq_delimiter - 1) = '.'; - move_left = true; - } - else - { - fq_delimiter += 1; - } - - int right_length = strlen(right); - // MCOL-1384 Strip quotes from the right part. - if(*right == '`' || *right == '"') - { - right += 1; right_orig += 1; - right_length -= 2; - move_right = true; - *(fq_delimiter + right_length) = '\0'; - } - - if(move_left || move_right) - { - strncpy(fq_delimiter, right_orig, right_length); - } - } return result; } diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 68747dfc9..37ab49425 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -29,26 +29,18 @@ Understanding the New Sql book The postgress and mysql sources. find x -name \*.y -o -name \*.yy. - We don't support delimited identifiers. + We support quoted identifiers. All literals are stored as unconverted strings. You can't say "NOT DEFERRABLE". See the comment below. - This is not a reentrant parser. It uses the original global - variable style method of communication between the parser and - scanner. If we ever needed more than one parser thread per - processes, we would use the pure/reentrant options of bison and - flex. In that model, things that are traditionally global live - inside a struct that is passed around. We would need to upgrade to - a more recent version of flex. At the time of this writing, our - development systems have: flex version 2.5.4 + This is a reentrant parser. 
MCOL-66 Modify to be a reentrant parser */ %{ -#include "string.h" #include "sqlparser.h" #ifdef _MSC_VER @@ -71,7 +63,6 @@ char* copy_string(const char *str); %pure-parser %lex-param {void * scanner} %parse-param {struct ddlpackage::pass_to_bison * x} -%debug /* Bison uses this to generate a C union definition. This is used to store the application created values associated with syntactic @@ -118,11 +109,11 @@ DECIMAL DEFAULT DEFERRABLE DEFERRED IDB_DELETE DROP ENGINE FOREIGN FULL IMMEDIATE INDEX INITIALLY IDB_INT INTEGER KEY LONGBLOB LONGTEXT MATCH MAX_ROWS MEDIUMBLOB MEDIUMTEXT MIN_ROWS MODIFY NO NOT NULL_TOK NUMBER NUMERIC ON PARTIAL PRECISION PRIMARY -REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TIME TINYBLOB TINYTEXT +REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE -%token FQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE +%token DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE TIME /* Notes: * 1. 
"ata" stands for alter_table_action @@ -206,6 +197,7 @@ VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE %type opt_if_not_exists %type trunc_table_statement %type rename_table_statement +%type ident %% stmtblock: stmtmulti { x->fParseTree = $1; } @@ -476,7 +468,7 @@ opt_equal: ; table_option: - ENGINE opt_equal IDENT {$$ = new pair("engine", $3);} + ENGINE opt_equal ident {$$ = new pair("engine", $3);} | MAX_ROWS opt_equal ICONST {$$ = new pair("max_rows", $3);} | @@ -491,9 +483,9 @@ table_option: $$ = new pair("auto_increment", $3); } | - DEFAULT CHARSET opt_equal IDENT {$$ = new pair("default charset", $4);} + DEFAULT CHARSET opt_equal ident {$$ = new pair("default charset", $4);} | - DEFAULT IDB_CHAR SET opt_equal IDENT {$$ = new pair("default charset", $5);} + DEFAULT IDB_CHAR SET opt_equal ident {$$ = new pair("default charset", $5);} ; alter_table_statement: @@ -623,27 +615,23 @@ table_name: ; qualified_name: - FQ_IDENT { - char* delimeterPosition = strchr(const_cast($1), '.'); - if( delimeterPosition ) - { - *delimeterPosition = '\0'; - char* schemaName = const_cast($1); - char* tableName = delimeterPosition + 1; - $$ = new QualifiedName(schemaName, tableName); - *delimeterPosition = '.'; - } - else - $$ = new QualifiedName($1); - } - | IDENT { + ident { if (x->fDBSchema.size()) $$ = new QualifiedName((char*)x->fDBSchema.c_str(), $1); else $$ = new QualifiedName($1); } + | ident '.' ident + { + $$ = new QualifiedName($1, $3); + } ; +ident: + DQ_IDENT + | IDENT + ; + ata_add_column: /* See the documentation for SchemaObject for an explanation of why we are using * dynamic_cast here. 
@@ -655,12 +643,13 @@ ata_add_column: ; column_name: - DATE - |IDENT + TIME + |DATE + |ident ; constraint_name: - IDENT + ident ; column_option: @@ -720,13 +709,22 @@ default_clause: { $$ = new ColumnDefaultValue($2); } + | DEFAULT DQ_IDENT /* MCOL-1406 */ + { + $$ = new ColumnDefaultValue($2); + } | DEFAULT NULL_TOK {$$ = new ColumnDefaultValue(NULL);} | DEFAULT USER {$$ = new ColumnDefaultValue("$USER");} - | DEFAULT CURRENT_USER {$$ = new ColumnDefaultValue("$CURRENT_USER");} + | DEFAULT CURRENT_USER optional_braces {$$ = new ColumnDefaultValue("$CURRENT_USER");} | DEFAULT SESSION_USER {$$ = new ColumnDefaultValue("$SESSION_USER");} | DEFAULT SYSTEM_USER {$$ = new ColumnDefaultValue("$SYSTEM_USER");} ; +optional_braces: + /* empty */ {} + | '(' ')' {} + ; + data_type: character_string_type | binary_string_type diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..c996dad17 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - -AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. 
use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,15 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +167,12 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + b << static_cast(fAggParms.size()); + + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + 
fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +197,27 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +246,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +263,19 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) + if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL 
|| t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +634,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index d1db7e5a4..07bbab0b6 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + * ReturnedColumn objects that are the arguments to this + * function */ - SRCP 
fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/execplan/arithmeticcolumn.cpp b/dbcon/execplan/arithmeticcolumn.cpp index c2f44f2b3..aab6c9265 100644 --- a/dbcon/execplan/arithmeticcolumn.cpp +++ b/dbcon/execplan/arithmeticcolumn.cpp @@ -368,11 +368,7 @@ bool ArithmeticColumn::operator==(const ArithmeticColumn& t) const else if (fExpression != NULL || t.fExpression != NULL) return false; - if (fAlias != t.fAlias) - return false; - if (fTableAlias != t.fTableAlias) - return false; if (fData != t.fData) return false; diff --git a/dbcon/execplan/calpontselectexecutionplan.cpp b/dbcon/execplan/calpontselectexecutionplan.cpp index f21bf618a..0e2417588 100644 --- a/dbcon/execplan/calpontselectexecutionplan.cpp +++ b/dbcon/execplan/calpontselectexecutionplan.cpp @@ -478,6 +478,7 @@ void CalpontSelectExecutionPlan::serialize(messageqcpp::ByteStream& b) const b << (uint64_t)fLimitStart; b << (uint64_t)fLimitNum; b << static_cast(fHasOrderBy); + b << static_cast(fSpecHandlerProcessed); b << static_cast(fSelectSubList.size()); @@ -645,6 +646,7 @@ void CalpontSelectExecutionPlan::unserialize(messageqcpp::ByteStream& b) b >> (uint64_t&)fLimitStart; b >> (uint64_t&)fLimitNum; b >> reinterpret_cast< ByteStream::byte&>(fHasOrderBy); + b >> reinterpret_cast< ByteStream::byte&>(fSpecHandlerProcessed); // for SELECT subquery b >> size; diff --git a/dbcon/execplan/calpontselectexecutionplan.h b/dbcon/execplan/calpontselectexecutionplan.h index b3c6458f4..5d1f2fbb6 100644 --- a/dbcon/execplan/calpontselectexecutionplan.h +++ b/dbcon/execplan/calpontselectexecutionplan.h @@ -575,6 +575,15 @@ public: return fHasOrderBy; } + void specHandlerProcessed(const bool hand) + { + fSpecHandlerProcessed = hand; + } + const bool specHandlerProcessed() const + { + return fSpecHandlerProcessed; + } + void selectSubList(const SelectList& selectSubList) { fSelectSubList = selectSubList; @@ -871,6 +880,9 @@ 
private: uint32_t fPriority; uint32_t fStringTableThreshold; + + // for specific handlers processing, e.g. GROUP BY + bool fSpecHandlerProcessed; // Derived table involved in the query. For derived table optimization std::vector fSubSelectList; diff --git a/dbcon/execplan/clientrotator.cpp b/dbcon/execplan/clientrotator.cpp index 55634af7c..0a289f5f6 100644 --- a/dbcon/execplan/clientrotator.cpp +++ b/dbcon/execplan/clientrotator.cpp @@ -49,6 +49,15 @@ using namespace logging; #include "clientrotator.h" +//#include "idb_mysql.h" + +/** Debug macro */ +#ifdef INFINIDB_DEBUG +#define IDEBUG(x) {x;} +#else +#define IDEBUG(x) {} +#endif + #define LOG_TO_CERR namespace execplan @@ -60,14 +69,36 @@ const uint64_t LOCAL_EXEMGR_PORT = 8601; string ClientRotator::getModule() { string installDir = startup::StartUp::installDir(); + + //Log to debug.log + LoggingID logid( 24, 0, 0); + string fileName = installDir + "/local/module"; + string module; ifstream moduleFile (fileName.c_str()); if (moduleFile.is_open()) + { getline (moduleFile, module); + } + else + { + { + logging::Message::Args args1; + logging::Message msg(1); + std::ostringstream oss; + oss << "ClientRotator::getModule open status2 =" << strerror(errno); + args1.add(oss.str()); + args1.add(fileName); + msg.format( args1 ); + Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_DEBUG, msg, logid); + } + } moduleFile.close(); + return module; } diff --git a/dbcon/execplan/constantcolumn.cpp b/dbcon/execplan/constantcolumn.cpp index d292ee984..dd112600e 100644 --- a/dbcon/execplan/constantcolumn.cpp +++ b/dbcon/execplan/constantcolumn.cpp @@ -328,8 +328,6 @@ bool ConstantColumn::operator==(const ConstantColumn& t) const if (fType != t.fType) return false; - if (fAlias != t.fAlias) - return false; if (fData != t.fData) return false; diff --git a/dbcon/execplan/constantcolumn.h b/dbcon/execplan/constantcolumn.h index 04098faae..be0731044 100644 --- a/dbcon/execplan/constantcolumn.h +++ 
b/dbcon/execplan/constantcolumn.h @@ -38,6 +38,8 @@ class ByteStream; */ namespace execplan { +class ConstantColumn; + /** * @brief A class to represent a constant return column * diff --git a/dbcon/execplan/functioncolumn.cpp b/dbcon/execplan/functioncolumn.cpp index 108026484..487d42c0b 100644 --- a/dbcon/execplan/functioncolumn.cpp +++ b/dbcon/execplan/functioncolumn.cpp @@ -332,8 +332,6 @@ bool FunctionColumn::operator==(const FunctionColumn& t) const if (**it != **it2) return false; -// if (fAlias != t.fAlias) -// return false; if (fTableAlias != t.fTableAlias) return false; diff --git a/dbcon/execplan/returnedcolumn.cpp b/dbcon/execplan/returnedcolumn.cpp index 8feb7025b..44278424f 100644 --- a/dbcon/execplan/returnedcolumn.cpp +++ b/dbcon/execplan/returnedcolumn.cpp @@ -173,14 +173,13 @@ void ReturnedColumn::unserialize(messageqcpp::ByteStream& b) bool ReturnedColumn::operator==(const ReturnedColumn& t) const { + // Not all fields are considered for a positive equality. if (fData != t.fData) return false; if (fCardinality != t.fCardinality) return false; - //if (fAlias != t.fAlias) - // return false; if (fDistinct != t.fDistinct) return false; @@ -193,24 +192,18 @@ bool ReturnedColumn::operator==(const ReturnedColumn& t) const if (fNullsFirst != t.fNullsFirst) return false; - //if (fOrderPos != t.fOrderPos) - // return false; if (fInputIndex != t.fInputIndex) return false; if (fOutputIndex != t.fOutputIndex) return false; - //if (fSequence != t.fSequence) - // return false; if (fResultType != t.fResultType) return false; if (fOperationType != t.fOperationType) return false; - //if (fExpressionId != t.fExpressionId) - // return false; return true; } diff --git a/dbcon/execplan/simplecolumn.cpp b/dbcon/execplan/simplecolumn.cpp index 64955401e..1d7780e33 100644 --- a/dbcon/execplan/simplecolumn.cpp +++ b/dbcon/execplan/simplecolumn.cpp @@ -346,7 +346,6 @@ void SimpleColumn::serialize(messageqcpp::ByteStream& b) const b << fViewName; b << (uint32_t) fOid; b << 
fData; - //b << fAlias; b << fTableAlias; b << (uint32_t) fSequence; b << static_cast(fIsInfiniDB); @@ -363,7 +362,6 @@ void SimpleColumn::unserialize(messageqcpp::ByteStream& b) b >> fViewName; b >> (uint32_t&) fOid; b >> fData; - //b >> fAlias; b >> fTableAlias; b >> (uint32_t&) fSequence; b >> reinterpret_cast< ByteStream::doublebyte&>(fIsInfiniDB); @@ -388,8 +386,6 @@ bool SimpleColumn::operator==(const SimpleColumn& t) const if (fColumnName != t.fColumnName) return false; -// if (fIndexName != t.fIndexName) -// return false; if (fViewName != t.fViewName) return false; @@ -399,8 +395,6 @@ bool SimpleColumn::operator==(const SimpleColumn& t) const if (data() != t.data()) return false; -// if (fAlias != t.fAlias) -// return false; if (fTableAlias != t.fTableAlias) return false; diff --git a/dbcon/execplan/treenode.h b/dbcon/execplan/treenode.h index cef9579e9..d43239563 100644 --- a/dbcon/execplan/treenode.h +++ b/dbcon/execplan/treenode.h @@ -39,6 +39,10 @@ #include "calpontsystemcatalog.h" #include "exceptclasses.h" #include "dataconvert.h" + +// Workaround for my_global.h #define of isnan(X) causing a std::std namespace +using namespace std; + namespace messageqcpp { class ByteStream; @@ -594,7 +598,7 @@ inline const std::string& TreeNode::getStrVal() int exponent = (int)floor(log10( fabs(fResult.floatVal))); // This will round down the exponent double base = fResult.floatVal * pow(10, -1.0 * exponent); - if (std::isnan(exponent) || std::isnan(base)) + if (isnan(exponent) || isnan(base)) { snprintf(tmp, 312, "%f", fResult.floatVal); fResult.strVal = removeTrailing0(tmp, 312); @@ -629,7 +633,7 @@ inline const std::string& TreeNode::getStrVal() int exponent = (int)floor(log10( fabs(fResult.doubleVal))); // This will round down the exponent double base = fResult.doubleVal * pow(10, -1.0 * exponent); - if (std::isnan(exponent) || std::isnan(base)) + if (isnan(exponent) || isnan(base)) { snprintf(tmp, 312, "%f", fResult.doubleVal); fResult.strVal = 
removeTrailing0(tmp, 312); diff --git a/dbcon/joblist/crossenginestep.cpp b/dbcon/joblist/crossenginestep.cpp index 789e58cd4..d3cef7928 100644 --- a/dbcon/joblist/crossenginestep.cpp +++ b/dbcon/joblist/crossenginestep.cpp @@ -744,7 +744,6 @@ string CrossEngineStep::makeQuery() // the string must consist of a single SQL statement without a terminating semicolon ; or \g. // oss << ";"; - return oss.str(); } diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/fifo.h b/dbcon/joblist/fifo.h index dbdf1eca8..429572ed5 100644 --- a/dbcon/joblist/fifo.h +++ b/dbcon/joblist/fifo.h @@ -398,20 +398,29 @@ void FIFO::signalPs() template inline bool FIFO::next(uint64_t id, element_t* out) { + base::mutex.lock(); fConsumptionStarted = true; if (cpos[id] >= fMaxElements) + { + base::mutex.unlock(); if (!waitForSwap(id)) return false; + base::mutex.lock(); + } *out = cBuffer[cpos[id]++]; #ifndef ONE_CS if 
(cpos[id] == fMaxElements) + { + base::mutex.unlock(); signalPs(); - + return true; + } #endif + base::mutex.unlock(); return true; } diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index f5dbeee17..4b1980d49 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -405,7 +405,7 @@ uint32_t getTupleKey(JobInfo& jobInfo, const SRCP& srcp, bool add) if (add) { - // setTupleInfo first if add is ture, ok if already set. + // setTupleInfo first if add is true, ok if already set. 
const SimpleColumn* sc = dynamic_cast(srcp.get()); if (sc != NULL) diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index b81a2ec8d..f3782c9d5 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1634,11 +1634,8 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) return doExpressionFilter(sf, jobInfo); } - // trim trailing space char in the predicate string constval(cc->constval()); - size_t spos = constval.find_last_not_of(" "); - if (spos != string::npos) constval = constval.substr(0, spos + 1); CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct = sc->colType(); @@ -2772,11 +2769,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) if (ConstantColumn::NULLDATA == cc->type() && (opeq == *sop || opne == *sop)) cop = COMPARE_NIL; - // trim trailing space char string value = cc->constval(); - size_t spos = value.find_last_not_of(" "); - if (spos != string::npos) value = value.substr(0, spos + 1); pds->addFilter(cop, value); } @@ -2858,11 +2852,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) if (ConstantColumn::NULLDATA == cc->type() && (opeq == *sop || opne == *sop)) cop = COMPARE_NIL; - // trim trailing space char string value = cc->constval(); - size_t spos = value.find_last_not_of(" "); - if (spos != string::npos) value = value.substr(0, spos + 1); pds->addFilter(cop, value); } @@ -2968,10 +2959,7 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) int8_t cop = op2num(sop); int64_t value = 0; string constval = cc->constval(); - // trim trailing space char - size_t spos = constval.find_last_not_of(" "); - if (spos != string::npos) constval = constval.substr(0, spos + 1); // @bug 1151 string longer than colwidth of char/varchar. 
uint8_t rf = 0; diff --git a/dbcon/joblist/jlf_subquery.cpp b/dbcon/joblist/jlf_subquery.cpp index add3dc533..1e4eaeeec 100644 --- a/dbcon/joblist/jlf_subquery.cpp +++ b/dbcon/joblist/jlf_subquery.cpp @@ -756,8 +756,8 @@ int doFromSubquery(CalpontExecutionPlan* ep, const string& alias, const string& void addOrderByAndLimit(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) { // make sure there is a LIMIT -// if (csep->orderByCols().size() > 0 csep->limitNum() == (uint64_t) - 1) -// return; + if (csep->orderByCols().size() > 0 && csep->limitNum() == (uint64_t) - 1) + return; jobInfo.limitStart = csep->limitStart(); jobInfo.limitCount = csep->limitNum(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..04989e7b7 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -301,6 +300,7 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) { const ArithmeticColumn* ac = NULL; const FunctionColumn* fc = NULL; + const ConstantColumn* cc = NULL; uint64_t eid = -1; CalpontSystemCatalog::ColType ct; ExpressionStep* es = new ExpressionStep(jobInfo); @@ -317,6 +317,11 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) eid = fc->expressionId(); ct = fc->resultType(); } + else if ((cc = dynamic_cast(retCols[i].get())) != NULL) + { + eid = cc->expressionId(); + ct = cc->resultType(); + } else { std::ostringstream errmsg; @@ -870,7 +875,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = rcp->columnVec(); @@ -892,20 +897,62 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for 
multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + + const vector& cols = rcp->columnVec(); + + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + + continue; + } + +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. + // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. 
+ for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +962,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +987,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +1011,359 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + // Only do the optimization of converting to count(*) if + // there is only one parameter. 
+ if (aggParms.size() == 1 && aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + doDistinct = aggc->distinct(); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + aggCt = aggc->resultType(); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); -//XXX use this before connector sets colType in sc correctly. - if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (!sc->schemaName().empty()) + { + ct = sc->colType(); -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - findit = jobInfo.tokenOnly.find(tupleKey); + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: 
unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); + } + + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); + + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); + + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); + + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != 
NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } @@ -1622,15 +1843,16 @@ void makeVtableModeSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps) { // @bug4848, enhance and unify limit handling. 
-// if (csep->limitNum() != (uint64_t) - 1) + if (csep->limitNum() != (uint64_t) - 1) { // special case for outer query order by limit -- return all - if (jobInfo.subId == 0 && csep->hasOrderBy()) + if (jobInfo.subId == 0 && csep->hasOrderBy() && !csep->specHandlerProcessed()) { jobInfo.limitCount = (uint64_t) - 1; } - // support order by and limit in sub-query/union + // support order by and limit in sub-query/union or + // GROUP BY handler processed outer query order else if (csep->orderByCols().size() > 0) { addOrderByAndLimit(csep, jobInfo); diff --git a/dbcon/joblist/limitedorderby.cpp b/dbcon/joblist/limitedorderby.cpp index 82d6041a8..7da1accfe 100644 --- a/dbcon/joblist/limitedorderby.cpp +++ b/dbcon/joblist/limitedorderby.cpp @@ -77,14 +77,9 @@ void LimitedOrderBy::initialize(const RowGroup& rg, const JobInfo& jobInfo) map::iterator j = keyToIndexMap.find(i->first); idbassert(j != keyToIndexMap.end()); - // MCOL-1052 Ordering direction in CSEP differs from - // internal direction representation. - if (i->second) - fOrderByCond.push_back(IdbSortSpec(j->second, false)); - else - fOrderByCond.push_back(IdbSortSpec(j->second, true)); - - //fOrderByCond.push_back(IdbSortSpec(j->second, i->second)); + // TODO Ordering direction in CSEP differs from + // internal direction representation. This behavior should be fixed + fOrderByCond.push_back(IdbSortSpec(j->second, i->second)); } // limit row count info @@ -176,15 +171,18 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row) } } - +/* + * The f() copies top element from an ordered queue into a row group. It + * does this backwards to syncronise sorting orientation with the server. + * The top row from the queue goes last into the returned set. 
+ */ void LimitedOrderBy::finalize() { + queue tempQueue; if (fRowGroup.getRowCount() > 0) fDataQueue.push(fData); - // MCOL-1052 The removed check effectivly disables sorting to happen, - // since fStart = 0; - if (true) + if (fOrderByQueue.size() > 0) { uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); fMemSize += newSize; @@ -195,27 +193,49 @@ void LimitedOrderBy::finalize() << " @" << __FILE__ << ":" << __LINE__; throw IDBExcept(fErrorCode); } - + + uint64_t offset = 0; + uint64_t i = 0; + list tempRGDataList; + + // Skip first LIMIT rows in the the RowGroup + if ( fCount <= fOrderByQueue.size() ) + { + offset = fCount % fRowsPerRG; + if(!offset && fCount > 0) + offset = fRowsPerRG; + } + else + { + offset = fOrderByQueue.size() % fRowsPerRG; + if(!offset && fOrderByQueue.size() > 0) + offset = fRowsPerRG; + } + + list::iterator tempListIter = tempRGDataList.begin(); + + i = 0; + uint32_t rSize = fRow0.getSize(); + uint64_t preLastRowNumb = fRowsPerRG - 1; fData.reinit(fRowGroup, fRowsPerRG); fRowGroup.setData(&fData); fRowGroup.resetRowGroup(0); - fRowGroup.getRow(0, &fRow0); - queue tempQueue; - uint64_t i = 0; - + offset = offset != 0 ? 
offset - 1 : offset; + fRowGroup.getRow(offset, &fRow0); + while ((fOrderByQueue.size() > fStart) && (i++ < fCount)) { const OrderByRow& topRow = fOrderByQueue.top(); row1.setData(topRow.fData); copyRow(row1, &fRow0); - //memcpy(fRow0.getData(), topRow.fData, fRow0.getSize()); fRowGroup.incRowCount(); - fRow0.nextRow(); + offset--; + fRow0.prevRow(rSize); fOrderByQueue.pop(); - if (fRowGroup.getRowCount() >= fRowsPerRG) + if(offset == (uint64_t)-1) { - tempQueue.push(fData); + tempRGDataList.push_front(fData); fMemSize += newSize; if (!fRm->getMemory(newSize, fSessionMemLimit)) @@ -224,18 +244,21 @@ void LimitedOrderBy::finalize() << " @" << __FILE__ << ":" << __LINE__; throw IDBExcept(fErrorCode); } - - fData.reinit(fRowGroup, fRowsPerRG); - //fData.reset(new uint8_t[fRowGroup.getDataSize(fRowsPerRG)]); + + fData.reinit(fRowGroup, fRowsPerRG); fRowGroup.setData(&fData); - fRowGroup.resetRowGroup(0); - fRowGroup.getRow(0, &fRow0); + fRowGroup.resetRowGroup(0); // ? + fRowGroup.getRow(preLastRowNumb, &fRow0); + offset = preLastRowNumb; } } - + // Push the last/only group into the queue. 
if (fRowGroup.getRowCount() > 0) - tempQueue.push(fData); - + tempRGDataList.push_front(fData); + + for(tempListIter = tempRGDataList.begin(); tempListIter != tempRGDataList.end(); tempListIter++) + tempQueue.push(*tempListIter); + fDataQueue = tempQueue; } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 9e23ac17b..da91919f0 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1094,8 +1097,10 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); - uint32_t projColsUDAFIndex = 0; - + // For UDAF + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1136,6 +1141,7 @@ void TupleAggregateStep::prep1PhaseAggregate( // populate the aggregate rowgroup AGG_MAP aggFuncMap; + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -1153,8 +1159,9 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, jobInfo.cntStarPos)); + aggOp, stats, 0, outIdx, jobInfo.cntStarPos)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1170,9 +1177,10 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, -1)); + aggOp, stats, 0, outIdx, -1)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1218,16 +1226,17 @@ void TupleAggregateStep::prep1PhaseAggregate( 
widthAgg.push_back(width[colProj]); if (groupBy[it->second]->fOutputColumnIndex == (uint32_t) - 1) - groupBy[it->second]->fOutputColumnIndex = i; + groupBy[it->second]->fOutputColumnIndex = outIdx; else functionVec.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, - i, + outIdx, groupBy[it->second]->fOutputColumnIndex))); + ++outIdx; continue; } else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key) != @@ -1240,6 +1249,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); + ++outIdx; continue; } else if (jobInfo.groupConcatInfo.columns().find(key) != @@ -1252,6 +1262,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else if (jobInfo.windowSet.find(key) != jobInfo.windowSet.end()) @@ -1263,6 +1274,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else @@ -1283,31 +1295,30 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, outIdx)); } functionVec.push_back(funct); @@ -1468,11 +1479,9 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1480,9 +1489,47 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + 
} + } + else + { + throw QueryDataExcept("prep1PhaseAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -1512,6 +1559,11 @@ void TupleAggregateStep::prep1PhaseAggregate( { aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); } + + if (aggOp != ROWAGG_MULTI_PARM) + { + ++outIdx; + } } // now fix the AVG function, locate the count(column) position @@ -1560,7 +1612,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1663,9 +1715,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. AGG_MAP aggFuncMap; +// set avgSet; + list multiParmIndexes; + + // fOR udaf + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - set avgSet; - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; // for count column of average function map avgFuncMap, avgDistFuncMap; @@ -1675,7 +1732,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1814,9 +1871,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip sum / count(column) if avg is also selected - if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && - (avgSet.find(aggKey) != avgSet.end())) - continue; 
+// if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && +// (avgSet.find(aggKey) != avgSet.end())) +// continue; if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || @@ -1829,12 +1886,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { @@ -1843,12 +1900,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2043,7 +2099,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2054,7 +2110,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAgg; - // UDAF Dummy holder for UserData struct + // Column for index of UDAF UserData struct oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(aggKey); scaleAgg.push_back(0); @@ -2062,9 +2118,49 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::UBIGINT); widthAgg.push_back(sizeof(uint64_t)); 
funct->fAuxColumnIndex = colAgg++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + multiParmIndexes.push_back(colAgg); + ++colAgg; + + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -2101,7 +2197,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. AGG_MAP aggDupFuncMap; - pUDAFFunc = NULL; + projColsUDAFIdx = 0; + int64_t multiParms = 0; // copy over the groupby vector // update the outputColumnIndex if returned @@ -2113,14 +2210,24 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // locate the return column position in aggregated rowgroup + uint64_t outIdx = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on final agg.: Extra parms for an aggregate have no work there. 
+ ++multiParms; + continue; + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2146,6 +2253,28 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + switch (aggOp) { case ROWAGG_DISTINCT_AVG: @@ -2396,7 +2525,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep1PhaseDistinctAggregate: " << emsg << " oid=" @@ -2420,7 +2549,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -2430,13 +2559,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (dupGroupbyIndex != -1) functionVec2.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + + if (aggOp == ROWAGG_UDAF) + { + 
funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, outIdx)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2472,6 +2609,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -2489,7 +2628,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -2549,7 +2688,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2704,6 +2843,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. 
+ int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -2711,6 +2855,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } + if (returnedColVec[k].first != distinctColKey) continue; @@ -2731,7 +2881,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } @@ -2750,9 +2900,16 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } + // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2768,7 +2925,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2790,7 +2947,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } @@ -2843,7 +3000,11 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; - uint32_t projColsUDAFIndex = 0; + // For UDAF + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; + UDAFColumn* udafc = 
NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2886,14 +3047,13 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3022,12 +3182,12 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { @@ -3036,12 +3196,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3240,7 +3399,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); 
@@ -3258,9 +3417,48 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(bigUintWidth); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -3283,6 +3481,8 @@ void TupleAggregateStep::prep2PhasesAggregate( map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIdx = 0; + // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3292,6 +3492,9 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3299,15 +3502,36 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + continue; + } + // Is this a UDAF? 
use the function as part of the key. - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3408,7 +3632,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3430,7 +3654,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3441,7 +3665,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3450,22 +3674,20 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, 
-1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3500,6 +3722,8 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3517,7 +3741,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3769,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3624,7 +3848,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; - uint32_t projColsUDAFIndex = 0; + // For UDAF + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3691,18 +3919,18 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3838,12 +4066,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { @@ -3856,7 +4084,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4050,7 +4278,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4069,9 +4297,49 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -4087,6 +4355,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); @@ -4098,16 +4368,29 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + + if (!udafFuncCol) + { + throw 
logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } + funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fOutputColumnIndex - multiParms, + udafFuncCol->fAuxColumnIndex - multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } else { @@ -4115,19 +4398,28 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fOutputColumnIndex - multiParms, + funcPm->fAuxColumnIndex - multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = NULL; } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. + for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4429,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. 
+ int64_t multiParms = 0; + projColsUDAFIdx = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4150,15 +4447,48 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4292,6 +4622,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); + colUm -= multiParms; } // not a direct hit -- a returned column is not already in the RG from PMs @@ -4328,8 +4659,18 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); scaleAggDist.push_back(0); - precisionAggDist.push_back(19); - 
typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + + if (isUnsigned(typeAggUm[colUm])) + { + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + } + else + { + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + } + widthAggDist.push_back(bigIntWidth); } } @@ -4377,7 +4718,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4401,7 +4742,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4410,23 +4751,20 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4463,6 +4801,8 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4480,7 +4820,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4880,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +5027,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. 
+ int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +5039,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } + if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +5066,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +5083,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } + // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +5110,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +5131,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 4d24f0b4b..823b2bd04 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -569,6 +569,7 @@ void 
WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) for (RetColsVector::iterator i = jobInfo.windowCols.begin(); i < jobInfo.windowCols.end(); i++) { + bool isUDAF = false; // window function type WindowFunctionColumn* wc = dynamic_cast(i->get()); uint64_t ridx = getColumnIndex(*i, colIndexMap, jobInfo); // result index @@ -590,6 +591,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // if (boost::iequals(wc->functionName(),"UDAF_FUNC") if (wc->functionName() == "UDAF_FUNC") { + isUDAF = true; ++wfsUserFunctionCount; } @@ -646,8 +648,12 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // column type for functor templates int ct = 0; + if (isUDAF) + { + ct = wc->getUDAFContext().getResultType(); + } // make sure index is in range - if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) + else if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) ct = types[fields[1]]; // workaround for functions using "within group (order by)" syntax diff --git a/dbcon/mysql/CMakeLists.txt b/dbcon/mysql/CMakeLists.txt index 104e6bf6e..ae8f30622 100644 --- a/dbcon/mysql/CMakeLists.txt +++ b/dbcon/mysql/CMakeLists.txt @@ -27,10 +27,12 @@ add_library(calmysql SHARED ${libcalmysql_SRCS}) target_link_libraries(calmysql ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} ${NETSNMP_LIBRARIES} ${SERVER_BUILD_INCLUDE_DIR}/../libservices/libmysqlservices.a threadpool) +SET_TARGET_PROPERTIES(calmysql PROPERTIES LINK_FLAGS "${calmysql_link_flags} -Wl,-E") set_target_properties(calmysql PROPERTIES VERSION 1.0.0 SOVERSION 1) SET ( is_columnstore_tables_SRCS is_columnstore_tables.cpp + sm.cpp ) add_library(is_columnstore_tables SHARED ${is_columnstore_tables_SRCS}) @@ -42,6 +44,7 @@ set_target_properties(is_columnstore_tables PROPERTIES VERSION 1.0.0 SOVERSION 1 SET ( is_columnstore_columns_SRCS is_columnstore_columns.cpp + sm.cpp ) add_library(is_columnstore_columns SHARED 
${is_columnstore_columns_SRCS}) @@ -53,6 +56,7 @@ set_target_properties(is_columnstore_columns PROPERTIES VERSION 1.0.0 SOVERSION SET ( is_columnstore_extents_SRCS is_columnstore_extents.cpp + sm.cpp ) add_library(is_columnstore_extents SHARED ${is_columnstore_extents_SRCS}) @@ -64,6 +68,7 @@ set_target_properties(is_columnstore_extents PROPERTIES VERSION 1.0.0 SOVERSION SET ( is_columnstore_files_SRCS is_columnstore_files.cpp + sm.cpp ) add_library(is_columnstore_files SHARED ${is_columnstore_files_SRCS}) diff --git a/dbcon/mysql/columnstore_info.sql b/dbcon/mysql/columnstore_info.sql index 563052a11..d0433a0d9 100644 --- a/dbcon/mysql/columnstore_info.sql +++ b/dbcon/mysql/columnstore_info.sql @@ -37,43 +37,56 @@ DROP PROCEDURE IF EXISTS `table_usage` // CREATE PROCEDURE table_usage (IN t_schema char(64), IN t_name char(64)) `table_usage`: BEGIN + DECLARE done INTEGER DEFAULT 0; + DECLARE dbname VARCHAR(64); + DECLARE tbname VARCHAR(64); + DECLARE object_ids TEXT; + DECLARE dictionary_object_ids TEXT; DECLARE `locker` TINYINT UNSIGNED DEFAULT IS_USED_LOCK('table_usage'); - + DECLARE columns_list CURSOR FOR SELECT TABLE_SCHEMA, TABLE_NAME, GROUP_CONCAT(object_id) OBJECT_IDS, GROUP_CONCAT(dictionary_object_id) DICT_OBJECT_IDS FROM INFORMATION_SCHEMA.COLUMNSTORE_COLUMNS WHERE table_name = t_name and table_schema = t_schema GROUP BY table_schema, table_name; + DECLARE columns_list_sc CURSOR FOR SELECT TABLE_SCHEMA, TABLE_NAME, GROUP_CONCAT(object_id) OBJECT_IDS, GROUP_CONCAT(dictionary_object_id) DICT_OBJECT_IDS FROM INFORMATION_SCHEMA.COLUMNSTORE_COLUMNS WHERE table_schema = t_schema GROUP BY table_schema, table_name; + DECLARE columns_list_all CURSOR FOR SELECT TABLE_SCHEMA, TABLE_NAME, GROUP_CONCAT(object_id) OBJECT_IDS, GROUP_CONCAT(dictionary_object_id) DICT_OBJECT_IDS FROM INFORMATION_SCHEMA.COLUMNSTORE_COLUMNS GROUP BY table_schema, table_name; + DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1; IF `locker` IS NOT NULL THEN SIGNAL SQLSTATE '45000' SET 
MESSAGE_TEXT = 'Error acquiring table_usage lock'; LEAVE `table_usage`; END IF; DO GET_LOCK('table_usage', 0); - DROP TABLE IF EXISTS columnstore_info.columnstore_columns; DROP TABLE IF EXISTS columnstore_info.columnstore_files; - CREATE TABLE columnstore_info.columnstore_columns engine=myisam as (select * from information_schema.columnstore_columns); - ALTER TABLE columnstore_info.columnstore_columns ADD INDEX `object_id` (`object_id`); - ALTER TABLE columnstore_info.columnstore_columns ADD INDEX `dictionary_object_id` (`dictionary_object_id`); - CREATE TABLE columnstore_info.columnstore_files engine=myisam as (select * from information_schema.columnstore_files); - ALTER TABLE columnstore_info.columnstore_files ADD INDEX `object_id` (`object_id`); + CREATE TEMPORARY TABLE columnstore_info.columnstore_files (TABLE_SCHEMA VARCHAR(64), TABLE_NAME VARCHAR(64), DATA BIGINT, DICT BIGINT); + IF t_name IS NOT NULL THEN -SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(data) as DATA_DISK_USAGE, columnstore_info.format_filesize(dict) as DICT_DISK_USAGE, columnstore_info.format_filesize(data + COALESCE(dict, 0)) as TOTAL_USAGE FROM ( -SELECT TABLE_SCHEMA, TABLE_NAME, (SELECT sum(cf.file_size) as data FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema) as data, (SELECT sum(cf.file_size) as dict FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.dictionary_object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema GROUP BY table_schema, table_name) as dict -FROM -columnstore_info.columnstore_columns ics where table_name = t_name and (table_schema = t_schema or t_schema IS NULL) -group by table_schema, table_name -) q; + OPEN columns_list; ELSEIF t_schema IS NOT NULL THEN -SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(data) as 
DATA_DISK_USAGE, columnstore_info.format_filesize(dict) as DICT_DISK_USAGE, columnstore_info.format_filesize(data + COALESCE(dict, 0)) as TOTAL_USAGE FROM ( -SELECT TABLE_SCHEMA, TABLE_NAME, (SELECT sum(cf.file_size) as data FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema) as data, (SELECT sum(cf.file_size) as dict FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.dictionary_object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema GROUP BY table_schema, table_name) as dict -FROM -columnstore_info.columnstore_columns ics where table_schema = t_schema -group by table_schema, table_name -) q; + OPEN columns_list_sc; ELSE -SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(data) as DATA_DISK_USAGE, columnstore_info.format_filesize(dict) as DICT_DISK_USAGE, columnstore_info.format_filesize(data + COALESCE(dict, 0)) as TOTAL_USAGE FROM ( -SELECT TABLE_SCHEMA, TABLE_NAME, (SELECT sum(cf.file_size) as data FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema) as data, (SELECT sum(cf.file_size) as dict FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.dictionary_object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema GROUP BY table_schema, table_name) as dict -FROM -columnstore_info.columnstore_columns ics -group by table_schema, table_name -) q; + OPEN columns_list_all; END IF; - DROP TABLE IF EXISTS columnstore_info.columnstore_columns; + + files_table: LOOP + IF t_name IS NOT NULL THEN + FETCH columns_list INTO dbname, tbname, object_ids, dictionary_object_ids; + ELSEIF t_schema IS NOT NULL THEN + FETCH columns_list_sc 
INTO dbname, tbname, object_ids, dictionary_object_ids; + ELSE + FETCH columns_list_all INTO dbname, tbname, object_ids, dictionary_object_ids; + END IF; + IF done = 1 THEN LEAVE files_table; + END IF; + INSERT INTO columnstore_info.columnstore_files (SELECT dbname, tbname, sum(file_size), 0 FROM information_schema.columnstore_files WHERE find_in_set(object_id, object_ids)); + IF dictionary_object_ids IS NOT NULL THEN + UPDATE columnstore_info.columnstore_files SET DICT = (SELECT sum(file_size) FROM information_schema.columnstore_files WHERE find_in_set(object_id, dictionary_object_ids)) WHERE TABLE_SCHEMA = dbname AND TABLE_NAME = tbname; + END IF; + END LOOP; + IF t_name IS NOT NULL THEN + CLOSE columns_list; + ELSEIF t_schema IS NOT NULL THEN + CLOSE columns_list_sc; + ELSE + CLOSE columns_list_all; + END IF; + SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(DATA) as DATA_DISK_USAGE, columnstore_info.format_filesize(DICT) as DICT_DATA_USAGE, columnstore_info.format_filesize(DATA + COALESCE(DICT, 0)) as TOTAL_USAGE FROM columnstore_info.columnstore_files; + DROP TABLE IF EXISTS columnstore_info.columnstore_files; DO RELEASE_LOCK('table_usage'); END // diff --git a/dbcon/mysql/ha_calpont.cpp b/dbcon/mysql/ha_calpont.cpp index 35953fc34..51ede22d5 100644 --- a/dbcon/mysql/ha_calpont.cpp +++ b/dbcon/mysql/ha_calpont.cpp @@ -1156,7 +1156,11 @@ create_calpont_group_by_handler(THD* thd, Query* query) { ha_calpont_group_by_handler* handler = NULL; - if ( thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE ) + // Create a handler if there is an agregate or a GROUP BY + // and if vtable was explicitly disabled. 
+ if ( thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE + && thd->variables.infinidb_vtable_mode == 0 + && ( query->group_by || thd->lex->select_lex.with_sum_func) ) { handler = new ha_calpont_group_by_handler(thd, query); @@ -1169,6 +1173,33 @@ create_calpont_group_by_handler(THD* thd, Query* query) return handler; } +/*********************************************************** + * DESCRIPTION: + * GROUP BY handler constructor + * PARAMETERS: + * thd - THD pointer. + * query - Query describing structure + ***********************************************************/ +ha_calpont_group_by_handler::ha_calpont_group_by_handler(THD* thd_arg, Query* query) + : group_by_handler(thd_arg, calpont_hton), + select(query->select), + table_list(query->from), + distinct(query->distinct), + where(query->where), + group_by(query->group_by), + order_by(query->order_by), + having(query->having) +{ +} + +/*********************************************************** + * DESCRIPTION: + * GROUP BY destructor + ***********************************************************/ +ha_calpont_group_by_handler::~ha_calpont_group_by_handler() +{ +} + /*********************************************************** * DESCRIPTION: * Makes the plan and prepares the data @@ -1258,4 +1289,36 @@ mysql_declare_plugin(columnstore) 0 /* config flags */ } mysql_declare_plugin_end; +maria_declare_plugin(columnstore) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &columnstore_storage_engine, + "Columnstore", + "MariaDB", + "Columnstore storage engine", + PLUGIN_LICENSE_GPL, + columnstore_init_func, + columnstore_done_func, + 0x0100, /* 1.0 */ + NULL, /* status variables */ + calpont_system_variables, /* system variables */ + "1.0", /* string version */ + MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ +}, +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &infinidb_storage_engine, + "InfiniDB", + "MariaDB", + "Columnstore storage engine (deprecated: use columnstore)", + PLUGIN_LICENSE_GPL, + infinidb_init_func, + 
infinidb_done_func, + 0x0100, /* 1.0 */ + NULL, /* status variables */ + calpont_system_variables, /* system variables */ + "1.0", /* string version */ + MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ +} +maria_declare_plugin_end; diff --git a/dbcon/mysql/ha_calpont.h b/dbcon/mysql/ha_calpont.h index bcbcdc5da..e618ed4f0 100644 --- a/dbcon/mysql/ha_calpont.h +++ b/dbcon/mysql/ha_calpont.h @@ -255,12 +255,16 @@ public: * One should read comments in server/sql/group_by_handler.h * Attributes: * select - attribute contains all GROUP BY, HAVING, ORDER items and calls it - * an extended SELECT list accordin to comments in - * server/sql/group_handler.cc. - * So the temporary table for - * select count(*) from b group by a having a > 3 order by a - * will have 4 columns not 1. - * However server ignores all NULLs used in GROUP BY, HAVING, ORDER. + * an extended SELECT list according to comments in + * server/sql/group_handler.cc. + * So the temporary table for + * select count(*) from b group by a having a > 3 order by a + * will have 4 columns not 1. + * However server ignores all NULLs used in + * GROUP BY, HAVING, ORDER. + * select_list_descr - contains Item description returned by Item->print() + * that is used in lookup for corresponding columns in + * extended SELECT list. * table_list - contains all tables involved. Must be CS tables only. * distinct - looks like a useless thing for now. Couldn't get it set by server. * where - where items. 
@@ -275,17 +279,8 @@ public: class ha_calpont_group_by_handler: public group_by_handler { public: - ha_calpont_group_by_handler(THD* thd_arg, Query* query) - : group_by_handler(thd_arg, calpont_hton), - select(query->select), - table_list(query->from), - distinct(query->distinct), - where(query->where), - group_by(query->group_by), - order_by(query->order_by), - having(query->having) - { } - ~ha_calpont_group_by_handler() { } + ha_calpont_group_by_handler(THD* thd_arg, Query* query); + ~ha_calpont_group_by_handler(); int init_scan(); int next_row(); int end_scan(); diff --git a/dbcon/mysql/ha_calpont_ddl.cpp b/dbcon/mysql/ha_calpont_ddl.cpp index fa8ef79dd..0aebfaaee 100644 --- a/dbcon/mysql/ha_calpont_ddl.cpp +++ b/dbcon/mysql/ha_calpont_ddl.cpp @@ -1912,6 +1912,79 @@ pair parseTableName(const string& tn) } +// +// get_field_default_value: Returns the default value as a string value +// NOTE: This is duplicated code copied from show.cc and a MDEV-17006 has +// been created. +// + +static bool get_field_default_value(THD *thd, Field *field, String *def_value, + bool quoted) +{ + bool has_default; + enum enum_field_types field_type= field->type(); + + has_default= (field->default_value || + (!(field->flags & NO_DEFAULT_VALUE_FLAG) && + field->unireg_check != Field::NEXT_NUMBER)); + + def_value->length(0); + if (has_default) + { + StringBuffer str(field->charset()); + if (field->default_value) + { + field->default_value->print(&str); + if (field->default_value->expr->need_parentheses_in_default()) + { + def_value->set_charset(&my_charset_utf8mb4_general_ci); + def_value->append('('); + def_value->append(str); + def_value->append(')'); + } + else + def_value->append(str); + } + else if (!field->is_null()) + { // Not null by default + if (field_type == MYSQL_TYPE_BIT) + { + str.qs_append('b'); + str.qs_append('\''); + str.qs_append(field->val_int(), 2); + str.qs_append('\''); + quoted= 0; + } + else + { + field->val_str(&str); + if (!field->str_needs_quotes()) + 
quoted= 0; + } + if (str.length()) + { + StringBuffer def_val; + uint dummy_errors; + /* convert to system_charset_info == utf8 */ + def_val.copy(str.ptr(), str.length(), field->charset(), + system_charset_info, &dummy_errors); + if (quoted) + append_unescaped(def_value, def_val.ptr(), def_val.length()); + else + def_value->append(def_val); + } + else if (quoted) + def_value->set(STRING_WITH_LEN("''"), system_charset_info); + } + else if (field->maybe_null() && quoted) + def_value->set(STRING_WITH_LEN("NULL"), system_charset_info); // Null as default + else + return 0; + + } + return has_default; +} + int ha_calpont_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* create_info, cal_connection_info& ci) { #ifdef INFINIDB_DEBUG @@ -2045,7 +2118,7 @@ int ha_calpont_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* } // @bug 3908. error out primary key for now. - if (table_arg->key_info && table_arg->key_info->name && string(table_arg->key_info->name) == "PRIMARY") + if (table_arg->key_info && table_arg->key_info->name.length && string(table_arg->key_info->name.str) == "PRIMARY") { string emsg = logging::IDBErrorInfo::instance()->errorMsg(ERR_CONSTRAINTS); setError(thd, ER_CHECK_NOT_IMPLEMENTED, emsg); @@ -2096,6 +2169,97 @@ int ha_calpont_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* return 1; } + // + // Check if this is a "CREATE TABLE ... LIKE " statement. + // If so generate a full create table statement using the properties of + // the source table. Note that source table has to be a columnstore table and + // we only check for currently supported options. 
+ // + + if (thd->lex->create_info.like()) + { + TABLE_SHARE *share = table_arg->s; + my_bitmap_map *old_map; // To save the read_set + char datatype_buf[MAX_FIELD_WIDTH], def_value_buf[MAX_FIELD_WIDTH]; + String datatype, def_value; + ostringstream oss; + string tbl_name (name+2); + std::replace(tbl_name.begin(), tbl_name.end(), '/', '.'); + + // Save the current read_set map and mark it for read + old_map= tmp_use_all_columns(table_arg, table_arg->read_set); + + oss << "CREATE TABLE " << tbl_name << " ("; + + restore_record(table_arg, s->default_values); + for (Field **field= table_arg->field; *field; field++) + { + uint flags = (*field)->flags; + datatype.set(datatype_buf, sizeof(datatype_buf), system_charset_info); + (*field)->sql_type(datatype); + if (field != table_arg->field) + oss << ", "; + oss << (*field)->field_name.str << " " << datatype.ptr(); + + if (flags & NOT_NULL_FLAG) + oss << " NOT NULL"; + + def_value.set(def_value_buf, sizeof(def_value_buf), system_charset_info); + if (get_field_default_value(thd, *field, &def_value, true)) { + oss << " DEFAULT " << def_value.c_ptr(); + } + if ((*field)->comment.length) + { + String comment; + append_unescaped(&comment, (*field)->comment.str, (*field)->comment.length); + oss << " COMMENT "; + oss << comment.c_ptr(); + } + + } + // End the list of columns + oss<< ") ENGINE=columnstore "; + + // Process table level options + + if (create_info->auto_increment_value > 1) + { + oss << " AUTO_INCREMENT=" << create_info->auto_increment_value; + } + + if (share->table_charset) + { + oss << " DEFAULT CHARSET=" << share->table_charset->csname; + } + + // Process table level options such as MIN_ROWS, MAX_ROWS, COMMENT + + if (share->min_rows) + { + char buff[80]; + longlong10_to_str(share->min_rows, buff, 10); + oss << " MIN_ROWS=" << buff; + } + + if (share->max_rows) { + char buff[80]; + longlong10_to_str(share->max_rows, buff, 10); + oss << " MAX_ROWS=" << buff; + } + + if (share->comment.length) { + String comment; + 
append_unescaped(&comment, share->comment.str, share->comment.length); + oss << " COMMENT "; + oss << comment.c_ptr(); + } + + oss << ";"; + stmt = oss.str(); + + tmp_restore_column_map(table_arg->read_set, old_map); + } + rc = ProcessDDLStatement(stmt, db, tbl, tid2sid(thd->thread_id), emsg, compressiontype, isAnyAutoincreCol, startValue, columnName); if (rc != 0) @@ -2214,8 +2378,8 @@ int ha_calpont_impl_rename_table_(const char* from, const char* to, cal_connecti stmt = "alter table `" + fromPair.second + "` rename to `" + toPair.second + "`;"; string db; - if ( thd->db ) - db = thd->db; + if ( thd->db.length ) + db = thd->db.str; else if ( fromPair.first.length() != 0 ) db = fromPair.first; else @@ -2224,7 +2388,7 @@ int ha_calpont_impl_rename_table_(const char* from, const char* to, cal_connecti int rc = ProcessDDLStatement(stmt, db, "", tid2sid(thd->thread_id), emsg); if (rc != 0) - push_warning(thd, Sql_condition::WARN_LEVEL_ERROR, 9999, emsg.c_str()); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, emsg.c_str()); return rc; } @@ -2245,8 +2409,8 @@ extern "C" THD* thd = current_thd; string db(""); - if ( thd->db ) - db = thd->db; + if ( thd->db.length ) + db = thd->db.str; int compressiontype = thd->variables.infinidb_compression_type; @@ -2266,7 +2430,7 @@ extern "C" int rc = ProcessDDLStatement(stmt, db, "", tid2sid(thd->thread_id), emsg, compressiontype); if (rc != 0) - push_warning(thd, Sql_condition::WARN_LEVEL_ERROR, 9999, emsg.c_str()); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, emsg.c_str()); return rc; } diff --git a/dbcon/mysql/ha_calpont_dml.cpp b/dbcon/mysql/ha_calpont_dml.cpp index cf103a801..43bdc8cd6 100644 --- a/dbcon/mysql/ha_calpont_dml.cpp +++ b/dbcon/mysql/ha_calpont_dml.cpp @@ -121,7 +121,7 @@ int buildBuffer(uchar* buf, string& buffer, int& columns, TABLE* table) columns++; - cols.append((*field)->field_name); + cols.append((*field)->field_name.str); if (ptr == end_ptr) { @@ -236,7 +236,7 @@ uint32_t 
buildValueList (TABLE* table, cal_connection_info& ci ) } } - ci.colNameList.push_back((*field)->field_name); + ci.colNameList.push_back((*field)->field_name.str); columnPos++; } @@ -895,6 +895,11 @@ int ha_calpont_impl_write_batch_row_(uchar* buf, TABLE* table, cal_impl_if::cal_ longlong tmp = my_time_packed_from_binary(pos, table->field[colpos]->decimals()); TIME_from_longlong_time_packed(<ime, tmp); + if (ltime.neg) + { + fprintf(ci.filePtr, "-"); + } + if (!ltime.second_part) { fprintf(ci.filePtr, "%02d:%02d:%02d%c", @@ -1815,8 +1820,11 @@ int ha_calpont_impl_write_batch_row_(uchar* buf, TABLE* table, cal_impl_if::cal_ } else if (ci.columnTypes[colpos].colWidth < 16777216) { - dataLength = *(uint32_t*) buf; - buf = buf + 3 ; + dataLength = *(uint16_t*) buf; + buf = buf + 2 ; + if (*(uint8_t*)buf) + dataLength += 256*256*(*(uint8_t*)buf) ; + buf++; } else { diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index fac0cd032..789d85ad3 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -42,13 +42,15 @@ #include #include #include + +#include + using namespace std; #include #include #include #include -using namespace boost; #include "errorids.h" using namespace logging; @@ -133,7 +135,7 @@ namespace { string lower(string str) { - algorithm::to_lower(str); + boost::algorithm::to_lower(str); return str; } } @@ -188,6 +190,63 @@ bool nonConstFunc(Item_func* ifp) return false; } +/*@brief buildAggFrmTempField- build aggr func from extSELECT list item*/ +/*********************************************************** + * DESCRIPTION: + * Server adds additional aggregation items to extended SELECT list and + * references them in projection and HAVING. This f() finds + * corresponding item in extSelAggColsItems and builds + * ReturnedColumn using the item. 
+ * PARAMETERS: + * item Item* used to build aggregation + * gwi main structure + * RETURNS + * ReturnedColumn* if corresponding Item has been found + * NULL otherwise + ***********************************************************/ +ReturnedColumn* buildAggFrmTempField(Item* item, gp_walk_info& gwi) +{ + ReturnedColumn* result = NULL; + Item_field* ifip = NULL; + Item_ref* irip; + Item_func_or_sum* isfp; + + switch ( item->type() ) + { + case Item::FIELD_ITEM: + ifip = reinterpret_cast(item); + break; + default: + irip = reinterpret_cast(item); + if ( irip ) + ifip = reinterpret_cast(irip->ref[0]); + break; + } + + if (ifip && ifip->field) + { + std::vector::iterator iter = gwi.extSelAggColsItems.begin(); + for ( ; iter != gwi.extSelAggColsItems.end(); iter++ ) + { + //Item* temp_isfp = *iter; + isfp = reinterpret_cast(*iter); + + if ( isfp->type() == Item::SUM_FUNC_ITEM && + isfp->result_field == ifip->field ) + { + ReturnedColumn* rc = buildAggregateColumn(isfp, gwi); + + if (rc) + result = rc; + + break; + } + } + } + + return result; +} + string getViewName(TABLE_LIST* table_ptr) { string viewName = ""; @@ -200,13 +259,13 @@ string getViewName(TABLE_LIST* table_ptr) if (view) { if (!view->derived) - viewName = view->alias; + viewName = view->alias.str; while ((view = view->referencing_view)) { if (view->derived) continue; - viewName = view->alias + string(".") + viewName; + viewName = view->alias.str + string(".") + viewName; } } @@ -222,7 +281,7 @@ void debug_walk(const Item* item, void* arg) { Item_field* ifp = (Item_field*)item; cerr << "FIELD_ITEM: " << (ifp->db_name ? ifp->db_name : "") << '.' << bestTableName(ifp) << - '.' << ifp->field_name << endl; + '.' 
<< ifp->field_name.str << endl; break; } @@ -231,7 +290,7 @@ void debug_walk(const Item* item, void* arg) Item_int* iip = (Item_int*)item; cerr << "INT_ITEM: "; - if (iip->name) cerr << iip->name << " (from name string)" << endl; + if (iip->name.length) cerr << iip->name.str << " (from name string)" << endl; else cerr << iip->val_int() << endl; break; @@ -349,7 +408,7 @@ void debug_walk(const Item* item, void* arg) while ((item = it++)) { Field* equal_field = it.get_curr_field(); - cerr << equal_field->field_name << endl; + cerr << equal_field->field_name.str << endl; } break; @@ -501,12 +560,12 @@ void debug_walk(const Item* item, void* arg) case Item::SUM_FUNC_ITEM: { Item_sum* isp = (Item_sum*)item; - char* item_name = item->name; + char* item_name = const_cast(item->name.str); // MCOL-1052 This is an extended SELECT list item - if (!item_name && isp->get_arg_count() && isp->get_arg(0)->name) + if (!item_name && isp->get_arg_count() && isp->get_arg(0)->name.length) { - item_name = isp->get_arg(0)->name; + item_name = const_cast(isp->get_arg(0)->name.str); } else if (!item_name && isp->get_arg_count() && isp->get_arg(0)->type() == Item::INT_ITEM) @@ -610,7 +669,7 @@ void debug_walk(const Item* item, void* arg) // could be used on alias. // could also be used to tell correlated join (equal level). cerr << "CACHED REF FIELD_ITEM: " << ifp->db_name << '.' << bestTableName(ifp) << - '.' << ifp->field_name << endl; + '.' << ifp->field_name.str << endl; break; } else if (field->type() == Item::FUNC_ITEM) @@ -661,7 +720,7 @@ void debug_walk(const Item* item, void* arg) realType += '.'; realType += bestTableName(ifp); realType += '.'; - realType += ifp->field_name; + realType += ifp->field_name.str; break; } @@ -710,14 +769,14 @@ void debug_walk(const Item* item, void* arg) // MCOL-1052 The field referenced presumable came from // extended SELECT list. 
- if ( !ifp->field_name ) + if ( !ifp->field_name.str ) { - cerr << "REF extra FIELD_ITEM: " << ifp->name << endl; + cerr << "REF extra FIELD_ITEM: " << ifp->name.str << endl; } else { cerr << "REF FIELD_ITEM: " << ifp->db_name << '.' << bestTableName(ifp) << '.' << - ifp->field_name << endl; + ifp->field_name.str << endl; } break; @@ -802,7 +861,7 @@ void debug_walk(const Item* item, void* arg) // could be used on alias. // could also be used to tell correlated join (equal level). cerr << "CACHED FIELD_ITEM: " << ifp->db_name << '.' << bestTableName(ifp) << - '.' << ifp->field_name << endl; + '.' << ifp->field_name.str << endl; break; } else if (field->type() == Item::REF_ITEM) @@ -848,7 +907,7 @@ void debug_walk(const Item* item, void* arg) realType += '.'; realType += bestTableName(ifp); realType += '.'; - realType += ifp->field_name; + realType += ifp->field_name.str; break; } @@ -949,9 +1008,9 @@ void buildNestedTableOuterJoin(gp_walk_info& gwi, TABLE_LIST* table_ptr) if (table->outer_join) { CalpontSystemCatalog::TableAliasName ta = make_aliasview( - (table->db ? table->db : ""), - (table->table_name ? table->table_name : ""), - (table->alias ? table->alias : ""), + (table->db.length ? table->db.str : ""), + (table->table_name.length ? table->table_name.str : ""), + (table->alias.length ? table->alias.str : ""), getViewName(table)); gwi.innerTables.insert(ta); } @@ -964,9 +1023,9 @@ void buildNestedTableOuterJoin(gp_walk_info& gwi, TABLE_LIST* table_ptr) while ((tab = li++)) { CalpontSystemCatalog::TableAliasName ta = make_aliasview( - (tab->db ? tab->db : ""), - (tab->table_name ? tab->table_name : ""), - (tab->alias ? tab->alias : ""), + (tab->db.length ? tab->db.str : ""), + (tab->table_name.length ? tab->table_name.str : ""), + (tab->alias.length ? 
tab->alias.str : ""), getViewName(tab)); gwi.innerTables.insert(ta); } @@ -1013,9 +1072,9 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) continue; CalpontSystemCatalog:: TableAliasName tan = make_aliasview( - (table_ptr->db ? table_ptr->db : ""), - (table_ptr->table_name ? table_ptr->table_name : ""), - (table_ptr->alias ? table_ptr->alias : ""), + (table_ptr->db.length ? table_ptr->db.str : ""), + (table_ptr->table_name.length ? table_ptr->table_name.str : ""), + (table_ptr->alias.length ? table_ptr->alias.str : ""), getViewName(table_ptr)); if (table_ptr->outer_join && table_ptr->on_expr) @@ -1031,9 +1090,9 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) while ((table = li++)) { CalpontSystemCatalog::TableAliasName ta = make_aliasview( - (table->db ? table->db : ""), - (table->table_name ? table->table_name : ""), - (table->alias ? table->alias : ""), + (table->db.length ? table->db.str : ""), + (table->table_name.length ? table->table_name.str : ""), + (table->alias.length ? table->alias.str : ""), getViewName(table)); gwi_outer.innerTables.insert(ta); } @@ -1041,10 +1100,10 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) #ifdef DEBUG_WALK_COND - if (table_ptr->alias) - cerr << table_ptr->alias ; - else if (table_ptr->alias) - cerr << table_ptr->alias; + if (table_ptr->alias.length) + cerr << table_ptr->alias.str; + else if (table_ptr->alias.length) + cerr << table_ptr->alias.str; cerr << " outer table expression: " << endl; expr->traverse_cond(debug_walk, &gwi_outer, Item::POSTFIX); @@ -1066,9 +1125,9 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) while ((table = li++)) { CalpontSystemCatalog:: TableAliasName ta = make_aliasview( - (table->db ? table->db : ""), - (table->table_name ? table->table_name : ""), - (table->alias ? table->alias : ""), + (table->db.length ? table->db.str : ""), + (table->table_name.length ? table->table_name.str : ""), + (table->alias.length ? 
table->alias.str : ""), getViewName(table)); gwi_outer.innerTables.insert(ta); } @@ -1237,7 +1296,7 @@ bool buildRowColumnFilter(gp_walk_info* gwip, RowColumn* rhs, RowColumn* lhs, It logicOp = "or"; } - scoped_ptr lo(new LogicOperator(logicOp)); + boost::scoped_ptr lo(new LogicOperator(logicOp)); // 1st round. build the equivalent filters // two entries have been popped from the stack already: lhs and rhs @@ -1525,8 +1584,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) ifp->functype() == Item_func::ISNOTNULL_FUNC) { ReturnedColumn* rhs = NULL; - - if (!gwip->rcWorkStack.empty()) + if (!gwip->rcWorkStack.empty() && !gwip->inCaseStmt) { rhs = gwip->rcWorkStack.top(); gwip->rcWorkStack.pop(); @@ -1627,8 +1685,49 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) idbassert(ifp->argument_count() == 1); ParseTree* ptp = 0; + if (((Item_func*)(ifp->arguments()[0]))->functype() == Item_func::EQUAL_FUNC) + { + // negate it in place + // Note that an EQUAL_FUNC ( a <=> b) was converted to + // ( a = b OR ( a is null AND b is null) ) + // NOT of the above expression is: ( a != b AND (a is not null OR b is not null ) - if (isPredicateFunction(ifp->arguments()[0], gwip) || ifp->arguments()[0]->type() == Item::COND_ITEM) + if (!gwip->ptWorkStack.empty()) + ptp = gwip->ptWorkStack.top(); + + if (ptp) + { + ParseTree* or_ptp = ptp; + ParseTree* and_ptp = or_ptp->right(); + ParseTree* equal_ptp = or_ptp->left(); + ParseTree* nullck_left_ptp = and_ptp->left(); + ParseTree* nullck_right_ptp = and_ptp->right(); + SimpleFilter *sf_left_nullck = dynamic_cast(nullck_left_ptp->data()); + SimpleFilter *sf_right_nullck = dynamic_cast(nullck_right_ptp->data()); + SimpleFilter *sf_equal = dynamic_cast(equal_ptp->data()); + + if (sf_left_nullck && sf_right_nullck && sf_equal) { + // Negate the null checks + sf_left_nullck->op()->reverseOp(); + sf_right_nullck->op()->reverseOp(); + sf_equal->op()->reverseOp(); + // Rehook the nodes + ptp = and_ptp; + 
ptp->left(equal_ptp); + ptp->right(or_ptp); + or_ptp->left(nullck_left_ptp); + or_ptp->right(nullck_right_ptp); + gwip->ptWorkStack.pop(); + gwip->ptWorkStack.push(ptp); + } + else { + gwip->fatalParseError = true; + gwip->parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_ASSERTION_FAILURE); + return false; + } + } + } + else if (isPredicateFunction(ifp->arguments()[0], gwip) || ifp->arguments()[0]->type() == Item::COND_ITEM) { // negate it in place if (!gwip->ptWorkStack.empty()) @@ -1702,7 +1801,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) } else if (ifp->functype() == Item_func::EQUAL_FUNC) { - // a = b OR (a IS NULL AND b IS NULL) + // Convert "a <=> b" to (a = b OR (a IS NULL AND b IS NULL))" idbassert (gwip->rcWorkStack.size() >= 2); ReturnedColumn* rhs = gwip->rcWorkStack.top(); gwip->rcWorkStack.pop(); @@ -1714,7 +1813,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) // b IS NULL ConstantColumn* nlhs1 = new ConstantColumn("", ConstantColumn::NULLDATA); sop.reset(new PredicateOperator("isnull")); - sop->setOpType(lhs->resultType(), rhs->resultType()); + sop->setOpType(lhs->resultType(), rhs->resultType()); sfn1 = new SimpleFilter(sop, rhs, nlhs1); ParseTree* ptpl = new ParseTree(sfn1); // a IS NULL @@ -1729,7 +1828,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) ptpn->right(ptpr); // a = b sop.reset(new PredicateOperator("=")); - sop->setOpType(lhs->resultType(), lhs->resultType()); + sop->setOpType(lhs->resultType(), rhs->resultType()); sfo = new SimpleFilter(sop, lhs->clone(), rhs->clone()); // OR with the NULL comparison tree ParseTree* ptp = new ParseTree(new LogicOperator("or")); @@ -2000,7 +2099,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) CalpontSystemCatalog::TableColName tcn = gwi.csc->colName(oidlist[j].objnum); CalpontSystemCatalog::ColType ct = gwi.csc->colType(oidlist[j].objnum); - if (strcasecmp(ifp->field_name, tcn.column.c_str()) == 0) + if 
(strcasecmp(ifp->field_name.str, tcn.column.c_str()) == 0) { // @bug4827. Remove the checking because outside tables could be the same // name as inner tables. This function is to identify column from a table, @@ -2022,7 +2121,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) sc->oid(oidlist[j].objnum); // @bug 3003. Keep column alias if it has. - sc->alias(ifp->is_autogenerated_name ? tcn.column : ifp->name); + sc->alias(ifp->is_autogenerated_name ? tcn.column : ifp->name.str); sc->tableAlias(lower(gwi.tbList[i].alias)); sc->viewName(lower(viewName)); @@ -2054,10 +2153,10 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) SimpleColumn* col = dynamic_cast(cols[j].get()); string alias = cols[j]->alias(); - if (strcasecmp(ifp->field_name, alias.c_str()) == 0 || + if (strcasecmp(ifp->field_name.str, alias.c_str()) == 0 || (col && alias.find(".") != string::npos && - (strcasecmp(ifp->field_name, col->columnName().c_str()) == 0 || - strcasecmp(ifp->field_name, (alias.substr(alias.find_last_of(".") + 1)).c_str()) == 0))) //@bug6066 + (strcasecmp(ifp->field_name.str, col->columnName().c_str()) == 0 || + strcasecmp(ifp->field_name.str, (alias.substr(alias.find_last_of(".") + 1)).c_str()) == 0))) //@bug6066 { // @bug4827. Remove the checking because outside tables could be the same // name as inner tables. This function is to identify column from a table, @@ -2079,7 +2178,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) sc->columnName(col->columnName()); // @bug 3003. Keep column alias if it has. - sc->alias(ifp->is_autogenerated_name ? cols[j]->alias() : ifp->name); + sc->alias(ifp->is_autogenerated_name ? 
cols[j]->alias() : ifp->name.str); sc->tableName(csep->derivedTbAlias()); sc->colPosition(j); string tableAlias(csep->derivedTbAlias()); @@ -2099,7 +2198,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) while (tblList) { - if (strcasecmp(tblList->alias, ifp->table_name) == 0) + if (strcasecmp(tblList->alias.str, ifp->table_name) == 0) { if (!tblList->outer_join) { @@ -2132,7 +2231,8 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) if (ifp->table_name) name += string(ifp->table_name) + "."; - name += ifp->name; + if (ifp->name.length) + name += ifp->name.str; args.add(name); gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_UNKNOWN_COL, args); } @@ -2393,7 +2493,7 @@ const string bestTableName(const Item_field* ifp) string field_table_table_name; if (ifp->cached_table) - field_table_table_name = ifp->cached_table->table_name; + field_table_table_name = ifp->cached_table->table_name.str; else if (ifp->field->table && ifp->field->table->s && ifp->field->table->s->table_name.str) field_table_table_name = ifp->field->table->s->table_name.str; @@ -2544,7 +2644,7 @@ SimpleColumn* getSmallestColumn(boost::shared_ptr csc, { // get the first column to project. @todo optimization to get the smallest one for foreign engine. 
Field* field = *(table->field); - SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name, tan.fIsInfiniDB, gwi.sessionid); + SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name.str, tan.fIsInfiniDB, gwi.sessionid); string alias(table->alias.ptr()); sc->tableAlias(lower(alias)); sc->isInfiniDB(false); @@ -2739,7 +2839,7 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB (const Item* item) return ct; } -ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport) +ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, bool pushdownHand) { ReturnedColumn* rc = NULL; @@ -2864,9 +2964,9 @@ ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupp } if (func_name == "+" || func_name == "-" || func_name == "*" || func_name == "/" ) - return buildArithmeticColumn(ifp, gwi, nonSupport); + return buildArithmeticColumn(ifp, gwi, nonSupport, pushdownHand); else - return buildFunctionColumn(ifp, gwi, nonSupport); + return buildFunctionColumn(ifp, gwi, nonSupport, pushdownHand); } case Item::SUM_FUNC_ITEM: @@ -2992,13 +3092,17 @@ ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupp } } - if (rc && item->name) - rc->alias(item->name); + if (rc && item->name.length) + rc->alias(item->name.str); return rc; } -ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport) +ArithmeticColumn* buildArithmeticColumn( + Item_func* item, + gp_walk_info& gwi, + bool& nonSupport, + bool pushdownHand) { if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -3013,15 +3117,15 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool ParseTree* lhs = 0, *rhs = 0; SRCP srcp; - if (item->name) - ac->alias(item->name); + if (item->name.length) + ac->alias(item->name.str); // 
argument_count() should generally be 2, except negate expression if (item->argument_count() == 2) { if (gwi.clauseType == SELECT || /*gwi.clauseType == HAVING || */gwi.clauseType == GROUP_BY || gwi.clauseType == FROM) // select list { - lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport)); + lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, pushdownHand)); if (!lhs->data() && (sfitempp[0]->type() == Item::FUNC_ITEM)) { @@ -3029,8 +3133,18 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool Item_func* ifp = (Item_func*)sfitempp[0]; lhs = buildParseTree(ifp, gwi, nonSupport); } + else if(pushdownHand && !lhs->data() && (sfitempp[0]->type() == Item::REF_ITEM)) + { + // There must be an aggregation column in extended SELECT + // list so find the corresponding column. + // Could have it set if there are aggregation funcs as this function arguments. + gwi.fatalParseError = false; - rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport)); + ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi); + if(rc) + lhs = new ParseTree(rc); + } + rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, pushdownHand)); if (!rhs->data() && (sfitempp[1]->type() == Item::FUNC_ITEM)) { @@ -3038,6 +3152,17 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool Item_func* ifp = (Item_func*)sfitempp[1]; rhs = buildParseTree(ifp, gwi, nonSupport); } + else if(pushdownHand && !rhs->data() && (sfitempp[1]->type() == Item::REF_ITEM)) + { + // There must be an aggregation column in extended SELECT + // list so find the corresponding column. + // Could have it set if there are aggregation funcs as this function arguments. 
+ gwi.fatalParseError = false; + + ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi); + if(rc) + rhs = new ParseTree(rc); + } } else // where clause { @@ -3198,7 +3323,11 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool return ac; } -ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport) +ReturnedColumn* buildFunctionColumn( + Item_func* ifp, + gp_walk_info& gwi, + bool& nonSupport, + bool pushdownHand) { if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -3239,7 +3368,7 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non // Arithmetic exp if (funcName == "+" || funcName == "-" || funcName == "*" || funcName == "/" ) { - ArithmeticColumn* ac = buildArithmeticColumn(ifp, gwi, nonSupport); + ArithmeticColumn* ac = buildArithmeticColumn(ifp, gwi, nonSupport, pushdownHand); return ac; } @@ -3317,13 +3446,13 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non for (uint32_t i = 0; i < ifp->argument_count(); i++) { // group by clause try to see if the arguments are alias - if (gwi.clauseType == GROUP_BY && ifp->arguments()[i]->name) + if (gwi.clauseType == GROUP_BY && ifp->arguments()[i]->name.length) { uint32_t j = 0; for (; j < gwi.returnedCols.size(); j++) { - if (string (ifp->arguments()[i]->name) == gwi.returnedCols[j]->alias()) + if (string (ifp->arguments()[i]->name.str) == gwi.returnedCols[j]->alias()) { ReturnedColumn* rc = gwi.returnedCols[j]->clone(); rc->orderPos(j); @@ -3357,8 +3486,8 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non } // @bug 3039 - //if (isPredicateFunction(ifp->arguments()[i], &gwi) || ifp->arguments()[i]->has_subquery()) - if (ifp->arguments()[i]->has_subquery()) + //if (isPredicateFunction(ifp->arguments()[i], &gwi) || ifp->arguments()[i]->with_subquery()) + if 
(ifp->arguments()[i]->with_subquery()) { nonSupport = true; gwi.fatalParseError = true; @@ -3366,7 +3495,15 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non return NULL; } - ReturnedColumn* rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport); + ReturnedColumn* rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport, pushdownHand); + + // MCOL-1510 It must be a temp table field, so find the corresponding column. + if (!rc && pushdownHand + && ifp->arguments()[i]->type() == Item::REF_ITEM) + { + gwi.fatalParseError = false; + rc = buildAggFrmTempField(ifp->arguments()[i], gwi); + } if (!rc || nonSupport) { @@ -3628,8 +3765,8 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non fc->resultType(ct); } - if (ifp->name) - fc->alias(ifp->name); + if (ifp->name.length) + fc->alias(ifp->name.str); // @3391. optimization. try to associate expression ID to the expression on the select list if (gwi.clauseType != SELECT) @@ -3671,8 +3808,14 @@ FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonS FuncExp* funcexp = FuncExp::instance(); string funcName = "case_simple"; - if (((Item_func_case*)item)->get_first_expr_num() == -1) + if (strcasecmp(((Item_func_case*)item)->case_type(), "searched") == 0) + { funcName = "case_searched"; + } +/* if (dynamic_cast(item)) + { + funcName = "case_searched"; + }*/ funcParms.reserve(item->argument_count()); // so buildXXXcolumn function will not pop stack. @@ -3712,8 +3855,12 @@ FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonS if (funcName == "case_searched" && (i < arg_offset)) { + // MCOL-1472 Nested CASE with an ISNULL predicate. We don't want the predicate + // to pull off of rcWorkStack, so we set this inCaseStmt flag to tell it + // not to. 
+ gwi.inCaseStmt = true; sptp.reset(buildParseTree((Item_func*)(item->arguments()[i]), gwi, nonSupport)); - + gwi.inCaseStmt = false; if (!gwi.ptWorkStack.empty() && *gwi.ptWorkStack.top()->data() == sptp->data()) { gwi.ptWorkStack.pop(); @@ -3856,7 +4003,7 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) bool isInformationSchema = false; // @bug5523 - if (ifp->cached_table && strcmp(ifp->cached_table->db, "information_schema") == 0) + if (ifp->cached_table && strcmp(ifp->cached_table->db.str, "information_schema") == 0) isInformationSchema = true; // support FRPM subquery. columns from the derived table has no definition @@ -3878,7 +4025,7 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) if (infiniDB) { ct = gwi.csc->colType( - gwi.csc->lookupOID(make_tcn(ifp->db_name, bestTableName(ifp), ifp->field_name))); + gwi.csc->lookupOID(make_tcn(ifp->db_name, bestTableName(ifp), ifp->field_name.str))); } else { @@ -3898,10 +4045,10 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) { case CalpontSystemCatalog::TINYINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = new SimpleColumn_Decimal<1>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<1>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } @@ -3909,10 +4056,10 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) case CalpontSystemCatalog::SMALLINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = 
new SimpleColumn_Decimal<2>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<2>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } @@ -3921,10 +4068,10 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) case CalpontSystemCatalog::INT: case CalpontSystemCatalog::MEDINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<4>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<4>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = new SimpleColumn_Decimal<4>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<4>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } @@ -3932,34 +4079,34 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) case CalpontSystemCatalog::BIGINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = new SimpleColumn_Decimal<8>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<8>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } break; case CalpontSystemCatalog::UTINYINT: - sc = new SimpleColumn_UINT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_UINT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; case CalpontSystemCatalog::USMALLINT: - sc = new SimpleColumn_UINT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name, 
infiniDB, gwi.sessionid); + sc = new SimpleColumn_UINT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UMEDINT: - sc = new SimpleColumn_UINT<4>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_UINT<4>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; case CalpontSystemCatalog::UBIGINT: - sc = new SimpleColumn_UINT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_UINT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; default: - sc = new SimpleColumn(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); } sc->resultType(ct); @@ -3976,7 +4123,7 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) // view name sc->viewName(lower(getViewName(ifp->cached_table))); - sc->alias(ifp->name); + sc->alias(ifp->name.str); sc->isInfiniDB(infiniDB); if (!infiniDB && ifp->field) @@ -4038,6 +4185,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + bool bIsConst = false; if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4054,6 +4205,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. 
InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 + if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4062,6 +4215,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) return NULL; } +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4077,449 +4231,544 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) ac = new AggregateColumn(gwi.sessionid); } - if (isp->name) - ac->alias(isp->name); + if (isp->name.length) + ac->alias(isp->name.str); if ((setAggOp(ac, isp))) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + + if (ac) + delete ac; + return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; - vector orderCols; - RowColumn* rowCol = new RowColumn(); - vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; - - for (uint32_t i = 0; i < select_ctn; i++) + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); + Item_func_group_concat* gc = (Item_func_group_concat*)isp; + vector orderCols; + RowColumn* rowCol = new RowColumn(); + vector selCols; - if (!rc || gwi.fatalParseError) - return NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = 
true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; + return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + + if (ac) + delete ac; + + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); + if (ac) + delete ac; - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + ac->aggParms().push_back(parm); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case 
CalpontSystemCatalog::UDECIMAL: - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; 
- ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)); + ac->constCol(parm); + bIsConst = true; + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name.length) + args.add(item->name.str); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + + return NULL; + } + + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (!bIsConst && ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); + // These are all one parm functions, so we can safely + // use the first parm for result type. 
+ parm = ac->aggParms()[0]; + + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + +#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; +#endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + +#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; +#endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (isp->sum_func() != Item_sum::UDF_SUM_FUNC) + { + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + + if (ac) + delete ac; + + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). 
+ if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + + // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + + if (ac) + delete ac; + + return NULL; + } + + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + + if (ac) + delete ac; + + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. 
+ if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + + if (ac) + delete ac; + + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } } } - - // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + catch (std::logic_error e) { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + + if (ac) + delete ac; + return NULL; } - else if (ac->constCol()) + catch (...) { - gwi.count_asterisk_list.push_back(ac); - } + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); + if (ac) + delete ac; - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. 
For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } + return NULL; } return ac; @@ -4646,7 +4895,7 @@ void gp_walk(const Item* item, void* arg) gwip->scsp = scsp; gwip->funcName.clear(); - gwip->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), scsp)); + gwip->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), scsp)); //@bug4636 take where clause column as dummy projection column, but only on local column. 
// varbinary aggregate is not supported yet, so rule it out @@ -4742,7 +4991,7 @@ void gp_walk(const Item* item, void* arg) if (!gwip->condPush) { - if (ifp->has_subquery() || funcName == "") + if (ifp->with_subquery() || funcName == "") { buildSubselectFunc(ifp, gwip); return; @@ -5138,26 +5387,9 @@ void gp_walk(const Item* item, void* arg) } else if (col->type() == Item::FIELD_ITEM && gwip->clauseType == HAVING) { - Item_field* ifip = static_cast(col); - std::vector::iterator iter = gwip->havingAggColsItems.begin(); - Item_func_or_sum* isfp = NULL; - - for ( ; iter != gwip->havingAggColsItems.end(); iter++ ) - { - Item* temp_isfp = *iter; - isfp = reinterpret_cast(temp_isfp); - - if ( isfp->type() == Item::SUM_FUNC_ITEM && - isfp->result_field == ifip->field ) - { - ReturnedColumn* rc = buildAggregateColumn(isfp, *gwip); - - if (rc) - gwip->rcWorkStack.push(rc); - - break; - } - } + ReturnedColumn* rc = buildAggFrmTempField(const_cast(item), *gwip); + if (rc) + gwip->rcWorkStack.push(rc); break; } @@ -5331,7 +5563,10 @@ void gp_walk(const Item* item, void* arg) * the involved item_fields to the passed in vector. It's used in parsing * functions or arithmetic expressions for vtable post process. 
*/ -void parse_item (Item* item, vector& field_vec, bool& hasNonSupportItem, uint16_t& parseInfo) +void parse_item (Item* item, vector& field_vec, + bool& hasNonSupportItem, + uint16_t& parseInfo, + gp_walk_info* gwi) { Item::Type itype = item->type(); @@ -5369,7 +5604,7 @@ void parse_item (Item* item, vector& field_vec, bool& hasNonSupport } for (uint32_t i = 0; i < isp->argument_count(); i++) - parse_item(isp->arguments()[i], field_vec, hasNonSupportItem, parseInfo); + parse_item(isp->arguments()[i], field_vec, hasNonSupportItem, parseInfo, gwi); // parse_item(sfitempp[i], field_vec, hasNonSupportItem, parseInfo); break; @@ -5414,8 +5649,20 @@ void parse_item (Item* item, vector& field_vec, bool& hasNonSupport } else if ((*(ref->ref))->type() == Item::FIELD_ITEM) { - Item_field* ifp = reinterpret_cast(*(ref->ref)); - field_vec.push_back(ifp); + // MCOL-1510. This could be a non-supported function + // argument in form of a temp_table_field, so check + // and set hasNonSupportItem if it is so. + ReturnedColumn* rc = NULL; + if (gwi) + rc = buildAggFrmTempField(ref, *gwi); + + if (!rc) + { + Item_field* ifp = reinterpret_cast(*(ref->ref)); + field_vec.push_back(ifp); + } + else + hasNonSupportItem = true; break; } else if ((*(ref->ref))->type() == Item::FUNC_ITEM) @@ -5480,7 +5727,7 @@ void parse_item (Item* item, vector& field_vec, bool& hasNonSupport // item is a Item_cache_wrapper. Shouldn't get here. printf("EXPR_CACHE_ITEM in parse_item\n"); string parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_SUB_QUERY_TYPE); - setError(item->thd(), ER_CHECK_NOT_IMPLEMENTED, parseErrorText); + setError(gwi->thd, ER_CHECK_NOT_IMPLEMENTED, parseErrorText); break; } @@ -5605,7 +5852,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i while ((sj_nest = sj_list_it++)) { - cerr << sj_nest->db << "." << sj_nest->table_name << endl; + cerr << sj_nest->db.str << "." 
<< sj_nest->table_name.str << endl; } #endif @@ -5618,7 +5865,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; table_ptr; table_ptr = table_ptr->next_local) { // mysql put vtable here for from sub. we ignore it - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; // Until we handle recursive cte: @@ -5641,7 +5888,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SELECT_LEX* select_cursor = table_ptr->derived->first_select(); FromSubQuery fromSub(gwi, select_cursor); - string alias(table_ptr->alias); + string alias(table_ptr->alias.str); fromSub.alias(lower(alias)); CalpontSystemCatalog::TableAliasName tn = make_aliasview("", "", alias, viewName); @@ -5664,7 +5911,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else if (table_ptr->view) { View* view = new View(table_ptr->view->select_lex, &gwi); - CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db, table_ptr->table_name, table_ptr->alias); + CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str); view->viewName(tn); gwi.viewList.push_back(view); view->transform(); @@ -5676,17 +5923,17 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i // trigger system catalog cache if (infiniDB) - csc->columnRIDs(make_table(table_ptr->db, table_ptr->table_name), true); + csc->columnRIDs(make_table(table_ptr->db.str, table_ptr->table_name.str), true); - string table_name = table_ptr->table_name; + string table_name = table_ptr->table_name.str; // @bug5523 - if (table_ptr->db && strcmp(table_ptr->db, "information_schema") == 0) - table_name = (table_ptr->schema_table_name ? 
table_ptr->schema_table_name : table_ptr->alias); + if (table_ptr->db.length && strcmp(table_ptr->db.str, "information_schema") == 0) + table_name = (table_ptr->schema_table_name.length ? table_ptr->schema_table_name.str : table_ptr->alias.str); - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_name, table_ptr->alias, viewName, infiniDB); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_name, table_ptr->alias.str, viewName, infiniDB); gwi.tbList.push_back(tn); - CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db, table_name, table_ptr->alias, infiniDB); + CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db.str, table_name, table_ptr->alias.str, infiniDB); gwi.tableMap[tan] = make_pair(0, table_ptr); #ifdef DEBUG_WALK_COND cerr << tn << endl; @@ -5724,7 +5971,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i bool unionSel = false; - if (!isUnion && select_lex.master_unit()->is_union()) + if (!isUnion && select_lex.master_unit()->is_unit_op()) { gwi.thd->infinidb_vtable.isUnion = true; CalpontSelectExecutionPlan::SelectList unionVec; @@ -5992,7 +6239,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i while ((item = it++)) { - string itemAlias = (item->name ? item->name : ""); + string itemAlias = (item->name.length ? item->name.str : ""); // @bug 5916. Need to keep checking until getting concret item in case // of nested view. 
@@ -6011,7 +6258,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i Item_field* ifp = (Item_field*)item; SimpleColumn* sc = NULL; - if (ifp->field_name && string(ifp->field_name) == "*") + if (ifp->field_name.length && string(ifp->field_name.str) == "*") { collectAllCols(gwi, ifp); break; @@ -6047,13 +6294,13 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (ifp->is_autogenerated_name) gwi.selectCols.push_back("`" + escapeBackTick(fullname.c_str()) + "`" + " `" + - escapeBackTick(itemAlias.empty() ? ifp->name : itemAlias.c_str()) + "`"); + escapeBackTick(itemAlias.empty() ? ifp->name.str : itemAlias.c_str()) + "`"); else - gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? ifp->name : itemAlias.c_str())) + "`"); + gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? ifp->name.str : itemAlias.c_str())) + "`"); gwi.returnedCols.push_back(spsc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), spsc)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spsc)); TABLE_LIST* tmp = 0; if (ifp->cached_table) @@ -6125,7 +6372,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i bool hasNonSupportItem = false; parse_item(ifp, tmpVec, hasNonSupportItem, parseInfo); - if (ifp->has_subquery() || + if (ifp->with_subquery() || string(ifp->func_name()) == string("") || ifp->functype() == Item_func::NOT_ALL_FUNC || parseInfo & SUB_BIT) @@ -6149,8 +6396,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i srcp.reset(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (ifp->name) - srcp->alias(ifp->name); + if (ifp->name.length) + srcp->alias(ifp->name.str); continue; } @@ -6165,7 +6412,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i redo = 
true; String str; ifp->print(&str, QT_INFINIDB_NO_QUOTE); - gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + escapeBackTick(item->name) + "`"); + gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + escapeBackTick(item->name.str) + "`"); } break; @@ -6184,8 +6431,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; - sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name + "`"; - gwi.selectCols.push_back("`" + escapeBackTick(ifp->name) + "`"); + sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name.str + "`"; + gwi.selectCols.push_back("`" + escapeBackTick(ifp->name.str) + "`"); } } else // InfiniDB Non support functions still go through post process for now @@ -6232,8 +6479,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(cc); - if (ifp->name) - cc->alias(ifp->name); + if (ifp->name.length) + cc->alias(ifp->name.str); gwi.returnedCols.push_back(srcp); @@ -6285,7 +6532,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i ifp->print(&funcStr, QT_INFINIDB); string valStr; valStr.assign(funcStr.ptr(), funcStr.length()); - gwi.selectCols.push_back(valStr + " `" + escapeBackTick(ifp->name) + "`"); + gwi.selectCols.push_back(valStr + " `" + escapeBackTick(ifp->name.str) + "`"); // clear the error set by buildFunctionColumn gwi.fatalParseError = false; gwi.parseErrorText = ""; @@ -6302,14 +6549,14 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else { // do not push the dummy column (mysql added) to returnedCol - if (item->name && string(item->name) == "Not_used") + if (item->name.length && string(item->name.str) == "Not_used") continue; // @bug3509. Constant column is sent to ExeMgr now. 
SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); gwi.returnedCols.push_back(srcp); @@ -6336,8 +6583,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_string* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -6364,8 +6611,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_decimal* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -6393,8 +6640,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); string name = string("null `") + escapeBackTick(srcp->alias().c_str()) + string("`") ; @@ -6454,18 +6701,18 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (sub->get_select_lex()->get_table_list()) rc->viewName(lower(getViewName(sub->get_select_lex()->get_table_list()))); - if (sub->name) - rc->alias(sub->name); + if (sub->name.length) + rc->alias(sub->name.str); gwi.returnedCols.push_back(SRCP(rc)); String str; sub->get_select_lex()->print(gwi.thd, &str, QT_INFINIDB_NO_QUOTE); sel_cols_in_create += "(" + string(str.c_ptr()) + ")"; - if (sub->name) + if (sub->name.length) { - sel_cols_in_create += "`" + escapeBackTick(sub->name) + "`"; - gwi.selectCols.push_back(sub->name); + 
sel_cols_in_create += "`" + escapeBackTick(sub->name.str) + "`"; + gwi.selectCols.push_back(sub->name.str); } else { @@ -6615,8 +6862,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i { emsg = "un-recognized column"; - if (funcFieldVec[i]->name) - emsg += string(funcFieldVec[i]->name); + if (funcFieldVec[i]->name.length) + emsg += string(funcFieldVec[i]->name.str); } else { @@ -6649,7 +6896,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (j == gwi.returnedCols.size()) { gwi.returnedCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name.str), srcp)); if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; @@ -6745,7 +6992,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -6809,7 +7056,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } else { - if (ifp->name && string(ifp->name) == gwi.returnedCols[j].get()->alias()) + if (ifp->name.length && string(ifp->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -6836,7 +7083,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } gwi.groupByCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), srcp)); } // @bug5638. 
The group by column is constant but not counter, alias has to match a column // on the select list @@ -6850,7 +7097,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (uint32_t j = 0; j < gwi.returnedCols.size(); j++) { - if (groupItem->name && string(groupItem->name) == gwi.returnedCols[j].get()->alias()) + if (groupItem->name.length && string(groupItem->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -6868,7 +7115,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } else if ((*(groupcol->item))->type() == Item::SUBSELECT_ITEM) { - if (!groupcol->in_field_list || !groupItem->name) + if (!groupcol->in_field_list || !groupItem->name.length) { nonSupportItem = groupItem; } @@ -6878,7 +7125,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -6935,8 +7182,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i { Message::Args args; - if (nonSupportItem->name) - args.add("'" + string(nonSupportItem->name) + "'"); + if (nonSupportItem->name.length) + args.add("'" + string(nonSupportItem->name.str) + "'"); else args.add(""); @@ -6954,8 +7201,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i string select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); string lower_create_query(gwi.thd->infinidb_vtable.create_vtable_query.c_ptr()); string lower_select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); - algorithm::to_lower(lower_create_query); - algorithm::to_lower(lower_select_query); + boost::algorithm::to_lower(lower_create_query); + 
boost::algorithm::to_lower(lower_select_query); // check if window functions are in order by. InfiniDB process order by list if @@ -7068,9 +7315,9 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else if (ord_item->type() == Item::FUNC_ITEM) { // @bug 2621. order by alias - if (!ord_item->is_autogenerated_name && ord_item->name) + if (!ord_item->is_autogenerated_name && ord_item->name.length) { - ord_cols += ord_item->name; + ord_cols += ord_item->name.str; continue; } @@ -7207,7 +7454,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i continue; } } - else if (ord_item->name && ord_item->type() == Item::FIELD_ITEM) + else if (ord_item->name.length && ord_item->type() == Item::FIELD_ITEM) { Item_field* field = reinterpret_cast(ord_item); ReturnedColumn* rc = buildSimpleColumn(field, gwi); @@ -7230,7 +7477,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } if (strcasecmp(fullname.c_str(), gwi.returnedCols[i]->alias().c_str()) == 0 || - strcasecmp(ord_item->name, gwi.returnedCols[i]->alias().c_str()) == 0) + strcasecmp(ord_item->name.str, gwi.returnedCols[i]->alias().c_str()) == 0) { ord_cols += string(" `") + escapeBackTick(gwi.returnedCols[i]->alias().c_str()) + '`'; addToSel = false; @@ -7321,7 +7568,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i sel_cols_in_create += fullname + " `" + escapeBackTick(fullname.c_str()) + "`"; gwi.returnedCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(fieldVec[i]->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(fieldVec[i]->field_name.str), srcp)); TABLE_LIST* tmp = (fieldVec[i]->cached_table ? 
fieldVec[i]->cached_table : 0); gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); @@ -7400,7 +7647,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_INTERNAL_ERROR; } - if (gwi.returnedCols.empty() && gwi.additionalRetCols.empty()) + if (gwi.returnedCols.empty() && gwi.additionalRetCols.empty() && minSc) gwi.returnedCols.push_back(minSc); } @@ -7439,12 +7686,12 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; table_ptr; table_ptr = table_ptr->next_global) { - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; if (table_ptr->derived) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; String str; @@ -7453,21 +7700,21 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (!firstTb) create_query += ", "; - create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias); + create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias.str); firstTb = false; - aliasSet.insert(table_ptr->alias); + aliasSet.insert(table_ptr->alias.str); } else if (table_ptr->view) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + - string(" `") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." 
+ string(table_ptr->table_name.str) + + string(" `") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); firstTb = false; } else @@ -7476,31 +7723,31 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i // consistent with item.cc field print. if (table_ptr->referencing_view) { - if (aliasSet.find(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)) != aliasSet.end()) + if (aliasSet.find(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); + create_query += string(table_ptr->db.str) + "." + string(table_ptr->table_name.str) + string(" "); create_query += string(" `") + - escapeBackTick(table_ptr->referencing_view->alias) + "_" + - escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)); + escapeBackTick(table_ptr->referencing_view->alias.str) + "_" + + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)); } else { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); - create_query += string("`") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." 
+ string(table_ptr->table_name.str) + string(" "); + create_query += string("`") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); } firstTb = false; @@ -7589,7 +7836,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i //continue; } // @bug 3518. if order by clause = selected column, use position. - else if (ord_item->name && ord_item->type() == Item::FIELD_ITEM) + else if (ord_item->name.length && ord_item->type() == Item::FIELD_ITEM) { Item_field* field = reinterpret_cast(ord_item); string fullname; @@ -7600,8 +7847,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (field->table_name) fullname += string(field->table_name) + "."; - if (field->field_name) - fullname += string(field->field_name); + if (field->field_name.length) + fullname += string(field->field_name.str); uint32_t i = 0; @@ -7614,7 +7861,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i continue; if (strcasecmp(fullname.c_str(), gwi.returnedCols[i]->alias().c_str()) == 0 || - strcasecmp(ord_item->name, gwi.returnedCols[i]->alias().c_str()) == 0) + strcasecmp(ord_item->name.str, gwi.returnedCols[i]->alias().c_str()) == 0) { ostringstream oss; oss << i + 1; @@ -7624,15 +7871,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } if (i == gwi.returnedCols.size()) - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } - else if (ord_item->name) + else if (ord_item->name.length) { // for union order by 1 case. 
For unknown reason, it doesn't show in_field_list if (ord_item->type() == Item::INT_ITEM) { - ord_cols += ord_item->name; + ord_cols += ord_item->name.str; } else if (ord_item->type() == Item::SUBSELECT_ITEM) { @@ -7642,7 +7889,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } else { - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } } else if (ord_item->type() == Item::FUNC_ITEM) @@ -7843,7 +8090,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; @@ -7893,9 +8148,11 @@ int cp_get_plan(THD* thd, SCSEP& csep) else if (status < 0) return status; +#ifdef DEBUG_WALK_COND cerr << "---------------- cp_get_plan EXECUTION PLAN ----------------" << endl; cerr << *csep << endl ; cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; +#endif // Derived table projection and filter optimization. 
derivedTableOptimization(csep); @@ -7928,13 +8185,13 @@ int cp_get_table_plan(THD* thd, SCSEP& csep, cal_table_info& ti) { if (bitmap_is_set(read_set, field->field_index)) { - SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name, sessionID); + SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name.str, sessionID); string alias(table->alias.c_ptr()); sc->tableAlias(lower(alias)); assert (sc); boost::shared_ptr spsc(sc); gwi->returnedCols.push_back(spsc); - gwi->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(field->field_name), spsc)); + gwi->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(field->field_name.str), spsc)); } } @@ -8009,6 +8266,12 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); +#ifdef DEBUG_WALK_COND + cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; + cerr << *csep << endl ; + cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; +#endif + if (status > 0) return ER_INTERNAL_ERROR; else if (status < 0) @@ -8137,7 +8400,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro while ((sj_nest = sj_list_it++)) { - cerr << sj_nest->db << "." << sj_nest->table_name << endl; + cerr << sj_nest->db.str << "." 
<< sj_nest->table_name.str << endl; } #endif @@ -8173,7 +8436,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SELECT_LEX* select_cursor = table_ptr->derived->first_select(); FromSubQuery fromSub(gwi, select_cursor); - string alias(table_ptr->alias); + string alias(table_ptr->alias.str); fromSub.alias(lower(alias)); CalpontSystemCatalog::TableAliasName tn = make_aliasview("", "", alias, viewName); @@ -8196,7 +8459,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else if (table_ptr->view) { View* view = new View(table_ptr->view->select_lex, &gwi); - CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db, table_ptr->table_name, table_ptr->alias); + CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str); view->viewName(tn); gwi.viewList.push_back(view); view->transform(); @@ -8208,17 +8471,17 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // trigger system catalog cache if (infiniDB) - csc->columnRIDs(make_table(table_ptr->db, table_ptr->table_name), true); + csc->columnRIDs(make_table(table_ptr->db.str, table_ptr->table_name.str), true); - string table_name = table_ptr->table_name; + string table_name = table_ptr->table_name.str; // @bug5523 - if (table_ptr->db && strcmp(table_ptr->db, "information_schema") == 0) - table_name = (table_ptr->schema_table_name ? table_ptr->schema_table_name : table_ptr->alias); + if (table_ptr->db.length && strcmp(table_ptr->db.str, "information_schema") == 0) + table_name = (table_ptr->schema_table_name.length ? 
table_ptr->schema_table_name.str : table_ptr->alias.str); - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_name, table_ptr->alias, viewName, infiniDB); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_name, table_ptr->alias.str, viewName, infiniDB); gwi.tbList.push_back(tn); - CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db, table_name, table_ptr->alias, infiniDB); + CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db.str, table_name, table_ptr->alias.str, infiniDB); gwi.tableMap[tan] = make_pair(0, table_ptr); #ifdef DEBUG_WALK_COND cerr << tn << endl; @@ -8302,7 +8565,9 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro gwi.rcWorkStack.push(new ConstantColumn((int64_t)0, ConstantColumn::NUM)); } - uint32_t failed = buildOuterJoin(gwi, select_lex); + SELECT_LEX tmp_select_lex; + tmp_select_lex.table_list.first = gi.groupByTables; + uint32_t failed = buildOuterJoin(gwi, tmp_select_lex); if (failed) return failed; @@ -8400,7 +8665,13 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro while ((item = it++)) { - string itemAlias = (item->name ? item->name : ""); + string itemAlias; + if(item->name.length) + itemAlias = (item->name.str); + else + { + itemAlias = ""; + } // @bug 5916. Need to keep checking until getting concret item in case // of nested view. 
@@ -8420,7 +8691,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SimpleColumn* sc = NULL; ConstantColumn* constCol = NULL; - if (ifp->field_name && string(ifp->field_name) == "*") + if (ifp->field_name.length && string(ifp->field_name.str) == "*") { collectAllCols(gwi, ifp); break; @@ -8458,21 +8729,21 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (ifp->is_autogenerated_name) gwi.selectCols.push_back("`" + escapeBackTick(fullname.c_str()) + "`" + " `" + - escapeBackTick(itemAlias.empty() ? ifp->name : itemAlias.c_str()) + "`"); + escapeBackTick(itemAlias.empty() ? ifp->name.str : itemAlias.c_str()) + "`"); else - gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? ifp->name : itemAlias.c_str())) + "`"); + gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? ifp->name.str : itemAlias.c_str())) + "`"); // MCOL-1052 Replace SimpleColumn with ConstantColumn, // since it must have a single value only. if (constCol) { gwi.returnedCols.push_back(spcc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), spcc)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spcc)); } else { gwi.returnedCols.push_back(spsc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), spsc)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spsc)); } TABLE_LIST* tmp = 0; @@ -8509,13 +8780,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // add this agg col to returnedColumnList boost::shared_ptr spac(ac); gwi.returnedCols.push_back(spac); - // This item will be used in HAVING later. - Item_func_or_sum* isfp = reinterpret_cast(item); - - if ( ! 
isfp->name_length ) - { - gwi.havingAggColsItems.push_back(item); - } + // This item could be used in projection or HAVING later. + gwi.extSelAggColsItems.push_back(item); gwi.selectCols.push_back('`' + escapeBackTick(spac->alias().c_str()) + '`'); String str(256); @@ -8554,7 +8820,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro bool hasNonSupportItem = false; parse_item(ifp, tmpVec, hasNonSupportItem, parseInfo); - if (ifp->has_subquery() || + if (ifp->with_subquery() || string(ifp->func_name()) == string("") || ifp->functype() == Item_func::NOT_ALL_FUNC || parseInfo & SUB_BIT) @@ -8565,7 +8831,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - ReturnedColumn* rc = buildFunctionColumn(ifp, gwi, hasNonSupportItem); + ReturnedColumn* rc = buildFunctionColumn(ifp, gwi, hasNonSupportItem, true); SRCP srcp(rc); if (rc) @@ -8578,8 +8844,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro srcp.reset(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (ifp->name) - srcp->alias(ifp->name); + if (ifp->name.length) + srcp->alias(ifp->name.str); continue; } @@ -8594,7 +8860,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro redo = true; String str; ifp->print(&str, QT_INFINIDB_NO_QUOTE); - gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + escapeBackTick(item->name) + "`"); + gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + escapeBackTick(item->name.str) + "`"); } break; @@ -8613,15 +8879,18 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; - sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name + "`"; - gwi.selectCols.push_back("`" + escapeBackTick(ifp->name) + "`"); + sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name.str + 
"`"; + gwi.selectCols.push_back("`" + escapeBackTick(ifp->name.str) + "`"); } } else // InfiniDB Non support functions still go through post process for now { hasNonSupportItem = false; uint32_t before_size = funcFieldVec.size(); - parse_item(ifp, funcFieldVec, hasNonSupportItem, parseInfo); + // MCOL-1510 Use gwi pointer here to catch funcs with + // not supported aggregate args in projections, + // e.g. NOT(SUM(i)). + parse_item(ifp, funcFieldVec, hasNonSupportItem, parseInfo, &gwi); uint32_t after_size = funcFieldVec.size(); // group by func and func in subquery can not be post processed @@ -8661,8 +8930,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SRCP srcp(cc); - if (ifp->name) - cc->alias(ifp->name); + if (ifp->name.length) + cc->alias(ifp->name.str); gwi.returnedCols.push_back(srcp); @@ -8712,7 +8981,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // @bug 1706 String funcStr; ifp->print(&funcStr, QT_INFINIDB); - gwi.selectCols.push_back(string(funcStr.c_ptr()) + " `" + escapeBackTick(ifp->name) + "`"); + gwi.selectCols.push_back(string(funcStr.c_ptr()) + " `" + escapeBackTick(ifp->name.str) + "`"); // clear the error set by buildFunctionColumn gwi.fatalParseError = false; gwi.parseErrorText = ""; @@ -8729,14 +8998,14 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else { // do not push the dummy column (mysql added) to returnedCol - if (item->name && string(item->name) == "Not_used") + if (item->name.length && string(item->name.str) == "Not_used") continue; // @bug3509. Constant column is sent to ExeMgr now. 
SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); gwi.returnedCols.push_back(srcp); @@ -8763,8 +9032,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_string* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -8791,8 +9060,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_decimal* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -8881,18 +9150,18 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (sub->get_select_lex()->get_table_list()) rc->viewName(lower(getViewName(sub->get_select_lex()->get_table_list()))); - if (sub->name) - rc->alias(sub->name); + if (sub->name.length) + rc->alias(sub->name.str); gwi.returnedCols.push_back(SRCP(rc)); String str; sub->get_select_lex()->print(gwi.thd, &str, QT_INFINIDB_NO_QUOTE); sel_cols_in_create += "(" + string(str.c_ptr()) + ")"; - if (sub->name) + if (sub->name.length) { - sel_cols_in_create += "`" + escapeBackTick(sub->name) + "`"; - gwi.selectCols.push_back(sub->name); + sel_cols_in_create += "`" + escapeBackTick(sub->name.str) + "`"; + gwi.selectCols.push_back(sub->name.str); } else { @@ -9041,8 +9310,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro { emsg = "un-recognized column"; - if (funcFieldVec[i]->name) - emsg += string(funcFieldVec[i]->name); + if (funcFieldVec[i]->name.length) + emsg += 
string(funcFieldVec[i]->name.str); } else { @@ -9075,7 +9344,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (j == gwi.returnedCols.size()) { gwi.returnedCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name.str), srcp)); if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; @@ -9171,7 +9440,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -9235,7 +9504,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } else { - if (ifp->name && string(ifp->name) == gwi.returnedCols[j].get()->alias()) + if (ifp->name.length && string(ifp->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -9262,7 +9531,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } gwi.groupByCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), srcp)); } // @bug5638. 
The group by column is constant but not counter, alias has to match a column // on the select list @@ -9276,7 +9545,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (uint32_t j = 0; j < gwi.returnedCols.size(); j++) { - if (groupItem->name && string(groupItem->name) == gwi.returnedCols[j].get()->alias()) + if (groupItem->name.length && string(groupItem->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -9294,7 +9563,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } else if ((*(groupcol->item))->type() == Item::SUBSELECT_ITEM) { - if (!groupcol->in_field_list || !groupItem->name) + if (!groupcol->in_field_list || !groupItem->name.length) { nonSupportItem = groupItem; } @@ -9304,7 +9573,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -9361,8 +9630,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro { Message::Args args; - if (nonSupportItem->name) - args.add("'" + string(nonSupportItem->name) + "'"); + if (nonSupportItem->name.length) + args.add("'" + string(nonSupportItem->name.str) + "'"); else args.add(""); @@ -9380,8 +9649,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro string select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); string lower_create_query(gwi.thd->infinidb_vtable.create_vtable_query.c_ptr()); string lower_select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); - algorithm::to_lower(lower_create_query); - algorithm::to_lower(lower_select_query); + boost::algorithm::to_lower(lower_create_query); + 
boost::algorithm::to_lower(lower_select_query); // check if window functions are in order by. InfiniDB process order by list if @@ -9414,6 +9683,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else { Item* ord_item = *(ordercol->item); + bool nonAggField = true; // ignore not_used column on order by. if (ord_item->type() == Item::INT_ITEM && ord_item->full_name() && string(ord_item->full_name()) == "Not_used") @@ -9422,11 +9692,35 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro rc = gwi.returnedCols[((Item_int*)ord_item)->val_int() - 1]->clone(); else if (ord_item->type() == Item::SUBSELECT_ITEM) gwi.fatalParseError = true; + else if (ordercol->in_field_list && ord_item->type() == Item::FIELD_ITEM) + { + rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError); + Item_field* ifp = static_cast(ord_item); + + // The item must be an alias for a projected column + // and extended SELECT list must contain a proper rc + // either aggregation or a field. + if (!rc && ifp->name.length) + { + gwi.fatalParseError = false; + execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.returnedCols.begin(); + + for ( ; iter != gwi.returnedCols.end(); iter++ ) + { + if ( (*iter).get()->alias() == ord_item->name.str ) + { + rc = (*iter).get()->clone(); + nonAggField = rc->hasAggregate() ? false : true; + break; + } + } + } + } else rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError); // Looking for a match for this item in GROUP BY list. 
- if ( rc && ord_item->type() == Item::FIELD_ITEM ) + if ( rc && ord_item->type() == Item::FIELD_ITEM && nonAggField) { execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.groupByCols.begin(); @@ -9462,8 +9756,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else ostream << "unknown table" << '.'; - if (iip->field_name) - ostream << iip->field_name; + if (iip->field_name.length) + ostream << iip->field_name.str; else ostream << "unknown field"; @@ -9630,12 +9924,12 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (; table_ptr; table_ptr = table_ptr->next_local) { - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; if (table_ptr->derived) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; String str; @@ -9644,21 +9938,21 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (!firstTb) create_query += ", "; - create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias); + create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias.str); firstTb = false; - aliasSet.insert(table_ptr->alias); + aliasSet.insert(table_ptr->alias.str); } else if (table_ptr->view) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + - string(" `") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." 
+ string(table_ptr->table_name.str) + + string(" `") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); firstTb = false; } else @@ -9667,31 +9961,31 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // consistent with item.cc field print. if (table_ptr->referencing_view) { - if (aliasSet.find(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)) != aliasSet.end()) + if (aliasSet.find(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); + create_query += string(table_ptr->db.str) + "." + string(table_ptr->table_name.str) + string(" "); create_query += string(" `") + - escapeBackTick(table_ptr->referencing_view->alias) + "_" + - escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)); + escapeBackTick(table_ptr->referencing_view->alias.str) + "_" + + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)); } else { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); - create_query += string("`") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." 
+ string(table_ptr->table_name.str) + string(" "); + create_query += string("`") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); } firstTb = false; @@ -9724,7 +10018,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro sel_query += ", "; } - select_query.replace(lower_select_query.find("select *"), string("select *").length(), sel_query); + //select_query.replace(lower_select_query.find("select *"), string("select *").length(), sel_query); } else { @@ -9785,7 +10079,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro //continue; } // @bug 3518. if order by clause = selected column, use position. - else if (ord_item->name && ord_item->type() == Item::FIELD_ITEM) + else if (ord_item->name.length && ord_item->type() == Item::FIELD_ITEM) { Item_field* field = reinterpret_cast(ord_item); string fullname; @@ -9796,8 +10090,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (field->table_name) fullname += string(field->table_name) + "."; - if (field->field_name) - fullname += string(field->field_name); + if (field->field_name.length) + fullname += string(field->field_name.str); uint32_t i = 0; @@ -9810,7 +10104,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro continue; if (strcasecmp(fullname.c_str(), gwi.returnedCols[i]->alias().c_str()) == 0 || - strcasecmp(ord_item->name, gwi.returnedCols[i]->alias().c_str()) == 0) + strcasecmp(ord_item->name.str, gwi.returnedCols[i]->alias().c_str()) == 0) { ostringstream oss; oss << i + 1; @@ -9820,15 +10114,15 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } if (i == gwi.returnedCols.size()) - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } - else if (ord_item->name) + else if (ord_item->name.length) { // for union order 
by 1 case. For unknown reason, it doesn't show in_field_list if (ord_item->type() == Item::INT_ITEM) { - ord_cols += ord_item->name; + ord_cols += ord_item->name.str; } else if (ord_item->type() == Item::SUBSELECT_ITEM) { @@ -9838,7 +10132,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } else { - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } } else if (ord_item->type() == Item::FUNC_ITEM) @@ -9869,14 +10163,48 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } } - if (ord_cols.length() > 0) // has order by + if ( gwi.orderByCols.size() ) // has order by { gwi.thd->infinidb_vtable.has_order_by = true; csep->hasOrderBy(true); - ord_cols = " order by " + ord_cols; - select_query += ord_cols; + csep->specHandlerProcessed(true); } } + + // LIMIT and OFFSET are extracted from TABLE_LIST elements. + // All of JOIN-ed tables contain relevant limit and offset. 
+ uint64_t limit = (uint64_t)-1; + if (gi.groupByTables->select_lex->select_limit && + ( limit = static_cast(gi.groupByTables->select_lex->select_limit)->val_int() ) && + limit != (uint64_t)-1 ) + { + csep->limitNum(limit); + } + else if (csep->hasOrderBy()) + { + // We use LimitedOrderBy so set the limit to + // go through the check in addOrderByAndLimit + csep->limitNum((uint64_t) - 2); + } + + if (gi.groupByTables->select_lex->offset_limit) + { + csep->limitStart(((Item_int*)gi.groupByTables->select_lex->offset_limit)->val_int()); + } + + //gwi.thd->infinidb_vtable.select_vtable_query.free(); + //gwi.thd->infinidb_vtable.select_vtable_query.append(select_query.c_str(), select_query.length()); + + // We don't currently support limit with correlated subquery + if (csep->limitNum() != (uint64_t) - 1 && + gwi.subQuery && !gwi.correlatedTbNameVec.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_LIMIT_SUB); + setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); + return ER_CHECK_NOT_IMPLEMENTED; + } + } // ORDER BY processing ends here if ( gi.groupByDistinct ) @@ -9923,7 +10251,15 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; @@ -9960,3 +10296,4 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } +// vim:ts=4 sw=4: diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 5ca94562b..ad83f48db 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -61,7 +61,7 @@ using namespace std; #include #include #include -using namespace boost; +//using namespace boost; 
#include "idb_mysql.h" @@ -276,104 +276,6 @@ void storeNumericField(Field** f, int64_t value, CalpontSystemCatalog::ColType& } } -void storeNumericFieldGroupBy(Field** f, int64_t value, CalpontSystemCatalog::ColType& ct) -{ - // unset null bit first - if ((*f)->null_ptr) - *(*f)->null_ptr &= ~(*f)->null_bit; - - // For unsigned, use the ColType returned in the row rather than the - // unsigned_flag set by mysql. This is because mysql gets it wrong for SUM() - // Hopefully, in all other cases we get it right. - switch ((*f)->type()) - { - case MYSQL_TYPE_NEWDECIMAL: - { - Field_new_decimal* f2 = (Field_new_decimal*)*f; - - // @bug4388 stick to InfiniDB's scale in case mysql gives wrong scale due - // to create vtable limitation. - if (f2->dec < ct.scale) - f2->dec = ct.scale; - - char buf[256]; - dataconvert::DataConvert::decimalToString(value, (unsigned)ct.scale, buf, 256, ct.colDataType); - f2->store(buf, strlen(buf), f2->charset()); - break; - } - - case MYSQL_TYPE_TINY: //TINYINT type - { - Field_tiny* f2 = (Field_tiny*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_SHORT: //SMALLINT type - { - Field_short* f2 = (Field_short*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_LONG: //INT type - { - Field_long* f2 = (Field_long*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_LONGLONG: //BIGINT type - { - Field_longlong* f2 = (Field_longlong*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_FLOAT: // FLOAT type - { - Field_float* f2 = (Field_float*)*f; - float float_val = *(float*)(&value); - f2->store(float_val); - break; - } - - case MYSQL_TYPE_DOUBLE: // DOUBLE type - { - Field_double* f2 = (Field_double*)*f; - double double_val = *(double*)(&value); - f2->store(double_val); - break; - } - - case 
MYSQL_TYPE_VARCHAR: - { - Field_varstring* f2 = (Field_varstring*)*f; - char tmp[25]; - - if (ct.colDataType == CalpontSystemCatalog::DECIMAL) - dataconvert::DataConvert::decimalToString(value, (unsigned)ct.scale, tmp, 25, ct.colDataType); - else - snprintf(tmp, 25, "%ld", value); - - f2->store(tmp, strlen(tmp), f2->charset()); - break; - } - - default: - { - Field_longlong* f2 = (Field_longlong*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - } -} - // // @bug 2244. Log exception related to lost connection to ExeMgr. // Log exception error from calls to sm::tpl_scan_fetch in fetchNextRow() @@ -582,7 +484,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h *(*f)->null_ptr &= ~(*f)->null_bit; intColVal = row.getUintField<8>(s); - DataConvert::datetimeToString(intColVal, tmp, 255); + DataConvert::datetimeToString(intColVal, tmp, 255, colType.precision); /* setting the field_length is a sort-of hack. The length * at this point can be long enough to include mseconds. @@ -606,7 +508,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h *(*f)->null_ptr &= ~(*f)->null_bit; intColVal = row.getUintField<8>(s); - DataConvert::timeToString(intColVal, tmp, 255); + DataConvert::timeToString(intColVal, tmp, 255, colType.precision); Field_varstring* f2 = (Field_varstring*)*f; f2->store(tmp, strlen(tmp), f2->charset()); @@ -781,8 +683,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -974,7 +879,7 @@ uint32_t doUpdateDelete(THD* thd) } //@Bug 4387. Check BRM status before start statement. 
- scoped_ptr dbrmp(new DBRM()); + boost::scoped_ptr dbrmp(new DBRM()); int rc = dbrmp->isReadWrite(); thd->infinidb_vtable.isInfiniDBDML = true; @@ -1130,7 +1035,7 @@ uint32_t doUpdateDelete(THD* thd) schemaName = string(item->db_name); columnAssignmentPtr = new ColumnAssignment(); - columnAssignmentPtr->fColumn = string(item->name); + columnAssignmentPtr->fColumn = string(item->name.str); columnAssignmentPtr->fOperator = "="; columnAssignmentPtr->fFuncScale = 0; Item* value = value_it++; @@ -1276,7 +1181,7 @@ uint32_t doUpdateDelete(THD* thd) { Item_field* tmp = (Item_field*)value; - if (!tmp->field_name) //null + if (!tmp->field_name.length) //null { columnAssignmentPtr->fScalarExpression = "NULL"; columnAssignmentPtr->fFromCol = false; @@ -1397,9 +1302,9 @@ uint32_t doUpdateDelete(THD* thd) if (deleteTable->get_num_of_tables() == 1) { - schemaName = first_table->db; - tableName = first_table->table_name; - aliasName = first_table->alias; + schemaName = first_table->db.str; + tableName = first_table->table_name.str; + aliasName = first_table->alias.str; qualifiedTablName->fName = tableName; qualifiedTablName->fSchema = schemaName; pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackageFromMysqlBuffer(dmlStatement); @@ -1418,7 +1323,7 @@ uint32_t doUpdateDelete(THD* thd) first_table = (TABLE_LIST*) thd->lex->select_lex.table_list.first; schemaName = first_table->table->s->db.str; tableName = first_table->table->s->table_name.str; - aliasName = first_table->alias; + aliasName = first_table->alias.str; qualifiedTablName->fName = tableName; qualifiedTablName->fSchema = schemaName; pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackageFromMysqlBuffer(dmlStatement); @@ -1429,7 +1334,7 @@ uint32_t doUpdateDelete(THD* thd) first_table = (TABLE_LIST*) thd->lex->select_lex.table_list.first; schemaName = first_table->table->s->db.str; tableName = first_table->table->s->table_name.str; - aliasName = first_table->alias; + aliasName = first_table->alias.str; 
qualifiedTablName->fName = tableName; qualifiedTablName->fSchema = schemaName; pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackageFromMysqlBuffer(dmlStatement); @@ -1961,7 +1866,7 @@ uint32_t doUpdateDelete(THD* thd) } else { - thd->set_row_count_func(dmlRowCount); + thd->set_row_count_func(dmlRowCount+thd->get_row_count_func()); } push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, errorMsg.c_str()); @@ -1969,7 +1874,7 @@ uint32_t doUpdateDelete(THD* thd) else { // if (dmlRowCount != 0) //Bug 5117. Handling self join. - thd->set_row_count_func(dmlRowCount); + thd->set_row_count_func(dmlRowCount+thd->get_row_count_func()); //cout << " error status " << ci->rc << " and rowcount = " << dmlRowCount << endl; @@ -2240,7 +2145,7 @@ extern "C" bool includeInput = true; string pstr(parameter); - algorithm::to_lower(pstr); + boost::algorithm::to_lower(pstr); if (pstr == PmSmallSideMaxMemory) { @@ -2386,8 +2291,8 @@ extern "C" { tableName.table = args->args[0]; - if (thd->db) - tableName.schema = thd->db; + if (thd->db.length) + tableName.schema = thd->db.str; else { string msg("No schema information provided"); @@ -2524,8 +2429,8 @@ extern "C" { tableName.table = args->args[0]; - if (thd->db) - tableName.schema = thd->db; + if (thd->db.length) + tableName.schema = thd->db.str; else { return -1; @@ -3019,8 +2924,8 @@ int ha_calpont_impl_rnd_init(TABLE* table) ti.csep->verID(verID); ti.csep->sessionID(sessionID); - if (thd->db) - ti.csep->schemaName(thd->db); + if (thd->db.length) + ti.csep->schemaName(thd->db.str); ti.csep->traceFlags(ci->traceFlags); ti.msTablePtr = table; @@ -3113,8 +3018,8 @@ int ha_calpont_impl_rnd_init(TABLE* table) csep->verID(verID); csep->sessionID(sessionID); - if (thd->db) - csep->schemaName(thd->db); + if (thd->db.length) + csep->schemaName(thd->db.str); csep->traceFlags(ci->traceFlags); @@ -3779,12 +3684,12 @@ int ha_calpont_impl_delete_table(const char* name) if (thd->lex->sql_command == SQLCOM_DROP_DB) { - dbName = 
thd->lex->name.str; + dbName = const_cast(thd->lex->name.str); } else { TABLE_LIST* first_table = (TABLE_LIST*) thd->lex->select_lex.table_list.first; - dbName = first_table->db; + dbName = const_cast(first_table->db.str); } if (!dbName) @@ -3806,7 +3711,7 @@ int ha_calpont_impl_delete_table(const char* name) if (strcmp(dbName, "calpontsys") == 0 && string(name).find("@0024vtable") == string::npos) { std::string stmt(idb_mysql_query_str(thd)); - algorithm::to_upper(stmt); + boost::algorithm::to_upper(stmt); //@Bug 2432. systables can be dropped with restrict if (stmt.find(" RESTRICT") != string::npos) @@ -3958,7 +3863,7 @@ void ha_calpont_impl_start_bulk_insert(ha_rows rows, TABLE* table) if ((thd->lex)->sql_command == SQLCOM_INSERT) { string insertStmt = idb_mysql_query_str(thd); - algorithm::to_lower(insertStmt); + boost::algorithm::to_lower(insertStmt); string intoStr("into"); size_t found = insertStmt.find(intoStr); @@ -4106,7 +4011,7 @@ void ha_calpont_impl_start_bulk_insert(ha_rows rows, TABLE* table) #ifdef _MSC_VER aCmdLine = aCmdLine + "/bin/cpimport.exe -N -P " + to_string(localModuleId) + " -s " + ci->delimiter + " -e 0" + " -E " + escapechar + ci->enclosed_by + " "; #else - aCmdLine = aCmdLine + "/bin/cpimport -m 1 -N -P " + to_string(localModuleId) + " -s " + ci->delimiter + " -e 0" + " -E " + escapechar + ci->enclosed_by + " "; + aCmdLine = aCmdLine + "/bin/cpimport -m 1 -N -P " + boost::to_string(localModuleId) + " -s " + ci->delimiter + " -e 0" + " -E " + escapechar + ci->enclosed_by + " "; #endif } } @@ -4434,7 +4339,7 @@ void ha_calpont_impl_start_bulk_insert(ha_rows rows, TABLE* table) ci->stats.fQueryType = CalpontSelectExecutionPlan::queryTypeToString(CalpontSelectExecutionPlan::LOAD_DATA_INFILE); //@Bug 4387. Check BRM status before start statement. 
- scoped_ptr dbrmp(new DBRM()); + boost::scoped_ptr dbrmp(new DBRM()); int rc = dbrmp->isReadWrite(); if (rc != 0 ) @@ -4752,7 +4657,7 @@ int ha_calpont_impl_commit (handlerton* hton, THD* thd, bool all) return 0; //@Bug 5823 check if any active transaction for this session - scoped_ptr dbrmp(new DBRM()); + boost::scoped_ptr dbrmp(new DBRM()); BRM::TxnID txnId = dbrmp->getTxnID(tid2sid(thd->thread_id)); if (!txnId.valid) @@ -5042,6 +4947,7 @@ int ha_calpont_impl_external_lock(THD* thd, TABLE* table, int lock_type) { push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, infinidb_autoswitch_warning.c_str()); } + ci->queryState = 0; } else // vtable mode { @@ -5209,10 +5115,13 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE ci->warningMsg = msg; } - // if the previous query has error, re-establish the connection + // If the previous query has error and + // this is not a subquery run by the server(MCOL-1601) + // re-establish the connection if (ci->queryState != 0) { - sm::sm_cleanup(ci->cal_conn_hndl); + if( ci->cal_conn_hndl_st.size() == 0 ) + sm::sm_cleanup(ci->cal_conn_hndl); ci->cal_conn_hndl = 0; } @@ -5234,6 +5143,7 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE hndl = ci->cal_conn_hndl; + ci->cal_conn_hndl_st.push(ci->cal_conn_hndl); if (!csep) csep.reset(new CalpontSelectExecutionPlan()); @@ -5254,8 +5164,8 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE csep->verID(verID); csep->sessionID(sessionID); - if (group_hand->table_list->db_length) - csep->schemaName(group_hand->table_list->db); + if (group_hand->table_list->db.length) + csep->schemaName(group_hand->table_list->db.str); csep->traceFlags(ci->traceFlags); @@ -5275,7 +5185,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator 
condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { @@ -5306,8 +5215,12 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE return 0; string query; - query.assign(thd->infinidb_vtable.original_query.ptr(), - thd->infinidb_vtable.original_query.length()); + // Set the query text only once if the server executes + // subqueries separately. + if(ci->queryState) + query.assign(""); + else + query.assign(thd->query_string.str(), thd->query_string.length()); csep->data(query); try @@ -5437,11 +5350,15 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE idbassert(hndl != 0); hndl->csc = csc; + // The next section is useless if (thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE) ti.conn_hndl = hndl; else + { ci->cal_conn_hndl = hndl; - + ci->cal_conn_hndl_st.pop(); + ci->cal_conn_hndl_st.push(ci->cal_conn_hndl); + } try { hndl->connect(); @@ -5474,11 +5391,11 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE (thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE) || (thd->infinidb_vtable.vtable_state == THD::INFINIDB_REDO_QUERY)) { - if (ti.tpl_ctx == 0) - { - ti.tpl_ctx = new sm::cpsm_tplh_t(); - ti.tpl_scan_ctx = sm::sp_cpsm_tplsch_t(new sm::cpsm_tplsch_t()); - } + // MCOL-1601 Using stacks of ExeMgr conn hndls, table and scan contexts. + ti.tpl_ctx = new sm::cpsm_tplh_t(); + ti.tpl_ctx_st.push(ti.tpl_ctx); + ti.tpl_scan_ctx = sm::sp_cpsm_tplsch_t(new sm::cpsm_tplsch_t()); + ti.tpl_scan_ctx_st.push(ti.tpl_scan_ctx); // make sure rowgroup is null so the new meta data can be taken. This is for some case mysql // call rnd_init for a table more than once. @@ -5558,6 +5475,7 @@ error: if (ci->cal_conn_hndl) { + // end_query() should be called here. 
sm::sm_cleanup(ci->cal_conn_hndl); ci->cal_conn_hndl = 0; } @@ -5569,6 +5487,7 @@ internal_error: if (ci->cal_conn_hndl) { + // end_query() should be called here. sm::sm_cleanup(ci->cal_conn_hndl); ci->cal_conn_hndl = 0; } @@ -5800,6 +5719,12 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* ci->cal_conn_hndl = 0; // clear querystats because no query stats available for cancelled query ci->queryStats = ""; + if ( ci->cal_conn_hndl_st.size() ) + { + ci->cal_conn_hndl_st.pop(); + if ( ci->cal_conn_hndl_st.size() ) + ci->cal_conn_hndl = ci->cal_conn_hndl_st.top(); + } } return 0; @@ -5809,6 +5734,7 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* cal_table_info ti = ci->tableMap[table]; sm::cpsm_conhdl_t* hndl; + bool clearScanCtx = false; hndl = ci->cal_conn_hndl; @@ -5816,6 +5742,8 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* { if (ti.tpl_scan_ctx.get()) { + clearScanCtx = ( (ti.tpl_scan_ctx.get()->rowsreturned) && + ti.tpl_scan_ctx.get()->rowsreturned == ti.tpl_scan_ctx.get()->getRowCount() ); try { sm::tpl_scan_close(ti.tpl_scan_ctx); @@ -5827,10 +5755,31 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* } ti.tpl_scan_ctx.reset(); - + if ( ti.tpl_scan_ctx_st.size() ) + { + ti.tpl_scan_ctx_st.pop(); + if ( ti.tpl_scan_ctx_st.size() ) + ti.tpl_scan_ctx = ti.tpl_scan_ctx_st.top(); + } try { - sm::tpl_close(ti.tpl_ctx, &hndl, ci->stats); + if(hndl) + { + sm::tpl_close(ti.tpl_ctx, &hndl, ci->stats, clearScanCtx); +// Normaly stats variables are set in external_lock method but we set it here +// since they we pretend we are in vtable_disabled mode and the stats vars won't be set. +// We sum the stats up here since server could run a number of +// queries e.g. each for a subquery in a filter. 
+ if(hndl) + { + if (hndl->queryStats.length()) + ci->queryStats += hndl->queryStats; + if (hndl->extendedStats.length()) + ci->extendedStats += hndl->extendedStats; + if (hndl->miniStats.length()) + ci->miniStats += hndl->miniStats; + } + } ci->cal_conn_hndl = hndl; @@ -5863,6 +5812,20 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* ti.tpl_ctx = 0; + if ( ti.tpl_ctx_st.size() ) + { + ti.tpl_ctx_st.pop(); + if ( ti.tpl_ctx_st.size() ) + ti.tpl_ctx = ti.tpl_ctx_st.top(); + } + + if ( ci->cal_conn_hndl_st.size() ) + { + ci->cal_conn_hndl_st.pop(); + if ( ci->cal_conn_hndl_st.size() ) + ci->cal_conn_hndl = ci->cal_conn_hndl_st.top(); + } + ci->tableMap[table] = ti; // push warnings from CREATE phase diff --git a/dbcon/mysql/ha_calpont_impl_if.h b/dbcon/mysql/ha_calpont_impl_if.h index cb603ca49..72579111b 100644 --- a/dbcon/mysql/ha_calpont_impl_if.h +++ b/dbcon/mysql/ha_calpont_impl_if.h @@ -99,7 +99,7 @@ struct gp_walk_info execplan::CalpontSelectExecutionPlan::ReturnedColumnList groupByCols; execplan::CalpontSelectExecutionPlan::ReturnedColumnList subGroupByCols; execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols; - std::vector havingAggColsItems; + std::vector extSelAggColsItems; execplan::CalpontSelectExecutionPlan::ColumnMap columnMap; // This vector temporarily hold the projection columns to be added // to the returnedCols vector for subquery processing. 
It will be appended @@ -148,6 +148,9 @@ struct gp_walk_info int32_t recursionHWM; std::stack rcBookMarkStack; + // Kludge for MCOL-1472 + bool inCaseStmt; + gp_walk_info() : sessionid(0), fatalParseError(false), condPush(false), @@ -163,7 +166,8 @@ struct gp_walk_info lastSub(0), derivedTbCnt(0), recursionLevel(-1), - recursionHWM(0) + recursionHWM(0), + inCaseStmt(false) {} ~gp_walk_info() {} @@ -183,7 +187,9 @@ struct cal_table_info { } ~cal_table_info() {} sm::cpsm_tplh_t* tpl_ctx; + std::stack tpl_ctx_st; sm::sp_cpsm_tplsch_t tpl_scan_ctx; + std::stack tpl_scan_ctx_st; unsigned c; // for debug purpose TABLE* msTablePtr; // no ownership sm::cpsm_conhdl_t* conn_hndl; @@ -269,6 +275,7 @@ struct cal_connection_info } sm::cpsm_conhdl_t* cal_conn_hndl; + std::stack cal_conn_hndl_st; int queryState; CalTableMap tableMap; sm::tableid_t currentTable; @@ -326,15 +333,14 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, execplan::SCSEP& cse void setError(THD* thd, uint32_t errcode, const std::string errmsg, gp_walk_info* gwi); void setError(THD* thd, uint32_t errcode, const std::string errmsg); void gp_walk(const Item* item, void* arg); -void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo); -execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport); +void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo, gp_walk_info* gwip = NULL); const std::string bestTableName(const Item_field* ifp); bool isInfiniDB(TABLE* table_ptr); // execution plan util functions prototypes -execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport); -execplan::ReturnedColumn* buildFunctionColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport); -execplan::ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport); +execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& 
gwi, bool& nonSupport, bool pushdownHand = false); +execplan::ReturnedColumn* buildFunctionColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool pushdownHand = false); +execplan::ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool pushdownHand = false); execplan::ConstantColumn* buildDecimalColumn(Item* item, gp_walk_info& gwi); execplan::SimpleColumn* buildSimpleColumn(Item_field* item, gp_walk_info& gwi); execplan::FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport); @@ -346,7 +352,7 @@ void addIntervalArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); void castCharArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); void castDecimalArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); void castTypeArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); -void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo); +//void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo); bool isPredicateFunction(Item* item, gp_walk_info* gwip); execplan::ParseTree* buildRowPredicate(execplan::RowColumn* lhs, execplan::RowColumn* rhs, std::string predicateOp); bool buildRowColumnFilter(gp_walk_info* gwip, execplan::RowColumn* rhs, execplan::RowColumn* lhs, Item_func* ifp); diff --git a/dbcon/mysql/ha_calpont_partition.cpp b/dbcon/mysql/ha_calpont_partition.cpp index 99940262b..038c668c9 100644 --- a/dbcon/mysql/ha_calpont_partition.cpp +++ b/dbcon/mysql/ha_calpont_partition.cpp @@ -642,9 +642,9 @@ void partitionByValue_common(UDF_ARGS* args, // input } else { - if (current_thd->db) + if (current_thd->db.length) { - schema = current_thd->db; + schema = current_thd->db.str; } else { @@ -1019,9 +1019,9 @@ extern "C" } else { - if (current_thd->db) + if (current_thd->db.length) { - schema = current_thd->db; + schema = current_thd->db.str; } else { @@ -1228,7 +1228,7 @@ extern "C" { 
tableName.table = args->args[0]; - if (!current_thd->db) + if (!current_thd->db.length) { errMsg = "No schema name indicated."; memcpy(result, errMsg.c_str(), errMsg.length()); @@ -1236,7 +1236,7 @@ extern "C" return result; } - tableName.schema = current_thd->db; + tableName.schema = current_thd->db.str; parsePartitionString(args, 1, partitionNums, errMsg, tableName); } @@ -1316,14 +1316,14 @@ extern "C" { tableName.table = args->args[0]; - if (!current_thd->db) + if (!current_thd->db.length) { current_thd->get_stmt_da()->set_overwrite_status(true); current_thd->raise_error_printf(ER_INTERNAL_ERROR, IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NO_SCHEMA).c_str()); return result; } - tableName.schema = current_thd->db; + tableName.schema = current_thd->db.str; parsePartitionString(args, 1, partitionNums, errMsg, tableName); } @@ -1403,14 +1403,14 @@ extern "C" { tableName.table = args->args[0]; - if (!current_thd->db) + if (!current_thd->db.length) { current_thd->get_stmt_da()->set_overwrite_status(true); current_thd->raise_error_printf(ER_INTERNAL_ERROR, IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NO_SCHEMA).c_str()); return result; } - tableName.schema = current_thd->db; + tableName.schema = current_thd->db.str; parsePartitionString(args, 1, partSet, errMsg, tableName); } @@ -1724,9 +1724,9 @@ extern "C" } else { - if (current_thd->db) + if (current_thd->db.length) { - schema = current_thd->db; + schema = current_thd->db.str; } else { diff --git a/dbcon/mysql/ha_pseudocolumn.cpp b/dbcon/mysql/ha_pseudocolumn.cpp index 3e9b072f4..284130033 100644 --- a/dbcon/mysql/ha_pseudocolumn.cpp +++ b/dbcon/mysql/ha_pseudocolumn.cpp @@ -582,7 +582,7 @@ execplan::ReturnedColumn* buildPseudoColumn(Item* item, PseudoColumn* pc = new PseudoColumn(*sc, pseudoType); // @bug5892. set alias for derived table column matching. - pc->alias(ifp->name ? ifp->name : ""); + pc->alias(ifp->name.length ? 
ifp->name.str : ""); return pc; } diff --git a/dbcon/mysql/ha_view.cpp b/dbcon/mysql/ha_view.cpp index 5014007cc..764c2c5c5 100644 --- a/dbcon/mysql/ha_view.cpp +++ b/dbcon/mysql/ha_view.cpp @@ -84,7 +84,7 @@ void View::transform() for (; table_ptr; table_ptr = table_ptr->next_local) { // mysql put vtable here for from sub. we ignore it - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; string viewName = getViewName(table_ptr); @@ -93,8 +93,8 @@ void View::transform() { SELECT_LEX* select_cursor = table_ptr->derived->first_select(); FromSubQuery* fromSub = new FromSubQuery(gwi, select_cursor); - string alias(table_ptr->alias); - gwi.viewName = make_aliasview("", alias, table_ptr->belong_to_view->alias, ""); + string alias(table_ptr->alias.str); + gwi.viewName = make_aliasview("", alias, table_ptr->belong_to_view->alias.str, ""); algorithm::to_lower(alias); fromSub->alias(alias); gwi.derivedTbList.push_back(SCSEP(fromSub->transform())); @@ -107,8 +107,8 @@ void View::transform() else if (table_ptr->view) { // for nested view, the view name is vout.vin... 
format - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_ptr->table_name, table_ptr->alias, viewName); - gwi.viewName = make_aliastable(table_ptr->db, table_ptr->table_name, viewName); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str, viewName); + gwi.viewName = make_aliastable(table_ptr->db.str, table_ptr->table_name.str, viewName); View* view = new View(table_ptr->view->select_lex, &gwi); view->viewName(gwi.viewName); gwi.viewList.push_back(view); @@ -121,9 +121,9 @@ void View::transform() // trigger system catalog cache if (infiniDB) - csc->columnRIDs(make_table(table_ptr->db, table_ptr->table_name), true); + csc->columnRIDs(make_table(table_ptr->db.str, table_ptr->table_name.str), true); - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_ptr->table_name, table_ptr->alias, viewName, infiniDB); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str, viewName, infiniDB); gwi.tbList.push_back(tn); gwi.tableMap[tn] = make_pair(0, table_ptr); fParentGwip->tableMap[tn] = make_pair(0, table_ptr); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4b648cb15..0c57ce8bc 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -203,7 +203,7 @@ string ConvertFuncName(Item_sum* item) switch (item->sum_func()) { case Item_sum::COUNT_FUNC: - if (!item->arguments()[0]->name) + if (!item->arguments()[0]->name.str) return "COUNT(*)"; return "COUNT"; @@ -289,6 +289,13 @@ string ConvertFuncName(Item_sum* item) return "PERCENT_RANK"; break; + case Item_sum::PERCENTILE_CONT_FUNC: + return "PERCENTILE_CONT"; + break; + + case Item_sum::PERCENTILE_DISC_FUNC: + return "PERCENTILE_DISC"; + case Item_sum::CUME_DIST_FUNC: return "CUME_DIST"; break; @@ -340,6 +347,7 @@ ReturnedColumn* 
buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +378,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +416,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +433,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); 
// IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT funcParms.push_back(srcp); break; } @@ -881,11 +895,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/dbcon/mysql/idb_mysql.h b/dbcon/mysql/idb_mysql.h index dba9ae3c5..cce7bf9f4 100644 --- a/dbcon/mysql/idb_mysql.h +++ b/dbcon/mysql/idb_mysql.h @@ -63,6 +63,7 @@ template bool isnan(T); #endif #endif +#include "sql_plugin.h" #include "sql_table.h" #include "sql_select.h" #include "mysqld_error.h" diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index e8eb5b2b0..259e2d182 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -84,6 +84,7 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libudf_mysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git 
a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index 53f67fbbf..278a606d4 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -56,10 +56,62 @@ ST_FIELD_INFO is_columnstore_columns_fields[] = }; +static void get_cond_item(Item_func* item, String** table, String** db) +{ + char tmp_char[MAX_FIELD_WIDTH]; + Item_field* item_field = (Item_field*) item->arguments()[0]->real_item(); + + if (strcasecmp(item_field->field_name.str, "table_name") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *table = item->arguments()[1]->val_str(&str_buf); + return; + } + else if (strcasecmp(item_field->field_name.str, "table_schema") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *db = item->arguments()[1]->val_str(&str_buf); + return; + } +} + +static void get_cond_items(COND* cond, String** table, String** db) +{ + if (cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + get_cond_item(fitem, table, db); + } + } + else if ((cond->type() == Item::COND_ITEM) && (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item* item; + + while ((item = li++)) + { + if (item->type() == Item::FUNC_ITEM) + { + get_cond_item((Item_func*)item, table, db); + } + else + { + get_cond_items(item, table, db); + } + } + } +} + static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond) { CHARSET_INFO* cs = system_charset_info; TABLE* table = tables->table; + String* table_name = NULL; + String* db_name = NULL; boost::shared_ptr systemCatalogPtr = execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(execplan::CalpontSystemCatalog::idb_tid2sid(thd->thread_id)); @@ -69,9 +121,30 @@ static int is_columnstore_columns_fill(THD* thd, 
TABLE_LIST* tables, COND* cond) systemCatalogPtr->identity(execplan::CalpontSystemCatalog::FE); + if (cond) + { + get_cond_items(cond, &table_name, &db_name); + } + for (std::vector >::const_iterator it = catalog_tables.begin(); it != catalog_tables.end(); ++it) { + if (db_name) + { + if ((*it).second.schema.compare(db_name->ptr()) != 0) + { + continue; + } + } + + if (table_name) + { + if ((*it).second.table.compare(table_name->ptr()) != 0) + { + continue; + } + } + execplan::CalpontSystemCatalog::RIDList column_rid_list; // Note a table may get dropped as you iterate over the list of tables. @@ -184,8 +257,6 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond) } } - - return 0; } diff --git a/dbcon/mysql/is_columnstore_extents.cpp b/dbcon/mysql/is_columnstore_extents.cpp index 8eb8e15e3..bdde53316 100644 --- a/dbcon/mysql/is_columnstore_extents.cpp +++ b/dbcon/mysql/is_columnstore_extents.cpp @@ -52,14 +52,142 @@ ST_FIELD_INFO is_columnstore_extents_fields[] = {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} }; -static int is_columnstore_extents_fill(THD* thd, TABLE_LIST* tables, COND* cond) +static int generate_result(BRM::OID_t oid, BRM::DBRM* emp, TABLE* table, THD* thd) { CHARSET_INFO* cs = system_charset_info; - TABLE* table = tables->table; std::vector entries; std::vector::iterator iter; std::vector::iterator end; + + emp->getExtents(oid, entries, false, false, true); + + if (entries.size() == 0) + return 0; + + iter = entries.begin(); + end = entries.end(); + + while (iter != end) + { + table->field[0]->store(oid); + + if (iter->colWid > 0) + { + table->field[1]->store("Column", strlen("Column"), cs); + + if (iter->partition.cprange.lo_val == std::numeric_limits::max() || + iter->partition.cprange.lo_val <= (std::numeric_limits::min() + 2)) + { + table->field[4]->set_null(); + } + else + { + table->field[4]->set_notnull(); + table->field[4]->store(iter->partition.cprange.lo_val); + } + + if (iter->partition.cprange.hi_val == 
std::numeric_limits::max() || + iter->partition.cprange.hi_val <= (std::numeric_limits::min() + 2)) + { + table->field[5]->set_null(); + } + else + { + table->field[5]->set_notnull(); + table->field[5]->store(iter->partition.cprange.hi_val); + } + + table->field[6]->store(iter->colWid); + + } + else + { + table->field[1]->store("Dictionary", strlen("Dictionary"), cs); + table->field[4]->set_null(); + table->field[5]->set_null(); + table->field[6]->store(8192); + } + + table->field[2]->store(iter->range.start); + table->field[3]->store(iter->range.start + (iter->range.size * 1024) - 1); + + table->field[7]->store(iter->dbRoot); + table->field[8]->store(iter->partitionNum); + table->field[9]->store(iter->segmentNum); + table->field[10]->store(iter->blockOffset); + table->field[11]->store(iter->range.size * 1024); + table->field[12]->store(iter->HWM); + + switch (iter->partition.cprange.isValid) + { + case 0: + table->field[13]->store("Invalid", strlen("Invalid"), cs); + break; + + case 1: + table->field[13]->store("Updating", strlen("Updating"), cs); + break; + + case 2: + table->field[13]->store("Valid", strlen("Valid"), cs); + break; + + default: + table->field[13]->store("Unknown", strlen("Unknown"), cs); + break; + } + + switch (iter->status) + { + case BRM::EXTENTAVAILABLE: + table->field[14]->store("Available", strlen("Available"), cs); + break; + + case BRM::EXTENTUNAVAILABLE: + table->field[14]->store("Unavailable", strlen("Unavailable"), cs); + break; + + case BRM::EXTENTOUTOFSERVICE: + table->field[14]->store("Out of service", strlen("Out of service"), cs); + break; + + default: + table->field[14]->store("Unknown", strlen("Unknown"), cs); + } + + // MCOL-1016: on multiple segments HWM is set to 0 on the lower + // segments, we don't want these to show as 8KB. The down side is + // if the column has less than 1 block it will show as 0 bytes. + // We have no lookahead without it getting messy so this is the + // best compromise. 
+ if (iter->HWM == 0) + { + table->field[15]->store(0); + } + else + { + table->field[15]->store((iter->HWM + 1) * 8192); + } + + if (schema_table_store_record(thd, table)) + { + delete emp; + return 1; + } + + iter++; + + } + + return 0; +} + +static int is_columnstore_extents_fill(THD* thd, TABLE_LIST* tables, COND* cond) +{ + BRM::OID_t cond_oid = 0; + TABLE* table = tables->table; + BRM::DBRM* emp = new BRM::DBRM(); if (!emp || !emp->isDBRMReady()) @@ -67,130 +195,83 @@ static int is_columnstore_extents_fill(THD* thd, TABLE_LIST* tables, COND* cond) return 1; } + if (cond && cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + + if ((fitem->functype() == Item_func::EQ_FUNC) && (fitem->argument_count() == 2)) + { + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + // WHERE object_id = value + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + + if (strcasecmp(item_field->field_name.str, "object_id") == 0) + { + cond_oid = fitem->arguments()[1]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + else if (fitem->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[0]->const_item()) + { + // WHERE value = object_id + Item_field* item_field = (Item_field*) fitem->arguments()[1]->real_item(); + + if (strcasecmp(item_field->field_name.str, "object_id") == 0) + { + cond_oid = fitem->arguments()[0]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + } + else if (fitem->functype() == Item_func::IN_FUNC) + { + // WHERE object_id in (value1, value2) + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + + if (strcasecmp(item_field->field_name.str, "object_id") == 0) + { + for (unsigned int i = 1; i < fitem->argument_count(); i++) + { + cond_oid = fitem->arguments()[i]->val_int(); + int result = generate_result(cond_oid, emp, table, thd); + + if (result) + return 
1; + } + } + } + else if (fitem->functype() == Item_func::UNKNOWN_FUNC && + strcasecmp(fitem->func_name(), "find_in_set") == 0) + { + // WHERE FIND_IN_SET(object_id, values) + String* tmp_var = fitem->arguments()[1]->val_str(); + std::stringstream ss(tmp_var->ptr()); + + while (ss >> cond_oid) + { + int ret = generate_result(cond_oid, emp, table, thd); + + if (ret) + return 1; + + if (ss.peek() == ',') + ss.ignore(); + } + } + } + execplan::ObjectIDManager oidm; BRM::OID_t MaxOID = oidm.size(); for (BRM::OID_t oid = 3000; oid <= MaxOID; oid++) { - emp->getExtents(oid, entries, false, false, true); + int result = generate_result(oid, emp, table, thd); - if (entries.size() == 0) - continue; - - iter = entries.begin(); - end = entries.end(); - - while (iter != end) - { - table->field[0]->store(oid); - - if (iter->colWid > 0) - { - table->field[1]->store("Column", strlen("Column"), cs); - - if (iter->partition.cprange.lo_val == std::numeric_limits::max() || - iter->partition.cprange.lo_val <= (std::numeric_limits::min() + 2)) - { - table->field[4]->set_null(); - } - else - { - table->field[4]->set_notnull(); - table->field[4]->store(iter->partition.cprange.lo_val); - } - - if (iter->partition.cprange.hi_val == std::numeric_limits::max() || - iter->partition.cprange.hi_val <= (std::numeric_limits::min() + 2)) - { - table->field[5]->set_null(); - } - else - { - table->field[5]->set_notnull(); - table->field[5]->store(iter->partition.cprange.hi_val); - } - - table->field[6]->store(iter->colWid); - - } - else - { - table->field[1]->store("Dictionary", strlen("Dictionary"), cs); - table->field[4]->set_null(); - table->field[5]->set_null(); - table->field[6]->store(8192); - } - - table->field[2]->store(iter->range.start); - table->field[3]->store(iter->range.start + (iter->range.size * 1024) - 1); - - table->field[7]->store(iter->dbRoot); - table->field[8]->store(iter->partitionNum); - table->field[9]->store(iter->segmentNum); - table->field[10]->store(iter->blockOffset); - 
table->field[11]->store(iter->range.size * 1024); - table->field[12]->store(iter->HWM); - - switch (iter->partition.cprange.isValid) - { - case 0: - table->field[13]->store("Invalid", strlen("Invalid"), cs); - break; - - case 1: - table->field[13]->store("Updating", strlen("Updating"), cs); - break; - - case 2: - table->field[13]->store("Valid", strlen("Valid"), cs); - break; - - default: - table->field[13]->store("Unknown", strlen("Unknown"), cs); - break; - } - - switch (iter->status) - { - case BRM::EXTENTAVAILABLE: - table->field[14]->store("Available", strlen("Available"), cs); - break; - - case BRM::EXTENTUNAVAILABLE: - table->field[14]->store("Unavailable", strlen("Unavailable"), cs); - break; - - case BRM::EXTENTOUTOFSERVICE: - table->field[14]->store("Out of service", strlen("Out of service"), cs); - break; - - default: - table->field[14]->store("Unknown", strlen("Unknown"), cs); - } - - // MCOL-1016: on multiple segments HWM is set to 0 on the lower - // segments, we don't want these to show as 8KB. The down side is - // if the column has less than 1 block it will show as 0 bytes. - // We have no lookahead without it getting messy so this is the - // best compromise. 
- if (iter->HWM == 0) - { - table->field[15]->store(0); - } - else - { - table->field[15]->store((iter->HWM + 1) * 8192); - } - - if (schema_table_store_record(thd, table)) - { - delete emp; - return 1; - } - - iter++; - - } + if (result) + return 1; } delete emp; diff --git a/dbcon/mysql/is_columnstore_files.cpp b/dbcon/mysql/is_columnstore_files.cpp index 740075165..be0411058 100644 --- a/dbcon/mysql/is_columnstore_files.cpp +++ b/dbcon/mysql/is_columnstore_files.cpp @@ -84,12 +84,10 @@ static bool get_file_sizes(messageqcpp::MessageQueueClient* msgQueueClient, cons } } -static int is_columnstore_files_fill(THD* thd, TABLE_LIST* tables, COND* cond) +static int generate_result(BRM::OID_t oid, BRM::DBRM* emp, TABLE* table, THD* thd) { - BRM::DBRM* emp = new BRM::DBRM(); std::vector entries; CHARSET_INFO* cs = system_charset_info; - TABLE* table = tables->table; char oidDirName[WriteEngine::FILE_NAME_SIZE]; char fullFileName[WriteEngine::FILE_NAME_SIZE]; @@ -103,99 +101,184 @@ static int is_columnstore_files_fill(THD* thd, TABLE_LIST* tables, COND* cond) oam::Oam oam_instance; int pmId = 0; + emp->getExtents(oid, entries, false, false, true); + + if (entries.size() == 0) + return 0; + + std::vector::const_iterator iter = entries.begin(); + + while ( iter != entries.end() ) //organize extents into files + { + // Don't include files more than once at different block offsets + if (iter->blockOffset > 0) + { + iter++; + return 0; + } + + try + { + oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); + } + catch (std::runtime_error) + { + // MCOL-1116: If we are here a DBRoot is offline/missing + iter++; + return 0; + } + + table->field[0]->store(oid); + table->field[1]->store(iter->segmentNum); + table->field[2]->store(iter->partitionNum); + + WriteEngine::Convertor::oid2FileName(oid, oidDirName, dbDir, iter->partitionNum, iter->segmentNum); + std::stringstream DbRootName; + DbRootName << "DBRoot" << iter->dbRoot; + std::string DbRootPath = 
config->getConfig("SystemConfig", DbRootName.str()); + fileSize = compressedFileSize = 0; + snprintf(fullFileName, WriteEngine::FILE_NAME_SIZE, "%s/%s", DbRootPath.c_str(), oidDirName); + + std::ostringstream oss; + oss << "pm" << pmId << "_WriteEngineServer"; + std::string client = oss.str(); + msgQueueClient = messageqcpp::MessageQueueClientPool::getInstance(oss.str()); + + if (!get_file_sizes(msgQueueClient, fullFileName, &fileSize, &compressedFileSize)) + { + messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); + delete emp; + return 1; + } + + table->field[3]->store(fullFileName, strlen(fullFileName), cs); + + if (fileSize > 0) + { + table->field[4]->set_notnull(); + table->field[4]->store(fileSize); + + if (compressedFileSize > 0) + { + table->field[5]->set_notnull(); + table->field[5]->store(compressedFileSize); + } + else + { + table->field[5]->set_null(); + } + } + else + { + table->field[4]->set_null(); + table->field[5]->set_null(); + } + + if (schema_table_store_record(thd, table)) + { + messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); + delete emp; + return 1; + } + + iter++; + messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); + msgQueueClient = NULL; + } + + return 0; +} + +static int is_columnstore_files_fill(THD* thd, TABLE_LIST* tables, COND* cond) +{ + BRM::DBRM* emp = new BRM::DBRM(); + BRM::OID_t cond_oid = 0; + TABLE* table = tables->table; + if (!emp || !emp->isDBRMReady()) { return 1; } + if (cond && cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + + if ((fitem->functype() == Item_func::EQ_FUNC) && (fitem->argument_count() == 2)) + { + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + // WHERE object_id = value + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + + if (strcasecmp(item_field->field_name.str, "object_id") == 0) + { + cond_oid = 
fitem->arguments()[1]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + else if (fitem->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[0]->const_item()) + { + // WHERE value = object_id + Item_field* item_field = (Item_field*) fitem->arguments()[1]->real_item(); + + if (strcasecmp(item_field->field_name.str, "object_id") == 0) + { + cond_oid = fitem->arguments()[0]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + } + else if (fitem->functype() == Item_func::IN_FUNC) + { + // WHERE object_id in (value1, value2) + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + + if (strcasecmp(item_field->field_name.str, "object_id") == 0) + { + for (unsigned int i = 1; i < fitem->argument_count(); i++) + { + cond_oid = fitem->arguments()[i]->val_int(); + int result = generate_result(cond_oid, emp, table, thd); + + if (result) + return 1; + } + } + } + else if (fitem->functype() == Item_func::UNKNOWN_FUNC && + strcasecmp(fitem->func_name(), "find_in_set") == 0) + { + // WHERE FIND_IN_SET(object_id, values) + String* tmp_var = fitem->arguments()[1]->val_str(); + std::stringstream ss(tmp_var->ptr()); + + while (ss >> cond_oid) + { + int ret = generate_result(cond_oid, emp, table, thd); + + if (ret) + return 1; + + if (ss.peek() == ',') + ss.ignore(); + } + } + } + execplan::ObjectIDManager oidm; BRM::OID_t MaxOID = oidm.size(); - for (BRM::OID_t oid = 3000; oid <= MaxOID; oid++) + if (!cond_oid) { - emp->getExtents(oid, entries, false, false, true); - - if (entries.size() == 0) - continue; - - std::vector::const_iterator iter = entries.begin(); - - while ( iter != entries.end() ) //organize extents into files + for (BRM::OID_t oid = 3000; oid <= MaxOID; oid++) { - // Don't include files more than once at different block offsets - if (iter->blockOffset > 0) - { - iter++; - continue; - } + int result = generate_result(oid, emp, table, thd); - try - { - 
oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); - } - catch (std::runtime_error) - { - // MCOL-1116: If we are here a DBRoot is offline/missing - iter++; - continue; - } - - table->field[0]->store(oid); - table->field[1]->store(iter->segmentNum); - table->field[2]->store(iter->partitionNum); - - WriteEngine::Convertor::oid2FileName(oid, oidDirName, dbDir, iter->partitionNum, iter->segmentNum); - std::stringstream DbRootName; - DbRootName << "DBRoot" << iter->dbRoot; - std::string DbRootPath = config->getConfig("SystemConfig", DbRootName.str()); - fileSize = compressedFileSize = 0; - snprintf(fullFileName, WriteEngine::FILE_NAME_SIZE, "%s/%s", DbRootPath.c_str(), oidDirName); - - std::ostringstream oss; - oss << "pm" << pmId << "_WriteEngineServer"; - std::string client = oss.str(); - msgQueueClient = messageqcpp::MessageQueueClientPool::getInstance(oss.str()); - - if (!get_file_sizes(msgQueueClient, fullFileName, &fileSize, &compressedFileSize)) - { - messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); - delete emp; + if (result) return 1; - } - - table->field[3]->store(fullFileName, strlen(fullFileName), cs); - - if (fileSize > 0) - { - table->field[4]->set_notnull(); - table->field[4]->store(fileSize); - - if (compressedFileSize > 0) - { - table->field[5]->set_notnull(); - table->field[5]->store(compressedFileSize); - } - else - { - table->field[5]->set_null(); - } - } - else - { - table->field[4]->set_null(); - table->field[5]->set_null(); - } - - if (schema_table_store_record(thd, table)) - { - messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); - delete emp; - return 1; - } - - iter++; - messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); - msgQueueClient = NULL; } } diff --git a/dbcon/mysql/is_columnstore_tables.cpp b/dbcon/mysql/is_columnstore_tables.cpp index 7c52f6328..02a5dd72e 100644 --- a/dbcon/mysql/is_columnstore_tables.cpp +++ b/dbcon/mysql/is_columnstore_tables.cpp @@ -42,22 +42,95 @@ 
ST_FIELD_INFO is_columnstore_tables_fields[] = {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} }; +static void get_cond_item(Item_func* item, String** table, String** db) +{ + char tmp_char[MAX_FIELD_WIDTH]; + Item_field* item_field = (Item_field*) item->arguments()[0]->real_item(); + + if (strcasecmp(item_field->field_name.str, "table_name") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *table = item->arguments()[1]->val_str(&str_buf); + return; + } + else if (strcasecmp(item_field->field_name.str, "table_schema") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *db = item->arguments()[1]->val_str(&str_buf); + return; + } +} + +static void get_cond_items(COND* cond, String** table, String** db) +{ + if (cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + get_cond_item(fitem, table, db); + } + } + else if ((cond->type() == Item::COND_ITEM) && (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item* item; + + while ((item = li++)) + { + if (item->type() == Item::FUNC_ITEM) + { + get_cond_item((Item_func*)item, table, db); + } + else + { + get_cond_items(item, table, db); + } + } + } +} + static int is_columnstore_tables_fill(THD* thd, TABLE_LIST* tables, COND* cond) { CHARSET_INFO* cs = system_charset_info; TABLE* table = tables->table; + String* table_name = NULL; + String* db_name = NULL; boost::shared_ptr systemCatalogPtr = execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(execplan::CalpontSystemCatalog::idb_tid2sid(thd->thread_id)); systemCatalogPtr->identity(execplan::CalpontSystemCatalog::FE); + if (cond) + { + get_cond_items(cond, &table_name, &db_name); + } + const std::vector< std::pair > catalog_tables = systemCatalogPtr->getTables(); for (std::vector >::const_iterator it = 
catalog_tables.begin(); it != catalog_tables.end(); ++it) { + if (db_name) + { + if ((*it).second.schema.compare(db_name->ptr()) != 0) + { + continue; + } + } + + if (table_name) + { + if ((*it).second.table.compare(table_name->ptr()) != 0) + { + continue; + } + } + execplan::CalpontSystemCatalog::TableInfo tb_info = systemCatalogPtr->tableInfo((*it).second); std::string create_date = dataconvert::DataConvert::dateToString((*it).second.create_date); table->field[0]->store((*it).second.schema.c_str(), (*it).second.schema.length(), cs); diff --git a/dbcon/mysql/sm.cpp b/dbcon/mysql/sm.cpp index 569fee6a5..9cbfc73e6 100644 --- a/dbcon/mysql/sm.cpp +++ b/dbcon/mysql/sm.cpp @@ -20,6 +20,7 @@ * ***********************************************************************/ +#include #include #include #include @@ -279,7 +280,7 @@ tpl_open ( tableid_t tableid, cpsm_tplh_t* ntplh, cpsm_conhdl_t* conn_hdl) { - SMDEBUGLOG << "tpl_open: " << conn_hdl << " tableid: " << tableid << endl; + SMDEBUGLOG << "tpl_open: ntplh: " << ntplh << " conn_hdl: " << conn_hdl << " tableid: " << tableid << endl; // if first time enter this function for a statement, set // queryState to QUERY_IN_PRCOESS and get execution plan. @@ -318,7 +319,9 @@ tpl_scan_open ( tableid_t tableid, sp_cpsm_tplsch_t& ntplsch, cpsm_conhdl_t* conn_hdl ) { +#if IDB_SM_DEBUG SMDEBUGLOG << "tpl_scan_open: " << conn_hdl << " tableid: " << tableid << endl; +#endif // @bug 649. No initialization here. 
take passed in reference ntplsch->tableid = tableid; @@ -353,8 +356,8 @@ tpl_scan_close ( sp_cpsm_tplsch_t& ntplsch ) SMDEBUGLOG << "tpl_scan_close: "; if (ntplsch) - SMDEBUGLOG << " tableid: " << ntplsch->tableid << endl; - + SMDEBUGLOG << "tpl_scan_close: ntplsch " << ntplsch; + SMDEBUGLOG << "tpl_scan_close: tableid: " << ntplsch->tableid << endl; #endif ntplsch.reset(); @@ -364,11 +367,12 @@ tpl_scan_close ( sp_cpsm_tplsch_t& ntplsch ) status_t tpl_close ( cpsm_tplh_t* ntplh, cpsm_conhdl_t** conn_hdl, - QueryStats& stats ) + QueryStats& stats, + bool clear_scan_ctx) { cpsm_conhdl_t* hndl = *conn_hdl; #if IDB_SM_DEBUG - SMDEBUGLOG << "tpl_close: " << hndl; + SMDEBUGLOG << "tpl_close: hndl" << hndl << " ntplh " << ntplh; if (ntplh) SMDEBUGLOG << " tableid: " << ntplh->tableid; @@ -385,7 +389,16 @@ tpl_close ( cpsm_tplh_t* ntplh, ByteStream::quadbyte qb = 3; bs << qb; hndl->write(bs); + + // MCOL-1601 Dispose of unused empty RowGroup + if (clear_scan_ctx) + { + bs = hndl->exeMgr->read(); + } +#if IDB_SM_DEBUG + SMDEBUGLOG << "tpl_close hndl->exeMgr: " << hndl->exeMgr << endl; +#endif //keep reading until we get a string //TODO: really need to fix this! Why is ExeMgr sending other stuff? for (int tries = 0; tries < 10; tries++) @@ -414,6 +427,9 @@ tpl_close ( cpsm_tplh_t* ntplh, { // querystats messed up. close connection. // no need to throw for querystats protocol error, like for tablemode. 
+#if IDB_SM_DEBUG + SMDEBUGLOG << "tpl_close() exception whilst getting stats" << endl; +#endif end_query(hndl); sm_cleanup(hndl); *conn_hdl = 0; @@ -435,9 +451,9 @@ sm_init ( uint32_t sid, { // clear file content #if IDB_SM_DEBUG - smlog.close(); - smlog.open("/tmp/sm.log"); - SMDEBUGLOG << "sm_init: " << dboptions << endl; + //smlog.close(); + //smlog.open("/tmp/sm.log"); + SMDEBUGLOG << "sm_init: " << endl; #endif // @bug5660 Connection changes related to the local pm setting @@ -473,7 +489,6 @@ sm_cleanup ( cpsm_conhdl_t* conn_hdl ) { #if IDB_SM_DEBUG SMDEBUGLOG << "sm_cleanup: " << conn_hdl << endl; - SMDEBUGLOG.close(); #endif delete conn_hdl; diff --git a/dbcon/mysql/sm.h b/dbcon/mysql/sm.h index a2c8defaa..65cf35123 100644 --- a/dbcon/mysql/sm.h +++ b/dbcon/mysql/sm.h @@ -60,12 +60,12 @@ const int SQL_NOT_FOUND = -1000; const int SQL_KILLED = -1001; const int CALPONT_INTERNAL_ERROR = -1007; -#if IDB_SM_DEBUG -extern std::ofstream smlog; -#define SMDEBUGLOG smlog -#else -#define SMDEBUGLOG if (false) std::cerr -#endif +//#if IDB_SM_DEBUG +//extern std::ofstream smlog; +//#define SMDEBUGLOG smlog +//#else +#define SMDEBUGLOG if (true) std::cerr +//#endif extern const std::string DEFAULT_SAVE_PATH; typedef uint64_t tableid_t; @@ -282,7 +282,7 @@ extern status_t tpl_open(tableid_t, cpsm_tplh_t*, cpsm_conhdl_t*); extern status_t tpl_scan_open(tableid_t, sp_cpsm_tplsch_t&, cpsm_conhdl_t*); extern status_t tpl_scan_fetch(sp_cpsm_tplsch_t&, cpsm_conhdl_t*, int* k = 0); extern status_t tpl_scan_close(sp_cpsm_tplsch_t&); -extern status_t tpl_close(cpsm_tplh_t*, cpsm_conhdl_t**, querystats::QueryStats& stats); +extern status_t tpl_close(cpsm_tplh_t*, cpsm_conhdl_t**, querystats::QueryStats& stats, bool clear_scan_ctx = false); } diff --git a/exemgr/main.cpp b/exemgr/main.cpp index ca0573788..8742188a0 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -1443,7 +1443,9 @@ int main(int argc, char* argv[]) #endif setupSignalHandlers(); - int err = setupResources(); + 
int err = 0; + if (!gDebug) + err = setupResources(); string errMsg; switch (err) diff --git a/oam/cloud/MCSVolumeCmds.sh b/oam/cloud/MCSVolumeCmds.sh index 291d27e44..c7a231261 100755 --- a/oam/cloud/MCSVolumeCmds.sh +++ b/oam/cloud/MCSVolumeCmds.sh @@ -202,7 +202,7 @@ detachvolume() { checkInfostatus if [ $STATUS == "detaching" ]; then retries=1 - while [ $retries -ne 60 ]; do + while [ $retries -ne 10 ]; do #retry until it's attached $AWSCLI detach-volume --volume-id $volumeName --region $Region > /tmp/volumeInfo_$volumeName 2>&1 @@ -239,7 +239,7 @@ attachvolume() { checkInfostatus if [ $STATUS == "attaching" -o $STATUS == "already-attached" ]; then retries=1 - while [ $retries -ne 60 ]; do + while [ $retries -ne 10 ]; do #check status until it's attached describevolume if [ $STATUS == "attached" ]; then diff --git a/oam/etc/Columnstore.xml b/oam/etc/Columnstore.xml index 5ef94770b..8cc52a1c7 100644 --- a/oam/etc/Columnstore.xml +++ b/oam/etc/Columnstore.xml @@ -438,7 +438,7 @@ n n n - 2 + 1 n n internal diff --git a/oam/etc/ProcessConfig.xml b/oam/etc/ProcessConfig.xml index 8088135bb..0bedf10de 100644 --- a/oam/etc/ProcessConfig.xml +++ b/oam/etc/ProcessConfig.xml @@ -97,7 +97,7 @@ WriteEngineServer pm* DBRMWorkerNode - * + @ ExeMgr * SIMPLEX @@ -112,7 +112,7 @@ WriteEngineServer pm* DBRMWorkerNode - * + @ DDLProc @ SIMPLEX diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index fa1f6f410..47469ac5b 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5628,6 +5628,7 @@ void Oam::manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::s dbrootList dbroot1; dbroot1.push_back(*pt1); + bool returnDbRoot = false; //send msg to unmount dbroot if module is not offline int opState; @@ -5642,7 +5643,6 @@ void Oam::manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::s if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) { -// bool unmountPass = true; try { mountDBRoot(dbroot1, false); @@ 
-5652,13 +5652,8 @@ void Oam::manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::s writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR ); cout << endl << "ERROR: umountDBRoot api failure" << endl; exceptionControl("manualMovePmDbroot", API_FAILURE); -// unmountPass = false; } -// if ( !unmountPass) { -// dbrootlist.erase(pt1); -// break; -// } } //check for amazon moving required @@ -5676,40 +5671,79 @@ void Oam::manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::s //if Gluster, do the assign command if ( DataRedundancyConfig == "y") { - try - { + try + { string errmsg; int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg); - - if ( ret != 0 ) + if ( ret == 0 ) + { + todbrootConfigList.push_back(*pt2); + residedbrootConfigList.erase(pt2); + } + else { cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg, LOG_TYPE_ERROR ); + returnDbRoot = true; } } catch (exception& e) { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } catch (...) 
{ cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } } - todbrootConfigList.push_back(*pt2); - - residedbrootConfigList.erase(pt2); - + if (returnDbRoot) + { + // something went wrong return it back to original owner + try + { + string errmsg; + writeLog("reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, residePM, errmsg); + if ( ret != 0 ) + { + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + mountDBRoot(dbroot1); + //get updated Columnstore.xml distributed + distributeConfigFile("system"); + return; + } + catch (exception& e) + { + cout << endl << "**** glusterctl API exception: " << e.what() << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + catch (...) 
+ { + cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + } break; } } } + + //set the 2 pms dbroot config try { @@ -5719,7 +5753,7 @@ void Oam::manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::s { writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID, LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } try @@ -5730,7 +5764,7 @@ void Oam::manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::s { writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID, LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } //send msg to mount dbroot @@ -5824,6 +5858,36 @@ bool Oam::autoMovePmDbroot(std::string residePM) exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); } + //detach first to make sure DBS can be detach before trying to move to another pm + DBRootConfigList::iterator pt3 = residedbrootConfigList.begin(); + + for ( ; pt3 != residedbrootConfigList.end() ; pt3++ ) + { + int dbrootID = *pt3; + + try + { + typedef std::vector dbrootList; + dbrootList dbrootlist; + dbrootlist.push_back(itoa(dbrootID)); + + amazonDetach(dbrootlist); + } + catch (exception& ) + { + writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); + + //reattach + typedef std::vector dbrootList; + dbrootList dbrootlist; + dbrootlist.push_back(itoa(dbrootID)); + + amazonAttach(residePM, dbrootlist); + + 
exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); + } + } + //get dbroot id for other PMs systemStorageInfo_t t; DeviceDBRootList moduledbrootlist; @@ -6344,16 +6408,16 @@ bool Oam::autoUnMovePmDbroot(std::string toPM) if (!found) { - writeLog("ERROR: no dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_ERROR ); - cout << "ERROR: no dbroots found in " << fileName << endl; - exceptionControl("autoUnMovePmDbroot", API_FAILURE); + writeLog("No dbroots found in " + InstallDir + "/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); + + cout << "No dbroots found in " << fileName << endl; } oldFile.close(); unlink (fileName.c_str()); ofstream newFile (fileName.c_str()); - //create new file +//create new file int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); @@ -6927,32 +6991,6 @@ void Oam::assignDbroot(std::string toPM, DBRootConfigList& dbrootlist) for ( ; pt3 != dbrootlist.end() ; pt3++) { todbrootConfigList.push_back(*pt3); - - /* if ( DataRedundancyConfig == "y") - { - try { - string errmsg; - int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(*pt3), toPM, errmsg); - if ( ret != 0 ) - { - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } - catch (exception& e) - { - cout << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - catch (...) 
- { - cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } - */ } try @@ -7412,12 +7450,14 @@ void Oam::removeDbroot(DBRootConfigList& dbrootlist) { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } catch (...) { cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } } @@ -7753,7 +7793,7 @@ void Oam::actionMysqlCalpont(MYSQLCALPONT_ACTION action) else return; - // check if mysql-Capont is installed + // check if mysql-Columnstore is installed string mysqlscript = InstallDir + "/mysql/mysql-Columnstore"; if (access(mysqlscript.c_str(), X_OK) != 0) @@ -10327,12 +10367,167 @@ void Oam::sendStatusUpdate(ByteStream obs, ByteStream::byte returnRequestType) /*************************************************************************** * - * Function: amazonReattach + * Function: amazonDetach + * + * Purpose: Amazon EC2 volume deattach needed + * + ****************************************************************************/ + +void Oam::amazonDetach(dbrootList dbrootConfigList) +{ + //if amazon cloud with external volumes, do the detach/attach moves + string cloud; + string DBRootStorageType; + + try + { + getSystemConfig("Cloud", cloud); + getSystemConfig("DBRootStorageType", DBRootStorageType); + } + catch (...) 
{} + + if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && + DBRootStorageType == "external" ) + { + writeLog("amazonDetach function started ", LOG_TYPE_DEBUG ); + + dbrootList::iterator pt3 = dbrootConfigList.begin(); + + for ( ; pt3 != dbrootConfigList.end() ; pt3++) + { + string dbrootid = *pt3; + string volumeNameID = "PMVolumeName" + dbrootid; + string volumeName = oam::UnassignedName; + string deviceNameID = "PMVolumeDeviceName" + dbrootid; + string deviceName = oam::UnassignedName; + + try + { + getSystemConfig( volumeNameID, volumeName); + getSystemConfig( deviceNameID, deviceName); + } + catch (...) + {} + + if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) + { + cout << " ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName << endl; + writeLog("ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); + exceptionControl("amazonDetach", API_INVALID_PARAMETER); + } + + //send msg to to-pm to umount volume + int returnStatus = sendMsgToProcMgr(UNMOUNT, dbrootid, FORCEFUL, ACK_YES); + + if (returnStatus != API_SUCCESS) + { + writeLog("ERROR: amazonDetach, umount failed on " + dbrootid, LOG_TYPE_ERROR ); + } + + if (!detachEC2Volume(volumeName)) + { + cout << " ERROR: amazonDetach, detachEC2Volume failed on " + volumeName << endl; + writeLog("ERROR: amazonDetach, detachEC2Volume failed on " + volumeName, LOG_TYPE_ERROR ); + exceptionControl("amazonDetach", API_FAILURE); + } + + writeLog("amazonDetach, detachEC2Volume passed on " + volumeName, LOG_TYPE_DEBUG ); + } + } +} + +/*************************************************************************** * - * Purpose: Amazon EC2 volume reattach needed + * Function: amazonAttach + * + * Purpose: Amazon EC2 volume Attach needed * ****************************************************************************/ +void Oam::amazonAttach(std::string toPM, dbrootList dbrootConfigList) +{ + //if amazon cloud with external volumes, do the 
detach/attach moves + string cloud; + string DBRootStorageType; + + try + { + getSystemConfig("Cloud", cloud); + getSystemConfig("DBRootStorageType", DBRootStorageType); + } + catch (...) {} + + if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && + DBRootStorageType == "external" ) + { + writeLog("amazonAttach function started ", LOG_TYPE_DEBUG ); + + //get Instance Name for to-pm + string toInstanceName = oam::UnassignedName; + + try + { + ModuleConfig moduleconfig; + getSystemConfig(toPM, moduleconfig); + HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); + toInstanceName = (*pt1).HostName; + } + catch (...) + {} + + if ( toInstanceName == oam::UnassignedName || toInstanceName.empty() ) + { + cout << " ERROR: amazonAttach, invalid Instance Name for " << toPM << endl; + writeLog("ERROR: amazonAttach, invalid Instance Name " + toPM, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_INVALID_PARAMETER); + } + + dbrootList::iterator pt3 = dbrootConfigList.begin(); + + for ( ; pt3 != dbrootConfigList.end() ; pt3++) + { + string dbrootid = *pt3; + string volumeNameID = "PMVolumeName" + dbrootid; + string volumeName = oam::UnassignedName; + string deviceNameID = "PMVolumeDeviceName" + dbrootid; + string deviceName = oam::UnassignedName; + + try + { + getSystemConfig( volumeNameID, volumeName); + getSystemConfig( deviceNameID, deviceName); + } + catch (...) 
+ {} + + if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) + { + cout << " ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName << endl; + writeLog("ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_INVALID_PARAMETER); + } + + if (!attachEC2Volume(volumeName, deviceName, toInstanceName)) + { + cout << " ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName << endl; + writeLog("ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_FAILURE); + } + + writeLog("amazonAttach, attachEC2Volume passed on " + volumeName + ":" + toPM, LOG_TYPE_DEBUG ); + } + } +} + + +/*************************************************************************** +* +* Function: amazonReattach +* +* Purpose: Amazon EC2 volume reattach needed +* +****************************************************************************/ + void Oam::amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach) { //if amazon cloud with external volumes, do the detach/attach moves @@ -10428,6 +10623,7 @@ void Oam::amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool att } } + /*************************************************************************** * * Function: mountDBRoot diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h index e82771e47..cc2f620ca 100644 --- a/oam/oamcpp/liboamcpp.h +++ b/oam/oamcpp/liboamcpp.h @@ -229,6 +229,7 @@ enum API_STATUS API_CONN_REFUSED, API_CANCELLED, API_STILL_WORKING, + API_DETACH_FAILURE, API_MAX }; @@ -2432,6 +2433,8 @@ public: void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false); void mountDBRoot(dbrootList dbrootConfigList, bool mount = true); + void amazonDetach(dbrootList dbrootConfigList); + void 
amazonAttach(std::string toPM, dbrootList dbrootConfigList); /** *@brief gluster control diff --git a/oamapps/postConfigure/CMakeLists.txt b/oamapps/postConfigure/CMakeLists.txt index ebe7d7f3f..4bdbadd1a 100644 --- a/oamapps/postConfigure/CMakeLists.txt +++ b/oamapps/postConfigure/CMakeLists.txt @@ -37,13 +37,13 @@ install(TARGETS getMySQLpw DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### -set(amazonInstaller_SRCS amazonInstaller.cpp helpers.cpp) +#set(amazonInstaller_SRCS amazonInstaller.cpp helpers.cpp) -add_executable(amazonInstaller ${amazonInstaller_SRCS}) +#add_executable(amazonInstaller ${amazonInstaller_SRCS}) -target_link_libraries(amazonInstaller ${ENGINE_LDFLAGS} readline ncurses ${SNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) +#target_link_libraries(amazonInstaller ${ENGINE_LDFLAGS} readline ncurses ${SNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) -install(TARGETS amazonInstaller DESTINATION ${ENGINE_BINDIR} COMPONENT platform) +#install(TARGETS amazonInstaller DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### @@ -56,3 +56,8 @@ target_link_libraries(mycnfUpgrade ${ENGINE_LDFLAGS} readline ncurses ${MARIADB_ install(TARGETS mycnfUpgrade DESTINATION ${ENGINE_BINDIR} COMPONENT platform) + +########### next target ############### + +install(PROGRAMS quick_installer_single_server.sh quick_installer_multi_server.sh quick_installer_amazon.sh + DESTINATION ${ENGINE_BINDIR} COMPONENT platform) diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp index d486c2d6a..c0aee150a 100644 --- a/oamapps/postConfigure/installer.cpp +++ b/oamapps/postConfigure/installer.cpp @@ -170,6 +170,12 @@ int main(int argc, char* argv[]) if (p && *p) USER = p; + // setup to start on reboot, for non-root amazon installs + if ( !rootUser ) + { + system("sudo sed -i -e 's/#sudo runuser/sudo runuser/g' /etc/rc.d/rc.local >/dev/null 2>&1"); + } + 
//copy Columnstore.xml.rpmsave if upgrade option is selected if ( installType == "upgrade" ) { @@ -892,7 +898,10 @@ int main(int argc, char* argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; @@ -908,7 +917,10 @@ int main(int argc, char* argv[]) cout << endl << "ERROR: MariaDB ColumnStore Process failed to start, check log files in /var/log/mariadb/columnstore" << endl; cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". 
/etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; diff --git a/oamapps/postConfigure/quick_installer_amazon.sh b/oamapps/postConfigure/quick_installer_amazon.sh new file mode 100755 index 000000000..4a1df903c --- /dev/null +++ b/oamapps/postConfigure/quick_installer_amazon.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# +# $Id: quick_installer_amazon.sh 3705 2018-07-07 19:47:20Z dhill $ +# +# Poddst- Quick Installer for Amazon MariaDB Columnstore + +pmCount="" +umCount="" +systemName="" + +for arg in "$@"; do + if [ `expr -- "$arg" : '--pm-count='` -eq 11 ]; then + pmCount="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--um-count='` -eq 11 ]; then + umCount="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--system-name='` -eq 14 ]; then + systemName="`echo $arg | awk -F= '{print $2}'`" + systemName="-sn "$systemName + elif [ `expr -- "$arg" : '--dist-install'` -eq 14 ]; then + nonDistrubutedInstall=" " + elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then + echo "Usage ./quick_installer_amazon.sh [OPTION]" + echo "" + echo "Quick Installer for an Amazon MariaDB ColumnStore Install" + echo "This requires to be run on a MariaDB ColumnStore AMI" + echo "" + echo "Performace Module (pm) number is required" + echo "User Module (um) number is option" + echo "When only pm counts provided, system is combined setup" + echo "When both pm/um counts provided, system is seperate setup" + echo + echo "--pm-count=x Number of pm instances to create" + echo "--um-count=x Number of um instances to create, optional" + echo "--system-name=nnnn System Name, optional" + echo "" + exit 1 + else + echo "./quick_installer_amazon.sh: unknown argument: $arg, enter --help for help" 1>&2 + exit 1 + fi +done + +if [[ $pmCount = "" ]]; then + echo "" + echo "Performace Module (pm) count is required, 
exiting" + exit 1 +else + if [[ $umCount = "" ]]; then + echo "" + echo "NOTE: Performing a Multi-Server Combined install with um/pm running on some server" + echo"" + else + echo "" + echo "NOTE: Performing a Multi-Server Seperate install with um and pm running on seperate servers" + echo"" + fi +fi + +if [[ $HOME = "/root" ]]; then + echo "${bold}Run post-install script${normal}" + echo "" + /usr/local/mariadb/columnstore/bin/post-install + echo "${bold}Run postConfigure script${normal}" + echo "" + if [[ $umCount = "" ]]; then + /usr/local/mariadb/columnstore/bin/postConfigure -qa -pm-count $pmCount $systemName + else + /usr/local/mariadb/columnstore/bin/postConfigure -qa -pm-count $pmCount -um-count $umCount $systemName + fi +else + echo "${bold}Run post-install script${normal}" + echo "" + $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore + echo "${bold}Run postConfigure script${normal}" + echo "" + if [[ $umCount = "" ]]; then + . /etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount $systemName + else + . 
/etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount -um-count $umCount $systemName + fi +fi diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh new file mode 100755 index 000000000..dbb603220 --- /dev/null +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# +# $Id: quick_installer_multi_server.sh 3705 2018-07-07 19:47:20Z dhill $ +# +# Poddst- Quick Installer for Multi Server MariaDB Columnstore + +pmIpAddrs="" +umIpAddrs="" +nonDistrubutedInstall="-n" +systemName="" + +for arg in "$@"; do + if [ `expr -- "$arg" : '--pm-ip-addresses='` -eq 18 ]; then + pmIpAddrs="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--um-ip-addresses='` -eq 18 ]; then + umIpAddrs="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--system-name='` -eq 14 ]; then + systemName="`echo $arg | awk -F= '{print $2}'`" + systemName="-sn "$systemName + elif [ `expr -- "$arg" : '--dist-install'` -eq 14 ]; then + nonDistrubutedInstall=" " + elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then + echo "Usage ./quick_installer_multi_server.sh [OPTION]" + echo "" + echo "Quick Installer for a Multi Server MariaDB ColumnStore Install" + echo "" + echo "Defaults to non-distrubuted install, meaning MariaDB Columnstore" + echo "needs to be preinstalled on all nodes in the system" + echo "" + echo "Performace Module (pm) IP addresses are required" + echo "User Module (um) IP addresses are option" + echo "When only pm IP addresses provided, system is combined setup" + echo "When both pm/um IP addresses provided, system is seperate setup" + echo + echo "--pm-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" + echo "--um-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx, optional" + echo "--dist-install Use Distributed Install, optional" + echo "--system-name=nnnn System Name, optional" + echo "" + exit 1 + 
else + echo "quick_installer_multi_server.sh: unknown argument: $arg, enter --help for help" 1>&2 + exit 1 + fi +done + +if [[ $pmIpAddrs = "" ]]; then + echo "" + echo "Performace Module (pm) IP addresses required, exiting" + exit 1 +else + if [[ $umIpAddrs = "" ]]; then + echo "" + echo "NOTE: Performing a Multi-Server Combined install with um/pm running on some server" + echo"" + else + echo "" + echo "NOTE: Performing a Multi-Server Seperate install with um and pm running on seperate servers" + echo"" + fi +fi + +if [[ $HOME = "/root" ]]; then + echo "${bold}Run post-install script${normal}" + echo "" + /usr/local/mariadb/columnstore/bin/post-install + echo "${bold}Run postConfigure script${normal}" + echo "" + if [[ $umIpAddrs = "" ]]; then + /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName + else + /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName + fi +else + echo "${bold}Run post-install script${normal}" + echo "" + $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore + echo "${bold}Run postConfigure script${normal}" + echo "" + if [[ $umIpAddrs = "" ]]; then + . /etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName + else + . 
/etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName + fi +fi diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh new file mode 100755 index 000000000..432b395c4 --- /dev/null +++ b/oamapps/postConfigure/quick_installer_single_server.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# +# $Id: quick_installer_single_server.sh 3705 2018-07-07 19:47:20Z dhill $ +# +# Poddst- Quick Installer for Single Server MariaDB Columnstore + +for arg in "$@"; do + if [ `expr -- "$arg" : '--help'` -eq 6 ]; then + echo "Usage ./quick_installer_multi_server.sh" + echo "" + echo "Quick Installer for a Single Server MariaDB ColumnStore Install" + echo "" + exit 1 + else + echo "quick_installer_multi_server.sh: ignoring unknown argument: $arg" 1>&2 + fi +done + + +if [ $HOME == "/root" ]; then + echo "Run post-install script" + echo "" + /usr/local/mariadb/columnstore/bin/post-install + echo "Run postConfigure script" + echo "" + /usr/local/mariadb/columnstore/bin/postConfigure -qs +else + echo "Run post-install script" + echo "" + $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore + echo "Run postConfigure script" + echo "" + . 
/etc/profile.d/columnstoreEnv.sh; $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qs +fi diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index f31273b1c..2b4450d2c 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -65,8 +65,6 @@ inline uint64_t order_swap(uint64_t x) template inline string fixChar(int64_t intval); -idb_regex_t placeholderRegex; - template inline int compareBlock( const void* a, const void* b ) { @@ -1095,6 +1093,7 @@ inline void p_Col_ridArray(NewColRequestHeader* in, uint16_t* ridArray = 0; uint8_t* in8 = reinterpret_cast(in); const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W; + idb_regex_t placeholderRegex; placeholderRegex.used = false; diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp index bc56a7430..019761d39 100644 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -1677,15 +1677,11 @@ void BatchPrimitiveProcessor::execute() } catch (logging::QueryDataExcept& qex) { - ostringstream os; - os << qex.what() << endl; - writeErrorMsg(os.str(), qex.errorCode()); + writeErrorMsg(qex.what(), qex.errorCode()); } catch (logging::DictionaryBufferOverflow& db) { - ostringstream os; - os << db.what() << endl; - writeErrorMsg(os.str(), db.errorCode()); + writeErrorMsg(db.what(), db.errorCode()); } catch (scalar_exception& se) { @@ -1758,15 +1754,11 @@ void BatchPrimitiveProcessor::execute() } catch (IDBExcept& iex) { - ostringstream os; - os << iex.what() << endl; - writeErrorMsg(os.str(), iex.errorCode(), true, false); + writeErrorMsg(iex.what(), iex.errorCode(), true, false); } catch (const std::exception& ex) { - ostringstream os; - os << ex.what() << endl; - writeErrorMsg(os.str(), logging::batchPrimitiveProcessorErr); + writeErrorMsg(ex.what(), logging::batchPrimitiveProcessorErr); } catch (...) 
{ diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp index fe087f8aa..227b494de 100644 --- a/primitives/primproc/primitiveserver.cpp +++ b/primitives/primproc/primitiveserver.cpp @@ -2067,6 +2067,11 @@ struct ReadThread case DICT_CREATE_EQUALITY_FILTER: { PriorityThreadPool::Job job; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; job.functor = boost::shared_ptr(new CreateEqualityFilter(bs)); OOBPool->addJob(job); break; @@ -2075,6 +2080,11 @@ struct ReadThread case DICT_DESTROY_EQUALITY_FILTER: { PriorityThreadPool::Job job; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; job.functor = boost::shared_ptr(new DestroyEqualityFilter(bs)); OOBPool->addJob(job); break; @@ -2108,6 +2118,11 @@ struct ReadThread job.id = hdr->Hdr.UniqueID; job.weight = LOGICAL_BLOCK_RIDS; job.priority = hdr->Hdr.Priority; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; if (hdr->flags & IS_SYSCAT) { @@ -2155,9 +2170,15 @@ struct ReadThread job.id = bpps->getID(); job.weight = ismHdr->Size; job.priority = bpps->priority(); + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; if (bpps->isSysCat()) { + //boost::thread t(*bpps); // using already-existing threads may cut latency // if it's changed back to running in an independent thread @@ -2176,6 +2197,11 @@ struct ReadThread { PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Create(fBPPHandler, bs)); + 
const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->createBPP(*bs); break; @@ -2186,6 +2212,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::AddJoiner(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->addJoinerToBPP(*bs); break; @@ -2199,6 +2230,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::LastJoiner(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); break; } @@ -2210,6 +2246,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Destroy(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->destroyBPP(*bs); break; @@ -2228,6 +2269,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Abort(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; 
OOBPool->addJob(job); break; } @@ -2259,12 +2305,12 @@ struct ReadThread } } - // If this function is called, we have a "bug" of some sort. We added - // the "fIos" connection to UmSocketSelector earlier, so at the very - // least, UmSocketSelector should have been able to return that con- - // nection/port. We will try to recover by using the original fIos to - // send the response msg; but as stated, if this ever happens we have - // a bug we need to resolve. +// If this function is called, we have a "bug" of some sort. We added +// the "fIos" connection to UmSocketSelector earlier, so at the very +// least, UmSocketSelector should have been able to return that con- +// nection/port. We will try to recover by using the original fIos to +// send the response msg; but as stated, if this ever happens we have +// a bug we need to resolve. void handleUmSockSelErr(const string& cmd) { ostringstream oss; diff --git a/primitives/primproc/primproc.cpp b/primitives/primproc/primproc.cpp index 140926266..1b19ccd0f 100644 --- a/primitives/primproc/primproc.cpp +++ b/primitives/primproc/primproc.cpp @@ -321,6 +321,22 @@ int main(int argc, char* argv[]) // This is unset due to the way we start it program_invocation_short_name = const_cast("PrimProc"); + int gDebug = 0; + int c; + + while ((c = getopt(argc, argv, "d")) != EOF) + { + switch(c) + { + case 'd': + gDebug++; + break; + case '?': + default: + break; + } + } + Config* cf = Config::makeConfig(); setupSignalHandlers(); @@ -329,7 +345,9 @@ int main(int argc, char* argv[]) mlp = new primitiveprocessor::Logger(); - int err = setupResources(); + int err = 0; + if (!gDebug) + err = setupResources(); string errMsg; switch (err) diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 44f39ad9d..60b9f0060 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1650,9 +1650,6 @@ void pingDeviceThread() break; //set query system state not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - 
processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -1681,7 +1678,7 @@ void pingDeviceThread() processManager.restartProcessType("WriteEngineServer", moduleName); //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE); + processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); downActiveOAMModule = false; int retry; @@ -1727,7 +1724,7 @@ void pingDeviceThread() //set query system state ready processManager.setQuerySystemState(true); - break; + goto break_case; } } catch (...) @@ -1749,25 +1746,24 @@ void pingDeviceThread() if ( retry == 5 ) { log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. Fail Module", LOG_TYPE_WARNING); - + log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING); //Issue an alarm aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); //set module to disable state processManager.disableModule(moduleName, true); - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); + // Need to do something here to verify data mounts before resuming + // Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + //call dbrm control + oam.dbrmctl("readonly"); + log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG); //clear count moduleInfoList[moduleName] = 0; - processManager.setSystemState(oam::ACTIVE); + processManager.setSystemState(oam::DEGRADED); //set query system state ready processManager.setQuerySystemState(true); @@ -1777,7 +1773,7 @@ void pingDeviceThread() } else //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE); + processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); //restart module processes int retry = 0; @@ -1965,9 +1961,6 @@ void 
pingDeviceThread() } } - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -2027,9 +2020,6 @@ void pingDeviceThread() else processManager.setSystemState(oam::ACTIVE); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -2087,7 +2077,7 @@ void pingDeviceThread() if ( PrimaryUMModuleName == moduleName ) downPrimaryUM = true; - // if not disabled and amazon, skip + // if disabled, skip if (opState != oam::AUTO_DISABLED ) { //Log failure, issue alarm, set moduleOpState @@ -2095,9 +2085,6 @@ void pingDeviceThread() log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); //set query system state not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -2134,6 +2121,8 @@ void pingDeviceThread() ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) { + string error; + try { log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); @@ -2151,6 +2140,20 @@ void pingDeviceThread() { log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); } + + if ( error == oam.itoa(oam::API_DETACH_FAILURE) ) + { + processManager.setModuleState(moduleName, oam::AUTO_DISABLED); + + // resume the dbrm + oam.dbrmctl("resume"); + log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + + //set query system state ready + processManager.setQuerySystemState(true); + + break; + } } } @@ -2356,9 +2359,6 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -2375,9 +2375,6 @@ void pingDeviceThread() 
oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); } @@ -2392,9 +2389,6 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); } @@ -2527,6 +2521,7 @@ void pingDeviceThread() } } //end of for loop } + break_case: // check and take action if LAN outage is flagged if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index bdb8c5941..fadb63ea4 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -405,7 +405,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) msg >> target; msg >> graceful; msg >> ackIndicator; - msg >> manualFlag; + msg >> manualFlag; switch (actionType) { @@ -902,29 +902,31 @@ void processMSG(messageqcpp::IOSocket* cfIos) } if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED - || opState == oam::AUTO_DISABLED ) + || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) { - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); + processManager.setSystemState(oam::BUSY_INIT); + + //set query system state not ready + processManager.setQuerySystemState(false); status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + processManager.recycleProcess(moduleName); //check for SIMPLEX Processes on mate might need to be started processManager.checkSimplexModule(moduleName); + 
+ processManager.setSystemState(oam::ACTIVE); + + //set query system state ready + processManager.setQuerySystemState(true); + } else { - log.writeLog(__LINE__, "ERROR: module not stopped", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "ERROR: module not stopped, state = " + oam.itoa(opState), LOG_TYPE_ERROR); status = API_FAILURE; break; } @@ -987,7 +989,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - //stopModules being removed with the REMOVE option, which will stop process + // do stopmodule then enable for ( ; listPT != devicenetworklist.end() ; listPT++) { string moduleName = (*listPT).DeviceName; @@ -1013,6 +1015,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (opState == oam::MAN_DISABLED) { + processManager.stopModule(moduleName, graceful, manualFlag); + log.writeLog(__LINE__, "stop Module Completed on " + moduleName, LOG_TYPE_INFO); + status = processManager.enableModule(moduleName, oam::MAN_OFFLINE); log.writeLog(__LINE__, "Enable Module Completed on " + moduleName, LOG_TYPE_INFO); } @@ -1357,6 +1362,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender"); } + //set query system state ready + processManager.setQuerySystemState(true); + startsystemthreadStop = false; break; @@ -3049,9 +3057,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted on " + moduleName + "/" + processName); //set query system states not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -3150,14 +3155,15 @@ void processMSG(messageqcpp::IOSocket* cfIos) sleep(1); } + processManager.setQuerySystemState(true); - dbrm.setSystemQueryReady(true); } // if a DDLProc was restarted, reinit DMLProc if ( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + 
processManager.setQuerySystemState(true); } //only run on auto process restart @@ -3211,9 +3217,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) } } - //enable query stats - dbrm.setSystemQueryReady(true); - + //set query system states ready processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); @@ -3639,6 +3643,8 @@ int ProcessManager::disableModule(string target, bool manualFlag) if (opState == oam::AUTO_DISABLED && newState == oam::MAN_DISABLED) { + //removemodule to get proess in MAN_OFFLINE + stopModule(target, REMOVE, true); try { @@ -3691,7 +3697,7 @@ int ProcessManager::disableModule(string target, bool manualFlag) setModuleState(target, newState); - //set Columnstore.xml enbale state + //set Columnstore.xml enable state setEnableState( target, SnewState); log.writeLog(__LINE__, "disableModule - setEnableState", LOG_TYPE_DEBUG); @@ -3777,18 +3783,18 @@ void ProcessManager::recycleProcess(string module, bool enableModule) restartProcessType("PrimProc"); sleep(1); - restartProcessType("ExeMgr"); - sleep(1); - - restartProcessType("mysql"); + restartProcessType("mysqld"); restartProcessType("WriteEngineServer"); sleep(1); - restartProcessType("DDLProc", module); + startProcessType("ExeMgr"); sleep(1); - restartProcessType("DMLProc", module); + startProcessType("DDLProc"); + sleep(1); + + startProcessType("DMLProc"); return; } @@ -3799,7 +3805,7 @@ void ProcessManager::recycleProcess(string module, bool enableModule) * purpose: Clear the Disable State on a specified module * ******************************************************************************************/ -int ProcessManager::enableModule(string target, int state) +int ProcessManager::enableModule(string target, int state, bool failover) { Oam oam; ModuleConfig moduleconfig; @@ -3839,7 +3845,8 @@ int ProcessManager::enableModule(string target, int state) setStandbyModule(newStandbyModule); //set recycle process - recycleProcess(target); + if (!failover) + 
recycleProcess(target); log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); @@ -4127,6 +4134,7 @@ void ProcessManager::setSystemState(uint16_t state) Oam oam; ALARMManager aManager; Configuration config; + ProcessManager processManager(config, log); log.writeLog(__LINE__, "Set System State = " + oamState[state], LOG_TYPE_DEBUG); @@ -4148,9 +4156,10 @@ void ProcessManager::setSystemState(uint16_t state) // Process Alarms string system = "System"; - - if ( state == oam::ACTIVE ) - { + if( state == oam::ACTIVE ) { + //set query system states ready + processManager.setQuerySystemState(true); + //clear alarms if set aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, CLEAR); aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, CLEAR); @@ -4541,7 +4550,8 @@ int ProcessManager::stopProcessType( std::string processName, bool manualFlag ) if ( systemprocessstatus.processstatus[i].ProcessName == processName) { //skip if in a COLD_STANDBY state - if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY ) +// if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY ) + if ( systemprocessstatus.processstatus[i].ProcessOpState != oam::ACTIVE ) continue; // found one, request restart of it @@ -4647,7 +4657,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski PMwithUM = "n"; } - // If mysql is the processName, then send to modules were ExeMgr is running + // If mysqld is the processName, then send to modules were ExeMgr is running try { oam.getProcessStatus(systemprocessstatus); @@ -4658,7 +4668,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski if ( systemprocessstatus.processstatus[i].Module == skipModule ) continue; - if ( processName == "mysql" ) + if ( processName == "mysqld" ) { if ( systemprocessstatus.processstatus[i].ProcessName == "ExeMgr") { @@ -4681,12 +4691,17 @@ int ProcessManager::restartProcessType( 
std::string processName, std::string ski if ( systemprocessstatus.processstatus[i].ProcessName == processName ) { //skip if in a BUSY_INIT state - if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::BUSY_INIT || - systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_INIT || - systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_INIT || - ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) ) - continue; +// if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::BUSY_INIT || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_OFFLINE || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_INIT || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_INIT || +// ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) ) +// continue; + if ( systemprocessstatus.processstatus[i].ProcessOpState != oam::ACTIVE ) + continue; + if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) ) { string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0, MAX_MODULE_TYPE_SIZE); @@ -6806,7 +6821,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ if ( IPAddr == oam::UnassignedIpAddr ) { - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } @@ -6817,7 +6832,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ if ( system(cmd.c_str()) != 0) { //ping failure - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } } @@ -7630,7 +7645,7 @@ void 
startSystemThread(oam::DeviceNetworkList Devicenetworklist) } //set query system state not ready - processManager.setQuerySystemState(true); + processManager.setQuerySystemState(false); // Bug 4554: Wait until DMLProc is finished with rollback if (status == oam::API_SUCCESS) @@ -7707,6 +7722,9 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) processManager.setSystemState(rtn); } + //set query system state ready + processManager.setQuerySystemState(true); + // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); startsystemthreadStatus = status; @@ -8235,19 +8253,18 @@ void ProcessManager::checkSimplexModule(std::string moduleName) if ( state == oam::COLD_STANDBY ) { - //set Primary UM Module - if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc" ) - { + //process DDL/DMLProc + if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc") + { + setPMProcIPs((*pt).DeviceName); + + log.writeLog(__LINE__, "Set Primary UM Module = " + (*pt).DeviceName, LOG_TYPE_DEBUG); + oam.setSystemConfig("PrimaryUMModuleName", (*pt).DeviceName); //distribute config file distributeConfigFile("system"); sleep(2); - - //add MySQL Replication setup, if needed - log.writeLog(__LINE__, "Setup MySQL Replication for COLD_STANDBY DMLProc going ACTIVE", LOG_TYPE_DEBUG); - oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist, (*pt).DeviceName); } int status = processManager.startProcess((*pt).DeviceName, @@ -8258,12 +8275,24 @@ void ProcessManager::checkSimplexModule(std::string moduleName) { log.writeLog(__LINE__, "checkSimplexModule: mate process started: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG); - //check to see if DDL/DML IPs need to be updated - if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc" ) - setPMProcIPs((*pt).DeviceName); + status = processManager.startProcess((*pt).DeviceName, + "DMLProc", + FORCEFUL); + 
if ( status == API_SUCCESS ) { + log.writeLog(__LINE__, "checkSimplexModule: mate process started: " + (*pt).DeviceName + "/DMLProc", LOG_TYPE_DEBUG); + } + else + log.writeLog(__LINE__, "checkSimplexModule: mate process failed to start: " + (*pt).DeviceName + "/DMLProc", LOG_TYPE_DEBUG); } else log.writeLog(__LINE__, "checkSimplexModule: mate process failed to start: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG); + + //setup new MariaDB Replication Master + if ( systemprocessconfig.processconfig[j].ProcessName == "DMLProc" ) { + log.writeLog(__LINE__, "Setup MySQL Replication for COLD_STANDBY DMLProc going ACTIVE", LOG_TYPE_DEBUG); + oam::DeviceNetworkList devicenetworklist; + processManager.setMySQLReplication(devicenetworklist, (*pt).DeviceName); + } } else { @@ -9795,7 +9824,7 @@ int ProcessManager::OAMParentModuleChange() { log.writeLog(__LINE__, "System Active, restart needed processes", LOG_TYPE_DEBUG); - processManager.restartProcessType("mysql"); + processManager.restartProcessType("mysqld"); processManager.restartProcessType("ExeMgr"); processManager.restartProcessType("WriteEngineServer"); processManager.reinitProcessType("DBRMWorkerNode"); @@ -10325,7 +10354,7 @@ int ProcessManager::OAMParentModuleChange() if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) && ( moduleNameList.size() <= 0 && config.moduleType() == "pm") ) { - int status = 0; + status = 0; } else { @@ -10995,7 +11024,7 @@ void ProcessManager::stopProcessTypes(bool manualFlag) log.writeLog(__LINE__, "stopProcessTypes Called"); //front-end first - processManager.stopProcessType("mysql", manualFlag); + processManager.stopProcessType("mysqld", manualFlag); processManager.stopProcessType("DMLProc", manualFlag); processManager.stopProcessType("DDLProc", manualFlag); processManager.stopProcessType("ExeMgr", manualFlag); diff --git a/procmgr/processmanager.h b/procmgr/processmanager.h index fd2fe834e..9a57b38b5 100644 --- 
a/procmgr/processmanager.h +++ b/procmgr/processmanager.h @@ -309,7 +309,7 @@ public: /** *@brief Enable a specified module */ - int enableModule(std::string target, int state); + int enableModule(std::string target, int state, bool failover = false); /** *@brief Enable a specified module diff --git a/procmon/processmonitor.h b/procmon/processmonitor.h index 04c72bb29..5ec131d42 100644 --- a/procmon/processmonitor.h +++ b/procmon/processmonitor.h @@ -488,7 +488,7 @@ public: /** *@brief check if module failover is needed due to a process outage */ - void checkProcessFailover( std::string processName); + void checkModuleFailover(std::string processName); /** *@brief run upgrade script diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index de092ca0e..8b69f6926 100755 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -10,7 +10,7 @@ CHECK=true REPORTPASS=true LOGFILE="" -OS_LIST=("centos6" "centos7" "debian8" "debian9" "suse12" "ubuntu16") +OS_LIST=("centos6" "centos7" "debian8" "debian9" "suse12" "ubuntu16" "ubuntu18") NODE_IPADDRESS="" @@ -37,7 +37,7 @@ checkContinue() { } ### -# Print Fucntions +# Print Functions ### helpPrint () { @@ -57,7 +57,7 @@ helpPrint () { echo "" echo "Additional information on Tool is documented at:" echo "" - echo "https://mariadb.com/kb/en/mariadb/*****/" + echo "https://mariadb.com/kb/en/library/mariadb-columnstore-cluster-test-tool/" echo "" echo "Items that are checked:" echo " Node Ping test" @@ -65,6 +65,7 @@ helpPrint () { echo " ColumnStore Port test" echo " OS version" echo " Locale settings" + echo " Umask settings" echo " Firewall settings" echo " Date/time settings" echo " Dependent packages installed" @@ -326,16 +327,18 @@ checkSSH() rc="$?" 
if [ $rc -eq 0 ] || ( [ $rc -eq 2 ] && [ $OS == "suse12" ] ) ; then if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node Passed SSH login test using ssh-keys" + echo $ipadd " Node Passed SSH login test using ssh-keys" else - echo $ipadd " Node Passed SSH login test using user password" + echo $ipadd " Node Passed SSH login test using user password" fi else if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node ${bold}Failed${normal} SSH login test using ssh-keys" + echo $ipadd " Node ${bold}Failed${normal} SSH login test using ssh-keys" else - echo $ipadd " Node ${bold}Failed${normal} SSH login test using user password" + echo $ipadd " Node ${bold}Failed${normal} SSH login test using user password" fi + + echo "Error - Fix the SSH login issue and rerun test" exit 1 fi done @@ -489,12 +492,47 @@ checkLocale() fi } -checkSELINUX() +checkLocalUMASK() +{ + # UMASK check + # + echo "" + echo "** Run Local UMASK check" + echo "" + + pass=true + filename=UMASKtest + + rm -f $filename + touch $filename + permission=$(stat -c "%A" "$filename") + result=${permission:4:1} + if [ ${result} == "r" ] ; then + result=${permission:7:1} + if [ ${result} == "r" ] ; then + echo "UMASK local setting test passed" + else + echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" + pass=false + fi + + if ! $pass; then + checkContinue + fi + + rm -f $filename +} + +checkLocalSELINUX() { # SELINUX check # echo "" - echo "** Run SELINUX check" + echo "** Run Local SELINUX check" echo "" pass=true @@ -511,21 +549,86 @@ checkSELINUX() echo "Local Node SELINUX setting is Not Enabled" fi - for ipadd in "${NODE_IPADDRESS[@]}"; do - `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /etc/selinux/config > /tmp/remote_scp_get_check 2>&1` - if [ "$?" 
-ne 0 ]; then - echo "$ipadd Node SELINUX setting is Not Enabled" - else - `cat config | grep SELINUX | grep enforcing > /tmp/selinux_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, $ipadd SELINUX setting is Enabled, check port test results" - pass=false - else - echo "$ipadd Node SELINUX setting is Not Enabled" - fi - `rm -f config` - fi - done + if ! $pass; then + checkContinue + fi +} + +checkUMASK() +{ + # UMASK check + # + echo "" + echo "** Run UMASK check" + echo "" + + pass=true + + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD 'rm -f UMASKtest;touch UMASKtest;echo $(stat -c "%A" "UMASKtest") > test.log' > /tmp/remote_command_check 2>&1` + if [ "$?" -eq 0 ]; then + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd Calpont1 test.log >> /tmp/remote_scp_get 2>&1` + if [ "$?" -eq 0 ]; then + permission=`cat test.log` + result=${permission:4:1} + if [ ${result} == "r" ] ; then + result=${permission:7:1} + if [ ${result} == "r" ] ; then + echo "$ipadd Node UMASK setting test passed" + else + echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_scp_get.sh error, check /tmp/remote_scp_get" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_command.sh error, check /tmp/remote_command_check" + pass=false + fi + `rm -f test.log` + done + + if ! 
$pass; then + checkContinue + fi + + rm -f $filename +} + +checkSELINUX() +{ + # SELINUX check + # + echo "" + echo "** Run SELINUX check" + echo "" + + pass=true + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /etc/selinux/config > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "$ipadd Node SELINUX setting is Not Enabled" + else + `cat config | grep SELINUX | grep enforcing > /tmp/selinux_check 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Warning${normal}, $ipadd SELINUX setting is Enabled, check port test results" + pass=false + else + echo "$ipadd Node SELINUX setting is Not Enabled" + fi + `rm -f config` + fi + done + + if ! $pass; then + checkContinue + fi } checkFirewalls() @@ -951,7 +1054,7 @@ checkPackages() declare -a UBUNTU_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline-dev" "rsync" "libsnappy1V5" "net-tools" "libnuma1" ) declare -a UBUNTU_PKG_NOT=("mariadb-server" "libmariadb18") - if [ "$OS" == "ubuntu16" ] ; then + if [ "$OS" == "ubuntu16" ] || [ "$OS" == "ubuntu18" ]; then if [ ! 
`which dpkg 2>/dev/null` ] ; then echo "${bold}Failed${normal}, Local Node ${bold}rpm${normal} package not installed" pass=false @@ -1311,12 +1414,15 @@ echo "" checkLocalOS checkLocalDir +checkLocalUMASK +checkLocalSELINUX if [ "$IPADDRESSES" != "" ]; then checkPing checkSSH checkRemoteDir checkOS checkLocale + checkUMASK checkSELINUX checkFirewalls checkPorts diff --git a/utils/clusterTester/os_detect.sh b/utils/clusterTester/os_detect.sh index 7930c0daf..be69e870e 100755 --- a/utils/clusterTester/os_detect.sh +++ b/utils/clusterTester/os_detect.sh @@ -29,7 +29,7 @@ detectOS () { echo Operating System name: $osPrettyName echo Operating System tag: $osTag case "$osTag" in - centos6|centos7|ubuntu16|debian8|suse12|debian9) + centos6|centos7|ubuntu16|debian8|suse12|debian9|ubuntu18) ;; *) echo OS not supported diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..63d05d3d2 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,139 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include +#include namespace static_any { namespace anyimpl { + struct empty_any + { + }; - struct bad_any_cast - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct empty_any - { - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct 
small_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; - - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { 
return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +155,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +197,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +217,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +260,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) - throw anyimpl::bad_any_cast(); + T& cast() + { + if (policy != anyimpl::get_policy()) + throw std::runtime_error("static_any: type mismatch in cast"); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/common/simpleallocator.h b/utils/common/simpleallocator.h index 71474ca24..9419a8f69 100644 --- a/utils/common/simpleallocator.h +++ b/utils/common/simpleallocator.h @@ -156,10 +156,6 @@ public: ptr->T::~T(); } - SimplePool* getPool() - { - return fPool; - } void setPool(SimplePool* pool) { fPool = pool; diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index 657a8d0b1..832143c5b 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -859,7 +859,7 @@ bool mysql_str_to_datetime( const string& input, DateTime& output, bool& isDate return true; } -bool mysql_str_to_time( const string& input, Time& output ) +bool mysql_str_to_time( const string& input, Time& output, long decimals ) { int32_t datesepct = 0; uint32_t dtend = 0; @@ -999,20 +999,21 @@ bool mysql_str_to_time( const string& input, Time& output ) if ( !isTimeValid( hour, min, sec, usec ) ) { // Emulate MariaDB's time saturation - if (hour > 838) + // TODO: msec saturation + if ((hour > 838) && !isNeg) { output.hour = 838; output.minute = 59; output.second = 59; - output.msecond = 999999; + output.msecond = exp10(decimals) - 1; output.is_neg = 0; } - else if (hour < -838) + else if ((hour < -838) || ((hour > 838) && isNeg)) { output.hour = -838; output.minute = 59; output.second = 59; - output.msecond = 999999; + output.msecond = exp10(decimals) - 1; output.is_neg = 1; } // If neither of the above match then we return a 0 time @@ -1068,9 +1069,9 @@ bool stringToDatetimeStruct(const string& data, DateTime& dtime, bool* date) return 
true; } -bool stringToTimeStruct(const string& data, Time& dtime) +bool stringToTimeStruct(const string& data, Time& dtime, long decimals) { - if ( !mysql_str_to_time( data, dtime ) ) + if ( !mysql_str_to_time( data, dtime, decimals ) ) return false; return true; @@ -1415,15 +1416,12 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, { Time aTime; - if (stringToTimeStruct(data, aTime)) + if (!stringToTimeStruct(data, aTime, colType.precision)) { - value = (int64_t) * (reinterpret_cast(&aTime)); - } - else - { - value = (int64_t) 0; pushWarning = true; } + + value = (int64_t) * (reinterpret_cast(&aTime)); } break; @@ -1910,6 +1908,7 @@ int64_t DataConvert::convertColumnTime( { status = 0; char* p; + char* retp = NULL; char* savePoint = NULL; p = const_cast(dataOrg); int64_t value = 0; @@ -1926,6 +1925,18 @@ int64_t DataConvert::convertColumnTime( return value; } + if (dataOrgLen == 0) + { + return value; + } + + if (dataOrgLen < 3) + { + // Not enough chars to be a time + status = -1; + return value; + } + if (p[0] == '-') { isNeg = true; @@ -1934,9 +1945,9 @@ int64_t DataConvert::convertColumnTime( errno = 0; p = strtok_r(p, ":.", &savePoint); - inHour = strtol(p, 0, 10); + inHour = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; @@ -1950,9 +1961,9 @@ int64_t DataConvert::convertColumnTime( return value; } - inMinute = strtol(p, 0, 10); + inMinute = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; @@ -1966,9 +1977,9 @@ int64_t DataConvert::convertColumnTime( return value; } - inSecond = strtol(p, 0, 10); + inSecond = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; @@ -1978,9 +1989,9 @@ int64_t DataConvert::convertColumnTime( if (p != NULL) { - inMicrosecond = strtol(p, 0, 10); + inMicrosecond = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; @@ -2082,13 +2093,8 @@ std::string 
DataConvert::datetimeToString( long long datetimevalue, long decima if (dt.msecond && decimals) { - snprintf(buf + strlen(buf), 21 + decimals, ".%d", dt.msecond); - - // Pad end with zeros - if (strlen(buf) < (size_t)(21 + decimals)) - { - sprintf(buf + strlen(buf), "%0*d", (int)(21 + decimals - strlen(buf)), 0); - } + // Pad start with zeros + sprintf(buf + strlen(buf), ".%0*d", (int)decimals, dt.msecond); } return buf; @@ -2118,14 +2124,8 @@ std::string DataConvert::timeToString( long long timevalue, long decimals ) if (dt.msecond && decimals) { - size_t start = strlen(buf); - snprintf(buf + strlen(buf), 12 + decimals, ".%d", dt.msecond); - - // Pad end with zeros - if (strlen(buf) - start < (size_t)decimals) - { - sprintf(buf + strlen(buf), "%0*d", (int)(decimals - (strlen(buf) - start) + 1), 0); - } + // Pad start with zeros + sprintf(buf + strlen(buf), ".%0*d", (int)decimals, dt.msecond); } return buf; @@ -2566,7 +2566,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 8, 2); min = string(buf + 10, 2); sec = string(buf + 12, 2); - msec = string(buf + 14, 6); break; case 12: @@ -2576,7 +2575,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 6, 2); min = string(buf + 8, 2); sec = string(buf + 10, 2); - msec = string(buf + 12, 6); break; case 10: @@ -2585,7 +2583,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 4, 2); min = string(buf + 6, 2); sec = string(buf + 8, 2); - msec = string(buf + 10, 6); break; case 9: @@ -2594,7 +2591,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 3, 2); min = string(buf + 5, 2); sec = string(buf + 7, 2); - msec = string(buf + 9, 6); break; case 8: @@ -2645,7 +2641,7 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) h = atoi(hour.c_str()); minute = atoi(min.c_str()); s = atoi(sec.c_str()); - ms = atoi(msec.c_str()); + ms = 0; if (!isDateValid(d, m, y) || !isDateTimeValid(h, 
minute, s, ms)) return -1; @@ -2664,7 +2660,7 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) return *(reinterpret_cast(&adaytime)); } -int64_t DataConvert::intToTime(int64_t data) +int64_t DataConvert::intToTime(int64_t data, bool fromString) { char buf[21] = {0}; char* bufread = buf; @@ -2693,43 +2689,78 @@ int64_t DataConvert::intToTime(int64_t data) bufread++; } + bool zero = false; + switch (strlen(bufread)) { + // A full datetime + case 14: + hour = string(buf + 8, 2); + min = string(buf + 10, 2); + sec = string(buf + 12, 2); + break; + + // Date so this is all 0 + case 8: + zero = true; + break; + case 7: hour = string(bufread, 3); min = string(bufread + 2, 2); sec = string(bufread + 4, 2); - msec = string(bufread + 6, 6); break; case 6: hour = string(bufread, 2); min = string(bufread + 2, 2); sec = string(bufread + 4, 2); - msec = string(bufread + 6, 6); + break; + + case 5: + hour = string(bufread, 1); + min = string(bufread + 1, 2); + sec = string(bufread + 3, 2); break; case 4: min = string(bufread, 2); sec = string(bufread + 2, 2); - msec = string(bufread + 4, 6); + break; + + case 3: + min = string(bufread, 1); + sec = string(bufread + 1, 2); break; case 2: sec = string(bufread, 2); - msec = string(bufread + 2, 6); + break; + + case 1: + sec = string(bufread, 1); break; default: return -1; } - h = atoi(hour.c_str()); - minute = atoi(min.c_str()); - s = atoi(sec.c_str()); - ms = atoi(msec.c_str()); + if (!zero) + { + h = atoi(hour.c_str()); + minute = atoi(min.c_str()); + s = atoi(sec.c_str()); + } + else if (fromString) + { + // Saturate fromString + h = 838; + minute = 59; + s = 59; + ms = 999999; + } - if (!isTimeValid(h, minute, s, ms)) + if (!isTimeValid(h, minute, s, 0)) return -1; atime.hour = h; @@ -2749,6 +2780,7 @@ int64_t DataConvert::stringToTime(const string& data) uint64_t min = 0, sec = 0, msec = 0; int64_t day = -1, hour = 0; bool isNeg = false; + bool hasDate = false; string time, hms, ms; char* end = NULL; @@ -2760,17 
+2792,27 @@ int64_t DataConvert::stringToTime(const string& data) isNeg = true; } + if (data.substr(pos+1, data.length()-pos-1).find("-") != string::npos) + { + // A second dash, this has a date + hasDate = true; + isNeg = false; + } // Day pos = data.find(" "); if (pos != string::npos) { - day = strtol(data.substr(0, pos).c_str(), &end, 10); + if (!hasDate) + { + day = strtol(data.substr(0, pos).c_str(), &end, 10); - if (*end != '\0') - return -1; - hour = day * 24; - day = -1; + if (*end != '\0') + return -1; + + hour = day * 24; + day = -1; + } time = data.substr(pos + 1, data.length() - pos - 1); } else @@ -2778,6 +2820,22 @@ int64_t DataConvert::stringToTime(const string& data) time = data; } + if (time.find(":") == string::npos) + { + if (hasDate) + { + // Has dashes, no colons. This is just a date! + // Or the length < 6 (MariaDB returns NULL) + return -1; + } + else + { + // This is an int time + return intToTime(atoll(time.c_str()), true); + } + } + + // Fraction pos = time.find("."); @@ -2796,11 +2854,18 @@ int64_t DataConvert::stringToTime(const string& data) if (pos == string::npos) { - hour += atoi(hms.c_str()); + if (hour >= 0) + hour += atoi(hms.c_str()); + else + hour -= atoi(hms.c_str()); } else { - hour += atoi(hms.substr(0, pos).c_str()); + if (hour >= 0) + hour += atoi(hms.substr(0, pos).c_str()); + else + hour -= atoi(hms.substr(0, pos).c_str()); + ms = hms.substr(pos + 1, hms.length() - pos - 1); } diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h index 0cf3480c5..a6ce20198 100644 --- a/utils/dataconvert/dataconvert.h +++ b/utils/dataconvert/dataconvert.h @@ -541,7 +541,7 @@ public: // convert integer to datetime EXPORT static int64_t intToDatetime(int64_t data, bool* isDate = NULL); // convert integer to date - EXPORT static int64_t intToTime(int64_t data); + EXPORT static int64_t intToTime(int64_t data, bool fromString = false); // convert string to date. 
alias to stringToDate EXPORT static int64_t dateToInt(const std::string& date); // convert string to datetime. alias to datetimeToInt @@ -587,14 +587,7 @@ inline void DataConvert::datetimeToString( long long datetimevalue, char* buf, u if (msec || decimals) { - size_t start = strlen(buf); - snprintf(buf + strlen(buf), buflen - start, ".%d", msec); - - // Pad end with zeros - if (strlen(buf) - start < (size_t)decimals) - { - snprintf(buf + strlen(buf), buflen - strlen(buf), "%0*d", (int)(decimals - (strlen(buf) - start) + 1), 0); - } + snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec); } } @@ -636,14 +629,8 @@ inline void DataConvert::timeToString( long long timevalue, char* buf, unsigned if (msec || decimals) { - size_t start = strlen(buf); - snprintf(buf + strlen(buf), buflen - start, ".%d", msec); - - // Pad end with zeros - if (strlen(buf) - start < (size_t)decimals) - { - snprintf(buf + strlen(buf), buflen - strlen(buf), "%0*d", (int)(decimals - (strlen(buf) - start) + 1), 0); - } + // Pad start with zeros + snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec); } } diff --git a/utils/funcexp/func_add_time.cpp b/utils/funcexp/func_add_time.cpp index 61eb9ec6c..3974190b4 100644 --- a/utils/funcexp/func_add_time.cpp +++ b/utils/funcexp/func_add_time.cpp @@ -36,179 +36,6 @@ using namespace dataconvert; #include "functor_dtm.h" #include "funchelpers.h" -namespace -{ -using namespace funcexp; - -int64_t addTime(DateTime& dt1, Time& dt2) -{ - DateTime dt; - dt.year = 0; - dt.month = 0; - dt.day = 0; - dt.hour = 0; - dt.minute = 0; - dt.second = 0; - dt.msecond = 0; - - int64_t month, day, hour, min, sec, msec, tmp; - msec = (signed)(dt1.msecond + dt2.msecond); - dt.msecond = tmp = msec % 1000000; - - if (tmp < 0) - { - dt.msecond = tmp + 1000000; - dt2.second--; - } - - sec = (signed)(dt1.second + dt2.second + msec / 1000000); - dt.second = tmp = sec % 60; - - if (tmp < 0) - { - dt.second = tmp + 60; - 
dt2.minute--; - } - - min = (signed)(dt1.minute + dt2.minute + sec / 60); - dt.minute = tmp = min % 60; - - if (tmp < 0) - { - dt.minute = tmp + 60; - dt2.hour--; - } - - hour = (signed)(dt1.hour + dt2.hour + min / 60); - - if ((hour < 0) || (hour > 23)) - { - dt2.day = hour / 24; - hour = hour % 24; - } - if (hour < 0) - { - dt.hour = hour + 24; - dt2.day--; - } - else - { - dt.hour = hour; - } - - day = (signed)(dt1.day + dt2.day); - - - if (isLeapYear(dt1.year) && dt1.month == 2) - day--; - - month = dt1.month; - int addyear = 0; - - if (day < 0) - { - int monthSave = month; - - while (day <= 0) - { - month = (month == 1 ? 12 : month - 1); - - for (; day <= 0 && month > 0; month--) - day += getDaysInMonth(month, dt1.year); - - month++; -// month=12; - } - - if ( month > monthSave ) - addyear--; - } - else - { - int monthSave = month; - - while (day > getDaysInMonth(month, dt1.year)) - { - for (; day > getDaysInMonth(month, dt1.year) && month <= 12; month++) - day -= getDaysInMonth(month, dt1.year); - - if (month > 12) - month = 1; - } - - if ( month < monthSave ) - addyear++; - } - - dt.day = day; - dt.month = month; - dt.year = dt1.year + addyear; - - return *(reinterpret_cast(&dt)); -} - -int64_t addTime(Time& dt1, Time& dt2) -{ - Time dt; - dt.is_neg = false; - dt.hour = 0; - dt.minute = 0; - dt.second = 0; - dt.msecond = 0; - - int64_t min, sec, msec, tmp; - msec = (signed)(dt1.msecond + dt2.msecond); - dt.msecond = tmp = msec % 1000000; - - if (tmp < 0) - { - dt.msecond = tmp + 1000000; - dt2.second--; - } - - sec = (signed)(dt1.second + dt2.second + msec / 1000000); - dt.second = tmp = sec % 60; - - if (tmp < 0) - { - dt.second = tmp + 60; - dt2.minute--; - } - - min = (signed)(dt1.minute + dt2.minute + sec / 60); - dt.minute = tmp = min % 60; - - if (tmp < 0) - { - dt.minute = tmp + 60; - dt2.hour--; - } - - dt.hour = tmp = (signed)(dt1.hour + dt2.hour + min / 60); - - // Saturation - if (tmp > 838) - { - dt.hour = 838; - dt.minute = 59; - dt.second = 59; 
- dt.msecond = 999999; - } - else if (tmp < -838) - { - dt.is_neg = true; - dt.hour = -838; - dt.minute = 59; - dt.second = 59; - dt.msecond = 999999; - } - - return *(reinterpret_cast(&dt)); -} - - -} - namespace funcexp { @@ -222,7 +49,14 @@ int64_t Func_add_time::getIntVal(rowgroup::Row& row, bool& isNull, CalpontSystemCatalog::ColType& op_ct) { - return getDatetimeIntVal(row, parm, isNull, op_ct); + if (parm[0]->data()->resultType().colDataType == execplan::CalpontSystemCatalog::TIME) + { + return getTimeIntVal(row, parm, isNull, op_ct); + } + else + { + return getDatetimeIntVal(row, parm, isNull, op_ct); + } } string Func_add_time::getStrVal(rowgroup::Row& row, diff --git a/utils/funcexp/func_case.cpp b/utils/funcexp/func_case.cpp index 4c416360c..03ca241f8 100644 --- a/utils/funcexp/func_case.cpp +++ b/utils/funcexp/func_case.cpp @@ -315,20 +315,13 @@ CalpontSystemCatalog::ColType caseOperationType(FunctionParm& fp, CalpontSystemCatalog::ColType& resultType, bool simpleCase) { - FunctionParm::size_type n = fp.size(); + uint64_t simple = simpleCase ? 1 : 0; + bool hasElse = (((fp.size()-simple) % 2) != 0); // if 1, then ELSE exist - if (simpleCase) // simple case has an expression - n -= 1; // remove expression from count of expression_i + result_i - bool hasElse = ((n % 2) != 0); // if 1, then ELSE exist - - if (hasElse) - --n; // n now is an even number uint64_t parmCount = hasElse ? (fp.size() - 2) : (fp.size() - 1); - uint64_t whereCount = hasElse ? (fp.size() - 2 + simpleCase) / 2 : (fp.size() - 1) / 2 + simpleCase; - - idbassert((n % 2) == 0); + uint64_t whereCount = hasElse ? (fp.size() - 2 + simple) / 2 : (fp.size() - 1) / 2 + simple; bool allStringO = true; bool allStringR = true; @@ -341,33 +334,24 @@ CalpontSystemCatalog::ColType caseOperationType(FunctionParm& fp, for (uint64_t i = 0; i <= parmCount; i++) { - // operation or result type - operation = ((i > 0) && (i <= whereCount)); - - // the result type of ELSE, if exists. 
- if (i == n) - { - if (!hasElse) - break; - - if (simpleCase) + // for SimpleCase, we return the type of the case expression, + // which will always be in position 0. + if (i == 0 && simpleCase) + { + if (fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::CHAR && + fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::TEXT && + fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::VARCHAR) { - // the case expression - if (fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::CHAR && - fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::TEXT && - fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::VARCHAR) - { - PredicateOperator op; - op.setOpType(oct, fp[i]->data()->resultType()); - allStringO = false; - oct = op.operationType(); - } - - i += 1; + PredicateOperator op; + op.setOpType(oct, fp[i]->data()->resultType()); + allStringO = false; + oct = op.operationType(); } + i += 1; + } - operation = false; - } + // operation or result type + operation = ((i > 0+simple) && (i <= whereCount)); if (fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::CHAR && fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::TEXT && @@ -378,10 +362,13 @@ CalpontSystemCatalog::ColType caseOperationType(FunctionParm& fp, if (operation) { - op.setOpType(oct, fp[i]->data()->resultType()); - allStringO = false; - oct = op.operationType(); - } + if (!simpleCase) + { + op.setOpType(oct, fp[i]->data()->resultType()); + allStringO = false; + oct = op.operationType(); + } + } // If any parm is of string type, the result type should be string. 
(same as if) else if (rct.colDataType != CalpontSystemCatalog::CHAR && @@ -457,6 +444,13 @@ bool Func_simple_case::getBoolVal(Row& row, if (isNull) return joblist::BIGINTNULL; + ParseTree* lop = parm[i]->left(); + ParseTree* rop = parm[i]->right(); + if (lop && rop) + { + return (reinterpret_cast(parm[i]->data()))->getBoolVal(row, isNull, lop, rop); + } + return parm[i]->data()->getBoolVal(row, isNull); } diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp index 3542e341d..b396c45ea 100644 --- a/utils/funcexp/func_cast.cpp +++ b/utils/funcexp/func_cast.cpp @@ -589,6 +589,33 @@ int32_t Func_cast_date::getDateIntVal(rowgroup::Row& row, { return parm[0]->data()->getDateIntVal(row, isNull); } + case execplan::CalpontSystemCatalog::TIME: + { + int64_t val1; + string value = ""; + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + val1 = addTime(aDateTime, aTime); + value = dataconvert::DataConvert::datetimeToString(val1); + value = value.substr(0, 10); + return dataconvert::DataConvert::stringToDate(value); + break; + } + + default: { @@ -680,6 +707,27 @@ int64_t Func_cast_date::getDatetimeIntVal(rowgroup::Row& row, val1.msecond = 0; return *(reinterpret_cast(&val1)); } + case CalpontSystemCatalog::TIME: + { + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + + aDateTime.hour = 0; + aDateTime.minute = 0; + 
aDateTime.second = 0; + aDateTime.msecond = 0; + val = addTime(aDateTime, aTime); + return val; + } + default: { @@ -814,6 +862,26 @@ int64_t Func_cast_datetime::getDatetimeIntVal(rowgroup::Row& row, return parm[0]->data()->getDatetimeIntVal(row, isNull); } + case CalpontSystemCatalog::TIME: + { + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + aTime.day = 0; + return addTime(aDateTime, aTime); + break; + } + default: { isNull = true; diff --git a/utils/funcexp/func_date.cpp b/utils/funcexp/func_date.cpp index 5f700e446..7fc990ab6 100644 --- a/utils/funcexp/func_date.cpp +++ b/utils/funcexp/func_date.cpp @@ -56,6 +56,9 @@ int64_t Func_date::getIntVal(rowgroup::Row& row, string value = ""; + DateTime aDateTime; + Time aTime; + switch (type) { case execplan::CalpontSystemCatalog::DATE: @@ -72,6 +75,31 @@ int64_t Func_date::getIntVal(rowgroup::Row& row, break; } + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + { + int64_t val; + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + val = addTime(aDateTime, aTime); + value = dataconvert::DataConvert::datetimeToString(val); + value = value.substr(0, 10); + break; + } + + case execplan::CalpontSystemCatalog::BIGINT: case execplan::CalpontSystemCatalog::INT: case 
execplan::CalpontSystemCatalog::MEDINT: diff --git a/utils/funcexp/func_date_format.cpp b/utils/funcexp/func_date_format.cpp index 033ceda02..9a25cb941 100644 --- a/utils/funcexp/func_date_format.cpp +++ b/utils/funcexp/func_date_format.cpp @@ -269,6 +269,34 @@ string Func_date_format::getStrVal(rowgroup::Row& row, dt.msecond = (uint32_t)((val & 0xfffff)); break; + case CalpontSystemCatalog::TIME: + { + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + val = addTime(aDateTime, aTime); + dt.year = (uint32_t)((val >> 48) & 0xffff); + dt.month = (uint32_t)((val >> 44) & 0xf); + dt.day = (uint32_t)((val >> 38) & 0x3f); + dt.hour = (uint32_t)((val >> 32) & 0x3f); + dt.minute = (uint32_t)((val >> 26) & 0x3f); + dt.second = (uint32_t)((val >> 20) & 0x3f); + dt.msecond = (uint32_t)((val & 0xfffff)); + break; + } + + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::TEXT: diff --git a/utils/funcexp/func_day.cpp b/utils/funcexp/func_day.cpp index bbd5edfcb..7ff2bab9a 100644 --- a/utils/funcexp/func_day.cpp +++ b/utils/funcexp/func_day.cpp @@ -49,6 +49,9 @@ int64_t Func_day::getIntVal(rowgroup::Row& row, { int64_t val = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -59,6 +62,16 @@ int64_t Func_day::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (uint32_t)((val >> 38) & 0x3f); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + 
aTime.day = 0; + val = addTime(aDateTime, aTime); + return (uint32_t)((val >> 38) & 0x3f); + break; + + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_dayname.cpp b/utils/funcexp/func_dayname.cpp index 3825bc1a3..5da6d8943 100644 --- a/utils/funcexp/func_dayname.cpp +++ b/utils/funcexp/func_dayname.cpp @@ -54,6 +54,9 @@ int64_t Func_dayname::getIntVal(rowgroup::Row& row, int64_t val = 0; int32_t dayofweek = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -70,6 +73,17 @@ int64_t Func_dayname::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: @@ -145,7 +159,11 @@ string Func_dayname::getStrVal(rowgroup::Row& row, bool& isNull, CalpontSystemCatalog::ColType& op_ct) { - uint32_t weekday = getIntVal(row, parm, isNull, op_ct); + int32_t weekday = getIntVal(row, parm, isNull, op_ct); + + if (weekday == -1) + return ""; + return helpers::weekdayFullNames[weekday]; } diff --git a/utils/funcexp/func_dayofweek.cpp b/utils/funcexp/func_dayofweek.cpp index a152ee15f..ec84f5738 100644 --- a/utils/funcexp/func_dayofweek.cpp +++ b/utils/funcexp/func_dayofweek.cpp @@ -52,6 +52,9 @@ int64_t Func_dayofweek::getIntVal(rowgroup::Row& row, uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -68,6 +71,17 @@ int64_t 
Func_dayofweek::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_dayofyear.cpp b/utils/funcexp/func_dayofyear.cpp index 65c017202..ee3b9cf30 100644 --- a/utils/funcexp/func_dayofyear.cpp +++ b/utils/funcexp/func_dayofyear.cpp @@ -52,6 +52,9 @@ int64_t Func_dayofyear::getIntVal(rowgroup::Row& row, uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -68,6 +71,17 @@ int64_t Func_dayofyear::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_hex.cpp b/utils/funcexp/func_hex.cpp index 79ed1d57e..83106a9be 100644 --- a/utils/funcexp/func_hex.cpp +++ b/utils/funcexp/func_hex.cpp @@ -78,6 +78,7 @@ string Func_hex::getStrVal(rowgroup::Row& row, case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::TIME: { const string& arg = parm[0]->data()->getStrVal(row, 
isNull); scoped_array hexPtr(new char[strlen(arg.c_str()) * 2 + 1]); diff --git a/utils/funcexp/func_hour.cpp b/utils/funcexp/func_hour.cpp index 685a264db..4750829ad 100644 --- a/utils/funcexp/func_hour.cpp +++ b/utils/funcexp/func_hour.cpp @@ -127,22 +127,13 @@ int64_t Func_hour::getIntVal(rowgroup::Row& row, if (isTime) { - // If negative, mask so it doesn't turn positive - bool isNeg = false; + // HOUR() is always positive in MariaDB, even for negative time int64_t mask = 0; if ((val >> 40) & 0x800) mask = 0xfffffffffffff000; - if (!mask && (val >> 63)) - { - isNeg = true; - } - - val = mask | ((val >> 40) & 0xfff); - - if (isNeg) - val *= -1; + val = abs(mask | ((val >> 40) & 0xfff)); } else { diff --git a/utils/funcexp/func_last_day.cpp b/utils/funcexp/func_last_day.cpp index 38ba46ccb..28b4c01e2 100644 --- a/utils/funcexp/func_last_day.cpp +++ b/utils/funcexp/func_last_day.cpp @@ -53,6 +53,8 @@ int64_t Func_last_day::getIntVal(rowgroup::Row& row, uint32_t month = 0; uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -70,6 +72,17 @@ int64_t Func_last_day::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_makedate.cpp b/utils/funcexp/func_makedate.cpp index 2a30515f1..948b612de 100644 --- a/utils/funcexp/func_makedate.cpp +++ b/utils/funcexp/func_makedate.cpp @@ -146,11 +146,26 @@ uint64_t makedate(rowgroup::Row& row, break; } + case CalpontSystemCatalog::TIME: + { + 
std::ostringstream ss; + Time aTime = parm[1]->data()->getTimeIntVal(row, isNull); + ss << aTime.hour << aTime.minute << aTime.second; + dayofyear = ss.str(); + break; + } + default: isNull = true; return 0; } + if (atoi(dayofyear.c_str()) == 0) + { + isNull = true; + return 0; + } + // convert the year to a date in our internal format, then subtract // one since we are about to add the day of year back in Date d(year, 1, 1); diff --git a/utils/funcexp/func_month.cpp b/utils/funcexp/func_month.cpp index ba285f348..5479270d0 100644 --- a/utils/funcexp/func_month.cpp +++ b/utils/funcexp/func_month.cpp @@ -48,6 +48,8 @@ int64_t Func_month::getIntVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -59,6 +61,15 @@ int64_t Func_month::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 44) & 0xf); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + return (unsigned)((val >> 44) & 0xf); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_monthname.cpp b/utils/funcexp/func_monthname.cpp index dbe5aa513..9657b1ea2 100644 --- a/utils/funcexp/func_monthname.cpp +++ b/utils/funcexp/func_monthname.cpp @@ -47,7 +47,11 @@ string Func_monthname::getStrVal(rowgroup::Row& row, bool& isNull, CalpontSystemCatalog::ColType& op_ct) { - uint32_t month = getIntVal(row, parm, isNull, op_ct); + int32_t month = getIntVal(row, parm, isNull, op_ct); + + if (month == -1) + return ""; + return helpers::monthFullNames[month]; } @@ -75,6 +79,8 @@ int64_t Func_monthname::getIntVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { int64_t val = 0; + 
DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -86,6 +92,16 @@ int64_t Func_monthname::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 44) & 0xf); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + return (unsigned)((val >> 44) & 0xf); + break; + + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_nullif.cpp b/utils/funcexp/func_nullif.cpp index a268b0ea1..04a45534a 100644 --- a/utils/funcexp/func_nullif.cpp +++ b/utils/funcexp/func_nullif.cpp @@ -531,7 +531,8 @@ int64_t Func_nullif::getTimeIntVal(rowgroup::Row& row, default: { - isNull = true; + isNull = false; + return exp1; } } diff --git a/utils/funcexp/func_quarter.cpp b/utils/funcexp/func_quarter.cpp index 1603ef31f..78559d68d 100644 --- a/utils/funcexp/func_quarter.cpp +++ b/utils/funcexp/func_quarter.cpp @@ -50,6 +50,8 @@ int64_t Func_quarter::getIntVal(rowgroup::Row& row, { // try to cast to date/datetime int64_t val = 0, month = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -63,6 +65,15 @@ int64_t Func_quarter::getIntVal(rowgroup::Row& row, month = (val >> 44) & 0xf; break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + month = (uint32_t)((val >> 44) & 0xf); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_str_to_date.cpp b/utils/funcexp/func_str_to_date.cpp index 42d43cb6b..31bbbf6ca 100644 --- 
a/utils/funcexp/func_str_to_date.cpp +++ b/utils/funcexp/func_str_to_date.cpp @@ -198,6 +198,24 @@ int64_t Func_str_to_date::getDatetimeIntVal(rowgroup::Row& row, return time; } +int64_t Func_str_to_date::getTimeIntVal(rowgroup::Row& row, + FunctionParm& parm, + bool& isNull, + CalpontSystemCatalog::ColType& ct) +{ + dataconvert::DateTime dateTime; + dataconvert::Time retTime; + dateTime = getDateTime(row, parm, isNull, ct); + retTime.day = 0; + retTime.is_neg = false; + retTime.hour = dateTime.hour; + retTime.minute = dateTime.minute; + retTime.second = dateTime.second; + retTime.msecond = dateTime.msecond; + int64_t time = *(reinterpret_cast(&retTime)); + return time; +} + int64_t Func_str_to_date::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull, diff --git a/utils/funcexp/func_substring_index.cpp b/utils/funcexp/func_substring_index.cpp index 4948cfc5f..cf1395759 100644 --- a/utils/funcexp/func_substring_index.cpp +++ b/utils/funcexp/func_substring_index.cpp @@ -74,6 +74,9 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row, if ( count > (int64_t) end ) return str; + if (( count < 0 ) && ((count * -1) > end)) + return str; + string value = str; if ( count > 0 ) diff --git a/utils/funcexp/func_to_days.cpp b/utils/funcexp/func_to_days.cpp index 773ec4a8e..f16642958 100644 --- a/utils/funcexp/func_to_days.cpp +++ b/utils/funcexp/func_to_days.cpp @@ -59,6 +59,9 @@ int64_t Func_to_days::getIntVal(rowgroup::Row& row, month = 0, day = 0; + DateTime aDateTime; + Time aTime; + switch (type) { case execplan::CalpontSystemCatalog::DATE: @@ -82,6 +85,25 @@ int64_t Func_to_days::getIntVal(rowgroup::Row& row, break; } + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + { + int64_t val; + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + aTime.day = 0; + val = addTime(aDateTime, 
aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + return helpers::calc_mysql_daynr(year, month, day); + break; + } + case execplan::CalpontSystemCatalog::VARCHAR: // including CHAR' case execplan::CalpontSystemCatalog::CHAR: case execplan::CalpontSystemCatalog::TEXT: diff --git a/utils/funcexp/func_week.cpp b/utils/funcexp/func_week.cpp index 65145052f..a9e47bd4b 100644 --- a/utils/funcexp/func_week.cpp +++ b/utils/funcexp/func_week.cpp @@ -53,6 +53,8 @@ int64_t Func_week::getIntVal(rowgroup::Row& row, int64_t val = 0; int16_t mode = 0; + DateTime aDateTime; + Time aTime; if (parm.size() > 1) // mode value mode = parm[1]->data()->getIntVal(row, isNull); @@ -73,6 +75,17 @@ int64_t Func_week::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_weekday.cpp b/utils/funcexp/func_weekday.cpp index 6022a860f..9666710f5 100644 --- a/utils/funcexp/func_weekday.cpp +++ b/utils/funcexp/func_weekday.cpp @@ -52,6 +52,8 @@ int64_t Func_weekday::getIntVal(rowgroup::Row& row, uint32_t month = 0; uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -69,6 +71,17 @@ int64_t Func_weekday::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = 
parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_year.cpp b/utils/funcexp/func_year.cpp index 119881499..17ff4f2d0 100644 --- a/utils/funcexp/func_year.cpp +++ b/utils/funcexp/func_year.cpp @@ -48,6 +48,8 @@ int64_t Func_year::getIntVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -59,6 +61,15 @@ int64_t Func_year::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 48) & 0xffff); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + return (unsigned)((val >> 48) & 0xffff); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_yearweek.cpp b/utils/funcexp/func_yearweek.cpp index 84b8ca0a4..e567440b4 100644 --- a/utils/funcexp/func_yearweek.cpp +++ b/utils/funcexp/func_yearweek.cpp @@ -54,6 +54,8 @@ int64_t Func_yearweek::getIntVal(rowgroup::Row& row, int64_t val = 0; int16_t mode = 0; // default to 2 + DateTime aDateTime; + Time aTime; if (parm.size() > 1) // mode value mode = parm[1]->data()->getIntVal(row, isNull); @@ -76,6 +78,17 @@ int64_t Func_yearweek::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; 
+ val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/funcexp.cpp b/utils/funcexp/funcexp.cpp index 3c530f381..66782cc54 100644 --- a/utils/funcexp/funcexp.cpp +++ b/utils/funcexp/funcexp.cpp @@ -139,6 +139,7 @@ FuncExp::FuncExp() fFuncMap["least"] = new Func_least(); //dlh fFuncMap["left"] = new Func_left(); //dlh fFuncMap["length"] = new Func_length(); + fFuncMap["octet_length"] = new Func_length(); // MariaDB 10.3 fFuncMap["ln"] = new Func_log(); fFuncMap["locate"] = new Func_instr(); fFuncMap["log"] = new Func_log(); @@ -152,6 +153,7 @@ FuncExp::FuncExp() fFuncMap["microsecond"] = new Func_microsecond(); fFuncMap["minute"] = new Func_minute(); //dlh fFuncMap["mod"] = new Func_mod(); //dlh + fFuncMap["MOD"] = new Func_mod(); // MariaDB 10.3 fFuncMap["%"] = new Func_mod(); //dlh fFuncMap["md5"] = new Func_md5(); fFuncMap["mid"] = new Func_substr(); diff --git a/utils/funcexp/funcexpwrapper.cpp b/utils/funcexp/funcexpwrapper.cpp index 9c67fd37e..8bf1adf4f 100644 --- a/utils/funcexp/funcexpwrapper.cpp +++ b/utils/funcexp/funcexpwrapper.cpp @@ -33,7 +33,6 @@ #include "objectreader.h" using namespace messageqcpp; -using namespace boost; using namespace rowgroup; using namespace execplan; @@ -103,12 +102,12 @@ void FuncExpWrapper::deserialize(ByteStream& bs) bs >> rcsCount; for (i = 0; i < fCount; i++) - filters.push_back(shared_ptr(ObjectReader::createParseTree(bs))); + filters.push_back(boost::shared_ptr(ObjectReader::createParseTree(bs))); for (i = 0; i < rcsCount; i++) { ReturnedColumn* rc = (ReturnedColumn*) ObjectReader::createTreeNode(bs); - rcs.push_back(shared_ptr(rc)); + rcs.push_back(boost::shared_ptr(rc)); } } @@ -125,12 +124,12 @@ bool FuncExpWrapper::evaluate(Row* r) return true; } -void 
FuncExpWrapper::addFilter(const shared_ptr& f) +void FuncExpWrapper::addFilter(const boost::shared_ptr& f) { filters.push_back(f); } -void FuncExpWrapper::addReturnedColumn(const shared_ptr& rc) +void FuncExpWrapper::addReturnedColumn(const boost::shared_ptr& rc) { rcs.push_back(rc); } diff --git a/utils/funcexp/functor.cpp b/utils/funcexp/functor.cpp index b74812ee9..1e50ea1fc 100644 --- a/utils/funcexp/functor.cpp +++ b/utils/funcexp/functor.cpp @@ -28,6 +28,8 @@ #include #include #include +#include + using namespace std; #include "joblisttypes.h" @@ -145,6 +147,189 @@ int64_t Func::intToTime(int64_t i) return i; } +int64_t Func::nowDatetime() +{ + DateTime result; + boost::posix_time::ptime now = boost::posix_time::microsec_clock::local_time(); + result.year = now.date().year(); + result.month = now.date().month(); + result.day = now.date().day(); + result.hour = now.time_of_day().hours(); + result.minute = now.time_of_day().minutes(); + result.second = now.time_of_day().seconds(); + result.msecond = now.time_of_day().total_microseconds(); + + return (int64_t) * (reinterpret_cast(&result)); +} + +int64_t Func::addTime(DateTime& dt1, Time& dt2) +{ + DateTime dt; + dt.year = 0; + dt.month = 0; + dt.day = 0; + dt.hour = 0; + dt.minute = 0; + dt.second = 0; + dt.msecond = 0; + + int64_t month, day, hour, min, sec, msec, tmp; + msec = (signed)(dt1.msecond + dt2.msecond); + dt.msecond = tmp = msec % 1000000; + + if (tmp < 0) + { + dt.msecond = tmp + 1000000; + dt2.second--; + } + + sec = (signed)(dt1.second + dt2.second + msec / 1000000); + dt.second = tmp = sec % 60; + + if (tmp < 0) + { + dt.second = tmp + 60; + dt2.minute--; + } + + min = (signed)(dt1.minute + dt2.minute + sec / 60); + dt.minute = tmp = min % 60; + + if (tmp < 0) + { + dt.minute = tmp + 60; + dt2.hour--; + } + + hour = (signed)(dt1.hour + dt2.hour + min / 60); + + if ((hour < 0) || (hour > 23)) + { + dt2.day = hour / 24; + hour = hour % 24; + } + + if (hour < 0) + { + dt.hour = hour + 24; + 
dt2.day--; + } + else + { + dt.hour = hour; + } + + day = (signed)(dt1.day + dt2.day); + + + if (isLeapYear(dt1.year) && dt1.month == 2) + day--; + + month = dt1.month; + int addyear = 0; + + if (day <= 0) + { + int monthSave = month; + + while (day <= 0) + { + month = (month == 1 ? 12 : month - 1); + + for (; day <= 0 && month > 0; month--) + day += getDaysInMonth(month, dt1.year); + + month++; +// month=12; + } + + if ( month > monthSave ) + addyear--; + } + else + { + int monthSave = month; + + while (day > getDaysInMonth(month, dt1.year)) + { + for (; day > getDaysInMonth(month, dt1.year) && month <= 12; month++) + day -= getDaysInMonth(month, dt1.year); + + if (month > 12) + month = 1; + } + + if ( month < monthSave ) + addyear++; + } + + dt.day = day; + dt.month = month; + dt.year = dt1.year + addyear; + + return *(reinterpret_cast(&dt)); +} + +int64_t Func::addTime(Time& dt1, Time& dt2) +{ + Time dt; + dt.is_neg = false; + dt.hour = 0; + dt.minute = 0; + dt.second = 0; + dt.msecond = 0; + + int64_t min, sec, msec, tmp; + msec = (signed)(dt1.msecond + dt2.msecond); + dt.msecond = tmp = msec % 1000000; + + if (tmp < 0) + { + dt.msecond = tmp + 1000000; + dt2.second--; + } + + sec = (signed)(dt1.second + dt2.second + msec / 1000000); + dt.second = tmp = sec % 60; + + if (tmp < 0) + { + dt.second = tmp + 60; + dt2.minute--; + } + + min = (signed)(dt1.minute + dt2.minute + sec / 60); + dt.minute = tmp = min % 60; + + if (tmp < 0) + { + dt.minute = tmp + 60; + dt2.hour--; + } + + dt.hour = tmp = (signed)(dt1.hour + dt2.hour + min / 60); + + // Saturation + if (tmp > 838) + { + dt.hour = 838; + dt.minute = 59; + dt.second = 59; + dt.msecond = 999999; + } + else if (tmp < -838) + { + dt.is_neg = true; + dt.hour = -838; + dt.minute = 59; + dt.second = 59; + dt.msecond = 999999; + } + + return *(reinterpret_cast(&dt)); +} + + string Func::intToString(int64_t i) { return helpers::intToString(i); diff --git a/utils/funcexp/functor.h b/utils/funcexp/functor.h index 
9edb9bf62..20914e99e 100644 --- a/utils/funcexp/functor.h +++ b/utils/funcexp/functor.h @@ -34,6 +34,9 @@ #include "calpontsystemcatalog.h" +#include "dataconvert.h" +using namespace dataconvert; + namespace rowgroup { class Row; @@ -162,6 +165,10 @@ protected: virtual std::string intToString(int64_t); virtual std::string doubleToString(double); + virtual int64_t nowDatetime(); + virtual int64_t addTime(DateTime& dt1, dataconvert::Time& dt2); + virtual int64_t addTime(dataconvert::Time& dt1, dataconvert::Time& dt2); + std::string fFuncName; private: diff --git a/utils/funcexp/functor_dtm.h b/utils/funcexp/functor_dtm.h index d7837a4fe..bcff47854 100644 --- a/utils/funcexp/functor_dtm.h +++ b/utils/funcexp/functor_dtm.h @@ -473,6 +473,10 @@ public: FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& op_ct); + int64_t getTimeIntVal(rowgroup::Row& row, + FunctionParm& fp, + bool& isNull, + execplan::CalpontSystemCatalog::ColType& op_ct); }; diff --git a/utils/funcexp/functor_str.h b/utils/funcexp/functor_str.h index c71cdec91..b7051be4e 100644 --- a/utils/funcexp/functor_str.h +++ b/utils/funcexp/functor_str.h @@ -24,6 +24,7 @@ #include "functor.h" +using namespace std; namespace funcexp { @@ -126,7 +127,7 @@ protected: exponent = (int)floor(log10( fabs(floatVal))); base = floatVal * pow(10, -1.0 * exponent); - if (std::isnan(exponent) || std::isnan(base)) + if (isnan(exponent) || isnan(base)) { snprintf(buf, 20, "%f", floatVal); fFloatStr = execplan::removeTrailing0(buf, 20); diff --git a/utils/joiner/tuplejoiner.cpp b/utils/joiner/tuplejoiner.cpp index 613640a11..edc94b067 100644 --- a/utils/joiner/tuplejoiner.cpp +++ b/utils/joiner/tuplejoiner.cpp @@ -149,8 +149,7 @@ TupleJoiner::TupleJoiner( for (uint32_t i = 0; i < smallKeyColumns.size(); i++) { discreteValues[i] = false; - - if (isUnsigned(smallRG.getColType(i))) + if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]])) { cpValues[i].push_back(static_cast(numeric_limits::max())); 
cpValues[i].push_back(0); @@ -1033,8 +1032,7 @@ boost::shared_ptr TupleJoiner::copyForDiskJoin() for (uint32_t i = 0; i < smallKeyColumns.size(); i++) { ret->discreteValues[i] = false; - - if (isUnsigned(smallRG.getColType(i))) + if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]])) { ret->cpValues[i].push_back(static_cast(numeric_limits::max())); ret->cpValues[i].push_back(0); diff --git a/utils/libmysql_client/libmysql_client.cpp b/utils/libmysql_client/libmysql_client.cpp index c12abafdf..300df8a75 100644 --- a/utils/libmysql_client/libmysql_client.cpp +++ b/utils/libmysql_client/libmysql_client.cpp @@ -120,12 +120,17 @@ int LibMySQL::run(const char* query) void LibMySQL::handleMySqlError(const char* errStr, unsigned int errCode) { ostringstream oss; - oss << errStr << "(" << errCode << ")"; - if (errCode == (unsigned int) - 1) - oss << "(null pointer)"; + if (getErrno()) + { + oss << errStr << " (" << getErrno() << ")"; + oss << " (" << getErrorMsg() << ")"; + } else - oss << "(" << errCode << ")"; + { + oss << errStr << " (" << errCode << ")"; + oss << " (unknown)"; + } throw logging::IDBExcept(oss.str(), logging::ERR_CROSS_ENGINE_CONNECT); diff --git a/utils/libmysql_client/libmysql_client.h b/utils/libmysql_client/libmysql_client.h index 41ee5f9de..5720ffd73 100644 --- a/utils/libmysql_client/libmysql_client.h +++ b/utils/libmysql_client/libmysql_client.h @@ -71,6 +71,14 @@ public: { return fErrStr; } + unsigned int getErrno() + { + return mysql_errno(fCon); + } + const char* getErrorMsg() + { + return mysql_error(fCon); + } private: MYSQL* fCon; diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp index 60919c906..4b4196800 100644 --- a/utils/loggingcpp/errorcodes.cpp +++ b/utils/loggingcpp/errorcodes.cpp @@ -29,7 +29,7 @@ using namespace std; namespace logging { -ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within InfiniDB. 
Please check the log files for more details. Additional Information: ") +ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within Columnstore. Please check the log files for more details. Additional Information: ") { fErrorCodes[batchPrimitiveStepErr] = "error in BatchPrimitiveStep."; fErrorCodes[tupleBPSErr] = "error in TupleBPS."; diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index d1a3f4988..f8453843e 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -35,6 +35,7 @@ #include "exceptclasses.h" #include "serializeable.h" +#include "any.hpp" class ByteStreamTestSuite; diff --git a/utils/messageqcpp/messagequeue.cpp b/utils/messageqcpp/messagequeue.cpp index 2bff9481d..142d4c19c 100644 --- a/utils/messageqcpp/messagequeue.cpp +++ b/utils/messageqcpp/messagequeue.cpp @@ -157,24 +157,42 @@ void MessageQueueClient::setup(bool syncProto) { string otherEndIPStr; string otherEndPortStr; - uint16_t port; + struct addrinfo hints, *servinfo; + int rc = 0; otherEndIPStr = fConfig->getConfig(fOtherEnd, "IPAddr"); otherEndPortStr = fConfig->getConfig(fOtherEnd, "Port"); if (otherEndIPStr.length() == 0) otherEndIPStr = "127.0.0.1"; - if (otherEndPortStr.length() == 0 || (port = static_cast(strtol(otherEndPortStr.c_str(), 0, 0))) == 0) + if (otherEndPortStr.length() == 0 || static_cast(strtol(otherEndPortStr.c_str(), 0, 0)) == 0) { - string msg = "MessageQueueClient::MessageQueueClient: config error: Invalid/Missing Port attribute"; + string msg = "MessageQueueClient::setup(): config error: Invalid/Missing Port attribute"; throw runtime_error(msg); } - memset(&fServ_addr, 0, sizeof(fServ_addr)); - sockaddr_in* sinp = reinterpret_cast(&fServ_addr); - sinp->sin_family = AF_INET; - sinp->sin_port = htons(port); - sinp->sin_addr.s_addr = inet_addr(otherEndIPStr.c_str()); + memset(&hints, 0, sizeof hints); + // ATM We support IPv4 only. 
+ hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + + if( !(rc = getaddrinfo(otherEndIPStr.c_str(), otherEndPortStr.c_str(), &hints, &servinfo)) ) + { + memset(&fServ_addr, 0, sizeof(fServ_addr)); + sockaddr_in* sinp = reinterpret_cast(&fServ_addr); + *sinp = *reinterpret_cast(servinfo->ai_addr); + freeaddrinfo(servinfo); + } + else + { + string msg = "MessageQueueClient::setup(): "; + msg.append(gai_strerror(rc)); + logging::Message::Args args; + logging::LoggingID li(31); + args.add(msg); + fLogger.logMessage(logging::LOG_TYPE_ERROR, logging::M0000, args, li); + } #ifdef SKIP_IDB_COMPRESSION fClientSock.setSocketImpl(new InetStreamSocket()); @@ -200,15 +218,34 @@ MessageQueueClient::MessageQueueClient(const string& otherEnd, Config* config, b setup(syncProto); } -MessageQueueClient::MessageQueueClient(const string& ip, uint16_t port, bool syncProto) : +MessageQueueClient::MessageQueueClient(const string& dnOrIp, uint16_t port, bool syncProto) : fLogger(31), fIsAvailable(true) { - memset(&fServ_addr, 0, sizeof(fServ_addr)); - sockaddr_in* sinp = reinterpret_cast(&fServ_addr); - sinp->sin_family = AF_INET; - sinp->sin_port = htons(port); - sinp->sin_addr.s_addr = inet_addr(ip.c_str()); + struct addrinfo hints, *servinfo; + int rc = 0; + memset(&hints, 0, sizeof hints); + // ATM We support IPv4 only. 
+ hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + if( !(rc = getaddrinfo(dnOrIp.c_str(), NULL, &hints, &servinfo)) ) + { + memset(&fServ_addr, 0, sizeof(fServ_addr)); + sockaddr_in* sinp = reinterpret_cast(&fServ_addr); + *sinp = *reinterpret_cast(servinfo->ai_addr); + sinp->sin_port = htons(port); + freeaddrinfo(servinfo); + } + else + { + string msg = "MessageQueueClient::MessageQueueClient(): "; + msg.append(gai_strerror(rc)); + logging::Message::Args args; + logging::LoggingID li(31); + args.add(msg); + fLogger.logMessage(logging::LOG_TYPE_ERROR, logging::M0000, args, li); + } #ifdef SKIP_IDB_COMPRESSION fClientSock.setSocketImpl(new InetStreamSocket()); #else diff --git a/utils/messageqcpp/messagequeue.h b/utils/messageqcpp/messagequeue.h index 3c61d074c..ca4d1f389 100644 --- a/utils/messageqcpp/messagequeue.h +++ b/utils/messageqcpp/messagequeue.h @@ -33,6 +33,7 @@ #include #else #include +#include #endif #include "serversocket.h" @@ -183,7 +184,7 @@ public: * * construct a queue from this process to otherEnd on the given IP and Port. 
*/ - EXPORT explicit MessageQueueClient(const std::string& ip, uint16_t port, bool syncProto = true); + EXPORT explicit MessageQueueClient(const std::string& dnOrIp, uint16_t port, bool syncProto=true); /** diff --git a/utils/messageqcpp/messagequeuepool.cpp b/utils/messageqcpp/messagequeuepool.cpp index 1777b38c5..f986f734f 100644 --- a/utils/messageqcpp/messagequeuepool.cpp +++ b/utils/messageqcpp/messagequeuepool.cpp @@ -37,12 +37,12 @@ static uint64_t TimeSpecToSeconds(struct timespec* ts) return (uint64_t)ts->tv_sec + (uint64_t)ts->tv_nsec / 1000000000; } -MessageQueueClient* MessageQueueClientPool::getInstance(const std::string& ip, uint64_t port) +MessageQueueClient *MessageQueueClientPool::getInstance(const std::string &dnOrIp, uint64_t port) { boost::mutex::scoped_lock lock(queueMutex); std::ostringstream oss; - oss << ip << "_" << port; + oss << dnOrIp << "_" << port; std::string searchString = oss.str(); MessageQueueClient* returnClient = MessageQueueClientPool::findInPool(searchString); @@ -59,7 +59,7 @@ MessageQueueClient* MessageQueueClientPool::getInstance(const std::string& ip, u clock_gettime(CLOCK_MONOTONIC, &now); uint64_t nowSeconds = TimeSpecToSeconds(&now); - newClientObject->client = new MessageQueueClient(ip, port); + newClientObject->client = new MessageQueueClient(dnOrIp, port); newClientObject->inUse = true; newClientObject->lastUsed = nowSeconds; clientMap.insert(std::pair(searchString, newClientObject)); diff --git a/utils/messageqcpp/messagequeuepool.h b/utils/messageqcpp/messagequeuepool.h index 472794a5a..db49d8e5c 100644 --- a/utils/messageqcpp/messagequeuepool.h +++ b/utils/messageqcpp/messagequeuepool.h @@ -42,7 +42,7 @@ class MessageQueueClientPool { public: static MessageQueueClient* getInstance(const std::string& module); - static MessageQueueClient* getInstance(const std::string& ip, uint64_t port); + static MessageQueueClient *getInstance(const std::string &dnOrIp, uint64_t port); static void 
releaseInstance(MessageQueueClient* client); static void deleteInstance(MessageQueueClient* client); static MessageQueueClient* findInPool(const std::string& search); diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..f9db8b266 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const 
RowGroup* pRows, vector& in { // this function is for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1532,9 +1547,9 @@ void RowAggregation::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut, in case execplan::CalpontSystemCatalog::DATETIME: { uint64_t dtm = rowIn.getUintField(colIn); - valIn = ((dtm >> 48) * 10000000000000000LL) + (((dtm >> 44) & 0xF) * 100000000000000) + - (((dtm >> 38) & 077) * 1000000000000) + (((dtm >> 32) & 077) * 10000000000) + - (((dtm >> 26) & 077) * 100000000) + (((dtm >> 20) & 077) * 1000000) + (dtm & 0xfffff); + valIn = ((dtm >> 48) * 10000000000LL) + (((dtm >> 44) & 0xF) * 100000000) + + (((dtm >> 38) & 077) * 1000000) + (((dtm >> 32) & 077) * 10000) + + (((dtm >> 26) & 077) * 100) + ((dtm >> 20) & 077); break; } @@ -1550,8 +1565,8 @@ void RowAggregation::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut, in } hour |= ((dtm >> 40) & 0xfff); - valIn = (hour * 10000000000) + - (((dtm >> 32) & 0xff) * 100000000) + (((dtm >> 24) & 0xff) * 1000000) + (dtm & 0xffffff); + 
valIn = (hour * 10000) + + (((dtm >> 32) & 0xff) * 100) + ((dtm >> 24) & 0xff); break; } @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,17 +1723,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); - } - else - { - throw logic_error("(3)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colOut + 1, i); break; } @@ -1725,7 +1731,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1996,132 +2002,277 @@ void RowAggregation::doStatistics(const Row& 
rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1); } -void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) +void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + uint32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; + ConstantColumn* cc; + bool bIsNull = false; + execplan::CalpontSystemCatalog::ColDataType colDataType; - // Get the context for this rowGroup. Make a copy so we're thread safe. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + for (uint32_t i = 0; i < paramCount; ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time + // we find a null. We still need to eat the rest of the parameters + // to sync updateEntry + if (bIsNull) { - return; + ++funcColsIdx; + continue; } - flag |= mcsv1sdk::PARAM_IS_NULL; - } + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; - flags.push_back(flag); - rgContext.setDataFlags(&flags); + // If this particular parameter is a constant, then we need + // to acces the constant value rather than a row value. 
+ cc = NULL; - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + if (pFunctionCol->fpConstCol) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: + cc = dynamic_cast(pFunctionCol->fpConstCol.get()); + } + + if ((cc && cc->type() == ConstantColumn::NULLDATA) + || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) + { + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; + bIsNull = true; + ++funcColsIdx; + continue; } - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } + if (cc) + { + colDataType = cc->resultType().colDataType; + } + else + { + colDataType = fRowGroupIn.getColTypes()[colIn]; + } - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: + if (!(dataFlags[i] & mcsv1sdk::PARAM_IS_NULL)) + { + switch (colDataType) { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = 
rowIn.getFloatField(colIn); - break; - } + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } + if (cc) + { + datum.columnData = cc->getIntVal(const_cast(rowIn), bIsNull); + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } + break; + } - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = colDataType; - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; + if (cc) + { + datum.columnData = cc->getDecimalVal(const_cast(rowIn), bIsNull).value; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + 
else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } + + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + + if (cc) + { + datum.columnData = cc->getUintVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + + if (cc) + { + datum.columnData = cc->getDoubleVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getDoubleField(colIn); + } + + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + + if (cc) + { + datum.columnData = cc->getFloatVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getFloatField(colIn); + } + + break; + } + + case execplan::CalpontSystemCatalog::DATE: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + + if (cc) + { + datum.columnData = cc->getDateIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + + break; + } + + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + + if (cc) + { + datum.columnData = cc->getDatetimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + + if (cc) + { + 
datum.columnData = cc->getTimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getIntField(colIn); + } + + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + + if (cc) + { + datum.columnData = cc->getStrVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getStringField(colIn); + } + + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } } } - } - valsIn.push_back(datum); + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. + if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx + 1]->fAggFunction == ROWAGG_MULTI_PARM) + { + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; + } + } // The intermediate values are stored in userData referenced by colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2369,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2415,17 +2567,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); - } - else - { - throw logic_error("(5)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -2585,22 +2727,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value from valOut into column colOut, performing any conversions. 
void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2735,196 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; + } + + break; + + case 
execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +3130,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3142,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. 
We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3432,63 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } + +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3785,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3890,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3802,17 +4125,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); - } - else - { - throw logic_error("(6)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ 
-4010,46 +4323,46 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // colAux(in) - Where the UDAF userdata resides // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance //------------------------------------------------------------------------------ -void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data - boost::shared_ptr userData = rowIn.getUserData(colIn + 1); + boost::shared_ptr userDataIn = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; - if (!userData) + flags[0] = 0; + + if (!userDataIn) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userDataIn.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4242,17 +4555,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); - } - else - { - throw logic_error("(7)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..b593239cd 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -50,6 +50,7 @@ #include "stlpoolallocator.h" #include "returnedcolumn.h" #include "mcsv1_udaf.h" +#include "constantcolumn.h" // To do: move code that depends on joblist to a proper subsystem. namespace joblist @@ -110,6 +111,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -197,6 +201,13 @@ struct RowAggFunctionCol // 4. 
for duplicate - point to the real aggretate column to be copied from // Set only on UM, the fAuxColumnIndex is defaulted to fOutputColumnIndex+1 on PM. uint32_t fAuxColumnIndex; + + // For UDAF that have more than one parameter and some parameters are constant. + // There will be a series of RowAggFunctionCol created, one for each parameter. + // The first will be a RowUDAFFunctionCol. Subsequent ones will be RowAggFunctionCol + // with fAggFunction == ROWAGG_MULTI_PARM. Order is important. + // If this parameter is constant, that value is here. + SRCP fpConstCol; }; @@ -217,8 +228,11 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol inputColIndex, outputColIndex, auxColIndex), bInterrupted(false) {} - RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, - rhs.fInputColumnIndex, rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext) + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, + rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), + fUDAFContext(rhs.fUDAFContext), + bInterrupted(false) {} virtual ~RowUDAFFunctionCol() {} @@ -235,6 +249,17 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const bs << (uint8_t)fAggFunction; bs << fInputColumnIndex; bs << fOutputColumnIndex; + + if (fpConstCol) + { + bs << (uint8_t)1; + fpConstCol.get()->serialize(bs); + } + else + { + bs << (uint8_t)0; + } + } inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) @@ -242,6 +267,14 @@ inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) bs >> (uint8_t&)fAggFunction; bs >> fInputColumnIndex; bs >> fOutputColumnIndex; + uint8_t t; + bs >> t; + + if (t) + { + fpConstCol.reset(new ConstantColumn); + fpConstCol.get()->unserialize(bs); + } } inline void RowUDAFFunctionCol::serialize(messageqcpp::ByteStream& bs) const @@ -583,7 +616,7 @@ protected: virtual void doAvg(const 
Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +693,25 @@ protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +835,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +932,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 896da1f4a..a07cbcc87 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -58,6 +58,9 @@ #include "../winport/winport.h" +// Workaround for my_global.h #define of isnan(X) causing a std::std namespace +using namespace std; + namespace rowgroup { @@ -355,7 +358,8 @@ public: */ template void setUintField_offset(uint64_t val, uint32_t offset); inline void nextRow(uint32_t size); - + inline void prevRow(uint32_t size, uint64_t number); + inline void setUintField(uint64_t val, uint32_t colIndex); template void setIntField(int64_t, uint32_t colIndex); inline void setIntField(int64_t, uint32_t colIndex); @@ -896,6 +900,12 @@ inline void Row::nextRow(uint32_t size) data += size; } + +inline void Row::prevRow(uint32_t size, uint64_t number = 1) +{ + data -= size * number; +} + template inline void Row::setUintField(uint64_t val, uint32_t colIndex) { @@ -1012,7 +1022,7 @@ inline void Row::setFloatField(float val, uint32_t colIndex) //N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan, // but not necessarily the same bits that you put in. This only seems to be for float (double seems // to work). 
- if (std::isnan(val)) + if (isnan(val)) setUintField<4>(joblist::FLOATNULL, colIndex); else *((float*) &data[offsets[colIndex]]) = val; diff --git a/utils/threadpool/prioritythreadpool.cpp b/utils/threadpool/prioritythreadpool.cpp index b223cee8b..4c043ebbb 100644 --- a/utils/threadpool/prioritythreadpool.cpp +++ b/utils/threadpool/prioritythreadpool.cpp @@ -33,6 +33,8 @@ using namespace logging; #include "prioritythreadpool.h" using namespace boost; +#include "dbcon/joblist/primitivemsg.h" + namespace threadpool { @@ -51,9 +53,9 @@ PriorityThreadPool::PriorityThreadPool(uint targetWeightPerRun, uint highThreads cout << "started " << highThreads << " high, " << midThreads << " med, " << lowThreads << " low.\n"; - threadCounts[HIGH] = highThreads; - threadCounts[MEDIUM] = midThreads; - threadCounts[LOW] = lowThreads; + defaultThreadCounts[HIGH] = threadCounts[HIGH] = highThreads; + defaultThreadCounts[MEDIUM] = threadCounts[MEDIUM] = midThreads; + defaultThreadCounts[LOW] = threadCounts[LOW] = lowThreads; } PriorityThreadPool::~PriorityThreadPool() @@ -68,6 +70,25 @@ void PriorityThreadPool::addJob(const Job& job, bool useLock) if (useLock) lk.lock(); + // Create any missing threads + if (defaultThreadCounts[HIGH] != threadCounts[HIGH]) + { + threads.create_thread(ThreadHelper(this, HIGH)); + threadCounts[HIGH]++; + } + + if (defaultThreadCounts[MEDIUM] != threadCounts[MEDIUM]) + { + threads.create_thread(ThreadHelper(this, MEDIUM)); + threadCounts[MEDIUM]++; + } + + if (defaultThreadCounts[LOW] != threadCounts[LOW]) + { + threads.create_thread(ThreadHelper(this, LOW)); + threadCounts[LOW]++; + } + if (job.priority > 66) jobQueues[HIGH].push_back(job); else if (job.priority > 33) @@ -113,80 +134,148 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() vector reschedule; uint32_t rescheduleCount; uint32_t queueSize; + bool running = false; - while (!_stop) + try { - - mutex::scoped_lock lk(mutex); - - queue = pickAQueue(preferredQueue); - - 
if (jobQueues[queue].empty()) + while (!_stop) { - newJob.wait(lk); - continue; - } - queueSize = jobQueues[queue].size(); - weight = 0; - // 3 conditions stop this thread from grabbing all jobs in the queue - // - // 1: The weight limit has been exceeded - // 2: The queue is empty - // 3: It has grabbed more than half of the jobs available & - // should leave some to the other threads + mutex::scoped_lock lk(mutex); - while ((weight < weightPerRun) && (!jobQueues[queue].empty()) - && (runList.size() <= queueSize / 2)) - { - runList.push_back(jobQueues[queue].front()); - jobQueues[queue].pop_front(); - weight += runList.back().weight; - } + queue = pickAQueue(preferredQueue); - lk.unlock(); + if (jobQueues[queue].empty()) + { + newJob.wait(lk); + continue; + } - reschedule.resize(runList.size()); - rescheduleCount = 0; + queueSize = jobQueues[queue].size(); + weight = 0; + // 3 conditions stop this thread from grabbing all jobs in the queue + // + // 1: The weight limit has been exceeded + // 2: The queue is empty + // 3: It has grabbed more than half of the jobs available & + // should leave some to the other threads - for (i = 0; i < runList.size() && !_stop; i++) - { - try + while ((weight < weightPerRun) && (!jobQueues[queue].empty()) + && (runList.size() <= queueSize / 2)) + { + runList.push_back(jobQueues[queue].front()); + jobQueues[queue].pop_front(); + weight += runList.back().weight; + } + + lk.unlock(); + + reschedule.resize(runList.size()); + rescheduleCount = 0; + + for (i = 0; i < runList.size() && !_stop; i++) { reschedule[i] = false; + running = true; reschedule[i] = (*(runList[i].functor))(); + running = false; if (reschedule[i]) rescheduleCount++; } - catch (std::exception& e) + + // no real work was done, prevent intensive busy waiting + if (rescheduleCount == runList.size()) + usleep(1000); + + if (rescheduleCount > 0) { - cerr << e.what() << endl; + lk.lock(); + + for (i = 0; i < runList.size(); i++) + if (reschedule[i]) + addJob(runList[i], 
false); + + if (rescheduleCount > 1) + newJob.notify_all(); + else + newJob.notify_one(); + + lk.unlock(); } + + runList.clear(); } - - // no real work was done, prevent intensive busy waiting - if (rescheduleCount == runList.size()) - usleep(1000); - - if (rescheduleCount > 0) - { - lk.lock(); - - for (i = 0; i < runList.size(); i++) - if (reschedule[i]) - addJob(runList[i], false); - - if (rescheduleCount > 1) - newJob.notify_all(); - else - newJob.notify_one(); - - lk.unlock(); - } - - runList.clear(); } + catch (std::exception& ex) + { + // Log the exception and exit this thread + try + { + threadCounts[queue]--; +#ifndef NOLOGGING + logging::Message::Args args; + logging::Message message(5); + args.add("threadFcn: Caught exception: "); + args.add(ex.what()); + + message.format( args ); + + logging::LoggingID lid(22); + logging::MessageLog ml(lid); + + ml.logErrorMessage( message ); +#endif + + if (running) + sendErrorMsg(runList[i].uniqueID, runList[i].stepID, runList[i].sock); + } + catch (...) + { + } + } + catch (...) + { + + // Log the exception and exit this thread + try + { + threadCounts[queue]--; +#ifndef NOLOGGING + logging::Message::Args args; + logging::Message message(6); + args.add("threadFcn: Caught unknown exception!"); + + message.format( args ); + + logging::LoggingID lid(22); + logging::MessageLog ml(lid); + + ml.logErrorMessage( message ); +#endif + + if (running) + sendErrorMsg(runList[i].uniqueID, runList[i].stepID, runList[i].sock); + } + catch (...) 
+ { + } + } +} + +void PriorityThreadPool::sendErrorMsg(uint32_t id, uint32_t step, primitiveprocessor::SP_UM_IOSOCK sock) +{ + ISMPacketHeader ism; + PrimitiveHeader ph = {0}; + + ism.Status = logging::primitiveServerErr; + ph.UniqueID = id; + ph.StepID = step; + ByteStream msg(sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); + msg.append((uint8_t*) &ism, sizeof(ism)); + msg.append((uint8_t*) &ph, sizeof(ph)); + + sock->write(msg); } void PriorityThreadPool::stop() diff --git a/utils/threadpool/prioritythreadpool.h b/utils/threadpool/prioritythreadpool.h index 2a31a7725..f58af4d8d 100644 --- a/utils/threadpool/prioritythreadpool.h +++ b/utils/threadpool/prioritythreadpool.h @@ -36,6 +36,7 @@ #include #include #include "../winport/winport.h" +#include "primitives/primproc/umsocketselector.h" namespace threadpool { @@ -62,6 +63,9 @@ public: uint32_t weight; uint32_t priority; uint32_t id; + uint32_t uniqueID; + uint32_t stepID; + primitiveprocessor::SP_UM_IOSOCK sock; }; enum Priority @@ -112,9 +116,11 @@ private: Priority pickAQueue(Priority preference); void threadFcn(const Priority preferredQueue) throw(); + void sendErrorMsg(uint32_t id, uint32_t step, primitiveprocessor::SP_UM_IOSOCK sock); std::list jobQueues[3]; // higher indexes = higher priority uint32_t threadCounts[3]; + uint32_t defaultThreadCounts[3]; boost::mutex mutex; boost::condition newJob; boost::thread_group threads; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..ad4460977 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index 
b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..6a727caf6 100644 --- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -48,7 +48,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -103,7 +102,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +137,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h 
b/utils/udfsdk/avg_mode.h index 4f3442005..fba1fcdcc 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,13 +50,12 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include #include -#include #ifdef _MSC_VER #include #else @@ -134,7 +133,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +168,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +244,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..5af852967 --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,259 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + + valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..a830c6803 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_.h + diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2016-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +.. _ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn array of next_value() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string. 
@@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines. If writing your function for other engines, you must handle allocation and de-allocation in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map. 
+The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense. -For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. 
nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. + :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. 
+ When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as an analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may be called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk. 
You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. 
See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. -In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. 
diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, an arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. 
For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnDatum structures from which the type and name of each parameter can be read. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your function's behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of parameters passed can be obtained from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in .. rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. 
c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..9e4596440 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -34,8 +34,9 @@ using namespace mcsv1sdk; UDAF_MAP UDAFMap::fm; #include "allnull.h" #include "ssq.h" -#include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -50,8 +51,9 @@ UDAF_MAP& UDAFMap::getMap() // the function names passed to the interface is always in lower case. fm["allnull"] = new allnull(); fm["ssq"] = new ssq(); - fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +117,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. 
- if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + || fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +127,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +220,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +242,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index d24852c28..e09228d77 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -68,7 +68,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -77,6 +76,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +200,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. 
-typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +265,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +296,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +371,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array with one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +379,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +394,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + 
EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +420,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() : dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +468,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +503,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +580,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +640,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +675,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +749,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & 
PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +763,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +932,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 
@@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..48bd93c70 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -134,7 +133,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +168,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +244,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..e99871f97 --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,270 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. 
Probably redundant + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..27b8708f7 --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the regr_avgx value of the dataset + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx.h + diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct 
ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..e27ecf1fa 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -114,7 +113,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +146,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +222,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..1c0fee1db 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -349,78 +349,6 @@ extern "C" return data->sumsq; } -//======================================================================= - - /** - * MEDIAN connector stub - */ -#ifdef _MSC_VER - __declspec(dllexport) -#endif - my_bool median_init(UDF_INIT* initid, UDF_ARGS* args, char* message) - { - if (args->arg_count != 1) - { - strcpy(message, "median() requires one argument"); - return 1; - } - - /* - if (!(data = (struct ssq_data*) malloc(sizeof(struct ssq_data)))) - { - strmov(message,"Couldn't allocate memory"); - return 1; - } - data->sumsq = 0; - - initid->ptr = (char*)data; - */ - return 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void median_deinit(UDF_INIT* initid) - { -// free(initid->ptr); - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - median_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) - { -// struct ssq_data* data = (struct ssq_data*)initid->ptr; -// data->sumsq = 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - median_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) - { -// struct ssq_data* data = (struct ssq_data*)initid->ptr; -// double val = cvtArgToDouble(args->arg_type[0], args->args[0]); -// data->sumsq = val*val; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - long long median(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) - { -// struct ssq_data* data = (struct ssq_data*)initid->ptr; -// return data->sumsq; - return 0; - } - /** * avg_mode connector stub */ @@ -490,6 +418,172 @@ 
extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + + if (args->arg_count != 2) + { + strcpy(message, "regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message, "Couldn't allocate memory"); + return 1; + } + + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // x is argument index 1 of regr_avgx(y, x): read its type AND its value from index 1. TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + ++data->cnt; + data->sumx += xval; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->cnt != 0 ? data->sumx / data->cnt : 0; // empty group: avoid converting 0.0/0 to an integer + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function.
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + + if (args->arg_count != 1) + { + strcpy(message, "avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message, "Couldn't allocate memory"); + return 1; + } + + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // avgx_init() enforces exactly one argument, so type and value are both at index 0. TODO test for NULL in x + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[0], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->cnt != 0 ? data->sumx / data->cnt : 0; // empty group: avoid converting 0.0/0 to an integer + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/idborderby.h b/utils/windowfunction/idborderby.h index 91db95366..a432fdc31 100644 --- a/utils/windowfunction/idborderby.h +++ b/utils/windowfunction/idborderby.h @@ -59,6 +59,7 @@ class IdbCompare; struct IdbSortSpec { int fIndex; + // TODO There are three ordering specs since 10.2 int fAsc; // ::= ASC | DESC int fNf; // ::= NULLS FIRST | NULLS LAST diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..79ed61b52 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size() - 1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,74 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i + 1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k + 1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -431,6 +454,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; + bool isNull = false; if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || @@ -442,18 +466,40 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; + + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + cc = static_cast(fConstantParms[i].get()); + + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i + 1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { @@ -461,39 +507,251 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) break; 
fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + bHasNull = false; + + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + cc = static_cast(fConstantParms[k].get()); + uint64_t colIn = fFieldIndex[k + 1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) + { + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + { + int64_t valIn; + + if (cc) + { + valIn = cc->getIntVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + int64_t valIn; + + if (cc) + { + valIn = cc->getDecimalVal(fRow, isNull).value; + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + { + uint64_t valIn; + + if (cc) + { + valIn = cc->getUintVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + + if (cc) + { + valIn = cc->getDoubleVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + + if (cc) + { + valIn = cc->getFloatVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[datum.dataType] + ")"; // name the offending parameter type; i is the row index + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + } } - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + // Skip if any value is NULL and respect nulls is off.
+ if (bHasNull) { continue; } - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); + getContext().setDataFlags(flags); rc = getContext().getFunction()->nextValue(&getContext(), valsIn); diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..ef2ca5853 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,18 +21,38 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF -// The template parameter is currently only used to support DISTINCT, as -// as that is done via a set template class WF_udaf : public WindowFunctionType { @@ -72,7 +92,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..dfceb6364 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -39,7 +39,6 @@ using namespace logging; using namespace ordering; #include "calpontsystemcatalog.h" -#include "constantcolumn.h" #include "dataconvert.h" // int64_t IDB_pow[19] using namespace execplan; @@ -228,6 +227,9 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; } + // Copy the only the constant parameter pointers + af->constParms(wc->functionParms()); + return af; } @@ -492,10 +494,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; @@ -634,6 +636,27 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) return v; } +void WindowFunctionType::constParms(const std::vector& functionParms) +{ + // fConstantParms will end up with a copy of functionParms, but only + // the constant types will be copied. Other types will take up space but + // be NULL. This allows us to acces the constants without the overhead + // of dynamic_cast for every row. 
+ for (size_t i = 0; i < functionParms.size(); ++i) + { + ConstantColumn* cc = dynamic_cast(functionParms[i].get()); + + if (cc) + { + fConstantParms.push_back(functionParms[i]); + } + else + { + fConstantParms.push_back(SRCP(cc)); + } + } +} + } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h index 50732d3b5..5c2f43db0 100644 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -31,7 +31,7 @@ #include "returnedcolumn.h" #include "rowgroup.h" #include "windowframe.h" - +#include "constantcolumn.h" namespace ordering { @@ -198,6 +198,8 @@ public: fStep = step; } + void constParms(const std::vector& functionParms); + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: @@ -244,6 +246,9 @@ protected: // output and input field indices: [0] - output std::vector fFieldIndex; + // constant function parameters -- needed for udaf with constant + std::vector fConstantParms; + // row meta data rowgroup::RowGroup fRowGroup; rowgroup::Row fRow; diff --git a/utils/winport/win_setup_mysql_part4.sql b/utils/winport/win_setup_mysql_part4.sql index 3b75fbe98..d884214ec 100644 --- a/utils/winport/win_setup_mysql_part4.sql +++ b/utils/winport/win_setup_mysql_part4.sql @@ -18,4 +18,5 @@ CREATE FUNCTION idbextentmin RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idbextentmax RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.dll'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll'; diff --git a/writeengine/server/we_server.cpp b/writeengine/server/we_server.cpp index 31e0d8792..826b2dc73 100644 --- a/writeengine/server/we_server.cpp +++ b/writeengine/server/we_server.cpp @@ -110,6 +110,21 @@ int main(int argc, char** argv) printf ("Locale is : %s\n", 
systemLang.c_str() ); + int gDebug = 0; + int c; + while ((c = getopt(argc, argv, "d")) != EOF) + { + switch (c) + { + case 'd': + gDebug++; + break; + case '?': + default: + break; + } + } + //set BUSY_INIT state { // Is there a reason to have a seperate Oam instance for this? @@ -210,7 +225,9 @@ int main(int argc, char** argv) } } - int err = setupResources(); + int err = 0; + if (!gDebug) + err = setupResources(); string errMsg; switch (err) diff --git a/writeengine/splitter/we_cmdargs.h b/writeengine/splitter/we_cmdargs.h index 46a7524cc..3186fe6c0 100644 --- a/writeengine/splitter/we_cmdargs.h +++ b/writeengine/splitter/we_cmdargs.h @@ -77,7 +77,11 @@ public: { return fLocFile; } - int getMode() const + int getReadBufSize() const + { + return fReadBufSize; + } + int getMode() const { return fMode; } diff --git a/writeengine/splitter/we_filereadthread.cpp b/writeengine/splitter/we_filereadthread.cpp index 3110137dc..6840d377d 100644 --- a/writeengine/splitter/we_filereadthread.cpp +++ b/writeengine/splitter/we_filereadthread.cpp @@ -88,6 +88,17 @@ WEFileReadThread::WEFileReadThread(WESDHandler& aSdh): fSdh(aSdh), //TODO batch qty to get from config fBatchQty = 10000; + if (fSdh.getReadBufSize() < DEFAULTBUFFSIZE) + { + fBuffSize = DEFAULTBUFFSIZE; + } + else + { + fBuffSize = fSdh.getReadBufSize(); + } + + fBuff = new char [fBuffSize]; + } //WEFileReadThread::WEFileReadThread(const WEFileReadThread& rhs):fSdh(rhs.fSdh) @@ -109,6 +120,7 @@ WEFileReadThread::~WEFileReadThread() } fpThread = 0; + delete []fBuff; //cout << "WEFileReadThread destructor called" << endl; } @@ -352,17 +364,17 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs) if (fEnclEsc) { //pStart = aBuff; - aLen = getNextRow(fInFile, fBuff, sizeof(fBuff) - 1); + aLen = getNextRow(fInFile, fBuff, fBuffSize - 1); } else { - fInFile.getline(fBuff, sizeof(fBuff) - 1); + fInFile.getline(fBuff, fBuffSize - 1); aLen = fInFile.gcount(); } ////aLen chars incl \n, Therefore aLen-1; '<<' oper won't go
past it //cout << "Data Length " << aLen < 0)) + if ((aLen < (fBuffSize - 2)) && (aLen > 0)) { fBuff[aLen - 1] = '\n'; fBuff[aLen] = 0; @@ -374,7 +386,7 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs) if (fSdh.getDebugLvl() > 2) cout << "File data line = " << aIdx << endl; } - else if (aLen >= sizeof(fBuff) - 2) //Didn't hit delim; BIG ROW + else if (aLen >= fBuffSize - 2) //Didn't hit delim; BIG ROW { cout << "Bad Row data " << endl; cout << fBuff << endl; diff --git a/writeengine/splitter/we_filereadthread.h b/writeengine/splitter/we_filereadthread.h index c7cd54e82..2fb92332c 100644 --- a/writeengine/splitter/we_filereadthread.h +++ b/writeengine/splitter/we_filereadthread.h @@ -126,7 +126,7 @@ public: void add2InputDataFileList(std::string& FileName); private: - enum { MAXBUFFSIZE = 1024 * 1024 }; + enum { DEFAULTBUFFSIZE = 1024 * 1024 }; // don't allow anyone else to set void setTgtPmId(unsigned int fTgtPmId) @@ -151,7 +151,8 @@ private: char fEncl; // Encl char char fEsc; // Esc char char fDelim; // Column Delimit char - char fBuff[MAXBUFFSIZE]; // main data buffer + char* fBuff; // main data buffer + int fBuffSize; }; } /* namespace WriteEngine */ diff --git a/writeengine/splitter/we_sdhandler.cpp b/writeengine/splitter/we_sdhandler.cpp index 802fd108b..61fe45239 100644 --- a/writeengine/splitter/we_sdhandler.cpp +++ b/writeengine/splitter/we_sdhandler.cpp @@ -2663,6 +2663,13 @@ char WESDHandler::getEscChar() //------------------------------------------------------------------------------ +int WESDHandler::getReadBufSize() +{ + return fRef.fCmdArgs.getReadBufSize(); +} + +//------------------------------------------------------------------------------ + char WESDHandler::getDelimChar() { return fRef.fCmdArgs.getDelimChar(); diff --git a/writeengine/splitter/we_sdhandler.h b/writeengine/splitter/we_sdhandler.h index b593b1420..f31f40c63 100644 --- a/writeengine/splitter/we_sdhandler.h +++ b/writeengine/splitter/we_sdhandler.h @@ -152,6 
+152,7 @@ public: char getEscChar(); char getDelimChar(); bool getConsoleLog(); + int getReadBufSize(); ImportDataMode getImportDataMode() const; void sysLog(const logging::Message::Args& msgArgs, logging::LOG_TYPE logType, logging::Message::MessageID msgId); diff --git a/writeengine/wrapper/we_colop.cpp b/writeengine/wrapper/we_colop.cpp index 7aecf726d..ffd01df2e 100644 --- a/writeengine/wrapper/we_colop.cpp +++ b/writeengine/wrapper/we_colop.cpp @@ -222,7 +222,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, //Find out where the rest rows go BRM::LBID_t startLbid; //need to put in a loop until newExtent is true - newExtent = dbRootExtentTrackers[0]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); + newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); TableMetaData* tableMetaData = TableMetaData::makeTableMetaData(tableOid); while (!newExtent) @@ -238,7 +238,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, for (i = 0; i < dbRootExtentTrackers.size(); i++) { - if (i != 0) + if (i != column.colNo) dbRootExtentTrackers[i]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); // Round up HWM to the end of the current extent @@ -303,7 +303,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, tableMetaData->setColExtsInfo(newColStructList[i].dataOid, aColExtsInfo); } - newExtent = dbRootExtentTrackers[0]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); + newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); } } @@ -324,7 +324,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, } rc = BRMWrapper::getInstance()->allocateStripeColExtents(cols, dbRoot, partition, segment, extents); - newHwm = extents[0].startBlkOffset; + newHwm = extents[column.colNo].startBlkOffset; if (rc != NO_ERROR) return rc; diff --git 
a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 5d3dfec85..923871ef9 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -1683,6 +1683,20 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + // MCOL-984: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + int32_t lowColLen = 8192; + int32_t colId = 0; + + for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) + { + if (colStructList[colIt].colWidth < lowColLen) + { + colId = colIt; + lowColLen = colStructList[colId].colWidth; + } + } + // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != NO_ERROR) // return rc; @@ -1709,8 +1723,8 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, //-------------------------------------------------------------------------- if (isFirstBatchPm) { - currentDBrootIdx = dbRootExtentTrackers[0]->getCurrentDBRootIdx(); - extentInfo = dbRootExtentTrackers[0]->getDBRootExtentList(); + currentDBrootIdx = dbRootExtentTrackers[colId]->getCurrentDBRootIdx(); + extentInfo = dbRootExtentTrackers[colId]->getDBRootExtentList(); dbRoot = extentInfo[currentDBrootIdx].fDbRoot; partitionNum = extentInfo[currentDBrootIdx].fPartition; @@ -1895,7 +1909,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, } // if (isFirstBatchPm) else //get the extent info from tableMetaData { - ColExtsInfo aColExtsInfo = 
tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) @@ -1931,20 +1945,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, // allocate row id(s) //-------------------------------------------------------------------------- - // MCOL-984: find the smallest column width to calculate the RowID from so - // that all HWMs will be incremented by this operation - int32_t lowColLen = 8192; - int32_t colId = 0; - - for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) - { - if (colStructList[colIt].colWidth < lowColLen) - { - colId = colIt; - lowColLen = colStructList[colId].colWidth; - curColStruct = colStructList[colId]; - } - } + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; @@ -1970,7 +1971,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, if (it != aColExtsInfo.end()) { hwm = it->hwm; - //cout << "Got from colextinfo hwm for oid " << colStructList[0].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; + //cout << "Got from colextinfo hwm for oid " << colStructList[colId].dataOid << " is " << hwm << " and seg is " << colStructList[colId].fColSegment << endl; } oldHwm = hwm; //Save this info for rollback @@ -2024,10 +2025,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column - if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2245,31 +2246,32 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, if (it != aColExtsInfo.end()) //update hwm info { oldHwm = it->hwm; - } - // save 
hwm for the old extent - colWidth = colStructList[i].colWidth; - succFlag = colOp->calculateRowId(lastRid, BYTE_PER_BLOCK / colWidth, colWidth, curFbo, curBio); + // save hwm for the old extent + colWidth = colStructList[i].colWidth; + succFlag = colOp->calculateRowId(lastRid, BYTE_PER_BLOCK / colWidth, colWidth, curFbo, curBio); - //cout << "insertcolumnrec oid:rid:fbo:oldhwm = " << colStructList[i].dataOid << ":" << lastRid << ":" << curFbo << ":" << oldHwm << endl; - if (succFlag) - { - if ((HWM)curFbo >= oldHwm) + //cout << "insertcolumnrec oid:rid:fbo:oldhwm = " << colStructList[i].dataOid << ":" << lastRid << ":" << curFbo << ":" << oldHwm << endl; + if (succFlag) { - it->hwm = (HWM)curFbo; - } + if ((HWM)curFbo >= oldHwm) + { + it->hwm = (HWM)curFbo; + } - //@Bug 4947. set current to false for old extent. - if (newExtent) - { - it->current = false; - } + //@Bug 4947. set current to false for old extent. + if (newExtent) + { + it->current = false; + } + + //cout << "updated old ext info for oid " << colStructList[i].dataOid << " dbroot:part:seg:hwm:current = " + //<< it->dbRoot<<":"<partNum<<":"<segNum<<":"<hwm<<":"<< it->current<< " and newExtent is " << newExtent << endl; + } + else + return ERR_INVALID_PARAM; - //cout << "updated old ext info for oid " << colStructList[i].dataOid << " dbroot:part:seg:hwm:current = " - //<< it->dbRoot<<":"<partNum<<":"<segNum<<":"<hwm<<":"<< it->current<< " and newExtent is " << newExtent << endl; } - else - return ERR_INVALID_PARAM; //update hwm for the new extent if (newExtent) @@ -2285,6 +2287,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, it++; } + colWidth = newColStructList[i].colWidth; succFlag = colOp->calculateRowId(lastRidNew, BYTE_PER_BLOCK / colWidth, colWidth, curFbo, curBio); if (succFlag) @@ -2355,27 +2358,31 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, curFbo)); } } + else + return ERR_INVALID_PARAM; } + } - // If we create a new extent for this batch - for 
(unsigned i = 0; i < newColStructList.size(); i++) + // If we create a new extent for this batch + for (unsigned i = 0; i < newColStructList.size(); i++) + { + colOp = m_colOp[op(newColStructList[i].fCompressionType)]; + width = newColStructList[i].colWidth; + successFlag = colOp->calculateRowId(lastRidNew, BYTE_PER_BLOCK / width, width, curFbo, curBio); + + if (successFlag) { - colOp = m_colOp[op(newColStructList[i].fCompressionType)]; - width = newColStructList[i].colWidth; - successFlag = colOp->calculateRowId(lastRidNew, BYTE_PER_BLOCK / width, width, curFbo, curBio); - - if (successFlag) + if (curFbo != lastFbo) { - if (curFbo != lastFbo) - { - RETURN_ON_ERROR(AddLBIDtoList(txnid, - lbids, - colDataTypes, - newColStructList[i], - curFbo)); - } + RETURN_ON_ERROR(AddLBIDtoList(txnid, + lbids, + colDataTypes, + newColStructList[i], + curFbo)); } } + else + return ERR_INVALID_PARAM; } if (lbids.size() > 0) @@ -2582,7 +2589,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3277,7 +3284,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -5131,7 +5138,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, bool versioning) { int rc = 0; - void* valArray; + void* valArray = NULL; string segFile; Column curCol; ColStructList::size_type totalColumn; @@ -5157,146 +5164,150 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, 
totalRow2 = 0; } - valArray = malloc(sizeof(uint64_t) * totalRow1); - - if (totalRow1 == 0) + // It is possible totalRow1 is zero but totalRow2 has values + if ((totalRow1 == 0) && (totalRow2 == 0)) return rc; TableMetaData* aTbaleMetaData = TableMetaData::makeTableMetaData(tableOid); - for (i = 0; i < totalColumn; i++) + if (totalRow1) { - //@Bug 2205 Check if all rows go to the new extent - //Write the first batch - RID* firstPart = rowIdArray; - ColumnOp* colOp = m_colOp[op(colStructList[i].fCompressionType)]; + valArray = malloc(sizeof(uint64_t) * totalRow1); - // set params - colOp->initColumn(curCol); - // need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, colStructList[i].colWidth, - colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, - colStructList[i].fCompressionType, colStructList[i].fColDbRoot, - colStructList[i].fColPartition, colStructList[i].fColSegment); - - ColExtsInfo aColExtsInfo = aTbaleMetaData->getColExtsInfo(colStructList[i].dataOid); - ColExtsInfo::iterator it = aColExtsInfo.begin(); - - while (it != aColExtsInfo.end()) + for (i = 0; i < totalColumn; i++) { - if ((it->dbRoot == colStructList[i].fColDbRoot) && (it->partNum == colStructList[i].fColPartition) && (it->segNum == colStructList[i].fColSegment)) - break; + //@Bug 2205 Check if all rows go to the new extent + //Write the first batch + RID* firstPart = rowIdArray; + ColumnOp* colOp = m_colOp[op(colStructList[i].fCompressionType)]; - it++; - } + // set params + colOp->initColumn(curCol); + // need to pass real dbRoot, partition, and segment to setColParam + colOp->setColParam(curCol, 0, colStructList[i].colWidth, + colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, + colStructList[i].fCompressionType, colStructList[i].fColDbRoot, + colStructList[i].fColPartition, colStructList[i].fColSegment); - if (it == aColExtsInfo.end()) //add this one to the list - { - ColExtInfo aExt; - 
aExt.dbRoot = colStructList[i].fColDbRoot; - aExt.partNum = colStructList[i].fColPartition; - aExt.segNum = colStructList[i].fColSegment; - aExt.compType = colStructList[i].fCompressionType; - aColExtsInfo.push_back(aExt); - aTbaleMetaData->setColExtsInfo(colStructList[i].dataOid, aColExtsInfo); - } + ColExtsInfo aColExtsInfo = aTbaleMetaData->getColExtsInfo(colStructList[i].dataOid); + ColExtsInfo::iterator it = aColExtsInfo.begin(); - rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix, IO_BUFF_SIZE); // @bug 5572 HDFS tmp file + while (it != aColExtsInfo.end()) + { + if ((it->dbRoot == colStructList[i].fColDbRoot) && (it->partNum == colStructList[i].fColPartition) && (it->segNum == colStructList[i].fColSegment)) + break; - if (rc != NO_ERROR) - break; + it++; + } - // handling versioning - vector rangeList; + if (it == aColExtsInfo.end()) //add this one to the list + { + ColExtInfo aExt; + aExt.dbRoot = colStructList[i].fColDbRoot; + aExt.partNum = colStructList[i].fColPartition; + aExt.segNum = colStructList[i].fColSegment; + aExt.compType = colStructList[i].fCompressionType; + aColExtsInfo.push_back(aExt); + aTbaleMetaData->setColExtsInfo(colStructList[i].dataOid, aColExtsInfo); + } - if (versioning) - { - rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], - colStructList[i].colWidth, totalRow1, firstPart, rangeList); + rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix, IO_BUFF_SIZE); // @bug 5572 HDFS tmp file if (rc != NO_ERROR) - { - if (colStructList[i].fCompressionType == 0) - { - curCol.dataFile.pFile->flush(); - } - - BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); break; - } - } - //totalRow1 -= totalRow2; - // have to init the size here - // nullArray = (bool*) malloc(sizeof(bool) * totalRow); - uint8_t tmp8; - uint16_t tmp16; - uint32_t tmp32; + // handling versioning + vector rangeList; - for (size_t j = 0; j < totalRow1; j++) - { - uint64_t curValue = colValueList[((totalRow1 + totalRow2) * i) + j]; - - 
switch (colStructList[i].colType) + if (versioning) { - case WriteEngine::WR_VARBINARY : // treat same as char for now - case WriteEngine::WR_CHAR: - case WriteEngine::WR_BLOB: - case WriteEngine::WR_TEXT: - ((uint64_t*)valArray)[j] = curValue; - break; + rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], + colStructList[i].colWidth, totalRow1, firstPart, rangeList); - case WriteEngine::WR_INT: - case WriteEngine::WR_UINT: - case WriteEngine::WR_FLOAT: - tmp32 = curValue; - ((uint32_t*)valArray)[j] = tmp32; - break; + if (rc != NO_ERROR) + { + if (colStructList[i].fCompressionType == 0) + { + curCol.dataFile.pFile->flush(); + } - case WriteEngine::WR_ULONGLONG: - case WriteEngine::WR_LONGLONG: - case WriteEngine::WR_DOUBLE: - case WriteEngine::WR_TOKEN: - ((uint64_t*)valArray)[j] = curValue; - break; - - case WriteEngine::WR_BYTE: - case WriteEngine::WR_UBYTE: - tmp8 = curValue; - ((uint8_t*)valArray)[j] = tmp8; - break; - - case WriteEngine::WR_SHORT: - case WriteEngine::WR_USHORT: - tmp16 = curValue; - ((uint16_t*)valArray)[j] = tmp16; + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); break; + } } + + //totalRow1 -= totalRow2; + // have to init the size here + // nullArray = (bool*) malloc(sizeof(bool) * totalRow); + uint8_t tmp8; + uint16_t tmp16; + uint32_t tmp32; + + for (size_t j = 0; j < totalRow1; j++) + { + uint64_t curValue = colValueList[((totalRow1 + totalRow2) * i) + j]; + + switch (colStructList[i].colType) + { + case WriteEngine::WR_VARBINARY : // treat same as char for now + case WriteEngine::WR_CHAR: + case WriteEngine::WR_BLOB: + case WriteEngine::WR_TEXT: + ((uint64_t*)valArray)[j] = curValue; + break; + + case WriteEngine::WR_INT: + case WriteEngine::WR_UINT: + case WriteEngine::WR_FLOAT: + tmp32 = curValue; + ((uint32_t*)valArray)[j] = tmp32; + break; + + case WriteEngine::WR_ULONGLONG: + case WriteEngine::WR_LONGLONG: + case WriteEngine::WR_DOUBLE: + case WriteEngine::WR_TOKEN: + ((uint64_t*)valArray)[j] = 
curValue; + break; + + case WriteEngine::WR_BYTE: + case WriteEngine::WR_UBYTE: + tmp8 = curValue; + ((uint8_t*)valArray)[j] = tmp8; + break; + + case WriteEngine::WR_SHORT: + case WriteEngine::WR_USHORT: + tmp16 = curValue; + ((uint16_t*)valArray)[j] = tmp16; + break; + } + } + + +#ifdef PROFILE + timer.start("writeRow "); +#endif + rc = colOp->writeRow(curCol, totalRow1, firstPart, valArray); +#ifdef PROFILE + timer.stop("writeRow "); +#endif + colOp->closeColumnFile(curCol); + + if (versioning) + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); + + // check error + if (rc != NO_ERROR) + break; + + } // end of for (i = 0 + + if (valArray != NULL) + { + free(valArray); + valArray = NULL; } - - -#ifdef PROFILE - timer.start("writeRow "); -#endif - rc = colOp->writeRow(curCol, totalRow1, firstPart, valArray); -#ifdef PROFILE - timer.stop("writeRow "); -#endif - colOp->closeColumnFile(curCol); - - if (versioning) - BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); - - // check error - if (rc != NO_ERROR) - break; - - } // end of for (i = 0 - - if (valArray != NULL) - { - free(valArray); - valArray = NULL; } // MCOL-1176 - Write second extent