diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 index 953c25a12..fb908092e --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,10 +194,11 @@ SET (ENGINE_WE_CONFIGCPP_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/writeengine/x SET (ENGINE_SERVER_SQL_INCLUDE "${SERVER_SOURCE_ROOT_DIR}/sql") SET (ENGINE_SERVER_INCLUDE_INCLUDE "${SERVER_SOURCE_ROOT_DIR}/include") SET (ENGINE_SERVER_PCRE_INCLUDE "${SERVER_BUILD_INCLUDE_DIR}/../pcre") +SET (ENGINE_UTILS_UDFSDK_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/udfsdk") SET (ENGINE_DEFAULT_INCLUDES ${CMAKE_BINARY_DIR} "." "../" "../../" ${SERVER_BUILD_INCLUDE_DIR}) -SET (ENGINE_COMMON_INCLUDES ${ENGINE_DEFAULT_INCLUDES} ${Boost_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ${ENGINE_UTILS_MESSAGEQCPP_INCLUDE} ${ENGINE_WE_SHARED_INCLUDE} ${ENGINE_UTILS_IDBDATAFILE_INCLUDE} ${ENGINE_UTILS_LOGGINGCPP_INCLUDE} ${ENGINE_UTILS_CONFIGCPP_INCLUDE} ${ENGINE_UTILS_COMPRESS_INCLUDE} ${ENGINE_VERSIONING_BRM_INCLUDE} ${ENGINE_UTILS_ROWGROUP_INCLUDE} ${ENGINE_UTILS_COMMON_INCLUDE} ${ENGINE_UTILS_DATACONVERT_INCLUDE} ${ENGINE_UTILS_RWLOCK_INCLUDE} ${ENGINE_UTILS_FUNCEXP_INCLUDE} ${ENGINE_OAMAPPS_ALARMMANAGER_INCLUDE} ${ENGINE_UTILS_INCLUDE} ${ENGINE_OAM_OAMCPP_INCLUDE} ${ENGINE_DBCON_DDLPKGPROC_INCLUDE} ${ENGINE_DBCON_DDLPKG_INCLUDE} ${ENGINE_DBCON_EXECPLAN_INCLUDE} ${ENGINE_UTILS_STARTUP_INCLUDE} ${ENGINE_DBCON_JOBLIST_INCLUDE} ${ENGINE_WE_WRAPPER_INCLUDE} ${ENGINE_WE_SERVER_INCLUDE} ${ENGINE_DBCON_DMLPKG_INCLUDE} ${ENGINE_WE_CLIENT_INCLUDE} ${ENGINE_DBCON_DMLPKGPROC_INCLUDE} ${ENGINE_UTILS_CACHEUTILS_INCLUDE} ${ENGINE_UTILS_MYSQLCL_INCLUDE} ${ENGINE_UTILS_QUERYTELE_INCLUDE} ${ENGINE_UTILS_THRIFT_INCLUDE} ${ENGINE_UTILS_JOINER_INCLUDE} ${ENGINE_UTILS_THREADPOOL_INCLUDE} ${ENGINE_UTILS_BATCHLDR_INCLUDE} ${ENGINE_UTILS_DDLCLEANUP_INCLUDE} ${ENGINE_UTILS_QUERYSTATS_INCLUDE} ${ENGINE_WE_CONFIGCPP_INCLUDE} ${ENGINE_SERVER_SQL_INCLUDE} ${ENGINE_SERVER_INCLUDE_INCLUDE} ${ENGINE_SERVER_PCRE_INCLUDE}) +SET (ENGINE_COMMON_INCLUDES 
${ENGINE_DEFAULT_INCLUDES} ${Boost_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ${ENGINE_UTILS_MESSAGEQCPP_INCLUDE} ${ENGINE_WE_SHARED_INCLUDE} ${ENGINE_UTILS_IDBDATAFILE_INCLUDE} ${ENGINE_UTILS_LOGGINGCPP_INCLUDE} ${ENGINE_UTILS_CONFIGCPP_INCLUDE} ${ENGINE_UTILS_COMPRESS_INCLUDE} ${ENGINE_VERSIONING_BRM_INCLUDE} ${ENGINE_UTILS_ROWGROUP_INCLUDE} ${ENGINE_UTILS_COMMON_INCLUDE} ${ENGINE_UTILS_DATACONVERT_INCLUDE} ${ENGINE_UTILS_RWLOCK_INCLUDE} ${ENGINE_UTILS_FUNCEXP_INCLUDE} ${ENGINE_OAMAPPS_ALARMMANAGER_INCLUDE} ${ENGINE_UTILS_INCLUDE} ${ENGINE_OAM_OAMCPP_INCLUDE} ${ENGINE_DBCON_DDLPKGPROC_INCLUDE} ${ENGINE_DBCON_DDLPKG_INCLUDE} ${ENGINE_DBCON_EXECPLAN_INCLUDE} ${ENGINE_UTILS_STARTUP_INCLUDE} ${ENGINE_DBCON_JOBLIST_INCLUDE} ${ENGINE_WE_WRAPPER_INCLUDE} ${ENGINE_WE_SERVER_INCLUDE} ${ENGINE_DBCON_DMLPKG_INCLUDE} ${ENGINE_WE_CLIENT_INCLUDE} ${ENGINE_DBCON_DMLPKGPROC_INCLUDE} ${ENGINE_UTILS_CACHEUTILS_INCLUDE} ${ENGINE_UTILS_MYSQLCL_INCLUDE} ${ENGINE_UTILS_QUERYTELE_INCLUDE} ${ENGINE_UTILS_THRIFT_INCLUDE} ${ENGINE_UTILS_JOINER_INCLUDE} ${ENGINE_UTILS_THREADPOOL_INCLUDE} ${ENGINE_UTILS_BATCHLDR_INCLUDE} ${ENGINE_UTILS_DDLCLEANUP_INCLUDE} ${ENGINE_UTILS_QUERYSTATS_INCLUDE} ${ENGINE_WE_CONFIGCPP_INCLUDE} ${ENGINE_SERVER_SQL_INCLUDE} ${ENGINE_SERVER_INCLUDE_INCLUDE} ${ENGINE_SERVER_PCRE_INCLUDE} ${ENGINE_UTILS_UDFSDK_INCLUDE}) ADD_SUBDIRECTORY(utils) ADD_SUBDIRECTORY(oam/oamcpp) diff --git a/dbcon/ddlpackage/ddlpackage.vpj b/dbcon/ddlpackage/ddlpackage.vpj old mode 100644 new mode 100755 index 9a5cb5708..49ba4b6d7 --- a/dbcon/ddlpackage/ddlpackage.vpj +++ b/dbcon/ddlpackage/ddlpackage.vpj @@ -1,242 +1,242 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dbcon/ddlpackageproc/ddlpackageproc.vpj b/dbcon/ddlpackageproc/ddlpackageproc.vpj old mode 100644 new mode 100755 index d578404c3..69a4c2f14 --- a/dbcon/ddlpackageproc/ddlpackageproc.vpj +++ b/dbcon/ddlpackageproc/ddlpackageproc.vpj @@ -1,243 +1,243 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dbcon/dmlpackage/dmlpackage.vpj b/dbcon/dmlpackage/dmlpackage.vpj old mode 100644 new mode 100755 index ca071bdd7..ae0778713 --- a/dbcon/dmlpackage/dmlpackage.vpj +++ b/dbcon/dmlpackage/dmlpackage.vpj @@ -1,255 +1,255 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dbcon/dmlpackageproc/deletepackageprocessor.cpp b/dbcon/dmlpackageproc/deletepackageprocessor.cpp old mode 100644 new mode 100755 index dde256e31..a22344c17 --- a/dbcon/dmlpackageproc/deletepackageprocessor.cpp +++ b/dbcon/dmlpackageproc/deletepackageprocessor.cpp @@ -458,16 +458,15 @@ namespace dmlpackageprocessor } // XXXST: take out the 'true' when all jobsteps have been made st-compatible - 
uint32_t amount = rgData.deserialize(msg, true); + rgData.deserialize(msg, true); rowGroup->setData(&rgData); //rowGroup->setData(const_cast(msg.buf())); err = (rowGroup->getStatus() != 0); if (err) { - msgBk.advance(amount); //msgBk.advance(rowGroup->getDataSize()); string errorMsg; - msgBk >> errorMsg; + msg >> errorMsg; logging::Message::Args args; logging::Message message(2); args.add("Delete Failed: "); diff --git a/dbcon/dmlpackageproc/dmlpackageproc.vpj b/dbcon/dmlpackageproc/dmlpackageproc.vpj old mode 100644 new mode 100755 index 78da36cf7..9788a7980 --- a/dbcon/dmlpackageproc/dmlpackageproc.vpj +++ b/dbcon/dmlpackageproc/dmlpackageproc.vpj @@ -1,236 +1,236 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dbcon/dmlpackageproc/updatepackageprocessor.cpp b/dbcon/dmlpackageproc/updatepackageprocessor.cpp old mode 100644 new mode 100755 index 2cb858261..4cc355514 --- a/dbcon/dmlpackageproc/updatepackageprocessor.cpp +++ b/dbcon/dmlpackageproc/updatepackageprocessor.cpp @@ -512,16 +512,15 @@ uint64_t UpdatePackageProcessor::fixUpRows(dmlpackage::CalpontDMLPackage& cpacka //timer.stop("Meta"); continue; } - uint32_t amount = rgData.deserialize(msg, true); + rgData.deserialize(msg, true); rowGroup->setData(&rgData); //rowGroup->setData(const_cast(msg.buf())); err = (rowGroup->getStatus() != 0); if (err) { - msgBk.advance(amount); //msgBk.advance(rowGroup->getDataSize()); string errorMsg; - msgBk >> errorMsg; + msg >> errorMsg; logging::Message::Args args; logging::Message message(2); args.add("Update Failed: "); diff --git a/dbcon/execplan/CMakeLists.txt 
b/dbcon/execplan/CMakeLists.txt old mode 100644 new mode 100755 index bc6d841c8..eb354691b --- a/dbcon/execplan/CMakeLists.txt +++ b/dbcon/execplan/CMakeLists.txt @@ -1,5 +1,5 @@ -include_directories( ${ENGINE_COMMON_INCLUDES} ) +include_directories( ${ENGINE_COMMON_INCLUDES} ${ENGINE_UTILS_UDFSDK_INCLUDE} ) ########### next target ############### @@ -41,7 +41,8 @@ set(execplan_LIB_SRCS treenode.cpp treenodeimpl.cpp vendorexecutionplan.cpp - windowfunctioncolumn.cpp) + windowfunctioncolumn.cpp + udafcolumn.cpp) add_library(execplan SHARED ${execplan_LIB_SRCS}) diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h old mode 100644 new mode 100755 index 74e476835..188ac8c6c --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -70,7 +70,8 @@ public: BIT_AND, BIT_OR, BIT_XOR, - GROUP_CONCAT + GROUP_CONCAT, + UDAF }; /** diff --git a/dbcon/execplan/calpontsystemcatalog.cpp b/dbcon/execplan/calpontsystemcatalog.cpp old mode 100644 new mode 100755 index f072725b1..33fe2a7a3 --- a/dbcon/execplan/calpontsystemcatalog.cpp +++ b/dbcon/execplan/calpontsystemcatalog.cpp @@ -1051,7 +1051,7 @@ const CalpontSystemCatalog::ColType CalpontSystemCatalog::colType(const OID& Oid colMap.insert(CMVT_(scale, srcp)); srcp.reset(col[8]); colMap.insert(CMVT_(precision, srcp)); - // TODO: NULL value handling & convert to boost::any + // TODO: NULL value handling & convert to static_any::any // delete this manually at fcn exit srcp.reset(col[9]); colMap.insert(CMVT_(defaultvalue, srcp)); @@ -5045,7 +5045,7 @@ void CalpontSystemCatalog::getSchemaInfo(const string& in_schema) colMap.insert(CMVT_(scale, srcp)); srcp.reset(col[8]); colMap.insert(CMVT_(precision, srcp)); - // TODO: NULL value handling & convert to boost::any + // TODO: NULL value handling & convert to static_any::any // delete this manually at fcn exit srcp.reset(col[9]); colMap.insert(CMVT_(defaultvalue, srcp)); diff --git a/dbcon/execplan/calpontsystemcatalog.h 
b/dbcon/execplan/calpontsystemcatalog.h old mode 100644 new mode 100755 index fc9346e2b..c58e531bc --- a/dbcon/execplan/calpontsystemcatalog.h +++ b/dbcon/execplan/calpontsystemcatalog.h @@ -156,7 +156,8 @@ public: TEXT, /*!< TEXT type */ NUM_OF_COL_DATA_TYPE, /* NEW TYPES ABOVE HERE */ LONGDOUBLE, /* @bug3241, dev and variance calculation only */ - STRINT /* @bug3532, string as int for fast comparison */ + STRINT, /* @bug3532, string as int for fast comparison */ + UNDEFINED /*!< Undefined - used in UDAF API */ }; /** the set of column constraint types @@ -914,6 +915,32 @@ inline bool isCharType(const execplan::CalpontSystemCatalog::ColDataType type) /** convenience function to determine if column type is an * unsigned type */ +inline bool isNumeric(const execplan::CalpontSystemCatalog::ColDataType type) +{ + switch (type) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + case execplan::CalpontSystemCatalog::UFLOAT: + case execplan::CalpontSystemCatalog::UDOUBLE: + case execplan::CalpontSystemCatalog::UDECIMAL: + return true; + default: + return false; + } +} + inline bool isUnsigned(const execplan::CalpontSystemCatalog::ColDataType type) { switch (type) diff --git a/dbcon/execplan/execplan.vpj b/dbcon/execplan/execplan.vpj old mode 100644 new mode 100755 index 6f964bc80..f01df0d2a --- a/dbcon/execplan/execplan.vpj +++ b/dbcon/execplan/execplan.vpj @@ -213,6 +213,7 @@ + @@ -221,6 +222,7 @@ + @@ -232,6 
+234,7 @@ + @@ -258,6 +261,7 @@ + @@ -268,6 +272,7 @@ + @@ -282,7 +287,9 @@ + + +#include + +using namespace std; + +#include "bytestream.h" +using namespace messageqcpp; + +#include "rowgroup.h" +using namespace rowgroup; + +#include "joblisttypes.h" +using namespace joblist; + +#include "simplefilter.h" +#include "constantfilter.h" +#include "arithmeticcolumn.h" +#include "functioncolumn.h" +#include "objectreader.h" +#include "groupconcatcolumn.h" +#include "udafcolumn.h" + +namespace execplan { + +/** + * Constructors/Destructors + */ +UDAFColumn::UDAFColumn(): + AggregateColumn() +{ +} + +UDAFColumn::UDAFColumn(const uint32_t sessionID): + AggregateColumn(sessionID) +{ +} + +UDAFColumn::UDAFColumn(const UDAFColumn& rhs, const uint32_t sessionID): + AggregateColumn(dynamic_cast(rhs)) +{ +} + +UDAFColumn::~UDAFColumn() +{ +} + +/** + * Methods + */ + +const string UDAFColumn::toString() const +{ + ostringstream output; + output << "UDAFColumn " << endl; + output << AggregateColumn::toString() << endl; + output << context.toString() << endl; + return output.str(); +} + +ostream& operator<<(ostream& output, const UDAFColumn& rhs) +{ + output << rhs.toString(); + return output; +} + +void UDAFColumn::serialize(messageqcpp::ByteStream& b) const +{ + b << (uint8_t) ObjectReader::UDAFCOLUMN; + AggregateColumn::serialize(b); + context.serialize(b); +} + +void UDAFColumn::unserialize(messageqcpp::ByteStream& b) +{ + ObjectReader::checkType(b, ObjectReader::UDAFCOLUMN); + AggregateColumn::unserialize(b); + context.unserialize(b); +} + +bool UDAFColumn::operator==(const UDAFColumn& t) const +{ + const AggregateColumn *rc1, *rc2; + + rc1 = static_cast(this); + rc2 = static_cast(&t); + if (*rc1 != *rc2) + return false; + if (context != t.context) + return false; + return true; +} + +bool UDAFColumn::operator==(const TreeNode* t) const +{ + const UDAFColumn *ac; + + ac = dynamic_cast(t); + if (ac == NULL) + return false; + return *this == *ac; +} + +bool 
UDAFColumn::operator!=(const UDAFColumn& t) const +{ + return !(*this == t); +} + +bool UDAFColumn::operator!=(const TreeNode* t) const +{ + return !(*this == t); +} + +} // namespace execplan diff --git a/dbcon/execplan/udafcolumn.h b/dbcon/execplan/udafcolumn.h new file mode 100755 index 000000000..7034a49b3 --- /dev/null +++ b/dbcon/execplan/udafcolumn.h @@ -0,0 +1,131 @@ +/* Copyright (C) 2014 InfiniDB, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id: groupconcatcolumn.h 9210 2013-01-21 14:10:42Z rdempsey $ +* +* +***********************************************************************/ +/** @file */ + +#ifndef UDAFCOLUMN_H +#define UDAFCOLUMN_H +#include + +#include "calpontselectexecutionplan.h" +#include "aggregatecolumn.h" +#include "mcsv1_udaf.h" + +namespace messageqcpp { +class ByteStream; +} + +using namespace mcsv1sdk; +/** + * Namespace + */ +namespace execplan { +/** + * @brief A class to represent an aggregate return column + * + * This class is a specialization of class ReturnedColumn that + * handles a user defined aggregate function (UDAF) call. 
+ */ +class UDAFColumn : public AggregateColumn { + +public: + + /** + * Constructors + */ + UDAFColumn(); + + UDAFColumn(const uint32_t sessionID); + + UDAFColumn(const UDAFColumn& rhs, const uint32_t sessionID=0); + + /** + * Destructors + */ + virtual ~UDAFColumn(); + + /** + * Overloaded stream operator + */ + virtual const std::string toString() const; + + /** return a copy of this pointer + * + * deep copy of this pointer and return the copy + */ + virtual UDAFColumn* clone() const { return new UDAFColumn(*this); } + + /** + * Accessors and Mutators + */ + mcsv1Context& getContext() {return context;} + + /** + * Serialize interface + */ + virtual void serialize(messageqcpp::ByteStream&) const; + virtual void unserialize(messageqcpp::ByteStream&); + + /** @brief Do a deep, strict (as opposed to semantic) equivalence test + * + * Do a deep, strict (as opposed to semantic) equivalence test. + * @return true iff every member of t is a duplicate copy of every member of this; + * false otherwise + */ + virtual bool operator==(const TreeNode* t) const; + + /** @brief Do a deep, strict (as opposed to semantic) equivalence test + * + * Do a deep, strict (as opposed to semantic) equivalence test. + * @return true iff every member of t is a duplicate copy of every member of this; + * false otherwise + */ + virtual bool operator==(const UDAFColumn& t) const; + + /** @brief Do a deep, strict (as opposed to semantic) equivalence test + * + * Do a deep, strict (as opposed to semantic) equivalence test. + * @return false iff every member of t is a duplicate copy of every member of this; + * true otherwise + */ + virtual bool operator!=(const TreeNode* t) const; + + /** @brief Do a deep, strict (as opposed to semantic) equivalence test + * + * Do a deep, strict (as opposed to semantic) equivalence test. 
+ * @return false iff every member of t is a duplicate copy of every member of this; + * true otherwise + */ + virtual bool operator!=(const UDAFColumn& t) const; + +private: + mcsv1Context context; +}; + +/** +* stream operator +*/ +std::ostream& operator<<(std::ostream& os, const UDAFColumn& rhs); + +} +#endif //UDAFCOLUMN_H + diff --git a/dbcon/execplan/wf_frame.h b/dbcon/execplan/wf_frame.h new file mode 100755 index 000000000..34b393b87 --- /dev/null +++ b/dbcon/execplan/wf_frame.h @@ -0,0 +1,95 @@ +/* Copyright (C) 2014 InfiniDB, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +/*********************************************************************** +* $Id: wf_frame.h 9679 2017-06-11 $ +* +* +***********************************************************************/ + +/** @file */ + +#ifndef WINDOW_FUNCTION_FRAME_H +#define WINDOW_FUNCTION_FRAME_H +#include +#include +#include + +#include "returnedcolumn.h" + +/** + * Namespace + */ +namespace execplan { + +// This enum is made consistant with mysql Item_window_func +enum WF_FRAME +{ + WF_PRECEDING = 0, + WF_FOLLOWING, + WF_UNBOUNDED_PRECEDING, + WF_UNBOUNDED_FOLLOWING, + WF_CURRENT_ROW, + WF_UNKNOWN +}; + +struct WF_Boundary +{ + WF_Boundary() {} + WF_Boundary(WF_FRAME frame):fFrame(frame) {} + ~WF_Boundary() {} + const std::string toString() const; + void serialize(messageqcpp::ByteStream&) const; + void unserialize(messageqcpp::ByteStream&); + SRCP fVal; /// has to evaluate to unsigned value + SRCP fBound; /// order by col +, -, date_add or date_sub for RANGE window + enum WF_FRAME fFrame; +}; + +struct WF_Frame +{ + WF_Frame(): fIsRange(true) + { + fStart.fFrame = WF_UNBOUNDED_PRECEDING; + fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; + } + ~WF_Frame() {} + const std::string toString() const; + void serialize(messageqcpp::ByteStream&) const; + void unserialize(messageqcpp::ByteStream&); + WF_Boundary fStart; + WF_Boundary fEnd; + bool fIsRange; /// RANGE or ROWS +}; + +/** + * @brief A class to represent the order by clause of window function + */ +struct WF_OrderBy +{ + WF_OrderBy() {} + WF_OrderBy(std::vector orders): fOrders(orders) {} + ~WF_OrderBy() {}; + const std::string toString() const; + void serialize(messageqcpp::ByteStream&) const; + void unserialize(messageqcpp::ByteStream&); + std::vector fOrders; + WF_Frame fFrame; +}; + +} +#endif diff --git a/dbcon/execplan/windowfunctioncolumn.cpp b/dbcon/execplan/windowfunctioncolumn.cpp old mode 100644 new mode 100755 index 25d288045..04d9e1642 --- a/dbcon/execplan/windowfunctioncolumn.cpp +++ 
b/dbcon/execplan/windowfunctioncolumn.cpp @@ -257,6 +257,7 @@ void WindowFunctionColumn::serialize(messageqcpp::ByteStream& b) const for (uint32_t i = 0; i < fPartitions.size(); i++) fPartitions[i]->serialize(b); fOrderBy.serialize(b); + udafContext.serialize(b); } void WindowFunctionColumn::unserialize(messageqcpp::ByteStream& b) @@ -283,6 +284,7 @@ void WindowFunctionColumn::unserialize(messageqcpp::ByteStream& b) fPartitions.push_back(srcp); } fOrderBy.unserialize(b); + udafContext.unserialize(b); } void WindowFunctionColumn::addToPartition(vector& groupByList) diff --git a/dbcon/execplan/windowfunctioncolumn.h b/dbcon/execplan/windowfunctioncolumn.h old mode 100644 new mode 100755 index 477660b2f..ab0c47b41 --- a/dbcon/execplan/windowfunctioncolumn.h +++ b/dbcon/execplan/windowfunctioncolumn.h @@ -31,6 +31,8 @@ #include "returnedcolumn.h" #include "functor.h" +#include "mcsv1_udaf.h" +#include "wf_frame.h" namespace messageqcpp { class ByteStream; @@ -41,61 +43,6 @@ class ByteStream; */ namespace execplan { -// This enum is made consistant with mysql Item_window_func -enum WF_FRAME -{ - WF_PRECEDING = 0, - WF_FOLLOWING, - WF_UNBOUNDED_PRECEDING, - WF_UNBOUNDED_FOLLOWING, - WF_CURRENT_ROW, - WF_UNKNOWN -}; - -struct WF_Boundary -{ - WF_Boundary() {} - WF_Boundary(WF_FRAME frame):fFrame(frame) {} - ~WF_Boundary() {} - const std::string toString() const; - void serialize(messageqcpp::ByteStream&) const; - void unserialize(messageqcpp::ByteStream&); - SRCP fVal; /// has to evaluate to unsigned value - SRCP fBound; /// order by col +, -, date_add or date_sub for RANGE window - enum WF_FRAME fFrame; -}; - -struct WF_Frame -{ - WF_Frame(): fIsRange(true) - { - fStart.fFrame = WF_UNBOUNDED_PRECEDING; - fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; - } - ~WF_Frame() {} - const std::string toString() const; - void serialize(messageqcpp::ByteStream&) const; - void unserialize(messageqcpp::ByteStream&); - WF_Boundary fStart; - WF_Boundary fEnd; - bool fIsRange; /// RANGE or ROWS 
-}; - -/** - * @brief A class to represent the order by clause of window function - */ -struct WF_OrderBy -{ - WF_OrderBy() {} - WF_OrderBy(std::vector orders): fOrders(orders) {} - ~WF_OrderBy() {}; - const std::string toString() const; - void serialize(messageqcpp::ByteStream&) const; - void unserialize(messageqcpp::ByteStream&); - std::vector fOrders; - WF_Frame fFrame; -}; - /** * @brief A class to represent a functional column * @@ -182,6 +129,9 @@ public: virtual bool hasWindowFunc(); void adjustResultType(); + // UDAnF support + mcsv1sdk::mcsv1Context& getUDAFContext() {return udafContext;} + private: /** * Fields @@ -197,6 +147,8 @@ private: virtual bool operator!=(const TreeNode* t) const { return false; } bool operator!=(const WindowFunctionColumn& t) const; + // UDAnF support + mcsv1sdk::mcsv1Context udafContext; /*********************************************************** * F&E framework * ***********************************************************/ diff --git a/dbcon/ingres/ingres.vpj b/dbcon/ingres/ingres.vpj old mode 100644 new mode 100755 index ecfaba87e..415db83c3 --- a/dbcon/ingres/ingres.vpj +++ b/dbcon/ingres/ingres.vpj @@ -1,220 +1,220 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp old mode 100644 new mode 100755 index da1bacba2..47cb54a5b --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -58,6 +58,7 @@ using namespace boost; #include "simplecolumn.h" #include "rowcolumn.h" #include "treenodeimpl.h" +#include "udafcolumn.h" using namespace execplan; #include "configcpp.h" @@ -697,6 +698,22 
@@ void updateAggregateColType(AggregateColumn* ac, const SRCP& srcp, int op, JobIn ct.scale = 0; ct.precision = 0; } + else if (op == AggregateColumn::UDAF) + { + UDAFColumn* udafc = dynamic_cast(ac); + if (udafc) + { + mcsv1Context& udafContext = udafc->getContext(); + ct.colDataType = udafContext.getResultType(); + ct.colWidth = udafContext.getColWidth(); + ct.scale = udafContext.getScale(); + ct.precision = udafContext.getPrecision(); + } + else + { + ct = ac->resultType(); + } + } else { ct = ac->resultType(); @@ -883,7 +900,16 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (ac->constCol().get() != NULL) { // replace the aggregate on constant with a count(*) - SRCP clone(new AggregateColumn(*ac, ac->sessionID())); + SRCP clone; + UDAFColumn* udafc = dynamic_cast(ac); + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*ac, ac->sessionID())); + } jobInfo.constAggregate.insert(make_pair(i, clone)); ac->aggOp(AggregateColumn::COUNT_ASTERISK); ac->distinct(false); diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp old mode 100644 new mode 100755 index 867caa6ff..f533415df --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -47,6 +47,7 @@ using namespace config; #include "calpontsystemcatalog.h" #include "aggregatecolumn.h" +#include "udafcolumn.h" #include "arithmeticcolumn.h" #include "functioncolumn.h" #include "constantcolumn.h" @@ -100,6 +101,7 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::BIT_XOR: return ROWAGG_BIT_XOR; case AggregateColumn::GROUP_CONCAT: return ROWAGG_GROUP_CONCAT; case AggregateColumn::CONSTANT: return ROWAGG_CONSTANT; + case AggregateColumn::UDAF: return ROWAGG_UDAF; default: return ROWAGG_FUNCT_UNDEFINE; } } @@ -695,8 +697,18 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) ConstantColumn* cc = 
dynamic_cast(ac->constCol().get()); idbassert(cc != NULL); // @bug5261 bool isNull = (ConstantColumn::NULLDATA == cc->type()); - constAggDataVec.push_back( - ConstantAggData(cc->constval(), functionIdMap(ac->aggOp()), isNull)); + UDAFColumn* udafc = dynamic_cast(ac); + if (udafc) + { + constAggDataVec.push_back( + ConstantAggData(cc->constval(), udafc->getContext().getName(), + functionIdMap(ac->aggOp()), isNull)); + } + else + { + constAggDataVec.push_back( + ConstantAggData(cc->constval(), functionIdMap(ac->aggOp()), isNull)); + } } } } @@ -1097,7 +1109,24 @@ void TupleAggregateStep::prep1PhaseAggregate( } } - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colProj, i)); + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.nonConstCols[i].get()); + if (udafc) + { + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); + } + else + { + throw logic_error("prep1PhasesAggregate: A UDAF function is called but there's no UDAFColumn"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, i)); + } functionVec.push_back(funct); switch (aggOp) @@ -1224,6 +1253,23 @@ void TupleAggregateStep::prep1PhaseAggregate( } break; + case ROWAGG_UDAF: + { + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funct.get()); + if (!udafFuncCol) + { + throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } + // Return column + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(udafFuncCol->fUDAFContext.getScale()); + precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); + typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); + widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + break; + } + default: { ostringstream emsg; @@ -1285,14 +1331,34 @@ void TupleAggregateStep::prep1PhaseAggregate( } } - // add auxiliary 
fields for statistics functions + // add auxiliary fields for UDAF and statistics functions for (uint64_t i = 0; i < functionVec.size(); i++) { + uint64_t j = functionVec[i]->fInputColumnIndex; + + if (functionVec[i]->fAggFunction == ROWAGG_UDAF) + { + // UDAF user data + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(functionVec[i].get()); + if (!udafFuncCol) + { + throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + functionVec[i]->fAuxColumnIndex = lastCol++; + oidsAgg.push_back(oidsAgg[j]); + keysAgg.push_back(keysAgg[j]); + scaleAgg.push_back(0); + precisionAgg.push_back(0); + precisionAgg.push_back(0); + typeAgg.push_back(CalpontSystemCatalog::VARBINARY); + widthAgg.push_back(udafFuncCol->fUDAFContext.getUserDataSize()+2); + continue; + } + if (functionVec[i]->fAggFunction != ROWAGG_STATS) continue; functionVec[i]->fAuxColumnIndex = lastCol; - uint64_t j = functionVec[i]->fInputColumnIndex; // sum(x) oidsAgg.push_back(oidsAgg[j]); @@ -1527,7 +1593,25 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( continue; uint64_t colProj = projColPosMap[aggKey]; - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colProj, colAgg)); + + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.nonConstCols[i].get()); + if (udafc) + { + // Create a RowAggFunctionCol (UDAF subtype) with the context. 
+ funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); + } + else + { + throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no UDAFColumn"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, colAgg)); + } functionVec1.push_back(funct); aggFuncMap.insert(make_pair(make_pair(aggKey, aggOp), colAgg)); @@ -1671,6 +1755,32 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } break; + case ROWAGG_UDAF: + { + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funct.get()); + if (!udafFuncCol) + { + throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + } + // Return column + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(udafFuncCol->fUDAFContext.getScale()); + precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); + typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); + widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + colAgg++; + // UDAF Dummy holder for UserData struct + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(0); + precisionAgg.push_back(0); + typeAgg.push_back(CalpontSystemCatalog::VARBINARY); + widthAgg.push_back(udafFuncCol->fUDAFContext.getUserDataSize()+2); // Binary column needs +2 for length bytes + funct->fAuxColumnIndex = colAgg++; + break; + } + default: { ostringstream emsg; @@ -2090,14 +2200,33 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } - // add auxiliary fields for statistics functions + // add auxiliary fields for UDAF and statistics functions for (uint64_t i = 0; i < functionVec2.size(); i++) { + uint64_t j = functionVec2[i]->fInputColumnIndex; + + if (functionVec2[i]->fAggFunction == ROWAGG_UDAF) + { + // Dummy Column for UDAF user data + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(functionVec2[i].get()); + if (!udafFuncCol) + { + throw logic_error("(9)A UDAF function is 
called but there's no RowUDAFFunctionCol"); + } + functionVec2[i]->fAuxColumnIndex = lastCol++; + oidsAggDist.push_back(oidsAggDist[j]); // Dummy? + keysAggDist.push_back(keysAggDist[j]); // Dummy? + scaleAggDist.push_back(0); + precisionAggDist.push_back(0); + typeAggDist.push_back(CalpontSystemCatalog::VARBINARY); + widthAggDist.push_back(udafFuncCol->fUDAFContext.getUserDataSize()+2); + continue; + } + if (functionVec2[i]->fAggFunction != ROWAGG_STATS) continue; functionVec2[i]->fAuxColumnIndex = lastCol; - uint64_t j = functionVec2[i]->fInputColumnIndex; // sum(x) oidsAggDist.push_back(oidsAggDist[j]); @@ -2344,22 +2473,22 @@ void TupleAggregateStep::prep2PhasesAggregate( { // check if there are any aggregate columns // a vector that has the aggregate function to be done by PM - vector > aggColVec; +// vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; - for (uint64_t i = 0; i < returnedColVec.size(); i++) - { +// for (uint64_t i = 0; i < returnedColVec.size(); i++) +// { // skip if not an aggregation column - if (returnedColVec[i].second == 0) - continue; +// if (returnedColVec[i].second == 0) +// continue; - aggColVec.push_back(returnedColVec[i]); +// aggColVec.push_back(returnedColVec[i]); // remember if a column has an average function, // with avg function, no need for separate sum or count_column_name - if (returnedColVec[i].second == AggregateColumn::AVG) - avgSet.insert(returnedColVec[i].first); - } +// if (returnedColVec[i].second == AggregateColumn::AVG) +// avgSet.insert(returnedColVec[i].first); +// } // populate the aggregate rowgroup on PM and UM // PM: projectedRG -> aggregateRGPM @@ -2480,11 +2609,15 @@ void TupleAggregateStep::prep2PhasesAggregate( } // vectors for aggregate functions - for (uint64_t i = 0; i < aggColVec.size(); i++) + for (uint64_t i = 0; i < returnedColVec.size(); i++) { - uint32_t aggKey = aggColVec[i].first; - RowAggFunctionType aggOp = functionIdMap(aggColVec[i].second); - RowAggFunctionType 
stats = statsFuncIdMap(aggColVec[i].second); + // skip if not an aggregation column + if (returnedColVec[i].second == 0) + continue; + + uint32_t aggKey = returnedColVec[i].first; + RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); + RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); // skip on PM if this is a constant if (aggOp == ROWAGG_CONSTANT) @@ -2504,7 +2637,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && - (avgSet.find(aggKey) != avgSet.end())) + (returnedColVec[i].second == AggregateColumn::AVG)) +// (avgSet.find(aggKey) != avgSet.end())) // skip sum / count(column) if avg is also selected continue; @@ -2513,7 +2647,24 @@ void TupleAggregateStep::prep2PhasesAggregate( continue; uint64_t colProj = projColPosMap[aggKey]; - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colProj, colAggPm)); + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.nonConstCols[i].get()); + if (udafc) + { + // Create a RowAggFunctionCol (UDAF subtype) with the context. 
+ funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); + } + else + { + throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no UDAFColumn"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, colAggPm)); + } functionVecPm.push_back(funct); aggFuncMap.insert(make_pair(make_pair(aggKey, aggOp), colAggPm)); @@ -2667,7 +2818,31 @@ void TupleAggregateStep::prep2PhasesAggregate( colAggPm++; } break; - + case ROWAGG_UDAF: + { + // Return column + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funct.get()); + if (!udafFuncCol) + { + throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(udafFuncCol->fUDAFContext.getScale()); + precisionAggPm.push_back(udafFuncCol->fUDAFContext.getPrecision()); + typeAggPm.push_back(udafFuncCol->fUDAFContext.getResultType()); + widthAggPm.push_back(udafFuncCol->fUDAFContext.getColWidth()); + colAggPm++; + // Dummy Column for UDAF UserData struct + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(0); + precisionAggPm.push_back(0); + typeAggPm.push_back(CalpontSystemCatalog::VARBINARY); + widthAggPm.push_back(udafFuncCol->fUDAFContext.getUserDataSize()+2); // Binary column needs +2 for length bytes + funct->fAuxColumnIndex = colAggPm++; + break; + } default: { ostringstream emsg; @@ -2849,7 +3024,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colPm, i)); + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.nonConstCols[i].get()); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = 
colPm; else if (aggOp == ROWAGG_CONSTANT) @@ -2909,14 +3093,33 @@ void TupleAggregateStep::prep2PhasesAggregate( } } - // add auxiliary fields for statistics functions + // add auxiliary fields for UDAF and statistics functions for (uint64_t i = 0; i < functionVecUm.size(); i++) { + uint64_t j = functionVecUm[i]->fInputColumnIndex; + + if (functionVecUm[i]->fAggFunction == ROWAGG_UDAF) + { + // Dummy column for UDAF user data + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(functionVecUm[i].get()); + if (!udafFuncCol) + { + throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + functionVecUm[i]->fAuxColumnIndex = lastCol++; + oidsAggUm.push_back(oidsAggUm[j]); // Dummy? + keysAggUm.push_back(keysAggUm[j]); // Dummy? + scaleAggUm.push_back(0); + precisionAggUm.push_back(0); + typeAggUm.push_back(CalpontSystemCatalog::VARBINARY); + widthAggUm.push_back(udafFuncCol->fUDAFContext.getUserDataSize()+2); + continue; + } + if (functionVecUm[i]->fAggFunction != ROWAGG_STATS) continue; functionVecUm[i]->fAuxColumnIndex = lastCol; - uint64_t j = functionVecUm[i]->fInputColumnIndex; // sum(x) oidsAggUm.push_back(oidsAggUm[j]); @@ -3170,7 +3373,24 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( continue; uint64_t colProj = projColPosMap[aggKey]; - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colProj, colAggPm)); + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.nonConstCols[i].get()); + if (udafc) + { + // Create a RowAggFunctionCol (UDAF subtype) with the context. 
+ funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); + } + else + { + throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no UDAFColumn"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, colAggPm)); + } functionVecPm.push_back(funct); aggFuncMap.insert(make_pair(make_pair(aggKey, aggOp), colAggPm)); @@ -3314,6 +3534,32 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } break; + case ROWAGG_UDAF: + { + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funct.get()); + if (!udafFuncCol) + { + throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + // Return column + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(udafFuncCol->fUDAFContext.getScale()); + precisionAggPm.push_back(udafFuncCol->fUDAFContext.getPrecision()); + typeAggPm.push_back(udafFuncCol->fUDAFContext.getResultType()); + widthAggPm.push_back(udafFuncCol->fUDAFContext.getColWidth()); + colAggPm++; + // Dummy column for UDAF UserData struct + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(0); + precisionAggPm.push_back(0); + typeAggPm.push_back(CalpontSystemCatalog::VARBINARY); + widthAggPm.push_back(udafFuncCol->fUDAFContext.getUserDataSize()+2); // Binary column needs +2 for length bytes + funct->fAuxColumnIndex = colAggPm++; + break; + } + default: { ostringstream emsg; @@ -3337,14 +3583,29 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) { + SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - funcPm->fAggFunction, - funcPm->fStatsFunction, - funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); - functionNoDistVec.push_back(funct); + // UDAF support + if (funcPm->fAggFunction == ROWAGG_UDAF) + { + 
RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + funct.reset(new RowUDAFFunctionCol( + udafFuncCol->fUDAFContext, + udafFuncCol->fOutputColumnIndex, + udafFuncCol->fOutputColumnIndex, + udafFuncCol->fAuxColumnIndex)); + functionNoDistVec.push_back(funct); + } + else + { + funct.reset(new RowAggFunctionCol( + funcPm->fAggFunction, + funcPm->fStatsFunction, + funcPm->fOutputColumnIndex, + funcPm->fOutputColumnIndex, + funcPm->fAuxColumnIndex)); + functionNoDistVec.push_back(funct); + } } posAggUm = posAggPm; @@ -3611,7 +3872,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colUm, i)); + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.nonConstCols[i].get()); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colUm; else if (aggOp == ROWAGG_CONSTANT) @@ -3704,14 +3974,32 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } } - // add auxiliary fields for statistics functions + // add auxiliary fields for UDAF and statistics functions for (uint64_t i = 0; i < functionVecUm.size(); i++) { + uint64_t j = functionVecUm[i]->fInputColumnIndex; + + if (functionVecUm[i]->fAggFunction == ROWAGG_UDAF) + { + // Dummy column for UDAF user data + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(functionVecUm[i].get()); + if (!udafFuncCol) + { + throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + functionVecUm[i]->fAuxColumnIndex = lastCol++; + oidsAggDist.push_back(oidsAggUm[j]); // Dummy? + keysAggDist.push_back(keysAggUm[j]); // Dummy? 
+ scaleAggDist.push_back(0); + precisionAggDist.push_back(0); + typeAggDist.push_back(CalpontSystemCatalog::VARBINARY); + widthAggDist.push_back(udafFuncCol->fUDAFContext.getUserDataSize()+2); + continue; + } if (functionVecUm[i]->fAggFunction != ROWAGG_STATS) continue; functionVecUm[i]->fAuxColumnIndex = lastCol; - uint64_t j = functionVecUm[i]->fInputColumnIndex; // sum(x) oidsAggDist.push_back(oidsAggDist[j]); @@ -3880,27 +4168,39 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector::iterator it = functionVecUm.begin(); while (it != functionVecUm.end()) { + SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t f = *it++; - if ((f->fOutputColumnIndex == k) && - (f->fAggFunction == ROWAGG_COUNT_ASTERISK || - f->fAggFunction == ROWAGG_COUNT_COL_NAME || - f->fAggFunction == ROWAGG_SUM || - f->fAggFunction == ROWAGG_AVG || - f->fAggFunction == ROWAGG_MIN || - f->fAggFunction == ROWAGG_MAX || - f->fAggFunction == ROWAGG_STATS || - f->fAggFunction == ROWAGG_BIT_AND || - f->fAggFunction == ROWAGG_BIT_OR || - f->fAggFunction == ROWAGG_BIT_XOR || - f->fAggFunction == ROWAGG_CONSTANT)) + if (f->fOutputColumnIndex == k) { - SP_ROWAGG_FUNC_t funct( - new RowAggFunctionCol( - f->fAggFunction, - f->fStatsFunction, - f->fInputColumnIndex, - f->fOutputColumnIndex, - f->fAuxColumnIndex)); + if (f->fAggFunction == ROWAGG_UDAF) + { + RowUDAFFunctionCol* udafFuncCol = dynamic_cast(f.get()); + funct.reset(new RowUDAFFunctionCol( + udafFuncCol->fUDAFContext, + udafFuncCol->fInputColumnIndex, + udafFuncCol->fOutputColumnIndex, + udafFuncCol->fAuxColumnIndex)); + functionSub2.push_back(funct); + } + else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || + f->fAggFunction == ROWAGG_COUNT_COL_NAME || + f->fAggFunction == ROWAGG_SUM || + f->fAggFunction == ROWAGG_AVG || + f->fAggFunction == ROWAGG_MIN || + f->fAggFunction == ROWAGG_MAX || + f->fAggFunction == ROWAGG_STATS || + f->fAggFunction == ROWAGG_BIT_AND || + f->fAggFunction == ROWAGG_BIT_OR || + f->fAggFunction == ROWAGG_BIT_XOR || + 
f->fAggFunction == ROWAGG_CONSTANT) + { + funct.reset( + new RowAggFunctionCol( + f->fAggFunction, + f->fStatsFunction, + f->fInputColumnIndex, + f->fOutputColumnIndex, + f->fAuxColumnIndex)); functionSub2.push_back(funct); } } @@ -4292,6 +4592,7 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID) for (uint32_t j = 0; j < multiDist->subAggregators().size(); j++) { fRowGroupIns[threadID].getRow(0, &rowIn); + rowIn.setUserDataStore(rgDatas[c].getUserDataStore()); for (uint64_t i = 0; i < fRowGroupIns[threadID].getRowCount(); ++i) { for (uint64_t k = 0; @@ -4313,6 +4614,7 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID) { fRowGroupIns[threadID].setData(&rgDatas[c]); fRowGroupIns[threadID].getRow(0, &rowIn); + rowIn.setUserDataStore(rgDatas[c].getUserDataStore()); for (uint64_t i = 0; i < fRowGroupIns[threadID].getRowCount(); ++i) { // The key is the groupby columns, which are the leading columns. diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp old mode 100644 new mode 100755 index 1b42e9fd7..5b71aaf4f --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -146,6 +146,7 @@ WindowFunctionStep::WindowFunctionStep(const JobInfo& jobInfo) : fEndOfResult(false), fIsSelect(true), fUseSSMutex(false), + fUseUFMutex(false), fInputDL(NULL), fOutputDL(NULL), fInputIterator(-1), @@ -531,6 +532,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // @bug6065, window functions that will update string table int64_t wfsUpdateStringTable = 0; + int64_t wfsUserFunctionCount = 0; for (RetColsVector::iterator i=jobInfo.windowCols.begin(); iresultType(); if ((types[ridx] == CalpontSystemCatalog::CHAR || types[ridx] == CalpontSystemCatalog::VARCHAR || - types[ridx] == CalpontSystemCatalog::TEXT) && + types[ridx] == CalpontSystemCatalog::TEXT || + types[ridx] == CalpontSystemCatalog::VARBINARY || + types[ridx] == CalpontSystemCatalog::BLOB) && 
rg.getColumnWidth(ridx) >= jobInfo.stringTableThreshold) { - wfsUpdateStringTable++; + ++wfsUpdateStringTable; } } +// if (boost::iequals(wc->functionName(),"UDAF_FUNC") + if (wc->functionName() == "UDAF_FUNC") + { + ++wfsUserFunctionCount; + } vector fields; fields.push_back(ridx); // result @@ -609,7 +618,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // create the functor based on function name boost::shared_ptr func = - WindowFunctionType::makeWindowFunction(fn, ct); + WindowFunctionType::makeWindowFunction(fn, ct, wc); // parse parms after peer and fields are set // functions may need to set order column index @@ -751,7 +760,8 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) if (wfsUpdateStringTable > 1) fUseSSMutex = true; - + if (wfsUserFunctionCount > 1) + fUseUFMutex = true; fRowGroupOut = fRowGroupDelivered; } @@ -799,6 +809,8 @@ void WindowFunctionStep::execute() //@bug6065, make StringStore::storeString() thread safe, default to false. 
rgData.useStoreStringMutex(fUseSSMutex); + // For the User Data of UDAnF + rgData.useUserDataMutex(fUseUFMutex); // window function does not change row count fRowsReturned += rowCnt; diff --git a/dbcon/joblist/windowfunctionstep.h b/dbcon/joblist/windowfunctionstep.h old mode 100644 new mode 100755 index 73d47bacd..8edb7b48b --- a/dbcon/joblist/windowfunctionstep.h +++ b/dbcon/joblist/windowfunctionstep.h @@ -160,7 +160,7 @@ private: bool fEndOfResult; bool fIsSelect; bool fUseSSMutex; //@bug6065, mutex for setStringField - + bool fUseUFMutex; // To ensure thread safety of User Data (UDAnF) // for input/output datalist RowGroupDL* fInputDL; RowGroupDL* fOutputDL; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index ce240b76d..ec449be19 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -80,6 +80,7 @@ using namespace cal_impl_if; #include "groupconcatcolumn.h" #include "outerjoinonfilter.h" #include "intervalcolumn.h" +#include "udafcolumn.h" using namespace execplan; #include "funcexp.h" @@ -447,6 +448,9 @@ void debug_walk(const Item *item, void *arg) case Item_sum::MAX_FUNC: cout << "MAX_FUNC: " << item_name << endl; break; + case Item_sum::UDF_SUM_FUNC: + cout << "UDAF_FUNC: " << item_name << endl; + break; default: cout << "SUM_FUNC_ITEM type=" << isp->sum_func() << endl; break; @@ -2135,6 +2139,9 @@ uint32_t setAggOp(AggregateColumn* ac, Item_sum* isp) return ER_CHECK_NOT_IMPLEMENTED; return rc; } + case Item_sum::UDF_SUM_FUNC: + ac->aggOp(AggregateColumn::UDAF); + return rc; default: return ER_CHECK_NOT_IMPLEMENTED; } @@ -3527,7 +3534,9 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.aggOnSelect = true; // N.B. argument_count() is the # of formal parms to the agg fcn. 
InifniDB only supports 1 argument - if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC) + // TODO: Support more than one parm + if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC + && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { gwi.fatalParseError = true; gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); @@ -3536,9 +3545,18 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { ac = new GroupConcatColumn(gwi.sessionid); + } else + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + ac = new UDAFColumn(gwi.sessionid); + } + else + { ac = new AggregateColumn(gwi.sessionid); + } if (isp->name) ac->alias(isp->name); @@ -3894,6 +3912,53 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.count_asterisk_list.push_back(ac); } + + // For UDAF, populate the context and call the UDAF init() function. + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + COL_TYPES colTypes; + execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; + + // Build the column type vector. 
For now, there is only one + colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); + + // Call the user supplied init() + if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + return NULL; + } + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } return ac; } diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 0400d2999..0b14822b6 100755 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -582,6 +582,8 @@ int fetchNextRow(uchar *buf, cal_table_info& ti, cal_connection_info* ci) (*f)->field_length = 40; //float float_val = *(float*)(&value); //f2->store(float_val); + if (f2->decimals() < (uint32_t)row.getScale(s)) + f2->dec = (uint32_t)row.getScale(s); f2->store(dl); if ((*f)->null_ptr) *(*f)->null_ptr &= ~(*f)->null_bit; @@ -603,6 +605,8 @@ int fetchNextRow(uchar *buf, cal_table_info& ti, cal_connection_info* ci) (*f)->field_length = 310; //double double_val = *(double*)(&value); //f2->store(double_val); + if (f2->decimals() < (uint32_t)row.getScale(s)) + f2->dec = (uint32_t)row.getScale(s); f2->store(dl); if ((*f)->null_ptr) *(*f)->null_ptr &= ~(*f)->null_bit; diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 7361dfd51..c53b0094c 100755 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -45,6 +45,9 @@ using namespace 
logging; #include "funcexp.h" using namespace funcexp; +#include "mcsv1_udaf.h" +using namespace mcsv1sdk; + namespace cal_impl_if { ReturnedColumn* nullOnError(gp_walk_info& gwi) @@ -232,7 +235,7 @@ string ConvertFuncName(Item_sum* item) return "BIT_XOR"; break; case Item_sum::UDF_SUM_FUNC: - return "UDF_SUM_FUNC"; // Not supported + return "UDAF_FUNC"; break; case Item_sum::GROUP_CONCAT_FUNC: return "GROUP_CONCAT"; // Not supported @@ -286,14 +289,14 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n gwi.hasWindowFunc = true; Item_window_func* wf = (Item_window_func*)item; - string funcName = ConvertFuncName(wf->window_func()); + Item_sum* item_sum = wf->window_func(); + string funcName = ConvertFuncName(item_sum); WindowFunctionColumn* ac = new WindowFunctionColumn(funcName); - ac->distinct(wf->window_func()->has_with_distinct()); + ac->distinct(item_sum->has_with_distinct()); Window_spec *win_spec = wf->window_spec; SRCP srcp; // arguments vector funcParms; - Item_sum* item_sum = (Item_sum*)wf->arguments()[0]; for (uint32_t i = 0; i < item_sum->argument_count(); i++) { srcp.reset(buildReturnedColumn((item_sum->arguments()[i]), gwi, nonSupport)); @@ -303,17 +306,76 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n if (gwi.clauseType == WHERE && !gwi.rcWorkStack.empty()) gwi.rcWorkStack.pop(); } + + // Setup UDAnF functions + if (item_sum->sum_func() == Item_sum::UDF_SUM_FUNC) + { + Item_udf_sum* udfsum = (Item_udf_sum*)item_sum; + + mcsv1sdk::mcsv1Context& context = ac->getUDAFContext(); + context.setName(udfsum->func_name()); + + // Set up the return type defaults for the call to init() + execplan::CalpontSystemCatalog::ColType& rt = ac->resultType(); + context.setResultType(rt.colDataType); + context.setColWidth(rt.colWidth); + context.setScale(rt.scale); + context.setPrecision(rt.precision); + + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ context.setContextFlag(CONTEXT_IS_ANALYTIC); + + COL_TYPES colTypes; + execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; + + // Build the column type vector. + for (size_t i=0; i < funcParms.size(); ++i) + { + colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + } + + // Call the user supplied init() + if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = context.getErrorMessage(); + return NULL; + } + + if (!context.getRunFlag(UDAF_OVER_REQUIRED) && !context.getRunFlag(UDAF_OVER_ALLOWED)) + { + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_UDANF_NOT_ALLOWED, + context.getName()); + return nullOnError(gwi); + } + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + ac->resultType(ct); + } + // Some functions, such as LEAD/LAG don't have all parameters implemented in the // front end. Add dummies here to make the backend use defaults. // Some of these will be temporary until they are implemented in the front end. // Others need to stay because the back end expects them, but the front end // no longer sends them. // This case is kept in enum order in hopes the compiler can optimize - switch (wf->window_func()->sum_func()) + switch (item_sum->sum_func()) { - case Item_sum::COUNT_FUNC: - case Item_sum::COUNT_DISTINCT_FUNC: + case Item_sum::UDF_SUM_FUNC: + { + uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); + char sIgnoreNulls[18]; + sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); + srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT + funcParms.push_back(srcp); break; + } case Item_sum::FIRST_VALUE_FUNC: srcp.reset(new ConstantColumn("1", (uint64_t)1, ConstantColumn::NUM)); // OFFSET (always one) funcParms.push_back(srcp); @@ -365,251 +427,326 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->partitions(partitions); // Order by + WF_OrderBy orderBy; + // order columns if (win_spec->order_list) { - WF_OrderBy orderBy; - // order columns - if (win_spec->order_list) + // It is an error to have an order by clause if a UDAnF says it shouldn't + if (item_sum->sum_func() == Item_sum::UDF_SUM_FUNC) { - vector orders; - ORDER* orderCol = reinterpret_cast(win_spec->order_list->first); - for (; orderCol; orderCol= orderCol->next) + mcsv1sdk::mcsv1Context& context = ac->getUDAFContext(); + if (!context.getRunFlag(UDAF_ORDER_ALLOWED)) { - Item* orderItem = *(orderCol->item); - srcp.reset(buildReturnedColumn(orderItem, gwi, nonSupport)); - if (!srcp) - return nullOnError(gwi); - srcp->asc(orderCol->direction == ORDER::ORDER_ASC ? true : false); -// srcp->nullsFirst(orderCol->nulls); // nulls 2-default, 1-nulls first, 0-nulls last - srcp->nullsFirst(orderCol->direction == ORDER::ORDER_ASC ? 
1 : 0); // WINDOWS TODO: implement NULLS FIRST/LAST in 10.2 front end - orders.push_back(srcp); + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_UDANF_ORDER_NOT_ALLOWED, + context.getName()); + return nullOnError(gwi); } - orderBy.fOrders = orders; } - // window frame - WF_Frame frm; - if (win_spec->window_frame) + vector orders; + ORDER* orderCol = reinterpret_cast(win_spec->order_list->first); + for (; orderCol; orderCol= orderCol->next) { - frm.fIsRange = win_spec->window_frame->units == Window_frame::UNITS_RANGE; - // start - if (win_spec->window_frame->top_bound) + Item* orderItem = *(orderCol->item); + srcp.reset(buildReturnedColumn(orderItem, gwi, nonSupport)); + if (!srcp) + return nullOnError(gwi); + srcp->asc(orderCol->direction == ORDER::ORDER_ASC ? true : false); +// srcp->nullsFirst(orderCol->nulls); // nulls 2-default, 1-nulls first, 0-nulls last + srcp->nullsFirst(orderCol->direction == ORDER::ORDER_ASC ? 1 : 0); // WINDOWS TODO: implement NULLS FIRST/LAST in 10.2 front end + orders.push_back(srcp); + } + orderBy.fOrders = orders; + } + else + { + if (item_sum->sum_func() == Item_sum::UDF_SUM_FUNC) + { + mcsv1sdk::mcsv1Context& context = ac->getUDAFContext(); + if (context.getRunFlag(UDAF_ORDER_REQUIRED)) { - frm.fStart.fFrame = frame(win_spec->window_frame->top_bound->precedence_type, - win_spec->window_frame->top_bound->offset); // offset NULL means UNBOUNDED + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_UDANF_NOT_ALLOWED, + context.getName()); + return nullOnError(gwi); + } + } + } - if (win_spec->window_frame->top_bound->offset) + // window frame + WF_Frame frm; + if (win_spec->window_frame) + { + // It is an error to have a frame clause if a UDAnF says it shouldn't + if (item_sum->sum_func() == Item_sum::UDF_SUM_FUNC) + { + mcsv1sdk::mcsv1Context& context = ac->getUDAFContext(); + if (!context.getRunFlag(UDAF_WINDOWFRAME_ALLOWED)) + { + gwi.parseErrorText = + 
logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_UDANF_FRAME_NOT_ALLOWED, + context.getName()); + return nullOnError(gwi); + } + } + + frm.fIsRange = win_spec->window_frame->units == Window_frame::UNITS_RANGE; + // start + if (win_spec->window_frame->top_bound) + { + frm.fStart.fFrame = frame(win_spec->window_frame->top_bound->precedence_type, + win_spec->window_frame->top_bound->offset); // offset NULL means UNBOUNDED + + if (win_spec->window_frame->top_bound->offset) + { + frm.fStart.fVal.reset(buildReturnedColumn(win_spec->window_frame->top_bound->offset, gwi, nonSupport)); + if (!frm.fStart.fVal) + return nullOnError(gwi); + + // 1. check expr is numeric type (rows) or interval (range) + bool boundTypeErr = false; + switch (frm.fStart.fVal->resultType().colDataType) { - frm.fStart.fVal.reset(buildReturnedColumn(win_spec->window_frame->top_bound->offset, gwi, nonSupport)); - if (!frm.fStart.fVal) - return nullOnError(gwi); - - // 1. check expr is numeric type (rows) or interval (range) - bool boundTypeErr = false; - switch (frm.fStart.fVal->resultType().colDataType) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::CLOB: + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::CLOB: + boundTypeErr = true; + break; + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + if (!frm.fIsRange) boundTypeErr = true; - break; - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - if (!frm.fIsRange) - boundTypeErr = true; - else if (dynamic_cast(frm.fStart.fVal.get()) == NULL) - boundTypeErr = true; - break; - default: //okay - break; - } - if (boundTypeErr) - { - gwi.fatalParseError = true; - 
gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_BOUND_TYPE, - colDataTypeToString(frm.fStart.fVal->resultType().colDataType)); - return nullOnError(gwi); - } - } - } - - // end - if (win_spec->window_frame->bottom_bound) - { - frm.fEnd.fFrame = frame(win_spec->window_frame->bottom_bound->precedence_type, - win_spec->window_frame->bottom_bound->offset); - if (win_spec->window_frame->bottom_bound->offset) - { - frm.fEnd.fVal.reset(buildReturnedColumn(win_spec->window_frame->bottom_bound->offset, gwi, nonSupport)); - if (!frm.fEnd.fVal) - return nullOnError(gwi); - - // check expr is numeric type (rows) or interval (range) - bool boundTypeErr = false; - switch (frm.fEnd.fVal->resultType().colDataType) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::CLOB: + else if (dynamic_cast(frm.fStart.fVal.get()) == NULL) boundTypeErr = true; - break; - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - if (!frm.fIsRange) - boundTypeErr = true; - else if (dynamic_cast(frm.fEnd.fVal.get()) == NULL) - boundTypeErr = true; - break; - default: //okay - break; - } - if (boundTypeErr) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_BOUND_TYPE, - colDataTypeToString(frm.fStart.fVal->resultType().colDataType)); - return nullOnError(gwi); - } + break; + default: //okay + break; } - } - else // no end specified. default end to current row - { - frm.fEnd.fFrame = WF_CURRENT_ROW; - } - - if (frm.fStart.fVal || frm.fEnd.fVal) - { - // check order by key only 1 (should be error-ed out in parser. 
double check here) - if (frm.fIsRange && orderBy.fOrders.size() > 1) + if (boundTypeErr) { gwi.fatalParseError = true; gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_ORDER_KEY); + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_BOUND_TYPE, + colDataTypeToString(frm.fStart.fVal->resultType().colDataType)); return nullOnError(gwi); } + } + } - // check order by key type is numeric or date/datetime - bool orderTypeErr = false; - if (frm.fIsRange && orderBy.fOrders.size() == 1) + // end + if (win_spec->window_frame->bottom_bound) + { + frm.fEnd.fFrame = frame(win_spec->window_frame->bottom_bound->precedence_type, + win_spec->window_frame->bottom_bound->offset); + if (win_spec->window_frame->bottom_bound->offset) + { + frm.fEnd.fVal.reset(buildReturnedColumn(win_spec->window_frame->bottom_bound->offset, gwi, nonSupport)); + if (!frm.fEnd.fVal) + return nullOnError(gwi); + + // check expr is numeric type (rows) or interval (range) + bool boundTypeErr = false; + switch (frm.fEnd.fVal->resultType().colDataType) { - switch (orderBy.fOrders[0]->resultType().colDataType) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::CLOB: - orderTypeErr = true; - break; - default: //okay - // interval bound has to have date/datetime order key - if ((dynamic_cast(frm.fStart.fVal.get()) != NULL || - dynamic_cast(frm.fEnd.fVal.get()) != NULL)) - { - if (orderBy.fOrders[0]->resultType().colDataType != CalpontSystemCatalog::DATE && - orderBy.fOrders[0]->resultType().colDataType != CalpontSystemCatalog::DATETIME) - orderTypeErr = true; - } - else - { - if (orderBy.fOrders[0]->resultType().colDataType == CalpontSystemCatalog::DATETIME) - orderTypeErr = true; - } - break; - } - if (orderTypeErr) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - 
logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_ORDER_TYPE, - colDataTypeToString(orderBy.fOrders[0]->resultType().colDataType)); - return nullOnError(gwi); - } + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::CLOB: + boundTypeErr = true; + break; + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + if (!frm.fIsRange) + boundTypeErr = true; + else if (dynamic_cast(frm.fEnd.fVal.get()) == NULL) + boundTypeErr = true; + break; + default: //okay + break; + } + if (boundTypeErr) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_BOUND_TYPE, + colDataTypeToString(frm.fStart.fVal->resultType().colDataType)); + return nullOnError(gwi); } } + } + else // no end specified. default end to current row + { + frm.fEnd.fFrame = WF_CURRENT_ROW; + } - // construct +,- or interval function for boundary - if (frm.fIsRange && frm.fStart.fVal) + if (frm.fStart.fVal || frm.fEnd.fVal) + { + // check order by key only 1 (should be error-ed out in parser. 
double check here) + if (frm.fIsRange && orderBy.fOrders.size() > 1) { - frm.fStart.fBound.reset(buildBoundExp(frm.fStart, orderBy.fOrders[0], gwi)); + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_ORDER_KEY); + return nullOnError(gwi); + } + + // check order by key type is numeric or date/datetime + bool orderTypeErr = false; + if (frm.fIsRange && orderBy.fOrders.size() == 1) + { + switch (orderBy.fOrders[0]->resultType().colDataType) + { + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::CLOB: + orderTypeErr = true; + break; + default: //okay + // interval bound has to have date/datetime order key + if ((dynamic_cast(frm.fStart.fVal.get()) != NULL || + dynamic_cast(frm.fEnd.fVal.get()) != NULL)) + { + if (orderBy.fOrders[0]->resultType().colDataType != CalpontSystemCatalog::DATE && + orderBy.fOrders[0]->resultType().colDataType != CalpontSystemCatalog::DATETIME) + orderTypeErr = true; + } + else + { + if (orderBy.fOrders[0]->resultType().colDataType == CalpontSystemCatalog::DATETIME) + orderTypeErr = true; + } + break; + } + if (orderTypeErr) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_INVALID_ORDER_TYPE, + colDataTypeToString(orderBy.fOrders[0]->resultType().colDataType)); + return nullOnError(gwi); + } + } + } + + // construct +,- or interval function for boundary + if (frm.fIsRange && frm.fStart.fVal) + { + frm.fStart.fBound.reset(buildBoundExp(frm.fStart, orderBy.fOrders[0], gwi)); + if (!frm.fStart.fBound) + return nullOnError(gwi); + } + if (frm.fIsRange && frm.fEnd.fVal) + { + frm.fEnd.fBound.reset(buildBoundExp(frm.fEnd, orderBy.fOrders[0], gwi)); + if (!frm.fEnd.fVal) + return nullOnError(gwi); + } + } + else + { + // Certain function types have 
different default boundaries + // This case is kept in enum order in hopes the compiler can optimize + switch (item_sum->sum_func()) + { + case Item_sum::COUNT_FUNC: + case Item_sum::COUNT_DISTINCT_FUNC: + case Item_sum::SUM_FUNC: + case Item_sum::SUM_DISTINCT_FUNC: + case Item_sum::AVG_FUNC: + case Item_sum::AVG_DISTINCT_FUNC: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; + frm.fEnd.fFrame = WF_CURRENT_ROW; + break; + case Item_sum::MIN_FUNC: + case Item_sum::MAX_FUNC: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; +// frm.fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; + frm.fEnd.fFrame = WF_CURRENT_ROW; + break; + case Item_sum::STD_FUNC: + case Item_sum::VARIANCE_FUNC: + case Item_sum::SUM_BIT_FUNC: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; + frm.fEnd.fFrame = WF_CURRENT_ROW; + break; + case Item_sum::UDF_SUM_FUNC: + { + mcsv1sdk::mcsv1Context& context = ac->getUDAFContext(); + if (context.getRunFlag(UDAF_WINDOWFRAME_REQUIRED)) + { + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WF_UDANF_FRAME_REQUIRED, + context.getName()); + return nullOnError(gwi); + } + int32_t bound; + context.getStartFrame(frm.fStart.fFrame, bound); + if (frm.fStart.fFrame == execplan::WF_PRECEDING) + { + if (bound == 0) + bound = 1; + srcp.reset(new ConstantColumn((int64_t)bound)); + frm.fStart.fVal = srcp; + frm.fStart.fBound.reset(buildBoundExp(frm.fStart, srcp, gwi)); if (!frm.fStart.fBound) return nullOnError(gwi); } - if (frm.fIsRange && frm.fEnd.fVal) + + context.getEndFrame(frm.fEnd.fFrame, bound); + if (frm.fEnd.fFrame == execplan::WF_FOLLOWING) { - frm.fEnd.fBound.reset(buildBoundExp(frm.fEnd, orderBy.fOrders[0], gwi)); - if (!frm.fEnd.fVal) + if (bound == 0) + bound = 1; + srcp.reset(new ConstantColumn((int64_t)bound)); + frm.fEnd.fVal = srcp; + frm.fEnd.fBound.reset(buildBoundExp(frm.fEnd, srcp, gwi)); + if (!frm.fEnd.fBound) return nullOnError(gwi); } + break; } - else - { - // Certain function types have different default boundaries - // This case 
is kept in enum order in hopes the compiler can optimize - switch (wf->window_func()->sum_func()) - { - case Item_sum::COUNT_FUNC: - case Item_sum::COUNT_DISTINCT_FUNC: - case Item_sum::SUM_FUNC: - case Item_sum::SUM_DISTINCT_FUNC: - case Item_sum::AVG_FUNC: - case Item_sum::AVG_DISTINCT_FUNC: - frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; - frm.fEnd.fFrame = WF_CURRENT_ROW; - break; - case Item_sum::MIN_FUNC: - case Item_sum::MAX_FUNC: - frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; -// frm.fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; - frm.fEnd.fFrame = WF_CURRENT_ROW; - break; - case Item_sum::STD_FUNC: - case Item_sum::VARIANCE_FUNC: - case Item_sum::SUM_BIT_FUNC: - case Item_sum::UDF_SUM_FUNC: - case Item_sum::GROUP_CONCAT_FUNC: - frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; - frm.fEnd.fFrame = WF_CURRENT_ROW; - break; - case Item_sum::ROW_NUMBER_FUNC: - case Item_sum::RANK_FUNC: - case Item_sum::DENSE_RANK_FUNC: - case Item_sum::PERCENT_RANK_FUNC: - case Item_sum::CUME_DIST_FUNC: - case Item_sum::NTILE_FUNC: - frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; - frm.fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; - break; - case Item_sum::FIRST_VALUE_FUNC: - case Item_sum::LAST_VALUE_FUNC: - case Item_sum::NTH_VALUE_FUNC: - frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; - frm.fEnd.fFrame = WF_CURRENT_ROW; - break; - case Item_sum::LEAD_FUNC: - case Item_sum::LAG_FUNC: - frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; - frm.fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; - break; - default: - frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; - frm.fEnd.fFrame = WF_CURRENT_ROW; - break; - }; - } - - orderBy.fFrame = frm; - ac->orderBy(orderBy); + case Item_sum::GROUP_CONCAT_FUNC: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; + frm.fEnd.fFrame = WF_CURRENT_ROW; + break; + case Item_sum::ROW_NUMBER_FUNC: + case Item_sum::RANK_FUNC: + case Item_sum::DENSE_RANK_FUNC: + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; + 
frm.fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; + break; + case Item_sum::FIRST_VALUE_FUNC: + case Item_sum::LAST_VALUE_FUNC: + case Item_sum::NTH_VALUE_FUNC: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; + frm.fEnd.fFrame = WF_CURRENT_ROW; + break; + case Item_sum::LEAD_FUNC: + case Item_sum::LAG_FUNC: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; + frm.fEnd.fFrame = WF_UNBOUNDED_FOLLOWING; + break; + default: + frm.fStart.fFrame = WF_UNBOUNDED_PRECEDING; + frm.fEnd.fFrame = WF_CURRENT_ROW; + break; + }; } + + orderBy.fFrame = frm; + ac->orderBy(orderBy); } if (gwi.fatalParseError || nonSupport) @@ -620,7 +757,8 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(wf->arguments()[0])); + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when // infinidb_double_for_decimal_math is set. ac->adjustResultType(); diff --git a/dbcon/mysql/mysql-Columnstore b/dbcon/mysql/mysql-Columnstore index de090329f..a0c8bca23 100755 --- a/dbcon/mysql/mysql-Columnstore +++ b/dbcon/mysql/mysql-Columnstore @@ -310,7 +310,7 @@ fi kill_by_pid() { # let's see if we can kill the 2 mysql procs by hand # get the our mysql from ps - eval $(ps -ef | grep "$COLUMNSTORE_INSTALL_DIR/mysql//bin/mysqld " | grep -v grep | head -1 | awk '{printf "pid=%d\n", $2}') + eval $(ps -ef | grep "$COLUMNSTORE_INSTALL_DIR/mysql//bin/mysqld" | grep -v grep | head -1 | awk '{printf "pid=%d\n", $2}') if [ -n "$pid" ]; then ppid=$(ps -o ppid= -p $pid) diff --git a/ddlproc/ddlproc.vpj b/ddlproc/ddlproc.vpj old mode 100644 new mode 100755 index 95f5ae022..5c1cd7394 --- a/ddlproc/ddlproc.vpj +++ b/ddlproc/ddlproc.vpj @@ -1,224 +1,224 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dmlproc/dmlproc.vpj b/dmlproc/dmlproc.vpj old mode 100644 new mode 100755 index ca0e671dd..8bcbb8410 --- a/dmlproc/dmlproc.vpj +++ b/dmlproc/dmlproc.vpj @@ -1,238 +1,238 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/primitives/linux-port/linux-port.vpj b/primitives/linux-port/linux-port.vpj old mode 100644 new mode 100755 index 15eda0936..aeac03231 --- a/primitives/linux-port/linux-port.vpj +++ b/primitives/linux-port/linux-port.vpj @@ -1,225 +1,225 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp old mode 100644 new mode 100755 index ab2d5cd7b..3fbf5d848 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -371,8 +371,20 @@ void BatchPrimitiveProcessor::initBPP(ByteStream &bs) { bs >> fAggregateRG; fAggregator.reset(new RowAggregation); -// cout << "Made an aggregator\n"; bs >> *(fAggregator.get()); + // If there's UDAF involved, set up for PM processing + for (uint64_t i = 0; i < 
fAggregator->getAggFunctions().size(); i++) + { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fAggregator->getAggFunctions()[i].get()); + if (rowUDAF) + { + // On the PM, the aux column is not sent, but rather is output col + 1. + rowUDAF->fAuxColumnIndex = rowUDAF->fOutputColumnIndex + 1; + // Set the PM flag in case the UDAF cares. + rowUDAF->fUDAFContext.setContextFlags(rowUDAF->fUDAFContext.getContextFlags() + | mcsv1sdk::CONTEXT_IS_PM); + } + } } } @@ -1294,7 +1306,7 @@ void BatchPrimitiveProcessor::execute() } // @bug4507, 8k else { // @bug4507, 8k fAggregator->loadResult(*serialized); // @bug4507, 8k - fAggregator->reset(); // @bug4507, 8k + fAggregator->aggReset(); // @bug4507, 8k } // @bug4507, 8k } else { @@ -1361,7 +1373,7 @@ void BatchPrimitiveProcessor::execute() } // @bug4507, 8k else { // @bug4507, 8k fAggregator->loadResult(*serialized); // @bug4507, 8k - fAggregator->reset(); // @bug4507, 8k + fAggregator->aggReset(); // @bug4507, 8k } // @bug4507, 8k } @@ -1706,7 +1718,7 @@ int BatchPrimitiveProcessor::operator()() } if (fAggregator && currentBlockOffset == 0) // @bug4507, 8k - fAggregator->reset(); // @bug4507, 8k + fAggregator->aggReset(); // @bug4507, 8k for (; currentBlockOffset < count; currentBlockOffset++) { if (!(sessionID & 0x80000000)) { // can't do this with syscat queries @@ -1765,8 +1777,11 @@ int BatchPrimitiveProcessor::operator()() vssCache.clear(); #ifndef __FreeBSD__ - if (sendThread->aborted()) - objLock.try_lock(); + // If we've been aborted the lock *may* have been released already + // By doing try_lock, we ensure the unlock will work whether it was + // locked or not. 
+ if (sendThread->aborted()) + objLock.try_lock(); objLock.unlock(); #endif freeLargeBuffers(); diff --git a/primitives/primproc/batchprimitiveprocessor.h b/primitives/primproc/batchprimitiveprocessor.h old mode 100644 new mode 100755 diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp old mode 100644 new mode 100755 diff --git a/primitives/primproc/primproc.vpj b/primitives/primproc/primproc.vpj old mode 100644 new mode 100755 index 4ca34b6f5..125da0f85 --- a/primitives/primproc/primproc.vpj +++ b/primitives/primproc/primproc.vpj @@ -1,258 +1,258 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/dbloadxml/colxml.vpj b/tools/dbloadxml/colxml.vpj old mode 100644 new mode 100755 index 5311c7e36..f0be7dc6b --- a/tools/dbloadxml/colxml.vpj +++ b/tools/dbloadxml/colxml.vpj @@ -1,234 +1,234 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/editem/editem.vpj b/tools/editem/editem.vpj old mode 100644 new mode 100755 index df0f32bcc..f0f2cf717 --- a/tools/editem/editem.vpj +++ b/tools/editem/editem.vpj @@ -1,233 +1,233 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/hdfsCheck/hdfsCheck.vpj b/tools/hdfsCheck/hdfsCheck.vpj old mode 100644 new mode 100755 index 9d722dd33..92e9bdb76 --- a/tools/hdfsCheck/hdfsCheck.vpj +++ b/tools/hdfsCheck/hdfsCheck.vpj @@ -1,218 +1,218 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/common/any.hpp b/utils/common/any.hpp new file mode 100755 index 000000000..50b1eb28d --- /dev/null +++ b/utils/common/any.hpp @@ -0,0 +1,223 @@ +#pragma once +/* + * (C) Copyright Christopher Diggins 2005-2011 + * (C) Copyright Pablo Aguilar 2005 + * (C) Copyright Kevlin Henney 2001 + * + * Distributed under the Boost Software License, Version 1.0. 
(See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt + */ + +#include + +namespace static_any +{ +namespace anyimpl +{ + + struct bad_any_cast + { + }; + + struct empty_any + { + }; + + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; + + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() { return sizeof(T); } + }; + + template + struct small_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { } + virtual void copy_from_value(void const* src, void** dest) + { new(dest) T(*reinterpret_cast(src)); } + virtual void clone(void* const* src, void** dest) { *dest = *src; } + virtual void move(void* const* src, void** dest) { *dest = *src; } + virtual void* get_value(void** src) { return reinterpret_cast(src); } + }; + + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) + delete(*reinterpret_cast(x)); *x = NULL; } + virtual void copy_from_value(void const* src, void** dest) { + *dest = new T(*reinterpret_cast(src)); } + virtual void clone(void* const* src, void** dest) { + *dest = new T(**reinterpret_cast(src)); } + virtual void move(void* const* src, void** dest) { + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } + virtual void* get_value(void** src) { return *src; } + }; + + template + struct choose_policy + { + typedef big_any_policy type; + }; + + template + struct choose_policy + { + typedef small_any_policy type; + }; + + struct any; + + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. 
+ template<> + struct choose_policy + { + typedef void type; + }; + + /// Specializations for small types. + #define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; + + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); + SMALL_POLICY(std::string); + + #undef SMALL_POLICY + + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; +} + +class any +{ +private: + // fields + anyimpl::base_any_policy* policy; + void* object; + +public: + /// Initializing constructor. + template + any(const T& x) + : policy(anyimpl::get_policy()), object(NULL) + { + assign(x); + } + + /// Empty constructor. + any() + : policy(anyimpl::get_policy()), object(NULL) + { } + + /// Special initializing constructor for string literals. + any(const char* x) + : policy(anyimpl::get_policy()), object(NULL) + { + assign(x); + } + + /// Copy constructor. + any(const any& x) + : policy(anyimpl::get_policy()), object(NULL) + { + assign(x); + } + + /// Destructor. + ~any() { + policy->static_delete(&object); + } + + /// Assignment function from another any. + any& assign(const any& x) { + reset(); + policy = x.policy; + policy->clone(&x.object, &object); + return *this; + } + + /// Assignment function. + template + any& assign(const T& x) { + reset(); + policy = anyimpl::get_policy(); + policy->copy_from_value(&x, &object); + return *this; + } + + /// Assignment operator. + template + any& operator=(const T& x) { + return assign(x); + } + + /// Assignment operator, specialed for literal strings. 
+ /// They have types like const char [6] which don't work as expected. + any& operator=(const char* x) { + return assign(x); + } + + /// Utility functions + any& swap(any& x) { + std::swap(policy, x.policy); + std::swap(object, x.object); + return *this; + } + + /// Cast operator. You can only cast to the original type. + template + T& cast() { + if (policy != anyimpl::get_policy()) + throw anyimpl::bad_any_cast(); + T* r = reinterpret_cast(policy->get_value(&object)); + return *r; + } + + /// Returns true if the any contains no value. + bool empty() const { + return policy == anyimpl::get_policy(); + } + + /// Frees any allocated memory, and sets the value to NULL. + void reset() { + policy->static_delete(&object); + policy = anyimpl::get_policy(); + } + + /// Returns true if the two types are the same. + bool compatible(const any& x) const { + return policy == x.policy; + } +}; +} diff --git a/utils/common/common.vpj b/utils/common/common.vpj old mode 100644 new mode 100755 index 0b8504c44..69059884c --- a/utils/common/common.vpj +++ b/utils/common/common.vpj @@ -1,232 +1,233 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/funcexp/func_inet_aton.cpp b/utils/funcexp/func_inet_aton.cpp index 53764d0ac..9a19a3c5a 100644 --- a/utils/funcexp/func_inet_aton.cpp +++ b/utils/funcexp/func_inet_aton.cpp @@ -34,7 +34,7 @@ namespace funcexp //------------------------------------------------------------------------------ // Return input argument type. -// See IDB_add in udfsdk.h for explanation of this function. +// See mcs_add in udfsdk.h for explanation of this function. 
//------------------------------------------------------------------------------ execplan::CalpontSystemCatalog::ColType Func_inet_aton::operationType( FunctionParm& fp, diff --git a/utils/funcexp/func_inet_ntoa.cpp b/utils/funcexp/func_inet_ntoa.cpp index a703c0cde..045becd3b 100644 --- a/utils/funcexp/func_inet_ntoa.cpp +++ b/utils/funcexp/func_inet_ntoa.cpp @@ -54,7 +54,7 @@ namespace funcexp //------------------------------------------------------------------------------ // Return input argument type. -// See IDB_add in udfsdk.h for explanation of this function. +// See mcs_add in udfsdk.h for explanation of this function. //------------------------------------------------------------------------------ execplan::CalpontSystemCatalog::ColType Func_inet_ntoa::operationType( FunctionParm& fp, diff --git a/utils/funcexp/func_lpad.cpp b/utils/funcexp/func_lpad.cpp old mode 100644 new mode 100755 index 97b444e04..7e58bcc32 --- a/utils/funcexp/func_lpad.cpp +++ b/utils/funcexp/func_lpad.cpp @@ -63,7 +63,7 @@ std::string Func_lpad::getStrVal(rowgroup::Row& row, const string& tstr = fp[0]->data()->getStrVal(row, isNull); // The result length in number of characters - int len = 0; + size_t len = 0; switch (fp[1]->data()->resultType().colDataType) { case execplan::CalpontSystemCatalog::BIGINT: @@ -129,16 +129,16 @@ std::string Func_lpad::getStrVal(rowgroup::Row& row, // determine the size of buffer to allocate, we can be sure the wide // char string won't be longer than strwclen = tstr.length(); // a guess to start with. This will be >= to the real count. - int alen = len; + size_t alen = len; if(strwclen > len) alen = strwclen; - int bufsize = (alen+1) * sizeof(wchar_t); + size_t bufsize = (alen+1) * sizeof(wchar_t); // Convert to wide characters. 
Do all further work in wide characters wchar_t* wcbuf = (wchar_t*)alloca(bufsize); strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen+1); - unsigned int strSize = strwclen; // The number of significant characters + size_t strSize = strwclen; // The number of significant characters const wchar_t* pWChar = wcbuf; for (i = 0; *pWChar != '\0' && i < strwclen; ++pWChar, ++i) { @@ -165,13 +165,13 @@ std::string Func_lpad::getStrVal(rowgroup::Row& row, // Convert the pad string to wide padwclen = pad.length(); // A guess to start. - int padbufsize = (padwclen+1) * sizeof(wchar_t); + size_t padbufsize = (padwclen+1) * sizeof(wchar_t); wchar_t* wcpad = (wchar_t*)alloca(padbufsize); // padwclen+1 is for giving count for the terminating null size_t padlen = utf8::idb_mbstowcs(wcpad, pad.c_str(), padwclen+1); // How many chars do we need? - unsigned int padspace = len - strSize; + size_t padspace = len - strSize; // Shift the contents of wcbuf to the right. wchar_t* startofstr = wcbuf + padspace; diff --git a/utils/funcexp/func_rpad.cpp b/utils/funcexp/func_rpad.cpp old mode 100644 new mode 100755 index c37f1b72d..458a2fb09 --- a/utils/funcexp/func_rpad.cpp +++ b/utils/funcexp/func_rpad.cpp @@ -63,7 +63,7 @@ std::string Func_rpad::getStrVal(rowgroup::Row& row, const string& tstr = fp[0]->data()->getStrVal(row, isNull); // The result length in number of characters - int len = 0; + size_t len = 0; switch (fp[1]->data()->resultType().colDataType) { case execplan::CalpontSystemCatalog::BIGINT: diff --git a/utils/funcexp/funcexp.vpj b/utils/funcexp/funcexp.vpj old mode 100644 new mode 100755 index 71f7af590..c07fcfd44 --- a/utils/funcexp/funcexp.vpj +++ b/utils/funcexp/funcexp.vpj @@ -1,332 +1,332 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/funcexp/functor_bool.h b/utils/funcexp/functor_bool.h index 5d3d615b0..04cd9c900 100644 --- a/utils/funcexp/functor_bool.h +++ b/utils/funcexp/functor_bool.h @@ -182,7 +182,7 @@ public: Func_isnull():fIsNotNull(false) {} Func_isnull(bool isnotnull) : fIsNotNull(isnotnull) {} /* - * Destructor. IDB_add does not need to do anything here to clean up. + * Destructor. isnull does not need to do anything here to clean up. */ virtual ~Func_isnull() {} diff --git a/utils/loggingcpp/ErrorMessage.txt b/utils/loggingcpp/ErrorMessage.txt old mode 100644 new mode 100755 index c4d134899..a6dd3b6e0 --- a/utils/loggingcpp/ErrorMessage.txt +++ b/utils/loggingcpp/ErrorMessage.txt @@ -148,6 +148,7 @@ 5001 ERR_FUNC_NON_IMPLEMENT %1%:%2% is not implemented. 5002 ERR_PSEUDOCOL_IDB_ONLY Pseudo column function '%1%' is only supported in Columnstore. 5003 ERR_PSEUDOCOL_WRONG_ARG Argument of pseudo column function '%1%' is invalid. +5004 ERR_WINDOW_FUNC_ONLY User defined function %1% may only be used with the OVER clause. # DBRM Errors 6001 ERR_NETWORK DBRM encountered a network error, check the controllernode. @@ -195,4 +196,10 @@ 9027 ERR_WF_ARG_OUT_OF_RANGE Argument '%1%' is out of range. 9028 ERR_WF_NOT_ALLOWED Window functions are not allowed in %1%. 9029 ERR_WF_IDB_ONLY Window function are only supported for Columnstore tables. -9030 ERR_WF_DATA_SET_TOO_BIG Window function data set exceeds memory limit. 
+9030 ERR_WF_DATA_SET_TOO_BIG Window function data set exceeds memory limit. +9031 ERR_WF_UDANF_ERROR User Defined Window function: %1%. +9032 ERR_WF_UDANF_NOT_ALLOWED User Defined Function %1% used with an OVER clause. +9033 ERR_WF_UDANF_ORDER_REQUIRED User Defined Function %1% without an ORDER BY clause in the OVER clause. +9034 ERR_WF_UDANF_ORDER_NOT_ALLOWED User Defined Function %1% with an ORDER BY clause in the OVER clause. +9035 ERR_WF_UDANF_FRAME_REQUIRED User Defined Function %1% without a FRAME clause in the OVER clause. +9036 ERR_WF_UDANF_FRAME_NOT_ALLOWED User Defined Function %1% with a FRAME clause in the OVER clause. diff --git a/utils/loggingcpp/loggingcpp.vpj b/utils/loggingcpp/loggingcpp.vpj old mode 100644 new mode 100755 index 514378a46..7924d725b --- a/utils/loggingcpp/loggingcpp.vpj +++ b/utils/loggingcpp/loggingcpp.vpj @@ -1,238 +1,238 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/messageqcpp/bytestream.cpp b/utils/messageqcpp/bytestream.cpp old mode 100644 new mode 100755 index bd66073c2..a9a212d65 --- a/utils/messageqcpp/bytestream.cpp +++ b/utils/messageqcpp/bytestream.cpp @@ -588,5 +588,53 @@ void ByteStream::peek(uuid& u) const memcpy(&u.data[0], fCurOutPtr, uuids::uuid::static_size()); } +ByteStream& ByteStream::operator<<(const float f) +{ + int sz = sizeof(float); + if (fBuf == 0 || (fCurInPtr - fBuf + sz > fMaxLen + ISSOverhead)) + growBuf(fMaxLen + BlockSize); + *((float *) fCurInPtr) = f; + fCurInPtr += sz; + + return *this; +} +ByteStream& ByteStream::operator<<(const double d) +{ + int sz = 
sizeof(double); + if (fBuf == 0 || (fCurInPtr - fBuf + sz > fMaxLen + ISSOverhead)) + growBuf(fMaxLen + BlockSize); + *((double *) fCurInPtr) = d; + fCurInPtr += sz; + + return *this; +} +ByteStream& ByteStream::operator>>(float& f) +{ + peek(f); + fCurOutPtr += sizeof(float); + return *this; +} +ByteStream& ByteStream::operator>>(double& d) +{ + peek(d); + fCurOutPtr += sizeof(double); + return *this; +} +void ByteStream::peek(float& f) const +{ + if (length() < sizeof(float)) + throw underflow_error("ByteStream>int64_t: not enough data in stream to fill datatype"); + + f = *((float *) fCurOutPtr); +} +void ByteStream::peek(double& d) const +{ + if (length() < sizeof(double)) + throw underflow_error("ByteStream>int64_t: not enough data in stream to fill datatype"); + + d = *((double *) fCurOutPtr); +} + + }//namespace messageqcpp diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h old mode 100644 new mode 100755 index 64e4b8dd8..054dfb9f5 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -144,6 +144,16 @@ public: * push an uint64_t onto the end of the stream. The byte order is whatever the native byte order is. */ EXPORT ByteStream& operator<<(const uint64_t o); + /** + * push an float onto the end of the stream. The byte order is + * whatever the native byte order is. + */ + EXPORT ByteStream& operator<<(const float f); + /** + * push an double onto the end of the stream. The byte order is + * whatever the native byte order is. + */ + EXPORT ByteStream& operator<<(const double d); /** * push a std::string onto the end of the stream. */ @@ -193,6 +203,16 @@ public: * extract an uint64_t from the front of the stream. The byte order is whatever the native byte order is. */ EXPORT ByteStream& operator>>(uint64_t& o); + /** + * extract a float from the front of the stream. The byte + * order is whatever the native byte order is. 
+ */ + EXPORT ByteStream& operator>>(float& f); + /** + * extract a double from the front of the stream. The byte + * order is whatever the native byte order is. + */ + EXPORT ByteStream& operator>>(double& d); /** * extract a std::string from the front of the stream. */ @@ -248,6 +268,16 @@ public: * Peek at an uint64_t from the front of the stream. The byte order is whatever the native byte order is. */ EXPORT void peek(uint64_t& o) const; + /** + * Peek at a float from the front of the stream. The byte order + * is whatever the native byte order is. + */ + EXPORT void peek(float& f) const; + /** + * Peek at a double from the front of the stream. The byte + * order is whatever the native byte order is. + */ + EXPORT void peek(double& f) const; /** * Peek at a std::string from the front of the stream. */ diff --git a/utils/messageqcpp/messageqcpp.vpj b/utils/messageqcpp/messageqcpp.vpj old mode 100644 new mode 100755 index 318d99785..905f53125 --- a/utils/messageqcpp/messageqcpp.vpj +++ b/utils/messageqcpp/messageqcpp.vpj @@ -1,234 +1,234 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp old mode 100644 new mode 100755 index 2ecec9be4..f12f0d811 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -28,7 +28,7 @@ #include #include #include - +#include #include "joblisttypes.h" #include "resourcemanager.h" #include "groupconcat.h" @@ -459,7 +459,6 @@ inline void RowAggregation::updateFloatSum(float val1, float val2, int64_t col) fRow.setFloatField(val1 + 
val2, col);
 }
 
-
 //------------------------------------------------------------------------------
 // Verify if the column value is NULL
 // row(in) - Row to be included in aggregation.
@@ -721,6 +720,41 @@ void RowAggregation::setJoinRowGroups(vector *pSmallSideRG, RowGroup *
         (*fSmallSideRGs)[i].initRow(&rowSmalls[i]);
 }
 
+//------------------------------------------------------------------------------
+// For UDAF, we need to sometimes start a new context.
+//
+// This will be called any number of times by each of the batchprimitiveprocessor
+// threads on the PM and by multiple threads on the UM. It must remain
+// thread safe.
+//------------------------------------------------------------------------------
+void RowAggregation::resetUDAF(uint64_t funcColID)
+{
+    // Get the UDAF class pointer and store in the row definition object.
+    RowUDAFFunctionCol* rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[funcColID].get());
+    if (!rowUDAF) throw logic_error("resetUDAF: A UDAF function is called but there's no RowUDAFFunctionCol");
+    // resetUDAF needs to be re-entrant. Since we're modifying the context object
+    // by creating a new userData, we need a local copy. The copy constructor
+    // doesn't copy userData.
+    mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext);
+
+    // Call the user reset for the group userData. Since, at this point,
+    // context's userData will be NULL, reset will generate a new one.
+    mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
+    rc = rgContext.getFunction()->reset(&rgContext);
+    if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
+    {
+        rowUDAF->bInterrupted = true;
+        throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr);
+    }
+
+    fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore());
+    fRow.setUserData(rgContext,
+                     rgContext.getUserDataSP(),
+                     rgContext.getUserDataSize(),
+                     rowUDAF->fAuxColumnIndex);
+
+    rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context.
+} //------------------------------------------------------------------------------ // Initilalize the data members to meaningful values, setup the hashmap. @@ -780,7 +814,7 @@ void RowAggregation::initialize() //------------------------------------------------------------------------------ // Reset the working data to aggregate next logical block //------------------------------------------------------------------------------ -void RowAggregation::reset() +void RowAggregation::aggReset() { fTotalRowCount = 0; fMaxTotalRowCount = AGG_ROWGROUP_SIZE; @@ -798,15 +832,23 @@ void RowAggregation::reset() delete fAggMapPtr; fAggMapPtr = new RowAggMap_t(10, *fHasher, *fEq, *fAlloc); } - fResultDataVec.clear(); fResultDataVec.push_back(fRowGroupOut->getRGData()); + + // For UDAF, reset the data + for (uint64_t i = 0; i < fFunctionCols.size(); i++) + { + if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF) + { + resetUDAF(i); + } + } } -void RowAggregationUM::reset() +void RowAggregationUM::aggReset() { - RowAggregation::reset(); + RowAggregation::aggReset(); if (fKeyOnHeap) { @@ -843,6 +885,15 @@ void RowAggregationUM::aggregateRowWithRemap(Row& row) attachGroupConcatAg(); inserted.first->second = RowPosition(fResultDataVec.size()-1, fRowGroupOut->getRowCount()-1); + // If there's UDAF involved, reset the user data. + for (uint64_t i = 0; i < fFunctionCols.size(); i++) + { + if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF) + { + resetUDAF(i); + } + } + // replace the key value with an equivalent copy, yes this is OK const_cast((inserted.first->first)) = pos; } @@ -893,6 +944,16 @@ void RowAggregation::aggregateRow(Row& row) // replace the key value with an equivalent copy, yes this is OK const_cast(*(inserted.first)) = RowPosition(fResultDataVec.size() - 1, fRowGroupOut->getRowCount() - 1); + + // If there's UDAF involved, reset the user data. 
+ for (uint64_t i = 0; i < fFunctionCols.size(); i++) + { + if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF) + { + resetUDAF(i); + } + } + } else { //fRow.setData(*(inserted.first)); @@ -1065,6 +1126,8 @@ void RowAggregation::makeAggFieldsNull(Row& row) case execplan::CalpontSystemCatalog::CHAR: case execplan::CalpontSystemCatalog::VARCHAR: case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::BLOB: { int colWidth = fRowGroupOut->getColumnWidth(colOut); if (colWidth <= 8) @@ -1386,7 +1449,7 @@ void RowAggregation::serialize(messageqcpp::ByteStream& bs) const bs << functionCount; for (uint64_t i = 0; i < functionCount; i++) - bs << *(fFunctionCols[i].get()); + fFunctionCols[i]->serialize(bs); } @@ -1415,9 +1478,18 @@ void RowAggregation::deserialize(messageqcpp::ByteStream& bs) for (uint64_t i = 0; i < functionCount; i++) { - SP_ROWAGG_FUNC_t funct( - new RowAggFunctionCol(ROWAGG_FUNCT_UNDEFINE, ROWAGG_FUNCT_UNDEFINE, 0, 0)); - bs >> *(funct.get()); + uint8_t funcType; + bs.peek(funcType); + SP_ROWAGG_FUNC_t funct; + if (funcType == ROWAGG_UDAF) + { + funct.reset(new RowUDAFFunctionCol(0, 0)); + } + else + { + funct.reset(new RowAggFunctionCol(ROWAGG_FUNCT_UNDEFINE, ROWAGG_FUNCT_UNDEFINE, 0, 0)); + } + funct->deserialize(bs); fFunctionCols.push_back(funct); } } @@ -1477,6 +1549,20 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_GROUP_CONCAT: break; + case ROWAGG_UDAF: + { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + if (rowUDAF) + { + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + } + else + { + throw logic_error("(3)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + break; + } + default: { std::ostringstream errmsg; @@ -1729,6 +1815,113 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux+1) + valIn*valIn, colAux+1); } +void 
RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, + RowUDAFFunctionCol* rowUDAF) +{ + std::vector valsIn; + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; + std::vector dataFlags; + + // Get the context for this rowGroup. Make a copy so we're thread safe. + mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + + // Turn on NULL flags + std::vector flags; + uint32_t flag = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) + { + if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + flag |= mcsv1sdk::PARAM_IS_NULL; + } + flags.push_back(flag); + rgContext.setDataFlags(&flags); + + mcsv1sdk::ColumnDatum datum; + + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << rgContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + valsIn.push_back(datum); + + // The intermediate values are stored in userData referenced by colAux. 
+ rgContext.setUserData(fRow.getUserData(colAux)); + + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); + rgContext.setUserData(NULL); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + rowUDAF->bInterrupted = true; + throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + } +} //------------------------------------------------------------------------------ // Allocate a new data array for the output RowGroup @@ -1781,7 +1974,6 @@ void RowAggregation::loadEmptySet(messageqcpp::ByteStream& bs) fEmptyRowGroup.serializeRGData(bs); } - //------------------------------------------------------------------------------ // Row Aggregation constructor used on UM // For one-phase case, from projected RG to final aggregated RG @@ -1790,10 +1982,11 @@ RowAggregationUM::RowAggregationUM(const vector& rowAggGroupB const vector& rowAggFunctionCols, joblist::ResourceManager *r, boost::shared_ptr sessionLimit) : RowAggregation(rowAggGroupByCols, rowAggFunctionCols), fHasAvg(false), fKeyOnHeap(false), - fHasStatsFunc(false), fTotalMemUsage(0), fRm(r), fSessionMemLimit(sessionLimit), - fLastMemUsage(0), fNextRGIndex(0) + fHasStatsFunc(false), fHasUDAF(false),fTotalMemUsage(0), fRm(r), + fSessionMemLimit(sessionLimit), fLastMemUsage(0), fNextRGIndex(0) { - // Check if there are any avg functions. + // Check if there are any avg, stats or UDAF functions. + // These flags are used in finalize. 
for (uint64_t i = 0; i < fFunctionCols.size(); i++) { if (fFunctionCols[i]->fAggFunction == ROWAGG_AVG || @@ -1801,6 +1994,8 @@ RowAggregationUM::RowAggregationUM(const vector& rowAggGroupB fHasAvg = true; else if (fFunctionCols[i]->fAggFunction == ROWAGG_STATS) fHasStatsFunc = true; + else if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF) + fHasUDAF = true; } // Check if all groupby column selected @@ -1904,6 +2099,11 @@ void RowAggregationUM::finalize() calculateStatisticsFunctions(); } + if (fHasUDAF) + { + calculateUDAFColumns(); + } + if (fGroupConcat.size() > 0) setGroupConcatString(); @@ -1950,6 +2150,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) { int64_t colIn = fFunctionCols[i]->fInputColumnIndex; int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; switch (fFunctionCols[i]->fAggFunction) { @@ -1971,14 +2172,12 @@ void RowAggregationUM::updateEntry(const Row& rowIn) // The sum and count on UM may not be put next to each other: // use colOut to store the sum; // use colAux to store the count. - int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; doAvg(rowIn, colIn, colOut, colAux); break; } case ROWAGG_STATS: { - int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; doStatistics(rowIn, colIn, colOut, colAux); break; } @@ -2004,6 +2203,20 @@ void RowAggregationUM::updateEntry(const Row& rowIn) case ROWAGG_CONSTANT: break; + case ROWAGG_UDAF: + { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + if (rowUDAF) + { + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + } + else + { + throw logic_error("(5)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + break; + } + default: { // need a exception to show the value @@ -2143,6 +2356,251 @@ void RowAggregationUM::calculateAvgColumns() } } +// Sets the value from valOut into column colOut, performing any conversions. 
+void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut)
+{
+    static const static_any::any& charTypeId = (char)1;
+    static const static_any::any& scharTypeId = (signed char)1;
+    static const static_any::any& shortTypeId = (short)1;
+    static const static_any::any& intTypeId = (int)1;
+    static const static_any::any& longTypeId = (long)1;
+    static const static_any::any& llTypeId = (long long)1;
+    static const static_any::any& ucharTypeId = (unsigned char)1;
+    static const static_any::any& ushortTypeId = (unsigned short)1;
+    static const static_any::any& uintTypeId = (unsigned int)1;
+    static const static_any::any& ulongTypeId = (unsigned long)1;
+    static const static_any::any& ullTypeId = (unsigned long long)1;
+    static const static_any::any& floatTypeId = (float)1;
+    static const static_any::any& doubleTypeId = (double)1;
+    static const std::string typeStr("");
+    static const static_any::any& strTypeId = typeStr;
+
+    execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut];
+    if (valOut.empty())
+    {
+        // Fields are initialized to NULL, which is what we want for empty;
+        return;
+    }
+
+    // This may seem a bit convoluted. Users shouldn't return a type
+    // that they didn't set in mcsv1_UDAF::init(), but this
+    // handles whatever return type is given and casts
+    // it to whatever they said to return.
+    int64_t intOut = 0;
+    uint64_t uintOut = 0;
+    float floatOut = 0.0;
+    double doubleOut = 0.0;
+    ostringstream oss;
+    std::string strOut;
+
+    if (valOut.compatible(charTypeId))
+    {
+        uintOut = intOut = valOut.cast<char>();
+        floatOut = doubleOut = intOut; // fill doubleOut too, so DOUBLE columns don't get 0.0
+        oss << intOut;
+    }
+    else if (valOut.compatible(scharTypeId))
+    {
+        uintOut = intOut = valOut.cast<signed char>();
+        floatOut = doubleOut = intOut;
+        oss << intOut;
+    }
+    else if (valOut.compatible(shortTypeId))
+    {
+        uintOut = intOut = valOut.cast<short>();
+        floatOut = doubleOut = intOut;
+        oss << intOut;
+    }
+    else if (valOut.compatible(intTypeId))
+    {
+        uintOut = intOut = valOut.cast<int>();
+        floatOut = doubleOut = intOut;
+        oss << intOut;
+    }
+    else if (valOut.compatible(longTypeId))
+    {
+        uintOut = intOut = valOut.cast<long>();
+        floatOut = doubleOut = intOut;
+        oss << intOut;
+    }
+    else if (valOut.compatible(llTypeId))
+    {
+        uintOut = intOut = valOut.cast<long long>();
+        floatOut = doubleOut = intOut;
+        oss << intOut;
+    }
+    else if (valOut.compatible(ucharTypeId))
+    {
+        intOut = uintOut = valOut.cast<unsigned char>();
+        floatOut = doubleOut = uintOut;
+        oss << uintOut;
+    }
+    else if (valOut.compatible(ushortTypeId))
+    {
+        intOut = uintOut = valOut.cast<unsigned short>();
+        floatOut = doubleOut = uintOut;
+        oss << uintOut;
+    }
+    else if (valOut.compatible(uintTypeId))
+    {
+        intOut = uintOut = valOut.cast<unsigned int>();
+        floatOut = doubleOut = uintOut;
+        oss << uintOut;
+    }
+    else if (valOut.compatible(ulongTypeId))
+    {
+        intOut = uintOut = valOut.cast<unsigned long>();
+        floatOut = doubleOut = uintOut;
+        oss << uintOut;
+    }
+    else if (valOut.compatible(ullTypeId))
+    {
+        intOut = uintOut = valOut.cast<unsigned long long>();
+        floatOut = doubleOut = uintOut;
+        oss << uintOut;
+    }
+    else if (valOut.compatible(floatTypeId))
+    {
+        floatOut = valOut.cast<float>();
+        doubleOut = floatOut;
+        uintOut = (uint64_t)floatOut; // convert separately, as the double branch does,
+        intOut = (int64_t)floatOut;   // so negative values keep their sign in intOut
+        oss << floatOut;
+    }
+    else if (valOut.compatible(doubleTypeId))
+    {
+        doubleOut = valOut.cast<double>();
+        floatOut = (float)doubleOut;
+        uintOut = (uint64_t)doubleOut;
+        intOut = (int64_t)doubleOut;
+        oss << doubleOut;
+    }
+
+    if (valOut.compatible(strTypeId))
+    {
+        strOut = valOut.cast<std::string>(); // assign the outer strOut; a new local here would hide it from the switch below
+        // Convert the string to numeric type, just in case.
+        intOut = atol(strOut.c_str());
+        uintOut = strtoul(strOut.c_str(), NULL, 10);
+        doubleOut = strtod(strOut.c_str(), NULL);
+        floatOut = (float)doubleOut;
+    }
+    else
+    {
+        strOut = oss.str();
+    }
+
+    switch (colDataType)
+    {
+        case execplan::CalpontSystemCatalog::BIT:
+        case execplan::CalpontSystemCatalog::TINYINT:
+            fRow.setIntField<1>(intOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::SMALLINT:
+        case execplan::CalpontSystemCatalog::MEDINT: // NOTE(review): MEDINT shares the 2-byte setter with SMALLINT - confirm its column width
+            fRow.setIntField<2>(intOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::INT:
+            fRow.setIntField<4>(intOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::BIGINT:
+        case execplan::CalpontSystemCatalog::DECIMAL:
+        case execplan::CalpontSystemCatalog::UDECIMAL:
+            fRow.setIntField<8>(intOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::UTINYINT:
+            fRow.setUintField<1>(uintOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::USMALLINT:
+        case execplan::CalpontSystemCatalog::UMEDINT: // NOTE(review): same width question as MEDINT above
+            fRow.setUintField<2>(uintOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::UINT:
+            fRow.setUintField<4>(uintOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::UBIGINT:
+            fRow.setUintField<8>(uintOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::DATE:
+        case execplan::CalpontSystemCatalog::DATETIME:
+            fRow.setUintField<8>(uintOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::FLOAT:
+        case execplan::CalpontSystemCatalog::UFLOAT:
+            fRow.setFloatField(floatOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::DOUBLE:
+        case execplan::CalpontSystemCatalog::UDOUBLE:
+            fRow.setDoubleField(doubleOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::CHAR:
+        case execplan::CalpontSystemCatalog::VARCHAR:
+        case execplan::CalpontSystemCatalog::TEXT:
+            fRow.setStringField(strOut, colOut);
+            break;
+        case execplan::CalpontSystemCatalog::VARBINARY:
+        case execplan::CalpontSystemCatalog::CLOB:
+        case execplan::CalpontSystemCatalog::BLOB:
+            fRow.setVarBinaryField(strOut, colOut);
+            break;
+        default:
+        {
+            std::ostringstream errmsg;
+            errmsg << "RowAggregation: No logic for data type: " << colDataType;
+            throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr);
+            break;
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+//
+// For each rowgroup, calculate the final value.
+//------------------------------------------------------------------------------
+void RowAggregationUM::calculateUDAFColumns()
+{
+    // Run evaluate() once per output row for every UDAF column and store the result.
+    for (uint64_t i = 0; i < fFunctionCols.size(); i++)
+    {
+        if (fFunctionCols[i]->fAggFunction != ROWAGG_UDAF)
+            continue;
+
+        RowUDAFFunctionCol* rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get());
+        if (!rowUDAF)
+            throw logic_error("calculateUDAFColumns: A UDAF function is called but there's no RowUDAFFunctionCol");
+        mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext);
+
+        int64_t colOut = rowUDAF->fOutputColumnIndex;
+        int64_t colAux = rowUDAF->fAuxColumnIndex;
+
+        // At this point, each row is an aggregated GROUP BY.
+        for (uint64_t j = 0; j < fRowGroupOut->getRowCount(); j++)
+        {
+            // Get the user data from the row and evaluate.
+            fRowGroupOut->getRow(j, &fRow);
+
+            // Turn the NULL flag off. We can't know NULL at this point
+            rgContext.setDataFlags(NULL);
+
+            // The intermediate values are stored in colAux.
+            rgContext.setUserData(fRow.getUserData(colAux));
+            // Call the UDAF evaluate function
+            static_any::any valOut; // fresh per row: an untouched (empty) result must not inherit the previous row's value
+            mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
+            rc = rgContext.getFunction()->evaluate(&rgContext, valOut);
+            rgContext.setUserData(NULL);
+            if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
+            {
+                rowUDAF->bInterrupted = true;
+                throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr);
+            }
+
+            // Set the returned value into the output row
+            SetUDAFValue(valOut, colOut);
+        }
+        rgContext.setUserData(NULL);
+    }
+}
 
 //------------------------------------------------------------------------------
 // After all PM rowgroups received, calculate the statistics.
@@ -2222,7 +2680,6 @@ void RowAggregationUM::calculateStatisticsFunctions() } } - //------------------------------------------------------------------------------ // Fix the duplicate function columns -- same function same column id repeated //------------------------------------------------------------------------------ @@ -2248,7 +2705,6 @@ void RowAggregationUM::fixDuplicates(RowAggFunctionType funct) } } - //------------------------------------------------------------------------------ // Evaluate the functions and expressions //------------------------------------------------------------------------------ @@ -2262,7 +2718,6 @@ void RowAggregationUM::evaluateExpression() } } - //------------------------------------------------------------------------------ // Calculate the aggregate(constant) columns //------------------------------------------------------------------------------ @@ -2395,6 +2850,58 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u } break; + case ROWAGG_UDAF: + { + int64_t rowCnt = 0; + // For a NULL constant, call nextValue with NULL and then evaluate. + bool bInterrupted = false; + mcsv1sdk::mcsv1Context context; + context.setRowCnt(rowCnt); + context.setInterrupted(bInterrupted); + context.createUserData(); + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + std::vector valsIn; + + // Call a reset, then nextValue, then execute. This will evaluate + // the UDAF for the constant. + rc = context.getFunction()->reset(&context); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + context.setInterrupted(true); + throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + } + + // Turn the NULL and CONSTANT flags on. 
+ std::vector flags; + uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + flags.push_back(flag); + context.setDataFlags(&flags); + + // Create a dummy datum + mcsv1sdk::ColumnDatum datum; + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = 0; + valsIn.push_back(datum); + + rc = context.getFunction()->nextValue(&context, valsIn); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + context.setInterrupted(true); + throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + } + static_any::any valOut; + rc = context.getFunction()->evaluate(&context, valOut); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + context.setInterrupted(true); + throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + } + // Set the returned value into the output row + SetUDAFValue(valOut, colOut); + context.setDataFlags(NULL); + } + break; + default: { fRow.setStringField("", colOut); @@ -2674,6 +3181,133 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData } break; + case ROWAGG_UDAF: + { + int64_t rowCnt = 0; + bool bInterrupted = false; + mcsv1sdk::mcsv1Context context; + context.setRowCnt(rowCnt); + context.setInterrupted(bInterrupted); + // Try the complex data initiation. If not implemented, use the simple, + context.createUserData(); + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + std::vector valsIn; + + // Call a reset, then nextValue, then execute. This will evaluate + // the UDAF for the constant. + rc = context.getFunction()->reset(&context); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + context.setInterrupted(true); + throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + } + + // Turn the CONSTANT flags on. 
+            std::vector<uint32_t> flags;
+            uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT;
+            flags.push_back(flag);
+            context.setDataFlags(&flags);
+
+            // Create a datum item for sending to UDAF
+            mcsv1sdk::ColumnDatum datum;
+            datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType;
+
+            switch (colDataType)
+            {
+                case execplan::CalpontSystemCatalog::TINYINT:
+                case execplan::CalpontSystemCatalog::SMALLINT:
+                case execplan::CalpontSystemCatalog::MEDINT:
+                case execplan::CalpontSystemCatalog::INT:
+                case execplan::CalpontSystemCatalog::BIGINT:
+                {
+                    datum.columnData = strtol(aggData.fConstValue.c_str(), 0, 10);
+                }
+                break;
+
+                case execplan::CalpontSystemCatalog::UTINYINT:
+                case execplan::CalpontSystemCatalog::USMALLINT:
+                case execplan::CalpontSystemCatalog::UMEDINT:
+                case execplan::CalpontSystemCatalog::UINT:
+                case execplan::CalpontSystemCatalog::UBIGINT:
+                {
+                    datum.columnData = strtoul(aggData.fConstValue.c_str(), 0, 10);
+                }
+                break;
+
+                case execplan::CalpontSystemCatalog::DECIMAL:
+                case execplan::CalpontSystemCatalog::UDECIMAL:
+                {
+                    double dbl = strtod(aggData.fConstValue.c_str(), 0);
+                    double scaleFactor = pow(10.0, (double) fRowGroupOut->getScale()[i]); // multiplier that shifts the constant into its stored integer form
+                    datum.columnData = (int64_t)(scaleFactor*dbl);
+                    datum.scale = fRowGroupOut->getScale()[i]; // pass the scale itself, as RowAggregation::doUDAF does, not 10^scale
+                    datum.precision = fRowGroupOut->getPrecision()[i];
+                }
+                break;
+
+                case execplan::CalpontSystemCatalog::DOUBLE:
+                case execplan::CalpontSystemCatalog::UDOUBLE:
+                {
+                    datum.columnData = strtod(aggData.fConstValue.c_str(), 0);
+                }
+                break;
+
+                case execplan::CalpontSystemCatalog::FLOAT:
+                case execplan::CalpontSystemCatalog::UFLOAT:
+                {
+#ifdef _MSC_VER
+                    datum.columnData = strtod(aggData.fConstValue.c_str(), 0);
+#else
+                    datum.columnData = strtof(aggData.fConstValue.c_str(), 0);
+#endif
+                }
+                break;
+
+                case execplan::CalpontSystemCatalog::DATE:
+                {
+                    datum.columnData = DataConvert::stringToDate(aggData.fConstValue);
+                }
+                break;
+
+                case execplan::CalpontSystemCatalog::DATETIME:
+                {
+                    datum.columnData = DataConvert::stringToDatetime(aggData.fConstValue);
+ } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::BLOB: + default: + { + datum.columnData = aggData.fConstValue; + } + break; + } + + valsIn.push_back(datum); + rc = context.getFunction()->nextValue(&context, valsIn); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + context.setInterrupted(true); + throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + } + + static_any::any valOut; + rc = context.getFunction()->evaluate(&context, valOut); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + context.setInterrupted(true); + throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + } + // Set the returned value into the output row + SetUDAFValue(valOut, colOut); + context.setDataFlags(NULL); + } + break; + default: { fRow.setStringField(aggData.fConstValue, colOut); @@ -2823,6 +3457,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) { int64_t colIn = fFunctionCols[i]->fInputColumnIndex; int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; switch (fFunctionCols[i]->fAggFunction) { @@ -2845,14 +3480,12 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) // The sum and count on UM may not be put next to each other: // use colOut to store the sum; // use colAux to store the count. 
- int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; doAvg(rowIn, colIn, colOut, colAux); break; } case ROWAGG_STATS: { - int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; doStatistics(rowIn, colIn, colOut, colAux); break; } @@ -2878,6 +3511,20 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) case ROWAGG_CONSTANT: break; + case ROWAGG_UDAF: + { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + if (rowUDAF) + { + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + } + else + { + throw logic_error("(6)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + break; + } + default: { std::ostringstream errmsg; @@ -3050,6 +3697,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut fRow.setUintField(valIn ^ valOut, colOut); } +//------------------------------------------------------------------------------ +// Subaggregate the UDAF. This calls subaggregate for each partially +// aggregated row returned by the PM +// rowIn(in) - Row to be included in aggregation. +// colIn(in) - column in the input row group +// colOut(in) - column in the output row group +// colAux(in) - Where the UDAF userdata resides +// rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance +//------------------------------------------------------------------------------ +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, + RowUDAFFunctionCol* rowUDAF) +{ + static_any::any valOut; + mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + + // Turn on NULL flags + std::vector flags; + uint32_t flag = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) + flag |= mcsv1sdk::PARAM_IS_NULL; + flags.push_back(flag); + rgContext.setDataFlags(&flags); + + // The intermediate values are stored in colAux. 
+ rgContext.setUserData(fRow.getUserData(colAux)); + + // Call the UDAF subEvaluate method + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + rc = rgContext.getFunction()->subEvaluate(&rgContext, rowIn.getUserData(colIn+1).get()); + rgContext.setUserData(NULL); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + rowUDAF->bInterrupted = true; + throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + } +} + //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ @@ -3163,6 +3847,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) { int64_t colIn = fFunctionCols[i]->fInputColumnIndex; int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; switch (fFunctionCols[i]->fAggFunction) { @@ -3192,7 +3877,6 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) // The sum and count on UM may not be put next to each other: // use colOut to store the sum; // use colAux to store the count. - int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; doAvg(rowIn, colIn, colOut, colAux); break; } @@ -3202,14 +3886,12 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) // The sum and count on UM may not be put next to each other: // use colOut to store the sum; // use colAux to store the count. 
- int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; RowAggregation::doAvg(rowIn, colIn, colOut, colAux); break; } case ROWAGG_STATS: { - int64_t colAux = fFunctionCols[i]->fAuxColumnIndex; doStatistics(rowIn, colIn, colOut, colAux); break; } @@ -3235,6 +3917,20 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) case ROWAGG_CONSTANT: break; + case ROWAGG_UDAF: + { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + if (rowUDAF) + { + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + } + else + { + throw logic_error("(7)A UDAF function is called but there's no RowUDAFFunctionCol"); + } + break; + } + default: { std::ostringstream errmsg; diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h old mode 100644 new mode 100755 index f441ef7cc..a3e938d0b --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -49,6 +49,7 @@ #include "hasher.h" #include "stlpoolallocator.h" #include "returnedcolumn.h" +#include "mcsv1_udaf.h" // To do: move code that depends on joblist to a proper subsystem. 
namespace joblist @@ -64,6 +65,7 @@ struct RowPosition { uint64_t group:48; uint64_t row:16; + static const uint64_t MSB = 0x800000000000ULL; //48th bit is set inline RowPosition(uint64_t g, uint64_t r) : group(g), row(r) { } inline RowPosition() { } @@ -105,6 +107,9 @@ enum RowAggFunctionType // Constant ROWAGG_CONSTANT, + // User Defined Aggregate Function + ROWAGG_UDAF, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -169,7 +174,10 @@ struct RowAggFunctionCol int32_t inputColIndex, int32_t outputColIndex, int32_t auxColIndex = -1) : fAggFunction(aggFunction), fStatsFunction(stats), fInputColumnIndex(inputColIndex), fOutputColumnIndex(outputColIndex), fAuxColumnIndex(auxColIndex) {} - ~RowAggFunctionCol() {} + virtual ~RowAggFunctionCol() {} + + virtual void serialize(messageqcpp::ByteStream& bs) const; + virtual void deserialize(messageqcpp::ByteStream& bs); RowAggFunctionType fAggFunction; // aggregate function // statistics function stores ROWAGG_STATS in fAggFunction and real function in fStatsFunction @@ -178,24 +186,86 @@ struct RowAggFunctionCol uint32_t fInputColumnIndex; uint32_t fOutputColumnIndex; - // fAuxColumnIndex is used in 3 cases: + // fAuxColumnIndex is used in 4 cases: // 1. for AVG - point to the count column, the fInputColumnIndex is for sum // 2. for statistics function - point to sum(x), +1 is sum(x**2) - // 3. for duplicate - point to the real aggretate column to be copied from + // 3. for UDAF - contain the context user data as binary + // 4. for duplicate - point to the real aggretate column to be copied from // Set only on UM, the fAuxColumnIndex is defaulted to fOutputColumnIndex+1 on PM. 
uint32_t fAuxColumnIndex; }; -inline messageqcpp::ByteStream& operator<<(messageqcpp::ByteStream& b, RowAggFunctionCol& o) -{ return (b << (uint8_t)o.fAggFunction << o.fInputColumnIndex << o.fOutputColumnIndex); } -inline messageqcpp::ByteStream& operator>>(messageqcpp::ByteStream& b, RowAggFunctionCol& o) -{ return (b >> (uint8_t&)o.fAggFunction >> o.fInputColumnIndex >> o.fOutputColumnIndex); } +struct RowUDAFFunctionCol : public RowAggFunctionCol +{ + RowUDAFFunctionCol(mcsv1sdk::mcsv1Context& context, int32_t inputColIndex, + int32_t outputColIndex, int32_t auxColIndex = -1) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, + inputColIndex, outputColIndex, auxColIndex), + fUDAFContext(context), bInterrupted(false) + { + fUDAFContext.setInterrupted(&bInterrupted); + } + RowUDAFFunctionCol(int32_t inputColIndex, + int32_t outputColIndex, int32_t auxColIndex = -1) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, + inputColIndex, outputColIndex, auxColIndex), + bInterrupted(false) + {} + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, + rhs.fInputColumnIndex, rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext) + {} + + virtual ~RowUDAFFunctionCol() {} + + virtual void serialize(messageqcpp::ByteStream& bs) const; + virtual void deserialize(messageqcpp::ByteStream& bs); + + mcsv1sdk::mcsv1Context fUDAFContext; // The UDAF context + bool bInterrupted; // Shared by all the threads +}; + +inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const +{ + bs << (uint8_t)fAggFunction; + bs << fInputColumnIndex; + bs << fOutputColumnIndex; +} + +inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) +{ + bs >> (uint8_t&)fAggFunction; + bs >> fInputColumnIndex; + bs >> fOutputColumnIndex; +} + +inline void RowUDAFFunctionCol::serialize(messageqcpp::ByteStream& bs) const +{ + RowAggFunctionCol::serialize(bs); + fUDAFContext.serialize(bs); +} + 
+inline void RowUDAFFunctionCol::deserialize(messageqcpp::ByteStream& bs) +{ + // This deserialize is called when the function gets to PrimProc. + // reset is called because we're starting a new sub-evaluate cycle. + RowAggFunctionCol::deserialize(bs); + fUDAFContext.unserialize(bs); + fUDAFContext.setInterrupted(&bInterrupted); + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + rc = fUDAFContext.getFunction()->reset(&fUDAFContext); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + bInterrupted = true; + throw logging::QueryDataExcept(fUDAFContext.getErrorMessage(), logging::aggregateFuncErr); + } +} struct ConstantAggData { std::string fConstValue; + std::string fUDAFName; // If a UDAF is called with constant. RowAggFunctionType fOp; bool fIsNull; @@ -205,6 +275,10 @@ struct ConstantAggData ConstantAggData(const std::string& v, RowAggFunctionType f, bool n) : fConstValue(v), fOp(f), fIsNull(n) {} + + ConstantAggData(const std::string& v, const std::string u, RowAggFunctionType f, bool n) : + fConstValue(v), fUDAFName(u), fOp(f), fIsNull(n) + {} }; typedef boost::shared_ptr SP_ROWAGG_GRPBY_t; @@ -377,7 +451,7 @@ class RowAggregation : public messageqcpp::Serializeable /** @brief reset RowAggregation outputRowGroup and hashMap */ - virtual void reset(); + virtual void aggReset(); /** @brief Define content of data to be aggregated and its aggregated output. 
* @@ -470,12 +544,15 @@ class RowAggregation : public messageqcpp::Serializeable virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); return true; } virtual bool newRowGroup(); virtual void clearAggMap() { if (fAggMapPtr) fAggMapPtr->clear(); } + void resetUDAF(uint64_t funcColID); + inline bool isNull(const RowGroup* pRowGroup, const Row& row, int64_t col); inline void makeAggFieldsNull(Row& row); inline void copyNullRow(Row& row) { copyRow(fNullRow, &row); } @@ -537,7 +614,6 @@ class RowAggregation : public messageqcpp::Serializeable friend class AggComparator; }; - //------------------------------------------------------------------------------ /** @brief derived Class that aggregates multi-rowgroups on UM * One-phase case: aggregate from projected RG to final aggregated RG. @@ -602,7 +678,7 @@ class RowAggregationUM : public RowAggregation void aggregateRow(Row &); //void initialize(); - virtual void reset(); + virtual void aggReset(); void setInputOutput(const RowGroup& pRowGroupIn, RowGroup* pRowGroupOut); @@ -628,6 +704,12 @@ class RowAggregationUM : public RowAggregation // calculate the statistics function all rows received. UM only function. void calculateStatisticsFunctions(); + // Sets the value from valOut into column colOut, performing any conversions. + void SetUDAFValue(static_any::any& valOut, int64_t colOut); + + // calculate the UDAF function all rows received. UM only function. + void calculateUDAFColumns(); + // fix duplicates. UM only function. 
void fixDuplicates(RowAggFunctionType funct); @@ -646,6 +728,7 @@ class RowAggregationUM : public RowAggregation bool fHasAvg; bool fKeyOnHeap; bool fHasStatsFunc; + bool fHasUDAF; boost::shared_ptr fDistinctAggregator; @@ -715,6 +798,7 @@ class RowAggregationUMP2 : public RowAggregationUM void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); bool countSpecial(const RowGroup* pRG) { return false; } }; diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp old mode 100644 new mode 100755 index 3561ef39a..cad0f34fb --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -38,7 +38,6 @@ using namespace std; #include -#include using namespace boost; #include "bytestream.h" @@ -113,7 +112,6 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len) void StringStore::serialize(ByteStream &bs) const { uint32_t i; - std::string empty_str; bs << (uint32_t) mem.size(); bs << (uint8_t) empty; @@ -126,30 +124,25 @@ void StringStore::serialize(ByteStream &bs) const } } -uint32_t StringStore::deserialize(ByteStream &bs) +void StringStore::deserialize(ByteStream &bs) { uint32_t i; uint32_t count; - uint32_t size; std::string buf; uint8_t tmp8; - uint32_t ret = 0; //mem.clear(); bs >> count; mem.reserve(count); bs >> tmp8; empty = (bool) tmp8; - ret += 5; for (i = 0; i < count; i++) { //cout << "deserializing " << size << " bytes\n"; bs >> buf; shared_ptr newString(new std::string(buf.c_str())); mem.push_back(newString); - //bs.advance(size); - ret += (size + 4); } - return ret; + return; } void StringStore::clear() @@ -159,6 +152,106 @@ void StringStore::clear() empty = true; } +UserDataStore::UserDataStore() : fUseUserDataMutex(false) +{ +} + +UserDataStore::~UserDataStore() +{ +} + +uint32_t UserDataStore::storeUserData(mcsv1sdk::mcsv1Context& context, 
+ boost::shared_ptr data, + uint32_t len) +{ + uint32_t ret = 0; + if (len == 0 || data == NULL) + { + return numeric_limits::max(); + } + + boost::mutex::scoped_lock lk(fMutex, defer_lock); + if (fUseUserDataMutex) + lk.lock(); + StoreData storeData; + storeData.length = len; + storeData.functionName = context.getName(); + storeData.userData = data; + vStoreData.push_back(storeData); + + ret = vStoreData.size(); + + return ret; +} + +boost::shared_ptr UserDataStore::getUserData(uint32_t off) const +{ + if (off == std::numeric_limits::max()) + return boost::shared_ptr(); + + if ((vStoreData.size() < off) || off == 0) + return boost::shared_ptr(); + + return vStoreData[off-1].userData; +} + + +void UserDataStore::serialize(ByteStream &bs) const +{ + size_t i; + + bs << (uint32_t) vStoreData.size(); + for (i = 0; i < vStoreData.size(); ++i) + { + const StoreData& storeData = vStoreData[i]; + bs << storeData.length; + bs << storeData.functionName; + storeData.userData->serialize(bs); + } +} + +void UserDataStore::deserialize(ByteStream &bs) +{ + size_t i; + uint32_t cnt; + bs >> cnt; + +// vStoreData.clear(); + vStoreData.resize(cnt); + + for (i = 0; i < cnt; i++) + { + bs >> vStoreData[i].length; + bs >> vStoreData[i].functionName; + + // We don't have easy access to the context here, so we do our own lookup + if (vStoreData[i].functionName.length() == 0) + { + throw std::logic_error("UserDataStore::deserialize: has empty name"); + } + mcsv1sdk::UDAF_MAP::iterator funcIter = mcsv1sdk::UDAFMap::getMap().find(vStoreData[i].functionName); + if (funcIter == mcsv1sdk::UDAFMap::getMap().end()) + { + std::ostringstream errmsg; + errmsg << "UserDataStore::deserialize: " << vStoreData[i].functionName << " is undefined"; + throw std::logic_error(errmsg.str()); + } + + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + mcsv1sdk::UserData* userData = NULL; + rc = funcIter->second->createUserData(userData, vStoreData[i].length); + if (rc != mcsv1sdk::mcsv1_UDAF::SUCCESS) + { + 
std::ostringstream errmsg; + errmsg << "UserDataStore::deserialize: " << vStoreData[i].functionName << " createUserData failed(" << rc << ")"; + throw std::logic_error(errmsg.str()); + } + userData->unserialize(bs); + vStoreData[i].userData = boost::shared_ptr(userData); + } + return; +} + //uint32_t rgDataCount = 0; RGData::RGData() @@ -222,7 +315,7 @@ void RGData::reinit(const RowGroup &rg) reinit(rg, 8192); } -RGData::RGData(const RGData &r) : rowData(r.rowData), strings(r.strings) +RGData::RGData(const RGData &r) : rowData(r.rowData), strings(r.strings), userDataStore(r.userDataStore) { //cout << "rgdata++ = " << __sync_add_and_fetch(&rgDataCount, 1) << endl; } @@ -244,49 +337,47 @@ void RGData::serialize(ByteStream &bs, uint32_t amount) const } else bs << (uint8_t) 0; + if (userDataStore) + { + bs << (uint8_t) 1; + userDataStore->serialize(bs); + } + else + bs << (uint8_t) 0; } -uint32_t RGData::deserialize(ByteStream &bs, bool hasLenField) +void RGData::deserialize(ByteStream &bs, bool hasLenField) { uint32_t amount, sig; uint8_t *buf; uint8_t tmp8; - uint32_t ret = 0; bs.peek(sig); if (sig == RGDATA_SIG) { bs >> sig; bs >> amount; - ret += 8; rowData.reset(new uint8_t[amount]); buf = bs.buf(); memcpy(rowData.get(), buf, amount); bs.advance(amount); bs >> tmp8; - ret += amount + 1; if (tmp8) { strings.reset(new StringStore()); - ret += strings->deserialize(bs); + strings->deserialize(bs); } else strings.reset(); - } - // crude backward compat. Remove after conversions are finished. 
- else { - if (hasLenField) { - bs >> amount; - ret += 4; + // UDAF user data + bs >> tmp8; + if (tmp8) { + userDataStore.reset(new UserDataStore()); + userDataStore->deserialize(bs); } else - amount = bs.length(); - rowData.reset(new uint8_t[amount]); - strings.reset(); - buf = bs.buf(); - memcpy(rowData.get(), buf, amount); - bs.advance(amount); - ret += amount; + userDataStore.reset(); } - return ret; + + return; } void RGData::clear() @@ -295,14 +386,25 @@ void RGData::clear() strings.reset(); } -Row::Row() : data(NULL), strings(NULL) { } +// UserDataStore is only used for UDAF. +// Just in time construction because most of the time we don't need one. +UserDataStore* RGData::getUserDataStore() +{ + if (!userDataStore) + { + userDataStore.reset(new UserDataStore); + } + return userDataStore.get(); +} + +Row::Row() : data(NULL), strings(NULL), userDataStore(NULL) { } Row::Row(const Row &r) : columnCount(r.columnCount), baseRid(r.baseRid), oldOffsets(r.oldOffsets), stOffsets(r.stOffsets), offsets(r.offsets), colWidths(r.colWidths), types(r.types), data(r.data), scale(r.scale), precision(r.precision), strings(r.strings), useStringTable(r.useStringTable), hasLongStringField(r.hasLongStringField), - sTableThreshold(r.sTableThreshold), forceInline(r.forceInline) + sTableThreshold(r.sTableThreshold), forceInline(r.forceInline), userDataStore(NULL) { } Row::~Row() { } @@ -623,9 +725,10 @@ bool Row::isNullValue(uint32_t colIndex) const break; default: { ostringstream os; - os << "Row::isNullValue(): got bad column type (" << types[colIndex] << - "). Width=" << getColumnWidth(colIndex) << endl; - os << toString() << endl; + os << "Row::isNullValue(): got bad column type ("; + os << types[colIndex]; + os << "). 
Width="; + os << getColumnWidth(colIndex) << endl; throw logic_error(os.str()); } } @@ -884,7 +987,9 @@ RowGroup & RowGroup::operator=(const RowGroup &r) return *this; } -RowGroup::~RowGroup() { } +RowGroup::~RowGroup() +{ +} void RowGroup::resetRowGroup(uint64_t rid) { diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h old mode 100644 new mode 100755 index 1347e5030..4eb1176dd --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -38,6 +38,7 @@ #include //#define NDEBUG #include +#include #include #include #include @@ -56,6 +57,7 @@ #include "bytestream.h" #include "calpontsystemcatalog.h" #include "exceptclasses.h" +#include "mcsv1_udaf.h" #include "branchpred.h" @@ -106,25 +108,74 @@ public: void clear(); void serialize(messageqcpp::ByteStream &) const; - uint32_t deserialize(messageqcpp::ByteStream &); + void deserialize(messageqcpp::ByteStream &); //@bug6065, make StringStore::storeString() thread safe void useStoreStringMutex(bool b) { fUseStoreStringMutex = b; } bool useStoreStringMutex() const { return fUseStoreStringMutex; } private: + std::string empty_str; + StringStore(const StringStore &); StringStore & operator=(const StringStore &); static const uint32_t CHUNK_SIZE = 64*1024; // allocators like powers of 2 // This is an overlay b/c the underlying data needs to be any size, - // and alloc'd in one chunk. data can't be a sepatate dynamic chunk. + // and alloc'd in one chunk. data can't be a separate dynamic chunk. std::vector > mem; bool empty; bool fUseStoreStringMutex; //@bug6065, make StringStore::storeString() thread safe boost::mutex fMutex; +}; +// Where we store user data for UDA(n)F +class UserDataStore +{ + // length represents the fixed portion length of userData. + // There may be variable length data in containers or other + // user created structures. 
+ struct StoreData + { + int32_t length; + std::string functionName; + boost::shared_ptr userData; + StoreData() : length(0) { } + StoreData(const StoreData& rhs) + { + length = rhs.length; + functionName = rhs.functionName; + userData = rhs.userData; + } + }; + +public: + UserDataStore(); + virtual ~UserDataStore(); + + void serialize(messageqcpp::ByteStream &) const; + void deserialize(messageqcpp::ByteStream &); + + //Set to make UserDataStore thread safe + void useUserDataMutex(bool b) { fUseUserDataMutex = b; } + bool useUserDataMutex() const { return fUseUserDataMutex; } + + // Returns the offset + uint32_t storeUserData(mcsv1sdk::mcsv1Context& context, + boost::shared_ptr data, + uint32_t length); + + boost::shared_ptr getUserData(uint32_t offset) const; + +private: + UserDataStore(const UserDataStore &); + UserDataStore & operator=(const UserDataStore &); + + std::vector vStoreData; + + bool fUseUserDataMutex; + boost::mutex fMutex; }; #ifdef _MSC_VER @@ -152,7 +203,7 @@ public: // the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends // inline data with a length field. Once that's converted to string table format, that // option can go away. - uint32_t deserialize(messageqcpp::ByteStream &, bool hasLengthField=false); // returns the # of bytes read + void deserialize(messageqcpp::ByteStream &, bool hasLengthField=false); // returns the # of bytes read inline uint64_t getStringTableMemUsage(); void clear(); @@ -169,9 +220,14 @@ public: void useStoreStringMutex(bool b) { if (strings) strings->useStoreStringMutex(b); } bool useStoreStringMutex() const { return (strings ? (strings->useStoreStringMutex()) : false); } + UserDataStore* getUserDataStore(); + // make UserDataStore::storeData() thread safe + void useUserDataMutex(bool b) { if (userDataStore) userDataStore->useUserDataMutex(b); } + bool useUserDataMutex() const { return (userDataStore ? 
(userDataStore->useUserDataMutex()) : false); } + boost::shared_array rowData; boost::shared_ptr strings; - + boost::shared_ptr userDataStore; private: //boost::shared_array rowData; //boost::shared_ptr strings; @@ -187,14 +243,17 @@ class Row { public: struct Pointer { - inline Pointer() : data(NULL), strings(NULL) { } + inline Pointer() : data(NULL), strings(NULL), userDataStore(NULL) { } // Pointer(uint8_t*) implicitly makes old code compatible with the string table impl; // make it explicit to identify things that still might need to be changed - inline Pointer(uint8_t *d) : data(d), strings(NULL) { } - inline Pointer(uint8_t *d, StringStore *s) : data(d), strings(s) { } + inline Pointer(uint8_t *d) : data(d), strings(NULL), userDataStore(NULL) { } + inline Pointer(uint8_t *d, StringStore *s) : data(d), strings(s), userDataStore(NULL) { } + inline Pointer(uint8_t *d, StringStore *s, UserDataStore *u) : + data(d), strings(s), userDataStore(u) { } uint8_t *data; StringStore *strings; + UserDataStore *userDataStore; }; Row(); @@ -290,6 +349,11 @@ class Row inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const; inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex); + inline boost::shared_ptr getUserData(uint32_t colIndex) const; + inline void setUserData(mcsv1sdk::mcsv1Context& context, + boost::shared_ptr userData, + uint32_t len, uint32_t colIndex); + uint64_t getNullValue(uint32_t colIndex) const; bool isNullValue(uint32_t colIndex) const; @@ -332,6 +396,7 @@ class Row inline bool equals(const Row &, uint32_t lastCol) const; inline bool equals(const Row &) const; + inline void setUserDataStore(UserDataStore* u) {userDataStore = u;} private: uint32_t columnCount; uint64_t baseRid; @@ -353,10 +418,12 @@ class Row boost::shared_array forceInline; inline bool inStringTable(uint32_t col) const; + UserDataStore* userDataStore; // For UDAF + friend class RowGroup; }; -inline Row::Pointer Row::getPointer() const { 
return Pointer(data, strings); } +inline Row::Pointer Row::getPointer() const { return Pointer(data, strings, userDataStore); } inline uint8_t * Row::getData() const { return data; } inline void Row::setPointer(const Pointer &p) @@ -368,6 +435,7 @@ inline void Row::setPointer(const Pointer &p) useStringTable = hasStrings; offsets = (useStringTable ? stOffsets : oldOffsets); } + userDataStore = p.userDataStore; } inline void Row::setData(const Pointer &p) { setPointer(p); } @@ -613,6 +681,15 @@ inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) c } } +inline boost::shared_ptr Row::getUserData(uint32_t colIndex) const +{ + if (!userDataStore) + { + return boost::shared_ptr(); + } + return userDataStore->getUserData(*((uint32_t *) &data[offsets[colIndex]])); +} + inline double Row::getDoubleField(uint32_t colIndex) const { return *((double *) &data[offsets[colIndex]]); @@ -783,6 +860,19 @@ inline void Row::setVarBinaryField(const uint8_t *val, uint32_t len, uint32_t co } } +inline void Row::setUserData(mcsv1sdk::mcsv1Context& context, + boost::shared_ptr userData, + uint32_t len, uint32_t colIndex) +{ + if (!userDataStore) + { + return; + } + uint32_t offset = userDataStore->storeUserData(context, userData, len); + *((uint32_t *) &data[offsets[colIndex]]) = offset; + *((uint32_t *) &data[offsets[colIndex] + 4]) = len; +} + inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const { uint32_t n = offsets[destIndex + 1] - offsets[destIndex]; @@ -1149,6 +1239,7 @@ inline void RowGroup::getRow(uint32_t rowNum, Row *r) const r->baseRid = getBaseRid(); r->data = &(data[headerSize + (rowNum * offsets[columnCount])]); r->strings = strings; + r->userDataStore = rgData->userDataStore.get(); } inline void RowGroup::setData(uint8_t *d) @@ -1523,13 +1614,14 @@ inline RGData & RGData::operator=(const RGData &r) { rowData = r.rowData; strings = r.strings; + userDataStore = r.userDataStore; return *this; } inline void RGData::getRow(uint32_t 
num, Row *row) { uint32_t size = row->getSize(); - row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * size)], strings.get())); + row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * size)], strings.get(), userDataStore.get())); } } diff --git a/utils/rowgroup/rowgroup.vpj b/utils/rowgroup/rowgroup.vpj old mode 100644 new mode 100755 index 81a61fef6..88252f0dc --- a/utils/rowgroup/rowgroup.vpj +++ b/utils/rowgroup/rowgroup.vpj @@ -1,220 +1,220 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Version="10.0" + VendorName="SlickEdit" + TemplateName="GNU C/C++" + WorkingDir="."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt old mode 100644 new mode 100755 index 809140c7f..07693e223 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,13 +4,13 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) add_library(udfsdk SHARED ${udfsdk_LIB_SRCS}) -set_target_properties(udfsdk PROPERTIES VERSION 1.0.0 SOVERSION 1) +set_target_properties(udfsdk PROPERTIES VERSION 1.1.0 SOVERSION 1) install(TARGETS udfsdk DESTINATION ${ENGINE_LIBDIR} COMPONENT libs) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp new file mode 100755 index 000000000..0e34c8c77 --- /dev/null +++ b/utils/udfsdk/allnull.cpp @@ -0,0 +1,96 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the 
License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include "allnull.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +struct allnull_data +{ + uint64_t totalQuantity; + uint64_t totalNulls; +}; + +#define OUT_TYPE int64_t +mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, + COL_TYPES& colTypes) +{ + context->setUserDataSize(sizeof(allnull_data)); + if (colTypes.size() < 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("allnull() with 0 arguments"); + return mcsv1_UDAF::ERROR; + } + context->setResultType(CalpontSystemCatalog::TINYINT); + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode allnull::finish(mcsv1Context* context) +{ + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) +{ + struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; + data->totalQuantity = 0; + data->totalNulls = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, + std::vector& valsIn) +{ + struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; + + for (size_t i = 0; i < context->getParameterCount(); i++) + { + data->totalQuantity++; + if (context->isParamNull(0)) + { + data->totalNulls++; + } + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode allnull::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + struct allnull_data* outData = (struct 
allnull_data*)context->getUserData()->data; + struct allnull_data* inData = (struct allnull_data*)userDataIn->data; + outData->totalQuantity += inData->totalQuantity; + outData->totalNulls += inData->totalNulls; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode allnull::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + OUT_TYPE allNull; + struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; + allNull = data->totalQuantity > 0 && data->totalNulls == data->totalQuantity; + valOut = allNull; + return mcsv1_UDAF::SUCCESS; +} + + diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h new file mode 100755 index 000000000..3d720b17d --- /dev/null +++ b/utils/udfsdk/allnull.h @@ -0,0 +1,225 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* mcsv1_UDAF.h +***********************************************************************/ + +/** + * Columnstore interface for writing a User Defined Aggregate + * Functions (UDAF) and User Defined Analytic Functions (UDAnF) + * or a function that can act as either - UDA(n)F + * + * The basic steps are: + * + * 1. Create a the UDA(n)F function interface in some .h file. + * 2. Create the UDF function implementation in some .cpp file + * 3. 
Create the connector stub (MariaDB UDAF definition) for + * this UDF function. + * 4. build the dynamic library using all of the source. + * 5. Put the library in $COLUMNSTORE_INSTALL/lib of + * all modules + * 6. restart the Columnstore system. + * 7. notify mysqld about the new functions with commands like: + * + * // An example of xor over a range for UDAF and UDAnF + * CREATE AGGREGATE FUNCTION mcs_bit_xor returns BOOL soname + * 'libudfsdk.so'; + * + * // An example that only makes sense as a UDAnF + * CREATE AGGREGATE FUNCTION mcs_interpolate returns REAL + * soname 'libudfsdk.so'; + * + * The UDAF functions may run distributed in the Columnstore + * engine. UDAnF do not run distributed. + * + * UDAF is User Defined Aggregate Function. + * UDAnF is User Defined Analytic Function. + * UDA(n)F is an acronym for a function that could be either. It + * is also used to describe the interface that is used for + * either. + */ +#ifndef HEADER_allnull +#define HEADER_allnull + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in context->setErrorMessage() is returned to MariaDB. 
+class allnull : public mcsv1_UDAF +{ +public: + // Defaults OK + allnull() : mcsv1_UDAF(){}; + virtual ~allnull(){}; + + /** + * init() + * + * Mandatory. Implement this to initialize flags and instance + * data. Called once per SQL statement. You can do any sanity + * checks here. + * + * colTypes (in) - A vector of ColDataType defining the + * parameters of the UDA(n)F call. These can be used to decide + * to override the default return type. If desired, the new + * return type can be set by context->setResultType() and + * decimal precision can be set in context-> + * setResultDecimalCharacteristics. + * + * Return mcsv1_UDAF::ERROR on any error, such as non-compatible + * colTypes or wrong number of arguments. Else return + * mcsv1_UDAF::SUCCESS. + */ + virtual ReturnCode init(mcsv1Context* context, + COL_TYPES& colTypes); + + /** + * finish() + * + * Mandatory. Completes the UDA(n)F. Called once per SQL + * statement. Do not free any memory allocated by + * context->setUserDataSize(). The SDK Framework owns that memory + * and will handle that. Often, there is nothing to do here. + */ + virtual ReturnCode finish(mcsv1Context* context); + + /** + * reset() + * + * Mandatory. Reset the UDA(n)F for a new group, partition or, + * in some cases, new Window Frame. Do not free any memory + * allocated by context->setUserDataSize(). The SDK Framework owns + * that memory and will handle that. Use this opportunity to + * reset any variables in context->getUserData() needed for the + * next aggregation. May be called multiple times if running in + * a distributed fashion. + * + * Use this opportunity to initialize the userData. + */ + virtual ReturnCode reset(mcsv1Context* context); + + /** + * nextValue() + * + * Mandatory. Handle a single row. + * + * colsIn - A vector of data structure describing the input + * data. + * + * This function is called once for every row in the filtered + * result set (before aggregation). 
It is very important that + * this function is efficient. + * + * If the UDAF is running in a distributed fashion, nextValue + * cannot depend on order, as it will only be called for each + * row found on the specific PM. + * + * valsIn (in) - a vector of the parameters from the row. + */ + virtual ReturnCode nextValue(mcsv1Context* context, + std::vector& valsIn); + + /** + * subEvaluate() + * + * Mandatory -- Called if the UDAF is running in a distributed + * fashion. Columnstore tries to run all aggregate functions + * distributed, depending on context. + * + * Perform an aggregation on rows partially aggregated by + * nextValue. Columnstore calls nextValue for each row on a + * given PM for a group (GROUP BY). subEvaluate is called on the + * UM to consolidate those values into a single instance of + * userData. Keep your aggregated totals in context's userData. + * The first time this is called for a group, reset() would have + * been called with this version of userData. + * + * Called for every partial data set in each group in GROUP BY. + * + * When subEvaluate has been called for all subAggregated data + * sets, evaluate will be called with the same context as here. + * + * userDataIn (in) - This is a pointer to a memory block of the size + * set in setUserDataSize. It will contain the value of userData + * as seen in the last call to nextValue for a given PM. + * + */ + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); + + /** + * evaluate() + * + * Mandatory. Get the aggregated value. + * + * Called for every new group if UDAF GROUP BY, UDAnF partition + * or, in some cases, new Window Frame. + * + * Set the aggregated value into valOut. The datatype is assumed + * to be the same as that set in the init() function. + * + * If the UDAF is running in a distributed fashion, evaluate is + * called after a series of subEvaluate calls. + * + * valOut (out) - Set the aggregated value here. 
The datatype is + * assumed to be the same as that set in the init() function; + * + * To return a NULL value, don't assign to valOut. + */ + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + +protected: + +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_allnull.h + diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp new file mode 100755 index 000000000..fd78389d5 --- /dev/null +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -0,0 +1,258 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "mcsv1_udaf.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; +/** + * All UDA(n)F functions must be registered in the function map. + * They will be picked up by the Columnstore modules during + * startup. + * + * This is a temporary kludge until we get the library loader + * task complete + */ +UDAF_MAP UDAFMap::fm; +#include "allnull.h" +#include "ssq.h" +#include "median.h" +UDAF_MAP& UDAFMap::getMap() +{ + if (fm.size() > 0) + { + return fm; + } + // first: function name + // second: Function pointer + // please use lower case for the function name. Because the names might be + // case-insensitive in MySQL depending on the setting. 
In such case, + // the function names passed to the interface is always in lower case. + fm["allnull"] = new allnull(); + fm["ssq"] = new ssq(); + fm["median"] = new median(); + + return fm; +} + +int32_t mcsv1Context::getColWidth() +{ + if (fColWidth > 0) + { + return fColWidth; + } + // JIT initialization for types that have a defined size. + switch (fResultType) + { + case CalpontSystemCatalog::BIT: + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::CHAR: + fColWidth = 1; + break; + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::USMALLINT: + fColWidth = 2; + break; + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DATE: + fColWidth = 4; + break; + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::STRINT: + fColWidth = 8; + break; + case CalpontSystemCatalog::LONGDOUBLE: + fColWidth = sizeof(long double); + break; + default: + break; + } + return fColWidth; +} + +bool mcsv1Context::operator==(const mcsv1Context& c) const +{ + // We don't test the per row data fields. They don't determine + // if it's the same Context. 
+ if (getName() != c.getName() + || fRunFlags != c.fRunFlags + || fContextFlags != c.fContextFlags + || fUserDataSize != c.fUserDataSize + || fResultType != c.fResultType + || fResultscale != c.fResultscale + || fResultPrecision != c.fResultPrecision + || fRowsInPartition != c.fRowsInPartition + || fStartFrame != c.fStartFrame + || fEndFrame != c.fEndFrame + || fStartConstant != c.fStartConstant + || fEndConstant != c.fEndConstant) + return false; + return true; +} + +bool mcsv1Context::operator!=(const mcsv1Context& c) const +{ + return (!(*this == c)); +} + +const std::string mcsv1Context::toString() const +{ + std::ostringstream output; + output << "mcsv1Context: " << getName() << std::endl; + output << " RunFlags=" << fRunFlags << " ContextFlags=" << fContextFlags << std::endl; + output << " UserDataSize=" << fUserDataSize << " ResultType=" << colDataTypeToString(fResultType) << std::endl; + output << " Resultscale=" << fResultscale << " ResultPrecision=" << fResultPrecision << std::endl; + output << " ErrorMsg=" << errorMsg << std::endl; + output << " bInterrupted=" << bInterrupted << " RowsInPartition=" << fRowsInPartition << std::endl; + output << " StartFrame=" << fStartFrame << " EndFrame=" << fEndFrame << std::endl; + output << " StartConstant=" << fStartConstant << " EndConstant=" << fEndConstant << std::endl; + return output.str(); +} + +mcsv1sdk::mcsv1_UDAF* mcsv1Context::getFunction() +{ + if (func) + { + return func; + } + + // Just in time initialization + if (functionName.length() == 0) + { + std::ostringstream errmsg; + errmsg << "mcsv1Context::getFunction: " << functionName << " is empty"; + throw std::logic_error(errmsg.str()); + } + mcsv1sdk::UDAF_MAP::iterator funcIter = mcsv1sdk::UDAFMap::getMap().find(functionName); + if (funcIter == mcsv1sdk::UDAFMap::getMap().end()) + { + std::ostringstream errmsg; + errmsg << "mcsv1Context::getFunction: " << functionName << " is undefined"; + throw std::logic_error(errmsg.str()); + } + func = 
funcIter->second; + return func; +} + +mcsv1sdk::mcsv1_UDAF* mcsv1Context::getFunction() const +{ + return const_cast(this)->getFunction(); +} + +void mcsv1Context::createUserData() +{ + // Try the function. If not implemented, create a byte array. + UserData* userData = NULL; + mcsv1_UDAF::ReturnCode rc = getFunction()->createUserData(userData, fUserDataSize); + if (rc == mcsv1_UDAF::ERROR) + { + std::ostringstream errmsg; + errmsg << "mcsv1Context::createUserData: " << functionName << errorMsg.c_str(); + throw std::logic_error(errmsg.str()); + } + setUserData(userData); +} + +void mcsv1Context::serialize(messageqcpp::ByteStream& b) const +{ + b.needAtLeast(sizeof(mcsv1Context)); + b << (ObjectReader::id_t) ObjectReader::MCSV1_CONTEXT; + b << functionName; + b << fRunFlags; + // Dont send context flags, These are set for each call + b << fUserDataSize; + b << (uint32_t)fResultType; + b << fResultscale; + b << fResultPrecision; + b << errorMsg; + // Don't send dataflags. These are set for each call + // bInterrupted is set internally. 
+ b << fRowsInPartition; + b << (uint32_t)fStartFrame; + b << (uint32_t)fEndFrame; + b << fStartConstant; + b << fEndConstant; +} + +void mcsv1Context::unserialize(messageqcpp::ByteStream& b) +{ + ObjectReader::checkType(b, ObjectReader::MCSV1_CONTEXT); + b >> functionName; + b >> fRunFlags; + b >> fUserDataSize; + uint32_t iResultType; + b >> iResultType; + fResultType = (CalpontSystemCatalog::ColDataType)iResultType; + b >> fResultscale; + b >> fResultPrecision; + b >> errorMsg; + b >> fRowsInPartition; + uint32_t frame; + b >> frame; + fStartFrame = (WF_FRAME)frame; + b >> frame; + fEndFrame = (WF_FRAME)frame; + b >> fStartConstant; + b >> fEndConstant; +} + +void UserData::serialize(messageqcpp::ByteStream& bs) const +{ + bs << size; + bs.append(data, size); +} + +void UserData::unserialize(messageqcpp::ByteStream& bs) +{ + bs >> size; + memcpy(data, bs.buf(), size); + bs.advance(size); +} + +const std::string typeStr(""); +const static_any::any& mcsv1_UDAF::charTypeId = (char)1; +const static_any::any& mcsv1_UDAF::scharTypeId = (signed char)1; +const static_any::any& mcsv1_UDAF::shortTypeId = (short)1; +const static_any::any& mcsv1_UDAF::intTypeId = (int)1; +const static_any::any& mcsv1_UDAF::longTypeId = (long)1; +const static_any::any& mcsv1_UDAF::llTypeId = (long long)1; +const static_any::any& mcsv1_UDAF::ucharTypeId = (unsigned char)1; +const static_any::any& mcsv1_UDAF::ushortTypeId = (unsigned short)1; +const static_any::any& mcsv1_UDAF::uintTypeId = (unsigned int)1; +const static_any::any& mcsv1_UDAF::ulongTypeId = (unsigned long)1; +const static_any::any& mcsv1_UDAF::ullTypeId = (unsigned long long)1; +const static_any::any& mcsv1_UDAF::floatTypeId = (float)1; +const static_any::any& mcsv1_UDAF::doubleTypeId = (double)1; +const static_any::any& mcsv1_UDAF::strTypeId = typeStr; + + diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h new file mode 100755 index 000000000..382e01d4a --- /dev/null +++ b/utils/udfsdk/mcsv1_udaf.h @@ -0,0 
+1,990 @@ +/* Copyright (C) 2017 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* mcsv1_udaf.h +***********************************************************************/ + +/** + * Columnstore interface for writing User Defined Aggregate + * Functions (UDAF) and User Defined Analytic Functions (UDAnF) + * or a function that can act as either - UDA(n)F + * + * The basic steps are: + * + * 1. Create the UDA(n)F function interface in some .h file. + * 2. Create the UDF function implementation in some .cpp file + * 3. Create the connector stub (MariaDB UDAF definition) for + * this UDF function. + * 4. build the dynamic library using all of the source. + * 5. Put the library in $COLUMNSTORE_INSTALL/lib of + * all modules + * 6. restart the Columnstore system. + * 7. notify mysqld about the new functions with commands like: + * + * // An example of xor over a range for UDAF and UDAnF + * CREATE AGGREGATE FUNCTION mcs_bit_xor returns BOOL soname + * 'libudfsdk.so'; + * + * // An example that only makes sense as a UDAnF + * CREATE AGGREGATE FUNCTION mcs_interpolate returns REAL + * soname 'libudfsdk.so'; + * + * The UDAF functions may run distributed in the Columnstore + * engine. UDAnF do not run distributed. + * + * UDAF is User Defined Aggregate Function. 
+ * UDAnF is User Defined Analytic Function. + * UDA(n)F is an acronym for a function that could be either. It + * is also used to describe the interface that is used for + * either. + */ +#ifndef HEADER_mcsv1_udaf +#define HEADER_mcsv1_udaf + +#include +#include +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif +#include "any.hpp" +#include "calpontsystemcatalog.h" +#include "wf_frame.h" + +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ +/** + * A map from name to function object. + * + * This is temporary until we get the library loading task + * complete + * + * TODO: Remove when library loading is enabled. + */ +class mcsv1_UDAF; +typedef std::tr1::unordered_map UDAF_MAP; + +class UDAFMap +{ +public: + EXPORT UDAFMap(){}; + + EXPORT ~UDAFMap(){}; + + static EXPORT UDAF_MAP& getMap(); +private: + static UDAF_MAP fm; +}; + +/** + * A class to hold your user data + * + * If your UDAF only needs a fixed sized data struct, you need + * do nothing with this. Call setUserDataSize in your init + * function with the required size and the framework will take + * care of it. + * + * If you need something more or just want to control things, + * then override UserData with your data structure and + * implement createUserData in your function object to create + * your data structure. Your UserData destuctor should take care + * of any cleanup you may need (Simple containers clean + * themselves up). + */ +class mcsv1Context; + +struct UserData +{ + UserData() : size(0), data(NULL) {}; + UserData(size_t sz) {size = sz; data = new uint8_t[sz];} + + virtual ~UserData() { if (data) delete [] data;} + + /** + * serialize() + * + * User data is passed between process. In order to do so, it + * must be serialized. 
// Flags to define the type and limitations of a UDA(n)F
// Used in context->fRunFlags
//
// These are bit masks and are never meant to change at run time, so they
// are declared const: every translation unit including this header gets
// its own copy, and a writable copy could silently diverge from the others.
static const uint64_t UDAF_OVER_REQUIRED        __attribute__ ((unused)) = 1;       // May only be used as UDAnF
static const uint64_t UDAF_OVER_ALLOWED         __attribute__ ((unused)) = 1 << 1;  // May be used as UDAF or UDAnF
static const uint64_t UDAF_ORDER_REQUIRED       __attribute__ ((unused)) = 1 << 2;  // If used as UDAnF, ORDER BY is required
static const uint64_t UDAF_ORDER_ALLOWED        __attribute__ ((unused)) = 1 << 3;  // If used as UDAnF, ORDER BY is optional
static const uint64_t UDAF_WINDOWFRAME_REQUIRED __attribute__ ((unused)) = 1 << 4;  // If used as UDAnF, a WINDOW FRAME is required
static const uint64_t UDAF_WINDOWFRAME_ALLOWED  __attribute__ ((unused)) = 1 << 5;  // If used as UDAnF, a WINDOW FRAME is optional
static const uint64_t UDAF_MAYBE_NULL           __attribute__ ((unused)) = 1 << 6;  // If UDA(n)F might return NULL.
static const uint64_t UDAF_IGNORE_NULLS         __attribute__ ((unused)) = 1 << 7;  // If UDA(n)F wants NULL rows suppressed.

// Flags set by the framework to define the context of the call.
// User code shouldn't use these directly
// used in context->fContextFlags
static const uint64_t CONTEXT_IS_ANALYTIC       __attribute__ ((unused)) = 1;       // If called using OVER
static const uint64_t CONTEXT_HAS_CURRENT_ROW   __attribute__ ((unused)) = 1 << 1;  // The current window contains the current row.
static const uint64_t CONTEXT_IS_PM             __attribute__ ((unused)) = 1 << 2;  // The call was made by the PM

// Flags that describe the contents of a specific input parameter
// These will be set in context->dataFlags for each method call by the framework.
// User code shouldn't use these directly
static const uint64_t PARAM_IS_NULL             __attribute__ ((unused)) = 1;
static const uint64_t PARAM_IS_CONSTANT         __attribute__ ((unused)) = 1 << 1;
+ // mcsv1Context should never be subclassed by UDA(n)F developers + EXPORT virtual ~mcsv1Context(); + + // Set an error message if something goes wrong + EXPORT void setErrorMessage(std::string errmsg); + + // Get the previously set error message + EXPORT const std::string& getErrorMessage() const; + + // Set the flags as a set. Return the previous flags. + EXPORT uint64_t setRunFlags(uint64_t flags); + // return the flags + EXPORT uint64_t getRunFlags() const; + + // The following set, get, clear and toggle methods can be used to manipulate + // multiple flags by ORing them together in the call sequence. + // Ex setRunFlag(UDAF_OVER_REQUIRED | UDAF_ORDER_REQUIRED); + // sets both flags and returns true if BOTH flags are already set. + // + // Set a specific flag and return its previous setting + EXPORT bool setRunFlag(uint64_t flag); + // Get a specific flag + EXPORT bool getRunFlag(uint64_t flag); + // clear a specific flag and return its previous setting + EXPORT bool clearRunFlag(uint64_t flag); + // toggle a specific flag and return its previous setting + EXPORT bool toggleRunFlag(uint64_t flag); + + // Use these to determine the way your UDA(n)F was called + // Valid in all method calls + EXPORT bool isAnalytic(); + EXPORT bool isWindowHasCurrentRow(); + + // Determine if the call is made by the UM + // This could be because the UDA(n)F is not being distributed + // Or it could be during setup or during consolodation of PM values. 
+ // valid in all calls + EXPORT bool isUM(); + + // Determine if the call is made by the PM + // This will be during partial aggregation performed on the PM + // valid in all calls + EXPORT bool isPM(); + + // Parameter refinement description accessors + // valid in nextValue, dropValue and evaluateCumulative + size_t getParameterCount() const; + + // Determine if an input parameter is NULL + // valid in nextValue, dropValue and evaluateCumulative + EXPORT bool isParamNull(int paramIdx); + + // If a parameter is a constant, the UDA(n)F could presumably optimize its workings. + // During the first call to nextValue() or evaluateCumulative(). + // Is there a better way to determine this? + // valid in nextValue, dropValue and evaluateCumulative + EXPORT bool isParamConstant(int paramIdx); + + // For getting the result type. + EXPORT CalpontSystemCatalog::ColDataType getResultType() const; + + // For getting the decimal characteristics for the return value. + // These will be set to the default before init(). + EXPORT int32_t getScale() const; + EXPORT int32_t getPrecision() const; + + // If you want to change the result type + // valid in init() + EXPORT bool setResultType(CalpontSystemCatalog::ColDataType resultType); + + // For setting the decimal characteristics for the return value. + // This only makes sense if the return type is decimal, but should be set + // to (0, -1) for other types if the inout is decimal. + // valid in init() + EXPORT bool setScale(int32_t scale); + EXPORT bool setPrecision(int32_t precision); + + // For all types, get the return column width in bytes. Ex. INT will return 4. + EXPORT int32_t getColWidth(); + + // For non-numric return types, set the return column width. This defaults + // to the the length of the input. + // valid in init() + EXPORT bool setColWidth(int32_t colWidth); + + // If a method is known to take a while, call this periodically to see if something + // interupted the processing. 
If getInterrupted() returns true, then the executing + // method should clean up and exit. + EXPORT bool getInterrupted() const; + + // Returns the actual number of rows in the partition. If no partitioning, returns 0. + // valid in reset() + EXPORT uint64_t getRowsInPartition() const; + + // Returns the number of rows in the aggregate. This could be the total number of rows, + // the number of rows in the group, or the number of rows in the PM's subaggregate, + // depending on the context it was called. + // valid in subEvaluate() end evaluate(). + EXPORT uint64_t getRowCnt() const; + + // Allocate instance specific memory. This should be type cast to a structure overlay + // defined by the function. The actual allocatoin occurs in the various modules that + // do the aggregation. If the UDAF is being calculated in a distributed fashion, then + // multiple instances of this data may be allocated. Calls to the subaggregate functions + // do not share a context. + // You do not need to worry about freeing this memory. The framework handles all management. + // Call this during init() + EXPORT void setUserDataSize(int bytes); + + // Call this everywhere except init() + EXPORT UserData* getUserData(); + + // Many UDAnF need a default Window Frame. If none is set here, the default is + // UNBOUNDED PRECEDING to CURRENT ROW. + // It's possible to not allow the the WINDOW FRAME phrase in the UDAnF by setting + // the UDAF_WINDOWFRAME_REQUIRED and UDAF_WINDOWFRAME_ALLOWED both to false. Columnstore + // requires a Window Frame in order to process UDAnF. In this case, the default will + // be used for all calls. 
+ // Possible values for start frame are + // WF_UNBOUNDED_PRECEDING, WF_CURRENT_ROW, WF_PRECEDING or WF_FOLLOWING + // possible values for end frame are + // WF_CURRENT_ROW, WF_UNBOUNDED_FOLLOWING, WF_PRECEDING or WF_FOLLOWING + // If WF_PRECEEdING and/or WF_FOLLOWING, a start or end constant should + // be included to say how many preceeding or following is the default + // Set this during init() + EXPORT bool setDefaultWindowFrame(WF_FRAME defaultStartFrame, + WF_FRAME defaultEndFrame, + int32_t startConstant = 0, // For WF_PRECEEDING or WF_FOLLOWING + int32_t endConstant = 0); // For WF_PRECEEDING or WF_FOLLOWING + + // There may be times you want to know the actual frame set by the caller + EXPORT void getStartFrame(WF_FRAME& startFrame, int32_t& startConstant) const; + EXPORT void getEndFrame(WF_FRAME& endFrame, int32_t& endConstant) const; + + // Deep Equivalence + bool operator==(const mcsv1Context& c) const; + bool operator!=(const mcsv1Context& c) const; + + // stream operator for debugging + EXPORT const std::string toString() const; + + // Get the name of the function + EXPORT const std::string& getName() const; + + EXPORT mcsv1Context& operator=(const mcsv1Context& rhs); + EXPORT mcsv1Context& copy(const mcsv1Context& rhs); + +private: + + uint64_t fRunFlags; // Set by the user to define the type of UDA(n)F + uint64_t fContextFlags; // Set by the framework to define this specific call. 
+ int32_t fUserDataSize; + boost::shared_ptr fUserData; + CalpontSystemCatalog::ColDataType fResultType; + int32_t fColWidth; // The length in bytes of the return type + int32_t fResultscale; // For scale, the number of digits to the right of the decimal + int32_t fResultPrecision; // The max number of digits allowed in the decimal value + std::string errorMsg; + std::vector* dataFlags; // one entry for each parameter + bool* bInterrupted; // Gets set to true by the Framework if something happens + uint64_t fRowsInPartition; // Only valid in reset() + int64_t fRowCnt; // The number of rows involved in this aggregate. + WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call + WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call + int32_t fStartConstant; // for start frame WF_PRECEEDIMG or WF_FOLLOWING + int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING + std::string functionName; + mcsv1sdk::mcsv1_UDAF* func; + +public: + // For use by the framework + EXPORT void serialize(messageqcpp::ByteStream& b) const; + EXPORT void unserialize(messageqcpp::ByteStream& b); + EXPORT void createUserData(); + EXPORT void setUserData(boost::shared_ptr userData); + EXPORT void setUserData(UserData* userData); + EXPORT void setName(std::string name); + EXPORT void setContextFlags(uint64_t flags); + EXPORT void setContextFlag(uint64_t flag); + EXPORT void clearContextFlag(uint64_t flag); + EXPORT uint64_t getContextFlags() const; + EXPORT uint32_t getUserDataSize() const; + EXPORT std::vector& getDataFlags(); + EXPORT void setDataFlags(std::vector* flags); + EXPORT void setInterrupted(bool interrupted); + EXPORT void setInterrupted(bool* interrupted); + EXPORT void setRowCnt(uint64_t cnt); + EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); + EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; + EXPORT boost::shared_ptr getUserDataSP(); +}; + +// Since aggregate functions can operate on 
any data type, we use the following structure +// to define the input row data. To be type insensiteve, data is stored in type static_any::any. +// +// To access the data it must be type cast to the correct type using boost::any_cast. +// example for int data: +// +// if (dataType == CalpontSystemCatalog::INT) +// int myint = boost::any_castcolumnData; +// +// For multi-paramter aggregations, the colsIn vector of next_value() +// contains the ordered set of row parameters. +// +// For char, varchar, text, varbinary and blob types, columnData will be std::string. +struct ColumnDatum +{ + CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h + static_any::any columnData; + uint32_t scale; // If dataType is a DECIMAL type + uint32_t precision; // If dataType is a DECIMAL type + ColumnDatum() : dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1){}; +}; + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. +class mcsv1_UDAF +{ +public: + enum ReturnCode + { + ERROR = 0, + SUCCESS = 1, + NOT_IMPLEMENTED = 2 // User UDA(n)F shouldn't return this + }; + // Defaults OK + mcsv1_UDAF(){}; + virtual ~mcsv1_UDAF(){}; + + /** + * init() + * + * Mandatory. Implement this to initialize flags and instance + * data. Called once per SQL statement. You can do any sanity + * checks here. + * + * colTypes (in) - A vector of ColDataType defining the + * parameters of the UDA(n)F call. 
These can be used to decide + * to override the default return type. If desired, the new + * return type can be set by context->setReturnType() and + * decimal scale and precision can be set by context->setScale + * and context->setPrecision respectively. + * + * Return mcsv1_UDAF::ERROR on any error, such as non-compatible + * colTypes or wrong number of arguments. Else return + * mcsv1_UDAF::SUCCESS. + */ + virtual ReturnCode init(mcsv1Context* context, + COL_TYPES& colTypes) = 0; + + /** + * finish() + * + * Mandatory. Completes the UDA(n)F. Called once per SQL + * statement. Do not free any memory allocated by + * createUserData(). The SDK Framework owns that memory + * and will handle that. Often, there is nothing to do here. + */ + virtual ReturnCode finish(mcsv1Context* context) = 0; + + /** + * reset() + * + * Mandatory. Reset the UDA(n)F for a new group, partition or, + * in some cases, new Window Frame. Do not free any memory + * allocated by createUserData(). The SDK Framework owns + * that memory and will handle that. Use this opportunity to + * reset any variables in context->getUserData() needed for the + * next aggregation. May be called multiple times if running in + * a ditributed fashion. + * + * Use this opportunity to initialize the userData. + */ + virtual ReturnCode reset(mcsv1Context* context) = 0; + + /** + * nextValue() + * + * Mandatory. Handle a single row. + * + * colsIn - A vector of data structure describing the input + * data. + * + * This function is called once for every row in the filtered + * result set (before aggregation). It is very important that + * this function is efficient. + * + * If the UDAF is running in a distributed fashion, nextValue + * cannot depend on order, as it will only be called for each + * row found on the specific PM. + * + * valsIn (in) - a vector of the parameters from the row. 
+ */ + virtual ReturnCode nextValue(mcsv1Context* context, + std::vector& valsIn) = 0; + + /** + * subEvaluate() + * + * Mandatory -- Called if the UDAF is running in a distributed + * fashion. Columnstore tries to run all aggregate functions + * distributed, depending on context. + * + * Perform an aggregation on rows partially aggregated by + * nextValue. Columnstore calls nextValue for each row on a + * given PM for a group (GROUP BY). subEvaluate is called on the + * UM to consolodate those values into a single instance of + * userData. Keep your aggregated totals in context's userData. + * The first time this is called for a group, reset() would have + * been called with this version of userData. + * + * Called for every partial data set in each group in GROUP BY. + * + * When subEvaluate has been called for all subAggregated data + * sets, Evaluate will be called with the same context as here. + * + * valIn (In) - This is a pointer to a UserData class with the + * partially aggregated values. It will contain the value of + * userData as seen in the last call to NextValue for a given + * PM. + * + */ + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn) = 0; + + /** + * evaluate() + * + * Mandatory. Get the aggregated value. + * + * Called for every new group if UDAF GROUP BY, UDAnF partition + * or, in some cases, new Window Frame. + * + * Set the aggregated value into valOut. The datatype is assumed + * to be the same as that set in the init() function; + * + * If the UDAF is running in a distributed fashion, evaluate is + * called after a series of subEvaluate calls. + * + * valOut (out) - Set the aggregated value here. The datatype is + * assumed to be the same as that set in the init() function; + * + * To return a NULL value, don't assign to valOut. 
+ */ + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut) = 0; + + /** + * dropValue() + * + * Optional -- If defined, the server will call this instead of + * reset for UDAnF. + * + * Don't implement if a UDAnF has one or more of the following: + * The UDAnF can't be used with a Window Frame + * The UDAnF is not reversable in some way + * The UDAnF is not interested in optimal performance + * + * If not implemented, reset() followed by a series of + * nextValue() will be called for each movement of the Window + * Frame. + * + * If implemented, then each movement of the Window Frame will + * result in dropValue() being called for each row falling out + * of the Frame and nextValue() being called for each new row + * coming into the Frame. + * + * valsDropped (in) - a vector of the parameters from the row + * leaving the Frame + * + * dropValue() will not be called for unbounded/current row type + * frames, as those are already optimized. + */ + virtual ReturnCode dropValue(mcsv1Context* context, + std::vector& valsDropped); + + /** + * createUserData() + * + * Optional -- The default is to create a data byte array of + * size as set in context->setUserDataSize() + * + * Create your variable length data structure via + * userData = new + * + * The data structure may contain references to containers or + * pointers to other objects. Remember that for distributed + * processing, this may be called multiple times for variaous + * computing blocks. At the least, it will be called once per PM + * that processes the data, and once more for the UM. For UDAnF, + * it may only be called once. + * + * Set length to the base length of the data structure you + * create. 
+ * + */ + virtual ReturnCode createUserData(UserData*& userdata, int32_t& length); + +protected: + // These are handy for testing the actual type of static_any + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; +}; + +/*********************************************************************** + * There is no user modifiable code past this point + ***********************************************************************/ +// Function definitions for mcsv1Context +inline mcsv1Context::mcsv1Context() : + fRunFlags(UDAF_OVER_ALLOWED | UDAF_ORDER_ALLOWED | UDAF_WINDOWFRAME_ALLOWED), + fContextFlags(0), + fUserDataSize(0), + fResultType(CalpontSystemCatalog::UNDEFINED), + fColWidth(0), + fResultscale(0), + fResultPrecision(18), + dataFlags(NULL), + bInterrupted(NULL), + fRowsInPartition(0), + fStartFrame(WF_UNBOUNDED_PRECEDING), + fEndFrame(WF_CURRENT_ROW), + fStartConstant(0), + fEndConstant(0), + func(NULL) +{ +} + +inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : + fContextFlags(0), + fColWidth(0), + dataFlags(NULL), + bInterrupted(NULL), + func(NULL) +{ + copy(rhs); +} + +inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) +{ + fRunFlags = rhs.getRunFlags(); + fResultType = rhs.getResultType(); + fUserDataSize = rhs.getUserDataSize(); + fResultscale = rhs.getScale(); + fResultPrecision = rhs.getPrecision(); + rhs.getStartFrame(fStartFrame, fStartConstant); + rhs.getEndFrame(fEndFrame, 
fEndConstant); + functionName = rhs.getName(); + bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference + return *this; +} + +inline mcsv1Context::~mcsv1Context() +{ +} + +inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) +{ + fContextFlags = 0; + fColWidth = 0; + dataFlags = NULL; + bInterrupted = NULL; + func = NULL; + return copy(rhs); +} + +inline void mcsv1Context::setErrorMessage(std::string errmsg) +{ + errorMsg = errmsg; +} + +inline const std::string& mcsv1Context::getErrorMessage() const +{ + return errorMsg; +} + +inline uint64_t mcsv1Context::setRunFlags(uint64_t flags) +{ + uint64_t f = fRunFlags; + fRunFlags = flags; + return f; +} + +inline uint64_t mcsv1Context::getRunFlags() const +{ + return fRunFlags; +} + +inline bool mcsv1Context::setRunFlag(uint64_t flag) +{ + bool b = fRunFlags & flag; + fRunFlags |= flag; + return b; +} + +inline bool mcsv1Context::getRunFlag(uint64_t flag) +{ + return fRunFlags & flag; +} + +inline bool mcsv1Context::clearRunFlag(uint64_t flag) +{ + bool b = fRunFlags & flag; + fRunFlags &= ~flag; + return b; +} + +inline bool mcsv1Context::toggleRunFlag(uint64_t flag) +{ + bool b = fRunFlags & flag; + fRunFlags ^= flag; + return b; +} + +inline bool mcsv1Context::isAnalytic() +{ + return fContextFlags & CONTEXT_IS_ANALYTIC; +} + +inline bool mcsv1Context::isWindowHasCurrentRow() +{ + return fContextFlags & CONTEXT_HAS_CURRENT_ROW; +} + +inline bool mcsv1Context::isUM() +{ + return !(fContextFlags & CONTEXT_IS_PM); +} + +inline bool mcsv1Context::isPM() +{ + return fContextFlags & CONTEXT_IS_PM; +} + +inline size_t mcsv1Context::getParameterCount() const +{ + if (dataFlags) + return dataFlags->size(); + return 0; +} + +inline bool mcsv1Context::isParamNull(int paramIdx) +{ + if (dataFlags) + return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return false; +} + +inline bool mcsv1Context::isParamConstant(int paramIdx) +{ + if (dataFlags) + return (*dataFlags)[paramIdx] & 
PARAM_IS_CONSTANT; + return false; +} + +inline CalpontSystemCatalog::ColDataType mcsv1Context::getResultType() const +{ + return fResultType; +} + +inline bool mcsv1Context::setResultType(CalpontSystemCatalog::ColDataType resultType) +{ + fResultType = resultType; + return true; // We may want to sanity check here. +} + +inline int32_t mcsv1Context::getScale() const +{ + return fResultscale; +} + +inline int32_t mcsv1Context::getPrecision() const +{ + return fResultPrecision; +} + +inline bool mcsv1Context::setScale(int32_t scale) +{ + fResultscale = scale; + return true; +} + +inline bool mcsv1Context::setPrecision(int32_t precision) +{ + fResultPrecision = precision; + return true; +} + +inline bool mcsv1Context::setColWidth(int32_t colWidth) +{ + fColWidth = colWidth; + return true; +} + +inline void mcsv1Context::setInterrupted(bool interrupted) +{ + if (bInterrupted) + { + *bInterrupted = interrupted; + } +} + +inline void mcsv1Context::setInterrupted(bool* interrupted) +{ + bInterrupted = interrupted; +} + +inline bool mcsv1Context::getInterrupted() const +{ + if (bInterrupted) + { + return bInterrupted; + } + return false; +} + +inline uint64_t mcsv1Context::getRowsInPartition() const +{ + return fRowsInPartition; +} + +inline uint64_t mcsv1Context::getRowCnt() const +{ + return fRowCnt; +} + +inline void mcsv1Context::setUserDataSize(int bytes) +{ + fUserDataSize = bytes; +} + +inline UserData* mcsv1Context::getUserData() +{ + if (!fUserData) + { + createUserData(); + } + return fUserData.get(); +} + +inline boost::shared_ptr mcsv1Context::getUserDataSP() +{ + if (!fUserData) + { + createUserData(); + } + return fUserData; +} + +inline void mcsv1Context::setUserData(boost::shared_ptr userData) +{ + fUserData = userData; +} + +inline void mcsv1Context::setUserData(UserData* userData) +{ + if (userData) + { + fUserData.reset(userData); + } + else + { + fUserData.reset(); + } +} + +inline bool mcsv1Context::setDefaultWindowFrame(WF_FRAME defaultStartFrame, + 
WF_FRAME defaultEndFrame, + int32_t startConstant, + int32_t endConstant) +{ + // TODO: Add sanity checks + fStartFrame = defaultStartFrame; + fEndFrame = defaultEndFrame; + fStartConstant = startConstant; + fEndConstant = endConstant; + return true; +} + +inline void mcsv1Context::getStartFrame(WF_FRAME& startFrame, int32_t& startConstant) const +{ + startFrame = fStartFrame; + startConstant = fStartConstant; +} + +inline void mcsv1Context::getEndFrame(WF_FRAME& endFrame, int32_t& endConstant) const +{ + endFrame = fEndFrame; + endConstant = fEndConstant; +} + +inline const std::string& mcsv1Context::getName() const +{ + return functionName; +} + +inline void mcsv1Context::setName(std::string name) +{ + functionName = name; +} + +inline void mcsv1Context::setRowCnt(uint64_t cnt) +{ + fRowCnt = cnt; +} + +inline uint64_t mcsv1Context::getContextFlags() const +{ + return fContextFlags; +} + +inline void mcsv1Context::setContextFlags(uint64_t flags) +{ + fContextFlags = flags; +} + +inline void mcsv1Context::setContextFlag(uint64_t flag) +{ + fContextFlags |= flag; +} + +inline void mcsv1Context::clearContextFlag(uint64_t flag) +{ + fContextFlags &= ~flag; +} + +inline uint32_t mcsv1Context::getUserDataSize() const +{ + return fUserDataSize; +} + +inline std::vector& mcsv1Context::getDataFlags() +{ + return *dataFlags; +} + +inline void mcsv1Context::setDataFlags(std::vector* flags) +{ + dataFlags = flags; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, + std::vector& valsDropped) +{ + return NOT_IMPLEMENTED; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, int32_t& length) +{ + userData = new UserData(length); + userData->size = length; + return SUCCESS; +} + +}; // namespace mcssdk + +#undef EXPORT + +#endif // HEADER_mcsv1_udaf.h + diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp new file mode 100755 index 000000000..2c47896f8 --- /dev/null +++ b/utils/udfsdk/median.cpp @@ -0,0 
/* Copyright (C) 2017 MariaDB Corporation
static_cast(context->getUserData()); + data->mData.clear(); + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, + std::vector& valsIn) +{ + static_any::any& valIn = valsIn[0].columnData; + MEDIAN_DATA& data = static_cast(context->getUserData())->mData; + DATATYPE val = 0.0; + + if (valIn.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn.compatible(charTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(scharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(shortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(intTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(longTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(llTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ucharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ushortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(uintTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ulongTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ullTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(floatTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(doubleTypeId)) + { + val = valIn.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + data[val]++; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode median::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + MEDIAN_DATA& outData = static_cast(context->getUserData())->mData; + const MEDIAN_DATA& inData = static_cast(userDataIn)->mData; + MEDIAN_DATA::const_iterator iter = inData.begin(); + for (; iter != inData.end(); ++iter) + { + outData[iter->first] += iter->second; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + uint64_t cnt1=0, cnt2=0; + MEDIAN_DATA& data = static_cast(context->getUserData())->mData; + MEDIAN_DATA::iterator iter(data.begin()); + MEDIAN_DATA::iterator revfrom(data.end()); + MEDIAN_DATA::reverse_iterator riter(revfrom); + cnt1 += iter->second; + cnt2 += riter->second; + while (iter->first < riter->first) + { + while (cnt1 < cnt2 && iter->first < riter->first) + { + ++iter; + cnt1 += iter->second; + } + while (cnt2 < cnt1 &&iter->first < riter->first) + { + ++riter; + cnt2 += riter->second; + } + while (cnt1 == cnt2 && iter->first < riter->first) + { + ++iter; + cnt1 += iter->second; + if (iter->first > riter->first) + { + break; + } + ++riter; + cnt2 += riter->second; + } + } + valOut = (iter->first + riter->first) / 2; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, + std::vector& valsDropped) +{ + static_any::any& valIn = valsDropped[0].columnData; + MEDIAN_DATA& data = static_cast(context->getUserData())->mData; + DATATYPE val = 0.0; + + if (valIn.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn.compatible(charTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(scharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(shortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(intTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(longTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(llTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ucharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ushortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(uintTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ulongTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ullTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(floatTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(doubleTypeId)) + { + val = valIn.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data[val]--; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode median::createUserData(UserData*& userData, int32_t& length) +{ + userData = new MedianData; + length = sizeof(MedianData); + return mcsv1_UDAF::SUCCESS; +} + +void MedianData::serialize(messageqcpp::ByteStream& bs) const +{ + MEDIAN_DATA::const_iterator iter = mData.begin(); + DATATYPE num; + uint32_t cnt; + bs << (int32_t)mData.size(); + for (; iter != mData.end(); ++iter) + { + num = iter->first; + bs << num; + cnt = iter->second; + bs << cnt; + } +} + +void MedianData::unserialize(messageqcpp::ByteStream& bs) +{ + mData.clear(); + int32_t sz; + DATATYPE num; + uint32_t cnt; + bs >> sz; + for (int i = 0; i < sz; ++i) + { + bs >> num; + bs >> cnt; + mData[num] = cnt; + } +} + diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h new file mode 100755 index 000000000..9d529aa87 --- /dev/null +++ 
 * The basic steps are:
 *
 * 1. Create the UDA(n)F function interface in some .h file.
 * 2. Create the UDF function implementation in some .cpp file.
 * 3. Create the connector stub (MariaDB UDAF definition) for
 *    this UDF function.
 * 4. Build the dynamic library using all of the source.
 * 5. Put the library in $COLUMNSTORE_INSTALL/lib of
 *    all modules.
 * 6. Restart the Columnstore system.
 * 7. Notify mysqld about the new function:
+ */ +#ifndef HEADER_median +#define HEADER_median + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +#define DATATYPE double +typedef std::map MEDIAN_DATA; + +// Override UserData for data storage +struct MedianData : public UserData +{ + MedianData() {}; + + virtual ~MedianData(){} + + virtual void serialize(messageqcpp::ByteStream& bs) const; + virtual void unserialize(messageqcpp::ByteStream& bs); + + MEDIAN_DATA mData; +private: + // For now, copy construction is unwanted + MedianData(UserData&); +}; + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. + +// Return the median value of the dataset + +class median : public mcsv1_UDAF +{ +public: + // Defaults OK + median() : mcsv1_UDAF(){}; + virtual ~median(){}; + + /** + * init() + * + * Mandatory. Implement this to initialize flags and instance + * data. Called once per SQL statement. You can do any sanity + * checks here. + * + * colTypes (in) - A vector of ColDataType defining the + * parameters of the UDA(n)F call. These can be used to decide + * to override the default return type. 
/**
 * reset()
 *
 * Mandatory. Reset the UDA(n)F for a new group, partition or,
 * in some cases, new Window Frame. Do not free any memory
 * allocated by context->setUserDataSize(). The SDK Framework owns
 * that memory and will handle that. Use this opportunity to
 * reset any variables in context->getUserData() needed for the
 * next aggregation. May be called multiple times if running in
 * a distributed fashion.
 *
 * Use this opportunity to initialize the userData. For median,
 * the implementation clears the value/count map held in the
 * MedianData user data.
 */
virtual ReturnCode reset(mcsv1Context* context);
Columnstore tries to run all aggregate functions + * distributed, depending on context. + * + * Perform an aggregation on rows partially aggregated by + * nextValue. Columnstore calls nextValue for each row on a + * given PM for a group (GROUP BY). subEvaluate is called on the + * UM to consolodate those values into a single instance of + * userData. Keep your aggregated totals in context's userData. + * The first time this is called for a group, reset() would have + * been called with this version of userData. + * + * Called for every partial data set in each group in GROUP BY. + * + * When subEvaluate has been called for all subAggregated data + * sets, Evaluate will be called with the same context as here. + * + * valIn (In) - This is a pointer to a memory block of the size + * set in setUserDataSize. It will contain the value of userData + * as seen in the last call to NextValue for a given PM. + * + */ + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + /** + * evaluate() + * + * Mandatory. Get the aggregated value. + * + * Called for every new group if UDAF GROUP BY, UDAnF partition + * or, in some cases, new Window Frame. + * + * Set the aggregated value into valOut. The datatype is assumed + * to be the same as that set in the init() function; + * + * If the UDAF is running in a distributed fashion, evaluate is + * called after a series of subEvaluate calls. + * + * valOut (out) - Set the aggregated value here. The datatype is + * assumed to be the same as that set in the init() function; + * + * To return a NULL value, don't assign to valOut. + */ + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + /** + * dropValue() + * + * Optional -- If defined, the server will call this instead of + * reset for UDAnF. 
/**
 * createUserData()
 *
 * Optional -- If defined, the server will call this instead of
 * createUserData on context.
 *
 * Create your variable length data structure via
 * data = new <your UserData subclass> (median allocates a
 * MedianData).
 *
 * The data structure may contain references to containers or
 * pointers to other objects. Remember that for distributed
 * processing, this may be called multiple times for various
 * computing blocks. At the least, it will be called once per PM
 * that processes the data, and once more for the UM. For UDAnF,
 * it may only be called once.
 *
 * Set length to the length of the data structure you create.
 *
 * For each call to createUserData(), there will be a
 * corresponding deleteUserData() where you must clean up. Any
 * memory leaks are your fault.
 *
 * NOTE(review): no deleteUserData() is declared in this class,
 * and mcsv1Context::setUserData(UserData*) hands the pointer to
 * a shared pointer via reset() -- confirm who is expected to
 * release the object.
 *
 */
virtual ReturnCode createUserData(UserData*& data, int32_t& length);
#define DATATYPE double

// Running state for ssq(). ssq::init() sizes the user data block with
// sizeof(ssq_data), and the ssq callbacks access that block through a
// pointer to this struct.
struct ssq_data
{
    uint64_t scale;
    DATATYPE sumsq;   // running sum of the squared input values

    // Zero both members. sumsq was previously left uninitialized by this
    // constructor.
    ssq_data() : scale(0), sumsq(0) {}
};
+ } + + if (valIn.compatible(charTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(scharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(shortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(intTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(longTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(llTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ucharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ushortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(uintTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ulongTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ullTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(floatTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(doubleTypeId)) + { + val = valIn.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsIn[0].scale; + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + data->sumsq += val*val; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode ssq::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + struct ssq_data* outData = (struct ssq_data*)context->getUserData()->data; + struct ssq_data* inData = (struct ssq_data*)userDataIn->data; + outData->sumsq += inData->sumsq; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; + valOut = data->sumsq; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, + std::vector& valsDropped) +{ + static_any::any& valIn = valsDropped[0].columnData; + struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not 
happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn.compatible(charTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(scharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(shortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(intTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(longTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(llTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ucharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ushortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(uintTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ulongTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ullTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(floatTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(doubleTypeId)) + { + val = valIn.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + data->sumsq -= val*val; + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h new file mode 100755 index 000000000..893c578af --- /dev/null +++ b/utils/udfsdk/ssq.h @@ -0,0 +1,248 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
 * The basic steps are:
 *
 * 1. Create the UDA(n)F function interface in some .h file.
 * 2. Create the UDF function implementation in some .cpp file.
 * 3. Create the connector stub (MariaDB UDAF definition) for
 *    this UDF function.
 * 4. Build the dynamic library using all of the source.
 * 5. Put the library in $COLUMNSTORE_INSTALL/lib of
 *    all modules.
 * 6. Restart the Columnstore system.
 * 7. Notify mysqld about the new function:
+// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. + +// A simple aggregate to return the sum of squares +class ssq : public mcsv1_UDAF +{ +public: + // Defaults OK + ssq() : mcsv1_UDAF(){}; + virtual ~ssq(){}; + + /** + * init() + * + * Mandatory. Implement this to initialize flags and instance + * data. Called once per SQL statement. You can do any sanity + * checks here. + * + * colTypes (in) - A vector of ColDataType defining the + * parameters of the UDA(n)F call. These can be used to decide + * to override the default return type. If desired, the new + * return type can be set by context->setReturnType() and + * decimal scale and precision can be set by context->setScale + * and context->setPrecision respectively. + * + * Return mcsv1_UDAF::ERROR on any error, such as non-compatible + * colTypes or wrong number of arguments. Else return + * mcsv1_UDAF::SUCCESS. + */ + virtual ReturnCode init(mcsv1Context* context, + COL_TYPES& colTypes); + + /** + * finish() + * + * Mandatory. Completes the UDA(n)F. Called once per SQL + * statement. Do not free any memory allocated by + * context->createUserData(). The SDK Framework owns that memory + * and will handle that. Often, there is nothing to do here. + */ + virtual ReturnCode finish(mcsv1Context* context); + + /** + * reset() + * + * Mandatory. Reset the UDA(n)F for a new group, partition or, + * in some cases, new Window Frame. Do not free any memory + * allocated by context->createUserData(). The SDK Framework + * owns that memory and will handle that. 
Use this opportunity + * to reset any variables in context->getUserData() needed for + * the next aggregation. May be called multiple times on + * different modules. + */ + virtual ReturnCode reset(mcsv1Context* context); + + /** + * nextValue() + * + * Mandatory. Handle a single row. + * + * colsIn - A vector of data structure describing the input + * data. + * + * This function is called once for every row in the filtered + * result set (before aggregation). It is very important that + * this function is efficient. + * + * If the UDAF is running in a distributed fashion, nextValue + * cannot depend on order, as it will only be called for each + * row found on the specific PM. + * + * valsIn (in) - a vector of the parameters from the row. + */ + virtual ReturnCode nextValue(mcsv1Context* context, + std::vector& valsIn); + + /** + * subEvaluate() + * + * Mandatory -- Called if the UDAF is running in a distributed + * fashion. Columnstore tries to run all aggregate functions + * distributed, depending on context. + * + * Perform an aggregation on rows partially aggregated by + * nextValue. Columnstore calls nextValue for each row on a + * given PM for a group (GROUP BY). subEvaluate is called on the + * UM to consolidate those values into a single instance of + * userData. Keep your aggregated totals in context's userData. + * The first time this is called for a group, reset() would have + * been called with this version of userData. + * + * Called for every partial data set in each group in GROUP BY. + * + * When subEvaluate has been called for all subAggregated data + * sets, evaluate will be called. + * + * userDataIn (in) - This is a pointer to a memory block of the size + * set in setUserDataSize. It will contain the value of userData + * as seen in the last call to nextValue for a given PM. + * + */ + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); + + /** + * evaluate() + * + * Mandatory. Get the aggregated value.
+ * + * Called for every new group if UDAF GROUP BY, UDAnF partition + * or, in some cases, new Window Frame. + * + * Set the aggregated value into valOut. The datatype is assumed + * to be the same as that set in the init() function. + * + * If the UDAF is running in a distributed fashion, evaluate is + * called after a series of subEvaluate calls. + * + * valOut (out) - Set the aggregated value here. The datatype is + * assumed to be the same as that set in the init() function. + * + * To return a NULL value, don't assign to valOut. + */ + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + /** + * dropValue() + * + * Optional -- If defined, the server will call this instead of + * reset for UDAnF. + * + * Don't implement if a UDAnF has one or more of the following: + * The UDAnF can't be used with a Window Frame + * The UDAnF is not reversible in some way + * The UDAnF is not interested in optimal performance + * + * If not implemented, reset() followed by a series of + * nextValue() will be called for each movement of the Window + * Frame. + * + * If implemented, then each movement of the Window Frame will + * result in dropValue() being called for each row falling out + * of the Frame and nextValue() being called for each new row + * coming into the Frame. + * + * valsDropped (in) - a vector of the parameters from the row + * leaving the Frame + * + * dropValue() will not be called for unbounded/current row type + * frames, as those are already optimized.
+ */ + virtual ReturnCode dropValue(mcsv1Context* context, + std::vector& valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_ssq.h + diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp old mode 100644 new mode 100755 index e6707278b..6a8d34b28 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -168,13 +168,239 @@ void mcs_isnull_deinit(UDF_INIT* initid) } #ifdef _MSC_VER -__declspec(dllexport) +__declspec(dllexport) #endif long long mcs_isnull(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) { return 0; } +/** + * ALLNULL connector stub + */ +struct allnull_data +{ + ulonglong totalQuantity; + ulonglong totalNulls; +}; + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool allnull_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + struct allnull_data* data; +// if (args->arg_count != 1) +// { +// strcpy(message,"allnull() requires one argument"); +// return 1; +// } + + if (!(data = (struct allnull_data*) malloc(sizeof(struct allnull_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->totalQuantity = 0; + data->totalNulls = 0; + + initid->ptr = (char*)data; + + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void allnull_deinit(UDF_INIT* initid) +{ + free(initid->ptr); +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +long long allnull(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) +{ + struct allnull_data* data = (struct allnull_data*)initid->ptr; + return data->totalQuantity > 0 && data->totalNulls == data->totalQuantity; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void +allnull_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ + struct allnull_data* data = (struct allnull_data*)initid->ptr; + data->totalQuantity = 0; + data->totalNulls = 0; +} + +#ifdef _MSC_VER +__declspec(dllexport)
+#endif +void +allnull_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) +{ + struct allnull_data* data = (struct allnull_data*)initid->ptr; + const char *word=args->args[0]; + data->totalQuantity++; + if (!word) + { + data->totalNulls++; + } +} + +/** + * SSQ connector stub + */ +struct ssq_data +{ + double sumsq; +}; + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool ssq_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + struct ssq_data* data; + if (args->arg_count != 1) + { + strcpy(message,"ssq() requires one argument"); + return 1; + } + + if (!(data = (struct ssq_data*) malloc(sizeof(struct ssq_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumsq = 0; + + initid->ptr = (char*)data; + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void ssq_deinit(UDF_INIT* initid) +{ + free(initid->ptr); +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void +ssq_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ + struct ssq_data* data = (struct ssq_data*)initid->ptr; + data->sumsq = 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void +ssq_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) +{ + struct ssq_data* data = (struct ssq_data*)initid->ptr; + double val = cvtArgToDouble(args->arg_type[0], args->args[0]); + data->sumsq += val*val; // accumulate the running sum of squares; ssq_clear() zeroes it +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +long long ssq(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) +{ + struct ssq_data* data = (struct ssq_data*)initid->ptr; + return data->sumsq; +} + +//======================================================================= + +/** + * MEDIAN connector stub + */ +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool median_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + if (args->arg_count
!= 1) + { + strcpy(message,"median() requires one argument"); + return 1; + } + +/* + if (!(data = (struct ssq_data*) malloc(sizeof(struct ssq_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumsq = 0; + + initid->ptr = (char*)data; +*/ + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void median_deinit(UDF_INIT* initid) +{ +// free(initid->ptr); +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void +median_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) +{ +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// data->sumsq = 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void +median_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) +{ +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// double val = cvtArgToDouble(args->arg_type[0], args->args[0]); +// data->sumsq = val*val; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +long long median(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) +{ +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// return data->sumsq; + return 0; +} + } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj old mode 100644 new mode 100755 index 2d6aec416..4307720fc --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -202,12 +202,20 @@ + + + + + + + + + diff --git a/utils/utils.vpj b/utils/utils.vpj old mode 100644 new mode 100755 index 342be0482..53da962f3 --- a/utils/utils.vpj +++ b/utils/utils.vpj @@ -233,6 +233,7 @@ + @@ -277,6 +278,7 @@ + diff --git a/utils/windowfunction/CMakeLists.txt b/utils/windowfunction/CMakeLists.txt old mode 100644 new mode 100755 index 3482ad776..1a221511f --- a/utils/windowfunction/CMakeLists.txt +++ b/utils/windowfunction/CMakeLists.txt @@ -21,7 +21,8 @@ set(windowfunction_LIB_SRCS wf_ranking.cpp 
wf_row_number.cpp wf_stats.cpp - wf_sum_avg.cpp) + wf_sum_avg.cpp + wf_udaf.cpp) add_library(windowfunction SHARED ${windowfunction_LIB_SRCS}) diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp new file mode 100755 index 000000000..20d8c5af7 --- /dev/null +++ b/utils/windowfunction/wf_udaf.cpp @@ -0,0 +1,508 @@ +/************************************************************************************ + Copyright (C) 2017 MariaDB Corporation AB + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not see + or write to the Free Software Foundation, Inc., + 51 Franklin St., Fifth Floor, Boston, MA 02110, USA + *************************************************************************************/ + + +//#define NDEBUG +#include +#include +#include +#include +using namespace std; + +#include +using namespace boost; + +#include "loggingid.h" +#include "errorcodes.h" +#include "idberrorinfo.h" +using namespace logging; + +#include "rowgroup.h" +using namespace rowgroup; + +#include "idborderby.h" +using namespace ordering; + +#include "joblisttypes.h" +#include "calpontsystemcatalog.h" +#include "constantcolumn.h" +using namespace execplan; + +#include "windowfunctionstep.h" +using namespace joblist; + +#include "wf_udaf.h" + + +namespace windowfunction +{ +template +boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) +{ + 
boost::shared_ptr func; + switch (ct) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + func.reset(new WF_udaf(id, name, context)); + break; + } + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + func.reset(new WF_udaf(id, name, context)); + break; + } + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + func.reset(new WF_udaf(id, name, context)); + break; + } + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + func.reset(new WF_udaf(id, name, context)); + break; + } + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + func.reset(new WF_udaf(id, name, context)); + break; + } + default: + { + string errStr = name + "(" + colType2String[ct] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + + // Get the UDAnF function object + WF_udaf* wfUDAF = (WF_udaf*)func.get(); + mcsv1sdk::mcsv1Context& udafContext = wfUDAF->getContext(); + udafContext.setInterrupted(wfUDAF->getInterruptedPtr()); + wfUDAF->resetData(); + return func; +} + +template +WF_udaf::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()), + bInterrupted(rhs.getInterrupted()), + fDistinct(rhs.getDistinct()) +{ + getContext().setInterrupted(getInterruptedPtr()); +} + +template +WindowFunctionType* WF_udaf::clone() const +{ + return new WF_udaf(*const_cast(this)); +} + +template +void WF_udaf::resetData() +{ + 
getContext().getFunction()->reset(&getContext()); + fSet.clear(); + WindowFunctionType::resetData(); +} + +template +void WF_udaf::parseParms(const std::vector& parms) +{ + bRespectNulls = true; + // parms[1]: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[1].get()); + idbassert(cc != NULL); + bool isNull = false; // dummy, harded coded + bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); +} + +template +bool WF_udaf::dropValues(int64_t b, int64_t e) +{ + if (!bHasDropValue) + { + // Save work if we discovered dropValue is not implemented in the UDAnF + return false; + } + + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + uint64_t colOut = fFieldIndex[0]; + uint64_t colIn = fFieldIndex[1]; + + mcsv1sdk::ColumnDatum datum; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colOut); + + for (int64_t i = b; i < e; i++) + { + if (i % 1000 == 0 && fStep->cancelled()) + break; + + fRow.setData(getPointer(fRowData->at(i))); + // Turn on NULL flags + std::vector flags; + uint32_t flag = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + continue; + } + flag |= mcsv1sdk::PARAM_IS_NULL; + } + flags.push_back(flag); + getContext().setDataFlags(&flags); + + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // TODO: when we impliment distinct, we need to revist this. 
+ if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + { + continue; + } + + datum.columnData = valIn; + + std::vector valsIn; + valsIn.push_back(datum); + + rc = getContext().getFunction()->dropValue(&getContext(), valsIn); + if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) + { + bHasDropValue = false; + return false; + } + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + bInterrupted = true; + string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, getContext().getErrorMessage()); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_UDANF_ERROR); + } + } + + return true; +} + +// Sets the value from valOut into column colOut, performing any conversions. +template +void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, + int64_t b, int64_t e, int64_t c) +{ + static const static_any::any& charTypeId = (char)1; + static const static_any::any& scharTypeId = (signed char)1; + static const static_any::any& shortTypeId = (short)1; + static const static_any::any& intTypeId = (int)1; + static const static_any::any& longTypeId = (long)1; + static const static_any::any& llTypeId = (long long)1; + static const static_any::any& ucharTypeId = (unsigned char)1; + static const static_any::any& ushortTypeId = (unsigned short)1; + static const static_any::any& uintTypeId = (unsigned int)1; + static const static_any::any& ulongTypeId = (unsigned long)1; + static const static_any::any& ullTypeId = (unsigned long long)1; + static const static_any::any& floatTypeId = (float)1; + static const static_any::any& doubleTypeId = (double)1; + static const std::string typeStr(""); + static const static_any::any& strTypeId = typeStr; + + CDT colDataType = fRow.getColType(colOut); + if (valOut.empty()) + { + // If valOut is empty, we return NULL + T* pv = NULL; + setValue(colDataType, b, e, c, pv); + fPrev = c; + return; + } + + // This may seem a bit convoluted. 
Users shouldn't return a type + // that they didn't set in mcsv1_UDAF::init(), but this + // handles whatever return type is given and casts + // it to whatever they said to return. + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + if (valOut.compatible(charTypeId)) + { + uintOut = intOut = valOut.cast(); + floatOut = intOut; doubleOut = intOut; // keep doubleOut in step for DOUBLE output columns + oss << intOut; + } + else if (valOut.compatible(scharTypeId)) + { + uintOut = intOut = valOut.cast(); + floatOut = intOut; doubleOut = intOut; + oss << intOut; + } + else if (valOut.compatible(shortTypeId)) + { + uintOut = intOut = valOut.cast(); + floatOut = intOut; doubleOut = intOut; + oss << intOut; + } + else if (valOut.compatible(intTypeId)) + { + uintOut = intOut = valOut.cast(); + floatOut = intOut; doubleOut = intOut; + oss << intOut; + } + else if (valOut.compatible(longTypeId)) + { + uintOut = intOut = valOut.cast(); + floatOut = intOut; doubleOut = intOut; + oss << intOut; + } + else if (valOut.compatible(llTypeId)) + { + uintOut = intOut = valOut.cast(); + floatOut = intOut; doubleOut = intOut; + oss << intOut; + } + else if (valOut.compatible(ucharTypeId)) + { + intOut = uintOut = valOut.cast(); + floatOut = uintOut; doubleOut = uintOut; + oss << uintOut; + } + else if (valOut.compatible(ushortTypeId)) + { + intOut = uintOut = valOut.cast(); + floatOut = uintOut; doubleOut = uintOut; + oss << uintOut; + } + else if (valOut.compatible(uintTypeId)) + { + intOut = uintOut = valOut.cast(); + floatOut = uintOut; doubleOut = uintOut; + oss << uintOut; + } + else if (valOut.compatible(ulongTypeId)) + { + intOut = uintOut = valOut.cast(); + floatOut = uintOut; doubleOut = uintOut; + oss << uintOut; + } + else if (valOut.compatible(ullTypeId)) + { + intOut = uintOut = valOut.cast(); + floatOut = uintOut; doubleOut = uintOut; + oss << uintOut; + } + else if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + doubleOut = floatOut; + intOut = uintOut = floatOut; + oss << floatOut; + } + else if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + floatOut = (float)doubleOut; + uintOut = (uint64_t)doubleOut; + intOut = 
(int64_t)doubleOut; + oss << doubleOut; + } + + if (valOut.compatible(strTypeId)) + { + strOut = valOut.cast(); // assign the function-level strOut; a local declared here would shadow it + // Convert the string to numeric type, just in case. + intOut = atol(strOut.c_str()); + uintOut = strtoul(strOut.c_str(), NULL, 10); + doubleOut = strtod(strOut.c_str(), NULL); + floatOut = (float)doubleOut; + } + else + { + strOut = oss.str(); + } + + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + setValue(colDataType, b, e, c, &intOut); + break; + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + setValue(colDataType, b, e, c, &uintOut); + break; + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + setValue(colDataType, b, e, c, &floatOut); + break; + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + setValue(colDataType, b, e, c, &doubleOut); + break; + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + setValue(colDataType, b, e, c, &strOut); + break; + default: + { + std::ostringstream errmsg; + errmsg << "WF_udaf: No logic for data type: " << colDataType; + cerr << errmsg.str() << endl; + throw 
runtime_error(errmsg.str().c_str()); + break; + } + } +} + +template +void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) +{ + mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + uint64_t colOut = fFieldIndex[0]; + static_any::any valOut; + + if ((fFrameUnit == WF__FRAME_ROWS) || + (fPrev == -1) || + (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) + { + // for unbounded - current row special handling + if (fPrev >= b && fPrev < c) + b = c; + else if (fPrev <= e && fPrev > c) + e = c; + + uint64_t colIn = fFieldIndex[1]; + + mcsv1sdk::ColumnDatum datum; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colOut); + + if (b<=c && c<=e) + getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); + else + getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); + + + for (int64_t i = b; i <= e; i++) + { + if (i % 1000 == 0 && fStep->cancelled()) + break; + + fRow.setData(getPointer(fRowData->at(i))); + // Turn on NULL flags + std::vector flags; + uint32_t flag = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + continue; + } + flag |= mcsv1sdk::PARAM_IS_NULL; + } + flags.push_back(flag); + getContext().setDataFlags(&flags); + + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. 
+ if ((fDistinct) && (fSet.find(valIn) != fSet.end())) // under DISTINCT, skip only values already in fSet + { + continue; + } + + if (fDistinct) + fSet.insert(valIn); + + datum.columnData = valIn; + + std::vector valsIn; + valsIn.push_back(datum); + + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + bInterrupted = true; + string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, getContext().getErrorMessage()); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_UDANF_ERROR); + } + } + + rc = getContext().getFunction()->evaluate(&getContext(), fValOut); + if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) + { + bInterrupted = true; + string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, getContext().getErrorMessage()); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_UDANF_ERROR); + } + } + + SetUDAFValue(fValOut, colOut, b, e, c); + + fPrev = c; +} + +template +boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context); + +} //namespace +// vim:ts=4 sw=4: + diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h new file mode 100755 index 000000000..776723fe8 --- /dev/null +++ b/utils/windowfunction/wf_udaf.h @@ -0,0 +1,77 @@ +/************************************************************************************ + Copyright (C) 2017 MariaDB Corporation AB + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details.
+ + You should have received a copy of the GNU Library General Public + License along with this library; if not see + or write to the Free Software Foundation, Inc., + 51 Franklin St., Fifth Floor, Boston, MA 02110, USA + *************************************************************************************/ + + +#ifndef UTILS_WF_UDAF_H +#define UTILS_WF_UDAF_H + +#include +#include "windowfunctiontype.h" +#include "mcsv1_udaf.h" + + +namespace windowfunction +{ + +// A class to control the execution of User Defined Analytic Functions (UDAnF) +// as defined by a specialization of mcsv1sdk::mcsv1_UDAF +// The template parameter is currently only used to support DISTINCT, as +// that is done via a set +template +class WF_udaf : public WindowFunctionType +{ +public: + WF_udaf(int id, const std::string& name, mcsv1sdk::mcsv1Context& context) : + WindowFunctionType(id, name), fUDAFContext(context), bInterrupted(false), fDistinct(false), bRespectNulls(true), bHasDropValue(true) {} + WF_udaf(WF_udaf& rhs); + // pure virtual in base + void operator()(int64_t b, int64_t e, int64_t c); + WindowFunctionType* clone() const; + void resetData(); + void parseParms(const std::vector&); + virtual bool dropValues(int64_t, int64_t); + + mcsv1sdk::mcsv1Context& getContext() {return fUDAFContext;} + bool getInterrupted() {return bInterrupted;} + bool* getInterruptedPtr() {return &bInterrupted;} // address of bInterrupted; a plain bool return collapsed it to true + bool getDistinct() {return fDistinct;} + +protected: + void SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c); + + mcsv1sdk::mcsv1Context fUDAFContext; // The UDAF context + bool bInterrupted; // Shared by all the threads + bool fDistinct; + bool bRespectNulls; // respect null | ignore null + bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue.
+ std::set fSet; // To hold distinct values + static_any::any fValOut; // The return value + +public: + static boost::shared_ptr makeFunction(int id, const string& name, + int ct, mcsv1sdk::mcsv1Context& context); +}; + + +} // namespace + +#endif // UTILS_WF_UDAF_H + +// vim:ts=4 sw=4: + diff --git a/utils/windowfunction/windowfunction.cpp b/utils/windowfunction/windowfunction.cpp old mode 100644 new mode 100755 index d70a501c1..b16eed6f5 --- a/utils/windowfunction/windowfunction.cpp +++ b/utils/windowfunction/windowfunction.cpp @@ -163,11 +163,35 @@ void WindowFunction::operator()() } else { + pair w; + pair prevFrame; + int64_t b, e; + bool firstTime = true; for (int64_t i = begin; i <= end && !fStep->cancelled(); i++) { - pair w = fFrame->getWindow(begin, end, i); - fFunctionType->resetData(); - fFunctionType->operator()(w.first, w.second, i); + w = fFrame->getWindow(begin, end, i); + b = w.first; + e = w.second; + if (firstTime) + { + prevFrame = w; + } + // UDAnF functions may have a dropValue function implemented. + // If they do, we can optimize by calling dropValue() for those + // values leaving the window and nextValue for those entering, rather + // than a resetData() and then iterating over the entire window. + // Built-in functions may have this functionality added in the future. + if (fFunctionType->dropValues(prevFrame.first, w.first)) + { + b = firstTime ? 
w.first : prevFrame.second+1; + } + else + { + fFunctionType->resetData(); + } + fFunctionType->operator()(b, e, i); + prevFrame = w; + firstTime = false; } } } diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp old mode 100644 new mode 100755 index 1483ed27e..af2f96b96 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -58,6 +58,7 @@ using namespace joblist; #include "wf_row_number.h" #include "wf_stats.h" #include "wf_sum_avg.h" +#include "wf_udaf.h" namespace windowfunction { @@ -137,13 +138,16 @@ map WindowFunctionType::windowFunctionId = assign::map_list_of (string("REGR_SXX"), WF__REGR_SXX) (string("REGR_SXY"), WF__REGR_SXY) (string("REGR_SYY"), WF__REGR_SYY) + (string("UDAF_FUNC"), WF__UDAF) ; boost::shared_ptr - WindowFunctionType::makeWindowFunction(const string& name, int ct) + WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctionColumn* wc) { boost::shared_ptr af; int functionId = windowFunctionId[algorithm::to_upper_copy(name)]; + // The template parameters here are dummies to execute the static makeFunction + // which sets the real type based on ct. 
switch (functionId) { case WF__COUNT_ASTERISK: @@ -192,6 +196,9 @@ boost::shared_ptr case WF__PERCENTILE_DISC: af = WF_percentile::makeFunction(functionId, name, ct); break; + case WF__UDAF: + af = WF_udaf::makeFunction(functionId, name, ct, wc->getUDAFContext()); + break; case WF__REGR_SLOPE: case WF__REGR_INTERCEPT: case WF__REGR_COUNT: @@ -211,7 +218,6 @@ boost::shared_ptr return af; } - const string WindowFunctionType::toString() const { ostringstream oss; @@ -223,77 +229,81 @@ const string WindowFunctionType::toString() const return oss.str(); } - -template void WindowFunctionType::getValue(uint64_t i, T& t) +template void WindowFunctionType::getValue(uint64_t i, T& t, CDT* cdt) { } - -template<> void WindowFunctionType::getValue(uint64_t i, int64_t& t) +template<> void WindowFunctionType::getValue(uint64_t i, int64_t& t, CDT* cdt) { t = fRow.getIntField(i); + if (cdt) + { + *cdt = execplan::CalpontSystemCatalog::BIGINT; + } } - -template<> void WindowFunctionType::getValue(uint64_t i, uint64_t& t) +template<> void WindowFunctionType::getValue(uint64_t i, uint64_t& t, CDT* cdt) { t = fRow.getUintField(i); + if (cdt) + { + *cdt = execplan::CalpontSystemCatalog::UBIGINT; + } } - -template<> void WindowFunctionType::getValue(uint64_t i, double& t) +template<> void WindowFunctionType::getValue(uint64_t i, double& t, CDT* cdt) { t = fRow.getDoubleField(i); + if (cdt) + { + *cdt = execplan::CalpontSystemCatalog::DOUBLE; + } } - -template<> void WindowFunctionType::getValue(uint64_t i, float& t) +template<> void WindowFunctionType::getValue(uint64_t i, float& t, CDT* cdt) { t = fRow.getFloatField(i); + if (cdt) + { + *cdt = execplan::CalpontSystemCatalog::FLOAT; + } } - -template<> void WindowFunctionType::getValue(uint64_t i, string& t) +template<> void WindowFunctionType::getValue(uint64_t i, string& t, CDT* cdt) { t = fRow.getStringField(i); + // By not setting cdt, we let it default to the column's type } - template void WindowFunctionType::setValue(uint64_t i, 
T& t) { } - template<> void WindowFunctionType::setValue(uint64_t i, int64_t& t) { fRow.setIntField(t, i); } - template<> void WindowFunctionType::setValue(uint64_t i, uint64_t& t) { fRow.setUintField(t, i); } - template<> void WindowFunctionType::setValue(uint64_t i, double& t) { fRow.setDoubleField(t, i); } - template<> void WindowFunctionType::setValue(uint64_t i, float& t) { fRow.setFloatField(t, i); } - template<> void WindowFunctionType::setValue(uint64_t i, string& t) { fRow.setStringField(t, i); } - template void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v) { @@ -314,7 +324,6 @@ void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v) } } - template void WindowFunctionType::implicit2T(uint64_t i, T& t, int s) { @@ -384,55 +393,47 @@ void WindowFunctionType::implicit2T(uint64_t i, T& t, int s) } } - template<> void WindowFunctionType::implicit2T(uint64_t i, string& t, int) { t = fRow.getStringField(i); } - template void WindowFunctionType::getConstValue(ConstantColumn* cc, T& t, bool& b) { } - template<> void WindowFunctionType::getConstValue(ConstantColumn* cc, int64_t& t, bool& b) { t = cc->getIntVal(fRow, b); } - template<> void WindowFunctionType::getConstValue(ConstantColumn* cc, uint64_t& t, bool& b) { t = cc->getUintVal(fRow, b); } - template<> void WindowFunctionType::getConstValue(ConstantColumn* cc, double& t, bool& b) { t = cc->getDoubleVal(fRow, b); } - template<> void WindowFunctionType::getConstValue(ConstantColumn* cc, float& t, bool& b) { t = cc->getFloatVal(fRow, b); } - template<> void WindowFunctionType::getConstValue(ConstantColumn* cc, string& t, bool& b) { t = cc->getStrVal(fRow, b); } - template void WindowFunctionType::implicit2T(uint64_t, int64_t&, int); template void WindowFunctionType::implicit2T(uint64_t, uint64_t&, int); template void WindowFunctionType::implicit2T(uint64_t, float&, int); @@ -445,7 +446,6 @@ template void WindowFunctionType::setValue(int, int64_t, int64_t, 
int64_ template void WindowFunctionType::setValue(int, int64_t, int64_t, int64_t, string*); - void* WindowFunctionType::getNullValueByType(int ct, int pos) { static uint64_t bigIntNull = joblist::BIGINTNULL; @@ -566,7 +566,6 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) return v; } - } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h old mode 100644 new mode 100755 index a28e99c5c..9ab8ccc06 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -98,8 +98,9 @@ const int WF__REGR_AVGY = 32; const int WF__REGR_SXX = 33; const int WF__REGR_SXY = 34; const int WF__REGR_SYY = 35; +const int WF__UDAF = 36; - +typedef execplan::CalpontSystemCatalog::ColDataType CDT; /** @brief class WindowFunction * @@ -129,6 +130,10 @@ public: // @brief virtual parseParms() virtual void parseParms(const std::vector&) {} + // @brief virtual dropValues() For UDAnF functions + // return false if there's no dropValue() implemented in the function. 
+ virtual bool dropValues(int64_t, int64_t) {return false;} + // @brief virtual display method virtual const std::string toString() const; @@ -148,14 +153,14 @@ public: void peer(const boost::shared_ptr& p) { fPeer = p; } void setCallback(joblist::WindowFunctionStep* step) { fStep = step; } - static boost::shared_ptr makeWindowFunction(const std::string&, int ct); + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: static std::map windowFunctionId; // utility methods - template void getValue(uint64_t, T&); + template void getValue(uint64_t, T&, CDT* cdt = NULL); template void setValue(int, int64_t, int64_t, int64_t, T* = NULL); template void setValue(uint64_t, T&); template void implicit2T(uint64_t, T&, int); diff --git a/writeengine/splitter/we_splitterapp.cpp b/writeengine/splitter/we_splitterapp.cpp old mode 100644 new mode 100755 index 7ecbca854..f52f36245 --- a/writeengine/splitter/we_splitterapp.cpp +++ b/writeengine/splitter/we_splitterapp.cpp @@ -448,8 +448,6 @@ void WESplitterApp::invokeCpimport() std::vector Cmds; - - char* ptr = 0; std::istringstream ss(aCmdLineStr); std::string arg; std::vector v2(20, "");