diff --git a/datatypes/mcs_datatype.cpp b/datatypes/mcs_datatype.cpp index 2ca95ba2c..9dfe800f5 100644 --- a/datatypes/mcs_datatype.cpp +++ b/datatypes/mcs_datatype.cpp @@ -478,49 +478,49 @@ int TypeHandlerVarbinary::storeValueToField(rowgroup::Row& row, int pos, StoreFi int TypeHandlerSInt64::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const { int64_t val = row.getIntField<8>(pos); - return f->store_xlonglong(val); + return f->store_longlong(val); } int TypeHandlerUInt64::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const { uint64_t val = row.getUintField<8>(pos); - return f->store_xlonglong(static_cast(val)); + return f->store_ulonglong(val); } int TypeHandlerInt::storeValueToFieldSInt32(rowgroup::Row& row, int pos, StoreField* f) const { int64_t val = row.getIntField<4>(pos); - return f->store_xlonglong(val); + return f->store_longlong(val); } int TypeHandlerInt::storeValueToFieldUInt32(rowgroup::Row& row, int pos, StoreField* f) const { uint64_t val = row.getUintField<4>(pos); - return f->store_xlonglong(static_cast(val)); + return f->store_ulonglong(val); } int TypeHandlerSInt16::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const { int64_t val = row.getIntField<2>(pos); - return f->store_xlonglong(val); + return f->store_longlong(val); } int TypeHandlerUInt16::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const { uint64_t val = row.getUintField<2>(pos); - return f->store_xlonglong(static_cast(val)); + return f->store_ulonglong(val); } int TypeHandlerSInt8::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const { int64_t val = row.getIntField<1>(pos); - return f->store_xlonglong(val); + return f->store_longlong(val); } int TypeHandlerUInt8::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const { uint64_t val = row.getUintField<1>(pos); - return f->store_xlonglong(static_cast(val)); + return f->store_ulonglong(val); } /* diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index 8e2c551cf..2bdfada62 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -953,7 +953,8 @@ class StoreField virtual int store_timestamp(int64_t val) = 0; virtual int store_string(const char* str, size_t length) = 0; virtual int store_varbinary(const char* str, size_t length) = 0; - virtual int store_xlonglong(int64_t val) = 0; + virtual int store_longlong(int64_t val) = 0; + virtual int store_ulonglong(uint64_t val) = 0; virtual int store_float(float val) = 0; virtual int store_double(double val) = 0; virtual int store_long_double(long double val) = 0; diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 01c6fa29c..cab229ebe 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -979,7 +979,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo // Changing col type based on a parm if multiple parms // doesn't really make sense. if (op != AggregateColumn::SUM && op != AggregateColumn::DISTINCT_SUM && - op != AggregateColumn::AVG && op != AggregateColumn::DISTINCT_AVG) + op != AggregateColumn::AVG && op != AggregateColumn::DISTINCT_AVG && + op != AggregateColumn::BIT_AND && op != AggregateColumn::BIT_OR && + op != AggregateColumn::BIT_XOR) { updateAggregateColType(aggc, srcp, op, jobInfo); } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 43a67f951..60f641a68 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1332,16 +1332,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector& keysAgg.push_back(key); scaleAgg.push_back(0); precisionAgg.push_back(-16); // for connector to skip null check - - if (isUnsigned(typeProj[colProj])) - { - typeAgg.push_back(CalpontSystemCatalog::UBIGINT); - } - else - { - typeAgg.push_back(CalpontSystemCatalog::BIGINT); - } - + typeAgg.push_back(CalpontSystemCatalog::UBIGINT); csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(bigIntWidth); } @@ -1941,16 +1932,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector keysAggPm.push_back(aggKey); scaleAggPm.push_back(0); precisionAggPm.push_back(-16); // for connector to skip null check - - if (isUnsigned(typeProj[colProj])) - { - typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); - } - else - { - typeAggPm.push_back(CalpontSystemCatalog::BIGINT); - } - + typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); csNumAggPm.push_back(8); widthAggPm.push_back(bigIntWidth); colAggPm++; @@ -4183,16 +4156,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vectorstore_binary(str, length); } - int store_xlonglong(int64_t val) override + int store_longlong(int64_t val) override { - idbassert(dynamic_cast(m_field)); - return m_field->store(val, static_cast(m_field)->unsigned_flag); + return m_field->store(val, 0); + } + + int store_ulonglong(uint64_t val)override + { + return m_field->store(static_cast(val), 1); } int store_float(float dl) override diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index c06acf0a7..8d75a0f88 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -5192,7 +5192,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) { CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colDataType = CalpontSystemCatalog::UBIGINT; ct.colWidth = 8; ct.scale = 0; ct.precision = -16; // borrowed to indicate skip null value check on connector diff --git a/dbcon/mysql/install_mcs_mysql.sh.in b/dbcon/mysql/install_mcs_mysql.sh.in index 2cead258a..7d3913cd7 100755 --- a/dbcon/mysql/install_mcs_mysql.sh.in +++ b/dbcon/mysql/install_mcs_mysql.sh.in @@ -78,7 +78,7 @@ CREATE OR REPLACE FUNCTION caldroppartitionsbyvalue RETURNS STRING SONAME 'ha_co CREATE OR REPLACE FUNCTION caldisablepartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so'; CREATE OR REPLACE FUNCTION calenablepartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so'; CREATE OR REPLACE FUNCTION calshowpartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so'; -CREATE OR REPLACE AGGREGATE FUNCTION moda RETURNS DECIMAL SONAME 'libregr_mysql.so'; +CREATE OR REPLACE AGGREGATE FUNCTION moda RETURNS STRING SONAME 'libregr_mysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_querystats; CREATE TABLE IF NOT EXISTS infinidb_querystats.querystats diff --git a/mysql-test/columnstore/basic/r/mcs98_moda_function.result b/mysql-test/columnstore/basic/r/mcs98_moda_function.result new file mode 100644 index 000000000..45899a4af --- /dev/null +++ b/mysql-test/columnstore/basic/r/mcs98_moda_function.result @@ -0,0 +1,161 @@ +DROP DATABASE IF EXISTS mcs98_db; +CREATE DATABASE mcs98_db; +USE mcs98_db; +CREATE TABLE t1 (t TINYINT, s SMALLINT, m MEDIUMINT, i INT, bi BIGINT, d1 DECIMAL(5,2), d2 DECIMAL(36,12), rl FLOAT, dbl DOUBLE)ENGINE=Columnstore; +INSERT INTO t1 VALUES(NULL, NULL, 1234, -1000012898, 700000012898, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23), +(12, 345, 1234, -1000012899, 70000001289, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23), +(12, 345, 1234, -1000012898, 700000012899, 34.22, 90000000000000000009.124312000092, 14.02, 3900000000000001.24), +(13, 346, 1235, NULL, NULL, NULL, NULL, NULL, NULL), +(28, 1345, 11234, -2000012898, 1700000012899, 134.22, 190000000000000000009.124312000092, 114.02, 13900000000000001.24); +SELECT moda(t) FROM t1; +moda(t) +12 +SELECT moda(s) FROM t1; +moda(s) +345 +SELECT moda(m) FROM t1; +moda(m) +1234 +SELECT moda(i) FROM t1; +moda(i) +-1000012898 +SELECT moda(bi) FROM t1; +moda(bi) +700000012899 +SELECT moda(d1) FROM t1; +moda(d1) +34.21 +SELECT moda(d2) FROM t1; +moda(d2) +90000000000000000009.124312000091 +SELECT moda(rl) FROM t1; +moda(rl) +14.010000228881836 +SELECT moda(dbl) FROM t1; +moda(dbl) +3.900000000000001e15 +CREATE TABLE t2 ( +t tinyint, +tu tinyint unsigned, +s smallint, +su smallint unsigned, +m mediumint, +mu mediumint unsigned, +i int, +iu int unsigned, +b bigint, +bu bigint unsigned, +d1 decimal(5,2), +du1 decimal(5,2) unsigned, +d2 decimal(36,12), +du2 decimal(36,12) unsigned, +rl float, +dbl double, +v char(4), +vc varchar(10) +) engine=columnstore; +INSERT INTO t2 VALUES(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816, +2223372036854775816, 22.34, 22.34, 70000000605040302018.124312000091, +70000000605040302019.124312000091, 2344.32, 12345678.91011, 'five', 'five123456'); +INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460, +-2223372036854775816, 2223372036854775816, -22.34, 22.34, +-70000000605040302019.124312000091, 70000000605040302029.124312000091, +-2344.32, -123456789.1011, 'four', 'five654321'); +INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807, +2223372036854775807, 13.34, 13.34, 70000000605040302019.124312000091, +70000000605040302029.124312000091, 234.432, 12345678.91011, 'five', 'five123456'); +INSERT INTO t2 VALUES(-1, 1, -201, 201, -1234867, 1234867, -7483646, 7483646, -2223372036854775807, +2223372036854775807, -13.34, 13.34, -70000000605040302019.124312000091, +70000000605040302019.124312000091, -234.432, -1234567.891011, 'four', 'four123456'); +INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816, +2223372036854775816, 22.34, 22.34, 70000000605040302019.124312000091, +70000000605040302019.124312000091, 2344.32, 1234567.891011, 'five', 'five123456'); +INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460, +-2223372036854775816, 2223372036854775816, -22.34, 22.34, +-70000000605040302019.124312000091, 70000000605040302029.124312000091, +-2344.32, -12345678.91011, 'four', 'five654321'); +INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807, +2223372036854775807, 13.34, 13.34, 70000000605040302018.124312000091, +70000000605040302029.124312000091, 234.432, 12345678.91011, 'six', 'six1234567'); +SELECT moda(t) FROM t2; +moda(t) +1 +SELECT moda(tu) FROM t2; +moda(tu) +10 +SELECT moda(s) FROM t2; +moda(s) +201 +SELECT moda(su) FROM t2; +moda(su) +2010 +SELECT moda(m) FROM t2; +moda(m) +1234867 +SELECT moda(mu) FROM t2; +moda(mu) +1237567 +SELECT moda(i) FROM t2; +moda(i) +7483646 +SELECT moda(iu) FROM t2; +moda(iu) +74836460 +SELECT moda(b) FROM t2; +moda(b) +2223372036854775807 +SELECT moda(bu) FROM t2; +moda(bu) +2223372036854775816 +SELECT moda(d1) FROM t2; +moda(d1) +13.34 +SELECT moda(du1) FROM t2; +moda(du1) +22.34 +SELECT moda(d2) FROM t2; +moda(d2) +-70000000605040302019.124312000091 +SELECT moda(du2) FROM t2; +moda(du2) +70000000605040302029.124312000091 +SELECT moda(rl) FROM t2; +moda(rl) +234.4320068359375 +SELECT moda(dbl) FROM t2; +moda(dbl) +12345678.91011 +SELECT moda(v) FROM t2; +moda(v) +four +SELECT moda(vc) FROM t2; +moda(vc) +five123456 +SELECT i FROM t2 WHERE i >= (SELECT moda(i) FROM t2); +i +74836460 +7483646 +74836460 +7483646 +SELECT d2 FROM t2 WHERE d2 < (SELECT moda(d2) FROM t2); +d2 +SELECT tu, moda(i) FROM t2 GROUP BY tu; +tu moda(i) +10 -74836460 +1 7483646 +NULL 0 +SELECT floor(moda(rl)) FROM t2; +floor(moda(rl)) +234 +SELECT ceiling(moda(dbl)) FROM t2; +ceiling(moda(dbl)) +12345679 +SELECT moda(floor(rl)) FROM t2; +moda(floor(rl)) +234 +SELECT t, moda(tu) 'q1' FROM t2 GROUP BY t HAVING moda(tu) > 5; +t q1 +10 10 +-10 10 +DROP DATABASE mcs98_db; diff --git a/mysql-test/columnstore/basic/t/mcs98_moda_function.test b/mysql-test/columnstore/basic/t/mcs98_moda_function.test new file mode 100644 index 000000000..807894eb9 --- /dev/null +++ b/mysql-test/columnstore/basic/t/mcs98_moda_function.test @@ -0,0 +1,98 @@ +# +# Test MODA Function +# Author: dhall, david.hall@mariadb.com +# +# Test MODA with various numeric types +-- source ../include/have_columnstore.inc +--disable_warnings +DROP DATABASE IF EXISTS mcs98_db; +--enable_warnings +CREATE DATABASE mcs98_db; +USE mcs98_db; +CREATE TABLE t1 (t TINYINT, s SMALLINT, m MEDIUMINT, i INT, bi BIGINT, d1 DECIMAL(5,2), d2 DECIMAL(36,12), rl FLOAT, dbl DOUBLE)ENGINE=Columnstore; +INSERT INTO t1 VALUES(NULL, NULL, 1234, -1000012898, 700000012898, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23), + (12, 345, 1234, -1000012899, 70000001289, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23), + (12, 345, 1234, -1000012898, 700000012899, 34.22, 90000000000000000009.124312000092, 14.02, 3900000000000001.24), + (13, 346, 1235, NULL, NULL, NULL, NULL, NULL, NULL), + (28, 1345, 11234, -2000012898, 1700000012899, 134.22, 190000000000000000009.124312000092, 114.02, 13900000000000001.24); +SELECT moda(t) FROM t1; +SELECT moda(s) FROM t1; +SELECT moda(m) FROM t1; +SELECT moda(i) FROM t1; +SELECT moda(bi) FROM t1; +SELECT moda(d1) FROM t1; +SELECT moda(d2) FROM t1; +SELECT moda(rl) FROM t1; +SELECT moda(dbl) FROM t1; +CREATE TABLE t2 ( + t tinyint, + tu tinyint unsigned, + s smallint, + su smallint unsigned, + m mediumint, + mu mediumint unsigned, + i int, + iu int unsigned, + b bigint, + bu bigint unsigned, + d1 decimal(5,2), + du1 decimal(5,2) unsigned, + d2 decimal(36,12), + du2 decimal(36,12) unsigned, + rl float, + dbl double, + v char(4), + vc varchar(10) +) engine=columnstore; +INSERT INTO t2 VALUES(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816, + 2223372036854775816, 22.34, 22.34, 70000000605040302018.124312000091, + 70000000605040302019.124312000091, 2344.32, 12345678.91011, 'five', 'five123456'); +INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460, + -2223372036854775816, 2223372036854775816, -22.34, 22.34, + -70000000605040302019.124312000091, 70000000605040302029.124312000091, + -2344.32, -123456789.1011, 'four', 'five654321'); +INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807, + 2223372036854775807, 13.34, 13.34, 70000000605040302019.124312000091, + 70000000605040302029.124312000091, 234.432, 12345678.91011, 'five', 'five123456'); +INSERT INTO t2 VALUES(-1, 1, -201, 201, -1234867, 1234867, -7483646, 7483646, -2223372036854775807, + 2223372036854775807, -13.34, 13.34, -70000000605040302019.124312000091, + 70000000605040302019.124312000091, -234.432, -1234567.891011, 'four', 'four123456'); +INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816, + 2223372036854775816, 22.34, 22.34, 70000000605040302019.124312000091, + 70000000605040302019.124312000091, 2344.32, 1234567.891011, 'five', 'five123456'); +INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460, + -2223372036854775816, 2223372036854775816, -22.34, 22.34, + -70000000605040302019.124312000091, 70000000605040302029.124312000091, + -2344.32, -12345678.91011, 'four', 'five654321'); +INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807, + 2223372036854775807, 13.34, 13.34, 70000000605040302018.124312000091, + 70000000605040302029.124312000091, 234.432, 12345678.91011, 'six', 'six1234567'); +SELECT moda(t) FROM t2; +SELECT moda(tu) FROM t2; +SELECT moda(s) FROM t2; +SELECT moda(su) FROM t2; +SELECT moda(m) FROM t2; +SELECT moda(mu) FROM t2; +SELECT moda(i) FROM t2; +SELECT moda(iu) FROM t2; +SELECT moda(b) FROM t2; +SELECT moda(bu) FROM t2; +SELECT moda(d1) FROM t2; +SELECT moda(du1) FROM t2; +SELECT moda(d2) FROM t2; +SELECT moda(du2) FROM t2; +SELECT moda(rl) FROM t2; +SELECT moda(dbl) FROM t2; +SELECT moda(v) FROM t2; +SELECT moda(vc) FROM t2; +SELECT i FROM t2 WHERE i >= (SELECT moda(i) FROM t2); +SELECT d2 FROM t2 WHERE d2 < (SELECT moda(d2) FROM t2); +SELECT tu, moda(i) FROM t2 GROUP BY tu; +SELECT floor(moda(rl)) FROM t2; +SELECT ceiling(moda(dbl)) FROM t2; +SELECT moda(floor(rl)) FROM t2; +SELECT t, moda(tu) 'q1' FROM t2 GROUP BY t HAVING moda(tu) > 5; +# Clean UP +DROP DATABASE mcs98_db; + diff --git a/utils/common/any.hpp b/utils/common/any.hpp index e3429d7aa..e511db3ed 100644 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -135,6 +135,7 @@ struct choose_policy }; BIG_POLICY(int128_t); +BIG_POLICY(long double); /// Specializations for small types. #define SMALL_POLICY(TYPE) \ diff --git a/utils/common/collation.h b/utils/common/collation.h index c9134eb19..424c74e90 100644 --- a/utils/common/collation.h +++ b/utils/common/collation.h @@ -141,10 +141,11 @@ class Charset Charset(CHARSET_INFO& cs) : mCharset(&cs) { } - Charset(CHARSET_INFO* cs) : mCharset(cs ? cs : &my_charset_bin) + Charset(CHARSET_INFO* cs = nullptr) : mCharset(cs ? cs : &my_charset_bin) { } Charset(uint32_t charsetNumber); + void setCharset(uint32_t charsetNumber); CHARSET_INFO& getCharset() const { return *mCharset; @@ -157,6 +158,10 @@ class Charset { return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length()) == 0; } + int strnncollsp(const std::string& str1, const std::string& str2) const + { + return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length()); + } int strnncollsp(const utils::ConstString& str1, const utils::ConstString& str2) const { return mCharset->strnncollsp(str1.str(), str1.length(), str2.str(), str2.length()); diff --git a/utils/common/utils_utf8.cpp b/utils/common/utils_utf8.cpp index 659bc61ac..f0471f7d3 100644 --- a/utils/common/utils_utf8.cpp +++ b/utils/common/utils_utf8.cpp @@ -29,4 +29,10 @@ Charset::Charset(uint32_t charsetNumber) : mCharset(&get_charset_or_bin(charsetN { } +void Charset::setCharset(uint32_t charsetNumber) +{ + mCharset = &get_charset_or_bin(charsetNumber); +} + } // namespace datatypes + diff --git a/utils/regr/moda.cpp b/utils/regr/moda.cpp index a6a6476a3..aae9584ac 100644 --- a/utils/regr/moda.cpp +++ b/utils/regr/moda.cpp @@ -102,6 +102,12 @@ mcsv1_UDAF* moda::getImpl(mcsv1Context* context) case execplan::CalpontSystemCatalog::FLOAT: data->modaImpl = &moda_impl_float; break; case execplan::CalpontSystemCatalog::DOUBLE: data->modaImpl = &moda_impl_double; break; case execplan::CalpontSystemCatalog::LONGDOUBLE: data->modaImpl = &moda_impl_longdouble; break; + + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::CHAR: + data->modaImpl = &moda_impl_string; + break; + default: data->modaImpl = NULL; } return data->modaImpl; @@ -125,14 +131,16 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes) if (!(datatypes::isNumeric(colTypes[0].dataType))) { - // The error message will be prepended with - // "The storage engine for the table doesn't support " - context->setErrorMessage("moda() with non-numeric argument"); - return mcsv1_UDAF::ERROR; + if (colTypes[0].dataType != datatypes::SystemCatalog::VARCHAR && + colTypes[0].dataType != datatypes::SystemCatalog::CHAR) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("moda() with invalid argument"); + return mcsv1_UDAF::ERROR; + } } - context->setResultType(colTypes[0].dataType); - if (colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL || colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL) { @@ -158,8 +166,10 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes) } context->setScale(colTypes[0].scale); + context->setPrecision(colTypes[0].precision); } - context->setPrecision(colTypes[0].precision); + + context->setResultType(colTypes[0].dataType); mcsv1_UDAF* impl = getImpl(context); @@ -167,7 +177,7 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes) { // The error message will be prepended with // "The storage engine for the table doesn't support " - context->setErrorMessage("moda() with non-numeric argument"); + context->setErrorMessage("moda() with implementation not found for data type"); return mcsv1_UDAF::ERROR; } @@ -178,6 +188,13 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes) template mcsv1_UDAF::ReturnCode Moda_impl_T::init(mcsv1Context* context, ColumnDatum* colTypes) { + if (!(colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL || + colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL)) + { + context->setColWidth(sizeof(T)); + context->setScale(0); + context->setPrecision(0); + } return mcsv1_UDAF::SUCCESS; } @@ -196,7 +213,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T::nextValue(mcsv1Context* context, ColumnDa { static_any::any& valIn = valsIn[0].columnData; ModaData* data = static_cast(context->getUserData()); - std::unordered_map >* map = data->getMap(); + std::unordered_map, comparator >* map = data->getMap(); if (valIn.empty()) { @@ -233,9 +250,9 @@ mcsv1_UDAF::ReturnCode Moda_impl_T::subEvaluate(mcsv1Context* context, const ModaData* outData = static_cast(context->getUserData()); const ModaData* inData = static_cast(userDataIn); - std::unordered_map >* outMap = outData->getMap(); - std::unordered_map >* inMap = inData->getMap(); - typename std::unordered_map >::const_iterator iter; + std::unordered_map, comparator >* outMap = outData->getMap(); + std::unordered_map, comparator >* inMap = inData->getMap(); + typename std::unordered_map, comparator >::const_iterator iter; for (iter = inMap->begin(); iter != inMap->end(); ++iter) { @@ -255,7 +272,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T::evaluate(mcsv1Context* context, static_an long double avg = 0; T val = 0; ModaData* data = static_cast(context->getUserData()); - std::unordered_map >* map = data->getMap(); + std::unordered_map, comparator >* map = data->getMap(); if (map->size() == 0) { @@ -264,7 +281,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T::evaluate(mcsv1Context* context, static_an } avg = data->fCount ? data->fSum / data->fCount : 0; - typename std::unordered_map >::iterator iter; + typename std::unordered_map, comparator >::iterator iter; for (iter = map->begin(); iter != map->end(); ++iter) { @@ -301,7 +318,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T::dropValue(mcsv1Context* context, ColumnDa { static_any::any& valDropped = valsDropped[0].columnData; ModaData* data = static_cast(context->getUserData()); - std::unordered_map >* map = data->getMap(); + std::unordered_map, comparator >* map = data->getMap(); if (valDropped.empty()) { @@ -350,6 +367,9 @@ void ModaData::serialize(messageqcpp::ByteStream& bs) const case execplan::CalpontSystemCatalog::FLOAT: serializeMap(bs); break; case execplan::CalpontSystemCatalog::DOUBLE: serializeMap(bs); break; case execplan::CalpontSystemCatalog::LONGDOUBLE: serializeMap(bs); break; + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + serializeMap(bs); break; default: throw std::runtime_error("ModaData::serialize with bad data type"); break; } } @@ -387,6 +407,9 @@ void ModaData::unserialize(messageqcpp::ByteStream& bs) case execplan::CalpontSystemCatalog::FLOAT: unserializeMap(bs); break; case execplan::CalpontSystemCatalog::DOUBLE: unserializeMap(bs); break; case execplan::CalpontSystemCatalog::LONGDOUBLE: unserializeMap(bs); break; + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + unserializeMap(bs); break; default: throw std::runtime_error("ModaData::unserialize with bad data type"); break; } } @@ -469,6 +492,136 @@ void ModaData::cleanup() clear(); deleteMap(); break; - default: throw std::runtime_error("ModaData::unserialize with bad data type"); break; + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + clear(); + deleteMap(); + break; + default: throw std::runtime_error("ModaData::cleanup with bad data type"); break; } } + +/************************************************************************************************ + * String Specialization +************************************************************************************************/ + +mcsv1_UDAF::ReturnCode Moda_impl_T::init(mcsv1Context* context, ColumnDatum* colTypes) +{ + cs.setCharset(context->getCharsetNumber()); + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode Moda_impl_T::reset(mcsv1Context* context) +{ + ModaData* data = static_cast(context->getUserData()); + data->fReturnType = context->getResultType(); + data->fColWidth = context->getColWidth(); + data->fCs_num = context->getCharsetNumber(); + data->clear(); + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode Moda_impl_T::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn = valsIn[0].columnData; + ModaData* data = static_cast(context->getUserData()); + std::unordered_map, comparator >* map = data->getMap(); + + if (valIn.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + string val; + if (valIn.compatible(strTypeId)) + val = valIn.cast(); + + (*map)[val]++; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode Moda_impl_T::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + ModaData* outData = static_cast(context->getUserData()); + const ModaData* inData = static_cast(userDataIn); + std::unordered_map, comparator >* outMap = outData->getMap(); + std::unordered_map, comparator >* inMap = inData->getMap(); + typename std::unordered_map, comparator >::const_iterator iter; + + for (iter = inMap->begin(); iter != inMap->end(); ++iter) + { + (*outMap)[iter->first] += iter->second; + } + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode Moda_impl_T::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + uint64_t maxCnt = 0; + string val; + string lastVal; + ModaData* data = static_cast(context->getUserData()); + std::unordered_map, comparator >* map = data->getMap(); + + if (map->size() == 0) + { + valOut = string(); + return mcsv1_UDAF::SUCCESS; + } + + typename std::unordered_map, comparator >::iterator iter; + + for (iter = map->begin(); iter != map->end(); ++iter) + { + if (iter->second > maxCnt) + { + val = iter->first; + lastVal = val; + maxCnt = iter->second; + } + else if (iter->second == maxCnt) + { + // Tie breaker: choose smallest according to collation + if (cs.strnncollsp(val, lastVal) < 0) + { + val = iter->first; + } + } + } + + // If scale is > 0, then the original type was DECIMAL. Set the + // ResultType to DECIMAL so the delivery logic moves the decimal point. + if (context->getScale() > 0) + context->setResultType(execplan::CalpontSystemCatalog::DECIMAL); + + valOut = val; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode Moda_impl_T::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valDropped = valsDropped[0].columnData; + ModaData* data = static_cast(context->getUserData()); + std::unordered_map, comparator >* map = data->getMap(); + + if (valDropped.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + string val = convertAnyTo(valDropped); + + --data->fCount; + (*map)[val]--; + + return mcsv1_UDAF::SUCCESS; +} + + diff --git a/utils/regr/moda.h b/utils/regr/moda.h index eae95812c..2d3d9fb99 100644 --- a/utils/regr/moda.h +++ b/utils/regr/moda.h @@ -45,6 +45,7 @@ #include "calpontsystemcatalog.h" #include "windowfunctioncolumn.h" #include "hasher.h" +#include "collation.h" #if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) #define EXPORT __declspec(dllexport) @@ -58,6 +59,8 @@ namespace mcsv1sdk template struct hasher { + hasher(uint32_t cs_num){} + inline size_t operator()(T val) const { return fHasher((char*)&val, sizeof(T)); @@ -67,34 +70,74 @@ struct hasher utils::Hasher fHasher; }; +// A special hasher for double that may only have 10 bytes template <> struct hasher { + hasher(uint32_t cs_num){} inline size_t operator()(long double val) const { - if (sizeof(long double) == 8) // Probably just MSC, but you never know. - { - return fHasher((char*)&val, sizeof(long double)); - } - else - { - // For Linux x86_64, long double is stored in 128 bits, but only 80 are significant - return fHasher((char*)&val, 10); - } +#ifdef MASK_LONGDOUBLE + // For Linux x86_64, long double is stored in 128 bits, but only 80 are significant + return fHasher((char*)&val, 10); +#else + return fHasher((char*)&val, sizeof(long double)); +#endif } private: utils::Hasher fHasher; }; +// A collation aware hasher for strings +template<> +struct hasher +{ + hasher(uint32_t cs_num) : fHasher(cs_num){} + inline size_t operator()(string val) const + { + return fHasher(val.c_str(), val.size()); + } + +private: + datatypes::CollationAwareHasher fHasher; +}; + +template +struct comparator +{ + comparator(uint32_t cs_num){} + + bool operator()(const T& lhs, const T& rhs) const + { + return lhs == rhs; + } +}; +// A collation aware string comparator +template <> +struct comparator +{ + comparator(uint32_t cs_num) : fCs(cs_num) {} + + bool operator()(const std::string lhs, const std::string rhs) const + { + return fCs.eq(lhs, rhs); + } + private: + datatypes::Charset fCs; +}; + + + // Override UserData for data storage struct ModaData : public UserData { - ModaData() + ModaData(uint32_t cs_num = 8) : fMap(NULL) , fReturnType((uint32_t)execplan::CalpontSystemCatalog::UNDEFINED) , fColWidth(0) - , modaImpl(NULL){}; + , modaImpl(NULL) + , fCs_num(cs_num){} virtual ~ModaData() { @@ -105,22 +148,23 @@ struct ModaData : public UserData virtual void unserialize(messageqcpp::ByteStream& bs); template - std::unordered_map >* getMap() + std::unordered_map, comparator >* getMap() { if (!fMap) { // Just in time creation - fMap = new std::unordered_map >; + fMap = new std::unordered_map, comparator >( + 10, hasher(fCs_num), comparator(fCs_num)); } - return (std::unordered_map >*)fMap; + return (std::unordered_map, comparator >*)fMap; } // The const version is only called by serialize() // It shouldn't (and can't) create a new map. template - std::unordered_map >* getMap() const + std::unordered_map, comparator >* getMap() const { - return (std::unordered_map >*)fMap; + return (std::unordered_map, comparator >*)fMap; } template @@ -128,7 +172,7 @@ struct ModaData : public UserData { if (fMap) { - delete (std::unordered_map >*)fMap; + delete (std::unordered_map, comparator >*)fMap; fMap = NULL; } } @@ -148,6 +192,7 @@ struct ModaData : public UserData uint32_t fReturnType; uint32_t fColWidth; mcsv1_UDAF* modaImpl; // A pointer to one of the Moda_impl_T concrete classes + uint32_t fCs_num; private: // For now, copy construction is unwanted @@ -159,10 +204,11 @@ struct ModaData : public UserData template void serializeMap(messageqcpp::ByteStream& bs) const { - std::unordered_map >* map = getMap(); + bs << fCs_num; + std::unordered_map, comparator >* map = getMap(); if (map) { - typename std::unordered_map >::const_iterator iter; + typename std::unordered_map, comparator >::const_iterator iter; bs << (uint64_t)map->size(); for (iter = map->begin(); iter != map->end(); ++iter) { @@ -179,11 +225,13 @@ struct ModaData : public UserData template void unserializeMap(messageqcpp::ByteStream& bs) { + bs >> fCs_num; + uint32_t cnt; T num; uint64_t sz; bs >> sz; - std::unordered_map >* map = getMap(); + std::unordered_map, comparator >* map = getMap(); map->clear(); for (uint64_t i = 0; i < sz; ++i) { @@ -217,6 +265,31 @@ class Moda_impl_T : public mcsv1_UDAF } }; +template<> // string specialization +class Moda_impl_T : public mcsv1_UDAF +{ + public: + // Defaults OK + Moda_impl_T() : cs(8) {}; + virtual ~Moda_impl_T() {}; + + virtual mcsv1_UDAF::ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); + + virtual mcsv1_UDAF::ReturnCode reset(mcsv1Context* context); + virtual mcsv1_UDAF::ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + virtual mcsv1_UDAF::ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + virtual mcsv1_UDAF::ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + virtual mcsv1_UDAF::ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + + // Dummy: not used + virtual mcsv1_UDAF::ReturnCode createUserData(UserData*& userData, int32_t& length) + { + return mcsv1_UDAF::SUCCESS; + } + private: + datatypes::Charset cs; +}; + // moda returns the modal value of the dataset. If more than one value // have the same maximum number of occurances, then the one closest to // AVG wins. If two are the same distance from AVG, then the smaller wins. @@ -276,6 +349,7 @@ class moda : public mcsv1_UDAF Moda_impl_T moda_impl_float; Moda_impl_T moda_impl_double; Moda_impl_T moda_impl_longdouble; + Moda_impl_T moda_impl_string; }; }; // namespace mcsv1sdk diff --git a/utils/regr/modamysql.cpp b/utils/regr/modamysql.cpp index ac3584cef..487740c52 100644 --- a/utils/regr/modamysql.cpp +++ b/utils/regr/modamysql.cpp @@ -5,7 +5,10 @@ #include #include #include - +#include +#include +#include +#include "boost/lexical_cast.hpp" #include "idb_mysql.h" namespace @@ -48,7 +51,7 @@ struct moda_data } // namespace template -char* moda(CONTAINER& container, struct moda_data* data) +void moda(CONTAINER& container, struct moda_data* data) { TYPE avg = (TYPE)data->fCount ? data->fSum / data->fCount : 0; TYPE val = 0.0; @@ -73,8 +76,6 @@ char* moda(CONTAINER& container, struct moda_data* data) } data->result = std::to_string(val); - - return const_cast(data->result.c_str()); } extern "C" @@ -82,18 +83,22 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - my_bool moda_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + my_bool moda_init(UDF_INIT* initid, UDF_ARGS* args, char* message) { struct moda_data* data; + if (args->arg_count != 1) { - strcpy(message, "moda() requires one argument"); + strcpy(message, "moda() requires exactly one argument"); return 1; } if (!isNumeric(args->arg_type[0], args->attributes[0])) { - strcpy(message, "moda() with a non-numeric argument"); - return 1; + if (args->arg_type[0] != STRING_RESULT) + { + strcpy(message, "moda() with an invalid argument"); + return 1; + } } data = new moda_data; @@ -107,7 +112,7 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - void moda_deinit(UDF_INIT* initid) + void moda_deinit(UDF_INIT* initid) { struct moda_data* data = (struct moda_data*)initid->ptr; data->clear(); @@ -117,8 +122,8 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - void moda_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) + void moda_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) { struct moda_data* data = (struct moda_data*)initid->ptr; data->clear(); @@ -127,7 +132,7 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - void moda_add(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused))) + void moda_add(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused))) { // Test for NULL if (args->args[0] == 0) @@ -169,7 +174,7 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - void moda_remove(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused))) + void moda_remove(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused))) { // Test for NULL if (args->args[0] == 0) @@ -210,18 +215,25 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - char* moda(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* error __attribute__((unused))) +//char* moda(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* error __attribute__((unused))) + char* moda(UDF_INIT * initid, UDF_ARGS * args, char* result, ulong* res_length, char* is_null, char* error __attribute__((unused))) { struct moda_data* data = (struct moda_data*)initid->ptr; switch (args->arg_type[0]) { - case INT_RESULT: return moda(data->mapINT, data); - case REAL_RESULT: return moda(data->mapREAL, data); + case INT_RESULT: + moda(data->mapINT, data); + break; + case REAL_RESULT: + moda(data->mapREAL, data); + break; case DECIMAL_RESULT: - case STRING_RESULT: return moda(data->mapDECIMAL, data); + case STRING_RESULT: + moda(data->mapDECIMAL, data); + break; default: return NULL; } - - return NULL; + *res_length = data->result.size(); + return const_cast(data->result.c_str()); } } // Extern "C" diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index de467bf70..25dd8319f 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -1021,6 +1021,8 @@ inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, in } // Handy helper functions + +// Doesn't work with string template inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn) const {