1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-02 17:22:27 +03:00

Mcol 5092 MODA uses wrong column width for some types (#2450)

* MCOL-5092 Ensure column width is correct for datatype
                       Change MODA return type to STRING
                       Modify MODA to handle every numeric type
* MCOL-5162 MODA to support char and varchar with collation support

Fixes to the aggregate bit functions
When we fixed the storage sign issue for MCOL-5092, it uncovered a problem in the bit aggregates (bit_and, bit_or and bit_xor). These aggregates should always return UBIGINT, but they relied on the type of the argument column, which gave bad results.
This commit is contained in:
David.Hall
2022-08-11 15:16:11 -05:00
committed by GitHub
parent c906172bf5
commit 2020f35e88
16 changed files with 594 additions and 111 deletions

View File

@ -478,49 +478,49 @@ int TypeHandlerVarbinary::storeValueToField(rowgroup::Row& row, int pos, StoreFi
int TypeHandlerSInt64::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerSInt64::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const
{ {
int64_t val = row.getIntField<8>(pos); int64_t val = row.getIntField<8>(pos);
return f->store_xlonglong(val); return f->store_longlong(val);
} }
int TypeHandlerUInt64::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerUInt64::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const
{ {
uint64_t val = row.getUintField<8>(pos); uint64_t val = row.getUintField<8>(pos);
return f->store_xlonglong(static_cast<int64_t>(val)); return f->store_ulonglong(val);
} }
int TypeHandlerInt::storeValueToFieldSInt32(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerInt::storeValueToFieldSInt32(rowgroup::Row& row, int pos, StoreField* f) const
{ {
int64_t val = row.getIntField<4>(pos); int64_t val = row.getIntField<4>(pos);
return f->store_xlonglong(val); return f->store_longlong(val);
} }
int TypeHandlerInt::storeValueToFieldUInt32(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerInt::storeValueToFieldUInt32(rowgroup::Row& row, int pos, StoreField* f) const
{ {
uint64_t val = row.getUintField<4>(pos); uint64_t val = row.getUintField<4>(pos);
return f->store_xlonglong(static_cast<int64_t>(val)); return f->store_ulonglong(val);
} }
int TypeHandlerSInt16::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerSInt16::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const
{ {
int64_t val = row.getIntField<2>(pos); int64_t val = row.getIntField<2>(pos);
return f->store_xlonglong(val); return f->store_longlong(val);
} }
int TypeHandlerUInt16::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerUInt16::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const
{ {
uint64_t val = row.getUintField<2>(pos); uint64_t val = row.getUintField<2>(pos);
return f->store_xlonglong(static_cast<int64_t>(val)); return f->store_ulonglong(val);
} }
int TypeHandlerSInt8::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerSInt8::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const
{ {
int64_t val = row.getIntField<1>(pos); int64_t val = row.getIntField<1>(pos);
return f->store_xlonglong(val); return f->store_longlong(val);
} }
int TypeHandlerUInt8::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const int TypeHandlerUInt8::storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const
{ {
uint64_t val = row.getUintField<1>(pos); uint64_t val = row.getUintField<1>(pos);
return f->store_xlonglong(static_cast<int64_t>(val)); return f->store_ulonglong(val);
} }
/* /*

View File

@ -953,7 +953,8 @@ class StoreField
virtual int store_timestamp(int64_t val) = 0; virtual int store_timestamp(int64_t val) = 0;
virtual int store_string(const char* str, size_t length) = 0; virtual int store_string(const char* str, size_t length) = 0;
virtual int store_varbinary(const char* str, size_t length) = 0; virtual int store_varbinary(const char* str, size_t length) = 0;
virtual int store_xlonglong(int64_t val) = 0; virtual int store_longlong(int64_t val) = 0;
virtual int store_ulonglong(uint64_t val) = 0;
virtual int store_float(float val) = 0; virtual int store_float(float val) = 0;
virtual int store_double(double val) = 0; virtual int store_double(double val) = 0;
virtual int store_long_double(long double val) = 0; virtual int store_long_double(long double val) = 0;

View File

@ -979,7 +979,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
// Changing col type based on a parm if multiple parms // Changing col type based on a parm if multiple parms
// doesn't really make sense. // doesn't really make sense.
if (op != AggregateColumn::SUM && op != AggregateColumn::DISTINCT_SUM && if (op != AggregateColumn::SUM && op != AggregateColumn::DISTINCT_SUM &&
op != AggregateColumn::AVG && op != AggregateColumn::DISTINCT_AVG) op != AggregateColumn::AVG && op != AggregateColumn::DISTINCT_AVG &&
op != AggregateColumn::BIT_AND && op != AggregateColumn::BIT_OR &&
op != AggregateColumn::BIT_XOR)
{ {
updateAggregateColType(aggc, srcp, op, jobInfo); updateAggregateColType(aggc, srcp, op, jobInfo);
} }

View File

@ -1332,16 +1332,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
keysAgg.push_back(key); keysAgg.push_back(key);
scaleAgg.push_back(0); scaleAgg.push_back(0);
precisionAgg.push_back(-16); // for connector to skip null check precisionAgg.push_back(-16); // for connector to skip null check
typeAgg.push_back(CalpontSystemCatalog::UBIGINT);
if (isUnsigned(typeProj[colProj]))
{
typeAgg.push_back(CalpontSystemCatalog::UBIGINT);
}
else
{
typeAgg.push_back(CalpontSystemCatalog::BIGINT);
}
csNumAgg.push_back(csNumProj[colProj]); csNumAgg.push_back(csNumProj[colProj]);
widthAgg.push_back(bigIntWidth); widthAgg.push_back(bigIntWidth);
} }
@ -1941,16 +1932,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
keysAgg.push_back(aggKey); keysAgg.push_back(aggKey);
scaleAgg.push_back(0); scaleAgg.push_back(0);
precisionAgg.push_back(-16); // for connector to skip null check precisionAgg.push_back(-16); // for connector to skip null check
typeAgg.push_back(CalpontSystemCatalog::UBIGINT);
if (isUnsigned(typeProj[colProj]))
{
typeAgg.push_back(CalpontSystemCatalog::UBIGINT);
}
else
{
typeAgg.push_back(CalpontSystemCatalog::BIGINT);
}
csNumAgg.push_back(8); csNumAgg.push_back(8);
widthAgg.push_back(bigIntWidth); widthAgg.push_back(bigIntWidth);
colAgg++; colAgg++;
@ -3274,16 +3256,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
keysAggPm.push_back(aggKey); keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0); scaleAggPm.push_back(0);
precisionAggPm.push_back(-16); // for connector to skip null check precisionAggPm.push_back(-16); // for connector to skip null check
typeAggPm.push_back(CalpontSystemCatalog::UBIGINT);
if (isUnsigned(typeProj[colProj]))
{
typeAggPm.push_back(CalpontSystemCatalog::UBIGINT);
}
else
{
typeAggPm.push_back(CalpontSystemCatalog::BIGINT);
}
csNumAggPm.push_back(8); csNumAggPm.push_back(8);
widthAggPm.push_back(bigIntWidth); widthAggPm.push_back(bigIntWidth);
colAggPm++; colAggPm++;
@ -4183,16 +4156,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
keysAggPm.push_back(aggKey); keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0); scaleAggPm.push_back(0);
precisionAggPm.push_back(-16); // for connector to skip null check precisionAggPm.push_back(-16); // for connector to skip null check
typeAggPm.push_back(CalpontSystemCatalog::UBIGINT);
if (isUnsigned(typeProj[colProj]))
{
typeAggPm.push_back(CalpontSystemCatalog::UBIGINT);
}
else
{
typeAggPm.push_back(CalpontSystemCatalog::BIGINT);
}
csNumAggPm.push_back(8); csNumAggPm.push_back(8);
widthAggPm.push_back(bigIntWidth); widthAggPm.push_back(bigIntWidth);
++colAggPm; ++colAggPm;

View File

@ -98,10 +98,14 @@ class StoreFieldMariaDB : public StoreField
return m_field->store_binary(str, length); return m_field->store_binary(str, length);
} }
int store_xlonglong(int64_t val) override int store_longlong(int64_t val) override
{ {
idbassert(dynamic_cast<Field_num*>(m_field)); return m_field->store(val, 0);
return m_field->store(val, static_cast<Field_num*>(m_field)->unsigned_flag); }
int store_ulonglong(uint64_t val)override
{
return m_field->store(static_cast<int64_t>(val), 1);
} }
int store_float(float dl) override int store_float(float dl) override

View File

@ -5192,7 +5192,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC)
{ {
CalpontSystemCatalog::ColType ct; CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::BIGINT; ct.colDataType = CalpontSystemCatalog::UBIGINT;
ct.colWidth = 8; ct.colWidth = 8;
ct.scale = 0; ct.scale = 0;
ct.precision = -16; // borrowed to indicate skip null value check on connector ct.precision = -16; // borrowed to indicate skip null value check on connector

View File

@ -78,7 +78,7 @@ CREATE OR REPLACE FUNCTION caldroppartitionsbyvalue RETURNS STRING SONAME 'ha_co
CREATE OR REPLACE FUNCTION caldisablepartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so'; CREATE OR REPLACE FUNCTION caldisablepartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so';
CREATE OR REPLACE FUNCTION calenablepartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so'; CREATE OR REPLACE FUNCTION calenablepartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so';
CREATE OR REPLACE FUNCTION calshowpartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so'; CREATE OR REPLACE FUNCTION calshowpartitionsbyvalue RETURNS STRING SONAME 'ha_columnstore.so';
CREATE OR REPLACE AGGREGATE FUNCTION moda RETURNS DECIMAL SONAME 'libregr_mysql.so'; CREATE OR REPLACE AGGREGATE FUNCTION moda RETURNS STRING SONAME 'libregr_mysql.so';
CREATE DATABASE IF NOT EXISTS infinidb_querystats; CREATE DATABASE IF NOT EXISTS infinidb_querystats;
CREATE TABLE IF NOT EXISTS infinidb_querystats.querystats CREATE TABLE IF NOT EXISTS infinidb_querystats.querystats

View File

@ -0,0 +1,161 @@
DROP DATABASE IF EXISTS mcs98_db;
CREATE DATABASE mcs98_db;
USE mcs98_db;
CREATE TABLE t1 (t TINYINT, s SMALLINT, m MEDIUMINT, i INT, bi BIGINT, d1 DECIMAL(5,2), d2 DECIMAL(36,12), rl FLOAT, dbl DOUBLE)ENGINE=Columnstore;
INSERT INTO t1 VALUES(NULL, NULL, 1234, -1000012898, 700000012898, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23),
(12, 345, 1234, -1000012899, 70000001289, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23),
(12, 345, 1234, -1000012898, 700000012899, 34.22, 90000000000000000009.124312000092, 14.02, 3900000000000001.24),
(13, 346, 1235, NULL, NULL, NULL, NULL, NULL, NULL),
(28, 1345, 11234, -2000012898, 1700000012899, 134.22, 190000000000000000009.124312000092, 114.02, 13900000000000001.24);
SELECT moda(t) FROM t1;
moda(t)
12
SELECT moda(s) FROM t1;
moda(s)
345
SELECT moda(m) FROM t1;
moda(m)
1234
SELECT moda(i) FROM t1;
moda(i)
-1000012898
SELECT moda(bi) FROM t1;
moda(bi)
700000012899
SELECT moda(d1) FROM t1;
moda(d1)
34.21
SELECT moda(d2) FROM t1;
moda(d2)
90000000000000000009.124312000091
SELECT moda(rl) FROM t1;
moda(rl)
14.010000228881836
SELECT moda(dbl) FROM t1;
moda(dbl)
3.900000000000001e15
CREATE TABLE t2 (
t tinyint,
tu tinyint unsigned,
s smallint,
su smallint unsigned,
m mediumint,
mu mediumint unsigned,
i int,
iu int unsigned,
b bigint,
bu bigint unsigned,
d1 decimal(5,2),
du1 decimal(5,2) unsigned,
d2 decimal(36,12),
du2 decimal(36,12) unsigned,
rl float,
dbl double,
v char(4),
vc varchar(10)
) engine=columnstore;
INSERT INTO t2 VALUES(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816,
2223372036854775816, 22.34, 22.34, 70000000605040302018.124312000091,
70000000605040302019.124312000091, 2344.32, 12345678.91011, 'five', 'five123456');
INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460,
-2223372036854775816, 2223372036854775816, -22.34, 22.34,
-70000000605040302019.124312000091, 70000000605040302029.124312000091,
-2344.32, -123456789.1011, 'four', 'five654321');
INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807,
2223372036854775807, 13.34, 13.34, 70000000605040302019.124312000091,
70000000605040302029.124312000091, 234.432, 12345678.91011, 'five', 'five123456');
INSERT INTO t2 VALUES(-1, 1, -201, 201, -1234867, 1234867, -7483646, 7483646, -2223372036854775807,
2223372036854775807, -13.34, 13.34, -70000000605040302019.124312000091,
70000000605040302019.124312000091, -234.432, -1234567.891011, 'four', 'four123456');
INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816,
2223372036854775816, 22.34, 22.34, 70000000605040302019.124312000091,
70000000605040302019.124312000091, 2344.32, 1234567.891011, 'five', 'five123456');
INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460,
-2223372036854775816, 2223372036854775816, -22.34, 22.34,
-70000000605040302019.124312000091, 70000000605040302029.124312000091,
-2344.32, -12345678.91011, 'four', 'five654321');
INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807,
2223372036854775807, 13.34, 13.34, 70000000605040302018.124312000091,
70000000605040302029.124312000091, 234.432, 12345678.91011, 'six', 'six1234567');
SELECT moda(t) FROM t2;
moda(t)
1
SELECT moda(tu) FROM t2;
moda(tu)
10
SELECT moda(s) FROM t2;
moda(s)
201
SELECT moda(su) FROM t2;
moda(su)
2010
SELECT moda(m) FROM t2;
moda(m)
1234867
SELECT moda(mu) FROM t2;
moda(mu)
1237567
SELECT moda(i) FROM t2;
moda(i)
7483646
SELECT moda(iu) FROM t2;
moda(iu)
74836460
SELECT moda(b) FROM t2;
moda(b)
2223372036854775807
SELECT moda(bu) FROM t2;
moda(bu)
2223372036854775816
SELECT moda(d1) FROM t2;
moda(d1)
13.34
SELECT moda(du1) FROM t2;
moda(du1)
22.34
SELECT moda(d2) FROM t2;
moda(d2)
-70000000605040302019.124312000091
SELECT moda(du2) FROM t2;
moda(du2)
70000000605040302029.124312000091
SELECT moda(rl) FROM t2;
moda(rl)
234.4320068359375
SELECT moda(dbl) FROM t2;
moda(dbl)
12345678.91011
SELECT moda(v) FROM t2;
moda(v)
four
SELECT moda(vc) FROM t2;
moda(vc)
five123456
SELECT i FROM t2 WHERE i >= (SELECT moda(i) FROM t2);
i
74836460
7483646
74836460
7483646
SELECT d2 FROM t2 WHERE d2 < (SELECT moda(d2) FROM t2);
d2
SELECT tu, moda(i) FROM t2 GROUP BY tu;
tu moda(i)
10 -74836460
1 7483646
NULL 0
SELECT floor(moda(rl)) FROM t2;
floor(moda(rl))
234
SELECT ceiling(moda(dbl)) FROM t2;
ceiling(moda(dbl))
12345679
SELECT moda(floor(rl)) FROM t2;
moda(floor(rl))
234
SELECT t, moda(tu) 'q1' FROM t2 GROUP BY t HAVING moda(tu) > 5;
t q1
10 10
-10 10
DROP DATABASE mcs98_db;

View File

@ -0,0 +1,98 @@
#
# Test MODA Function
# Author: dhall, david.hall@mariadb.com
#
# Test MODA with various numeric types, plus CHAR and VARCHAR columns
-- source ../include/have_columnstore.inc
--disable_warnings
DROP DATABASE IF EXISTS mcs98_db;
--enable_warnings
CREATE DATABASE mcs98_db;
USE mcs98_db;
CREATE TABLE t1 (t TINYINT, s SMALLINT, m MEDIUMINT, i INT, bi BIGINT, d1 DECIMAL(5,2), d2 DECIMAL(36,12), rl FLOAT, dbl DOUBLE)ENGINE=Columnstore;
INSERT INTO t1 VALUES(NULL, NULL, 1234, -1000012898, 700000012898, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23),
(12, 345, 1234, -1000012899, 70000001289, 34.21, 90000000000000000009.124312000091, 14.01, 3900000000000001.23),
(12, 345, 1234, -1000012898, 700000012899, 34.22, 90000000000000000009.124312000092, 14.02, 3900000000000001.24),
(13, 346, 1235, NULL, NULL, NULL, NULL, NULL, NULL),
(28, 1345, 11234, -2000012898, 1700000012899, 134.22, 190000000000000000009.124312000092, 114.02, 13900000000000001.24);
SELECT moda(t) FROM t1;
SELECT moda(s) FROM t1;
SELECT moda(m) FROM t1;
SELECT moda(i) FROM t1;
SELECT moda(bi) FROM t1;
SELECT moda(d1) FROM t1;
SELECT moda(d2) FROM t1;
SELECT moda(rl) FROM t1;
SELECT moda(dbl) FROM t1;
CREATE TABLE t2 (
t tinyint,
tu tinyint unsigned,
s smallint,
su smallint unsigned,
m mediumint,
mu mediumint unsigned,
i int,
iu int unsigned,
b bigint,
bu bigint unsigned,
d1 decimal(5,2),
du1 decimal(5,2) unsigned,
d2 decimal(36,12),
du2 decimal(36,12) unsigned,
rl float,
dbl double,
v char(4),
vc varchar(10)
) engine=columnstore;
INSERT INTO t2 VALUES(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816,
2223372036854775816, 22.34, 22.34, 70000000605040302018.124312000091,
70000000605040302019.124312000091, 2344.32, 12345678.91011, 'five', 'five123456');
INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460,
-2223372036854775816, 2223372036854775816, -22.34, 22.34,
-70000000605040302019.124312000091, 70000000605040302029.124312000091,
-2344.32, -123456789.1011, 'four', 'five654321');
INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807,
2223372036854775807, 13.34, 13.34, 70000000605040302019.124312000091,
70000000605040302029.124312000091, 234.432, 12345678.91011, 'five', 'five123456');
INSERT INTO t2 VALUES(-1, 1, -201, 201, -1234867, 1234867, -7483646, 7483646, -2223372036854775807,
2223372036854775807, -13.34, 13.34, -70000000605040302019.124312000091,
70000000605040302019.124312000091, -234.432, -1234567.891011, 'four', 'four123456');
INSERT INTO t2 VALUES(10, 10, 2010, 2010, 1237567, 1237567, 74836460, 74836460, 2223372036854775816,
2223372036854775816, 22.34, 22.34, 70000000605040302019.124312000091,
70000000605040302019.124312000091, 2344.32, 1234567.891011, 'five', 'five123456');
INSERT INTO t2 VALUES(-10, 10, -2010, 2010, -1237567, 1237567, -74836460, 74836460,
-2223372036854775816, 2223372036854775816, -22.34, 22.34,
-70000000605040302019.124312000091, 70000000605040302029.124312000091,
-2344.32, -12345678.91011, 'four', 'five654321');
INSERT INTO t2 VALUES(1, 1, 201, 201, 1234867, 1234867, 7483646, 7483646, 2223372036854775807,
2223372036854775807, 13.34, 13.34, 70000000605040302018.124312000091,
70000000605040302029.124312000091, 234.432, 12345678.91011, 'six', 'six1234567');
SELECT moda(t) FROM t2;
SELECT moda(tu) FROM t2;
SELECT moda(s) FROM t2;
SELECT moda(su) FROM t2;
SELECT moda(m) FROM t2;
SELECT moda(mu) FROM t2;
SELECT moda(i) FROM t2;
SELECT moda(iu) FROM t2;
SELECT moda(b) FROM t2;
SELECT moda(bu) FROM t2;
SELECT moda(d1) FROM t2;
SELECT moda(du1) FROM t2;
SELECT moda(d2) FROM t2;
SELECT moda(du2) FROM t2;
SELECT moda(rl) FROM t2;
SELECT moda(dbl) FROM t2;
SELECT moda(v) FROM t2;
SELECT moda(vc) FROM t2;
SELECT i FROM t2 WHERE i >= (SELECT moda(i) FROM t2);
SELECT d2 FROM t2 WHERE d2 < (SELECT moda(d2) FROM t2);
SELECT tu, moda(i) FROM t2 GROUP BY tu;
SELECT floor(moda(rl)) FROM t2;
SELECT ceiling(moda(dbl)) FROM t2;
SELECT moda(floor(rl)) FROM t2;
SELECT t, moda(tu) 'q1' FROM t2 GROUP BY t HAVING moda(tu) > 5;
# Clean UP
DROP DATABASE mcs98_db;

View File

@ -135,6 +135,7 @@ struct choose_policy<any>
}; };
BIG_POLICY(int128_t); BIG_POLICY(int128_t);
BIG_POLICY(long double);
/// Specializations for small types. /// Specializations for small types.
#define SMALL_POLICY(TYPE) \ #define SMALL_POLICY(TYPE) \

View File

@ -141,10 +141,11 @@ class Charset
Charset(CHARSET_INFO& cs) : mCharset(&cs) Charset(CHARSET_INFO& cs) : mCharset(&cs)
{ {
} }
Charset(CHARSET_INFO* cs) : mCharset(cs ? cs : &my_charset_bin) Charset(CHARSET_INFO* cs = nullptr) : mCharset(cs ? cs : &my_charset_bin)
{ {
} }
Charset(uint32_t charsetNumber); Charset(uint32_t charsetNumber);
void setCharset(uint32_t charsetNumber);
CHARSET_INFO& getCharset() const CHARSET_INFO& getCharset() const
{ {
return *mCharset; return *mCharset;
@ -157,6 +158,10 @@ class Charset
{ {
return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length()) == 0; return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length()) == 0;
} }
// Collation-aware comparison of two std::string values: forwards both buffers
// and their lengths to mCharset->strnncollsp() and returns its result unchanged.
int strnncollsp(const std::string& str1, const std::string& str2) const
{
  const char* lhs = str1.data();
  const char* rhs = str2.data();
  return mCharset->strnncollsp(lhs, str1.length(), rhs, str2.length());
}
int strnncollsp(const utils::ConstString& str1, const utils::ConstString& str2) const int strnncollsp(const utils::ConstString& str1, const utils::ConstString& str2) const
{ {
return mCharset->strnncollsp(str1.str(), str1.length(), str2.str(), str2.length()); return mCharset->strnncollsp(str1.str(), str1.length(), str2.str(), str2.length());

View File

@ -29,4 +29,10 @@ Charset::Charset(uint32_t charsetNumber) : mCharset(&get_charset_or_bin(charsetN
{ {
} }
// Re-point this Charset at whatever get_charset_or_bin() yields for the given
// charset number.
void Charset::setCharset(uint32_t charsetNumber)
{
  auto& resolved = get_charset_or_bin(charsetNumber);
  mCharset = &resolved;
}
} // namespace datatypes } // namespace datatypes

View File

@ -102,6 +102,12 @@ mcsv1_UDAF* moda::getImpl(mcsv1Context* context)
case execplan::CalpontSystemCatalog::FLOAT: data->modaImpl = &moda_impl_float; break; case execplan::CalpontSystemCatalog::FLOAT: data->modaImpl = &moda_impl_float; break;
case execplan::CalpontSystemCatalog::DOUBLE: data->modaImpl = &moda_impl_double; break; case execplan::CalpontSystemCatalog::DOUBLE: data->modaImpl = &moda_impl_double; break;
case execplan::CalpontSystemCatalog::LONGDOUBLE: data->modaImpl = &moda_impl_longdouble; break; case execplan::CalpontSystemCatalog::LONGDOUBLE: data->modaImpl = &moda_impl_longdouble; break;
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::CHAR:
data->modaImpl = &moda_impl_string;
break;
default: data->modaImpl = NULL; default: data->modaImpl = NULL;
} }
return data->modaImpl; return data->modaImpl;
@ -125,14 +131,16 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
if (!(datatypes::isNumeric(colTypes[0].dataType))) if (!(datatypes::isNumeric(colTypes[0].dataType)))
{ {
// The error message will be prepended with if (colTypes[0].dataType != datatypes::SystemCatalog::VARCHAR &&
// "The storage engine for the table doesn't support " colTypes[0].dataType != datatypes::SystemCatalog::CHAR)
context->setErrorMessage("moda() with non-numeric argument"); {
return mcsv1_UDAF::ERROR; // The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("moda() with invalid argument");
return mcsv1_UDAF::ERROR;
}
} }
context->setResultType(colTypes[0].dataType);
if (colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL || if (colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL ||
colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL) colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL)
{ {
@ -158,8 +166,10 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
} }
context->setScale(colTypes[0].scale); context->setScale(colTypes[0].scale);
context->setPrecision(colTypes[0].precision);
} }
context->setPrecision(colTypes[0].precision);
context->setResultType(colTypes[0].dataType);
mcsv1_UDAF* impl = getImpl(context); mcsv1_UDAF* impl = getImpl(context);
@ -167,7 +177,7 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
{ {
// The error message will be prepended with // The error message will be prepended with
// "The storage engine for the table doesn't support " // "The storage engine for the table doesn't support "
context->setErrorMessage("moda() with non-numeric argument"); context->setErrorMessage("moda() with implementation not found for data type");
return mcsv1_UDAF::ERROR; return mcsv1_UDAF::ERROR;
} }
@ -178,6 +188,13 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
template <class T> template <class T>
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::init(mcsv1Context* context, ColumnDatum* colTypes) mcsv1_UDAF::ReturnCode Moda_impl_T<T>::init(mcsv1Context* context, ColumnDatum* colTypes)
{ {
if (!(colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL ||
colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL))
{
context->setColWidth(sizeof(T));
context->setScale(0);
context->setPrecision(0);
}
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;
} }
@ -196,7 +213,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::nextValue(mcsv1Context* context, ColumnDa
{ {
static_any::any& valIn = valsIn[0].columnData; static_any::any& valIn = valsIn[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData()); ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>(); std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
if (valIn.empty()) if (valIn.empty())
{ {
@ -233,9 +250,9 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::subEvaluate(mcsv1Context* context, const
ModaData* outData = static_cast<ModaData*>(context->getUserData()); ModaData* outData = static_cast<ModaData*>(context->getUserData());
const ModaData* inData = static_cast<const ModaData*>(userDataIn); const ModaData* inData = static_cast<const ModaData*>(userDataIn);
std::unordered_map<T, uint32_t, hasher<T> >* outMap = outData->getMap<T>(); std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* outMap = outData->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* inMap = inData->getMap<T>(); std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* inMap = inData->getMap<T>();
typename std::unordered_map<T, uint32_t, hasher<T> >::const_iterator iter; typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::const_iterator iter;
for (iter = inMap->begin(); iter != inMap->end(); ++iter) for (iter = inMap->begin(); iter != inMap->end(); ++iter)
{ {
@ -255,7 +272,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
long double avg = 0; long double avg = 0;
T val = 0; T val = 0;
ModaData* data = static_cast<ModaData*>(context->getUserData()); ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>(); std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
if (map->size() == 0) if (map->size() == 0)
{ {
@ -264,7 +281,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
} }
avg = data->fCount ? data->fSum / data->fCount : 0; avg = data->fCount ? data->fSum / data->fCount : 0;
typename std::unordered_map<T, uint32_t, hasher<T> >::iterator iter; typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::iterator iter;
for (iter = map->begin(); iter != map->end(); ++iter) for (iter = map->begin(); iter != map->end(); ++iter)
{ {
@ -301,7 +318,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::dropValue(mcsv1Context* context, ColumnDa
{ {
static_any::any& valDropped = valsDropped[0].columnData; static_any::any& valDropped = valsDropped[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData()); ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>(); std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
if (valDropped.empty()) if (valDropped.empty())
{ {
@ -350,6 +367,9 @@ void ModaData::serialize(messageqcpp::ByteStream& bs) const
case execplan::CalpontSystemCatalog::FLOAT: serializeMap<float>(bs); break; case execplan::CalpontSystemCatalog::FLOAT: serializeMap<float>(bs); break;
case execplan::CalpontSystemCatalog::DOUBLE: serializeMap<double>(bs); break; case execplan::CalpontSystemCatalog::DOUBLE: serializeMap<double>(bs); break;
case execplan::CalpontSystemCatalog::LONGDOUBLE: serializeMap<long double>(bs); break; case execplan::CalpontSystemCatalog::LONGDOUBLE: serializeMap<long double>(bs); break;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
serializeMap<string>(bs); break;
default: throw std::runtime_error("ModaData::serialize with bad data type"); break; default: throw std::runtime_error("ModaData::serialize with bad data type"); break;
} }
} }
@ -387,6 +407,9 @@ void ModaData::unserialize(messageqcpp::ByteStream& bs)
case execplan::CalpontSystemCatalog::FLOAT: unserializeMap<float>(bs); break; case execplan::CalpontSystemCatalog::FLOAT: unserializeMap<float>(bs); break;
case execplan::CalpontSystemCatalog::DOUBLE: unserializeMap<double>(bs); break; case execplan::CalpontSystemCatalog::DOUBLE: unserializeMap<double>(bs); break;
case execplan::CalpontSystemCatalog::LONGDOUBLE: unserializeMap<long double>(bs); break; case execplan::CalpontSystemCatalog::LONGDOUBLE: unserializeMap<long double>(bs); break;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
unserializeMap<string>(bs); break;
default: throw std::runtime_error("ModaData::unserialize with bad data type"); break; default: throw std::runtime_error("ModaData::unserialize with bad data type"); break;
} }
} }
@ -469,6 +492,136 @@ void ModaData::cleanup()
clear<long double>(); clear<long double>();
deleteMap<long double>(); deleteMap<long double>();
break; break;
default: throw std::runtime_error("ModaData::unserialize with bad data type"); break; case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
clear<string>();
deleteMap<string>();
break;
default: throw std::runtime_error("ModaData::cleanup with bad data type"); break;
} }
} }
/************************************************************************************************
* String Specialization
************************************************************************************************/
// Set-up for the string flavour of MODA: configures `cs` (the Charset that
// evaluate() uses for its tie-break comparison) from the charset number held by
// the context. colTypes is not consulted. Always reports SUCCESS.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::init(mcsv1Context* context, ColumnDatum* colTypes)
{
  const auto charsetNumber = context->getCharsetNumber();
  cs.setCharset(charsetNumber);
  return mcsv1_UDAF::SUCCESS;
}
// Prepare the ModaData attached to the context for a fresh aggregation: copy the
// result type, column width and charset number out of the context, then call
// clear<string>() on it. Always reports SUCCESS.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::reset(mcsv1Context* context)
{
  ModaData& state = *static_cast<ModaData*>(context->getUserData());

  state.fReturnType = context->getResultType();
  state.fColWidth = context->getColWidth();
  state.fCs_num = context->getCharsetNumber();

  state.clear<string>();
  return mcsv1_UDAF::SUCCESS;
}
// Account for one incoming row: bump the occurrence count of the row's string
// value in the ModaData map. An empty `any` is skipped. A non-empty `any` that is
// not compatible with strTypeId is counted under the empty string.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
  using StringCounts = std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >;

  static_any::any& incoming = valsIn[0].columnData;
  ModaData* modaData = static_cast<ModaData*>(context->getUserData());
  // Fetched before the empty check, exactly as before, so any work getMap()
  // does still happens for empty input.
  StringCounts* counts = modaData->getMap<string>();

  if (!incoming.empty())  // Empty ought not happen when UDAF_IGNORE_NULLS is on.
  {
    string key;

    if (incoming.compatible(strTypeId))
    {
      key = incoming.cast<string>();
    }

    ++(*counts)[key];
  }

  return mcsv1_UDAF::SUCCESS;
}
// Merge a partial aggregation into this context's ModaData: every (value, count)
// pair found in userDataIn's map is added onto the matching entry of the
// context's map. A null userDataIn is a no-op. Always reports SUCCESS.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
  using StringCounts = std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >;

  if (userDataIn)
  {
    ModaData* target = static_cast<ModaData*>(context->getUserData());
    const ModaData* source = static_cast<const ModaData*>(userDataIn);
    StringCounts* targetCounts = target->getMap<string>();
    StringCounts* sourceCounts = source->getMap<string>();

    for (const auto& entry : *sourceCounts)
    {
      (*targetCounts)[entry.first] += entry.second;
    }
  }

  return mcsv1_UDAF::SUCCESS;
}
// Produce the mode (most frequent value) of the strings collected so far.
//
// valOut receives the string with the highest count in the ModaData map. When
// several strings share the highest count, the one that compares lowest under
// `cs` (strnncollsp) wins, so the answer does not depend on the iteration order
// of the unordered_map. An empty map yields an empty string.
//
// Always reports SUCCESS.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::evaluate(mcsv1Context* context, static_any::any& valOut)
{
  uint64_t maxCnt = 0;
  string val;
  ModaData* data = static_cast<ModaData*>(context->getUserData());
  std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();

  if (map->size() == 0)
  {
    valOut = string();
    return mcsv1_UDAF::SUCCESS;
  }

  for (const auto& entry : *map)
  {
    if (entry.second > maxCnt)
    {
      // Strictly more frequent: new winner.
      val = entry.first;
      maxCnt = entry.second;
    }
    else if (entry.second == maxCnt)
    {
      // Tie breaker: choose smallest according to collation. The candidate must
      // be compared against the current winner; comparing the winner with a copy
      // of itself (as was done previously) can never select the candidate.
      if (cs.strnncollsp(entry.first, val) < 0)
      {
        val = entry.first;
      }
    }
  }

  // If scale is > 0, then the original type was DECIMAL. Set the
  // ResultType to DECIMAL so the delivery logic moves the decimal point.
  // NOTE(review): this mirrors the numeric implementation; presumably string
  // columns always report scale 0 so this never triggers -- confirm.
  if (context->getScale() > 0)
    context->setResultType(execplan::CalpontSystemCatalog::DECIMAL);

  valOut = val;
  return mcsv1_UDAF::SUCCESS;
}
// Removes one previously counted value from the running tally.
//
// context     - supplies the ModaData holding the value -> count map.
// valsDropped - valsDropped[0].columnData is the value being removed.
// Always returns mcsv1_UDAF::SUCCESS.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
  using CountMap = std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >;

  static_any::any& valDropped = valsDropped[0].columnData;
  ModaData* data = static_cast<ModaData*>(context->getUserData());
  CountMap* map = data->getMap<string>();

  if (valDropped.empty())
  {
    return mcsv1_UDAF::SUCCESS;  // Ought not happen when UDAF_IGNORE_NULLS is on.
  }

  // Extract the key exactly the way nextValue() does so the same map entry is
  // hit. convertAnyTo<> is documented in this code base as not working with
  // string.
  string val;
  if (valDropped.compatible(strTypeId))
    val = valDropped.cast<string>();

  // The string nextValue() does not increment fCount, so only step it down
  // while it is positive rather than letting it run past zero.
  if (data->fCount > 0)
    --data->fCount;

  // Decrement only an existing, positive count. operator[] followed by --
  // would insert a missing key and wrap its unsigned count to the maximum,
  // making a value that was never seen look like the mode.
  CountMap::iterator entry = map->find(val);
  if (entry != map->end() && entry->second > 0)
  {
    if (--entry->second == 0)
    {
      // Nothing left of this value; remove it so evaluate() never sees it.
      map->erase(entry);
    }
  }

  return mcsv1_UDAF::SUCCESS;
}

View File

@ -45,6 +45,7 @@
#include "calpontsystemcatalog.h" #include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h" #include "windowfunctioncolumn.h"
#include "hasher.h" #include "hasher.h"
#include "collation.h"
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) #if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport) #define EXPORT __declspec(dllexport)
@ -58,6 +59,8 @@ namespace mcsv1sdk
template <class T> template <class T>
struct hasher struct hasher
{ {
hasher(uint32_t cs_num){}
inline size_t operator()(T val) const inline size_t operator()(T val) const
{ {
return fHasher((char*)&val, sizeof(T)); return fHasher((char*)&val, sizeof(T));
@ -67,34 +70,74 @@ struct hasher
utils::Hasher fHasher; utils::Hasher fHasher;
}; };
// A special hasher for long double, of which only 10 bytes may be significant
template <> template <>
struct hasher<long double> struct hasher<long double>
{ {
hasher(uint32_t cs_num){}
inline size_t operator()(long double val) const inline size_t operator()(long double val) const
{ {
if (sizeof(long double) == 8) // Probably just MSC, but you never know. #ifdef MASK_LONGDOUBLE
{ // For Linux x86_64, long double is stored in 128 bits, but only 80 are significant
return fHasher((char*)&val, sizeof(long double)); return fHasher((char*)&val, 10);
} #else
else return fHasher((char*)&val, sizeof(long double));
{ #endif
// For Linux x86_64, long double is stored in 128 bits, but only 80 are significant
return fHasher((char*)&val, 10);
}
} }
private: private:
utils::Hasher fHasher; utils::Hasher fHasher;
}; };
// A collation aware hasher for strings
template<>
struct hasher<string>
{
hasher(uint32_t cs_num) : fHasher(cs_num){}
inline size_t operator()(string val) const
{
return fHasher(val.c_str(), val.size());
}
private:
datatypes::CollationAwareHasher fHasher;
};
// Generic key-equality predicate for the MODA maps.
// The constructor accepts a collation number only so that every comparator
// can be built the same way; this generic version ignores it and relies on
// T's own operator==.
template <class T>
struct comparator
{
  comparator(uint32_t /*cs_num*/)
  {
  }

  // Returns true when both keys compare equal with operator==.
  bool operator()(const T& left, const T& right) const
  {
    const bool same = (left == right);
    return same;
  }
};
// A collation aware string comparator.
// Equality is decided by datatypes::Charset::eq() for the collation number
// given at construction, not by byte-wise comparison.
template <>
struct comparator<std::string>
{
  // cs_num: collation number used to build the Charset.
  comparator(uint32_t cs_num) : fCs(cs_num)
  {
  }

  // Operands are taken by const reference: this runs on every key comparison
  // inside the map, and passing by value copied both strings each time.
  bool operator()(const std::string& lhs, const std::string& rhs) const
  {
    return fCs.eq(lhs, rhs);
  }

 private:
  datatypes::Charset fCs;
};
// Override UserData for data storage // Override UserData for data storage
struct ModaData : public UserData struct ModaData : public UserData
{ {
ModaData() ModaData(uint32_t cs_num = 8)
: fMap(NULL) : fMap(NULL)
, fReturnType((uint32_t)execplan::CalpontSystemCatalog::UNDEFINED) , fReturnType((uint32_t)execplan::CalpontSystemCatalog::UNDEFINED)
, fColWidth(0) , fColWidth(0)
, modaImpl(NULL){}; , modaImpl(NULL)
, fCs_num(cs_num){}
virtual ~ModaData() virtual ~ModaData()
{ {
@ -105,22 +148,23 @@ struct ModaData : public UserData
virtual void unserialize(messageqcpp::ByteStream& bs); virtual void unserialize(messageqcpp::ByteStream& bs);
template <class T> template <class T>
std::unordered_map<T, uint32_t, hasher<T> >* getMap() std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* getMap()
{ {
if (!fMap) if (!fMap)
{ {
// Just in time creation // Just in time creation
fMap = new std::unordered_map<T, uint32_t, hasher<T> >; fMap = new std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >(
10, hasher<T>(fCs_num), comparator<T>(fCs_num));
} }
return (std::unordered_map<T, uint32_t, hasher<T> >*)fMap; return (std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >*)fMap;
} }
// The const version is only called by serialize() // The const version is only called by serialize()
// It shouldn't (and can't) create a new map. // It shouldn't (and can't) create a new map.
template <class T> template <class T>
std::unordered_map<T, uint32_t, hasher<T> >* getMap() const std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* getMap() const
{ {
return (std::unordered_map<T, uint32_t, hasher<T> >*)fMap; return (std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >*)fMap;
} }
template <class T> template <class T>
@ -128,7 +172,7 @@ struct ModaData : public UserData
{ {
if (fMap) if (fMap)
{ {
delete (std::unordered_map<T, uint32_t, hasher<T> >*)fMap; delete (std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >*)fMap;
fMap = NULL; fMap = NULL;
} }
} }
@ -148,6 +192,7 @@ struct ModaData : public UserData
uint32_t fReturnType; uint32_t fReturnType;
uint32_t fColWidth; uint32_t fColWidth;
mcsv1_UDAF* modaImpl; // A pointer to one of the Moda_impl_T concrete classes mcsv1_UDAF* modaImpl; // A pointer to one of the Moda_impl_T concrete classes
uint32_t fCs_num;
private: private:
// For now, copy construction is unwanted // For now, copy construction is unwanted
@ -159,10 +204,11 @@ struct ModaData : public UserData
template <class T> template <class T>
void serializeMap(messageqcpp::ByteStream& bs) const void serializeMap(messageqcpp::ByteStream& bs) const
{ {
std::unordered_map<T, uint32_t, hasher<T> >* map = getMap<T>(); bs << fCs_num;
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = getMap<T>();
if (map) if (map)
{ {
typename std::unordered_map<T, uint32_t, hasher<T> >::const_iterator iter; typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::const_iterator iter;
bs << (uint64_t)map->size(); bs << (uint64_t)map->size();
for (iter = map->begin(); iter != map->end(); ++iter) for (iter = map->begin(); iter != map->end(); ++iter)
{ {
@ -179,11 +225,13 @@ struct ModaData : public UserData
template <class T> template <class T>
void unserializeMap(messageqcpp::ByteStream& bs) void unserializeMap(messageqcpp::ByteStream& bs)
{ {
bs >> fCs_num;
uint32_t cnt; uint32_t cnt;
T num; T num;
uint64_t sz; uint64_t sz;
bs >> sz; bs >> sz;
std::unordered_map<T, uint32_t, hasher<T> >* map = getMap<T>(); std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = getMap<T>();
map->clear(); map->clear();
for (uint64_t i = 0; i < sz; ++i) for (uint64_t i = 0; i < sz; ++i)
{ {
@ -217,6 +265,31 @@ class Moda_impl_T : public mcsv1_UDAF
} }
}; };
// String specialization of Moda_impl_T.
// Declares the UDAF entry points for string arguments; the bodies other than
// createUserData() are defined out of line.
template <>
class Moda_impl_T<string> : public mcsv1_UDAF
{
 public:
  // The charset member starts out built from collation number 8.
  Moda_impl_T() : cs(8)
  {
  }

  virtual ~Moda_impl_T()
  {
  }

  virtual mcsv1_UDAF::ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes);
  virtual mcsv1_UDAF::ReturnCode reset(mcsv1Context* context);
  virtual mcsv1_UDAF::ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
  virtual mcsv1_UDAF::ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
  virtual mcsv1_UDAF::ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
  virtual mcsv1_UDAF::ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);

  // Dummy: not used. Reports success without touching either out-parameter.
  virtual mcsv1_UDAF::ReturnCode createUserData(UserData*& userData, int32_t& length)
  {
    return mcsv1_UDAF::SUCCESS;
  }

 private:
  // Charset consulted by evaluate() when breaking ties between values.
  datatypes::Charset cs;
};
// moda returns the modal value of the dataset. If more than one value // moda returns the modal value of the dataset. If more than one value
// have the same maximum number of occurrences, then the one closest to // have the same maximum number of occurrences, then the one closest to
// AVG wins. If two are the same distance from AVG, then the smaller wins. // AVG wins. If two are the same distance from AVG, then the smaller wins.
@ -276,6 +349,7 @@ class moda : public mcsv1_UDAF
Moda_impl_T<float> moda_impl_float; Moda_impl_T<float> moda_impl_float;
Moda_impl_T<double> moda_impl_double; Moda_impl_T<double> moda_impl_double;
Moda_impl_T<long double> moda_impl_longdouble; Moda_impl_T<long double> moda_impl_longdouble;
Moda_impl_T<string> moda_impl_string;
}; };
}; // namespace mcsv1sdk }; // namespace mcsv1sdk

View File

@ -5,7 +5,10 @@
#include <string.h> #include <string.h>
#include <unordered_map> #include <unordered_map>
#include <algorithm> #include <algorithm>
#include <sstream>
#include <iostream>
#include <charconv>
#include "boost/lexical_cast.hpp"
#include "idb_mysql.h" #include "idb_mysql.h"
namespace namespace
@ -48,7 +51,7 @@ struct moda_data
} // namespace } // namespace
template <class TYPE, class CONTAINER> template <class TYPE, class CONTAINER>
char* moda(CONTAINER& container, struct moda_data* data) void moda(CONTAINER& container, struct moda_data* data)
{ {
TYPE avg = (TYPE)data->fCount ? data->fSum / data->fCount : 0; TYPE avg = (TYPE)data->fCount ? data->fSum / data->fCount : 0;
TYPE val = 0.0; TYPE val = 0.0;
@ -73,8 +76,6 @@ char* moda(CONTAINER& container, struct moda_data* data)
} }
data->result = std::to_string(val); data->result = std::to_string(val);
return const_cast<char*>(data->result.c_str());
} }
extern "C" extern "C"
@ -82,18 +83,22 @@ extern "C"
#ifdef _MSC_VER #ifdef _MSC_VER
__declspec(dllexport) __declspec(dllexport)
#endif #endif
my_bool moda_init(UDF_INIT* initid, UDF_ARGS* args, char* message) my_bool moda_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{ {
struct moda_data* data; struct moda_data* data;
if (args->arg_count != 1) if (args->arg_count != 1)
{ {
strcpy(message, "moda() requires one argument"); strcpy(message, "moda() requires exactly one argument");
return 1; return 1;
} }
if (!isNumeric(args->arg_type[0], args->attributes[0])) if (!isNumeric(args->arg_type[0], args->attributes[0]))
{ {
strcpy(message, "moda() with a non-numeric argument"); if (args->arg_type[0] != STRING_RESULT)
return 1; {
strcpy(message, "moda() with an invalid argument");
return 1;
}
} }
data = new moda_data; data = new moda_data;
@ -107,7 +112,7 @@ extern "C"
#ifdef _MSC_VER #ifdef _MSC_VER
__declspec(dllexport) __declspec(dllexport)
#endif #endif
void moda_deinit(UDF_INIT* initid) void moda_deinit(UDF_INIT* initid)
{ {
struct moda_data* data = (struct moda_data*)initid->ptr; struct moda_data* data = (struct moda_data*)initid->ptr;
data->clear(); data->clear();
@ -117,8 +122,8 @@ extern "C"
#ifdef _MSC_VER #ifdef _MSC_VER
__declspec(dllexport) __declspec(dllexport)
#endif #endif
void moda_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), void moda_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused))) char* message __attribute__((unused)))
{ {
struct moda_data* data = (struct moda_data*)initid->ptr; struct moda_data* data = (struct moda_data*)initid->ptr;
data->clear(); data->clear();
@ -127,7 +132,7 @@ extern "C"
#ifdef _MSC_VER #ifdef _MSC_VER
__declspec(dllexport) __declspec(dllexport)
#endif #endif
void moda_add(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused))) void moda_add(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused)))
{ {
// Test for NULL // Test for NULL
if (args->args[0] == 0) if (args->args[0] == 0)
@ -169,7 +174,7 @@ extern "C"
#ifdef _MSC_VER #ifdef _MSC_VER
__declspec(dllexport) __declspec(dllexport)
#endif #endif
void moda_remove(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused))) void moda_remove(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message __attribute__((unused)))
{ {
// Test for NULL // Test for NULL
if (args->args[0] == 0) if (args->args[0] == 0)
@ -210,18 +215,25 @@ extern "C"
#ifdef _MSC_VER #ifdef _MSC_VER
__declspec(dllexport) __declspec(dllexport)
#endif #endif
char* moda(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* error __attribute__((unused))) //char* moda(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* error __attribute__((unused)))
char* moda(UDF_INIT * initid, UDF_ARGS * args, char* result, ulong* res_length, char* is_null, char* error __attribute__((unused)))
{ {
struct moda_data* data = (struct moda_data*)initid->ptr; struct moda_data* data = (struct moda_data*)initid->ptr;
switch (args->arg_type[0]) switch (args->arg_type[0])
{ {
case INT_RESULT: return moda<int64_t>(data->mapINT, data); case INT_RESULT:
case REAL_RESULT: return moda<double>(data->mapREAL, data); moda<int64_t>(data->mapINT, data);
break;
case REAL_RESULT:
moda<double>(data->mapREAL, data);
break;
case DECIMAL_RESULT: case DECIMAL_RESULT:
case STRING_RESULT: return moda<long double>(data->mapDECIMAL, data); case STRING_RESULT:
moda<long double>(data->mapDECIMAL, data);
break;
default: return NULL; default: return NULL;
} }
*res_length = data->result.size();
return NULL; return const_cast<char*>(data->result.c_str());
} }
} // Extern "C" } // Extern "C"

View File

@ -1021,6 +1021,8 @@ inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, in
} }
// Handy helper functions // Handy helper functions
// Doesn't work with string
template <typename T> template <typename T>
inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn) const inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn) const
{ {