1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00

Welford algorithm for STD and VAR

The naive algorithm for calculating STD and VAR is subject to catastrophic
cancellation. The well-known Welford's algorithm is used instead.
This commit is contained in:
Andrey Piskunov 2022-06-01 19:02:24 +03:00
parent 4e50fca460
commit c5fa27475d
8 changed files with 1003 additions and 40 deletions

View File

@ -1478,7 +1478,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
functionVec[i]->fAuxColumnIndex = lastCol;
// sum(x)
// mean(x)
oidsAgg.push_back(oidsProj[j]);
keysAgg.push_back(keysProj[j]);
scaleAgg.push_back(0);
@ -1488,7 +1488,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
widthAgg.push_back(sizeof(long double));
++lastCol;
// sum(x**2)
// sum(x_i - mean)^2
oidsAgg.push_back(oidsProj[j]);
keysAgg.push_back(keysProj[j]);
scaleAgg.push_back(0);
@ -1910,7 +1910,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
widthAgg.push_back(sizeof(double));
funct->fAuxColumnIndex = ++colAgg;
// sum(x)
// mean(x)
oidsAgg.push_back(oidsProj[colProj]);
keysAgg.push_back(aggKey);
scaleAgg.push_back(0);
@ -1920,7 +1920,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
widthAgg.push_back(sizeof(long double));
++colAgg;
// sum(x**2)
// sum(x_i - mean)^2
oidsAgg.push_back(oidsProj[colProj]);
keysAgg.push_back(aggKey);
scaleAgg.push_back(0);
@ -2581,7 +2581,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
functionVec2[i]->fAuxColumnIndex = lastCol;
// sum(x)
// mean(x)
oidsAggDist.push_back(oidsAgg[j]);
keysAggDist.push_back(keysAgg[j]);
scaleAggDist.push_back(0);
@ -2591,7 +2591,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
widthAggDist.push_back(sizeof(long double));
++lastCol;
// sum(x**2)
// sum(x_i - mean)^2
oidsAggDist.push_back(oidsAgg[j]);
keysAggDist.push_back(keysAgg[j]);
scaleAggDist.push_back(0);
@ -3243,7 +3243,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
widthAggPm.push_back(sizeof(double));
funct->fAuxColumnIndex = ++colAggPm;
// sum(x)
// mean(x)
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
@ -3253,7 +3253,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
widthAggPm.push_back(sizeof(long double));
++colAggPm;
// sum(x**2)
// sum(x_i - mean)^2
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
@ -3701,7 +3701,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
functionVecUm[i]->fAuxColumnIndex = lastCol;
// sum(x)
// mean(x)
oidsAggUm.push_back(oidsAggPm[j]);
keysAggUm.push_back(keysAggPm[j]);
scaleAggUm.push_back(0);
@ -3711,7 +3711,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
widthAggUm.push_back(sizeof(long double));
++lastCol;
// sum(x**2)
// sum(x_i - mean)^2
oidsAggUm.push_back(oidsAggPm[j]);
keysAggUm.push_back(keysAggPm[j]);
scaleAggUm.push_back(0);
@ -4152,7 +4152,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
widthAggPm.push_back(sizeof(double));
funct->fAuxColumnIndex = ++colAggPm;
// sum(x)
// mean(x)
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
@ -4162,7 +4162,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
widthAggPm.push_back(sizeof(long double));
++colAggPm;
// sum(x**2)
// sum(x_i - mean)^2
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
@ -4808,7 +4808,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
functionVecUm[i]->fAuxColumnIndex = lastCol;
// sum(x)
// mean(x)
oidsAggDist.push_back(oidsAggPm[j]);
keysAggDist.push_back(keysAggPm[j]);
scaleAggDist.push_back(0);
@ -4818,7 +4818,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
widthAggDist.push_back(sizeof(long double));
++lastCol;
// sum(x**2)
// sum(x_i - mean)^2
oidsAggDist.push_back(oidsAggPm[j]);
keysAggDist.push_back(keysAggPm[j]);
scaleAggDist.push_back(0);

View File

@ -0,0 +1,217 @@
DROP DATABASE IF EXISTS std_test_db;
CREATE DATABASE std_test_db;
USE std_test_db;
create table t1 (
col_signed tinyint,
col_unsigned tinyint unsigned
)engine=columnstore;
LOAD DATA LOCAL infile 'MTR_SUITE_DIR/../std_data/tinyint_range.tbl' INTO TABLE t1 FIELDS TERMINATED BY '|';;
ALTER TABLE t1 ADD COLUMN col_small_signed SMALLINT;
ALTER TABLE t1 ADD COLUMN col_small_unsigned SMALLINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_med_signed MEDIUMINT;
ALTER TABLE t1 ADD COLUMN col_med_unsigned MEDIUMINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_int_signed INT;
ALTER TABLE t1 ADD COLUMN col_int_unsigned INT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_big_signed BIGINT;
ALTER TABLE t1 ADD COLUMN col_big_unsigned BIGINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_dec_signed DECIMAL(38,0);
ALTER TABLE t1 ADD COLUMN col_dec_unsigned DECIMAL(38,0) UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_float_signed FLOAT;
ALTER TABLE t1 ADD COLUMN col_float_unsigned FLOAT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_double_signed DOUBLE;
ALTER TABLE t1 ADD COLUMN col_double_unsigned DOUBLE UNSIGNED;
UPDATE t1 SET col_small_signed=col_signed + sign(col_signed) * 32000;
UPDATE t1 SET col_small_unsigned=col_unsigned + 65000;
UPDATE t1 SET col_med_signed=col_signed + sign(col_signed) * 8388000;
UPDATE t1 SET col_med_unsigned=col_unsigned + 16776000;
UPDATE t1 SET col_int_signed=col_signed + sign(col_signed) * 2147483000;
UPDATE t1 SET col_int_unsigned=col_unsigned + 4294000000;
UPDATE t1 SET col_big_signed=col_signed + sign(col_signed) * 9223372036854775000;
UPDATE t1 SET col_big_unsigned=col_unsigned + 18446744073709551000;
UPDATE t1 SET col_dec_signed=col_signed + sign(col_signed) * 800000000000000000000000000000000001;
UPDATE t1 SET col_dec_unsigned=col_unsigned + 800000000000000000000000000000000003;
UPDATE t1 SET col_float_signed=col_signed + 0.637 + sign(col_signed) * 8388000;
UPDATE t1 SET col_float_unsigned=col_unsigned + 0.637 + 16776000;
UPDATE t1 SET col_double_signed=col_signed + 0.637 + sign(col_signed) * 2147483000;
UPDATE t1 SET col_double_unsigned=col_unsigned + 0.637 + 4294000000;
SELECT 'q1', floor(STD(col_signed)) FROM t1;
q1 floor(STD(col_signed))
q1 73
SELECT 'q2', floor(STD(col_unsigned)) FROM t1;
q2 floor(STD(col_unsigned))
q2 73
SELECT 'q3', floor(STD(col_small_signed)) FROM t1;
q3 floor(STD(col_small_signed))
q3 32000
SELECT 'q4', floor(STD(col_small_unsigned)) FROM t1;
q4 floor(STD(col_small_unsigned))
q4 73
SELECT 'q5', floor(STD(col_med_signed)) FROM t1;
q5 floor(STD(col_med_signed))
q5 8371470
SELECT 'q6', floor(STD(col_med_unsigned)) FROM t1;
q6 floor(STD(col_med_unsigned))
q6 73
SELECT 'q7', floor(STD(col_int_signed)) FROM t1;
q7 floor(STD(col_int_signed))
q7 2143234889
SELECT 'q8', floor(STD(col_int_unsigned)) FROM t1;
q8 floor(STD(col_int_unsigned))
q8 73
SELECT 'q9', floor(STD(col_big_signed)) FROM t1;
q9 floor(STD(col_big_signed))
q9 9205126264421172000
SELECT 'q10', floor(STD(col_big_unsigned)) FROM t1;
q10 floor(STD(col_big_unsigned))
q10 73
SELECT 'q11', floor(STD(col_dec_signed)) FROM t1;
q11 floor(STD(col_dec_signed))
q11 798417431511104800000000000000000000
SELECT 'q13', floor(STD(col_float_signed)) FROM t1;
q13 floor(STD(col_float_signed))
q13 8371470
SELECT 'q14', floor(STD(col_float_unsigned)) FROM t1;
q14 floor(STD(col_float_unsigned))
q14 73
SELECT 'q15', floor(STD(col_double_signed)) FROM t1;
q15 floor(STD(col_double_signed))
q15 2143234889
SELECT 'q16', floor(STD(col_double_unsigned)) FROM t1;
q16 floor(STD(col_double_unsigned))
q16 73
SELECT 'q17', floor(STDDEV_SAMP(col_signed)) FROM t1;
q17 floor(STDDEV_SAMP(col_signed))
q17 73
SELECT 'q18', floor(STDDEV_SAMP(col_unsigned)) FROM t1;
q18 floor(STDDEV_SAMP(col_unsigned))
q18 73
SELECT 'q19', floor(STDDEV_SAMP(col_small_signed)) FROM t1;
q19 floor(STDDEV_SAMP(col_small_signed))
q19 32063
SELECT 'q20', floor(STDDEV_SAMP(col_small_unsigned)) FROM t1;
q20 floor(STDDEV_SAMP(col_small_unsigned))
q20 73
SELECT 'q21', floor(STDDEV_SAMP(col_med_signed)) FROM t1;
q21 floor(STDDEV_SAMP(col_med_signed))
q21 8387998
SELECT 'q22', floor(STDDEV_SAMP(col_med_unsigned)) FROM t1;
q22 floor(STDDEV_SAMP(col_med_unsigned))
q22 73
SELECT 'q23', floor(STDDEV_SAMP(col_int_signed)) FROM t1;
q23 floor(STDDEV_SAMP(col_int_signed))
q23 2147466354
SELECT 'q24', floor(STDDEV_SAMP(col_int_unsigned)) FROM t1;
q24 floor(STDDEV_SAMP(col_int_unsigned))
q24 73
SELECT 'q25', floor(STDDEV_SAMP(col_big_signed)) FROM t1;
q25 floor(STDDEV_SAMP(col_big_signed))
q25 9223300272764652000
SELECT 'q26', floor(STDDEV_SAMP(col_big_unsigned)) FROM t1;
q26 floor(STDDEV_SAMP(col_big_unsigned))
q26 73
SELECT 'q27', floor(STDDEV_SAMP(col_dec_signed)) FROM t1;
q27 floor(STDDEV_SAMP(col_dec_signed))
q27 799993775457406500000000000000000000
SELECT 'q29', floor(STDDEV_SAMP(col_float_signed)) FROM t1;
q29 floor(STDDEV_SAMP(col_float_signed))
q29 8387998
SELECT 'q30', floor(STDDEV_SAMP(col_float_unsigned)) FROM t1;
q30 floor(STDDEV_SAMP(col_float_unsigned))
q30 73
SELECT 'q31', floor(STDDEV_SAMP(col_double_signed)) FROM t1;
q31 floor(STDDEV_SAMP(col_double_signed))
q31 2147466354
SELECT 'q32', floor(STDDEV_SAMP(col_double_unsigned)) FROM t1;
q32 floor(STDDEV_SAMP(col_double_unsigned))
q32 73
SELECT 'q33', floor(VAR_POP(col_signed)) FROM t1;
q33 floor(VAR_POP(col_signed))
q33 5376
SELECT 'q34', floor(VAR_POP(col_unsigned)) FROM t1;
q34 floor(VAR_POP(col_unsigned))
q34 5376
SELECT 'q35', floor(VAR_POP(col_small_signed)) FROM t1;
q35 floor(VAR_POP(col_small_signed))
q35 1024021882
SELECT 'q36', floor(VAR_POP(col_small_unsigned)) FROM t1;
q36 floor(VAR_POP(col_small_unsigned))
q36 5376
SELECT 'q37', floor(VAR_POP(col_med_signed)) FROM t1;
q37 floor(VAR_POP(col_med_signed))
q37 70081516547007
SELECT 'q38', floor(VAR_POP(col_med_unsigned)) FROM t1;
q38 floor(VAR_POP(col_med_unsigned))
q38 5376
SELECT 'q39', floor(VAR_POP(col_int_signed)) FROM t1;
q39 floor(VAR_POP(col_int_signed))
q39 4593455793567983000
SELECT 'q40', floor(VAR_POP(col_int_unsigned)) FROM t1;
q40 floor(VAR_POP(col_int_unsigned))
q40 5376
SELECT 'q41', floor(VAR_POP(col_big_signed)) FROM t1;
q41 floor(VAR_POP(col_big_signed))
q41 84734349543936475000000000000000000000
SELECT 'q42', floor(VAR_POP(col_big_unsigned)) FROM t1;
q42 floor(VAR_POP(col_big_unsigned))
q42 5376
SELECT 'q43', floor(VAR_POP(col_dec_signed)) FROM t1;
q43 floor(VAR_POP(col_dec_signed))
q43 637470394940789800000000000000000000000000000000000000000000000000000000
SELECT 'q45', floor(VAR_POP(col_float_signed)) FROM t1;
q45 floor(VAR_POP(col_float_signed))
q45 70081516546971
SELECT 'q46', floor(VAR_POP(col_float_unsigned)) FROM t1;
q46 floor(VAR_POP(col_float_unsigned))
q46 5376
SELECT 'q47', floor(VAR_POP(col_double_signed)) FROM t1;
q47 floor(VAR_POP(col_double_signed))
q47 4593455793567983000
SELECT 'q48', floor(VAR_POP(col_double_unsigned)) FROM t1;
q48 floor(VAR_POP(col_double_unsigned))
q48 5376
SELECT 'q49', floor(VAR_SAMP(col_signed)) FROM t1;
q49 floor(VAR_SAMP(col_signed))
q49 5397
SELECT 'q50', floor(VAR_SAMP(col_unsigned)) FROM t1;
q50 floor(VAR_SAMP(col_unsigned))
q50 5397
SELECT 'q51', floor(VAR_SAMP(col_small_signed)) FROM t1;
q51 floor(VAR_SAMP(col_small_signed))
q51 1028069399
SELECT 'q52', floor(VAR_SAMP(col_small_unsigned)) FROM t1;
q52 floor(VAR_SAMP(col_small_unsigned))
q52 5397
SELECT 'q53', floor(VAR_SAMP(col_med_signed)) FROM t1;
q53 floor(VAR_SAMP(col_med_signed))
q53 70358518588695
SELECT 'q54', floor(VAR_SAMP(col_med_unsigned)) FROM t1;
q54 floor(VAR_SAMP(col_med_unsigned))
q54 5397
SELECT 'q55', floor(VAR_SAMP(col_int_signed)) FROM t1;
q55 floor(VAR_SAMP(col_int_signed))
q55 4611611745321216000
SELECT 'q56', floor(VAR_SAMP(col_int_unsigned)) FROM t1;
q56 floor(VAR_SAMP(col_int_unsigned))
q56 5397
SELECT 'q57', floor(VAR_SAMP(col_big_signed)) FROM t1;
q57 floor(VAR_SAMP(col_big_signed))
q57 85069267921580490000000000000000000000
SELECT 'q58', floor(VAR_SAMP(col_big_unsigned)) FROM t1;
q58 floor(VAR_SAMP(col_big_unsigned))
q58 5397
SELECT 'q59', floor(VAR_SAMP(col_dec_signed)) FROM t1;
q59 floor(VAR_SAMP(col_dec_signed))
q59 639990040770595400000000000000000000000000000000000000000000000000000000
SELECT 'q61', floor(VAR_SAMP(col_float_signed)) FROM t1;
q61 floor(VAR_SAMP(col_float_signed))
q61 70358518588659
SELECT 'q62', floor(VAR_SAMP(col_float_unsigned)) FROM t1;
q62 floor(VAR_SAMP(col_float_unsigned))
q62 5397
SELECT 'q63', floor(VAR_SAMP(col_double_signed)) FROM t1;
q63 floor(VAR_SAMP(col_double_signed))
q63 4611611745321216000
SELECT 'q64', floor(VAR_SAMP(col_double_unsigned)) FROM t1;
q64 floor(VAR_SAMP(col_double_unsigned))
q64 5397
DROP DATABASE std_test_db;

View File

@ -0,0 +1,229 @@
DROP DATABASE IF EXISTS std_test_db;
CREATE DATABASE std_test_db;
USE std_test_db;
create table t1 (
col_signed tinyint,
col_unsigned tinyint unsigned
);
LOAD DATA LOCAL infile 'MTR_SUITE_DIR/../std_data/tinyint_range.tbl' INTO TABLE t1 FIELDS TERMINATED BY '|';;
ALTER TABLE t1 ADD COLUMN col_small_signed SMALLINT;
ALTER TABLE t1 ADD COLUMN col_small_unsigned SMALLINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_med_signed MEDIUMINT;
ALTER TABLE t1 ADD COLUMN col_med_unsigned MEDIUMINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_int_signed INT;
ALTER TABLE t1 ADD COLUMN col_int_unsigned INT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_big_signed BIGINT;
ALTER TABLE t1 ADD COLUMN col_big_unsigned BIGINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_dec_signed DECIMAL(38,0);
ALTER TABLE t1 ADD COLUMN col_dec_unsigned DECIMAL(38,0) UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_float_signed FLOAT;
ALTER TABLE t1 ADD COLUMN col_float_unsigned FLOAT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_double_signed DOUBLE;
ALTER TABLE t1 ADD COLUMN col_double_unsigned DOUBLE UNSIGNED;
UPDATE t1 SET col_small_signed=col_signed + sign(col_signed) * 32000;
UPDATE t1 SET col_small_unsigned=col_unsigned + 65000;
UPDATE t1 SET col_med_signed=col_signed + sign(col_signed) * 8388000;
UPDATE t1 SET col_med_unsigned=col_unsigned + 16776000;
UPDATE t1 SET col_int_signed=col_signed + sign(col_signed) * 2147483000;
UPDATE t1 SET col_int_unsigned=col_unsigned + 4294000000;
UPDATE t1 SET col_big_signed=col_signed + sign(col_signed) * 9223372036854775000;
UPDATE t1 SET col_big_unsigned=col_unsigned + 18446744073709551000;
UPDATE t1 SET col_dec_signed=col_signed + sign(col_signed) * 800000000000000000000000000000000001;
UPDATE t1 SET col_dec_unsigned=col_unsigned + 800000000000000000000000000000000003;
UPDATE t1 SET col_float_signed=col_signed + 0.637 + sign(col_signed) * 8388000;
UPDATE t1 SET col_float_unsigned=col_unsigned + 0.637 + 16776000;
UPDATE t1 SET col_double_signed=col_signed + 0.637 + sign(col_signed) * 2147483000;
UPDATE t1 SET col_double_unsigned=col_unsigned + 0.637 + 4294000000;
SELECT 'q1', floor(STD(col_signed)) FROM t1;
q1 floor(STD(col_signed))
q1 73
SELECT 'q2', floor(STD(col_unsigned)) FROM t1;
q2 floor(STD(col_unsigned))
q2 73
SELECT 'q3', floor(STD(col_small_signed)) FROM t1;
q3 floor(STD(col_small_signed))
q3 32000
SELECT 'q4', floor(STD(col_small_unsigned)) FROM t1;
q4 floor(STD(col_small_unsigned))
q4 73
SELECT 'q5', floor(STD(col_med_signed)) FROM t1;
q5 floor(STD(col_med_signed))
q5 8371470
SELECT 'q6', floor(STD(col_med_unsigned)) FROM t1;
q6 floor(STD(col_med_unsigned))
q6 73
SELECT 'q7', floor(STD(col_int_signed)) FROM t1;
q7 floor(STD(col_int_signed))
q7 2143234889
SELECT 'q8', floor(STD(col_int_unsigned)) FROM t1;
q8 floor(STD(col_int_unsigned))
q8 73
SELECT 'q9', floor(STD(col_big_signed)) FROM t1;
q9 floor(STD(col_big_signed))
q9 9205126264421171000
SELECT 'q10', floor(STD(col_big_unsigned)) FROM t1;
q10 floor(STD(col_big_unsigned))
q10 0
SELECT 'q11', floor(STD(col_dec_signed)) FROM t1;
q11 floor(STD(col_dec_signed))
q11 798417431511105000000000000000000000
SELECT 'q12', floor(STD(col_dec_unsigned)) FROM t1;
q12 floor(STD(col_dec_unsigned))
q12 0
SELECT 'q13', floor(STD(col_float_signed)) FROM t1;
q13 floor(STD(col_float_signed))
q13 8371470
SELECT 'q14', floor(STD(col_float_unsigned)) FROM t1;
q14 floor(STD(col_float_unsigned))
q14 73
SELECT 'q15', floor(STD(col_double_signed)) FROM t1;
q15 floor(STD(col_double_signed))
q15 2143234889
SELECT 'q16', floor(STD(col_double_unsigned)) FROM t1;
q16 floor(STD(col_double_unsigned))
q16 73
SELECT 'q17', floor(STDDEV_SAMP(col_signed)) FROM t1;
q17 floor(STDDEV_SAMP(col_signed))
q17 73
SELECT 'q18', floor(STDDEV_SAMP(col_unsigned)) FROM t1;
q18 floor(STDDEV_SAMP(col_unsigned))
q18 73
SELECT 'q19', floor(STDDEV_SAMP(col_small_signed)) FROM t1;
q19 floor(STDDEV_SAMP(col_small_signed))
q19 32063
SELECT 'q20', floor(STDDEV_SAMP(col_small_unsigned)) FROM t1;
q20 floor(STDDEV_SAMP(col_small_unsigned))
q20 73
SELECT 'q21', floor(STDDEV_SAMP(col_med_signed)) FROM t1;
q21 floor(STDDEV_SAMP(col_med_signed))
q21 8387998
SELECT 'q22', floor(STDDEV_SAMP(col_med_unsigned)) FROM t1;
q22 floor(STDDEV_SAMP(col_med_unsigned))
q22 73
SELECT 'q23', floor(STDDEV_SAMP(col_int_signed)) FROM t1;
q23 floor(STDDEV_SAMP(col_int_signed))
q23 2147466354
SELECT 'q24', floor(STDDEV_SAMP(col_int_unsigned)) FROM t1;
q24 floor(STDDEV_SAMP(col_int_unsigned))
q24 73
SELECT 'q25', floor(STDDEV_SAMP(col_big_signed)) FROM t1;
q25 floor(STDDEV_SAMP(col_big_signed))
q25 9223300272764650000
SELECT 'q26', floor(STDDEV_SAMP(col_big_unsigned)) FROM t1;
q26 floor(STDDEV_SAMP(col_big_unsigned))
q26 0
SELECT 'q27', floor(STDDEV_SAMP(col_dec_signed)) FROM t1;
q27 floor(STDDEV_SAMP(col_dec_signed))
q27 799993775457406500000000000000000000
SELECT 'q28', floor(STDDEV_SAMP(col_dec_unsigned)) FROM t1;
q28 floor(STDDEV_SAMP(col_dec_unsigned))
q28 0
SELECT 'q29', floor(STDDEV_SAMP(col_float_signed)) FROM t1;
q29 floor(STDDEV_SAMP(col_float_signed))
q29 8387998
SELECT 'q30', floor(STDDEV_SAMP(col_float_unsigned)) FROM t1;
q30 floor(STDDEV_SAMP(col_float_unsigned))
q30 73
SELECT 'q31', floor(STDDEV_SAMP(col_double_signed)) FROM t1;
q31 floor(STDDEV_SAMP(col_double_signed))
q31 2147466354
SELECT 'q32', floor(STDDEV_SAMP(col_double_unsigned)) FROM t1;
q32 floor(STDDEV_SAMP(col_double_unsigned))
q32 73
SELECT 'q33', floor(VAR_POP(col_signed)) FROM t1;
q33 floor(VAR_POP(col_signed))
q33 5376
SELECT 'q34', floor(VAR_POP(col_unsigned)) FROM t1;
q34 floor(VAR_POP(col_unsigned))
q34 5376
SELECT 'q35', floor(VAR_POP(col_small_signed)) FROM t1;
q35 floor(VAR_POP(col_small_signed))
q35 1024021882
SELECT 'q36', floor(VAR_POP(col_small_unsigned)) FROM t1;
q36 floor(VAR_POP(col_small_unsigned))
q36 5376
SELECT 'q37', floor(VAR_POP(col_med_signed)) FROM t1;
q37 floor(VAR_POP(col_med_signed))
q37 70081516547007
SELECT 'q38', floor(VAR_POP(col_med_unsigned)) FROM t1;
q38 floor(VAR_POP(col_med_unsigned))
q38 5376
SELECT 'q39', floor(VAR_POP(col_int_signed)) FROM t1;
q39 floor(VAR_POP(col_int_signed))
q39 4593455793567980000
SELECT 'q40', floor(VAR_POP(col_int_unsigned)) FROM t1;
q40 floor(VAR_POP(col_int_unsigned))
q40 5376
SELECT 'q41', floor(VAR_POP(col_big_signed)) FROM t1;
q41 floor(VAR_POP(col_big_signed))
q41 84734349543936470000000000000000000000
SELECT 'q42', floor(VAR_POP(col_big_unsigned)) FROM t1;
q42 floor(VAR_POP(col_big_unsigned))
q42 0
SELECT 'q43', floor(VAR_POP(col_dec_signed)) FROM t1;
q43 floor(VAR_POP(col_dec_signed))
q43 637470394940789900000000000000000000000000000000000000000000000000000000
SELECT 'q44', floor(VAR_POP(col_dec_unsigned)) FROM t1;
q44 floor(VAR_POP(col_dec_unsigned))
q44 0
SELECT 'q45', floor(VAR_POP(col_float_signed)) FROM t1;
q45 floor(VAR_POP(col_float_signed))
q45 70081516546971
SELECT 'q46', floor(VAR_POP(col_float_unsigned)) FROM t1;
q46 floor(VAR_POP(col_float_unsigned))
q46 5376
SELECT 'q47', floor(VAR_POP(col_double_signed)) FROM t1;
q47 floor(VAR_POP(col_double_signed))
q47 4593455793567980000
SELECT 'q48', floor(VAR_POP(col_double_unsigned)) FROM t1;
q48 floor(VAR_POP(col_double_unsigned))
q48 5376
SELECT 'q49', floor(VAR_SAMP(col_signed)) FROM t1;
q49 floor(VAR_SAMP(col_signed))
q49 5397
SELECT 'q50', floor(VAR_SAMP(col_unsigned)) FROM t1;
q50 floor(VAR_SAMP(col_unsigned))
q50 5397
SELECT 'q51', floor(VAR_SAMP(col_small_signed)) FROM t1;
q51 floor(VAR_SAMP(col_small_signed))
q51 1028069399
SELECT 'q52', floor(VAR_SAMP(col_small_unsigned)) FROM t1;
q52 floor(VAR_SAMP(col_small_unsigned))
q52 5397
SELECT 'q53', floor(VAR_SAMP(col_med_signed)) FROM t1;
q53 floor(VAR_SAMP(col_med_signed))
q53 70358518588695
SELECT 'q54', floor(VAR_SAMP(col_med_unsigned)) FROM t1;
q54 floor(VAR_SAMP(col_med_unsigned))
q54 5397
SELECT 'q55', floor(VAR_SAMP(col_int_signed)) FROM t1;
q55 floor(VAR_SAMP(col_int_signed))
q55 4611611745321213400
SELECT 'q56', floor(VAR_SAMP(col_int_unsigned)) FROM t1;
q56 floor(VAR_SAMP(col_int_unsigned))
q56 5397
SELECT 'q57', floor(VAR_SAMP(col_big_signed)) FROM t1;
q57 floor(VAR_SAMP(col_big_signed))
q57 85069267921580480000000000000000000000
SELECT 'q58', floor(VAR_SAMP(col_big_unsigned)) FROM t1;
q58 floor(VAR_SAMP(col_big_unsigned))
q58 0
SELECT 'q59', floor(VAR_SAMP(col_dec_signed)) FROM t1;
q59 floor(VAR_SAMP(col_dec_signed))
q59 639990040770595400000000000000000000000000000000000000000000000000000000
SELECT 'q60', floor(VAR_SAMP(col_dec_unsigned)) FROM t1;
q60 floor(VAR_SAMP(col_dec_unsigned))
q60 0
SELECT 'q61', floor(VAR_SAMP(col_float_signed)) FROM t1;
q61 floor(VAR_SAMP(col_float_signed))
q61 70358518588659
SELECT 'q62', floor(VAR_SAMP(col_float_unsigned)) FROM t1;
q62 floor(VAR_SAMP(col_float_unsigned))
q62 5397
SELECT 'q63', floor(VAR_SAMP(col_double_signed)) FROM t1;
q63 floor(VAR_SAMP(col_double_signed))
q63 4611611745321213400
SELECT 'q64', floor(VAR_SAMP(col_double_unsigned)) FROM t1;
q64 floor(VAR_SAMP(col_double_unsigned))
q64 5397
DROP DATABASE std_test_db;

View File

@ -33,7 +33,7 @@ q5 floor(STD(u_custKey))
q5 6749
SELECT 'q6', floor(STD(u_bigcustKey)) FROM customer;
q6 floor(STD(u_bigcustKey))
q6 6688
q6 6749
SELECT 'q7', AVG(u_custKey) FROM customer;
q7 AVG(u_custKey)
q7 4294007575.1667

View File

@ -0,0 +1,118 @@
# Test of the STD, STDDEV_SAMP, VAR_POP and VAR_SAMP aggregates on a
# ColumnStore table, run once per numeric column type (TINYINT .. DOUBLE,
# signed and unsigned).
#
# NOTE: the statements below are echoed into the recorded result file, so
# their text has to stay in sync with it; only comments are free to change.
-- source ../include/have_columnstore.inc
--disable_warnings
DROP DATABASE IF EXISTS std_test_db;
--enable_warnings
CREATE DATABASE std_test_db;
USE std_test_db;
# Seed table: one signed and one unsigned TINYINT column, filled from
# tinyint_range.tbl ('|'-separated fields).
create table t1 (
col_signed tinyint,
col_unsigned tinyint unsigned
)engine=columnstore;
# Mask the suite directory so the echoed LOAD DATA statement shows the literal
# text MTR_SUITE_DIR instead of a machine-specific path.
--replace_result $MTR_SUITE_DIR MTR_SUITE_DIR
--eval LOAD DATA LOCAL infile '$MTR_SUITE_DIR/../std_data/tinyint_range.tbl' INTO TABLE t1 FIELDS TERMINATED BY '|';
# Add a signed and an unsigned column for every wider numeric type.
ALTER TABLE t1 ADD COLUMN col_small_signed SMALLINT;
ALTER TABLE t1 ADD COLUMN col_small_unsigned SMALLINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_med_signed MEDIUMINT;
ALTER TABLE t1 ADD COLUMN col_med_unsigned MEDIUMINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_int_signed INT;
ALTER TABLE t1 ADD COLUMN col_int_unsigned INT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_big_signed BIGINT;
ALTER TABLE t1 ADD COLUMN col_big_unsigned BIGINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_dec_signed DECIMAL(38,0);
ALTER TABLE t1 ADD COLUMN col_dec_unsigned DECIMAL(38,0) UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_float_signed FLOAT;
ALTER TABLE t1 ADD COLUMN col_float_unsigned FLOAT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_double_signed DOUBLE;
ALTER TABLE t1 ADD COLUMN col_double_unsigned DOUBLE UNSIGNED;
# Derive every wide column from the TINYINT seed values.
#   signed columns:   seed + sign(seed) * offset  (shifted away from zero on
#                     both sides, so the values form two distant clusters)
#   unsigned columns: seed + offset               (one narrow cluster at a
#                     large magnitude)
# The FLOAT/DOUBLE columns additionally get a 0.637 fractional part.
# NOTE(review): the large offsets presumably target the loss of precision a
# sum-of-squares based variance formula shows on such data -- confirm.
UPDATE t1 SET col_small_signed=col_signed + sign(col_signed) * 32000;
UPDATE t1 SET col_small_unsigned=col_unsigned + 65000;
UPDATE t1 SET col_med_signed=col_signed + sign(col_signed) * 8388000;
UPDATE t1 SET col_med_unsigned=col_unsigned + 16776000;
UPDATE t1 SET col_int_signed=col_signed + sign(col_signed) * 2147483000;
UPDATE t1 SET col_int_unsigned=col_unsigned + 4294000000;
UPDATE t1 SET col_big_signed=col_signed + sign(col_signed) * 9223372036854775000;
UPDATE t1 SET col_big_unsigned=col_unsigned + 18446744073709551000;
UPDATE t1 SET col_dec_signed=col_signed + sign(col_signed) * 800000000000000000000000000000000001;
UPDATE t1 SET col_dec_unsigned=col_unsigned + 800000000000000000000000000000000003;
UPDATE t1 SET col_float_signed=col_signed + 0.637 + sign(col_signed) * 8388000;
UPDATE t1 SET col_float_unsigned=col_unsigned + 0.637 + 16776000;
UPDATE t1 SET col_double_signed=col_signed + 0.637 + sign(col_signed) * 2147483000;
UPDATE t1 SET col_double_unsigned=col_unsigned + 0.637 + 4294000000;
# Every query returns a 'qN' label plus the aggregate wrapped in floor(), so
# only the integer part of each result is recorded.
#
# STD (population standard deviation): q1-q16.
# q12 (col_dec_unsigned) is not run in this script.
# NOTE(review): the reason for leaving it out is not visible here -- confirm.
SELECT 'q1', floor(STD(col_signed)) FROM t1;
SELECT 'q2', floor(STD(col_unsigned)) FROM t1;
SELECT 'q3', floor(STD(col_small_signed)) FROM t1;
SELECT 'q4', floor(STD(col_small_unsigned)) FROM t1;
SELECT 'q5', floor(STD(col_med_signed)) FROM t1;
SELECT 'q6', floor(STD(col_med_unsigned)) FROM t1;
SELECT 'q7', floor(STD(col_int_signed)) FROM t1;
SELECT 'q8', floor(STD(col_int_unsigned)) FROM t1;
SELECT 'q9', floor(STD(col_big_signed)) FROM t1;
SELECT 'q10', floor(STD(col_big_unsigned)) FROM t1;
SELECT 'q11', floor(STD(col_dec_signed)) FROM t1;
SELECT 'q13', floor(STD(col_float_signed)) FROM t1;
SELECT 'q14', floor(STD(col_float_unsigned)) FROM t1;
SELECT 'q15', floor(STD(col_double_signed)) FROM t1;
SELECT 'q16', floor(STD(col_double_unsigned)) FROM t1;
# STDDEV_SAMP (sample standard deviation): q17-q32.
# q28 (col_dec_unsigned) is not run in this script.
SELECT 'q17', floor(STDDEV_SAMP(col_signed)) FROM t1;
SELECT 'q18', floor(STDDEV_SAMP(col_unsigned)) FROM t1;
SELECT 'q19', floor(STDDEV_SAMP(col_small_signed)) FROM t1;
SELECT 'q20', floor(STDDEV_SAMP(col_small_unsigned)) FROM t1;
SELECT 'q21', floor(STDDEV_SAMP(col_med_signed)) FROM t1;
SELECT 'q22', floor(STDDEV_SAMP(col_med_unsigned)) FROM t1;
SELECT 'q23', floor(STDDEV_SAMP(col_int_signed)) FROM t1;
SELECT 'q24', floor(STDDEV_SAMP(col_int_unsigned)) FROM t1;
SELECT 'q25', floor(STDDEV_SAMP(col_big_signed)) FROM t1;
SELECT 'q26', floor(STDDEV_SAMP(col_big_unsigned)) FROM t1;
SELECT 'q27', floor(STDDEV_SAMP(col_dec_signed)) FROM t1;
SELECT 'q29', floor(STDDEV_SAMP(col_float_signed)) FROM t1;
SELECT 'q30', floor(STDDEV_SAMP(col_float_unsigned)) FROM t1;
SELECT 'q31', floor(STDDEV_SAMP(col_double_signed)) FROM t1;
SELECT 'q32', floor(STDDEV_SAMP(col_double_unsigned)) FROM t1;
# VAR_POP (population variance): q33-q48.
# q44 (col_dec_unsigned) is not run in this script.
SELECT 'q33', floor(VAR_POP(col_signed)) FROM t1;
SELECT 'q34', floor(VAR_POP(col_unsigned)) FROM t1;
SELECT 'q35', floor(VAR_POP(col_small_signed)) FROM t1;
SELECT 'q36', floor(VAR_POP(col_small_unsigned)) FROM t1;
SELECT 'q37', floor(VAR_POP(col_med_signed)) FROM t1;
SELECT 'q38', floor(VAR_POP(col_med_unsigned)) FROM t1;
SELECT 'q39', floor(VAR_POP(col_int_signed)) FROM t1;
SELECT 'q40', floor(VAR_POP(col_int_unsigned)) FROM t1;
SELECT 'q41', floor(VAR_POP(col_big_signed)) FROM t1;
SELECT 'q42', floor(VAR_POP(col_big_unsigned)) FROM t1;
SELECT 'q43', floor(VAR_POP(col_dec_signed)) FROM t1;
SELECT 'q45', floor(VAR_POP(col_float_signed)) FROM t1;
SELECT 'q46', floor(VAR_POP(col_float_unsigned)) FROM t1;
SELECT 'q47', floor(VAR_POP(col_double_signed)) FROM t1;
SELECT 'q48', floor(VAR_POP(col_double_unsigned)) FROM t1;
# VAR_SAMP (sample variance): q49-q64.
# q60 (col_dec_unsigned) is not run in this script.
SELECT 'q49', floor(VAR_SAMP(col_signed)) FROM t1;
SELECT 'q50', floor(VAR_SAMP(col_unsigned)) FROM t1;
SELECT 'q51', floor(VAR_SAMP(col_small_signed)) FROM t1;
SELECT 'q52', floor(VAR_SAMP(col_small_unsigned)) FROM t1;
SELECT 'q53', floor(VAR_SAMP(col_med_signed)) FROM t1;
SELECT 'q54', floor(VAR_SAMP(col_med_unsigned)) FROM t1;
SELECT 'q55', floor(VAR_SAMP(col_int_signed)) FROM t1;
SELECT 'q56', floor(VAR_SAMP(col_int_unsigned)) FROM t1;
SELECT 'q57', floor(VAR_SAMP(col_big_signed)) FROM t1;
SELECT 'q58', floor(VAR_SAMP(col_big_unsigned)) FROM t1;
SELECT 'q59', floor(VAR_SAMP(col_dec_signed)) FROM t1;
SELECT 'q61', floor(VAR_SAMP(col_float_signed)) FROM t1;
SELECT 'q62', floor(VAR_SAMP(col_float_unsigned)) FROM t1;
SELECT 'q63', floor(VAR_SAMP(col_double_signed)) FROM t1;
SELECT 'q64', floor(VAR_SAMP(col_double_unsigned)) FROM t1;
# Clean up: drop the test database.
DROP DATABASE std_test_db;

View File

@ -0,0 +1,121 @@
# Test of the STD, STDDEV_SAMP, VAR_POP and VAR_SAMP aggregates, run once per
# numeric column type (TINYINT .. DOUBLE, signed and unsigned).
# The table below is created without an ENGINE clause.
# NOTE(review): presumably this is the non-ColumnStore reference run for the
# same queries -- confirm.
#
# NOTE: the statements below are echoed into the recorded result file, so
# their text has to stay in sync with it; only comments are free to change.
-- source ../include/have_columnstore.inc
--disable_warnings
DROP DATABASE IF EXISTS std_test_db;
--enable_warnings
CREATE DATABASE std_test_db;
USE std_test_db;
# Seed table: one signed and one unsigned TINYINT column, filled from
# tinyint_range.tbl ('|'-separated fields).
create table t1 (
col_signed tinyint,
col_unsigned tinyint unsigned
);
# Mask the suite directory so the echoed LOAD DATA statement shows the literal
# text MTR_SUITE_DIR instead of a machine-specific path.
--replace_result $MTR_SUITE_DIR MTR_SUITE_DIR
--eval LOAD DATA LOCAL infile '$MTR_SUITE_DIR/../std_data/tinyint_range.tbl' INTO TABLE t1 FIELDS TERMINATED BY '|';
# Add a signed and an unsigned column for every wider numeric type.
ALTER TABLE t1 ADD COLUMN col_small_signed SMALLINT;
ALTER TABLE t1 ADD COLUMN col_small_unsigned SMALLINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_med_signed MEDIUMINT;
ALTER TABLE t1 ADD COLUMN col_med_unsigned MEDIUMINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_int_signed INT;
ALTER TABLE t1 ADD COLUMN col_int_unsigned INT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_big_signed BIGINT;
ALTER TABLE t1 ADD COLUMN col_big_unsigned BIGINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_dec_signed DECIMAL(38,0);
ALTER TABLE t1 ADD COLUMN col_dec_unsigned DECIMAL(38,0) UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_float_signed FLOAT;
ALTER TABLE t1 ADD COLUMN col_float_unsigned FLOAT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_double_signed DOUBLE;
ALTER TABLE t1 ADD COLUMN col_double_unsigned DOUBLE UNSIGNED;
# Derive every wide column from the TINYINT seed values.
#   signed columns:   seed + sign(seed) * offset  (shifted away from zero on
#                     both sides, so the values form two distant clusters)
#   unsigned columns: seed + offset               (one narrow cluster at a
#                     large magnitude)
# The FLOAT/DOUBLE columns additionally get a 0.637 fractional part.
UPDATE t1 SET col_small_signed=col_signed + sign(col_signed) * 32000;
UPDATE t1 SET col_small_unsigned=col_unsigned + 65000;
UPDATE t1 SET col_med_signed=col_signed + sign(col_signed) * 8388000;
UPDATE t1 SET col_med_unsigned=col_unsigned + 16776000;
UPDATE t1 SET col_int_signed=col_signed + sign(col_signed) * 2147483000;
UPDATE t1 SET col_int_unsigned=col_unsigned + 4294000000;
UPDATE t1 SET col_big_signed=col_signed + sign(col_signed) * 9223372036854775000;
UPDATE t1 SET col_big_unsigned=col_unsigned + 18446744073709551000;
UPDATE t1 SET col_dec_signed=col_signed + sign(col_signed) * 800000000000000000000000000000000001;
UPDATE t1 SET col_dec_unsigned=col_unsigned + 800000000000000000000000000000000003;
UPDATE t1 SET col_float_signed=col_signed + 0.637 + sign(col_signed) * 8388000;
UPDATE t1 SET col_float_unsigned=col_unsigned + 0.637 + 16776000;
UPDATE t1 SET col_double_signed=col_signed + 0.637 + sign(col_signed) * 2147483000;
UPDATE t1 SET col_double_unsigned=col_unsigned + 0.637 + 4294000000;
# Every query returns a 'qN' label plus the aggregate wrapped in floor(), so
# only the integer part of each result is recorded.
#
# STD (population standard deviation): q1-q16, one query per column.
SELECT 'q1', floor(STD(col_signed)) FROM t1;
SELECT 'q2', floor(STD(col_unsigned)) FROM t1;
SELECT 'q3', floor(STD(col_small_signed)) FROM t1;
SELECT 'q4', floor(STD(col_small_unsigned)) FROM t1;
SELECT 'q5', floor(STD(col_med_signed)) FROM t1;
SELECT 'q6', floor(STD(col_med_unsigned)) FROM t1;
SELECT 'q7', floor(STD(col_int_signed)) FROM t1;
SELECT 'q8', floor(STD(col_int_unsigned)) FROM t1;
SELECT 'q9', floor(STD(col_big_signed)) FROM t1;
SELECT 'q10', floor(STD(col_big_unsigned)) FROM t1;
SELECT 'q11', floor(STD(col_dec_signed)) FROM t1;
SELECT 'q12', floor(STD(col_dec_unsigned)) FROM t1;
SELECT 'q13', floor(STD(col_float_signed)) FROM t1;
SELECT 'q14', floor(STD(col_float_unsigned)) FROM t1;
SELECT 'q15', floor(STD(col_double_signed)) FROM t1;
SELECT 'q16', floor(STD(col_double_unsigned)) FROM t1;
# STDDEV_SAMP (sample standard deviation): q17-q32.
SELECT 'q17', floor(STDDEV_SAMP(col_signed)) FROM t1;
SELECT 'q18', floor(STDDEV_SAMP(col_unsigned)) FROM t1;
SELECT 'q19', floor(STDDEV_SAMP(col_small_signed)) FROM t1;
SELECT 'q20', floor(STDDEV_SAMP(col_small_unsigned)) FROM t1;
SELECT 'q21', floor(STDDEV_SAMP(col_med_signed)) FROM t1;
SELECT 'q22', floor(STDDEV_SAMP(col_med_unsigned)) FROM t1;
SELECT 'q23', floor(STDDEV_SAMP(col_int_signed)) FROM t1;
SELECT 'q24', floor(STDDEV_SAMP(col_int_unsigned)) FROM t1;
SELECT 'q25', floor(STDDEV_SAMP(col_big_signed)) FROM t1;
SELECT 'q26', floor(STDDEV_SAMP(col_big_unsigned)) FROM t1;
SELECT 'q27', floor(STDDEV_SAMP(col_dec_signed)) FROM t1;
SELECT 'q28', floor(STDDEV_SAMP(col_dec_unsigned)) FROM t1;
SELECT 'q29', floor(STDDEV_SAMP(col_float_signed)) FROM t1;
SELECT 'q30', floor(STDDEV_SAMP(col_float_unsigned)) FROM t1;
SELECT 'q31', floor(STDDEV_SAMP(col_double_signed)) FROM t1;
SELECT 'q32', floor(STDDEV_SAMP(col_double_unsigned)) FROM t1;
# VAR_POP (population variance): q33-q48.
SELECT 'q33', floor(VAR_POP(col_signed)) FROM t1;
SELECT 'q34', floor(VAR_POP(col_unsigned)) FROM t1;
SELECT 'q35', floor(VAR_POP(col_small_signed)) FROM t1;
SELECT 'q36', floor(VAR_POP(col_small_unsigned)) FROM t1;
SELECT 'q37', floor(VAR_POP(col_med_signed)) FROM t1;
SELECT 'q38', floor(VAR_POP(col_med_unsigned)) FROM t1;
SELECT 'q39', floor(VAR_POP(col_int_signed)) FROM t1;
SELECT 'q40', floor(VAR_POP(col_int_unsigned)) FROM t1;
SELECT 'q41', floor(VAR_POP(col_big_signed)) FROM t1;
SELECT 'q42', floor(VAR_POP(col_big_unsigned)) FROM t1;
SELECT 'q43', floor(VAR_POP(col_dec_signed)) FROM t1;
SELECT 'q44', floor(VAR_POP(col_dec_unsigned)) FROM t1;
SELECT 'q45', floor(VAR_POP(col_float_signed)) FROM t1;
SELECT 'q46', floor(VAR_POP(col_float_unsigned)) FROM t1;
SELECT 'q47', floor(VAR_POP(col_double_signed)) FROM t1;
SELECT 'q48', floor(VAR_POP(col_double_unsigned)) FROM t1;
# VAR_SAMP (sample variance): q49-q64.
SELECT 'q49', floor(VAR_SAMP(col_signed)) FROM t1;
SELECT 'q50', floor(VAR_SAMP(col_unsigned)) FROM t1;
SELECT 'q51', floor(VAR_SAMP(col_small_signed)) FROM t1;
SELECT 'q52', floor(VAR_SAMP(col_small_unsigned)) FROM t1;
SELECT 'q53', floor(VAR_SAMP(col_med_signed)) FROM t1;
SELECT 'q54', floor(VAR_SAMP(col_med_unsigned)) FROM t1;
SELECT 'q55', floor(VAR_SAMP(col_int_signed)) FROM t1;
SELECT 'q56', floor(VAR_SAMP(col_int_unsigned)) FROM t1;
SELECT 'q57', floor(VAR_SAMP(col_big_signed)) FROM t1;
SELECT 'q58', floor(VAR_SAMP(col_big_unsigned)) FROM t1;
SELECT 'q59', floor(VAR_SAMP(col_dec_signed)) FROM t1;
SELECT 'q60', floor(VAR_SAMP(col_dec_unsigned)) FROM t1;
SELECT 'q61', floor(VAR_SAMP(col_float_signed)) FROM t1;
SELECT 'q62', floor(VAR_SAMP(col_float_unsigned)) FROM t1;
SELECT 'q63', floor(VAR_SAMP(col_double_signed)) FROM t1;
SELECT 'q64', floor(VAR_SAMP(col_double_unsigned)) FROM t1;
# Clean up: drop the test database.
DROP DATABASE std_test_db;

View File

@ -0,0 +1,254 @@
-126|0
-125|1
-124|2
-123|3
-122|4
-121|5
-120|6
-119|7
-118|8
-117|9
-116|10
-115|11
-114|12
-113|13
-112|14
-111|15
-110|16
-109|17
-108|18
-107|19
-106|20
-105|21
-104|22
-103|23
-102|24
-101|25
-100|26
-99|27
-98|28
-97|29
-96|30
-95|31
-94|32
-93|33
-92|34
-91|35
-90|36
-89|37
-88|38
-87|39
-86|40
-85|41
-84|42
-83|43
-82|44
-81|45
-80|46
-79|47
-78|48
-77|49
-76|50
-75|51
-74|52
-73|53
-72|54
-71|55
-70|56
-69|57
-68|58
-67|59
-66|60
-65|61
-64|62
-63|63
-62|64
-61|65
-60|66
-59|67
-58|68
-57|69
-56|70
-55|71
-54|72
-53|73
-52|74
-51|75
-50|76
-49|77
-48|78
-47|79
-46|80
-45|81
-44|82
-43|83
-42|84
-41|85
-40|86
-39|87
-38|88
-37|89
-36|90
-35|91
-34|92
-33|93
-32|94
-31|95
-30|96
-29|97
-28|98
-27|99
-26|100
-25|101
-24|102
-23|103
-22|104
-21|105
-20|106
-19|107
-18|108
-17|109
-16|110
-15|111
-14|112
-13|113
-12|114
-11|115
-10|116
-9|117
-8|118
-7|119
-6|120
-5|121
-4|122
-3|123
-2|124
-1|125
0|126
1|127
2|128
3|129
4|130
5|131
6|132
7|133
8|134
9|135
10|136
11|137
12|138
13|139
14|140
15|141
16|142
17|143
18|144
19|145
20|146
21|147
22|148
23|149
24|150
25|151
26|152
27|153
28|154
29|155
30|156
31|157
32|158
33|159
34|160
35|161
36|162
37|163
38|164
39|165
40|166
41|167
42|168
43|169
44|170
45|171
46|172
47|173
48|174
49|175
50|176
51|177
52|178
53|179
54|180
55|181
56|182
57|183
58|184
59|185
60|186
61|187
62|188
63|189
64|190
65|191
66|192
67|193
68|194
69|195
70|196
71|197
72|198
73|199
74|200
75|201
76|202
77|203
78|204
79|205
80|206
81|207
82|208
83|209
84|210
85|211
86|212
87|213
88|214
89|215
90|216
91|217
92|218
93|219
94|220
95|221
96|222
97|223
98|224
99|225
100|226
101|227
102|228
103|229
104|230
105|231
106|232
107|233
108|234
109|235
110|236
111|237
112|238
113|239
114|240
115|241
116|242
117|243
118|244
119|245
120|246
121|247
122|248
123|249
124|250
125|251
126|252
127|253

View File

@ -1900,8 +1900,8 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
// rowIn(in) - Row to be included in aggregation.
// colIn(in) - column in the input row group
// colOut(in) - column in the output row group stores the count
// colAux(in) - column in the output row group stores the sum(x)
// colAux + 1 - column in the output row group stores the sum(x**2)
// colAux(in) - column in the output row group stores the mean(x)
// colAux + 1 - column in the output row group stores the sum(x_i - mean)^2
//------------------------------------------------------------------------------
void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux)
{
@ -1960,9 +1960,17 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu
break;
}
fRow.setDoubleField(fRow.getDoubleField(colOut) + 1.0, colOut);
fRow.setLongDoubleField(fRow.getLongDoubleField(colAux) + valIn, colAux);
fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1);
double count = fRow.getDoubleField(colOut) + 1.0;
long double mean = fRow.getLongDoubleField(colAux);
long double M2 = fRow.getLongDoubleField(colAux + 1);
volatile long double delta = valIn - mean;
mean += delta/count;
M2 += delta * (valIn - mean);
fRow.setDoubleField(count, colOut);
fRow.setLongDoubleField(mean, colAux);
fRow.setLongDoubleField(M2, colAux + 1);
}
void RowAggregation::mergeStatistics(const Row& rowIn, uint64_t colOut, uint64_t colAux)
@ -3156,31 +3164,26 @@ void RowAggregationUM::calculateStatisticsFunctions()
}
else // count > 1
{
long double sum1 = fRow.getLongDoubleField(colAux);
long double sum2 = fRow.getLongDoubleField(colAux + 1);
long double M2 = fRow.getLongDoubleField(colAux + 1);
uint32_t scale = fRow.getScale(colOut);
auto factor = datatypes::scaleDivisor<long double>(scale);
if (scale != 0) // adjust the scale if necessary
{
sum1 /= factor;
sum2 /= factor * factor;
M2 /= factor * factor;
}
long double stat = sum1 * sum1 / cnt;
stat = sum2 - stat;
if (fFunctionCols[i]->fStatsFunction == ROWAGG_STDDEV_POP)
stat = sqrt(stat / cnt);
M2 = sqrt(M2 / cnt);
else if (fFunctionCols[i]->fStatsFunction == ROWAGG_STDDEV_SAMP)
stat = sqrt(stat / (cnt - 1));
M2 = sqrt(M2 / (cnt - 1));
else if (fFunctionCols[i]->fStatsFunction == ROWAGG_VAR_POP)
stat = stat / cnt;
M2 = M2 / cnt;
else if (fFunctionCols[i]->fStatsFunction == ROWAGG_VAR_SAMP)
stat = stat / (cnt - 1);
M2 = M2 / (cnt - 1);
fRow.setDoubleField(stat, colOut);
fRow.setDoubleField(M2, colOut);
}
}
}
@ -4281,18 +4284,39 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
// Update the sum and count fields for statistics if input is not null.
// rowIn(in) - Row to be included in aggregation.
// colIn(in) - column in the input row group stores the count/logical block
// colIn + 1 - column in the input row group stores the sum(x)/logical block
// colIn + 2 - column in the input row group stores the sum(x**2)/logical block
// colIn + 1 - column in the input row group stores the mean(x)/logical block
// colIn + 2 - column in the input row group stores the sum(x_i - mean)^2/logical block
// colOut(in) - column in the output row group stores the count
// colAux(in) - column in the output row group stores the sum(x)
// colAux + 1 - column in the output row group stores the sum(x**2)
// colAux(in) - column in the output row group stores the mean(x)
// colAux + 1 - column in the output row group stores the sum(x_i - mean)^2
//------------------------------------------------------------------------------
void RowAggregationUMP2::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux)
{
// Folds one partial statistics triple from rowIn (count at colIn, mean at colIn + 1,
// M2 at colIn + 2) into the running triple kept in fRow (count at colOut, mean at
// colAux, M2 at colAux + 1), where M2 is the sum of squared deviations from the mean.
//
// NOTE(review): the three set*Field calls directly below add the input fields to the
// output fields as plain sums. They look like the pre-change lines of this diff hunk
// shown without +/- markers; if they really ran together with the merge that follows,
// the input count would be added to colOut twice -- confirm against the repository.
fRow.setDoubleField(fRow.getDoubleField(colOut) + rowIn.getDoubleField(colIn), colOut);
fRow.setLongDoubleField(fRow.getLongDoubleField(colAux) + rowIn.getLongDoubleField(colIn + 1), colAux);
fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + rowIn.getLongDoubleField(colIn + 2),
colAux + 1);
// Running aggregate currently stored in the output row.
double count = fRow.getDoubleField(colOut);
long double mean = fRow.getLongDoubleField(colAux);
long double M2 = fRow.getLongDoubleField(colAux + 1);
// Partial aggregate carried by the input row.
double block_count = rowIn.getDoubleField(colIn);
long double block_mean = rowIn.getLongDoubleField(colIn + 1);
long double block_M2 = rowIn.getLongDoubleField(colIn + 2);
double next_count = count + block_count;
long double next_mean;
long double next_M2;
// next_count is the divisor in the else branch; when it is zero, store zeros instead
// of dividing by it.
if (next_count == 0)
{
next_mean = 0;
next_M2 = 0;
}
else
{
// Difference between the two means.
// NOTE(review): volatile is presumably here to stop the compiler from folding or
// reordering this subtraction -- TODO confirm the intent.
volatile long double delta = mean - block_mean;
// Count-weighted average of the two means.
next_mean = (mean * count + block_mean * block_count) / next_count;
// Both M2 values plus a correction term that grows with the squared distance
// between the two means.
next_M2 = M2 + block_M2 + delta * delta * (count * block_count / next_count);
}
// Write the merged count, mean and M2 back to the output row.
fRow.setDoubleField(next_count, colOut);
fRow.setLongDoubleField(next_mean, colAux);
fRow.setLongDoubleField(next_M2, colAux + 1);
}
//------------------------------------------------------------------------------