1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

Use Welford's algorithm for the STD and VAR window functions

This commit is contained in:
Andrey Piskunov
2022-06-01 19:05:33 +03:00
parent c5fa27475d
commit 66c69c7609
6 changed files with 31809 additions and 15 deletions

View File

@ -139,8 +139,8 @@ WindowFunctionType* WF_stats<T>::clone() const
template <typename T>
void WF_stats<T>::resetData()
{
fSum1 = 0;
fSum2 = 0;
fMean = 0;
fM2sum = 0;
fCount = 0;
fStats = 0.0;
@ -171,33 +171,28 @@ void WF_stats<T>::operator()(int64_t b, int64_t e, int64_t c)
if (fRow.isNullValue(colIn) == true)
continue;
// Welford's single-pass algorithm
T valIn;
getValue(colIn, valIn, &cdt);
long double val = (long double)valIn;
fSum1 += val;
fSum2 += val * val;
fCount++;
long double delta = val - fMean;
fMean += delta/fCount;
fM2sum += delta * (val - fMean);
}
if (fCount > 1)
{
uint32_t scale = fRow.getScale(colIn);
auto factor = datatypes::scaleDivisor<long double>(scale);
long double ldSum1 = fSum1;
long double ldSum2 = fSum2;
long double stat = fM2sum;
// adjust the scale if necessary
if (scale != 0 && cdt != CalpontSystemCatalog::LONGDOUBLE)
{
ldSum1 /= factor;
ldSum2 /= factor * factor;
stat /= factor * factor;
}
long double stat = ldSum1 * ldSum1 / fCount;
stat = ldSum2 - stat;
if (fFunctionId == WF__STDDEV_POP)
stat = sqrt(stat / fCount);
else if (fFunctionId == WF__STDDEV_SAMP)

View File

@ -40,8 +40,8 @@ class WF_stats : public WindowFunctionType
static boost::shared_ptr<WindowFunctionType> makeFunction(int, const string&, int, WindowFunctionColumn*);
protected:
long double fSum1;
long double fSum2;
long double fMean;
long double fM2sum;
uint64_t fCount;
double fStats;
};