MCOL-1793 Window functions fail when current row outside of window

2025-07-30 19:23:07 +03:00 · 2018-10-29 12:18:15 -05:00
parent e14e58ab6c
commit 72eb1d7345
6 changed files with 36 additions and 458 deletions
--- a/utils/udfsdk/avg_mode.cpp
+++ b/utils/udfsdk/avg_mode.cpp
@ -69,65 +69,13 @@ mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* v
 {
    static_any::any& valIn = valsIn[0].columnData;
    MODE_DATA& data = static_cast<ModeData*>(context->getUserData())->mData;
-    DATATYPE val = 0.0;

    if (valIn.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

-    if (valIn.compatible(charTypeId))
-    {
-        val = valIn.cast<char>();
-    }
-    else if (valIn.compatible(scharTypeId))
-    {
-        val = valIn.cast<signed char>();
-    }
-    else if (valIn.compatible(shortTypeId))
-    {
-        val = valIn.cast<short>();
-    }
-    else if (valIn.compatible(intTypeId))
-    {
-        val = valIn.cast<int>();
-    }
-    else if (valIn.compatible(longTypeId))
-    {
-        val = valIn.cast<long>();
-    }
-    else if (valIn.compatible(llTypeId))
-    {
-        val = valIn.cast<long long>();
-    }
-    else if (valIn.compatible(ucharTypeId))
-    {
-        val = valIn.cast<unsigned char>();
-    }
-    else if (valIn.compatible(ushortTypeId))
-    {
-        val = valIn.cast<unsigned short>();
-    }
-    else if (valIn.compatible(uintTypeId))
-    {
-        val = valIn.cast<unsigned int>();
-    }
-    else if (valIn.compatible(ulongTypeId))
-    {
-        val = valIn.cast<unsigned long>();
-    }
-    else if (valIn.compatible(ullTypeId))
-    {
-        val = valIn.cast<unsigned long long>();
-    }
-    else if (valIn.compatible(floatTypeId))
-    {
-        val = valIn.cast<float>();
-    }
-    else if (valIn.compatible(doubleTypeId))
-    {
-        val = valIn.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsIn[0].scale;
@ -190,65 +138,13 @@ mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* v
 {
    static_any::any& valIn = valsDropped[0].columnData;
    MODE_DATA& data = static_cast<ModeData*>(context->getUserData())->mData;
-    DATATYPE val = 0.0;

    if (valIn.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

-    if (valIn.compatible(charTypeId))
-    {
-        val = valIn.cast<char>();
-    }
-    else if (valIn.compatible(scharTypeId))
-    {
-        val = valIn.cast<signed char>();
-    }
-    else if (valIn.compatible(shortTypeId))
-    {
-        val = valIn.cast<short>();
-    }
-    else if (valIn.compatible(intTypeId))
-    {
-        val = valIn.cast<int>();
-    }
-    else if (valIn.compatible(longTypeId))
-    {
-        val = valIn.cast<long>();
-    }
-    else if (valIn.compatible(llTypeId))
-    {
-        val = valIn.cast<long long>();
-    }
-    else if (valIn.compatible(ucharTypeId))
-    {
-        val = valIn.cast<unsigned char>();
-    }
-    else if (valIn.compatible(ushortTypeId))
-    {
-        val = valIn.cast<unsigned short>();
-    }
-    else if (valIn.compatible(uintTypeId))
-    {
-        val = valIn.cast<unsigned int>();
-    }
-    else if (valIn.compatible(ulongTypeId))
-    {
-        val = valIn.cast<unsigned long>();
-    }
-    else if (valIn.compatible(ullTypeId))
-    {
-        val = valIn.cast<unsigned long long>();
-    }
-    else if (valIn.compatible(floatTypeId))
-    {
-        val = valIn.cast<float>();
-    }
-    else if (valIn.compatible(doubleTypeId))
-    {
-        val = valIn.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsDropped[0].scale;
--- a/utils/udfsdk/avgx.cpp
+++ b/utils/udfsdk/avgx.cpp
@ -75,69 +75,13 @@ mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsI
 {
    static_any::any& valIn_x = valsIn[0].columnData;
    struct  avgx_data* data = (struct avgx_data*)context->getUserData()->data;
-    DATATYPE val = 0.0;

    if (valIn_x.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

-    if (valIn_x.compatible(longTypeId))
-    {
-        val = valIn_x.cast<long>();
-    }
-    else if (valIn_x.compatible(charTypeId))
-    {
-        val = valIn_x.cast<char>();
-    }
-    else if (valIn_x.compatible(scharTypeId))
-    {
-        val = valIn_x.cast<signed char>();
-    }
-    else if (valIn_x.compatible(shortTypeId))
-    {
-        val = valIn_x.cast<short>();
-    }
-    else if (valIn_x.compatible(intTypeId))
-    {
-        val = valIn_x.cast<int>();
-    }
-    else if (valIn_x.compatible(longTypeId))
-    {
-        val = valIn_x.cast<long>();
-    }
-    else if (valIn_x.compatible(llTypeId))
-    {
-        val = valIn_x.cast<long long>();
-    }
-    else if (valIn_x.compatible(ucharTypeId))
-    {
-        val = valIn_x.cast<unsigned char>();
-    }
-    else if (valIn_x.compatible(ushortTypeId))
-    {
-        val = valIn_x.cast<unsigned short>();
-    }
-    else if (valIn_x.compatible(uintTypeId))
-    {
-        val = valIn_x.cast<unsigned int>();
-    }
-    else if (valIn_x.compatible(ulongTypeId))
-    {
-        val = valIn_x.cast<unsigned long>();
-    }
-    else if (valIn_x.compatible(ullTypeId))
-    {
-        val = valIn_x.cast<unsigned long long>();
-    }
-    else if (valIn_x.compatible(floatTypeId))
-    {
-        val = valIn_x.cast<float>();
-    }
-    else if (valIn_x.compatible(doubleTypeId))
-    {
-        val = valIn_x.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn_x);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsIn[0].scale;
@ -183,65 +127,13 @@ mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsD
 {
    static_any::any& valIn_x = valsDropped[0].columnData;
    struct avgx_data* data = (struct avgx_data*)context->getUserData()->data;
-    DATATYPE val = 0.0;

    if (valIn_x.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

-    if (valIn_x.compatible(charTypeId))
-    {
-        val = valIn_x.cast<char>();
-    }
-    else if (valIn_x.compatible(scharTypeId))
-    {
-        val = valIn_x.cast<signed char>();
-    }
-    else if (valIn_x.compatible(shortTypeId))
-    {
-        val = valIn_x.cast<short>();
-    }
-    else if (valIn_x.compatible(intTypeId))
-    {
-        val = valIn_x.cast<int>();
-    }
-    else if (valIn_x.compatible(longTypeId))
-    {
-        val = valIn_x.cast<long>();
-    }
-    else if (valIn_x.compatible(llTypeId))
-    {
-        val = valIn_x.cast<long long>();
-    }
-    else if (valIn_x.compatible(ucharTypeId))
-    {
-        val = valIn_x.cast<unsigned char>();
-    }
-    else if (valIn_x.compatible(ushortTypeId))
-    {
-        val = valIn_x.cast<unsigned short>();
-    }
-    else if (valIn_x.compatible(uintTypeId))
-    {
-        val = valIn_x.cast<unsigned int>();
-    }
-    else if (valIn_x.compatible(ulongTypeId))
-    {
-        val = valIn_x.cast<unsigned long>();
-    }
-    else if (valIn_x.compatible(ullTypeId))
-    {
-        val = valIn_x.cast<unsigned long long>();
-    }
-    else if (valIn_x.compatible(floatTypeId))
-    {
-        val = valIn_x.cast<float>();
-    }
-    else if (valIn_x.compatible(doubleTypeId))
-    {
-        val = valIn_x.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn_x);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsDropped[0].scale;
--- a/utils/udfsdk/docs/source/usage/sourcefile.rst
+++ b/utils/udfsdk/docs/source/usage/sourcefile.rst
@ -124,9 +124,9 @@ nextValue()

 nextValue() is called from the PM for aggregate usage and the UM for Analytic usage.

-valsIn contains a vector of all the parameters from the function call in the SQL query (In Columndtore 1.1, this will always contain exactly one entry).
+valsIn contains a vector of all the parameters from the function call in the SQL query.

-Depending on your function, you may wish to be able to handle many different types of input. A good way to handle this is to have a series of if..else..if statements comparing the input type and dealing with each separately. For instace, if you want to handle multiple numeric types, you might use::
+Depending on your function, you may wish to be able to handle many different types of input. There's a helper template function convertAnyTo() which will convert the input static_any::any value to the designated type. For example, if your internal accumulator is of type double, you might use::

 	static_any::any& valIn = valsDropped[0].columnData;
 	AVGData& data = static_cast<MedianData*>(context->getUserData())->mData;
@ -137,21 +137,7 @@ Depending on your function, you may wish to be able to handle many different typ
 		return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
 	}

-	if (valIn.compatible(charTypeId))
-	{
-		val = valIn.cast<char>();
-	}
-	else if (valIn.compatible(scharTypeId))
-	{
-		val = valIn.cast<signed char>();
-	}
-	else if (valIn.compatible(shortTypeId))
-	{
-		val = valIn.cast<short>();
-	}
-	.
-	.
-	.
+	val = convertAnyTo<double>(valIn);

 Once you've gotten your data in a format you like, then do your aggregation. For AVG, you might see::

--- a/utils/udfsdk/median.cpp
+++ b/utils/udfsdk/median.cpp
@ -69,65 +69,13 @@ mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* val
 {
    static_any::any& valIn = valsIn[0].columnData;
    MEDIAN_DATA& data = static_cast<MedianData*>(context->getUserData())->mData;
-    DATATYPE val = 0.0;

    if (valIn.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

-    if (valIn.compatible(charTypeId))
-    {
-        val = valIn.cast<char>();
-    }
-    else if (valIn.compatible(scharTypeId))
-    {
-        val = valIn.cast<signed char>();
-    }
-    else if (valIn.compatible(shortTypeId))
-    {
-        val = valIn.cast<short>();
-    }
-    else if (valIn.compatible(intTypeId))
-    {
-        val = valIn.cast<int>();
-    }
-    else if (valIn.compatible(longTypeId))
-    {
-        val = valIn.cast<long>();
-    }
-    else if (valIn.compatible(llTypeId))
-    {
-        val = valIn.cast<long long>();
-    }
-    else if (valIn.compatible(ucharTypeId))
-    {
-        val = valIn.cast<unsigned char>();
-    }
-    else if (valIn.compatible(ushortTypeId))
-    {
-        val = valIn.cast<unsigned short>();
-    }
-    else if (valIn.compatible(uintTypeId))
-    {
-        val = valIn.cast<unsigned int>();
-    }
-    else if (valIn.compatible(ulongTypeId))
-    {
-        val = valIn.cast<unsigned long>();
-    }
-    else if (valIn.compatible(ullTypeId))
-    {
-        val = valIn.cast<unsigned long long>();
-    }
-    else if (valIn.compatible(floatTypeId))
-    {
-        val = valIn.cast<float>();
-    }
-    else if (valIn.compatible(doubleTypeId))
-    {
-        val = valIn.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsIn[0].scale;
@ -215,65 +163,13 @@ mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* val
 {
    static_any::any& valIn = valsDropped[0].columnData;
    MEDIAN_DATA& data = static_cast<MedianData*>(context->getUserData())->mData;
-    DATATYPE val = 0.0;

    if (valIn.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

-    if (valIn.compatible(charTypeId))
-    {
-        val = valIn.cast<char>();
-    }
-    else if (valIn.compatible(scharTypeId))
-    {
-        val = valIn.cast<signed char>();
-    }
-    else if (valIn.compatible(shortTypeId))
-    {
-        val = valIn.cast<short>();
-    }
-    else if (valIn.compatible(intTypeId))
-    {
-        val = valIn.cast<int>();
-    }
-    else if (valIn.compatible(longTypeId))
-    {
-        val = valIn.cast<long>();
-    }
-    else if (valIn.compatible(llTypeId))
-    {
-        val = valIn.cast<long long>();
-    }
-    else if (valIn.compatible(ucharTypeId))
-    {
-        val = valIn.cast<unsigned char>();
-    }
-    else if (valIn.compatible(ushortTypeId))
-    {
-        val = valIn.cast<unsigned short>();
-    }
-    else if (valIn.compatible(uintTypeId))
-    {
-        val = valIn.cast<unsigned int>();
-    }
-    else if (valIn.compatible(ulongTypeId))
-    {
-        val = valIn.cast<unsigned long>();
-    }
-    else if (valIn.compatible(ullTypeId))
-    {
-        val = valIn.cast<unsigned long long>();
-    }
-    else if (valIn.compatible(floatTypeId))
-    {
-        val = valIn.cast<float>();
-    }
-    else if (valIn.compatible(doubleTypeId))
-    {
-        val = valIn.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsDropped[0].scale;
--- a/utils/udfsdk/ssq.cpp
+++ b/utils/udfsdk/ssq.cpp
@ -85,65 +85,13 @@ mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn
 {
    static_any::any& valIn = valsIn[0].columnData;
    struct ssq_data* data = (struct ssq_data*)context->getUserData()->data;
-    DATATYPE val = 0.0;

    if (context->isParamNull(0) || valIn.empty())
    {
        return mcsv1_UDAF::SUCCESS;
    }

-    if (valIn.compatible(charTypeId))
-    {
-        val = valIn.cast<char>();
-    }
-    else if (valIn.compatible(scharTypeId))
-    {
-        val = valIn.cast<signed char>();
-    }
-    else if (valIn.compatible(shortTypeId))
-    {
-        val = valIn.cast<short>();
-    }
-    else if (valIn.compatible(intTypeId))
-    {
-        val = valIn.cast<int>();
-    }
-    else if (valIn.compatible(longTypeId))
-    {
-        val = valIn.cast<long>();
-    }
-    else if (valIn.compatible(llTypeId))
-    {
-        val = valIn.cast<long long>();
-    }
-    else if (valIn.compatible(ucharTypeId))
-    {
-        val = valIn.cast<unsigned char>();
-    }
-    else if (valIn.compatible(ushortTypeId))
-    {
-        val = valIn.cast<unsigned short>();
-    }
-    else if (valIn.compatible(uintTypeId))
-    {
-        val = valIn.cast<unsigned int>();
-    }
-    else if (valIn.compatible(ulongTypeId))
-    {
-        val = valIn.cast<unsigned long>();
-    }
-    else if (valIn.compatible(ullTypeId))
-    {
-        val = valIn.cast<unsigned long long>();
-    }
-    else if (valIn.compatible(floatTypeId))
-    {
-        val = valIn.cast<float>();
-    }
-    else if (valIn.compatible(doubleTypeId))
-    {
-        val = valIn.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsIn[0].scale;
@ -186,65 +134,13 @@ mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDr
 {
    static_any::any& valIn = valsDropped[0].columnData;
    struct ssq_data* data = (struct ssq_data*)context->getUserData()->data;
-    DATATYPE val = 0.0;

    if (valIn.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

-    if (valIn.compatible(charTypeId))
-    {
-        val = valIn.cast<char>();
-    }
-    else if (valIn.compatible(scharTypeId))
-    {
-        val = valIn.cast<signed char>();
-    }
-    else if (valIn.compatible(shortTypeId))
-    {
-        val = valIn.cast<short>();
-    }
-    else if (valIn.compatible(intTypeId))
-    {
-        val = valIn.cast<int>();
-    }
-    else if (valIn.compatible(longTypeId))
-    {
-        val = valIn.cast<long>();
-    }
-    else if (valIn.compatible(llTypeId))
-    {
-        val = valIn.cast<long long>();
-    }
-    else if (valIn.compatible(ucharTypeId))
-    {
-        val = valIn.cast<unsigned char>();
-    }
-    else if (valIn.compatible(ushortTypeId))
-    {
-        val = valIn.cast<unsigned short>();
-    }
-    else if (valIn.compatible(uintTypeId))
-    {
-        val = valIn.cast<unsigned int>();
-    }
-    else if (valIn.compatible(ulongTypeId))
-    {
-        val = valIn.cast<unsigned long>();
-    }
-    else if (valIn.compatible(ullTypeId))
-    {
-        val = valIn.cast<unsigned long long>();
-    }
-    else if (valIn.compatible(floatTypeId))
-    {
-        val = valIn.cast<float>();
-    }
-    else if (valIn.compatible(doubleTypeId))
-    {
-        val = valIn.cast<double>();
-    }
+    DATATYPE val = convertAnyTo<double>(valIn);

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsDropped[0].scale;
--- a/utils/windowfunction/windowfunction.cpp
+++ b/utils/windowfunction/windowfunction.cpp
@ -187,21 +187,33 @@ void WindowFunction::operator()()
                        prevFrame = w;
                    }

-                    // UDAnF functions may have a dropValue function implemented.
-                    // If they do, we can optimize by calling dropValue() for those
-                    // values leaving the window and nextValue for those entering, rather
-                    // than a resetData() and then iterating over the entire window.
-                    // Built-in functions may have this functionality added in the future.
-                    if (fFunctionType->dropValues(prevFrame.first, w.first))
+                    // If b > e then the frame is entirely outside of the partition
+                    // and there are no values to add
+                    if (b <= e)
                    {
-                        b = firstTime ? w.first : prevFrame.second + 1;
-                    }
-                    else
-                    {
-                        fFunctionType->resetData();
-                    }
+                        // UDAnF functions may have a dropValue function implemented.
+                        // If they do, we can optimize by calling dropValue() for those
+                        // values leaving the window and nextValue for those entering, rather
+                        // than a resetData() and then iterating over the entire window.
+                        // Built-in functions may have this functionality added in the future.
+                        // The b > e case (nothing to drop) never reaches this point; it is excluded by the enclosing check.
+                        if (!firstTime) 
+                        {
+                            if (fFunctionType->dropValues(prevFrame.first, w.first))
+                            {
+                                // Adjust the beginning of the frame for nextValue
+                                // to start where the previous frame left off.
+                                b = prevFrame.second + 1;
+                            }
+                            else
+                            {
+                                // dropValues failed so do the entire frame.
+                                fFunctionType->resetData();
+                            }
+                        }

-                    fFunctionType->operator()(b, e, i);
+                        fFunctionType->operator()(b, e, i); // Calls nextValue
+                    }
                    prevFrame = w;
                    firstTime = false;
                }