From a86f432f35cef13e5fc90441e88d972c72fae10f Mon Sep 17 00:00:00 2001
From: Alexander Barkov <bar@mariadb.com>
Date: Thu, 1 Apr 2021 21:32:31 +0400
Subject: [PATCH] Fixing DOUBLE-to-[U]INT conversion (MCOL-4649, MCOL-4631,
 MCOL-4647)

Bugs fixed:
- MCOL-4649 CAST(double AS UNSIGNED) returns 0
- MCOL-4631 CAST(double AS SIGNED) returns 0 or NULL
- MCOL-4647 SEC_TO_TIME(double_or_float) returns a wrong result

Problems:
- The code in Func_cast_unsigned::getUintVal() and
  Func_cast_signed::getIntVal() did not properly check
  the double value to fit inside a uint64_t/int64_t range.
  So the corner cases:
  - numeric_limits<uint64_t>::max()-2 for uint64_t
  - numeric_limits<int64_t>::max() for int64_t
  produced unexpected results.

  The problem was in tests like this:
    if (value > (double) numeric_limits<int64_t>::max())
  A correct test would be:
    if (value >= (double) numeric_limits<int64_t>::max())

- The code in Func_sec_to_time::getStrVal() searched for the decimal
  dot character, assuming that the next character after the dot
  was the leftmost fractional digit.
  This assumption was wrong because very large and very small double
  numbers are printed in scientific notation. So for example in "2.5e-40"
  the digit "5" following the dot is NOT the leftmost fractional digit.
  Also, the code in Func_sec_to_time::getStrVal() was slow
  because of unnecessary to-string and from-string
  data conversions.
  Also, the code in Func_sec_to_time::getStrVal() evaluated
  the argument two times: using getStrVal() then using getIntVal().

Solution:
- Adding new classes TDouble and TLongDouble.
- Adding a few function templates to make the code easier to reuse.
- Moving the conversion code inside TDouble and TLongDouble
  methods toMCSSInt64Round() and toMCSUInt64Round().
- Reusing new classes and their methods in func_cast.cc and
  func_sec_to_time.cc.
---
 datatypes/mcs_datatype.h            |  3 +-
 datatypes/mcs_datatype_basic.h      | 95 +++++++++++++++++++++++++++++
 datatypes/mcs_double.h              | 56 +++++++++++++++++
 datatypes/mcs_longdouble.h          | 56 +++++++++++++++++
 mtr/basic/r/func_cast.result        | 20 ++++++
 mtr/basic/r/func_sec_to_time.result | 18 ++++++
 mtr/basic/t/func_cast.test          | 28 +++++++++
 mtr/basic/t/func_sec_to_time.test   | 14 +++++
 utils/funcexp/func_cast.cpp         | 68 +++------------------
 utils/funcexp/func_sec_to_time.cpp  | 43 ++-----------
 10 files changed, 302 insertions(+), 99 deletions(-)
 create mode 100644 datatypes/mcs_datatype_basic.h
 create mode 100644 datatypes/mcs_double.h
 create mode 100644 datatypes/mcs_longdouble.h
 create mode 100644 mtr/basic/r/func_cast.result
 create mode 100644 mtr/basic/r/func_sec_to_time.result
 create mode 100644 mtr/basic/t/func_cast.test
 create mode 100644 mtr/basic/t/func_sec_to_time.test

diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h
index 0929362fa..4bda25d0c 100644
--- a/datatypes/mcs_datatype.h
+++ b/datatypes/mcs_datatype.h
@@ -24,7 +24,8 @@
 #include "mcs_numeric_limits.h"
 #include "mcs_data_condition.h"
 #include "mcs_decimal.h"
-
+#include "mcs_double.h"
+#include "mcs_longdouble.h"
 
 #ifdef _MSC_VER
 typedef int     mcs_sint32_t;
diff --git a/datatypes/mcs_datatype_basic.h b/datatypes/mcs_datatype_basic.h
new file mode 100644
index 000000000..9450692f3
--- /dev/null
+++ b/datatypes/mcs_datatype_basic.h
@@ -0,0 +1,95 @@
+/*
+   Copyright (C) 2021 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
+*/
+#ifndef MCS_DATATYPE_BASIC_H_INCLUDED
+#define MCS_DATATYPE_BASIC_H_INCLUDED
+
+/*
+  This file contains simple definitions that can be
+  needed in multiple mcs_TYPE.h files.
+*/
+
+#include <cstdint>
+#include <limits>
+
+namespace datatypes
+{
+
+// Convert a positive floating point value to a signed or unsigned
+// integer, rounding half away from zero and saturating at "limit".
+// SRC - a floating point data type (float, double, float128_t)
+// DST - a signed or unsigned integer data type (int32_t, uint64_t, int128_t, etc)
+template<typename SRC, typename DST>
+DST positiveXFloatToXIntRound(SRC value, DST limit)
+{
+  if (value >= static_cast<SRC>(limit))
+    return limit;
+  // Truncate, then look at the fraction. Computing "value + 0.5" instead
+  // is inexact: it turns 0.49999999999999994 into 1 and shifts odd
+  // integers in [2^52, 2^53) to their even neighbour (for double).
+  DST whole = static_cast<DST>(value);
+  if (value - static_cast<SRC>(whole) >= 0.5)
+    ++whole;
+  return whole;
+}
+
+// Convert a negative floating point value to a signed integer,
+// rounding half away from zero and saturating at "limit".
+// SRC - a floating point data type (float, double, float128_t)
+// DST - a signed integer data type (int32_t, int64_t, int128_t, etc)
+template<typename SRC, typename DST>
+DST negativeXFloatToXIntRound(SRC value, DST limit)
+{
+  if (value <= static_cast<SRC>(limit))
+    return limit;
+  // Truncate, then look at the fraction: "value - 0.5" would be inexact.
+  DST whole = static_cast<DST>(value);
+  if (static_cast<SRC>(whole) - value >= 0.5)
+    --whole;
+  return whole;
+}
+
+// Convert a floating point value to ColumnStore int64_t
+// Magic values cannot be returned. NaN is converted to 0.
+template<typename SRC>
+int64_t xFloatToMCSSInt64Round(SRC value)
+{
+  if (value > 0)
+    return positiveXFloatToXIntRound<SRC, int64_t>(
+             value, std::numeric_limits<int64_t>::max());
+  if (value < 0)
+    return negativeXFloatToXIntRound<SRC, int64_t>(
+             value, std::numeric_limits<int64_t>::min() + 2);
+  return 0;
+}
+
+// Convert a floating point value to ColumnStore uint64_t
+// Magic values cannot be returned. Negative values and NaN give 0.
+template<typename SRC>
+uint64_t xFloatToMCSUInt64Round(SRC value)
+{
+  if (value > 0)
+    return positiveXFloatToXIntRound<SRC, uint64_t>(
+             value, std::numeric_limits<uint64_t>::max() - 2);
+  return 0;
+}
+
+} //end of namespace datatypes
+
+#endif // MCS_DATATYPE_BASIC_H_INCLUDED
+// vim:ts=2 sw=2:
diff --git a/datatypes/mcs_double.h b/datatypes/mcs_double.h
new file mode 100644
index 000000000..04bfbf3c9
--- /dev/null
+++ b/datatypes/mcs_double.h
@@ -0,0 +1,56 @@
+/*
+   Copyright (C) 2021 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
+*/
+#ifndef MCS_DOUBLE_H_INCLUDED
+#define MCS_DOUBLE_H_INCLUDED
+
+#include "mcs_datatype_basic.h"
+
+namespace datatypes
+{
+
+// A thin wrapper around a native double value that provides
+// rounding conversions to ColumnStore 64-bit integer values.
+class TDouble
+{
+public:
+  // Wraps 0.
+  TDouble(): mValue(0) { }
+  // Wraps the given value; explicit, so a double never converts silently.
+  explicit TDouble(double value): mValue(value) { }
+
+  // Gives back the wrapped value; needs an explicit cast.
+  explicit operator double () const { return mValue; }
+
+  // Rounds to int64_t, delegating to xFloatToMCSSInt64Round().
+  int64_t toMCSSInt64Round() const
+  { return xFloatToMCSSInt64Round<double>(mValue); }
+
+  // Rounds to uint64_t, delegating to xFloatToMCSUInt64Round().
+  uint64_t toMCSUInt64Round() const
+  { return xFloatToMCSUInt64Round<double>(mValue); }
+
+protected:
+  double mValue;
+};
+
+
+} //end of namespace datatypes
+
+#endif // MCS_DOUBLE_H_INCLUDED
+// vim:ts=2 sw=2:
diff --git a/datatypes/mcs_longdouble.h b/datatypes/mcs_longdouble.h
new file mode 100644
index 000000000..60caa3982
--- /dev/null
+++ b/datatypes/mcs_longdouble.h
@@ -0,0 +1,56 @@
+/*
+   Copyright (C) 2021 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
+*/
+#ifndef MCS_LONGDOUBLE_H_INCLUDED
+#define MCS_LONGDOUBLE_H_INCLUDED
+
+#include "mcs_datatype_basic.h"
+
+namespace datatypes
+{
+
+// A thin wrapper around a native long double value that provides
+// rounding conversions to ColumnStore 64-bit integer values.
+class TLongDouble
+{
+public:
+  // Wraps 0.
+  TLongDouble(): mValue(0) { }
+  // Wraps the given value; explicit, so no silent conversion happens.
+  explicit TLongDouble(long double value): mValue(value) { }
+
+  // Gives back the wrapped value; needs an explicit cast.
+  explicit operator long double () const { return mValue; }
+
+  // Rounds to int64_t, delegating to xFloatToMCSSInt64Round().
+  int64_t toMCSSInt64Round() const
+  { return xFloatToMCSSInt64Round<long double>(mValue); }
+
+  // Rounds to uint64_t, delegating to xFloatToMCSUInt64Round().
+  uint64_t toMCSUInt64Round() const
+  { return xFloatToMCSUInt64Round<long double>(mValue); }
+
+protected:
+  long double mValue;
+};
+
+
+} //end of namespace datatypes
+
+#endif // MCS_LONGDOUBLE_H_INCLUDED
+// vim:ts=2 sw=2:
diff --git a/mtr/basic/r/func_cast.result b/mtr/basic/r/func_cast.result
new file mode 100644
index 000000000..68550cde2
--- /dev/null
+++ b/mtr/basic/r/func_cast.result
@@ -0,0 +1,20 @@
+#
+# MCOL-4631 CAST(double AS SIGNED) returns 0 or NULL
+#
+CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL);
+INSERT INTO t1 VALUES (9.2233720368547758e+18, 9.2233720368547758e+18);
+INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614);
+SELECT d1, CAST(d1 AS SIGNED), CAST(d2 AS SIGNED) FROM t1;
+d1	CAST(d1 AS SIGNED)	CAST(d2 AS SIGNED)
+9.223372036854776e18	9223372036854775807	9223372036854775807
+1.8446744073709552e19	9223372036854775807	9223372036854775807
+DROP TABLE t1;
+#
+# MCOL-4649 MCOL-4631 CAST(double AS UNSIGNED) returns 0
+#
+CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL);
+INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614);
+SELECT d1, CAST(d1 AS UNSIGNED), CAST(d2 AS UNSIGNED) FROM t1;
+d1	CAST(d1 AS UNSIGNED)	CAST(d2 AS UNSIGNED)
+1.8446744073709552e19	18446744073709551615	18446744073709551615
+DROP TABLE t1;
diff --git a/mtr/basic/r/func_sec_to_time.result b/mtr/basic/r/func_sec_to_time.result
new file mode 100644
index 000000000..96f57fc62
--- /dev/null
+++ b/mtr/basic/r/func_sec_to_time.result
@@ -0,0 +1,18 @@
+#
+# MCOL-4647 SEC_TO_TIME(double_or_float) returns a wrong result
+#
+CREATE TABLE t1 (a DOUBLE);
+INSERT INTO t1 VALUES (0.000025e-35);
+INSERT INTO t1 VALUES (-1),(1);
+INSERT INTO t1 VALUES (-900),(900);
+INSERT INTO t1 VALUES (-1000000),(1000000);
+SELECT a, SEC_TO_TIME(a) FROM t1 ORDER BY 1;
+a	SEC_TO_TIME(a)
+-1000000	-277:46:40.000000
+-900	-00:15:00.000000
+-1	-00:00:01.000000
+2.5e-40	00:00:00.000000
+1	00:00:01.000000
+900	00:15:00.000000
+1000000	277:46:40.000000
+DROP TABLE t1;
diff --git a/mtr/basic/t/func_cast.test b/mtr/basic/t/func_cast.test
new file mode 100644
index 000000000..34a444203
--- /dev/null
+++ b/mtr/basic/t/func_cast.test
@@ -0,0 +1,28 @@
+--source ../include/have_columnstore.inc
+--source ../include/combinations.myisam-columnstore.inc
+
+--echo #
+--echo # MCOL-4631 CAST(double AS SIGNED) returns 0 or NULL
+--echo #
+# Both inserted values are >= 2^63; the expected result is 9223372036854775807 for each.
+CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL);
+INSERT INTO t1 VALUES (9.2233720368547758e+18, 9.2233720368547758e+18);
+INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614);
+--disable_warnings
+SELECT d1, CAST(d1 AS SIGNED), CAST(d2 AS SIGNED) FROM t1;
+--enable_warnings
+DROP TABLE t1;
+
+
+--echo #
+--echo # MCOL-4649 MCOL-4631 CAST(double AS UNSIGNED) returns 0
+--echo #
+# NOTE(review): 18446744073709551613 is max()-2, the cap used by xFloatToMCSUInt64Round(); the replace presumably lets both engine combinations share one result file - confirm.
+CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL);
+INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614);
+--disable_warnings
+--replace_result 18446744073709551613 18446744073709551615
+SELECT d1, CAST(d1 AS UNSIGNED), CAST(d2 AS UNSIGNED) FROM t1;
+--enable_warnings
+DROP TABLE t1;
+
diff --git a/mtr/basic/t/func_sec_to_time.test b/mtr/basic/t/func_sec_to_time.test
new file mode 100644
index 000000000..e9524a639
--- /dev/null
+++ b/mtr/basic/t/func_sec_to_time.test
@@ -0,0 +1,14 @@
+--source ../include/have_columnstore.inc
+--source ../include/combinations.myisam-columnstore.inc
+
+--echo #
+--echo # MCOL-4647 SEC_TO_TIME(double_or_float) returns a wrong result
+--echo #
+# 0.000025e-35 is displayed as 2.5e-40 and must give 00:00:00, not be rounded up by the digit after the dot.
+CREATE TABLE t1 (a DOUBLE);
+INSERT INTO t1 VALUES (0.000025e-35);
+INSERT INTO t1 VALUES (-1),(1);
+INSERT INTO t1 VALUES (-900),(900);
+INSERT INTO t1 VALUES (-1000000),(1000000);
+SELECT a, SEC_TO_TIME(a) FROM t1 ORDER BY 1;
+DROP TABLE t1;
diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp
index 3a6c28f32..8efcf7208 100644
--- a/utils/funcexp/func_cast.cpp
+++ b/utils/funcexp/func_cast.cpp
@@ -126,41 +126,15 @@ int64_t Func_cast_signed::getIntVal(Row& row,
         case execplan::CalpontSystemCatalog::DOUBLE:
         case execplan::CalpontSystemCatalog::UDOUBLE:
         {
-            double value = parm[0]->data()->getDoubleVal(row, isNull);
-
-            if (value > 0)
-                value += 0.5;
-            else if (value < 0)
-                value -= 0.5;
-
-            int64_t ret = (int64_t) value;
-
-            if (value > (double) numeric_limits<int64_t>::max())
-                ret = numeric_limits<int64_t>::max();
-            else if (value < (double) (numeric_limits<int64_t>::min() + 2))
-                ret = numeric_limits<int64_t>::min() + 2; // IDB min for bigint
-
-            return ret;
+            datatypes::TDouble d(parm[0]->data()->getDoubleVal(row, isNull));
+            return d.toMCSSInt64Round();
         }
         break;
 
         case execplan::CalpontSystemCatalog::LONGDOUBLE:
         {
-            long double value = parm[0]->data()->getLongDoubleVal(row, isNull);
-
-            if (value > 0)
-                value += 0.5;
-            else if (value < 0)
-                value -= 0.5;
-
-            int64_t ret = (int64_t) value;
-
-            if (value > (long double) numeric_limits<int64_t>::max())
-                ret = numeric_limits<int64_t>::max();
-            else if (value < (long double) (numeric_limits<int64_t>::min() + 2))
-                ret = numeric_limits<int64_t>::min() + 2; // IDB min for bigint
-
-            return ret;
+            datatypes::TLongDouble d(parm[0]->data()->getLongDoubleVal(row, isNull));
+            return d.toMCSSInt64Round();
         }
         break;
 
@@ -272,41 +246,15 @@ uint64_t Func_cast_unsigned::getUintVal(Row& row,
         case execplan::CalpontSystemCatalog::DOUBLE:
         case execplan::CalpontSystemCatalog::UDOUBLE:
         {
-            double value = parm[0]->data()->getDoubleVal(row, isNull);
-
-            if (value > 0)
-                value += 0.5;
-            else if (value < 0)
-                value -= 0.5;
-
-            uint64_t ret = (uint64_t) value;
-
-            if (value > (double) numeric_limits<uint64_t>::max() - 2)
-                ret = numeric_limits<int64_t>::max();
-            else if (value < 0)
-                ret = 0;
-
-            return ret;
+            datatypes::TDouble d(parm[0]->data()->getDoubleVal(row, isNull));
+            return d.toMCSUInt64Round();
         }
         break;
 
         case execplan::CalpontSystemCatalog::LONGDOUBLE:
         {
-            long double value = parm[0]->data()->getLongDoubleVal(row, isNull);
-
-            if (value > 0)
-                value += 0.5;
-            else if (value < 0)
-                value -= 0.5;
-
-            uint64_t ret = (uint64_t) value;
-
-            if (value > (long double) numeric_limits<uint64_t>::max() - 2)
-                ret = numeric_limits<int64_t>::max();
-            else if (value < 0)
-                ret = 0;
-
-            return ret;
+            datatypes::TLongDouble d(parm[0]->data()->getLongDoubleVal(row, isNull));
+            return d.toMCSUInt64Round();
         }
         break;
 
diff --git a/utils/funcexp/func_sec_to_time.cpp b/utils/funcexp/func_sec_to_time.cpp
index 8d5594ca8..58fb9d2e3 100644
--- a/utils/funcexp/func_sec_to_time.cpp
+++ b/utils/funcexp/func_sec_to_time.cpp
@@ -75,47 +75,14 @@ string Func_sec_to_time::getStrVal(rowgroup::Row& row,
 
         case execplan::CalpontSystemCatalog::DOUBLE:
         {
-            const string& valStr = parm[0]->data()->getStrVal(row, isNull);
-            val = parm[0]->data()->getIntVal(row, isNull);
-            size_t x = valStr.find(".");
-
-            if (x < string::npos)
-            {
-                string tmp = valStr.substr(x + 1, 1);
-                char* ptr = &tmp[0];
-                int i = atoi(ptr);
-
-                if (i >= 5)
-                {
-                    if (val > 0)
-                        val += 1;
-                    else
-                        val -= 1;
-                }
-            }
+            datatypes::TDouble d(parm[0]->data()->getDoubleVal(row, isNull));
+            val = d.toMCSSInt64Round();
+            break;
         }
-        break;
-
         case execplan::CalpontSystemCatalog::FLOAT:
         {
-            const string& valStr = parm[0]->data()->getStrVal(row, isNull);
-            val = parm[0]->data()->getIntVal(row, isNull);
-            size_t x = valStr.find(".");
-
-            if (x < string::npos)
-            {
-                string tmp = valStr.substr(x + 1, 1);
-                char* ptr = &tmp[0];
-                int i = atoi(ptr);
-
-                if (i >= 5)
-                {
-                    if (val > 0)
-                        val += 1;
-                    else
-                        val -= 1;
-                }
-            }
+            datatypes::TDouble d(parm[0]->data()->getFloatVal(row, isNull));
+            val = d.toMCSSInt64Round();
         }
         break;