From a86f432f35cef13e5fc90441e88d972c72fae10f Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Thu, 1 Apr 2021 21:32:31 +0400 Subject: [PATCH] Fixing DOUBLE-to-[U]INT conversion (MCOL-4649, MCOL-4631, MCOL-4647) Bugs fixed: - MCOL-4649 CAST(double AS UNSIGNED) returns 0 - MCOL-4631 CAST(double AS SIGNED) returns 0 or NULL - MCOL-4647 SEC_TO_TIME(double_or_float) returns a wrong result Problems: - The code in Func_cast_unsigned::getUintVal() and Func_cast_signed::getIntVal() did not properly check whether the double value fits inside the uint64_t/int64_t range. So the corner cases: - numeric_limits<uint64_t>::max()-2 for uint64_t - numeric_limits<int64_t>::max() for int64_t produced unexpected results. The problem was in tests like this: if (value > (double) numeric_limits<int64_t>::max()) A correct test would be: if (value >= (double) numeric_limits<int64_t>::max()) - The code in Func_sec_to_time::getStrVal() searched for the decimal dot character, assuming that the next character after the dot was the leftmost fractional digit. This assumption was wrong because very large and very small double values are printed in scientific notation. So for example in "2.5e-40" the digit "5" following the dot is NOT the leftmost fractional digit. Also, the code in Func_sec_to_time::getStrVal() was slow because of unnecessary to-string and from-string data conversion. Also, the code in Func_sec_to_time::getStrVal() evaluated the argument two times: using getStrVal() then using getIntVal(). Solution: - Adding new classes TDouble and TLongDouble. - Adding a few function templates to make the code easier to reuse. - Moving the conversion code inside TDouble and TLongDouble methods toMCSSInt64Round() and toMCSUInt64Round(). - Reusing new classes and their methods in func_cast.cpp and func_sec_to_time.cpp. 
--- datatypes/mcs_datatype.h | 3 +- datatypes/mcs_datatype_basic.h | 95 +++++++++++++++++++++++++++++ datatypes/mcs_double.h | 56 +++++++++++++++++ datatypes/mcs_longdouble.h | 56 +++++++++++++++++ mtr/basic/r/func_cast.result | 20 ++++++ mtr/basic/r/func_sec_to_time.result | 18 ++++++ mtr/basic/t/func_cast.test | 28 +++++++++ mtr/basic/t/func_sec_to_time.test | 14 +++++ utils/funcexp/func_cast.cpp | 68 +++------------------ utils/funcexp/func_sec_to_time.cpp | 43 ++----------- 10 files changed, 302 insertions(+), 99 deletions(-) create mode 100644 datatypes/mcs_datatype_basic.h create mode 100644 datatypes/mcs_double.h create mode 100644 datatypes/mcs_longdouble.h create mode 100644 mtr/basic/r/func_cast.result create mode 100644 mtr/basic/r/func_sec_to_time.result create mode 100644 mtr/basic/t/func_cast.test create mode 100644 mtr/basic/t/func_sec_to_time.test diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index 0929362fa..4bda25d0c 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -24,7 +24,8 @@ #include "mcs_numeric_limits.h" #include "mcs_data_condition.h" #include "mcs_decimal.h" - +#include "mcs_double.h" +#include "mcs_longdouble.h" #ifdef _MSC_VER typedef int mcs_sint32_t; diff --git a/datatypes/mcs_datatype_basic.h b/datatypes/mcs_datatype_basic.h new file mode 100644 index 000000000..9450692f3 --- /dev/null +++ b/datatypes/mcs_datatype_basic.h @@ -0,0 +1,95 @@ +/* + Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. +*/ +#ifndef MCS_DATATYPE_BASIC_H_INCLUDED +#define MCS_DATATYPE_BASIC_H_INCLUDED + +/* + This file contains simple definitions that can be + needed in multiple mcs_TYPE.h files. +*/ + +namespace datatypes +{ + + +// Convert a positive floating point value to +// a signed or unsigned integer with rounding +// SRC - a floating point data type (float, double, float128_t) +// DST - a signed or unsigned integer data type (int32_t, uint64_t, int128_t, etc) +template +DST positiveXFloatToXIntRound(SRC value, DST limit) +{ + SRC tmp = value + 0.5; + if (tmp >= static_cast(limit)) + return limit; + return static_cast(tmp); +} + + +// Convert a negative floating point value to +// a signed integer with rounding +// SRC - a floating point data type (float, double, float128_t) +// DST - a signed integer data type (int32_t, int64_t, int128_t, etc) +template +DST negativeXFloatToXIntRound(SRC value, DST limit) +{ + SRC tmp = value - 0.5; + if (tmp <= static_cast(limit)) + return limit; + return static_cast(tmp); +} + + +// Convert a floating point value to ColumnStore int64_t +// Magic values cannot be returned. +template +int64_t xFloatToMCSSInt64Round(SRC value) +{ + if (value > 0) + return positiveXFloatToXIntRound( + value, + numeric_limits::max()); + if (value < 0) + return negativeXFloatToXIntRound( + value, + numeric_limits::min() + 2); + return 0; +} + + +// Convert a floating point value to ColumnStore uint64_t +// Magic values cannot be returned. 
+template +uint64_t xFloatToMCSUInt64Round(SRC value) +{ + if (value > 0) + return positiveXFloatToXIntRound( + value, + numeric_limits::max() - 2); + if (value < 0) + return negativeXFloatToXIntRound(value, 0); + + return 0; +} + + +} //end of namespace datatypes + +#endif // MCS_DATATYPE_BASIC_H_INCLUDED +// vim:ts=2 sw=2: diff --git a/datatypes/mcs_double.h b/datatypes/mcs_double.h new file mode 100644 index 000000000..04bfbf3c9 --- /dev/null +++ b/datatypes/mcs_double.h @@ -0,0 +1,56 @@ +/* + Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
+*/ +#ifndef MCS_DOUBLE_H_INCLUDED +#define MCS_DOUBLE_H_INCLUDED + +#include "mcs_datatype_basic.h" + +namespace datatypes +{ + +class TDouble +{ +protected: + double mValue; +public: + TDouble(): mValue(0) { } + + explicit TDouble(double value): mValue(value) { } + + explicit operator double () const + { + return mValue; + } + + int64_t toMCSSInt64Round() const + { + return xFloatToMCSSInt64Round(mValue); + } + + uint64_t toMCSUInt64Round() const + { + return xFloatToMCSUInt64Round(mValue); + } +}; + + +} //end of namespace datatypes + +#endif // MCS_DOUBLE_H_INCLUDED +// vim:ts=2 sw=2: diff --git a/datatypes/mcs_longdouble.h b/datatypes/mcs_longdouble.h new file mode 100644 index 000000000..60caa3982 --- /dev/null +++ b/datatypes/mcs_longdouble.h @@ -0,0 +1,56 @@ +/* + Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
+*/ +#ifndef MCS_LONGDOUBLE_H_INCLUDED +#define MCS_LONGDOUBLE_H_INCLUDED + +#include "mcs_datatype_basic.h" + +namespace datatypes +{ + +class TLongDouble +{ +protected: + long double mValue; +public: + TLongDouble(): mValue(0) { } + + explicit TLongDouble(long double value): mValue(value) { } + + explicit operator long double () const + { + return mValue; + } + + int64_t toMCSSInt64Round() const + { + return xFloatToMCSSInt64Round(mValue); + } + + uint64_t toMCSUInt64Round() const + { + return xFloatToMCSUInt64Round(mValue); + } +}; + + +} //end of namespace datatypes + +#endif // MCS_LONGDOUBLE_H_INCLUDED +// vim:ts=2 sw=2: diff --git a/mtr/basic/r/func_cast.result b/mtr/basic/r/func_cast.result new file mode 100644 index 000000000..68550cde2 --- /dev/null +++ b/mtr/basic/r/func_cast.result @@ -0,0 +1,20 @@ +# +# MCOL-4631 CAST(double AS SIGNED) returns 0 or NULL +# +CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL); +INSERT INTO t1 VALUES (9.2233720368547758e+18, 9.2233720368547758e+18); +INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614); +SELECT d1, CAST(d1 AS SIGNED), CAST(d2 AS SIGNED) FROM t1; +d1 CAST(d1 AS SIGNED) CAST(d2 AS SIGNED) +9.223372036854776e18 9223372036854775807 9223372036854775807 +1.8446744073709552e19 9223372036854775807 9223372036854775807 +DROP TABLE t1; +# +# MCOL-4649 MCOL-4631 CAST(double AS UNSIGNED) returns 0 +# +CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL); +INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614); +SELECT d1, CAST(d1 AS UNSIGNED), CAST(d2 AS UNSIGNED) FROM t1; +d1 CAST(d1 AS UNSIGNED) CAST(d2 AS UNSIGNED) +1.8446744073709552e19 18446744073709551615 18446744073709551615 +DROP TABLE t1; diff --git a/mtr/basic/r/func_sec_to_time.result b/mtr/basic/r/func_sec_to_time.result new file mode 100644 index 000000000..96f57fc62 --- /dev/null +++ b/mtr/basic/r/func_sec_to_time.result @@ -0,0 +1,18 @@ +# +# MCOL-4647 SEC_TO_TIME(double_or_float) returns a wrong result +# +CREATE TABLE t1 (a DOUBLE); 
+INSERT INTO t1 VALUES (0.000025e-35); +INSERT INTO t1 VALUES (-1),(1); +INSERT INTO t1 VALUES (-900),(900); +INSERT INTO t1 VALUES (-1000000),(1000000); +SELECT a, SEC_TO_TIME(a) FROM t1 ORDER BY 1; +a SEC_TO_TIME(a) +-1000000 -277:46:40.000000 +-900 -00:15:00.000000 +-1 -00:00:01.000000 +2.5e-40 00:00:00.000000 +1 00:00:01.000000 +900 00:15:00.000000 +1000000 277:46:40.000000 +DROP TABLE t1; diff --git a/mtr/basic/t/func_cast.test b/mtr/basic/t/func_cast.test new file mode 100644 index 000000000..34a444203 --- /dev/null +++ b/mtr/basic/t/func_cast.test @@ -0,0 +1,28 @@ +--source ../include/have_columnstore.inc +--source ../include/combinations.myisam-columnstore.inc + +--echo # +--echo # MCOL-4631 CAST(double AS SIGNED) returns 0 or NULL +--echo # + +CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL); +INSERT INTO t1 VALUES (9.2233720368547758e+18, 9.2233720368547758e+18); +INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614); +--disable_warnings +SELECT d1, CAST(d1 AS SIGNED), CAST(d2 AS SIGNED) FROM t1; +--enable_warnings +DROP TABLE t1; + + +--echo # +--echo # MCOL-4649 MCOL-4631 CAST(double AS UNSIGNED) returns 0 +--echo # + +CREATE TABLE t1 (d1 DOUBLE, d2 DOUBLE NOT NULL); +INSERT INTO t1 VALUES (18446744073709551614,18446744073709551614); +--disable_warnings +--replace_result 18446744073709551613 18446744073709551615 +SELECT d1, CAST(d1 AS UNSIGNED), CAST(d2 AS UNSIGNED) FROM t1; +--enable_warnings +DROP TABLE t1; + diff --git a/mtr/basic/t/func_sec_to_time.test b/mtr/basic/t/func_sec_to_time.test new file mode 100644 index 000000000..e9524a639 --- /dev/null +++ b/mtr/basic/t/func_sec_to_time.test @@ -0,0 +1,14 @@ +--source ../include/have_columnstore.inc +--source ../include/combinations.myisam-columnstore.inc + +--echo # +--echo # MCOL-4647 SEC_TO_TIME(double_or_float) returns a wrong result +--echo # + +CREATE TABLE t1 (a DOUBLE); +INSERT INTO t1 VALUES (0.000025e-35); +INSERT INTO t1 VALUES (-1),(1); +INSERT INTO t1 VALUES (-900),(900); 
+INSERT INTO t1 VALUES (-1000000),(1000000); +SELECT a, SEC_TO_TIME(a) FROM t1 ORDER BY 1; +DROP TABLE t1; diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp index 3a6c28f32..8efcf7208 100644 --- a/utils/funcexp/func_cast.cpp +++ b/utils/funcexp/func_cast.cpp @@ -126,41 +126,15 @@ int64_t Func_cast_signed::getIntVal(Row& row, case execplan::CalpontSystemCatalog::DOUBLE: case execplan::CalpontSystemCatalog::UDOUBLE: { - double value = parm[0]->data()->getDoubleVal(row, isNull); - - if (value > 0) - value += 0.5; - else if (value < 0) - value -= 0.5; - - int64_t ret = (int64_t) value; - - if (value > (double) numeric_limits::max()) - ret = numeric_limits::max(); - else if (value < (double) (numeric_limits::min() + 2)) - ret = numeric_limits::min() + 2; // IDB min for bigint - - return ret; + datatypes::TDouble d(parm[0]->data()->getDoubleVal(row, isNull)); + return d.toMCSSInt64Round(); } break; case execplan::CalpontSystemCatalog::LONGDOUBLE: { - long double value = parm[0]->data()->getLongDoubleVal(row, isNull); - - if (value > 0) - value += 0.5; - else if (value < 0) - value -= 0.5; - - int64_t ret = (int64_t) value; - - if (value > (long double) numeric_limits::max()) - ret = numeric_limits::max(); - else if (value < (long double) (numeric_limits::min() + 2)) - ret = numeric_limits::min() + 2; // IDB min for bigint - - return ret; + datatypes::TLongDouble d(parm[0]->data()->getLongDoubleVal(row, isNull)); + return d.toMCSSInt64Round(); } break; @@ -272,41 +246,15 @@ uint64_t Func_cast_unsigned::getUintVal(Row& row, case execplan::CalpontSystemCatalog::DOUBLE: case execplan::CalpontSystemCatalog::UDOUBLE: { - double value = parm[0]->data()->getDoubleVal(row, isNull); - - if (value > 0) - value += 0.5; - else if (value < 0) - value -= 0.5; - - uint64_t ret = (uint64_t) value; - - if (value > (double) numeric_limits::max() - 2) - ret = numeric_limits::max(); - else if (value < 0) - ret = 0; - - return ret; + datatypes::TDouble 
d(parm[0]->data()->getDoubleVal(row, isNull)); + return d.toMCSUInt64Round(); } break; case execplan::CalpontSystemCatalog::LONGDOUBLE: { - long double value = parm[0]->data()->getLongDoubleVal(row, isNull); - - if (value > 0) - value += 0.5; - else if (value < 0) - value -= 0.5; - - uint64_t ret = (uint64_t) value; - - if (value > (long double) numeric_limits::max() - 2) - ret = numeric_limits::max(); - else if (value < 0) - ret = 0; - - return ret; + datatypes::TLongDouble d(parm[0]->data()->getLongDoubleVal(row, isNull)); + return d.toMCSUInt64Round(); } break; diff --git a/utils/funcexp/func_sec_to_time.cpp b/utils/funcexp/func_sec_to_time.cpp index 8d5594ca8..58fb9d2e3 100644 --- a/utils/funcexp/func_sec_to_time.cpp +++ b/utils/funcexp/func_sec_to_time.cpp @@ -75,47 +75,14 @@ string Func_sec_to_time::getStrVal(rowgroup::Row& row, case execplan::CalpontSystemCatalog::DOUBLE: { - const string& valStr = parm[0]->data()->getStrVal(row, isNull); - val = parm[0]->data()->getIntVal(row, isNull); - size_t x = valStr.find("."); - - if (x < string::npos) - { - string tmp = valStr.substr(x + 1, 1); - char* ptr = &tmp[0]; - int i = atoi(ptr); - - if (i >= 5) - { - if (val > 0) - val += 1; - else - val -= 1; - } - } + datatypes::TDouble d(parm[0]->data()->getDoubleVal(row, isNull)); + val = d.toMCSSInt64Round(); + break; } - break; - case execplan::CalpontSystemCatalog::FLOAT: { - const string& valStr = parm[0]->data()->getStrVal(row, isNull); - val = parm[0]->data()->getIntVal(row, isNull); - size_t x = valStr.find("."); - - if (x < string::npos) - { - string tmp = valStr.substr(x + 1, 1); - char* ptr = &tmp[0]; - int i = atoi(ptr); - - if (i >= 5) - { - if (val > 0) - val += 1; - else - val -= 1; - } - } + datatypes::TDouble d(parm[0]->data()->getFloatVal(row, isNull)); + val = d.toMCSSInt64Round(); } break;