From 8f80c1dee6dc57dbc4bc25f53419d00653931ce2 Mon Sep 17 00:00:00 2001
From: Gagan Goel
Date: Tue, 25 Feb 2020 17:11:32 -0500
Subject: [PATCH] MCOL-641 1. Implement int128 version of strtoll. 2. Templatize number_int_value. 3. Add test cases for strtoll128 and number_int_value for Decimal38.

---
 utils/common/widedecimalutils.h         |  75 +++
 utils/dataconvert/dataconvert-tests.cpp | 542 ++++++++++++++++++++++++
 utils/dataconvert/dataconvert.cpp       | 194 ++++++---
 utils/dataconvert/dataconvert.h         | 119 +++++-
 writeengine/xml/we_xmljob.cpp           |   1 +
 5 files changed, 877 insertions(+), 54 deletions(-)
 create mode 100644 utils/common/widedecimalutils.h
 create mode 100644 utils/dataconvert/dataconvert-tests.cpp

diff --git a/utils/common/widedecimalutils.h b/utils/common/widedecimalutils.h
new file mode 100644
index 000000000..be0c742d9
--- /dev/null
+++ b/utils/common/widedecimalutils.h
@@ -0,0 +1,75 @@
+/* Copyright (C) 2020 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#ifndef WIDE_DECIMAL_UTILS_H
+#define WIDE_DECIMAL_UTILS_H
+
+#include <cstdint>
+
+namespace utils
+{
+
+using int128_t = __int128;
+using uint128_t = unsigned __int128;
+
+// 64-bit halves of the bit patterns tested by the Null/Empty predicates below.
+const uint64_t BINARYNULLVALUELOW = 0ULL;
+const uint64_t BINARYNULLVALUEHIGH = 0x8000000000000000ULL;
+const uint64_t BINARYEMPTYVALUELOW = 1ULL;
+const uint64_t BINARYEMPTYVALUEHIGH = 0x8000000000000000ULL;
+
+    // The helpers below use shifts and casts instead of viewing the value
+    // through a uint64_t*: that access breaks the aliasing rules and makes
+    // the low/high halves depend on the byte order of the host.
+
+    // True when val holds the NULL pattern (BINARYNULLVALUEHIGH:LOW).
+    inline bool isWideDecimalNullValue(const int128_t val)
+    {
+        const uint128_t bits = static_cast<uint128_t>(val);
+        return (static_cast<uint64_t>(bits) == BINARYNULLVALUELOW &&
+                static_cast<uint64_t>(bits >> 64) == BINARYNULLVALUEHIGH);
+    }
+
+    // True when val holds the EMPTY pattern (BINARYEMPTYVALUEHIGH:LOW).
+    inline bool isWideDecimalEmptyValue(const int128_t val)
+    {
+        const uint128_t bits = static_cast<uint128_t>(val);
+        return (static_cast<uint64_t>(bits) == BINARYEMPTYVALUELOW &&
+                static_cast<uint64_t>(bits >> 64) == BINARYEMPTYVALUEHIGH);
+    }
+
+    // Sets val to the largest int128_t (0x7FFF...FFFF).
+    inline void int128Max(int128_t& val)
+    {
+        val = static_cast<int128_t>(~static_cast<uint128_t>(0) >> 1);
+    }
+
+    // Sets val to the smallest int128_t (0x8000...0000).
+    inline void int128Min(int128_t& val)
+    {
+        val = -static_cast<int128_t>(~static_cast<uint128_t>(0) >> 1) - 1;
+    }
+
+    // Sets val to the largest uint128_t (all 128 bits set).
+    inline void uint128Max(uint128_t& val)
+    {
+        val = ~static_cast<uint128_t>(0);
+    }
+
+}
+
+#endif // WIDE_DECIMAL_UTILS_H
diff --git a/utils/dataconvert/dataconvert-tests.cpp b/utils/dataconvert/dataconvert-tests.cpp
new file mode 100644
index 000000000..3a18571e8
--- /dev/null
+++ b/utils/dataconvert/dataconvert-tests.cpp
@@ -0,0 +1,542 @@
+/* Copyright (C) 2020 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +using namespace std; + +#include "gtest/gtest.h" + +#include "calpontsystemcatalog.h" +using namespace execplan; +#include "dataconvert.h" +using namespace dataconvert; + +TEST(DataConvertTest, Strtoll128) +{ + char *ep = NULL; + bool saturate = false; + string str; + int128_t val, valMax; + bitset<64> b1, b2, b3, b4; + + // test empty + str = ""; + val = strtoll128(str.c_str(), saturate, &ep); + EXPECT_EQ(val, 0); + EXPECT_EQ(*ep, '\0'); + EXPECT_FALSE(saturate); + + // test simple values + str = "123"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + EXPECT_EQ(val, 123); + EXPECT_EQ(*ep, '\0'); + EXPECT_FALSE(saturate); + str = " 123"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + EXPECT_EQ(val, 123); + EXPECT_EQ(*ep, '\0'); + EXPECT_FALSE(saturate); + str = " 123.45"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + EXPECT_EQ(val, 123); + EXPECT_NE(*ep, '\0'); + EXPECT_FALSE(saturate); + str = "-123"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + EXPECT_EQ(val, -123); + EXPECT_EQ(*ep, '\0'); + EXPECT_FALSE(saturate); + str = " -123"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + EXPECT_EQ(val, -123); + EXPECT_EQ(*ep, '\0'); + EXPECT_FALSE(saturate); + str = " -123.45"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + EXPECT_EQ(val, -123); + EXPECT_NE(*ep, '\0'); + EXPECT_FALSE(saturate); + + // test max/min values + // test max + str = "170141183460469231731687303715884105727"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + b1 = *(reinterpret_cast(&val)); + b2 = *(reinterpret_cast(&val) + 1); + valMax = ((((((((int128_t)170141183 * 1000000000) + 460469231) * 1000000000) + 
731687303) * 1000000000 ) + 715884105) * 1000) + 727; + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_EQ(*ep, '\0'); + EXPECT_FALSE(saturate); + // test min + str = "-170141183460469231731687303715884105728"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + b1 = *(reinterpret_cast(&val)); + b2 = *(reinterpret_cast(&val) + 1); + valMax = ((((((((int128_t)170141183 * 1000000000) + 460469231) * 1000000000) + 731687303) * 1000000000 ) + 715884105) * 1000) + 727; + valMax = -valMax - 1; + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_EQ(*ep, '\0'); + EXPECT_FALSE(saturate); + + // test saturation + // test saturation to max + str = "170141183460469231731687303715884105728"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + b1 = *(reinterpret_cast(&val)); + b2 = *(reinterpret_cast(&val) + 1); + valMax = ((((((((int128_t)170141183 * 1000000000) + 460469231) * 1000000000) + 731687303) * 1000000000 ) + 715884105) * 1000) + 727; + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_EQ(*ep, '\0'); + EXPECT_TRUE(saturate); + // test saturation to min + str = "-170141183460469231731687303715884105729"; + saturate = false; + val = strtoll128(str.c_str(), saturate, &ep); + b1 = *(reinterpret_cast(&val)); + b2 = *(reinterpret_cast(&val) + 1); + valMax = ((((((((int128_t)170141183 * 1000000000) + 460469231) * 1000000000) + 731687303) * 1000000000 ) + 715884105) * 1000) + 727; + valMax = -valMax - 1; + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_EQ(*ep, '\0'); + EXPECT_TRUE(saturate); +} + +TEST(DataConvertTest, NumberIntValue) +{ + CalpontSystemCatalog::ColType ct; + int128_t res, valMax; + string data; + bool noRoundup = 
false; + bool pushWarning; + bitset<64> b1, b2, b3, b4; + + // tests for signed decimal + // behaviour of number_int_value for unsigned decimal + // is similar to the signed case. + ct.colDataType = CalpontSystemCatalog::DECIMAL; + // test with decimal(38,0) + ct.precision = 38; + ct.scale = 0; + // test simple values + //data = ""; + data = "0"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 0); + EXPECT_FALSE(pushWarning); + data = "1234"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 1234); + EXPECT_FALSE(pushWarning); + data = "12.0"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 12); + EXPECT_FALSE(pushWarning); + data = "12.34"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 12); + EXPECT_TRUE(pushWarning); + data = "-1234"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, -1234); + EXPECT_FALSE(pushWarning); + data = "-12.34"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, -12); + EXPECT_TRUE(pushWarning); + // test max + data = "99999999999999999999999999999999999999"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + // test min + data = "-99999999999999999999999999999999999999"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + valMax = -valMax; + pushWarning = false; + 
number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + // test rounding + data = "12.56"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 13); + EXPECT_TRUE(pushWarning); + data = "-12.56"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, -13); + EXPECT_TRUE(pushWarning); + // test saturation + data = "999999999999999999999999999999999999999"; // data has 39 9's + // valMax has 38 9's + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + data = "-999999999999999999999999999999999999999"; // data has 39 9's + // valMax has 38 9's + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + // test scientific notation + data = "1.23e37"; + valMax = ((((((((int128_t)123000000 * 1000000000) + 0) * 1000000000) + 0) * 1000000000 ) + 0) * 100) + 0; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = 
*(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + data = "1.23e38"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + + // test with decimal(38,10) + ct.scale = 10; + data = "0"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 0); + EXPECT_FALSE(pushWarning); + data = "1234"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 12340000000000); + EXPECT_FALSE(pushWarning); + data = "12.0"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 120000000000); + EXPECT_FALSE(pushWarning); + data = "12.34"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 123400000000); + EXPECT_FALSE(pushWarning); + data = "-1234"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, -12340000000000); + EXPECT_FALSE(pushWarning); + data = "-12.34"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, -123400000000); + EXPECT_FALSE(pushWarning); + // test max + data = "9999999999999999999999999999.9999999999"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = 
*(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + // test min + data = "-9999999999999999999999999999.9999999999"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + // test rounding + data = "12.11111111119"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 121111111112); + EXPECT_TRUE(pushWarning); + data = "-12.11111111119"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, -121111111112); + EXPECT_TRUE(pushWarning); + // test saturation + data = "99999999999999999999999999999"; // data has 29 9's + // valMax has 38 9's + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + data = "-99999999999999999999999999999"; // data has 29 9's + // valMax has 38 9's + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = 
*(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + // test scientific notation + data = "1.23e9"; + valMax = ((((int128_t)123000000 * 1000000000) + 0) * 100); + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + data = "1.23e28"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + // test with decimal(38,38) + ct.scale = 38; + data = "0"; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + EXPECT_EQ(res, 0); + EXPECT_FALSE(pushWarning); + data = "1.234"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + data = "0.123"; + valMax = ((((((((int128_t)123000000 * 1000000000) + 0) * 1000000000) + 0) * 1000000000 ) + 0) * 100) + 0; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + 
EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + data = "-1.234"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + data = "-0.123"; + valMax = ((((((((int128_t)123000000 * 1000000000) + 0) * 1000000000) + 0) * 1000000000 ) + 0) * 100) + 0; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + // test max + data = "0.99999999999999999999999999999999999999"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + // test min + data = "-0.99999999999999999999999999999999999999"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); 
+ // test rounding + data = "0.199999999999999999999999999999999999999"; + valMax = ((((((((int128_t)200000000 * 1000000000) + 0) * 1000000000) + 0) * 1000000000 ) + 0) * 100) + 0; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + data = "-0.199999999999999999999999999999999999999"; + valMax = ((((((((int128_t)200000000 * 1000000000) + 0) * 1000000000) + 0) * 1000000000 ) + 0) * 100) + 0; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + // test saturation + data = "99999999999999999999999999999"; // data has 29 9's + // valMax has 38 9's + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + data = "-99999999999999999999999999999"; // data has 29 9's + // valMax has 38 9's + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + valMax = -valMax; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); 
+ EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); + // test scientific notation + data = "123e-4"; + valMax = ((((((((int128_t)123000000 * 1000000000) + 0) * 1000000000) + 0) * 1000000000 ) + 0) * 10) + 0; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_FALSE(pushWarning); + data = "123e-2"; + valMax = ((((((((int128_t)999999999 * 1000000000) + 999999999) * 1000000000) + 999999999) * 1000000000 ) + 999999999) * 100) + 99; + pushWarning = false; + number_int_value(data, ct, pushWarning, noRoundup, res); + b1 = *(reinterpret_cast(&res)); + b2 = *(reinterpret_cast(&res) + 1); + b3 = *(reinterpret_cast(&valMax)); + b4 = *(reinterpret_cast(&valMax) + 1); + EXPECT_EQ(b1, b3); + EXPECT_EQ(b2, b4); + EXPECT_TRUE(pushWarning); +} diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index 6838cebbe..1273e134f 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -37,7 +37,6 @@ using namespace boost::algorithm; #include "calpontsystemcatalog.h" #include "calpontselectexecutionplan.h" #include "columnresult.h" -#include "common/branchpred.h" using namespace execplan; #include "joblisttypes.h" @@ -79,6 +78,30 @@ const int64_t columnstore_precision[19] = 999999999999999999LL }; +const string columnstore_big_precision[20] = +{ + "9999999999999999999", + "99999999999999999999", + "999999999999999999999", + "9999999999999999999999", + "99999999999999999999999", + "999999999999999999999999", + "9999999999999999999999999", + "99999999999999999999999999", + "999999999999999999999999999", + "9999999999999999999999999999", + "99999999999999999999999999999", + "999999999999999999999999999999", + "9999999999999999999999999999999", + "99999999999999999999999999999999", + 
"999999999999999999999999999999999", + "9999999999999999999999999999999999", + "99999999999999999999999999999999999", + "999999999999999999999999999999999999", + "9999999999999999999999999999999999999", + "99999999999999999999999999999999999999" +}; + template bool from_string(T& t, const std::string& s, std::ios_base & (*f)(std::ios_base&)) { @@ -103,10 +126,17 @@ bool number_value ( const string& data ) return true; } -int64_t number_int_value(const string& data, - const CalpontSystemCatalog::ColType& ct, - bool& pushwarning, - bool noRoundup) +} // namespace anon + +namespace dataconvert +{ + +template +void number_int_value(const string& data, + const CalpontSystemCatalog::ColType& ct, + bool& pushwarning, + bool noRoundup, + T& intVal) { // copy of the original input string valStr(data); @@ -132,11 +162,13 @@ int64_t number_int_value(const string& data, if (boost::iequals(valStr, "true")) { - return 1; + intVal = 1; + return; } if (boost::iequals(valStr, "false")) { - return 0; + intVal = 0; + return; } // convert to fixed-point notation if input is in scientific notation @@ -152,9 +184,9 @@ int64_t number_int_value(const string& data, // get the exponent string exp = valStr.substr(epos + 1); bool overflow = false; - int64_t exponent = dataconvert::string_to_ll(exp, overflow); + T exponent = dataconvert::string_to_ll(exp, overflow); - // if the exponent can not be held in 64-bit, not supported or saturated. + // if the exponent can not be held in 64 or 128 bits, not supported or saturated. 
if (overflow) throw QueryDataExcept("value is invalid.", formatErr); @@ -265,7 +297,7 @@ int64_t number_int_value(const string& data, if (dp != string::npos) { //Check if need round up - int frac1 = dataconvert::string_to_ll(valStr.substr(dp + 1, 1), pushwarning); + int frac1 = dataconvert::string_to_ll(valStr.substr(dp + 1, 1), pushwarning); if ((!noRoundup) && frac1 >= 5) roundup = 1; @@ -281,11 +313,11 @@ int64_t number_int_value(const string& data, } } - int64_t intVal = dataconvert::string_to_ll(intStr, pushwarning); + intVal = dataconvert::string_to_ll(intStr, pushwarning); //@Bug 3350 negative value round up. intVal += intVal >= 0 ? roundup : -roundup; bool dummy = false; - int64_t frnVal = (frnStr.length() > 0) ? dataconvert::string_to_ll(frnStr, dummy) : 0; + T frnVal = (frnStr.length() > 0) ? dataconvert::string_to_ll(frnStr, dummy) : 0; if (frnVal != 0) pushwarning = true; @@ -406,6 +438,16 @@ int64_t number_int_value(const string& data, pushwarning = true; } } + else if (ct.colWidth == 16) + { + int128_t tmp; + utils::int128Min(tmp); + if (intVal < tmp + 2) // + 2 for NULL and EMPTY values + { + intVal = tmp + 2; + pushwarning = true; + } + } break; @@ -418,8 +460,20 @@ int64_t number_int_value(const string& data, (ct.colDataType == CalpontSystemCatalog::UDECIMAL) || (ct.scale > 0)) { - int64_t rangeUp = columnstore_precision[ct.precision]; - int64_t rangeLow = -rangeUp; + T rangeUp, rangeLow; + + if (ct.precision < 19) + { + rangeUp = (T) columnstore_precision[ct.precision]; + } + else + { + bool dummy = false; + char *ep = NULL; + rangeUp = (T) dataconvert::strtoll128(columnstore_big_precision[ct.precision - 19].c_str(), dummy, &ep); + } + + rangeLow = -rangeUp; if (intVal > rangeUp) { @@ -432,10 +486,23 @@ int64_t number_int_value(const string& data, pushwarning = true; } } - - return intVal; } +// Explicit template instantiation +template +void number_int_value(const std::string& data, + const execplan::CalpontSystemCatalog::ColType& ct, + bool& 
pushwarning, + bool noRoundup, + int64_t& intVal); + +template +void number_int_value(const std::string& data, + const execplan::CalpontSystemCatalog::ColType& ct, + bool& pushwarning, + bool noRoundup, + int128_t& intVal); + uint64_t number_uint_value(const string& data, const CalpontSystemCatalog::ColType& ct, bool& pushwarning, @@ -476,7 +543,7 @@ uint64_t number_uint_value(const string& data, // get the exponent string exp = valStr.substr(epos + 1); bool overflow = false; - int64_t exponent = dataconvert::string_to_ll(exp, overflow); + int64_t exponent = dataconvert::string_to_ll(exp, overflow); // if the exponent can not be held in 64-bit, not supported or saturated. if (overflow) @@ -596,11 +663,6 @@ uint64_t number_uint_value(const string& data, return uintVal; } -} // namespace anon - -namespace dataconvert -{ - /** * This function reads a decimal value from a string. It will stop processing * in 3 cases: @@ -1427,6 +1489,7 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, bool noRoundup, bool isUpdate) { boost::any value; + int64_t val64; // WIP std::string data( dataOrig ); pushWarning = false; @@ -1453,7 +1516,11 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, data.replace (x, 1, " "); } - if (number_int_value (data, colType, pushWarning, noRoundup)) + int64_t tmp = 0; + + number_int_value (data, colType, pushWarning, noRoundup, tmp); + + if (tmp) { bool bitvalue; @@ -1470,54 +1537,65 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, break; case CalpontSystemCatalog::TINYINT: - value = (char) number_int_value(data, colType, pushWarning, noRoundup); + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (char) val64; break; case CalpontSystemCatalog::SMALLINT: - value = (short) number_int_value(data, colType, pushWarning, noRoundup); + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (short) val64; break; case 
CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: - value = (int) number_int_value(data, colType, pushWarning, noRoundup); + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (int) val64; break; case CalpontSystemCatalog::BIGINT: - value = (long long) number_int_value(data, colType, pushWarning, noRoundup); + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (long long) val64; break; - // MCOL-641 WIP - // Simplest form of a template will use colType and width as a parameter - // There will be lots specializations case CalpontSystemCatalog::DECIMAL: - // TODO MCOL-641 implement decimal38 version of number_int_value - if (colType.colWidth == 16) + if (colType.colWidth == 1) { - int128_t bigint; - // WIP - //atoi_(data, bigint); - atoi128(data, bigint); - value = bigint; + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (char) val64; } - else if (colType.colWidth == 1) - value = (char) number_int_value(data, colType, pushWarning, noRoundup); else if (colType.colWidth == 2) - value = (short) number_int_value(data, colType, pushWarning, noRoundup); + { + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (short) val64; + } else if (colType.colWidth == 4) - value = (int) number_int_value(data, colType, pushWarning, noRoundup); + { + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (int) val64; + } else if (colType.colWidth == 8) - value = (long long) number_int_value(data, colType, pushWarning, noRoundup); - else if (colType.colWidth == 32) - value = data; + { + number_int_value(data, colType, pushWarning, noRoundup, val64); + value = (long long) val64; + } + else if (colType.colWidth == 16) + { + int128_t val128; + number_int_value(data, colType, pushWarning, noRoundup, val128); + value = (int128_t) val128; + } + //else if (colType.colWidth == 32) + // value = data; break; - // MCOL-641 Implement UDECIMAL + case 
CalpontSystemCatalog::UDECIMAL: // UDECIMAL numbers may not be negative if (colType.colWidth == 1) { - char ival = (char) number_int_value(data, colType, pushWarning, noRoundup); + number_int_value(data, colType, pushWarning, noRoundup, val64); + char ival = (char) val64; if (ival < 0 && ival != static_cast(joblist::TINYINTEMPTYROW) && @@ -1531,7 +1609,8 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, } else if (colType.colWidth == 2) { - short ival = (short) number_int_value(data, colType, pushWarning, noRoundup); + number_int_value(data, colType, pushWarning, noRoundup, val64); + short ival = (short) val64; if (ival < 0 && ival != static_cast(joblist::SMALLINTEMPTYROW) && @@ -1545,7 +1624,8 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, } else if (colType.colWidth == 4) { - int ival = static_cast(number_int_value(data, colType, pushWarning, noRoundup)); + number_int_value(data, colType, pushWarning, noRoundup, val64); + int ival = static_cast(val64); if (ival < 0 && ival != static_cast(joblist::INTEMPTYROW) && @@ -1559,7 +1639,8 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, } else if (colType.colWidth == 8) { - long long ival = static_cast(number_int_value(data, colType, pushWarning, noRoundup)); + number_int_value(data, colType, pushWarning, noRoundup, val64); + long long ival = static_cast(val64); if (ival < 0 && ival != static_cast(joblist::BIGINTEMPTYROW) && @@ -1571,6 +1652,21 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, value = ival; } + else if (colType.colWidth == 16) + { + int128_t val128; + number_int_value(data, colType, pushWarning, noRoundup, val128); + + if (val128 < 0 && + !utils::isWideDecimalNullValue(val128) && + !utils::isWideDecimalEmptyValue(val128)) + { + val128 = 0; + pushWarning = true; + } + + value = val128; + } break; diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h index 
2301d75ec..67cb3265c 100644
--- a/utils/dataconvert/dataconvert.h
+++ b/utils/dataconvert/dataconvert.h
@@ -50,6 +50,9 @@
 #include "calpontsystemcatalog.h"
 #include "columnresult.h"
 #include "exceptclasses.h"
+#include "common/branchpred.h"
+
+#include "widedecimalutils.h"
 
 // remove this block if the htonll is defined in library
 #ifdef __linux__
@@ -822,8 +825,8 @@ void TimeStamp::reset()
     second = 0xFFFFFFFFFFF;
 }
 
-inline
-int64_t string_to_ll( const std::string& data, bool& bSaturate )
+template <typename T>
+inline T string_to_ll( const std::string& data, bool& bSaturate )
 {
     // This function doesn't take into consideration our special values
     // for NULL and EMPTY when setting the saturation point. Should it?
@@ -870,6 +873,13 @@ uint64_t string_to_ull( const std::string& data, bool& bSaturate )
     return value;
 }
 
+template <typename T>
+void number_int_value(const std::string& data,
+                      const execplan::CalpontSystemCatalog::ColType& ct,
+                      bool& pushwarning,
+                      bool noRoundup,
+                      T& intVal);
+
 /** @brief DataConvert is a component for converting string data to Calpont format
  */
 class DataConvert
@@ -1044,9 +1054,7 @@ public:
                                       const uint8_t scale);
     template <typename T>
     static size_t writeFractionalPart(T* dec, char* p,
-        const uint16_t buflen, const uint8_t scale);
-
-
+                                      const uint16_t buflen, const uint8_t scale);
 
     static inline void int128Max(int128_t& i)
     {
@@ -1465,6 +1473,118 @@ inline std::string DataConvert::constructRegexp(const std::string& str)
     return ret;
 }
 
+inline int128_t add128(int128_t a, int128_t b)
+{
+    return a + b;
+}
+
+inline int128_t subtract128(int128_t a, int128_t b)
+{
+    return a - b;
+}
+
+inline bool lessThan128(int128_t a, int128_t b)
+{
+    return a < b;
+}
+
+inline bool greaterThan128(int128_t a, int128_t b)
+{
+    return a > b;
+}
+
+// __int128 counterpart of strtoll() for base-10 input.
+//
+// Skips leading whitespace, accepts an optional '+' or '-' sign and then
+// consumes decimal digits. When ep is non-null it receives the address of
+// the first character that was not consumed. If no digits are found, no
+// conversion is performed: 0 is returned and *ep is set to the start of the
+// input (as strtoll does), so callers can detect this with `ep == str`.
+// On overflow the result is clamped to the int128_t minimum or maximum,
+// saturate is set to true and the rest of the digit run is still consumed.
+// saturate is only ever set to true here; it is never cleared.
+inline int128_t strtoll128(const char* data, bool& saturate, char** ep)
+{
+    const char* const start = data;
+    int128_t res = 0;
+
+    // skip leading whitespace characters
+    while (isspace(static_cast<unsigned char>(*data)))
+        data++;
+
+    // optional sign
+    bool is_neg = false;
+
+    if (*data == '-' || *data == '+')
+    {
+        is_neg = (*data == '-');
+        data++;
+    }
+
+    // No digits means no conversion: hand back the original pointer.
+    if (!isdigit(static_cast<unsigned char>(*data)))
+    {
+        if (ep)
+            *ep = (char*)start;
+
+        return res;
+    }
+
+    // Saturation bound for the sign being parsed.
+    int128_t limit = 0;
+
+    if (is_neg)
+        utils::int128Min(limit);
+    else
+        utils::int128Max(limit);
+
+    for (; isdigit(static_cast<unsigned char>(*data)); data++)
+    {
+        const int digit = *data - '0';
+
+        // Test against the bound before multiplying: signed overflow is
+        // undefined behaviour, and a wrapped product is not guaranteed to
+        // compare below (or above) the previous value.
+        const bool overflow = is_neg ? (res < (limit + digit) / 10)
+                                     : (res > (limit - digit) / 10);
+
+        if (UNLIKELY(overflow))
+        {
+            saturate = true;
+            res = limit;
+
+            // consume the rest of the digit run
+            while (isdigit(static_cast<unsigned char>(*data)))
+                data++;
+
+            break;
+        }
+
+        res = is_neg ? (res * 10 - digit) : (res * 10 + digit);
+    }
+
+    if (ep)
+        *ep = (char*)data;
+
+    return res;
+}
+
+template<>
+inline int128_t string_to_ll<int128_t> ( const std::string& data, bool& bSaturate )
+{
+    // This function doesn't take into consideration our special values
+    // for NULL and EMPTY when setting the saturation point. Should it?
+    char* ep = NULL;
+    const char* str = data.c_str();
+    int128_t value = strtoll128(str, bSaturate, &ep);
+
+    // (no digits) || (more chars)
+    if ((ep == str) || (*ep != '\0'))
+        throw logging::QueryDataExcept("value is not numerical.", logging::formatErr);
+
+    return value;
+}
+
 } // namespace dataconvert
 
 #undef EXPORT
diff --git a/writeengine/xml/we_xmljob.cpp b/writeengine/xml/we_xmljob.cpp
index 863e5cbef..f93859b61 100644
--- a/writeengine/xml/we_xmljob.cpp
+++ b/writeengine/xml/we_xmljob.cpp
@@ -48,6 +48,7 @@ using namespace execplan;
 namespace WriteEngine
 {
 // Maximum saturation value for DECIMAL types based on precision
+// TODO MCOL-641 add support here. see dataconvert.cpp
 const long long columnstore_precision[19] =
 {
     0,