Merge pull request #1899 from tntnatbry/MCOL-4612

MCOL-4612 A subquery with a union for DECIMAL and BIGINT returns zeros.
2025-07-30 19:23:07 +03:00 · 2021-05-03 02:52:23 -04:00
parent 71c16fcb56 4e9307fa6d
commit 78cca01dfa
4 changed files with 187 additions and 36 deletions
--- a/dbcon/joblist/tupleunion.cpp
+++ b/dbcon/joblist/tupleunion.cpp
@ -504,34 +504,29 @@ void TupleUnion::normalize(const Row& in, Row* out)
                    {
 dec1:
                        int diff = out->getScale(i) - in.getScale(i);
                        idbassert(diff >= 0);
                        /*
                           Signed INT to XDecimal
-                           TODO: there are a few problems here:
+                           TODO:
                           - This code is not wide decimal friendly.
                             `uint64_t val` can overflow when applying a positive scale
                             It's not possible to make a reproducible bug report
                             at the moment: MCOL-4612 and MCOL-4613 should be fixed first.
                           - Using uint64_t is wrong here. The data type of "in" field is
                             signed. In case of a negative diff, the result will be wrong,
                             because division (unlike multiplication) is sensitive to
                             the signess of the operands.
                             Perhaps diff cannot be negative and we can put an assert for it.
                             Anyway, it's safer to change `uint64_t val` to `int64_t val`.
                           - This code does not handle overflow that may happen on
-                             scale multiplication (MCOL-4613). Instead of returning a garbage
+                             scale multiplication. Instead of returning a garbage value
                             we should probably apply saturation here. In long terms we
                             should implement DECIMAL(65,x) to avoid overflow completely
                             (so the UNION between DECIMAL and integer can choose a proper
                              DECIMAL(M,N) result data type to guarantee that any incoming
                              integer value can fit into it).
                        */
                        // TODO: isn't overflow possible below?
                        uint64_t val = datatypes::applySignedScale<uint64_t>(in.getIntField(i), diff);
                        if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
                        {
                            int128_t val = datatypes::applySignedScale<int128_t>(in.getIntField(i), diff);
                            out->setInt128Field(val, i);
                        }
                        else
                        {
                            int64_t val = datatypes::applySignedScale<int64_t>(in.getIntField(i), diff);
                            out->setIntField(val, i);
                        }
                        break;
                    }
@ -625,20 +620,24 @@ dec1:
                    {
 dec2:
                        int diff = out->getScale(i) - in.getScale(i);
                        idbassert(diff >= 0);
                        /*
                          Unsigned INT to XDecimal
-                          TODO: There are a few problems here:
+                          TODO:
-                          - It should use in.getUintField() instead of in.getIntField()
+                          - The overflow problem mentioned in the code under label "dec1:" is
                          - All problems mentioned in the code under label "dec1:" are
                            also applicable here.
                        */
                        // TODO: isn't overflow possible below?
                        uint64_t val = datatypes::applySignedScale<uint64_t>(in.getIntField(i), diff);
                        if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
                        {
                            int128_t val = datatypes::applySignedScale<int128_t>(in.getUintField(i), diff);
                            out->setInt128Field(val, i);
                        }
                        else
-                            out->setIntField(val, i);
+                        {
                            uint64_t val = datatypes::applySignedScale<uint64_t>(in.getUintField(i), diff);
                            out->setUintField(val, i);
                        }
                        break;
                    }
@ -1109,9 +1108,8 @@ dec4:                   /* have to pick a scale to use for the double. using 5..
                                out->setInt128Field(isInputWide ? val128 : val, i);
                            else if (out->getScale(i) > scale)
                            {
-                                int128_t divisor = 1;
+                                int128_t temp = datatypes::applySignedScale<int128_t>(
-                                datatypes::getScaleDivisor(divisor, out->getScale(i) - scale);
+                                    isInputWide ? val128 : val, out->getScale(i) - scale);
                                int128_t temp = isInputWide ? divisor*val128 : divisor*val;
                                out->setInt128Field(temp, i);
                            }
                            else // should not happen, the output's scale is the largest
@ -1124,7 +1122,11 @@ dec4:                   /* have to pick a scale to use for the double. using 5..
                            if (out->getScale(i) == scale)
                                out->setIntField(val, i);
                            else if (out->getScale(i) > scale)
-                                out->setIntField(IDB_pow[out->getScale(i) - scale]*val, i);
+                            {
                                int64_t temp = datatypes::applySignedScale<int64_t>(
                                    val, out->getScale(i) - scale);
                                out->setIntField(temp, i);
                            }
                            else // should not happen, the output's scale is the largest
                                throw logic_error("TupleUnion::normalize(): incorrect scale setting");
                        }
--- a/mysql-test/columnstore/csinternal/devregression/r/mcs7147_regression_bug4388.result
+++ b/mysql-test/columnstore/csinternal/devregression/r/mcs7147_regression_bug4388.result
@ -1,9 +1,11 @@
 USE tpch1;
-create table if not exists bug4388(
+set default_storage_engine=columnstore;
 drop table if exists bug4388;
 create table bug4388(
 `venta_clave` int(10) DEFAULT NULL,
 `cantidad` decimal(10,3) DEFAULT NULL,
 `changev` decimal(18,4) DEFAULT NULL
-) engine=columnstore;
+);
 insert into bug4388 values (null,null,6.8000);
 select coalesce(sum(changev),0) as col1 from bug4388;
 col1
@ -38,4 +40,76 @@ select sum(co) from ( select sum(col1) as co from (select sum(changev)
 as col1 from bug4388 ) t ) res;
 sum(co)
 6.8000
 select sum(col1) as co from  ( select 984467440737095516
 as col1 union all select coalesce(sum(changev),0) as col1 from
 bug4388 ) t;
 co
 984467440737095522.8000
 select sum(col1) as co from  ( select 18446744073709551612
 as col1 union all select coalesce(sum(changev),0) as col1 from
 bug4388 ) t;
 co
 18446744073709551618.8000
 drop table bug4388;
 #
 # MCOL-4613 Garbage result of a union between huge narrow DECIMAL and BIGINT
 #
 drop table if exists t1;
 drop table if exists t2;
 drop table if exists t3;
 create table t1 (a decimal(17,1), b bigint);
 insert into t1 values (9999999999999999.9, 999999999999999999);
 select * from (select a from t1 union select b from t1) tu order by a;
 a
 9999999999999999.9
 999999999999999999.0
 drop table t1;
 #
 # MCOL-4612 A subquery with a union for DECIMAL and BIGINT returns zeros
 #
 create table t1 (a decimal(17,1), b bigint);
 insert into t1 values (1, 1);
 insert into t1 values (9999999999999999, 99999999999999999);
 select * from (select a from t1 union select b from t1) tu order by a;
 a
 1.0
 9999999999999999.0
 99999999999999999.0
 select * from (select a from t1 union all select b from t1) tu order by a;
 a
 1.0
 1.0
 9999999999999999.0
 99999999999999999.0
 drop table t1;
 create table t1 (a decimal(18,5), b decimal(18,5) unsigned);
 create table t2 (a bigint, b bigint unsigned);
 create table t3 (a decimal(38,10), b decimal(38,10) unsigned);
 insert into t1 values
 (-1234567890123.12345, 1234567890123.12345),
 (-1234567890123.1234, 1234567890123.1234),
 (-9999999999999.99999, 9999999999999.99999),
 (-999999999999.99999, 999999999999.99999),
 (-99999999999.99999, 99999999999.99999);
 insert into t2 values
 (-123456789012345, 123456789012345),
 (9223372036854775807, 18446744073709551613),
 (-9223372036854775806, 0);
 insert into t3 values
 (-9999999999999999999999999999.9999999999, 9999999999999999999999999999.9999999999),
 (-1234567890123456789012345678.9012345678, 1234567890123456789012345678.9012345678);
 select * from (select a,b from t1 union select a,b from t2 union select a,b from t3) tu order by a,b;
 a	b
 -9999999999999999999999999999.9999999999	9999999999999999999999999999.9999999999
 -1234567890123456789012345678.9012345678	1234567890123456789012345678.9012345678
 -9223372036854775806.0000000000	0.0000000000
 -123456789012345.0000000000	123456789012345.0000000000
 -9999999999999.9999900000	9999999999999.9999900000
 -1234567890123.1234500000	1234567890123.1234500000
 -1234567890123.1234000000	1234567890123.1234000000
 -999999999999.9999900000	999999999999.9999900000
 -99999999999.9999900000	99999999999.9999900000
 9223372036854775807.0000000000	18446744073709551613.0000000000
 drop table t1;
 drop table t2;
 drop table t3;
--- a/mysql-test/columnstore/csinternal/devregression/t/mcs7147_regression_bug4388.test
+++ b/mysql-test/columnstore/csinternal/devregression/t/mcs7147_regression_bug4388.test
@ -8,11 +8,17 @@
 #
 USE tpch1;
 #
-create table if not exists bug4388(
+
 set default_storage_engine=columnstore;
 --disable_warnings
 drop table if exists bug4388;
 --enable_warnings
 create table bug4388(
 `venta_clave` int(10) DEFAULT NULL,
  `cantidad` decimal(10,3) DEFAULT NULL,
  `changev` decimal(18,4) DEFAULT NULL
-) engine=columnstore;
+);
 insert into bug4388 values (null,null,6.8000);
 select coalesce(sum(changev),0) as col1 from bug4388;
 select sum(co) + 0 from ( select sum(col1) as co from  ( select 0 
@ -31,6 +37,68 @@ select sum(col1) from
 select sum(co) from (select sum(changev) as co from bug4388 ) t;
 select sum(co) from ( select sum(col1) as co from (select sum(changev)
 as col1 from bug4388 ) t ) res;
 drop table bug4388;
 #
 select sum(col1) as co from  ( select 984467440737095516
 as col1 union all select coalesce(sum(changev),0) as col1 from
 bug4388 ) t;
 select sum(col1) as co from  ( select 18446744073709551612
 as col1 union all select coalesce(sum(changev),0) as col1 from
 bug4388 ) t;
 drop table bug4388;
 --echo #
 --echo # MCOL-4613 Garbage result of a union between huge narrow DECIMAL and BIGINT
 --echo #
 --disable_warnings
 drop table if exists t1;
 drop table if exists t2;
 drop table if exists t3;
 --enable_warnings
 # Original test case from the MCOL-4613 issue description
 create table t1 (a decimal(17,1), b bigint);
 insert into t1 values (9999999999999999.9, 999999999999999999);
 select * from (select a from t1 union select b from t1) tu order by a;
 drop table t1;
 --echo #
 --echo # MCOL-4612 A subquery with a union for DECIMAL and BIGINT returns zeros
 --echo #
 # Original test case from the MCOL-4612 issue description
 create table t1 (a decimal(17,1), b bigint);
 insert into t1 values (1, 1);
 insert into t1 values (9999999999999999, 99999999999999999);
 select * from (select a from t1 union select b from t1) tu order by a;
 select * from (select a from t1 union all select b from t1) tu order by a;
 drop table t1;
 # Additional test cases for MCOL-4613 and MCOL-4612
 create table t1 (a decimal(18,5), b decimal(18,5) unsigned);
 create table t2 (a bigint, b bigint unsigned);
 create table t3 (a decimal(38,10), b decimal(38,10) unsigned);
 insert into t1 values
 (-1234567890123.12345, 1234567890123.12345),
 (-1234567890123.1234, 1234567890123.1234),
 (-9999999999999.99999, 9999999999999.99999),
 (-999999999999.99999, 999999999999.99999),
 (-99999999999.99999, 99999999999.99999);
 insert into t2 values
 (-123456789012345, 123456789012345),
 (9223372036854775807, 18446744073709551613),
 (-9223372036854775806, 0);
 insert into t3 values
 (-9999999999999999999999999999.9999999999, 9999999999999999999999999999.9999999999),
 (-1234567890123456789012345678.9012345678, 1234567890123456789012345678.9012345678);
 select * from (select a,b from t1 union select a,b from t2 union select a,b from t3) tu order by a,b;
 drop table t1;
 drop table t2;
 drop table t3;
--- a/utils/dataconvert/dataconvert.cpp
+++ b/utils/dataconvert/dataconvert.cpp
@ -3037,6 +3037,17 @@ DataConvert::joinColTypeForUnion(datatypes::SystemCatalog::TypeHolderStd &unione
                case datatypes::SystemCatalog::UINT:
                case datatypes::SystemCatalog::UBIGINT:
                case datatypes::SystemCatalog::UDECIMAL:
                    unionedType.precision = std::max(type.precision, unionedType.precision);
                    unionedType.scale = std::max(type.scale, unionedType.scale);
                    if (datatypes::Decimal::isWideDecimalTypeByPrecision(unionedType.precision))
                    {
                        unionedType.colDataType = datatypes::SystemCatalog::DECIMAL;
                        unionedType.colWidth = datatypes::MAXDECIMALWIDTH;
                        break;
                    }
                    if (type.colWidth > unionedType.colWidth)
                    {
                        unionedType.colDataType = type.colDataType;
@ -3054,10 +3065,6 @@ DataConvert::joinColTypeForUnion(datatypes::SystemCatalog::TypeHolderStd &unione
                        unionedType.colDataType = datatypes::SystemCatalog::DECIMAL;
                    }
                    if (type.precision > unionedType.precision)
                        unionedType.precision = type.precision;
                    unionedType.scale = (type.scale > unionedType.scale) ? type.scale : unionedType.scale;
                    break;
                case datatypes::SystemCatalog::DATE: