You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-4173 This patch adds support for wide-DECIMAL INNER, OUTER, SEMI and functional JOINs
built on top of TypelessData
This commit is contained in:
@ -411,6 +411,33 @@ inline bool isNumeric(const datatypes::SystemCatalog::ColDataType type)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Classifies a column data type as an integer type.
///
/// @param type column data type to test
/// @return true when @p type is TINYINT, SMALLINT, MEDINT, INT, BIGINT,
///         UTINYINT, USMALLINT, UMEDINT, UINT or UBIGINT; false for any
///         other value
inline bool isInteger(const datatypes::SystemCatalog::ColDataType type)
{
    // Same set of types the original switch listed, expressed as one predicate.
    return type == datatypes::SystemCatalog::TINYINT ||
           type == datatypes::SystemCatalog::SMALLINT ||
           type == datatypes::SystemCatalog::MEDINT ||
           type == datatypes::SystemCatalog::INT ||
           type == datatypes::SystemCatalog::BIGINT ||
           type == datatypes::SystemCatalog::UTINYINT ||
           type == datatypes::SystemCatalog::USMALLINT ||
           type == datatypes::SystemCatalog::UMEDINT ||
           type == datatypes::SystemCatalog::UINT ||
           type == datatypes::SystemCatalog::UBIGINT;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Checks whether a column data type is LONGDOUBLE.
///
/// @param type column data type to test
/// @return true only when @p type equals datatypes::SystemCatalog::LONGDOUBLE
inline bool isLongDouble(const datatypes::SystemCatalog::ColDataType type)
{
    const bool matchesLongDouble = (datatypes::SystemCatalog::LONGDOUBLE == type);
    return matchesLongDouble;
}
|
||||||
|
|
||||||
inline bool isDecimal(const datatypes::SystemCatalog::ColDataType type)
|
inline bool isDecimal(const datatypes::SystemCatalog::ColDataType type)
|
||||||
{
|
{
|
||||||
return (type == datatypes::SystemCatalog::DECIMAL ||
|
return (type == datatypes::SystemCatalog::DECIMAL ||
|
||||||
|
@ -83,7 +83,7 @@ namespace datatypes
|
|||||||
constexpr uint32_t MAXDECIMALWIDTH = 16U;
|
constexpr uint32_t MAXDECIMALWIDTH = 16U;
|
||||||
constexpr uint8_t INT64MAXPRECISION = 18U;
|
constexpr uint8_t INT64MAXPRECISION = 18U;
|
||||||
constexpr uint8_t INT128MAXPRECISION = 38U;
|
constexpr uint8_t INT128MAXPRECISION = 38U;
|
||||||
constexpr uint8_t MAXLEGACYWIDTH = 8U;
|
constexpr uint32_t MAXLEGACYWIDTH = 8U;
|
||||||
constexpr uint8_t MAXSCALEINC4AVG = 4U;
|
constexpr uint8_t MAXSCALEINC4AVG = 4U;
|
||||||
constexpr int8_t IGNOREPRECISION = -1;
|
constexpr int8_t IGNOREPRECISION = -1;
|
||||||
|
|
||||||
|
@ -285,6 +285,21 @@ class TSInt128
|
|||||||
return TSInt128(s128Value + rhs.s128Value);
|
return TSInt128(s128Value + rhs.s128Value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool operator>(const TSInt128& rhs) const
|
||||||
|
{
|
||||||
|
return s128Value > rhs.s128Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator<(const TSInt128& rhs) const
|
||||||
|
{
|
||||||
|
return s128Value < rhs.s128Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator!=(const TSInt128& rhs) const
|
||||||
|
{
|
||||||
|
return s128Value != rhs.getValue();
|
||||||
|
}
|
||||||
|
|
||||||
inline TFloat128 toTFloat128() const
|
inline TFloat128 toTFloat128() const
|
||||||
{
|
{
|
||||||
return TFloat128(s128Value);
|
return TFloat128(s128Value);
|
||||||
|
@ -51,6 +51,11 @@ public:
|
|||||||
{
|
{
|
||||||
return mValue;
|
return mValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void store(uint8_t* dst) const
|
||||||
|
{
|
||||||
|
*(uint64_t*) dst = mValue;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -1093,6 +1093,7 @@ void BatchPrimitiveProcessorJL::createBPP(ByteStream& bs) const
|
|||||||
cout << "PMJoinerCount = " << PMJoinerCount << endl;
|
cout << "PMJoinerCount = " << PMJoinerCount << endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
bool smallSideRGSent = false;
|
||||||
for (i = 0; i < PMJoinerCount; i++)
|
for (i = 0; i < PMJoinerCount; i++)
|
||||||
{
|
{
|
||||||
bs << (uint32_t) tJoiners[i]->size();
|
bs << (uint32_t) tJoiners[i]->size();
|
||||||
@ -1121,6 +1122,17 @@ void BatchPrimitiveProcessorJL::createBPP(ByteStream& bs) const
|
|||||||
{
|
{
|
||||||
serializeVector<uint32_t>(bs, tJoiners[i]->getLargeKeyColumns());
|
serializeVector<uint32_t>(bs, tJoiners[i]->getLargeKeyColumns());
|
||||||
bs << (uint32_t) tJoiners[i]->getKeyLength();
|
bs << (uint32_t) tJoiners[i]->getKeyLength();
|
||||||
|
// MCOL-4173 Notify PP if smallSide and largeSide have different column widths
|
||||||
|
// and send smallSide RG to PP.
|
||||||
|
bool joinHasSkewedKeyColumn = tJoiners[i]->joinHasSkewedKeyColumn();
|
||||||
|
bs << joinHasSkewedKeyColumn;
|
||||||
|
if (!smallSideRGSent && joinHasSkewedKeyColumn)
|
||||||
|
{
|
||||||
|
idbassert(!smallSideRGs.empty());
|
||||||
|
bs << smallSideRGs[0];
|
||||||
|
serializeVector<uint32_t>(bs, tJoiners[i]->getSmallKeyColumns());
|
||||||
|
smallSideRGSent = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1606,17 +1618,6 @@ bool BatchPrimitiveProcessorJL::nextTupleJoinerMsg(ByteStream& bs)
|
|||||||
|
|
||||||
smallSide.setRowCount(toSend);
|
smallSide.setRowCount(toSend);
|
||||||
tmpData.serialize(bs, smallSide.getDataSize());
|
tmpData.serialize(bs, smallSide.getDataSize());
|
||||||
|
|
||||||
/*
|
|
||||||
uint32_t lpos;
|
|
||||||
uint8_t *buf;
|
|
||||||
|
|
||||||
bs.needAtLeast(r.getSize() * toSend);
|
|
||||||
buf = (uint8_t *) bs.getInputPtr();
|
|
||||||
//for (i = pos, lpos = 0; i < pos + toSend; i++, lpos += r.getSize())
|
|
||||||
// memcpy(&buf[lpos], (*tSmallSide)[i], r.getSize());
|
|
||||||
bs.advanceInputPtr(r.getSize() * toSend);
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pos += toSend;
|
pos += toSend;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* Copyright (C) 2014 InfiniDB, Inc.
|
/* Copyright (C) 2014 InfiniDB, Inc.
|
||||||
Copyright (C) 2019 MariaDB Corporation
|
Copyright (C) 2019-2021 MariaDB Corporation
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
This program is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU General Public License
|
modify it under the terms of the GNU General Public License
|
||||||
@ -1480,10 +1480,10 @@ bool addFunctionJoin(vector<uint32_t>& joinedTables, JobStepVector& joinSteps,
|
|||||||
TupleInfo ti1 = getTupleInfo(key1, jobInfo);
|
TupleInfo ti1 = getTupleInfo(key1, jobInfo);
|
||||||
TupleInfo ti2 = getTupleInfo(key2, jobInfo);
|
TupleInfo ti2 = getTupleInfo(key2, jobInfo);
|
||||||
|
|
||||||
if (ti1.dtype == CalpontSystemCatalog::CHAR
|
// Enable Typeless JOIN for char and wide decimal types.
|
||||||
|| ti1.dtype == CalpontSystemCatalog::VARCHAR
|
if (datatypes::isCharType(ti1.dtype) ||
|
||||||
|| ti1.dtype == CalpontSystemCatalog::TEXT)
|
(datatypes::isWideDecimalType(ti1.dtype, ti1.width) ||
|
||||||
// || ti1.dtype == CalpontSystemCatalog::LONGDOUBLE)
|
datatypes::isWideDecimalType(ti2.dtype, ti2.width)))
|
||||||
m1->second.fTypeless = m2->second.fTypeless = true; // ti2 is compatible
|
m1->second.fTypeless = m2->second.fTypeless = true; // ti2 is compatible
|
||||||
else
|
else
|
||||||
m1->second.fTypeless = m2->second.fTypeless = false;
|
m1->second.fTypeless = m2->second.fTypeless = false;
|
||||||
|
@ -1736,13 +1736,13 @@ void TupleHashJoinStep::joinOneRG(uint32_t threadID, vector<RGData>* out,
|
|||||||
{
|
{
|
||||||
(*tjoiners)[j]->match(largeSideRow, k, threadID, &joinMatches[j]);
|
(*tjoiners)[j]->match(largeSideRow, k, threadID, &joinMatches[j]);
|
||||||
/* Debugging code to print the matches
|
/* Debugging code to print the matches
|
||||||
Row r;
|
Row r;
|
||||||
smallRGs[j].initRow(&r);
|
smallRGs[j].initRow(&r);
|
||||||
cout << joinMatches[j].size() << " matches: \n";
|
cout << joinMatches[j].size() << " matches: \n";
|
||||||
for (uint32_t z = 0; z < joinMatches[j].size(); z++) {
|
for (uint32_t z = 0; z < joinMatches[j].size(); z++) {
|
||||||
r.setData(joinMatches[j][z]);
|
r.setData(joinMatches[j][z]);
|
||||||
cout << " " << r.toString() << endl;
|
cout << " " << r.toString() << endl;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
matchCount = joinMatches[j].size();
|
matchCount = joinMatches[j].size();
|
||||||
|
|
||||||
|
@ -7,6 +7,10 @@ DROP DATABASE IF EXISTS mcol641_joins_db;
|
|||||||
CREATE DATABASE mcol641_joins_db;
|
CREATE DATABASE mcol641_joins_db;
|
||||||
USE mcol641_joins_db;
|
USE mcol641_joins_db;
|
||||||
|
|
||||||
|
--disable_query_log
|
||||||
|
SET default_storage_engine=ColumnStore;
|
||||||
|
--enable_query_log
|
||||||
|
|
||||||
CREATE TABLE cs1 (d1 DECIMAL(38), d2 DECIMAL(38,10), d3 DECIMAL(38,38)) ENGINE=columnstore;
|
CREATE TABLE cs1 (d1 DECIMAL(38), d2 DECIMAL(38,10), d3 DECIMAL(38,38)) ENGINE=columnstore;
|
||||||
CREATE TABLE cs2 (de1 DECIMAL(38,38), de2 DECIMAL(38,10)) ENGINE=columnstore;
|
CREATE TABLE cs2 (de1 DECIMAL(38,38), de2 DECIMAL(38,10)) ENGINE=columnstore;
|
||||||
|
|
||||||
|
1298
mysql-test/columnstore/future/mcol641-skewed-joins.result
Normal file
1298
mysql-test/columnstore/future/mcol641-skewed-joins.result
Normal file
File diff suppressed because it is too large
Load Diff
328
mysql-test/columnstore/future/mcol641-skewed-joins.test
Normal file
328
mysql-test/columnstore/future/mcol641-skewed-joins.test
Normal file
@ -0,0 +1,328 @@
|
|||||||
|
-- source ../include/have_columnstore.inc
|
||||||
|
-- source ../include/enable_ordered_only.inc
|
||||||
|
|
||||||
|
--disable_warnings
|
||||||
|
DROP DATABASE IF EXISTS mcol641_joins_db;
|
||||||
|
--enable_warnings
|
||||||
|
|
||||||
|
CREATE DATABASE mcol641_joins_db;
|
||||||
|
USE mcol641_joins_db;
|
||||||
|
|
||||||
|
--disable_query_log
|
||||||
|
SET default_storage_engine=ColumnStore;
|
||||||
|
--enable_query_log
|
||||||
|
|
||||||
|
CREATE TABLE cs1 (d1 DECIMAL(38), d2 DECIMAL(37), id TINYINT);
|
||||||
|
CREATE TABLE cs2 (i1 SMALLINT, i2 MEDIUMINT, i3 INT, i4 BIGINT);
|
||||||
|
|
||||||
|
INSERT INTO cs1 VALUES
|
||||||
|
(99,0,1),
|
||||||
|
(255,254,2),
|
||||||
|
(254,253,3),
|
||||||
|
(252,253,4),
|
||||||
|
(65535,2147483647,5),
|
||||||
|
(65534,2147483646,6),
|
||||||
|
(65533,65532,7),
|
||||||
|
(2147483647,2147483636,8),
|
||||||
|
(2147483646,2147483635,9),
|
||||||
|
(2147483645,2147483634,10),
|
||||||
|
(2147483645,9223372036854775804,11),
|
||||||
|
(9223372036854775807,0,12),
|
||||||
|
(9223372036854775807,2147483627,13),
|
||||||
|
(9223372036854775806,2147483626,14),
|
||||||
|
(9223372036854775805,9223372036854775704,15);
|
||||||
|
|
||||||
|
INSERT INTO cs2 VALUES
|
||||||
|
(255,254,NULL,NULL),
|
||||||
|
(254,253,NULL,NULL),
|
||||||
|
(251,251,NULL,NULL),
|
||||||
|
(NULL,65535,NULL,NULL),
|
||||||
|
(NULL,65535,2147483647,NULL),
|
||||||
|
(NULL,65534,2147483646,NULL),
|
||||||
|
(NULL,0,2147483641,NULL),
|
||||||
|
(NULL,NULL,2147483647,NULL),
|
||||||
|
(NULL,NULL,2147483647,2147483636),
|
||||||
|
(NULL,NULL,2147483646,2147483635),
|
||||||
|
(NULL,NULL,0,2147483641),
|
||||||
|
(NULL,NULL,NULL,9223372036854775807),
|
||||||
|
(NULL,NULL,2147483627,9223372036854775807),
|
||||||
|
(NULL,NULL,2147483626,9223372036854775806),
|
||||||
|
(NULL,NULL,0,1);
|
||||||
|
|
||||||
|
# Distributed PrimProc-based JOINs
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i2 ORDER BY id,i3;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i2 ORDER BY id,i3;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i3 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i3 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i4 AND cs2.i3 IS NOT NULL ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i4 AND cs2.i3 IS NOT NULL ORDER BY id;
|
||||||
|
|
||||||
|
# PrimProc-based composite key JOINs
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i1 AND cs1.d2 = cs2.i2 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i1 AND cs1.d2 = cs2.i2 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i2 AND cs1.d2 = cs2.i3 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i2 AND cs1.d2 = cs2.i3 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i3 AND cs1.d2 = cs2.i4 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i3 AND cs1.d2 = cs2.i4 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1 = cs2.i4 AND cs1.d2 = cs2.i3 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1 = cs2.i4 AND cs1.d2 = cs2.i3 ORDER BY id;
|
||||||
|
|
||||||
|
# ExeMgr-based JOINs
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i1 ORDER BY id,i2;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i1 ORDER BY id,i2;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i2 ORDER BY id,i3;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i2 ORDER BY id,i3;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i3 ORDER BY id,i2,i4;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i3 ORDER BY id,i2,i4;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i4 AND s2.i3 IS NOT NULL ORDER BY id,i3;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i4 AND s2.i3 IS NOT NULL ORDER BY id,i3;
|
||||||
|
|
||||||
|
# Functional JOIN
|
||||||
|
# Distributed PrimProc-based functional JOINs
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1 = cs2.i1-1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1 = cs2.i1-1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1 = cs2.i2-1 ORDER BY id,i3;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1 = cs2.i2-1 ORDER BY id,i3;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1 = cs2.i3-1 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1 = cs2.i3-1 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1 = cs2.i4-1 AND cs2.i3 IS NOT NULL ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1 = cs2.i4-1 AND cs2.i3 IS NOT NULL ORDER BY id;
|
||||||
|
|
||||||
|
# PrimProc-based composite key JOINs
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1= cs2.i1-1 AND cs1.d2-1= cs2.i2-1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1= cs2.i1-1 AND cs1.d2-1= cs2.i2-1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1= cs2.i2-1 AND cs1.d2-1= cs2.i3-1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1= cs2.i2-1 AND cs1.d2-1= cs2.i3-1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1= cs2.i3-1 AND cs1.d2-1= cs2.i4-1 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1= cs2.i3-1 AND cs1.d2-1= cs2.i4-1 ORDER BY id,i2,i4;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 INNER JOIN cs2 ON cs1.d1-1= cs2.i4-1 AND cs1.d2-1= cs2.i3-1 ORDER BY id;
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 INNER JOIN cs1 ON cs1.d1-1= cs2.i4-1 AND cs1.d2-1= cs2.i3-1 ORDER BY id;
|
||||||
|
|
||||||
|
# ExeMgr-based JOINs
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i1-1 ORDER BY id,i2;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i1-1 ORDER BY id,i2;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i2-1 ORDER BY id,i3;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i2-1 ORDER BY id,i3;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i3-1 ORDER BY id,i2,i4;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i3-1 ORDER BY id,i2,i4;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 INNER JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i4-1 AND s2.i3 IS NOT NULL ORDER BY id,i3;
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 INNER JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i4-1 AND s2.i3 IS NOT NULL ORDER BY id,i3;
|
||||||
|
|
||||||
|
-- source ../include/disable_ordered_only.inc
|
||||||
|
|
||||||
|
# Skewed OUTER JOIN
|
||||||
|
TRUNCATE cs1;
|
||||||
|
TRUNCATE cs2;
|
||||||
|
|
||||||
|
INSERT INTO cs1 VALUES
|
||||||
|
(99,0,1),
|
||||||
|
(255,254,2),
|
||||||
|
(254,253,3),
|
||||||
|
(252,253,4),
|
||||||
|
(-252,253,5),
|
||||||
|
(65535,2147483647,5),
|
||||||
|
(65534,2147483646,6),
|
||||||
|
(65533,65532,7),
|
||||||
|
(2147483647,2147483636,8),
|
||||||
|
(2147483646,2147483635,9),
|
||||||
|
(2147483645,2147483634,10),
|
||||||
|
(2147483645,9223372036854775804,11),
|
||||||
|
(9223372036854775807,0,12),
|
||||||
|
(9223372036854775807,2147483627,13),
|
||||||
|
(9223372036854775806,2147483626,14),
|
||||||
|
(9223372036854775805,9223372036854775704,15);
|
||||||
|
|
||||||
|
INSERT INTO cs2 VALUES
|
||||||
|
(255,254,NULL,NULL),
|
||||||
|
(254,253,NULL,NULL),
|
||||||
|
(251,251,NULL,NULL),
|
||||||
|
(-252,253,NULL,NULL),
|
||||||
|
(-250,253,NULL,NULL),
|
||||||
|
(NULL,65535,NULL,NULL),
|
||||||
|
(NULL,65535,2147483647,NULL),
|
||||||
|
(NULL,65534,2147483646,NULL),
|
||||||
|
(NULL,0,2147483641,NULL),
|
||||||
|
(NULL,NULL,2147483647,NULL),
|
||||||
|
(NULL,NULL,2147483647,2147483636),
|
||||||
|
(NULL,NULL,2147483646,2147483635),
|
||||||
|
(NULL,NULL,0,2147483641),
|
||||||
|
(NULL,NULL,NULL,9223372036854775807),
|
||||||
|
(NULL,NULL,2147483627,9223372036854775807),
|
||||||
|
(NULL,NULL,2147483626,9223372036854775806),
|
||||||
|
(NULL,NULL,0,1);
|
||||||
|
|
||||||
|
# Distributed PrimProc-based JOINs
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i2 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i2 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i3 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i3 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i4 AND cs2.i3 IS NOT NULL ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i4 AND cs2.i3 IS NOT NULL ;
|
||||||
|
|
||||||
|
# PrimProc-based composite key JOINs
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i1 AND cs1.d2 = cs2.i2 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i1 AND cs1.d2 = cs2.i2 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i2 AND cs1.d2 = cs2.i3 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i2 AND cs1.d2 = cs2.i3 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i3 AND cs1.d2 = cs2.i4 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i3 AND cs1.d2 = cs2.i4 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1 = cs2.i4 AND cs1.d2 = cs2.i3 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1 = cs2.i4 AND cs1.d2 = cs2.i3 ;
|
||||||
|
|
||||||
|
# ExeMgr-based JOINs
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i2 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i2 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i3 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i3 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1=s2.i4 AND s2.i3 IS NOT NULL ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1=s2.i4 AND s2.i3 IS NOT NULL ;
|
||||||
|
|
||||||
|
# Functional JOIN
|
||||||
|
# Distributed PrimProc-based functional JOINs
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1 = cs2.i1-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1 = cs2.i1-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1 = cs2.i2-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1 = cs2.i2-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1 = cs2.i3-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1 = cs2.i3-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1 = cs2.i4-1 AND cs2.i3 IS NOT NULL ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1 = cs2.i4-1 AND cs2.i3 IS NOT NULL ;
|
||||||
|
|
||||||
|
# PrimProc-based composite key JOINs
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1= cs2.i1-1 AND cs1.d2-1= cs2.i2-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1= cs2.i1-1 AND cs1.d2-1= cs2.i2-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1= cs2.i2-1 AND cs1.d2-1= cs2.i3-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1= cs2.i2-1 AND cs1.d2-1= cs2.i3-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1= cs2.i3-1 AND cs1.d2-1= cs2.i4-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1= cs2.i3-1 AND cs1.d2-1= cs2.i4-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs1 LEFT JOIN cs2 ON cs1.d1-1= cs2.i4-1 AND cs1.d2-1= cs2.i3-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT cs1.*, cs2.* FROM cs2 LEFT JOIN cs1 ON cs1.d1-1= cs2.i4-1 AND cs1.d2-1= cs2.i3-1 ;
|
||||||
|
|
||||||
|
# ExeMgr-based JOINs
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i1-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i1-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i2-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i2-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i3-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i3-1 ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs1)s1 LEFT JOIN (SELECT * FROM cs2)s2 ON s1.d1-1=s2.i4-1 AND s2.i3 IS NOT NULL ;
|
||||||
|
--sorted_result
|
||||||
|
SELECT s1.*,s2.* FROM (SELECT * FROM cs2)s2 LEFT JOIN (SELECT * FROM cs1)s1 ON s1.d1-1=s2.i4-1 AND s2.i3 IS NOT NULL ;
|
||||||
|
|
||||||
|
# Misc skewed JOIN
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a DECIMAL(10,1), b DECIMAL(20,1));
|
||||||
|
INSERT INTO t1 VALUES (10.1,20.1);
|
||||||
|
CREATE TABLE t2 (a DECIMAL(20,1), b DECIMAL(10,1));
|
||||||
|
INSERT INTO t2 VALUES (10.1,20.1);
|
||||||
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b;
|
||||||
|
|
||||||
|
DROP TABLE t1,t2;
|
||||||
|
CREATE TABLE t1 (a CHAR(10), b DECIMAL(10,1));
|
||||||
|
INSERT INTO t1 VALUES (10.1,20.1);
|
||||||
|
CREATE TABLE t2 (a CHAR(10), b DECIMAL(20,1));
|
||||||
|
INSERT INTO t2 VALUES (10.1,20.1);
|
||||||
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b;
|
||||||
|
SELECT * FROM t2,t1 WHERE t1.a=t2.a AND t1.b=t2.b;
|
||||||
|
|
||||||
|
DROP TABLE t1,t2;
|
||||||
|
CREATE TABLE t1 (a DECIMAL(10,1), b CHAR(10));
|
||||||
|
INSERT INTO t1 VALUES (10.1,20.1);
|
||||||
|
CREATE TABLE t2 (a DECIMAL(20,1), b CHAR(10));
|
||||||
|
INSERT INTO t2 VALUES (10.1,20.1);
|
||||||
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b;
|
||||||
|
SELECT * FROM t2,t1 WHERE t1.a=t2.a AND t1.b=t2.b;
|
||||||
|
|
||||||
|
SELECT * FROM t2,t1 WHERE (t1.a,t1.b)=(t2.a,t2.b);
|
||||||
|
SELECT * FROM t1,t2 WHERE (t1.a,t1.b)=(t2.a,t2.b);
|
||||||
|
|
||||||
|
SELECT * FROM t1 JOIN t2 USING (a,b);
|
||||||
|
SELECT * FROM t2 JOIN t1 USING (a,b);
|
||||||
|
|
||||||
|
# Testing the max number of skewed columns in a join.
|
||||||
|
DROP TABLE t1,t2;
|
||||||
|
CREATE TABLE t1 (a DECIMAL(10,1), b DECIMAL(20,1),a1 DECIMAL(10,1), b1 DECIMAL(20,1),a2 DECIMAL(10,1), b2 DECIMAL(20,1),a3 DECIMAL(10,1), b3 DECIMAL(20,1),a4 DECIMAL(10,1), b4 DECIMAL(20,1),a5 DECIMAL(10,1));
|
||||||
|
INSERT INTO t1 VALUES (10.1,20.1,10.1,20.1,10.1,20.1,10.1,20.1,10.1,20.1,10.1);
|
||||||
|
CREATE TABLE t2 (a DECIMAL(20,1), b DECIMAL(10,1),a1 DECIMAL(20,1), b1 DECIMAL(10,1),a2 DECIMAL(20,1), b2 DECIMAL(10,1),a3 DECIMAL(20,1), b3 DECIMAL(10,1),a4 DECIMAL(20,1), b4 DECIMAL(10,1),a5 DECIMAL(20,1));
|
||||||
|
INSERT INTO t2 VALUES (10.1,20.1,10.1,20.1,10.1,20.1,10.1,20.1,10.1,20.1,10.1);
|
||||||
|
# These work because 9 skewed key columns are within the max of 10.
|
||||||
|
SELECT * FROM t1 INNER JOIN t2 USING(a,b,a1,b1,a2,b2,a3,b3,a4);
|
||||||
|
SELECT * FROM t2 INNER JOIN t1 USING(a,b,a1,b1,a2,b2,a3,b3,a4);
|
||||||
|
# These do not work: 11 skewed key columns exceed the max of 10.
|
||||||
|
#SELECT * FROM t1 INNER JOIN t2 USING(a,b,a1,b1,a2,b2,a3,b3,a4,b4,a5);
|
||||||
|
#SELECT * FROM t2 INNER JOIN t1 USING(a,b,a1,b1,a2,b2,a3,b3,a4,b4,a5);
|
||||||
|
|
||||||
|
# Mixing skewed columns with non-skewed.
|
||||||
|
DROP TABLE t1,t2;
|
||||||
|
CREATE TABLE t1 (a DECIMAL(10,1), t text, b DECIMAL(20,1), i1 int, a1 DECIMAL(10,1), b1 DECIMAL(20,1),a2 DECIMAL(10,1), b2 DECIMAL(20,1),a3 DECIMAL(10,1), b3 DECIMAL(20,1),a4 DECIMAL(10,1), b4 DECIMAL(20,1),a5 DECIMAL(10,1));
|
||||||
|
INSERT INTO t1 VALUES (10.1,'some',20.1,42,10.1,20.1,10.1,20.1,10.1,20.1,10.1,20.1,10.1);
|
||||||
|
CREATE TABLE t2 (a DECIMAL(20,1), b DECIMAL(10,1), t text, a1 DECIMAL(20,1), i1 int, b1 DECIMAL(10,1),a2 DECIMAL(20,1), b2 DECIMAL(10,1),a3 DECIMAL(20,1), b3 DECIMAL(10,1),a4 DECIMAL(20,1), b4 DECIMAL(10,1),a5 DECIMAL(20,1));
|
||||||
|
INSERT INTO t2 VALUES (10.1,20.1,'some',10.1,42,20.1,10.1,20.1,10.1,20.1,10.1,20.1,10.1);
|
||||||
|
|
||||||
|
# These work because only 10 of the 12 key columns are skewed (t and i1 are not), which is the max.
|
||||||
|
SELECT * FROM t1 INNER JOIN t2 USING(a,b,a1,b1,a2,b2,a3,b3,a4,b4,t,i1);
|
||||||
|
SELECT * FROM t2 INNER JOIN t1 USING(a,b,a1,b1,a2,b2,a3,b3,a4,b4,t,i1);
|
||||||
|
# These do not work: adding a5 gives 11 skewed key columns, exceeding the max of 10.
|
||||||
|
#SELECT * FROM t1 INNER JOIN t2 USING(a,b,a1,b1,a2,b2,a3,b3,a4,b4,a5,t,i1);
|
||||||
|
#SELECT * FROM t2 INNER JOIN t1 USING(a,b,a1,b1,a2,b2,a3,b3,a4,b4,a5,t,i1);
|
||||||
|
|
||||||
|
SELECT t1.a,t1.t,t1.i1 FROM t1 INNER JOIN (SELECT * from t2) s1 USING(a,b);
|
||||||
|
SELECT t2.a,t2.t,s1.i1 FROM t2 INNER JOIN (SELECT * from t1) s1 USING(a,b);
|
||||||
|
SELECT t1.a,t1.t,t1.i1 FROM t1 INNER JOIN (SELECT * from t2) s1 where t1.a+1=s1.a+1 and t1.b+1=s1.b+1;
|
||||||
|
SELECT t2.a,t2.t,t2.i1 FROM t2 INNER JOIN (SELECT * from t1) s1 where t2.a+1=s1.a+1 and t2.b+1=s1.b+1;
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
DROP DATABASE mcol641_joins_db;
|
3
mysql-test/columnstore/include/disable_ordered_only.inc
Normal file
3
mysql-test/columnstore/include/disable_ordered_only.inc
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Switches the global columnstore_ordered_only variable off; query logging is
# disabled around the statement and re-enabled afterwards.
--disable_query_log
|
||||||
|
set global columnstore_ordered_only=off;
|
||||||
|
--enable_query_log
|
3
mysql-test/columnstore/include/enable_ordered_only.inc
Normal file
3
mysql-test/columnstore/include/enable_ordered_only.inc
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Switches the global columnstore_ordered_only variable on; query logging is
# disabled around the statement and re-enabled afterwards.
--disable_query_log
|
||||||
|
set global columnstore_ordered_only=on;
|
||||||
|
--enable_query_log
|
@ -129,6 +129,9 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor() :
|
|||||||
hasFilterStep(false),
|
hasFilterStep(false),
|
||||||
filtOnString(false),
|
filtOnString(false),
|
||||||
prefetchThreshold(0),
|
prefetchThreshold(0),
|
||||||
|
mJOINHasSkewedKeyColumn(false),
|
||||||
|
mSmallSideRGPtr(nullptr),
|
||||||
|
mSmallSideKeyColumnsPtr(nullptr),
|
||||||
hasDictStep(false),
|
hasDictStep(false),
|
||||||
sockIndex(0),
|
sockIndex(0),
|
||||||
endOfJoinerRan(false),
|
endOfJoinerRan(false),
|
||||||
@ -175,6 +178,9 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor(ByteStream& b, double prefetch,
|
|||||||
hasFilterStep(false),
|
hasFilterStep(false),
|
||||||
filtOnString(false),
|
filtOnString(false),
|
||||||
prefetchThreshold(prefetch),
|
prefetchThreshold(prefetch),
|
||||||
|
mJOINHasSkewedKeyColumn(false),
|
||||||
|
mSmallSideRGPtr(nullptr),
|
||||||
|
mSmallSideKeyColumnsPtr(nullptr),
|
||||||
hasDictStep(false),
|
hasDictStep(false),
|
||||||
sockIndex(0),
|
sockIndex(0),
|
||||||
endOfJoinerRan(false),
|
endOfJoinerRan(false),
|
||||||
@ -297,7 +303,6 @@ void BatchPrimitiveProcessor::initBPP(ByteStream& bs)
|
|||||||
for (uint j = 0; j < joinerCount; ++j)
|
for (uint j = 0; j < joinerCount; ++j)
|
||||||
tJoiners[j].reset(new boost::shared_ptr<TJoiner>[processorThreads]);
|
tJoiners[j].reset(new boost::shared_ptr<TJoiner>[processorThreads]);
|
||||||
|
|
||||||
//_pools.reset(new boost::shared_ptr<utils::SimplePool>[joinerCount]);
|
|
||||||
tlJoiners.reset(new boost::shared_array<boost::shared_ptr<TLJoiner> >[joinerCount]);
|
tlJoiners.reset(new boost::shared_array<boost::shared_ptr<TLJoiner> >[joinerCount]);
|
||||||
for (uint j = 0; j < joinerCount; ++j)
|
for (uint j = 0; j < joinerCount; ++j)
|
||||||
tlJoiners[j].reset(new boost::shared_ptr<TLJoiner>[processorThreads]);
|
tlJoiners[j].reset(new boost::shared_ptr<TLJoiner>[processorThreads]);
|
||||||
@ -310,8 +315,9 @@ void BatchPrimitiveProcessor::initBPP(ByteStream& bs)
|
|||||||
tJoinerSizes.reset(new std::atomic<uint32_t>[joinerCount]);
|
tJoinerSizes.reset(new std::atomic<uint32_t>[joinerCount]);
|
||||||
largeSideKeyColumns.reset(new uint32_t[joinerCount]);
|
largeSideKeyColumns.reset(new uint32_t[joinerCount]);
|
||||||
tlLargeSideKeyColumns.reset(new vector<uint32_t>[joinerCount]);
|
tlLargeSideKeyColumns.reset(new vector<uint32_t>[joinerCount]);
|
||||||
|
tlSmallSideKeyColumns.reset(new std::vector<uint32_t>);
|
||||||
typelessJoin.reset(new bool[joinerCount]);
|
typelessJoin.reset(new bool[joinerCount]);
|
||||||
tlKeyLengths.reset(new uint32_t[joinerCount]);
|
tlSmallSideKeyLengths.reset(new uint32_t[joinerCount]);
|
||||||
|
|
||||||
storedKeyAllocators.reset(new PoolAllocator[joinerCount]);
|
storedKeyAllocators.reset(new PoolAllocator[joinerCount]);
|
||||||
for (uint j = 0; j < joinerCount; ++j)
|
for (uint j = 0; j < joinerCount; ++j)
|
||||||
@ -322,6 +328,7 @@ void BatchPrimitiveProcessor::initBPP(ByteStream& bs)
|
|||||||
joinFEFilters.reset(new scoped_ptr<FuncExpWrapper>[joinerCount]);
|
joinFEFilters.reset(new scoped_ptr<FuncExpWrapper>[joinerCount]);
|
||||||
hasJoinFEFilters = false;
|
hasJoinFEFilters = false;
|
||||||
hasSmallOuterJoin = false;
|
hasSmallOuterJoin = false;
|
||||||
|
bool smallSideRGRecvd = false;
|
||||||
|
|
||||||
for (i = 0; i < joinerCount; i++)
|
for (i = 0; i < joinerCount; i++)
|
||||||
{
|
{
|
||||||
@ -356,14 +363,31 @@ void BatchPrimitiveProcessor::initBPP(ByteStream& bs)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
deserializeVector<uint32_t>(bs, tlLargeSideKeyColumns[i]);
|
deserializeVector<uint32_t>(bs, tlLargeSideKeyColumns[i]);
|
||||||
bs >> tlKeyLengths[i];
|
bs >> tlSmallSideKeyLengths[i];
|
||||||
//storedKeyAllocators[i] = PoolAllocator();
|
bs >> mJOINHasSkewedKeyColumn;
|
||||||
|
// Deser smallSideRG if key data types are different, e.g. INT vs wide-DECIMAL.
|
||||||
|
if (mJOINHasSkewedKeyColumn && !smallSideRGRecvd)
|
||||||
|
{
|
||||||
|
smallSideRGs.emplace_back(rowgroup::RowGroup(bs));
|
||||||
|
// The number of LargeSide key columns equals the number of SmallSide key columns.
|
||||||
|
deserializeVector<uint32_t>(bs, *tlSmallSideKeyColumns);
|
||||||
|
mSmallSideRGPtr = &smallSideRGs[0];
|
||||||
|
mSmallSideKeyColumnsPtr = &(*tlSmallSideKeyColumns);
|
||||||
|
smallSideRGRecvd = true;
|
||||||
|
}
|
||||||
|
|
||||||
for (uint j = 0; j < processorThreads; ++j)
|
for (uint j = 0; j < processorThreads; ++j)
|
||||||
tlJoiners[i][j].reset(new TLJoiner(10,
|
{
|
||||||
TupleJoiner::TypelessDataHasher(&outputRG,
|
auto tlHasher = TupleJoiner::TypelessDataHasher(&outputRG,
|
||||||
&tlLargeSideKeyColumns[i]),
|
&tlLargeSideKeyColumns[i],
|
||||||
TupleJoiner::TypelessDataComparator(&outputRG,
|
mSmallSideKeyColumnsPtr,
|
||||||
&tlLargeSideKeyColumns[i])));
|
mSmallSideRGPtr);
|
||||||
|
auto tlComparator = TupleJoiner::TypelessDataComparator(&outputRG,
|
||||||
|
&tlLargeSideKeyColumns[i],
|
||||||
|
mSmallSideKeyColumnsPtr,
|
||||||
|
mSmallSideRGPtr);
|
||||||
|
tlJoiners[i][j].reset(new TLJoiner(10, tlHasher, tlComparator));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -610,7 +634,6 @@ void BatchPrimitiveProcessor::addToJoiner(ByteStream& bs)
|
|||||||
if (typelessJoin[joinerNum])
|
if (typelessJoin[joinerNum])
|
||||||
{
|
{
|
||||||
utils::VLArray<vector<pair<TypelessData, uint32_t> > > tmpBuckets(processorThreads);
|
utils::VLArray<vector<pair<TypelessData, uint32_t> > > tmpBuckets(processorThreads);
|
||||||
TypelessData tlLargeKey;
|
|
||||||
uint8_t nullFlag;
|
uint8_t nullFlag;
|
||||||
PoolAllocator &storedKeyAllocator = storedKeyAllocators[joinerNum];
|
PoolAllocator &storedKeyAllocator = storedKeyAllocators[joinerNum];
|
||||||
// this first loop hashes incoming values into vectors that parallel the hash tables.
|
// this first loop hashes incoming values into vectors that parallel the hash tables.
|
||||||
@ -620,10 +643,20 @@ void BatchPrimitiveProcessor::addToJoiner(ByteStream& bs)
|
|||||||
bs >> nullFlag;
|
bs >> nullFlag;
|
||||||
if (nullFlag == 0)
|
if (nullFlag == 0)
|
||||||
{
|
{
|
||||||
tlLargeKey.deserialize(bs, storedKeyAllocator);
|
TypelessData tlSmallSideKey(bs, storedKeyAllocator);
|
||||||
|
if (mJOINHasSkewedKeyColumn)
|
||||||
|
tlSmallSideKey.setSmallSideWithSkewedData();
|
||||||
|
else
|
||||||
|
tlSmallSideKey.setSmallSide();
|
||||||
bs >> tlIndex;
|
bs >> tlIndex;
|
||||||
bucket = tlLargeKey.hash(outputRG, tlLargeSideKeyColumns[joinerNum]) & ptMask;
|
// The bucket number corresponds to the index used later when inserting TL keys into the permanent JOIN hash map.
|
||||||
tmpBuckets[bucket].push_back(make_pair(tlLargeKey, tlIndex));
|
auto ha = tlSmallSideKey.hash(outputRG,
|
||||||
|
tlLargeSideKeyColumns[joinerNum],
|
||||||
|
mSmallSideKeyColumnsPtr,
|
||||||
|
mSmallSideRGPtr);
|
||||||
|
|
||||||
|
bucket = ha & ptMask;
|
||||||
|
tmpBuckets[bucket].push_back(make_pair(tlSmallSideKey, tlIndex));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
++nullCount;
|
++nullCount;
|
||||||
@ -914,11 +947,6 @@ void BatchPrimitiveProcessor::initProcessor()
|
|||||||
{
|
{
|
||||||
outputRG.initRow(&oldRow);
|
outputRG.initRow(&oldRow);
|
||||||
outputRG.initRow(&newRow);
|
outputRG.initRow(&newRow);
|
||||||
tmpKeyAllocators.reset(new FixedAllocator[joinerCount]);
|
|
||||||
|
|
||||||
for (i = 0; i < joinerCount; i++)
|
|
||||||
if (typelessJoin[i])
|
|
||||||
tmpKeyAllocators[i] = FixedAllocator(tlKeyLengths[i], true);
|
|
||||||
|
|
||||||
tSmallSideMatches.reset(new MatchedData[joinerCount]);
|
tSmallSideMatches.reset(new MatchedData[joinerCount]);
|
||||||
keyColumnProj.reset(new bool[projectCount]);
|
keyColumnProj.reset(new bool[projectCount]);
|
||||||
@ -1126,7 +1154,6 @@ void BatchPrimitiveProcessor::executeTupleJoin()
|
|||||||
uint32_t newRowCount = 0, i, j;
|
uint32_t newRowCount = 0, i, j;
|
||||||
vector<uint32_t> matches;
|
vector<uint32_t> matches;
|
||||||
uint64_t largeKey;
|
uint64_t largeKey;
|
||||||
TypelessData tlLargeKey;
|
|
||||||
|
|
||||||
outputRG.getRow(0, &oldRow);
|
outputRG.getRow(0, &oldRow);
|
||||||
outputRG.getRow(0, &newRow);
|
outputRG.getRow(0, &newRow);
|
||||||
@ -1195,8 +1222,10 @@ void BatchPrimitiveProcessor::executeTupleJoin()
|
|||||||
{
|
{
|
||||||
//cout << " typeless join\n";
|
//cout << " typeless join\n";
|
||||||
// the null values are not sent by UM in typeless case. null -> !found
|
// the null values are not sent by UM in typeless case. null -> !found
|
||||||
tlLargeKey = TypelessData(&oldRow);
|
TypelessData tlLargeKey(&oldRow);
|
||||||
uint bucket = oldRow.hashTypeless(tlLargeSideKeyColumns[j]) & ptMask;
|
uint bucket = oldRow.hashTypeless(tlLargeSideKeyColumns[j],
|
||||||
|
mSmallSideKeyColumnsPtr,
|
||||||
|
mSmallSideRGPtr ? &mSmallSideRGPtr->getColWidths() : nullptr) & ptMask;
|
||||||
found = tlJoiners[j][bucket]->find(tlLargeKey) != tlJoiners[j][bucket]->end();
|
found = tlJoiners[j][bucket]->find(tlLargeKey) != tlJoiners[j][bucket]->end();
|
||||||
|
|
||||||
if ((!found && !(joinTypes[j] & (LARGEOUTER | ANTI))) ||
|
if ((!found && !(joinTypes[j] & (LARGEOUTER | ANTI))) ||
|
||||||
@ -1335,21 +1364,23 @@ void BatchPrimitiveProcessor::executeTupleJoin()
|
|||||||
/* Finally, copy the row into the output */
|
/* Finally, copy the row into the output */
|
||||||
if (j == joinerCount)
|
if (j == joinerCount)
|
||||||
{
|
{
|
||||||
|
// We need to update 8 and 16 bytes in values and wide128Values buffers
|
||||||
|
// otherwise unrelated values will be observed in the JOIN-ed output RGData.
|
||||||
if (i != newRowCount)
|
if (i != newRowCount)
|
||||||
{
|
{
|
||||||
values[newRowCount] = values[i];
|
values[newRowCount] = values[i];
|
||||||
|
if (mJOINHasSkewedKeyColumn)
|
||||||
|
wide128Values[newRowCount] = wide128Values[i];
|
||||||
relRids[newRowCount] = relRids[i];
|
relRids[newRowCount] = relRids[i];
|
||||||
copyRow(oldRow, &newRow);
|
copyRow(oldRow, &newRow);
|
||||||
//cout << "joined row: " << newRow.toString() << endl;
|
//cout << "joined row: " << newRow.toString() << endl;
|
||||||
//memcpy(newRow.getData(), oldRow.getData(), oldRow.getSize());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
newRowCount++;
|
newRowCount++;
|
||||||
newRow.nextRow();
|
newRow.nextRow();
|
||||||
}
|
}
|
||||||
|
|
||||||
//else
|
//else
|
||||||
// cout << "j != joinerCount\n";
|
// cout << "j != joinerCount\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2220,7 +2251,6 @@ int BatchPrimitiveProcessor::operator()()
|
|||||||
}
|
}
|
||||||
catch (std::exception& e)
|
catch (std::exception& e)
|
||||||
{
|
{
|
||||||
cerr << "BPP::sendResponse(): " << e.what() << endl;
|
|
||||||
break; // If we make this throw, be sure to do the cleanup at the end
|
break; // If we make this throw, be sure to do the cleanup at the end
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2382,13 +2412,22 @@ SBPP BatchPrimitiveProcessor::duplicate()
|
|||||||
//bpp->_pools = _pools;
|
//bpp->_pools = _pools;
|
||||||
bpp->typelessJoin = typelessJoin;
|
bpp->typelessJoin = typelessJoin;
|
||||||
bpp->tlLargeSideKeyColumns = tlLargeSideKeyColumns;
|
bpp->tlLargeSideKeyColumns = tlLargeSideKeyColumns;
|
||||||
|
bpp->tlSmallSideKeyColumns = tlSmallSideKeyColumns;
|
||||||
bpp->tlJoiners = tlJoiners;
|
bpp->tlJoiners = tlJoiners;
|
||||||
bpp->tlKeyLengths = tlKeyLengths;
|
bpp->tlSmallSideKeyLengths = tlSmallSideKeyLengths;
|
||||||
bpp->storedKeyAllocators = storedKeyAllocators;
|
bpp->storedKeyAllocators = storedKeyAllocators;
|
||||||
bpp->joinNullValues = joinNullValues;
|
bpp->joinNullValues = joinNullValues;
|
||||||
bpp->doMatchNulls = doMatchNulls;
|
bpp->doMatchNulls = doMatchNulls;
|
||||||
bpp->hasJoinFEFilters = hasJoinFEFilters;
|
bpp->hasJoinFEFilters = hasJoinFEFilters;
|
||||||
bpp->hasSmallOuterJoin = hasSmallOuterJoin;
|
bpp->hasSmallOuterJoin = hasSmallOuterJoin;
|
||||||
|
bpp->mJOINHasSkewedKeyColumn = mJOINHasSkewedKeyColumn;
|
||||||
|
bpp->mSmallSideRGPtr = mSmallSideRGPtr;
|
||||||
|
bpp->mSmallSideKeyColumnsPtr = mSmallSideKeyColumnsPtr;
|
||||||
|
if (!getTupleJoinRowGroupData && mJOINHasSkewedKeyColumn)
|
||||||
|
{
|
||||||
|
idbassert(!smallSideRGs.empty());
|
||||||
|
bpp->smallSideRGs.push_back(smallSideRGs[0]);
|
||||||
|
}
|
||||||
|
|
||||||
if (hasJoinFEFilters)
|
if (hasJoinFEFilters)
|
||||||
{
|
{
|
||||||
@ -2714,7 +2753,9 @@ inline void BatchPrimitiveProcessor::getJoinResults(const Row& r, uint32_t jInde
|
|||||||
}
|
}
|
||||||
|
|
||||||
TypelessData largeKey(&r);
|
TypelessData largeKey(&r);
|
||||||
bucket = r.hashTypeless(tlLargeSideKeyColumns[jIndex]) & ptMask;
|
bucket = r.hashTypeless(tlLargeSideKeyColumns[jIndex],
|
||||||
|
mSmallSideKeyColumnsPtr,
|
||||||
|
mSmallSideRGPtr ? &mSmallSideRGPtr->getColWidths() : nullptr) & ptMask;
|
||||||
pair<TLJoiner::iterator, TLJoiner::iterator> range =
|
pair<TLJoiner::iterator, TLJoiner::iterator> range =
|
||||||
tlJoiners[jIndex][bucket]->equal_range(largeKey);
|
tlJoiners[jIndex][bucket]->equal_range(largeKey);
|
||||||
for (; range.first != range.second; ++range.first)
|
for (; range.first != range.second; ++range.first)
|
||||||
|
@ -87,7 +87,6 @@ public:
|
|||||||
std::runtime_error(s) { }
|
std::runtime_error(s) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class BatchPrimitiveProcessor
|
class BatchPrimitiveProcessor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -184,7 +183,6 @@ private:
|
|||||||
void writeProjectionPreamble();
|
void writeProjectionPreamble();
|
||||||
void makeResponse();
|
void makeResponse();
|
||||||
void sendResponse();
|
void sendResponse();
|
||||||
|
|
||||||
/* Used by scan operations to increment the LBIDs in successive steps */
|
/* Used by scan operations to increment the LBIDs in successive steps */
|
||||||
void nextLBID();
|
void nextLBID();
|
||||||
|
|
||||||
@ -348,13 +346,17 @@ private:
|
|||||||
/* extra typeless join vars & fcns*/
|
/* extra typeless join vars & fcns*/
|
||||||
boost::shared_array<bool> typelessJoin;
|
boost::shared_array<bool> typelessJoin;
|
||||||
boost::shared_array<std::vector<uint32_t> > tlLargeSideKeyColumns;
|
boost::shared_array<std::vector<uint32_t> > tlLargeSideKeyColumns;
|
||||||
|
std::shared_ptr<std::vector<uint32_t>> tlSmallSideKeyColumns;
|
||||||
boost::shared_array<boost::shared_array<boost::shared_ptr<TLJoiner> > > tlJoiners;
|
boost::shared_array<boost::shared_array<boost::shared_ptr<TLJoiner> > > tlJoiners;
|
||||||
boost::shared_array<uint32_t> tlKeyLengths;
|
boost::shared_array<uint32_t> tlSmallSideKeyLengths;
|
||||||
|
// True if the smallSide and largeSide TypelessData key column types differ, e.g. BIGINT vs DECIMAL(38).
|
||||||
|
bool mJOINHasSkewedKeyColumn;
|
||||||
|
const rowgroup::RowGroup* mSmallSideRGPtr;
|
||||||
|
const std::vector<uint32_t>* mSmallSideKeyColumnsPtr;
|
||||||
|
|
||||||
inline void getJoinResults(const rowgroup::Row& r, uint32_t jIndex, std::vector<uint32_t>& v);
|
inline void getJoinResults(const rowgroup::Row& r, uint32_t jIndex, std::vector<uint32_t>& v);
|
||||||
// these allocators hold the memory for the keys stored in tlJoiners
|
// these allocators hold the memory for the keys stored in tlJoiners
|
||||||
boost::shared_array<utils::PoolAllocator> storedKeyAllocators;
|
boost::shared_array<utils::PoolAllocator> storedKeyAllocators;
|
||||||
// these allocators hold the memory for the large side keys which are short-lived
|
|
||||||
boost::scoped_array<utils::FixedAllocator> tmpKeyAllocators;
|
|
||||||
|
|
||||||
/* PM Aggregation */
|
/* PM Aggregation */
|
||||||
rowgroup::RowGroup joinedRG; // if there's a join, the rows are formatted with this
|
rowgroup::RowGroup joinedRG; // if there's a join, the rows are formatted with this
|
||||||
|
@ -126,7 +126,7 @@ public:
|
|||||||
}
|
}
|
||||||
uint32_t finalize() const
|
uint32_t finalize() const
|
||||||
{
|
{
|
||||||
return (uint32_t) mPart1;
|
return (uint32_t)mPart1;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -30,7 +30,6 @@
|
|||||||
#include "lbidlist.h"
|
#include "lbidlist.h"
|
||||||
#include "spinlock.h"
|
#include "spinlock.h"
|
||||||
#include "vlarray.h"
|
#include "vlarray.h"
|
||||||
#include "mcs_string.h"
|
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -42,6 +41,7 @@ using namespace joblist;
|
|||||||
namespace joiner
|
namespace joiner
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// Typed joiner ctor
|
||||||
TupleJoiner::TupleJoiner(
|
TupleJoiner::TupleJoiner(
|
||||||
const rowgroup::RowGroup& smallInput,
|
const rowgroup::RowGroup& smallInput,
|
||||||
const rowgroup::RowGroup& largeInput,
|
const rowgroup::RowGroup& largeInput,
|
||||||
@ -145,6 +145,7 @@ TupleJoiner::TupleJoiner(
|
|||||||
nullValueForJoinColumn = smallNullRow.getSignedNullValue(smallJoinColumn);
|
nullValueForJoinColumn = smallNullRow.getSignedNullValue(smallJoinColumn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Typeless joiner ctor
|
||||||
TupleJoiner::TupleJoiner(
|
TupleJoiner::TupleJoiner(
|
||||||
const rowgroup::RowGroup& smallInput,
|
const rowgroup::RowGroup& smallInput,
|
||||||
const rowgroup::RowGroup& largeInput,
|
const rowgroup::RowGroup& largeInput,
|
||||||
@ -182,67 +183,31 @@ TupleJoiner::TupleJoiner(
|
|||||||
smallNullRow.initToNull();
|
smallNullRow.initToNull();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = keyLength = 0; i < smallKeyColumns.size(); i++)
|
keyLength = calculateKeyLength(smallKeyColumns, smallRG, &largeKeyColumns, &largeRG);
|
||||||
{
|
|
||||||
if (smallRG.getColTypes()[smallKeyColumns[i]] == CalpontSystemCatalog::CHAR ||
|
|
||||||
smallRG.getColTypes()[smallKeyColumns[i]] == CalpontSystemCatalog::VARCHAR
|
|
||||||
||
|
|
||||||
smallRG.getColTypes()[smallKeyColumns[i]] == CalpontSystemCatalog::TEXT)
|
|
||||||
{
|
|
||||||
keyLength += smallRG.getColumnWidth(smallKeyColumns[i]) + 2; // +2 for length
|
|
||||||
|
|
||||||
// MCOL-698: if we don't do this LONGTEXT allocates 32TB RAM
|
|
||||||
if (keyLength > 65536)
|
|
||||||
keyLength = 65536;
|
|
||||||
}
|
|
||||||
else if (smallRG.getColTypes()[smallKeyColumns[i]] == CalpontSystemCatalog::LONGDOUBLE)
|
|
||||||
{
|
|
||||||
keyLength += sizeof(long double);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
keyLength += 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set bSignedUnsignedJoin if one or more join columns are signed to unsigned compares.
|
|
||||||
if (smallRG.isUnsigned(smallKeyColumns[i]) != largeRG.isUnsigned(largeKeyColumns[i]))
|
|
||||||
{
|
|
||||||
bSignedUnsignedJoin = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// note, 'numcores' is implied by tuplehashjoin on calls to insertRGData().
|
|
||||||
// TODO: make it explicit to avoid future confusion.
|
|
||||||
storedKeyAlloc.reset(new FixedAllocator[numCores]);
|
|
||||||
for (i = 0; i < (uint) numCores; i++)
|
|
||||||
storedKeyAlloc[i].setAllocSize(keyLength);
|
|
||||||
|
|
||||||
discreteValues.reset(new bool[smallKeyColumns.size()]);
|
discreteValues.reset(new bool[smallKeyColumns.size()]);
|
||||||
cpValues.reset(new vector<int128_t>[smallKeyColumns.size()]);
|
cpValues.reset(new vector<int128_t>[smallKeyColumns.size()]);
|
||||||
|
|
||||||
for (i = 0; i < smallKeyColumns.size(); i++)
|
for (i = 0; i < smallKeyColumns.size(); ++i)
|
||||||
{
|
{
|
||||||
discreteValues[i] = false;
|
uint32_t smallKeyColumnsIdx = smallKeyColumns[i];
|
||||||
if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]]))
|
auto smallSideColType = smallRG.getColTypes()[smallKeyColumnsIdx];
|
||||||
|
// Set bSignedUnsignedJoin if one or more join columns are signed to unsigned compares.
|
||||||
|
if (smallRG.isUnsigned(smallKeyColumnsIdx) != largeRG.isUnsigned(largeKeyColumns[i]))
|
||||||
{
|
{
|
||||||
if (datatypes::isWideDecimalType(
|
bSignedUnsignedJoin = true;
|
||||||
smallRG.getColType(smallKeyColumns[i]),
|
}
|
||||||
smallRG.getColumnWidth(smallKeyColumns[i])))
|
|
||||||
{
|
discreteValues[i] = false;
|
||||||
cpValues[i].push_back((int128_t) -1);
|
if (isUnsigned(smallSideColType))
|
||||||
cpValues[i].push_back(0);
|
{
|
||||||
}
|
cpValues[i].push_back((int128_t) numeric_limits<uint64_t>::max());
|
||||||
else
|
cpValues[i].push_back(0);
|
||||||
{
|
|
||||||
cpValues[i].push_back((int128_t) numeric_limits<uint64_t>::max());
|
|
||||||
cpValues[i].push_back(0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (datatypes::isWideDecimalType(
|
if (datatypes::isWideDecimalType(smallSideColType,
|
||||||
smallRG.getColType(smallKeyColumns[i]),
|
smallRG.getColumnWidth(smallKeyColumnsIdx)))
|
||||||
smallRG.getColumnWidth(smallKeyColumns[i])))
|
|
||||||
{
|
{
|
||||||
cpValues[i].push_back(utils::maxInt128);
|
cpValues[i].push_back(utils::maxInt128);
|
||||||
cpValues[i].push_back(utils::minInt128);
|
cpValues[i].push_back(utils::minInt128);
|
||||||
@ -254,6 +219,12 @@ TupleJoiner::TupleJoiner(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// note, 'numcores' is implied by tuplehashjoin on calls to insertRGData().
|
||||||
|
// TODO: make it explicit to avoid future confusion.
|
||||||
|
storedKeyAlloc.reset(new FixedAllocator[numCores]);
|
||||||
|
for (i = 0; i < (uint) numCores; i++)
|
||||||
|
storedKeyAlloc[i].setAllocSize(keyLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
TupleJoiner::TupleJoiner() { }
|
TupleJoiner::TupleJoiner() { }
|
||||||
@ -730,10 +701,12 @@ void TupleJoiner::doneInserting()
|
|||||||
typelesshash_t::iterator thit;
|
typelesshash_t::iterator thit;
|
||||||
uint32_t i, pmpos = 0, rowCount;
|
uint32_t i, pmpos = 0, rowCount;
|
||||||
Row smallRow;
|
Row smallRow;
|
||||||
|
auto smallSideColIdx = smallKeyColumns[col];
|
||||||
|
auto smallSideColType = smallRG.getColType(smallSideColIdx);
|
||||||
|
|
||||||
smallRG.initRow(&smallRow);
|
smallRG.initRow(&smallRow);
|
||||||
|
|
||||||
if (smallRow.isCharType(smallKeyColumns[col]))
|
if (smallRow.isCharType(smallSideColIdx))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
rowCount = size();
|
rowCount = size();
|
||||||
@ -743,7 +716,7 @@ void TupleJoiner::doneInserting()
|
|||||||
pmpos = 0;
|
pmpos = 0;
|
||||||
else if (typelessJoin)
|
else if (typelessJoin)
|
||||||
thit = ht[bucket]->begin();
|
thit = ht[bucket]->begin();
|
||||||
else if (smallRG.getColType(smallKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE)
|
else if (isLongDouble(smallRG.getColType(smallKeyColumns[0])))
|
||||||
ldit = ld[bucket]->begin();
|
ldit = ld[bucket]->begin();
|
||||||
else if (!smallRG.usesStringTable())
|
else if (!smallRG.usesStringTable())
|
||||||
hit = h[bucket]->begin();
|
hit = h[bucket]->begin();
|
||||||
@ -761,7 +734,7 @@ void TupleJoiner::doneInserting()
|
|||||||
smallRow.setPointer(thit->second);
|
smallRow.setPointer(thit->second);
|
||||||
++thit;
|
++thit;
|
||||||
}
|
}
|
||||||
else if (smallRG.getColType(smallKeyColumns[col]) == CalpontSystemCatalog::LONGDOUBLE)
|
else if (isLongDouble(smallSideColType))
|
||||||
{
|
{
|
||||||
while (ldit == ld[bucket]->end())
|
while (ldit == ld[bucket]->end())
|
||||||
ldit = ld[++bucket]->begin();
|
ldit = ld[++bucket]->begin();
|
||||||
@ -783,9 +756,9 @@ void TupleJoiner::doneInserting()
|
|||||||
++sthit;
|
++sthit;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (smallRow.getColType(smallKeyColumns[col]) == CalpontSystemCatalog::LONGDOUBLE)
|
if (isLongDouble(smallSideColType))
|
||||||
{
|
{
|
||||||
double dval = (double)roundl(smallRow.getLongDoubleField(smallKeyColumns[col]));
|
double dval = (double)roundl(smallRow.getLongDoubleField(smallSideColIdx));
|
||||||
switch (largeRG.getColType(largeKeyColumns[col]))
|
switch (largeRG.getColType(largeKeyColumns[col]))
|
||||||
{
|
{
|
||||||
case CalpontSystemCatalog::DOUBLE:
|
case CalpontSystemCatalog::DOUBLE:
|
||||||
@ -802,19 +775,18 @@ void TupleJoiner::doneInserting()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (datatypes::isWideDecimalType(
|
else if (datatypes::isWideDecimalType(smallSideColType,
|
||||||
smallRow.getColType(smallKeyColumns[col]),
|
smallRow.getColumnWidth(smallSideColIdx)))
|
||||||
smallRow.getColumnWidth(smallKeyColumns[col])))
|
|
||||||
{
|
{
|
||||||
uniquer.insert(*((int128_t*)smallRow.getBinaryField<int128_t>(smallKeyColumns[col])));
|
uniquer.insert(smallRow.getTSInt128Field(smallSideColIdx).getValue());
|
||||||
}
|
}
|
||||||
else if (smallRow.isUnsigned(smallKeyColumns[col]))
|
else if (smallRow.isUnsigned(smallSideColIdx))
|
||||||
{
|
{
|
||||||
uniquer.insert((int64_t)smallRow.getUintField(smallKeyColumns[col]));
|
uniquer.insert((int64_t)smallRow.getUintField(smallSideColIdx));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uniquer.insert(smallRow.getIntField(smallKeyColumns[col]));
|
uniquer.insert(smallRow.getIntField(smallSideColIdx));
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECKSIZE;
|
CHECKSIZE;
|
||||||
@ -1170,7 +1142,8 @@ void TupleJoiner::updateCPData(const Row& r)
|
|||||||
r.getColType(colIdx),
|
r.getColType(colIdx),
|
||||||
r.getColumnWidth(colIdx)))
|
r.getColumnWidth(colIdx)))
|
||||||
{
|
{
|
||||||
uval = *((int128_t*)r.getBinaryField<int128_t>(colIdx));
|
|
||||||
|
uval = r.getTSInt128Field(colIdx).getValue();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1210,7 +1183,7 @@ void TupleJoiner::updateCPData(const Row& r)
|
|||||||
r.getColType(colIdx),
|
r.getColType(colIdx),
|
||||||
r.getColumnWidth(colIdx)))
|
r.getColumnWidth(colIdx)))
|
||||||
{
|
{
|
||||||
val = *((int128_t*)r.getBinaryField<int128_t>(colIdx));
|
val = r.getTSInt128Field(colIdx).getValue();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1283,66 +1256,134 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class WideDecimalKeyConverter
|
||||||
class TypelessDataDecoder
|
|
||||||
{
|
{
|
||||||
const uint8_t *mPtr;
|
const Row* mR;
|
||||||
const uint8_t *mEnd;
|
uint64_t convertedValue;
|
||||||
void checkAvailableData(uint32_t nbytes) const
|
const uint32_t mKeyColId;
|
||||||
{
|
uint16_t width;
|
||||||
if (mPtr + nbytes > mEnd)
|
public:
|
||||||
throw runtime_error("TypelessData is too short");
|
WideDecimalKeyConverter(const Row& r,
|
||||||
}
|
const uint32_t keyColId): mR(&r),
|
||||||
public:
|
mKeyColId(keyColId),
|
||||||
TypelessDataDecoder(const uint8_t* ptr, size_t length)
|
width(datatypes::MAXDECIMALWIDTH)
|
||||||
:mPtr(ptr), mEnd(ptr + length)
|
|
||||||
{ }
|
{ }
|
||||||
TypelessDataDecoder(const TypelessData &data)
|
bool isConvertedToSmallSideType() const { return width == datatypes::MAXLEGACYWIDTH; }
|
||||||
:TypelessDataDecoder(data.data, data.len)
|
int64_t getConvertedTInt64() const { return (int64_t)convertedValue; }
|
||||||
{ }
|
// Returns true if the value doesn't fit into allowed range for a type.
|
||||||
ConstString scanGeneric(uint32_t length)
|
template <typename T, typename AT>
|
||||||
|
bool numericRangeCheckAndConvert(const AT& value)
|
||||||
{
|
{
|
||||||
checkAvailableData(length);
|
if (value > AT(std::numeric_limits<T>::max()) ||
|
||||||
ConstString res((const char *) mPtr, length);
|
value < AT(std::numeric_limits<T>::min()))
|
||||||
mPtr += length;
|
return true;
|
||||||
return res;
|
|
||||||
|
convertedValue = (uint64_t) static_cast<T>(value);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
uint32_t scanStringLength()
|
// As of MCS 6.x there is an assumption that MCS can't join having
|
||||||
|
// INTEGER and non-INTEGER potentially fractional keys,
|
||||||
|
// e.g. BIGINT to DECIMAL(38,1). It can only join BIGINT to DECIMAL(38).
|
||||||
|
// convert() checks if wide-DECIMAL overflows INTEGER type range
|
||||||
|
// and sets internal width to 0 if it does. If not, width is set to 8
|
||||||
|
// and convertedValue is cast to INTEGER type.
|
||||||
|
// This convert() is called in EM to cast smallSide TypelessData
|
||||||
|
// if the key columns have a skew, e.g. INT to DECIMAL(38).
|
||||||
|
inline WideDecimalKeyConverter&
|
||||||
|
convert(const bool otherSideIsIntOrNarrow,
|
||||||
|
const execplan::CalpontSystemCatalog::ColDataType otherSideType)
|
||||||
{
|
{
|
||||||
checkAvailableData(2);
|
if (otherSideIsIntOrNarrow)
|
||||||
uint32_t res = ((uint32_t) mPtr[0]) * 255 + mPtr[1];
|
{
|
||||||
mPtr += 2;
|
datatypes::TSInt128 integralPart = mR->getTSInt128Field(mKeyColId);
|
||||||
return res;
|
|
||||||
|
bool isUnsigned = datatypes::isUnsigned(otherSideType);
|
||||||
|
if (isUnsigned)
|
||||||
|
{
|
||||||
|
width = (numericRangeCheckAndConvert<uint64_t>(integralPart)) ? 0 : datatypes::MAXLEGACYWIDTH;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
width = (numericRangeCheckAndConvert<int64_t>(integralPart)) ? 0 : datatypes::MAXLEGACYWIDTH;
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
ConstString scanString()
|
// Stores the value that might have been converted.
|
||||||
|
inline bool store(TypelessData& typelessData,
|
||||||
|
uint32_t& off,
|
||||||
|
const uint32_t keylen) const
|
||||||
{
|
{
|
||||||
return scanGeneric(scanStringLength());
|
// A note from convert() if there is otherSide column type range
|
||||||
|
// overflow so store() returns TD with len=0. This tells EM to skip this
|
||||||
|
// key b/c it won't match at PP. This is done here b/c it is possible to skip
|
||||||
|
// smallSide TD but can't do the same with largeSide b/c of OUTER joins.
|
||||||
|
if (!width)
|
||||||
|
{
|
||||||
|
typelessData.len = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (off + width > keylen)
|
||||||
|
return true;
|
||||||
|
switch (width)
|
||||||
|
{
|
||||||
|
case datatypes::MAXDECIMALWIDTH:
|
||||||
|
{
|
||||||
|
mR->storeInt128FieldIntoPtr(mKeyColId, &typelessData.data[off]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
datatypes::TUInt64(convertedValue).store(&typelessData.data[off]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
off += width;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// smallSideColWidths is a valid non-null pointer only
|
||||||
|
// if there is a skew b/w small and large side column widths.
|
||||||
uint32 TypelessData::hash(const RowGroup& r,
|
uint32 TypelessData::hash(const RowGroup& r,
|
||||||
const std::vector<uint32_t>& keyCols) const
|
const std::vector<uint32_t>& keyCols,
|
||||||
|
const std::vector<uint32_t>* smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup* smallSideRG) const
|
||||||
{
|
{
|
||||||
if (mRowPtr)
|
// This part is for largeSide hashing using Row at PP.
|
||||||
return mRowPtr->hashTypeless(keyCols);
|
if (!isSmallSide())
|
||||||
|
{
|
||||||
|
return mRowPtr->hashTypeless(keyCols,
|
||||||
|
smallSideKeyColumnsIds,
|
||||||
|
(smallSideRG) ? &smallSideRG->getColWidths() : nullptr);
|
||||||
|
}
|
||||||
|
// This part is for smallSide hashing at PP.
|
||||||
TypelessDataDecoder decoder(*this);
|
TypelessDataDecoder decoder(*this);
|
||||||
datatypes::MariaDBHasher hasher;
|
datatypes::MariaDBHasher hasher;
|
||||||
for (uint32_t i = 0; i < keyCols.size(); i++)
|
for (auto keyColId: keyCols)
|
||||||
{
|
{
|
||||||
switch (r.getColTypes()[keyCols[i]])
|
switch (r.getColTypes()[keyColId])
|
||||||
{
|
{
|
||||||
case CalpontSystemCatalog::VARCHAR:
|
case CalpontSystemCatalog::VARCHAR:
|
||||||
case CalpontSystemCatalog::CHAR:
|
case CalpontSystemCatalog::CHAR:
|
||||||
case CalpontSystemCatalog::TEXT:
|
case CalpontSystemCatalog::TEXT:
|
||||||
{
|
{
|
||||||
CHARSET_INFO *cs= const_cast<RowGroup&>(r).getCharset(keyCols[i]);
|
CHARSET_INFO *cs= const_cast<RowGroup&>(r).getCharset(keyColId);
|
||||||
hasher.add(cs, decoder.scanString());
|
hasher.add(cs, decoder.scanString());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case CalpontSystemCatalog::DECIMAL:
|
||||||
|
{
|
||||||
|
const uint32_t width = std::max(r.getColWidths()[keyColId], datatypes::MAXLEGACYWIDTH);
|
||||||
|
if (isSmallSideWithSkewedData() || width == datatypes::MAXLEGACYWIDTH)
|
||||||
|
{
|
||||||
|
int64_t val = decoder.scanTInt64();
|
||||||
|
hasher.add(&my_charset_bin, reinterpret_cast<const char*>(&val), datatypes::MAXLEGACYWIDTH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
hasher.add(&my_charset_bin, decoder.scanGeneric(width));
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
hasher.add(&my_charset_bin, decoder.scanGeneric(8));
|
hasher.add(&my_charset_bin, decoder.scanGeneric(datatypes::MAXLEGACYWIDTH));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1350,41 +1391,84 @@ uint32 TypelessData::hash(const RowGroup& r,
|
|||||||
return hasher.finalize();
|
return hasher.finalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this is smallSide, Row represents largeSide record.
|
||||||
int TypelessData::cmpToRow(const RowGroup& r,
|
int TypelessData::cmpToRow(const RowGroup& r,
|
||||||
const std::vector<uint32_t>& keyCols,
|
const std::vector<uint32_t>& keyCols,
|
||||||
const rowgroup::Row &row) const
|
const rowgroup::Row &row,
|
||||||
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup *smallSideRG) const
|
||||||
{
|
{
|
||||||
TypelessDataDecoder a(*this);
|
TypelessDataDecoder a(*this);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < keyCols.size(); i++)
|
for (uint32_t i = 0; i < keyCols.size(); i++)
|
||||||
{
|
{
|
||||||
switch (r.getColTypes()[keyCols[i]])
|
auto largeSideKeyColRowIdx = keyCols[i];
|
||||||
|
switch (r.getColType(largeSideKeyColRowIdx))
|
||||||
{
|
{
|
||||||
case CalpontSystemCatalog::VARCHAR:
|
case CalpontSystemCatalog::VARCHAR:
|
||||||
case CalpontSystemCatalog::CHAR:
|
case CalpontSystemCatalog::CHAR:
|
||||||
case CalpontSystemCatalog::TEXT:
|
case CalpontSystemCatalog::TEXT:
|
||||||
{
|
{
|
||||||
datatypes::Charset cs(*const_cast<RowGroup&>(r).getCharset(keyCols[i]));
|
datatypes::Charset cs(*const_cast<RowGroup&>(r).getCharset(largeSideKeyColRowIdx));
|
||||||
ConstString ta = a.scanString();
|
ConstString ta = a.scanString();
|
||||||
ConstString tb = row.getConstString(keyCols[i]);
|
ConstString tb = row.getConstString(largeSideKeyColRowIdx);
|
||||||
if (int rc= cs.strnncollsp(ta, tb))
|
if (int rc= cs.strnncollsp(ta, tb))
|
||||||
return rc;
|
return rc;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case CalpontSystemCatalog::DECIMAL:
|
||||||
|
{
|
||||||
|
auto largeSideWidth = row.getColumnWidth(largeSideKeyColRowIdx);
|
||||||
|
// First branch processes skewed JOIN, e.g. INT to DECIMAL(38)
|
||||||
|
// else branch processes decimal with common width at both small- and largeSide.
|
||||||
|
if (isSmallSideWithSkewedData() &&
|
||||||
|
largeSideWidth != smallSideRG->getColumnWidth(smallSideKeyColumnsIds->operator[](i)))
|
||||||
|
{
|
||||||
|
if (largeSideWidth == datatypes::MAXLEGACYWIDTH)
|
||||||
|
{
|
||||||
|
if (int rc = a.scanTInt64() != row.getIntField(largeSideKeyColRowIdx))
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
WideDecimalKeyConverter cv(row, largeSideKeyColRowIdx);
|
||||||
|
if (!cv.convert(true,
|
||||||
|
smallSideRG->getColType(smallSideKeyColumnsIds->operator[](i)))
|
||||||
|
.isConvertedToSmallSideType())
|
||||||
|
return 1;
|
||||||
|
if (int rc = a.scanTInt64() != cv.getConvertedTInt64())
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// There is an assumption that both sides here are equal and are either 8 or 16 bytes.
|
||||||
|
if (largeSideWidth == datatypes::MAXDECIMALWIDTH)
|
||||||
|
{
|
||||||
|
if (int rc = a.scanTInt128() != row.getTSInt128Field(largeSideKeyColRowIdx))
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (int rc = a.scanTInt64() != row.getIntField(largeSideKeyColRowIdx))
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
ConstString ta = a.scanGeneric(datatypes::MAXLEGACYWIDTH);
|
ConstString ta = a.scanGeneric(datatypes::MAXLEGACYWIDTH);
|
||||||
if (r.isUnsigned(keyCols[i]))
|
if (r.isUnsigned(largeSideKeyColRowIdx))
|
||||||
{
|
{
|
||||||
uint64_t tb = row.getUintField(keyCols[i]);
|
uint64_t tb = row.getUintField(largeSideKeyColRowIdx);
|
||||||
if (int rc= memcmp(ta.str(), &tb , datatypes::MAXLEGACYWIDTH))
|
if (int rc = memcmp(ta.str(), &tb , datatypes::MAXLEGACYWIDTH))
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int64_t tb = row.getIntField(keyCols[i]);
|
int64_t tb = row.getIntField(largeSideKeyColRowIdx);
|
||||||
if (int rc= memcmp(ta.str(), &tb , datatypes::MAXLEGACYWIDTH))
|
if (int rc = memcmp(ta.str(), &tb , datatypes::MAXLEGACYWIDTH))
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -1394,39 +1478,60 @@ int TypelessData::cmpToRow(const RowGroup& r,
|
|||||||
return 0; // Equal
|
return 0; // Equal
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int TypelessData::cmp(const RowGroup& r, const std::vector<uint32_t>& keyCols,
|
int TypelessData::cmp(const RowGroup& r, const std::vector<uint32_t>& keyCols,
|
||||||
const TypelessData &da, const TypelessData &db)
|
const TypelessData &da, const TypelessData &db,
|
||||||
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup *smallSideRG)
|
||||||
{
|
{
|
||||||
idbassert((da.mRowPtr == nullptr) + (db.mRowPtr == nullptr) > 0);
|
idbassert(da.isSmallSide() || db.isSmallSide());
|
||||||
if (da.mRowPtr)
|
if (!da.isSmallSide() && db.isSmallSide())
|
||||||
return -db.cmpToRow(r, keyCols, da.mRowPtr[0]);
|
return -db.cmpToRow(r, keyCols, da.mRowPtr[0], smallSideKeyColumnsIds, smallSideRG);
|
||||||
if (db.mRowPtr)
|
if (da.isSmallSide() && !db.isSmallSide())
|
||||||
return da.cmpToRow(r, keyCols, db.mRowPtr[0]);
|
return da.cmpToRow(r, keyCols, db.mRowPtr[0], smallSideKeyColumnsIds, smallSideRG);
|
||||||
|
|
||||||
|
// This case happens in BPP::addToJoiner when it populates the final
|
||||||
|
// hashmap with multiple smallSide TDs from temp hashmaps.
|
||||||
|
idbassert(da.isSmallSide() && db.isSmallSide());
|
||||||
|
|
||||||
TypelessDataDecoder a(da);
|
TypelessDataDecoder a(da);
|
||||||
TypelessDataDecoder b(db);
|
TypelessDataDecoder b(db);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < keyCols.size(); i++)
|
for (uint32_t i = 0; i < keyCols.size(); ++i)
|
||||||
{
|
{
|
||||||
switch (r.getColTypes()[keyCols[i]])
|
auto keyColIdx = keyCols[i];
|
||||||
|
switch (r.getColTypes()[keyColIdx])
|
||||||
{
|
{
|
||||||
case CalpontSystemCatalog::VARCHAR:
|
case CalpontSystemCatalog::VARCHAR:
|
||||||
case CalpontSystemCatalog::CHAR:
|
case CalpontSystemCatalog::CHAR:
|
||||||
case CalpontSystemCatalog::TEXT:
|
case CalpontSystemCatalog::TEXT:
|
||||||
{
|
{
|
||||||
datatypes::Charset cs(*const_cast<RowGroup&>(r).getCharset(keyCols[i]));
|
datatypes::Charset cs(*const_cast<RowGroup&>(r).getCharset(keyColIdx));
|
||||||
ConstString ta = a.scanString();
|
ConstString ta = a.scanString();
|
||||||
ConstString tb = b.scanString();
|
ConstString tb = b.scanString();
|
||||||
if (int rc= cs.strnncollsp(ta, tb))
|
if (int rc= cs.strnncollsp(ta, tb))
|
||||||
return rc;
|
return rc;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case CalpontSystemCatalog::DECIMAL:
|
||||||
|
{
|
||||||
|
auto largeSideWidth = r.getColumnWidth(keyColIdx);
|
||||||
|
// First and second branches processes skewed JOIN, e.g. INT to DECIMAL(38)
|
||||||
|
// Third processes decimal with common width at both small- and largeSide.
|
||||||
|
auto width = (da.isSmallSideWithSkewedData() &&
|
||||||
|
largeSideWidth != smallSideRG->getColumnWidth(smallSideKeyColumnsIds->operator[](i))) ? datatypes::MAXLEGACYWIDTH : std::max(r.getColWidths()[keyColIdx], datatypes::MAXLEGACYWIDTH);
|
||||||
|
ConstString ta = a.scanGeneric(width);
|
||||||
|
ConstString tb = b.scanGeneric(width);
|
||||||
|
if (int rc= memcmp(ta.str(), tb.str(), width))
|
||||||
|
return rc;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
ConstString ta = a.scanGeneric(8);
|
ConstString ta = a.scanGeneric(datatypes::MAXLEGACYWIDTH);
|
||||||
ConstString tb = b.scanGeneric(8);
|
ConstString tb = b.scanGeneric(datatypes::MAXLEGACYWIDTH);
|
||||||
idbassert(ta.length() == tb.length());
|
idbassert(ta.length() == tb.length());
|
||||||
|
// It is impossible to join signed to unsigned types now
|
||||||
|
// but there is a potential error, e.g. uint64 vs negative int64.
|
||||||
if (int rc= memcmp(ta.str(), tb.str() , ta.length()))
|
if (int rc= memcmp(ta.str(), tb.str() , ta.length()))
|
||||||
return rc;
|
return rc;
|
||||||
break;
|
break;
|
||||||
@ -1438,23 +1543,24 @@ int TypelessData::cmp(const RowGroup& r, const std::vector<uint32_t>& keyCols,
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Called in joblist code to produce SmallSide TypelessData to be sent to PP.
|
||||||
TypelessData makeTypelessKey(const Row& r, const vector<uint32_t>& keyCols,
|
TypelessData makeTypelessKey(const Row& r, const vector<uint32_t>& keyCols,
|
||||||
uint32_t keylen, FixedAllocator* fa,
|
uint32_t keylen, FixedAllocator* fa,
|
||||||
const rowgroup::RowGroup& otherSideRG, const std::vector<uint32_t>& otherKeyCols)
|
const rowgroup::RowGroup& otherSideRG,
|
||||||
|
const std::vector<uint32_t>& otherKeyCols)
|
||||||
{
|
{
|
||||||
TypelessData ret;
|
TypelessData ret;
|
||||||
uint32_t off = 0, i;
|
uint32_t off = 0, i;
|
||||||
execplan::CalpontSystemCatalog::ColDataType type;
|
execplan::CalpontSystemCatalog::ColDataType type;
|
||||||
|
|
||||||
ret.data = (uint8_t*) fa->allocate();
|
ret.data = (uint8_t*) fa->allocate();
|
||||||
|
idbassert(keyCols.size() == otherKeyCols.size());
|
||||||
|
|
||||||
for (i = 0; i < keyCols.size(); i++)
|
for (i = 0; i < keyCols.size(); i++)
|
||||||
{
|
{
|
||||||
type = r.getColTypes()[keyCols[i]];
|
type = r.getColTypes()[keyCols[i]];
|
||||||
|
|
||||||
if (type == CalpontSystemCatalog::VARCHAR ||
|
if (datatypes::isCharType(type))
|
||||||
type == CalpontSystemCatalog::CHAR ||
|
|
||||||
type == CalpontSystemCatalog::TEXT)
|
|
||||||
{
|
{
|
||||||
// this is a string, copy a normalized version
|
// this is a string, copy a normalized version
|
||||||
const uint8_t* str = r.getStringPointer(keyCols[i]);
|
const uint8_t* str = r.getStringPointer(keyCols[i]);
|
||||||
@ -1462,7 +1568,19 @@ TypelessData makeTypelessKey(const Row& r, const vector<uint32_t>& keyCols,
|
|||||||
if (TypelessDataStringEncoder(str, width).store(ret.data, off, keylen))
|
if (TypelessDataStringEncoder(str, width).store(ret.data, off, keylen))
|
||||||
goto toolong;
|
goto toolong;
|
||||||
}
|
}
|
||||||
else if (r.getColType(keyCols[i]) == CalpontSystemCatalog::LONGDOUBLE)
|
else if (datatypes::isWideDecimalType(type, r.getColumnWidth(keyCols[i])))
|
||||||
|
{
|
||||||
|
bool otherSideIsIntOrNarrow = otherSideRG.getColumnWidth(otherKeyCols[i]) <= datatypes::MAXLEGACYWIDTH;
|
||||||
|
// useless if otherSideIsInt is false
|
||||||
|
auto otherSideType = (otherSideIsIntOrNarrow) ? otherSideRG.getColType(otherKeyCols[i])
|
||||||
|
: datatypes::SystemCatalog::UNDEFINED;
|
||||||
|
if (WideDecimalKeyConverter(r, keyCols[i]).convert(otherSideIsIntOrNarrow, otherSideType)
|
||||||
|
.store(ret, off, keylen))
|
||||||
|
{
|
||||||
|
goto toolong;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (datatypes::isLongDouble(type))
|
||||||
{
|
{
|
||||||
if (off + sizeof(long double) > keylen)
|
if (off + sizeof(long double) > keylen)
|
||||||
goto toolong;
|
goto toolong;
|
||||||
@ -1546,7 +1664,7 @@ toolong:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The method is used by disk-based JOIN and it is not collation or wide DECIMAL aware.
|
||||||
uint64_t getHashOfTypelessKey(const Row& r, const vector<uint32_t>& keyCols, uint32_t seed)
|
uint64_t getHashOfTypelessKey(const Row& r, const vector<uint32_t>& keyCols, uint32_t seed)
|
||||||
{
|
{
|
||||||
Hasher_r hasher;
|
Hasher_r hasher;
|
||||||
@ -1620,14 +1738,7 @@ void TypelessData::serialize(messageqcpp::ByteStream& b) const
|
|||||||
{
|
{
|
||||||
b << len;
|
b << len;
|
||||||
b.append(data, len);
|
b.append(data, len);
|
||||||
}
|
// Flags are not sent b/c they are locally significant now.
|
||||||
|
|
||||||
void TypelessData::deserialize(messageqcpp::ByteStream& b, utils::FixedAllocator& fa)
|
|
||||||
{
|
|
||||||
b >> len;
|
|
||||||
data = (uint8_t*) fa.allocate(len);
|
|
||||||
memcpy(data, b.buf(), len);
|
|
||||||
b.advance(len);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void TypelessData::deserialize(messageqcpp::ByteStream& b, utils::PoolAllocator& fa)
|
void TypelessData::deserialize(messageqcpp::ByteStream& b, utils::PoolAllocator& fa)
|
||||||
@ -1789,9 +1900,87 @@ boost::shared_ptr<TupleJoiner> TupleJoiner::copyForDiskJoin()
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used for Typeless JOIN to detect if there is a JOIN when largeSide is wide-DECIMAL and
|
||||||
|
// smallSide is a smaller data type, e.g. INT or narrow-DECIMAL.
|
||||||
|
bool TupleJoiner::joinHasSkewedKeyColumn()
|
||||||
|
{
|
||||||
|
std::vector<uint32_t>::const_iterator largeSideKeyColumnsIter = getLargeKeyColumns().begin();
|
||||||
|
std::vector<uint32_t>::const_iterator smallSideKeyColumnsIter = getSmallKeyColumns().begin();
|
||||||
|
idbassert(getLargeKeyColumns().size() == getSmallKeyColumns().size());
|
||||||
|
while (largeSideKeyColumnsIter != getLargeKeyColumns().end())
|
||||||
|
{
|
||||||
|
auto smallSideColumnWidth = smallRG.getColumnWidth(*smallSideKeyColumnsIter);
|
||||||
|
auto largeSideColumnWidth = largeRG.getColumnWidth(*largeSideKeyColumnsIter);
|
||||||
|
bool widthIsDifferent = smallSideColumnWidth != largeSideColumnWidth;
|
||||||
|
if (widthIsDifferent && (datatypes::isWideDecimalType(smallRG.getColTypes()[*smallSideKeyColumnsIter], smallSideColumnWidth) ||
|
||||||
|
datatypes::isWideDecimalType(largeRG.getColTypes()[*largeSideKeyColumnsIter], largeSideColumnWidth)))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
++largeSideKeyColumnsIter;
|
||||||
|
++smallSideKeyColumnsIter;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void TupleJoiner::setConvertToDiskJoin()
|
void TupleJoiner::setConvertToDiskJoin()
|
||||||
{
|
{
|
||||||
_convertToDiskJoin = true;
|
_convertToDiskJoin = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The method is made to reuse the code from Typeless TupleJoiner ctor.
|
||||||
|
// It is used in the mentioned ctor and in initBPP() to calculate
|
||||||
|
// Typeless key length in case of a JOIN when large side column is INT
|
||||||
|
// and small side column is wide-DECIMAL.
|
||||||
|
// An important assumption is that if the type is DECIMAL than it must
|
||||||
|
// be wide-DECIMAL b/c MCS calls the function running Typeless TupleJoiner
|
||||||
|
// ctor.
|
||||||
|
uint32_t calculateKeyLength(const std::vector<uint32_t>& aKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup& aSmallRowGroup,
|
||||||
|
const std::vector<uint32_t>* aLargeKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup* aLargeRowGroup)
|
||||||
|
{
|
||||||
|
uint32_t keyLength = 0;
|
||||||
|
for (size_t keyColumnIdx = 0; keyColumnIdx < aKeyColumnsIds.size(); ++keyColumnIdx)
|
||||||
|
{
|
||||||
|
auto smallSideKeyColumnId = aKeyColumnsIds[keyColumnIdx];
|
||||||
|
auto largeSideKeyColumnId = (aLargeRowGroup)
|
||||||
|
? aLargeKeyColumnsIds->operator[](keyColumnIdx)
|
||||||
|
: std::numeric_limits<uint64_t>::max();
|
||||||
|
const auto& smallKeyColumnType = aSmallRowGroup.getColTypes()[smallSideKeyColumnId];
|
||||||
|
// Not used if aLargeRowGroup is 0 that happens in PrimProc.
|
||||||
|
const auto& largeKeyColumntype = (aLargeRowGroup) ? aLargeRowGroup->getColTypes()[largeSideKeyColumnId]
|
||||||
|
: datatypes::SystemCatalog::UNDEFINED;
|
||||||
|
if (datatypes::isCharType(smallKeyColumnType))
|
||||||
|
{
|
||||||
|
keyLength += aSmallRowGroup.getColumnWidth(smallSideKeyColumnId) + 2; // +2 for encoded length
|
||||||
|
|
||||||
|
// MCOL-698: if we don't do this LONGTEXT allocates 32TB RAM
|
||||||
|
if (keyLength > 65536)
|
||||||
|
return 65536;
|
||||||
|
}
|
||||||
|
else if (datatypes::isLongDouble(smallKeyColumnType))
|
||||||
|
{
|
||||||
|
keyLength += sizeof(long double);
|
||||||
|
}
|
||||||
|
else if (datatypes::isWideDecimalType(smallKeyColumnType,
|
||||||
|
aSmallRowGroup.getColumnWidth(smallSideKeyColumnId)))
|
||||||
|
{
|
||||||
|
keyLength += (aLargeRowGroup &&
|
||||||
|
!datatypes::isWideDecimalType(largeKeyColumntype,
|
||||||
|
aLargeRowGroup->getColumnWidth(smallSideKeyColumnId)))
|
||||||
|
? datatypes::MAXLEGACYWIDTH // Small=Wide, Large=Narrow/xINT
|
||||||
|
: datatypes::MAXDECIMALWIDTH; // Small=Wide, Large=Wide
|
||||||
|
}
|
||||||
|
else
|
||||||
|
// The branch covers all datatypes left including skewed DECIMAL JOIN case
|
||||||
|
// Small=Wide, Large=Narrow
|
||||||
|
{
|
||||||
|
keyLength += datatypes::MAXLEGACYWIDTH;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return keyLength;
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -40,32 +40,77 @@
|
|||||||
#include "hasher.h"
|
#include "hasher.h"
|
||||||
#include "threadpool.h"
|
#include "threadpool.h"
|
||||||
#include "columnwidth.h"
|
#include "columnwidth.h"
|
||||||
|
#include "mcs_string.h"
|
||||||
|
|
||||||
namespace joiner
|
namespace joiner
|
||||||
{
|
{
|
||||||
|
|
||||||
|
uint32_t calculateKeyLength(const std::vector<uint32_t>& aKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup& aRowGroup,
|
||||||
|
const std::vector<uint32_t>* aLargeKeyColumnsIds = nullptr,
|
||||||
|
const rowgroup::RowGroup* aLargeRowGroup = nullptr);
|
||||||
|
|
||||||
|
constexpr uint8_t IS_SMALLSIDE = 0x01; // SmallSide of a JOIN w/o a skew in key columns widths
|
||||||
|
constexpr uint8_t IS_SMALLSIDE_SKEWED = 0x02; // SmallSide of a JOIN with a skew in key cols widths
|
||||||
|
class TypelessDataDecoder;
|
||||||
|
|
||||||
class TypelessData
|
class TypelessData
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
uint8_t* data;
|
union {
|
||||||
|
uint8_t* data;
|
||||||
|
const rowgroup::Row *mRowPtr;
|
||||||
|
};
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
const rowgroup::Row *mRowPtr;
|
// The flags are locally significant in PP now so serialize doesn't send them over the wire.
|
||||||
|
uint32_t mFlags;
|
||||||
|
|
||||||
TypelessData() : data(NULL), len(0), mRowPtr(nullptr) { }
|
TypelessData() : data(nullptr), len(0), mFlags(0) { }
|
||||||
TypelessData(const rowgroup::Row *rowPtr) : data(NULL), len(0), mRowPtr(rowPtr) { }
|
TypelessData(const rowgroup::Row *rowPtr) : mRowPtr(rowPtr), len(0), mFlags(0) { }
|
||||||
|
TypelessData(messageqcpp::ByteStream& bs, utils::PoolAllocator& memAllocator) : data(nullptr), len(0), mFlags(0)
|
||||||
|
{
|
||||||
|
deserialize(bs, memAllocator);
|
||||||
|
}
|
||||||
inline bool operator==(const TypelessData&) const;
|
inline bool operator==(const TypelessData&) const;
|
||||||
void serialize(messageqcpp::ByteStream&) const;
|
void serialize(messageqcpp::ByteStream&) const;
|
||||||
void deserialize(messageqcpp::ByteStream&, utils::FixedAllocator&);
|
void deserialize(messageqcpp::ByteStream&, utils::FixedAllocator&);
|
||||||
void deserialize(messageqcpp::ByteStream&, utils::PoolAllocator&);
|
void deserialize(messageqcpp::ByteStream&, utils::PoolAllocator&);
|
||||||
std::string toString() const;
|
std::string toString() const;
|
||||||
uint32_t hash(const rowgroup::RowGroup&, const std::vector<uint32_t>& keyCols) const;
|
uint32_t hash(const rowgroup::RowGroup&,
|
||||||
static int cmp(const rowgroup::RowGroup&, const std::vector<uint32_t>& keyCols,
|
const std::vector<uint32_t>& keyCols,
|
||||||
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup *smallSideRG) const;
|
||||||
|
static int cmp(const rowgroup::RowGroup&,
|
||||||
|
const std::vector<uint32_t>& keyCols,
|
||||||
const TypelessData &a,
|
const TypelessData &a,
|
||||||
const TypelessData &b);
|
const TypelessData &b,
|
||||||
int cmpToRow(const rowgroup::RowGroup& r, const std::vector<uint32_t>& keyCols,
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
const rowgroup::Row &db) const;
|
const rowgroup::RowGroup *smallSideRG);
|
||||||
|
int cmpToRow(const rowgroup::RowGroup& r,
|
||||||
|
const std::vector<uint32_t>& keyCols,
|
||||||
|
const rowgroup::Row &row,
|
||||||
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup *smallSideRG) const;
|
||||||
|
inline void setSmallSide()
|
||||||
|
{
|
||||||
|
mFlags |= IS_SMALLSIDE;
|
||||||
|
}
|
||||||
|
inline void setSmallSideWithSkewedData()
|
||||||
|
{
|
||||||
|
mFlags |= IS_SMALLSIDE_SKEWED;
|
||||||
|
}
|
||||||
|
inline bool isSmallSide() const
|
||||||
|
{
|
||||||
|
return mFlags & (IS_SMALLSIDE_SKEWED | IS_SMALLSIDE);
|
||||||
|
}
|
||||||
|
inline bool isSmallSideWithSkewedData() const
|
||||||
|
{
|
||||||
|
return mFlags & IS_SMALLSIDE_SKEWED;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// This operator is used in EM only so it doesn't support TD cmp operation
|
||||||
|
// using Row pointers.
|
||||||
inline bool TypelessData::operator==(const TypelessData& t) const
|
inline bool TypelessData::operator==(const TypelessData& t) const
|
||||||
{
|
{
|
||||||
if (len != t.len)
|
if (len != t.len)
|
||||||
@ -77,6 +122,57 @@ inline bool TypelessData::operator==(const TypelessData& t) const
|
|||||||
return (memcmp(data, t.data, len) == 0);
|
return (memcmp(data, t.data, len) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class TypelessDataDecoder
|
||||||
|
{
|
||||||
|
const uint8_t *mPtr;
|
||||||
|
const uint8_t *mEnd;
|
||||||
|
void checkAvailableData(uint32_t nbytes) const
|
||||||
|
{
|
||||||
|
if (mPtr + nbytes > mEnd)
|
||||||
|
throw runtime_error("TypelessData is too short");
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
TypelessDataDecoder(const uint8_t* ptr, size_t length)
|
||||||
|
:mPtr(ptr), mEnd(ptr + length)
|
||||||
|
{ }
|
||||||
|
TypelessDataDecoder(const TypelessData &data)
|
||||||
|
:TypelessDataDecoder(data.data, data.len)
|
||||||
|
{ }
|
||||||
|
utils::ConstString scanGeneric(uint32_t length)
|
||||||
|
{
|
||||||
|
checkAvailableData(length);
|
||||||
|
utils::ConstString res((const char *) mPtr, length);
|
||||||
|
mPtr += length;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
uint32_t scanStringLength()
|
||||||
|
{
|
||||||
|
checkAvailableData(2);
|
||||||
|
uint32_t res = ((uint32_t) mPtr[0]) * 255 + mPtr[1];
|
||||||
|
mPtr += 2;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
utils::ConstString scanString()
|
||||||
|
{
|
||||||
|
return scanGeneric(scanStringLength());
|
||||||
|
}
|
||||||
|
int64_t scanTInt64()
|
||||||
|
{
|
||||||
|
checkAvailableData(sizeof(int64_t));
|
||||||
|
int64_t res = *reinterpret_cast<const int64_t*>(mPtr);
|
||||||
|
mPtr += sizeof(int64_t);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
datatypes::TSInt128 scanTInt128()
|
||||||
|
{
|
||||||
|
checkAvailableData(datatypes::MAXDECIMALWIDTH);
|
||||||
|
datatypes::TSInt128 res(mPtr);
|
||||||
|
mPtr += datatypes::MAXDECIMALWIDTH;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
// Comparator for long double in the hash
|
// Comparator for long double in the hash
|
||||||
class LongDoubleEq
|
class LongDoubleEq
|
||||||
{
|
{
|
||||||
@ -104,10 +200,16 @@ class TypelessDataStructure
|
|||||||
public:
|
public:
|
||||||
const rowgroup::RowGroup *mRowGroup;
|
const rowgroup::RowGroup *mRowGroup;
|
||||||
const std::vector<uint32_t> *mMap;
|
const std::vector<uint32_t> *mMap;
|
||||||
|
const std::vector<uint32_t> *mSmallSideKeyColumnsIds;
|
||||||
|
const rowgroup::RowGroup *mSmallSideRG;
|
||||||
TypelessDataStructure(const rowgroup::RowGroup *rg,
|
TypelessDataStructure(const rowgroup::RowGroup *rg,
|
||||||
const std::vector<uint32_t> *map)
|
const std::vector<uint32_t> *map,
|
||||||
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup *smallSideRG)
|
||||||
:mRowGroup(rg),
|
:mRowGroup(rg),
|
||||||
mMap(map)
|
mMap(map),
|
||||||
|
mSmallSideKeyColumnsIds(smallSideKeyColumnsIds),
|
||||||
|
mSmallSideRG(smallSideRG)
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -150,12 +252,14 @@ public:
|
|||||||
struct TypelessDataHasher: public TypelessDataStructure
|
struct TypelessDataHasher: public TypelessDataStructure
|
||||||
{
|
{
|
||||||
TypelessDataHasher(const rowgroup::RowGroup *rg,
|
TypelessDataHasher(const rowgroup::RowGroup *rg,
|
||||||
const std::vector<uint32_t> *map)
|
const std::vector<uint32_t> *map,
|
||||||
:TypelessDataStructure(rg, map)
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup *smallSideRG)
|
||||||
|
:TypelessDataStructure(rg, map, smallSideKeyColumnsIds, smallSideRG)
|
||||||
{ }
|
{ }
|
||||||
inline size_t operator()(const TypelessData& e) const
|
inline size_t operator()(const TypelessData& e) const
|
||||||
{
|
{
|
||||||
return e.hash(*mRowGroup, *mMap);
|
return e.hash(*mRowGroup, *mMap, mSmallSideKeyColumnsIds, mSmallSideRG);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -163,12 +267,14 @@ public:
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
TypelessDataComparator(const rowgroup::RowGroup *rg,
|
TypelessDataComparator(const rowgroup::RowGroup *rg,
|
||||||
const std::vector<uint32_t> *map)
|
const std::vector<uint32_t> *map,
|
||||||
:TypelessDataStructure(rg, map)
|
const std::vector<uint32_t> *smallSideKeyColumnsIds,
|
||||||
|
const rowgroup::RowGroup *smallSideRG)
|
||||||
|
:TypelessDataStructure(rg, map, smallSideKeyColumnsIds, smallSideRG)
|
||||||
{ }
|
{ }
|
||||||
bool operator()(const TypelessData& a, const TypelessData& b) const
|
bool operator()(const TypelessData& a, const TypelessData& b) const
|
||||||
{
|
{
|
||||||
return !TypelessData::cmp(*mRowGroup, *mMap, a, b);
|
return !TypelessData::cmp(*mRowGroup, *mMap, a, b, mSmallSideKeyColumnsIds, mSmallSideRG);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -365,6 +471,12 @@ public:
|
|||||||
return nullValueForJoinColumn;
|
return nullValueForJoinColumn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wide-DECIMAL JOIN
|
||||||
|
bool joinHasSkewedKeyColumn();
|
||||||
|
inline const vector<uint32_t>& getSmallSideColumnsWidths() const
|
||||||
|
{
|
||||||
|
return smallRG.getColWidths();
|
||||||
|
}
|
||||||
// Disk-based join support
|
// Disk-based join support
|
||||||
void clearData();
|
void clearData();
|
||||||
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
|
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
|
||||||
|
@ -170,6 +170,14 @@ ByteStream& ByteStream::operator<<(const uint8_t b)
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pushes a bool onto the end of the stream by forwarding it to add() and
// returns the stream so that insertions can be chained.
// NOTE(review): presumably add() stores the value as a single byte, matching
// operator>>(bool&) that advances the read pointer by one - confirm.
ByteStream& ByteStream::operator<<(const bool b)
|
||||||
|
{
|
||||||
|
add(b);
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
ByteStream& ByteStream::operator<<(const int16_t d)
|
ByteStream& ByteStream::operator<<(const int16_t d)
|
||||||
{
|
{
|
||||||
if (fBuf == 0 || (fCurInPtr - fBuf + 2U > fMaxLen + ISSOverhead))
|
if (fBuf == 0 || (fCurInPtr - fBuf + 2U > fMaxLen + ISSOverhead))
|
||||||
@ -296,6 +304,14 @@ ByteStream& ByteStream::operator>>(uint8_t& b)
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extracts a bool from the front of the stream: peek() reads the value
// (it throws when the stream is empty), then the read pointer moves one
// position forward. Returns the stream so that extractions can be chained.
ByteStream& ByteStream::operator>>(bool& b)
|
||||||
|
{
|
||||||
|
peek(b);
|
||||||
|
||||||
|
fCurOutPtr++;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
ByteStream& ByteStream::operator>>(int16_t& d)
|
ByteStream& ByteStream::operator>>(int16_t& d)
|
||||||
{
|
{
|
||||||
peek(d);
|
peek(d);
|
||||||
@ -382,6 +398,15 @@ void ByteStream::peek(uint8_t& b) const
|
|||||||
b = *((int8_t*)fCurOutPtr);
|
b = *((int8_t*)fCurOutPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ByteStream::peek(bool& b) const
|
||||||
|
{
|
||||||
|
if (length() < 1)
|
||||||
|
throw underflow_error("ByteStream::peek(bool): not enough data in stream to fill datatype");
|
||||||
|
|
||||||
|
b = *((bool*)fCurOutPtr);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void ByteStream::peek(int16_t& d) const
|
void ByteStream::peek(int16_t& d) const
|
||||||
{
|
{
|
||||||
if (length() < 2)
|
if (length() < 2)
|
||||||
|
@ -113,6 +113,7 @@ public:
|
|||||||
* push a uint8_t onto the end of the stream
|
* push a uint8_t onto the end of the stream
|
||||||
*/
|
*/
|
||||||
EXPORT ByteStream& operator<<(const uint8_t b);
|
EXPORT ByteStream& operator<<(const uint8_t b);
|
||||||
|
EXPORT ByteStream& operator<<(const bool b);
|
||||||
/**
|
/**
|
||||||
* push a int16_t onto the end of the stream. The byte order is whatever the native byte order is.
|
* push a int16_t onto the end of the stream. The byte order is whatever the native byte order is.
|
||||||
*/
|
*/
|
||||||
@ -195,6 +196,7 @@ public:
|
|||||||
* extract a uint8_t from the front of the stream.
|
* extract a uint8_t from the front of the stream.
|
||||||
*/
|
*/
|
||||||
EXPORT ByteStream& operator>>(uint8_t& b);
|
EXPORT ByteStream& operator>>(uint8_t& b);
|
||||||
|
EXPORT ByteStream& operator>>(bool& b);
|
||||||
/**
|
/**
|
||||||
* extract a int16_t from the front of the stream. The byte order is whatever the native byte order is.
|
* extract a int16_t from the front of the stream. The byte order is whatever the native byte order is.
|
||||||
*/
|
*/
|
||||||
@ -273,6 +275,7 @@ public:
|
|||||||
* Peek at a uint8_t from the front of the stream.
|
* Peek at a uint8_t from the front of the stream.
|
||||||
*/
|
*/
|
||||||
EXPORT void peek(uint8_t& b) const;
|
EXPORT void peek(uint8_t& b) const;
|
||||||
|
EXPORT void peek(bool& b) const;
|
||||||
/**
|
/**
|
||||||
* Peek at a int16_t from the front of the stream. The byte order is whatever the native byte order is.
|
* Peek at a int16_t from the front of the stream. The byte order is whatever the native byte order is.
|
||||||
*/
|
*/
|
||||||
|
@ -1068,7 +1068,7 @@ void RowAggregation::makeAggFieldsNull(Row& row)
|
|||||||
case execplan::CalpontSystemCatalog::DECIMAL:
|
case execplan::CalpontSystemCatalog::DECIMAL:
|
||||||
case execplan::CalpontSystemCatalog::UDECIMAL:
|
case execplan::CalpontSystemCatalog::UDECIMAL:
|
||||||
{
|
{
|
||||||
int colWidth = fRowGroupOut->getColumnWidth(colOut);
|
uint32_t colWidth = fRowGroupOut->getColumnWidth(colOut);
|
||||||
if (LIKELY(colWidth == datatypes::MAXDECIMALWIDTH))
|
if (LIKELY(colWidth == datatypes::MAXDECIMALWIDTH))
|
||||||
{
|
{
|
||||||
uint32_t offset = row.getOffset(colOut);
|
uint32_t offset = row.getOffset(colOut);
|
||||||
@ -1095,7 +1095,7 @@ void RowAggregation::makeAggFieldsNull(Row& row)
|
|||||||
case execplan::CalpontSystemCatalog::VARBINARY:
|
case execplan::CalpontSystemCatalog::VARBINARY:
|
||||||
case execplan::CalpontSystemCatalog::BLOB:
|
case execplan::CalpontSystemCatalog::BLOB:
|
||||||
{
|
{
|
||||||
int colWidth = fRowGroupOut->getColumnWidth(colOut);
|
uint32_t colWidth = fRowGroupOut->getColumnWidth(colOut);
|
||||||
|
|
||||||
if (colWidth <= datatypes::MAXLEGACYWIDTH)
|
if (colWidth <= datatypes::MAXLEGACYWIDTH)
|
||||||
{
|
{
|
||||||
|
@ -1320,6 +1320,12 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Constructs a RowGroup from its serialized form: the members listed below
// start from their defaults and deserialize() then reads the stream.
RowGroup::RowGroup(ByteStream& bs)
    : columnCount(0)
    , data(nullptr)
    , rgData(nullptr)
    , strings(nullptr)
    , useStringTable(true)
    , hasCollation(false)
    , hasLongStringField(false)
    , sTableThreshold(20)
{
    deserialize(bs);
}
|
||||||
|
|
||||||
RowGroup::~RowGroup()
|
RowGroup::~RowGroup()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -131,6 +131,16 @@ const int16_t rgCommonSize = 8192;
|
|||||||
#pragma warning (disable : 4200)
|
#pragma warning (disable : 4200)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Two step lookup through a pair of vectors: the element of inner found at
// innerIdx is used as the position to read from outer.
// Both pointers must be non-null and both positions must be in range;
// nothing is checked here.
template<typename T>
inline T derefFromTwoVectorPtrs(const std::vector<T>* outer,
                                const std::vector<T>* inner,
                                const T innerIdx)
{
    return (*outer)[(*inner)[innerIdx]];
}
|
||||||
|
|
||||||
class StringStore
|
class StringStore
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -434,6 +444,7 @@ public:
|
|||||||
getPrecision(colIndex));
|
getPrecision(colIndex));
|
||||||
}
|
}
|
||||||
inline long double getLongDoubleField(uint32_t colIndex) const;
|
inline long double getLongDoubleField(uint32_t colIndex) const;
|
||||||
|
inline void storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const;
|
||||||
inline void getInt128Field(uint32_t colIndex, int128_t& x) const;
|
inline void getInt128Field(uint32_t colIndex, int128_t& x) const;
|
||||||
inline datatypes::TSInt128 getTSInt128Field(uint32_t colIndex) const;
|
inline datatypes::TSInt128 getTSInt128Field(uint32_t colIndex) const;
|
||||||
|
|
||||||
@ -559,12 +570,17 @@ public:
|
|||||||
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
||||||
inline uint64_t hash() const; // generates a hash for all cols
|
inline uint64_t hash() const; // generates a hash for all cols
|
||||||
inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hasher, uint32_t col) const;
|
inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hasher, uint32_t col) const;
|
||||||
inline void colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &hasher, uint32_t col) const;
|
inline void colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &hasher, uint32_t keyColsIdx,
|
||||||
inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols) const
|
const std::vector<uint32_t>& keyCols,
|
||||||
|
const std::vector<uint32_t>* smallSideKeyColumnsIds,
|
||||||
|
const std::vector<uint32_t>* smallSideColumnsWidths) const;
|
||||||
|
inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols,
|
||||||
|
const std::vector<uint32_t>* smallSideKeyColumnsIds,
|
||||||
|
const std::vector<uint32_t>* smallSideColumnsWidths) const
|
||||||
{
|
{
|
||||||
datatypes::MariaDBHasher h;
|
datatypes::MariaDBHasher h;
|
||||||
for (uint32_t i = 0; i < keyCols.size(); i++)
|
for (uint32_t i = 0; i < keyCols.size(); i++)
|
||||||
colUpdateMariaDBHasherTypeless(h, keyCols[i]);
|
colUpdateMariaDBHasherTypeless(h, i, keyCols, smallSideKeyColumnsIds, smallSideColumnsWidths);
|
||||||
return h.finalize();
|
return h.finalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -950,30 +966,65 @@ inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &h, uint32_t co
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void Row::colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &h, uint32_t col) const
|
inline void Row::colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &h, uint32_t keyColsIdx,
|
||||||
|
const std::vector<uint32_t>& keyCols,
|
||||||
|
const std::vector<uint32_t>* smallSideKeyColumnsIds,
|
||||||
|
const std::vector<uint32_t>* smallSideColumnsWidths) const
|
||||||
{
|
{
|
||||||
switch (getColType(col))
|
auto rowKeyColIdx = keyCols[keyColsIdx];
|
||||||
|
auto largeSideColType = getColType(rowKeyColIdx);
|
||||||
|
switch (largeSideColType)
|
||||||
{
|
{
|
||||||
case datatypes::SystemCatalog::CHAR:
|
case datatypes::SystemCatalog::CHAR:
|
||||||
case datatypes::SystemCatalog::VARCHAR:
|
case datatypes::SystemCatalog::VARCHAR:
|
||||||
case datatypes::SystemCatalog::BLOB:
|
case datatypes::SystemCatalog::BLOB:
|
||||||
case datatypes::SystemCatalog::TEXT:
|
case datatypes::SystemCatalog::TEXT:
|
||||||
{
|
{
|
||||||
CHARSET_INFO *cs = getCharset(col);
|
CHARSET_INFO *cs = getCharset(rowKeyColIdx);
|
||||||
h.add(cs, getConstString(col));
|
h.add(cs, getConstString(rowKeyColIdx));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case datatypes::SystemCatalog::DECIMAL:
|
||||||
|
{
|
||||||
|
auto width = getColumnWidth(rowKeyColIdx);
|
||||||
|
if (datatypes::isWideDecimalType(largeSideColType,
|
||||||
|
width))
|
||||||
|
{
|
||||||
|
bool joinHasSkewedKeyColumn = (smallSideColumnsWidths);
|
||||||
|
datatypes::TSInt128 val = getTSInt128Field(rowKeyColIdx);
|
||||||
|
if (joinHasSkewedKeyColumn &&
|
||||||
|
width != derefFromTwoVectorPtrs(smallSideColumnsWidths, smallSideKeyColumnsIds, keyColsIdx))
|
||||||
|
{
|
||||||
|
if (val.getValue() >= std::numeric_limits<int64_t>::min() &&
|
||||||
|
val.getValue() <= std::numeric_limits<uint64_t>::max())
|
||||||
|
{
|
||||||
|
h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXLEGACYWIDTH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXDECIMALWIDTH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXDECIMALWIDTH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int64_t val = getIntField(rowKeyColIdx);
|
||||||
|
h.add(&my_charset_bin, (const char*) &val, datatypes::MAXLEGACYWIDTH);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
if (isUnsigned(col))
|
if (isUnsigned(rowKeyColIdx))
|
||||||
{
|
{
|
||||||
uint64_t tb = getUintField(col);
|
uint64_t val = getUintField(rowKeyColIdx);
|
||||||
h.add(&my_charset_bin, (const char*) &tb, 8);
|
h.add(&my_charset_bin, (const char*) &val, datatypes::MAXLEGACYWIDTH);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int64_t val = getIntField(col);
|
int64_t val = getIntField(rowKeyColIdx);
|
||||||
h.add(&my_charset_bin, (const char*) &val, 8);
|
h.add(&my_charset_bin, (const char*) &val, datatypes::MAXLEGACYWIDTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -981,7 +1032,6 @@ inline void Row::colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &h, uin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
|
inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
|
||||||
{
|
{
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
@ -1096,6 +1146,11 @@ inline long double Row::getLongDoubleField(uint32_t colIndex) const
|
|||||||
return *((long double*) &data[offsets[colIndex]]);
|
return *((long double*) &data[offsets[colIndex]]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void Row::storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const
|
||||||
|
{
|
||||||
|
datatypes::TSInt128::assignPtrPtr(x, &data[offsets[colIndex]]);
|
||||||
|
}
|
||||||
|
|
||||||
inline void Row::getInt128Field(uint32_t colIndex, int128_t& x) const
|
inline void Row::getInt128Field(uint32_t colIndex, int128_t& x) const
|
||||||
{
|
{
|
||||||
datatypes::TSInt128::assignPtrPtr(&x, &data[offsets[colIndex]]);
|
datatypes::TSInt128::assignPtrPtr(&x, &data[offsets[colIndex]]);
|
||||||
@ -1489,6 +1544,8 @@ public:
|
|||||||
/** @brief Assignment operator. It copies metadata, not the row data */
|
/** @brief Assignment operator. It copies metadata, not the row data */
|
||||||
RowGroup& operator=(const RowGroup&);
|
RowGroup& operator=(const RowGroup&);
|
||||||
|
|
||||||
|
explicit RowGroup(messageqcpp::ByteStream& bs);
|
||||||
|
|
||||||
~RowGroup();
|
~RowGroup();
|
||||||
|
|
||||||
inline void initRow(Row*, bool forceInlineData = false) const;
|
inline void initRow(Row*, bool forceInlineData = false) const;
|
||||||
|
Reference in New Issue
Block a user