You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
A cleanup for MCOL-4064 Make JOIN collation aware
After creating and populating tables with case-insensitive CHAR(5) columns, in a set of consecutive joins such as: select * from t1, t2 where t1.c1=t2.c1; select * from t1, t2 where t1.c1=t2.c2; select * from t1, t2 where t1.c2=t2.c1; select * from t1, t2 where t1.c2=t2.c2; only the first join reliably compared values case-insensitively. This commit removes the remaining pieces of code that used order_swap() to compare short CHAR columns and uses Charset::strnncollsp() instead, which fixes the issue.
This commit is contained in:
@ -20,6 +20,7 @@
|
|||||||
#define MCS_DATATYPES_STRING_H
|
#define MCS_DATATYPES_STRING_H
|
||||||
|
|
||||||
#include "conststring.h"
|
#include "conststring.h"
|
||||||
|
#include "collation.h"
|
||||||
|
|
||||||
namespace datatypes
|
namespace datatypes
|
||||||
{
|
{
|
||||||
@ -36,6 +37,13 @@ public:
|
|||||||
utils::ConstString res = utils::ConstString((const char *) &mValue, 8);
|
utils::ConstString res = utils::ConstString((const char *) &mValue, 8);
|
||||||
return res.rtrimZero();
|
return res.rtrimZero();
|
||||||
}
|
}
|
||||||
|
static int strnncollsp(const datatypes::Charset &cs, int64_t a, int64_t b)
|
||||||
|
{
|
||||||
|
datatypes::TCharShort sa(a);
|
||||||
|
datatypes::TCharShort sb(b);
|
||||||
|
return cs.strnncollsp(static_cast<utils::ConstString>(sa),
|
||||||
|
static_cast<utils::ConstString>(sb));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include "primitivemsg.h"
|
#include "primitivemsg.h"
|
||||||
#include "blocksize.h"
|
#include "blocksize.h"
|
||||||
#include "lbidlist.h"
|
#include "lbidlist.h"
|
||||||
|
#include "mcs_string.h"
|
||||||
#include "calpontsystemcatalog.h"
|
#include "calpontsystemcatalog.h"
|
||||||
#include "brm.h"
|
#include "brm.h"
|
||||||
#include "brmtypes.h"
|
#include "brmtypes.h"
|
||||||
@ -41,18 +42,6 @@ using namespace BRM;
|
|||||||
namespace joblist
|
namespace joblist
|
||||||
{
|
{
|
||||||
|
|
||||||
inline uint64_t order_swap(uint64_t x)
|
|
||||||
{
|
|
||||||
return (x >> 56) |
|
|
||||||
((x << 40) & 0x00FF000000000000ULL) |
|
|
||||||
((x << 24) & 0x0000FF0000000000ULL) |
|
|
||||||
((x << 8) & 0x000000FF00000000ULL) |
|
|
||||||
((x >> 8) & 0x00000000FF000000ULL) |
|
|
||||||
((x >> 24) & 0x0000000000FF0000ULL) |
|
|
||||||
((x >> 40) & 0x000000000000FF00ULL) |
|
|
||||||
(x << 56);
|
|
||||||
}
|
|
||||||
|
|
||||||
LBIDList::LBIDList()
|
LBIDList::LBIDList()
|
||||||
{
|
{
|
||||||
throw logic_error("Don't use LBIDList()");
|
throw logic_error("Don't use LBIDList()");
|
||||||
@ -361,7 +350,8 @@ int LBIDList::getMinMaxFromEntries(T& min, T& max, int32_t& seq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, CalpontSystemCatalog::ColDataType type,
|
void LBIDList::UpdateMinMax(T min, T max, int64_t lbid,
|
||||||
|
const CalpontSystemCatalog::ColType & type,
|
||||||
bool validData)
|
bool validData)
|
||||||
{
|
{
|
||||||
MinMaxPartition* mmp = NULL;
|
MinMaxPartition* mmp = NULL;
|
||||||
@ -400,17 +390,18 @@ void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, CalpontSystemCatalog::Co
|
|||||||
|
|
||||||
if (mmp->isValid == BRM::CP_INVALID)
|
if (mmp->isValid == BRM::CP_INVALID)
|
||||||
{
|
{
|
||||||
if (datatypes::isCharType(type))
|
if (datatypes::isCharType(type.colDataType))
|
||||||
{
|
{
|
||||||
if (order_swap(min) < order_swap(mmp->min) ||
|
datatypes::Charset cs(const_cast<CalpontSystemCatalog::ColType &>(type).getCharset());
|
||||||
|
if (datatypes::TCharShort::strnncollsp(cs, min, mmp->min) < 0 ||
|
||||||
mmp->min == numeric_limits<int64_t>::max())
|
mmp->min == numeric_limits<int64_t>::max())
|
||||||
mmp->min = min;
|
mmp->min = min;
|
||||||
|
|
||||||
if (order_swap(max) > order_swap(mmp->max) ||
|
if (datatypes::TCharShort::strnncollsp(cs, max, mmp->max) > 0 ||
|
||||||
mmp->max == numeric_limits<int64_t>::min())
|
mmp->max == numeric_limits<int64_t>::min())
|
||||||
mmp->max = max;
|
mmp->max = max;
|
||||||
}
|
}
|
||||||
else if (datatypes::isUnsigned(type))
|
else if (datatypes::isUnsigned(type.colDataType))
|
||||||
{
|
{
|
||||||
if (static_cast<uint64_t>(min) < static_cast<uint64_t>(mmp->min))
|
if (static_cast<uint64_t>(min) < static_cast<uint64_t>(mmp->min))
|
||||||
mmp->min = min;
|
mmp->min = min;
|
||||||
@ -702,18 +693,17 @@ static inline bool compareStr(const datatypes::Charset &cs,
|
|||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
bool LBIDList::checkSingleValue(T min, T max, T value,
|
bool LBIDList::checkSingleValue(T min, T max, T value,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type)
|
const execplan::CalpontSystemCatalog::ColType & type)
|
||||||
{
|
{
|
||||||
if (isCharType(type))
|
if (isCharType(type.colDataType))
|
||||||
{
|
{
|
||||||
// MCOL-641 LBIDList::CasualPartitionDataType() returns false if
|
// MCOL-641 LBIDList::CasualPartitionDataType() returns false if
|
||||||
// width > 8 for a character type, so T cannot be int128_t here
|
// width > 8 for a character type, so T cannot be int128_t here
|
||||||
uint64_t mmin = order_swap(min);
|
datatypes::Charset cs(const_cast<execplan::CalpontSystemCatalog::ColType&>(type).getCharset());
|
||||||
uint64_t mmax = order_swap(max);
|
return datatypes::TCharShort::strnncollsp(cs, value, min) >= 0 &&
|
||||||
uint64_t vvalue = order_swap(value);
|
datatypes::TCharShort::strnncollsp(cs, value, max) <= 0;
|
||||||
return (vvalue >= mmin && vvalue <= mmax);
|
|
||||||
}
|
}
|
||||||
else if (isUnsigned(type))
|
else if (isUnsigned(type.colDataType))
|
||||||
{
|
{
|
||||||
return (static_cast<uint64_t>(value) >= static_cast<uint64_t>(min) &&
|
return (static_cast<uint64_t>(value) >= static_cast<uint64_t>(min) &&
|
||||||
static_cast<uint64_t>(value) <= static_cast<uint64_t>(max));
|
static_cast<uint64_t>(value) <= static_cast<uint64_t>(max));
|
||||||
@ -726,19 +716,17 @@ bool LBIDList::checkSingleValue(T min, T max, T value,
|
|||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
bool LBIDList::checkRangeOverlap(T min, T max, T tmin, T tmax,
|
bool LBIDList::checkRangeOverlap(T min, T max, T tmin, T tmax,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type)
|
const execplan::CalpontSystemCatalog::ColType & type)
|
||||||
{
|
{
|
||||||
if (isCharType(type))
|
if (isCharType(type.colDataType))
|
||||||
{
|
{
|
||||||
// MCOL-641 LBIDList::CasualPartitionDataType() returns false if
|
// MCOL-641 LBIDList::CasualPartitionDataType() returns false if
|
||||||
// width > 8 for a character type, so T cannot be int128_t here
|
// width > 8 for a character type, so T cannot be int128_t here
|
||||||
uint64_t min2 = order_swap(min);
|
datatypes::Charset cs(const_cast<execplan::CalpontSystemCatalog::ColType&>(type).getCharset());
|
||||||
uint64_t max2 = order_swap(max);
|
return datatypes::TCharShort::strnncollsp(cs, tmin, max) <= 0 &&
|
||||||
uint64_t tmin2 = order_swap(tmin);
|
datatypes::TCharShort::strnncollsp(cs, tmax, min) >= 0;
|
||||||
uint64_t tmax2 = order_swap(tmax);
|
|
||||||
return (tmin2 <= max2 && tmax2 >= min2);
|
|
||||||
}
|
}
|
||||||
else if (isUnsigned(type))
|
else if (isUnsigned(type.colDataType))
|
||||||
{
|
{
|
||||||
return (static_cast<uint64_t>(tmin) <= static_cast<uint64_t>(max) &&
|
return (static_cast<uint64_t>(tmin) <= static_cast<uint64_t>(max) &&
|
||||||
static_cast<uint64_t>(tmax) >= static_cast<uint64_t>(min));
|
static_cast<uint64_t>(tmax) >= static_cast<uint64_t>(min));
|
||||||
@ -964,27 +952,27 @@ bool LBIDList::GetMinMax<int64_t>(int64_t* min, int64_t* max, int64_t* seq,
|
|||||||
|
|
||||||
template
|
template
|
||||||
void LBIDList::UpdateMinMax<int128_t>(int128_t min, int128_t max, int64_t lbid,
|
void LBIDList::UpdateMinMax<int128_t>(int128_t min, int128_t max, int64_t lbid,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type, bool validData = true);
|
const execplan::CalpontSystemCatalog::ColType & type, bool validData = true);
|
||||||
|
|
||||||
template
|
template
|
||||||
void LBIDList::UpdateMinMax<int64_t>(int64_t min, int64_t max, int64_t lbid,
|
void LBIDList::UpdateMinMax<int64_t>(int64_t min, int64_t max, int64_t lbid,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type, bool validData = true);
|
const execplan::CalpontSystemCatalog::ColType & type, bool validData = true);
|
||||||
|
|
||||||
template
|
template
|
||||||
bool LBIDList::checkSingleValue<int128_t>(int128_t min, int128_t max, int128_t value,
|
bool LBIDList::checkSingleValue<int128_t>(int128_t min, int128_t max, int128_t value,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type);
|
const execplan::CalpontSystemCatalog::ColType & type);
|
||||||
|
|
||||||
template
|
template
|
||||||
bool LBIDList::checkSingleValue<int64_t>(int64_t min, int64_t max, int64_t value,
|
bool LBIDList::checkSingleValue<int64_t>(int64_t min, int64_t max, int64_t value,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type);
|
const execplan::CalpontSystemCatalog::ColType & type);
|
||||||
|
|
||||||
template
|
template
|
||||||
bool LBIDList::checkRangeOverlap<int128_t>(int128_t min, int128_t max, int128_t tmin, int128_t tmax,
|
bool LBIDList::checkRangeOverlap<int128_t>(int128_t min, int128_t max, int128_t tmin, int128_t tmax,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type);
|
const execplan::CalpontSystemCatalog::ColType & type);
|
||||||
|
|
||||||
template
|
template
|
||||||
bool LBIDList::checkRangeOverlap<int64_t>(int64_t min, int64_t max, int64_t tmin, int64_t tmax,
|
bool LBIDList::checkRangeOverlap<int64_t>(int64_t min, int64_t max, int64_t tmin, int64_t tmax,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type);
|
const execplan::CalpontSystemCatalog::ColType & type);
|
||||||
|
|
||||||
} //namespace joblist
|
} //namespace joblist
|
||||||
|
|
||||||
|
@ -104,7 +104,7 @@ public:
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void UpdateMinMax(T min, T max, int64_t lbid,
|
void UpdateMinMax(T min, T max, int64_t lbid,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type, bool validData = true);
|
const execplan::CalpontSystemCatalog::ColType & type, bool validData = true);
|
||||||
|
|
||||||
void UpdateAllPartitionInfo(const execplan::CalpontSystemCatalog::ColType& colType);
|
void UpdateAllPartitionInfo(const execplan::CalpontSystemCatalog::ColType& colType);
|
||||||
|
|
||||||
@ -118,11 +118,11 @@ public:
|
|||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
bool checkSingleValue(T min, T max, T value,
|
bool checkSingleValue(T min, T max, T value,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type);
|
const execplan::CalpontSystemCatalog::ColType & type);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
bool checkRangeOverlap(T min, T max, T tmin, T tmax,
|
bool checkRangeOverlap(T min, T max, T tmin, T tmax,
|
||||||
execplan::CalpontSystemCatalog::ColDataType type);
|
const execplan::CalpontSystemCatalog::ColType & type);
|
||||||
|
|
||||||
// check the column data type and the column size to determine if it
|
// check the column data type and the column size to determine if it
|
||||||
// is a data type to apply casual paritioning.
|
// is a data type to apply casual paritioning.
|
||||||
|
@ -2396,12 +2396,12 @@ void TupleBPS::receiveMultiPrimitiveMessages(uint32_t threadID)
|
|||||||
{
|
{
|
||||||
if (fColType.colWidth > 8)
|
if (fColType.colWidth > 8)
|
||||||
{
|
{
|
||||||
lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, fColType.colDataType,
|
lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, fColType,
|
||||||
cpv[i].valid);
|
cpv[i].valid);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, fColType.colDataType,
|
lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, fColType,
|
||||||
cpv[i].valid);
|
cpv[i].valid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3289,12 +3289,12 @@ void TupleBPS::addCPPredicates(uint32_t OID, const vector<int128_t>& vals, bool
|
|||||||
if (!isSmallSideWideDecimal)
|
if (!isSmallSideWideDecimal)
|
||||||
{
|
{
|
||||||
runtimeCPFlags[j] = ll.checkRangeOverlap(min, max, (int64_t) vals[0], (int64_t) vals[1],
|
runtimeCPFlags[j] = ll.checkRangeOverlap(min, max, (int64_t) vals[0], (int64_t) vals[1],
|
||||||
colType.colDataType) && runtimeCPFlags[j];
|
colType) && runtimeCPFlags[j];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
runtimeCPFlags[j] = ll.checkRangeOverlap((int128_t) min, (int128_t) max, vals[0], vals[1],
|
runtimeCPFlags[j] = ll.checkRangeOverlap((int128_t) min, (int128_t) max, vals[0], vals[1],
|
||||||
colType.colDataType) && runtimeCPFlags[j];
|
colType) && runtimeCPFlags[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -3306,12 +3306,12 @@ void TupleBPS::addCPPredicates(uint32_t OID, const vector<int128_t>& vals, bool
|
|||||||
if (!isSmallSideWideDecimal)
|
if (!isSmallSideWideDecimal)
|
||||||
{
|
{
|
||||||
intersection = intersection ||
|
intersection = intersection ||
|
||||||
ll.checkSingleValue(min, max, (int64_t) vals[k], colType.colDataType);
|
ll.checkSingleValue(min, max, (int64_t) vals[k], colType);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intersection = intersection ||
|
intersection = intersection ||
|
||||||
ll.checkSingleValue((int128_t) min, (int128_t) max, vals[k], colType.colDataType);
|
ll.checkSingleValue((int128_t) min, (int128_t) max, vals[k], colType);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3331,14 +3331,14 @@ void TupleBPS::addCPPredicates(uint32_t OID, const vector<int128_t>& vals, bool
|
|||||||
{
|
{
|
||||||
if (isRange)
|
if (isRange)
|
||||||
runtimeCPFlags[j] = ll.checkRangeOverlap(bigMin, bigMax, vals[0], vals[1],
|
runtimeCPFlags[j] = ll.checkRangeOverlap(bigMin, bigMax, vals[0], vals[1],
|
||||||
colType.colDataType) && runtimeCPFlags[j];
|
colType) && runtimeCPFlags[j];
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intersection = false;
|
intersection = false;
|
||||||
|
|
||||||
for (k = 0; k < vals.size(); k++)
|
for (k = 0; k < vals.size(); k++)
|
||||||
intersection = intersection ||
|
intersection = intersection ||
|
||||||
ll.checkSingleValue(bigMin, bigMax, vals[k], colType.colDataType);
|
ll.checkSingleValue(bigMin, bigMax, vals[k], colType);
|
||||||
|
|
||||||
runtimeCPFlags[j] = intersection && runtimeCPFlags[j];
|
runtimeCPFlags[j] = intersection && runtimeCPFlags[j];
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
#include "lbidlist.h"
|
#include "lbidlist.h"
|
||||||
#include "spinlock.h"
|
#include "spinlock.h"
|
||||||
#include "vlarray.h"
|
#include "vlarray.h"
|
||||||
|
#include "mcs_string.h"
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -1127,15 +1128,16 @@ void TupleJoiner::updateCPData(const Row& r)
|
|||||||
|
|
||||||
if (r.isCharType(colIdx))
|
if (r.isCharType(colIdx))
|
||||||
{
|
{
|
||||||
|
datatypes::Charset cs(r.getCharset(col));
|
||||||
int64_t val = r.getIntField(colIdx);
|
int64_t val = r.getIntField(colIdx);
|
||||||
|
|
||||||
if (order_swap(val) < order_swap((int64_t) min) ||
|
if (datatypes::TCharShort::strnncollsp(cs, val, min) < 0 ||
|
||||||
((int64_t) min) == numeric_limits<int64_t>::max())
|
((int64_t) min) == numeric_limits<int64_t>::max())
|
||||||
{
|
{
|
||||||
min = val;
|
min = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (order_swap(val) > order_swap((int64_t) max) ||
|
if (datatypes::TCharShort::strnncollsp(cs, val, max) > 0 ||
|
||||||
((int64_t) max) == numeric_limits<int64_t>::min())
|
((int64_t) max) == numeric_limits<int64_t>::min())
|
||||||
{
|
{
|
||||||
max = val;
|
max = val;
|
||||||
|
@ -44,18 +44,6 @@
|
|||||||
namespace joiner
|
namespace joiner
|
||||||
{
|
{
|
||||||
|
|
||||||
inline uint64_t order_swap(uint64_t x)
|
|
||||||
{
|
|
||||||
return (x >> 56) |
|
|
||||||
((x << 40) & 0x00FF000000000000ULL) |
|
|
||||||
((x << 24) & 0x0000FF0000000000ULL) |
|
|
||||||
((x << 8) & 0x000000FF00000000ULL) |
|
|
||||||
((x >> 8) & 0x00000000FF000000ULL) |
|
|
||||||
((x >> 24) & 0x0000000000FF0000ULL) |
|
|
||||||
((x >> 40) & 0x000000000000FF00ULL) |
|
|
||||||
(x << 56);
|
|
||||||
}
|
|
||||||
|
|
||||||
class TypelessData
|
class TypelessData
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
Reference in New Issue
Block a user