You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-4809 This patch introduces vectorized scanning/filtering for short CHAR/VARCHAR columns
Short CHAR/VARCHAR column values contain integer-encoded strings. After certain manipulations (orderSwap(strnxfrm(str))) the values become integers that preserve the ordering of the original strings according to the translation rules of the collation. The prepared values are then ready to be SIMD-processed.
This commit is contained in:
@ -1,5 +1,5 @@
|
|||||||
/* Copyright (C) 2014 InfiniDB, Inc.
|
/* Copyright (C) 2014 InfiniDB, Inc.
|
||||||
Copyright (C) 2016-2021 MariaDB Corporation
|
Copyright (C) 2016-2022 MariaDB Corporation
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
This program is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU General Public License
|
modify it under the terms of the GNU General Public License
|
||||||
@ -22,6 +22,7 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <type_traits>
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#else
|
#else
|
||||||
@ -62,6 +63,55 @@ inline uint64_t order_swap(uint64_t x)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dummy template
|
||||||
|
template<typename T,
|
||||||
|
typename std::enable_if<sizeof(T) >= sizeof(uint128_t), T>::type* = nullptr>
|
||||||
|
inline T orderSwap(T x)
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reverses the byte order of an 8-byte integral value.
//
// All shifts are performed on the unsigned 64-bit representation of x.
// Shifting T directly is wrong for signed types: the arithmetic right shift
// of a negative value drags the sign bit into the result, and the left shift
// of a negative value is not portable before C++20.
//
// @param x value whose bytes are to be reversed
// @return x with byte 0 exchanged with byte 7, byte 1 with byte 6, and so on
template <typename T, typename std::enable_if<sizeof(T) == sizeof(int64_t), T>::type* = nullptr>
inline T orderSwap(T x)
{
  static_assert(std::is_integral<T>::value, "orderSwap requires an integral type");
  const uint64_t u = static_cast<uint64_t>(x);
  const uint64_t swapped = (u >> 56) |
                           ((u << 40) & 0x00FF000000000000ULL) |
                           ((u << 24) & 0x0000FF0000000000ULL) |
                           ((u << 8) & 0x000000FF00000000ULL) |
                           ((u >> 8) & 0x00000000FF000000ULL) |
                           ((u >> 24) & 0x0000000000FF0000ULL) |
                           ((u >> 40) & 0x000000000000FF00ULL) |
                           (u << 56);
  return static_cast<T>(swapped);
}
|
||||||
|
|
||||||
|
// Reverses the byte order of a 4-byte integral value.
//
// The shifts operate on the unsigned 32-bit representation of x so that a
// negative signed argument does not leak its sign bit into the result through
// an arithmetic right shift (and is never left-shifted as a negative value).
//
// @param x value whose bytes are to be reversed
// @return x with byte 0 exchanged with byte 3 and byte 1 with byte 2
template <typename T, typename std::enable_if<sizeof(T) == sizeof(int32_t), T>::type* = nullptr>
inline T orderSwap(T x)
{
  static_assert(std::is_integral<T>::value, "orderSwap requires an integral type");
  const uint32_t u = static_cast<uint32_t>(x);
  const uint32_t swapped = (u >> 24) |
                           ((u << 8) & 0x00FF0000U) |
                           ((u >> 8) & 0x0000FF00U) |
                           (u << 24);
  return static_cast<T>(swapped);
}
|
||||||
|
|
||||||
|
// Exchanges the two bytes of a 2-byte integral value.
//
// The value is converted to uint16_t first. A negative int16_t would otherwise
// be sign-extended by integer promotion, and the arithmetic right shift would
// then set every bit of the resulting high byte.
//
// @param x value whose two bytes are to be exchanged
// @return x with its low and high bytes swapped
template <typename T, typename std::enable_if<sizeof(T) == sizeof(int16_t), T>::type* = nullptr>
inline T orderSwap(T x)
{
  static_assert(std::is_integral<T>::value, "orderSwap requires an integral type");
  const uint16_t u = static_cast<uint16_t>(x);
  const uint16_t swapped = static_cast<uint16_t>((u >> 8) | (u << 8));
  return static_cast<T>(swapped);
}
|
||||||
|
|
||||||
|
// Single-byte overload: a one-byte value has no byte order to reverse,
// so the argument is returned unchanged.
template <typename T, typename std::enable_if<(sizeof(T) == sizeof(uint8_t)), T>::type* = nullptr>
inline T orderSwap(T x)
{
  return x;
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
inline int compareBlock(const void* a, const void* b)
|
inline int compareBlock(const void* a, const void* b)
|
||||||
{
|
{
|
||||||
@ -107,8 +157,11 @@ inline bool colCompare_(const T& val1, const T& val2, uint8_t COP)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool colCompareStr(const ColRequestHeaderDataType& type, uint8_t COP, const utils::ConstString& val1,
|
inline bool colCompareStr(const ColRequestHeaderDataType &type,
|
||||||
const utils::ConstString& val2)
|
uint8_t COP,
|
||||||
|
const utils::ConstString &val1,
|
||||||
|
const utils::ConstString &val2,
|
||||||
|
const bool printOut = false)
|
||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
bool rc = primitives::StringComparator(type).op(&error, COP, val1, val2);
|
bool rc = primitives::StringComparator(type).op(&error, COP, val1, val2);
|
||||||
@ -1179,7 +1232,7 @@ void scalarFiltering(
|
|||||||
#if defined(__x86_64__)
|
#if defined(__x86_64__)
|
||||||
template <typename VT, typename SIMD_WRAPPER_TYPE, bool HAS_INPUT_RIDS, typename T,
|
template <typename VT, typename SIMD_WRAPPER_TYPE, bool HAS_INPUT_RIDS, typename T,
|
||||||
typename std::enable_if<HAS_INPUT_RIDS == false, T>::type* = nullptr>
|
typename std::enable_if<HAS_INPUT_RIDS == false, T>::type* = nullptr>
|
||||||
inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray, const T* origSrcArray,
|
inline SIMD_WRAPPER_TYPE simdDataLoad(VT& processor, const T* srcArray, const T* origSrcArray,
|
||||||
const primitives::RIDType* ridArray, const uint16_t iter)
|
const primitives::RIDType* ridArray, const uint16_t iter)
|
||||||
{
|
{
|
||||||
return {processor.loadFrom(reinterpret_cast<const char*>(srcArray))};
|
return {processor.loadFrom(reinterpret_cast<const char*>(srcArray))};
|
||||||
@ -1189,7 +1242,7 @@ inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray,
|
|||||||
// TODO Move the logic into simd namespace class methods and use intrinsics
|
// TODO Move the logic into simd namespace class methods and use intrinsics
|
||||||
template <typename VT, typename SIMD_WRAPPER_TYPE, bool HAS_INPUT_RIDS, typename T,
|
template <typename VT, typename SIMD_WRAPPER_TYPE, bool HAS_INPUT_RIDS, typename T,
|
||||||
typename std::enable_if<HAS_INPUT_RIDS == true, T>::type* = nullptr>
|
typename std::enable_if<HAS_INPUT_RIDS == true, T>::type* = nullptr>
|
||||||
inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray, const T* origSrcArray,
|
inline SIMD_WRAPPER_TYPE simdDataLoad(VT& processor, const T* srcArray, const T* origSrcArray,
|
||||||
const primitives::RIDType* ridArray, const uint16_t iter)
|
const primitives::RIDType* ridArray, const uint16_t iter)
|
||||||
{
|
{
|
||||||
constexpr const uint16_t WIDTH = sizeof(T);
|
constexpr const uint16_t WIDTH = sizeof(T);
|
||||||
@ -1205,6 +1258,32 @@ inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray,
|
|||||||
return {result};
|
return {result};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <ENUM_KIND KIND, typename VT,typename SIMD_WRAPPER_TYPE, typename T,
|
||||||
|
typename std::enable_if<KIND != KIND_TEXT, T>::type* = nullptr>
|
||||||
|
inline SIMD_WRAPPER_TYPE simdSwapedOrderDataLoad(const ColRequestHeaderDataType &type, VT& processor, typename VT::SimdType& dataVector)
|
||||||
|
{
|
||||||
|
return {dataVector};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <ENUM_KIND KIND, typename VT,typename SIMD_WRAPPER_TYPE, typename T,
|
||||||
|
typename std::enable_if<KIND == KIND_TEXT, T>::type* = nullptr>
|
||||||
|
inline SIMD_WRAPPER_TYPE simdSwapedOrderDataLoad(const ColRequestHeaderDataType &type,
|
||||||
|
VT& processor, typename VT::SimdType& dataVector)
|
||||||
|
{
|
||||||
|
constexpr const uint16_t WIDTH = sizeof(T);
|
||||||
|
constexpr const uint16_t VECTOR_SIZE = VT::vecByteSize / WIDTH;
|
||||||
|
using SimdType = typename VT::SimdType;
|
||||||
|
SimdType result;
|
||||||
|
T* resultTypedPtr = reinterpret_cast<T*>(&result);
|
||||||
|
T* srcTypedPtr = reinterpret_cast<T*>(&dataVector);
|
||||||
|
for (uint32_t i = 0; i < VECTOR_SIZE; ++i)
|
||||||
|
{
|
||||||
|
utils::ConstString s{reinterpret_cast<const char*>(&srcTypedPtr[i]), WIDTH};
|
||||||
|
resultTypedPtr[i] = orderSwap(type.strnxfrm<T>(s.rtrimZero()));
|
||||||
|
}
|
||||||
|
return {result};
|
||||||
|
}
|
||||||
|
|
||||||
// This routine filters input block in a vectorized manner.
|
// This routine filters input block in a vectorized manner.
|
||||||
// It supports all output types, all input types.
|
// It supports all output types, all input types.
|
||||||
// It doesn't support KIND==TEXT so upper layers filters this KIND out beforehand.
|
// KIND==TEXT is supported only for collations that map bytes to weights 1:1; upper layers filter the other TEXT cases out beforehand.
|
||||||
@ -1214,8 +1293,8 @@ inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray,
|
|||||||
// to glue the masks produced by actual filters.
|
// to glue the masks produced by actual filters.
|
||||||
// Then it takes a vector of data, run filters and logical function using pointers.
|
// Then it takes a vector of data, run filters and logical function using pointers.
|
||||||
// See the corresponding dispatcher to get more details on vector processing class.
|
// See the corresponding dispatcher to get more details on vector processing class.
|
||||||
template <typename T, typename VT, bool HAS_INPUT_RIDS, int OUTPUT_TYPE, ENUM_KIND KIND, typename FT,
|
template<typename T, typename VT, bool HAS_INPUT_RIDS, int OUTPUT_TYPE,
|
||||||
typename ST>
|
ENUM_KIND KIND, typename FT, typename ST>
|
||||||
void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T* srcArray,
|
void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T* srcArray,
|
||||||
const uint32_t srcSize, primitives::RIDType* ridArray, const uint16_t ridSize,
|
const uint32_t srcSize, primitives::RIDType* ridArray, const uint16_t ridSize,
|
||||||
ParsedColumnFilter* parsedColumnFilter, const bool validMinMax, const T emptyValue,
|
ParsedColumnFilter* parsedColumnFilter, const bool validMinMax, const T emptyValue,
|
||||||
@ -1225,8 +1304,12 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
using SimdType = typename VT::SimdType;
|
using SimdType = typename VT::SimdType;
|
||||||
using SimdWrapperType = typename VT::SimdWrapperType;
|
using SimdWrapperType = typename VT::SimdWrapperType;
|
||||||
using FilterType = typename VT::FilterType;
|
using FilterType = typename VT::FilterType;
|
||||||
|
using UT = typename std::conditional<std::is_unsigned<FilterType>::value || datatypes::is_uint128_t<FilterType>::value || std::is_same<double, FilterType>::value,
|
||||||
|
FilterType, typename datatypes::make_unsigned<FilterType>::type>::type;
|
||||||
VT simdProcessor;
|
VT simdProcessor;
|
||||||
SimdType dataVec;
|
SimdType dataVec;
|
||||||
|
[[maybe_unused]] SimdType swapedOrderDataVec;
|
||||||
|
[[maybe_unused]] auto typeHolder = in->colType;
|
||||||
SimdType emptyFilterArgVec = simdProcessor.emptyNullLoadValue(emptyValue);
|
SimdType emptyFilterArgVec = simdProcessor.emptyNullLoadValue(emptyValue);
|
||||||
SimdType nullFilterArgVec = simdProcessor.emptyNullLoadValue(nullValue);
|
SimdType nullFilterArgVec = simdProcessor.emptyNullLoadValue(nullValue);
|
||||||
MT writeMask, nonEmptyMask, nonNullMask, nonNullOrEmptyMask;
|
MT writeMask, nonEmptyMask, nonNullMask, nonNullOrEmptyMask;
|
||||||
@ -1292,11 +1375,27 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
for (uint32_t j = 0; j < filterCount; ++j)
|
for (uint32_t j = 0; j < filterCount; ++j)
|
||||||
{
|
{
|
||||||
// Preload filter argument values only once.
|
// Preload filter argument values only once.
|
||||||
filterArgsVectors.push_back(simdProcessor.loadValue(*((FilterType*)&filterValues[j])));
|
if constexpr (KIND == KIND_TEXT)
|
||||||
|
{
|
||||||
|
// Preload filter argument values only once.
|
||||||
|
// First cast filter value as the corresponding unsigned int value
|
||||||
|
UT filterValue = *((UT*)&filterValues[j]);
|
||||||
|
// Cast to ConstString to preprocess the string
|
||||||
|
utils::ConstString s{reinterpret_cast<const char*>(&filterValue), sizeof(UT)};
|
||||||
|
// Strip all 0 bytes on the right, convert byte into collation weights array
|
||||||
|
// and swap bytes order.
|
||||||
|
UT bigEndianFilterWeights = orderSwap(typeHolder.strnxfrm<UT>(s.rtrimZero()));
|
||||||
|
filterArgsVectors.push_back(simdProcessor.loadValue(bigEndianFilterWeights));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
FilterType filterValue = *((FilterType*)&filterValues[j]);
|
||||||
|
filterArgsVectors.push_back(simdProcessor.loadValue(filterValue));
|
||||||
|
}
|
||||||
switch (filterCOPs[j])
|
switch (filterCOPs[j])
|
||||||
{
|
{
|
||||||
case (COMPARE_EQ):
|
case (COMPARE_EQ):
|
||||||
// Skipping extra filter pass generated by IS NULL
|
// Filter against NULL value
|
||||||
if (memcmp(&filterValues[j], &nullValue, sizeof(nullValue)) == 0)
|
if (memcmp(&filterValues[j], &nullValue, sizeof(nullValue)) == 0)
|
||||||
copFunctorVec.push_back(std::mem_fn(&VT::nullEmptyCmpEq));
|
copFunctorVec.push_back(std::mem_fn(&VT::nullEmptyCmpEq));
|
||||||
else
|
else
|
||||||
@ -1329,9 +1428,10 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
{
|
{
|
||||||
primitives::RIDType ridOffset = i * VECTOR_SIZE;
|
primitives::RIDType ridOffset = i * VECTOR_SIZE;
|
||||||
assert(!HAS_INPUT_RIDS || (HAS_INPUT_RIDS && ridSize >= ridOffset));
|
assert(!HAS_INPUT_RIDS || (HAS_INPUT_RIDS && ridSize >= ridOffset));
|
||||||
dataVec = simdDataLoadTemplate<VT, SimdWrapperType, HAS_INPUT_RIDS, T>(simdProcessor, srcArray,
|
dataVec = simdDataLoad<VT, SimdWrapperType, HAS_INPUT_RIDS, T>(simdProcessor, srcArray,
|
||||||
origSrcArray, ridArray, i)
|
origSrcArray, ridArray, i).v;
|
||||||
.v;
|
if constexpr(KIND==KIND_TEXT)
|
||||||
|
swapedOrderDataVec = simdSwapedOrderDataLoad<KIND, VT, SimdWrapperType, T>(typeHolder, simdProcessor, dataVec).v;
|
||||||
nonEmptyMask = simdProcessor.nullEmptyCmpNe(dataVec, emptyFilterArgVec);
|
nonEmptyMask = simdProcessor.nullEmptyCmpNe(dataVec, emptyFilterArgVec);
|
||||||
writeMask = nonEmptyMask;
|
writeMask = nonEmptyMask;
|
||||||
// NULL check
|
// NULL check
|
||||||
@ -1346,7 +1446,11 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
for (uint32_t j = 0; j < filterCount; ++j)
|
for (uint32_t j = 0; j < filterCount; ++j)
|
||||||
{
|
{
|
||||||
// filter using compiled filter and preloaded filter argument
|
// filter using compiled filter and preloaded filter argument
|
||||||
filterMask = copFunctorVec[j](simdProcessor, dataVec, filterArgsVectors[j]);
|
if constexpr(KIND==KIND_TEXT)
|
||||||
|
filterMask = copFunctorVec[j](simdProcessor, swapedOrderDataVec, filterArgsVectors[j]);
|
||||||
|
else
|
||||||
|
filterMask = copFunctorVec[j](simdProcessor, dataVec, filterArgsVectors[j]);
|
||||||
|
|
||||||
filterMask = bopFunctor(prevFilterMask, filterMask);
|
filterMask = bopFunctor(prevFilterMask, filterMask);
|
||||||
prevFilterMask = filterMask;
|
prevFilterMask = filterMask;
|
||||||
}
|
}
|
||||||
@ -1389,7 +1493,6 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
out->Min = Min;
|
out->Min = Min;
|
||||||
out->Max = Max;
|
out->Max = Max;
|
||||||
}
|
}
|
||||||
|
|
||||||
// process the tail. scalarFiltering changes out contents, e.g. Min/Max, NVALS, RIDs and values array
|
// process the tail. scalarFiltering changes out contents, e.g. Min/Max, NVALS, RIDs and values array
|
||||||
// This tail also sets out::Min/Max, out::validMinMax if validMinMax is set.
|
// This tail also sets out::Min/Max, out::validMinMax if validMinMax is set.
|
||||||
uint32_t processedSoFar = rid;
|
uint32_t processedSoFar = rid;
|
||||||
@ -1526,7 +1629,8 @@ void filterColumnData(NewColRequestHeader* in, ColResultHeader* out, uint16_t* r
|
|||||||
|
|
||||||
#if defined(__x86_64__)
|
#if defined(__x86_64__)
|
||||||
// Don't use vectorized filtering for text based data types.
|
// Don't use vectorized filtering for text based data types.
|
||||||
if (KIND <= KIND_FLOAT && WIDTH < 16)
|
if (WIDTH < 16 &&
|
||||||
|
(KIND != KIND_TEXT || (KIND == KIND_TEXT && in->colType.strnxfrmIsValid()) ))
|
||||||
{
|
{
|
||||||
bool canUseFastFiltering = true;
|
bool canUseFastFiltering = true;
|
||||||
for (uint32_t i = 0; i < filterCount; ++i)
|
for (uint32_t i = 0; i < filterCount; ++i)
|
||||||
@ -1672,6 +1776,8 @@ template <typename T,
|
|||||||
void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, ColResultHeader* out)
|
void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, ColResultHeader* out)
|
||||||
{
|
{
|
||||||
constexpr int W = sizeof(T);
|
constexpr int W = sizeof(T);
|
||||||
|
using UT = typename std::conditional<std::is_unsigned<T>::value || datatypes::is_uint128_t<T>::value, T,
|
||||||
|
typename datatypes::make_unsigned<T>::type>::type;
|
||||||
const uint16_t ridSize = in->NVALS;
|
const uint16_t ridSize = in->NVALS;
|
||||||
uint16_t* ridArray = in->getRIDArrayPtr(W);
|
uint16_t* ridArray = in->getRIDArrayPtr(W);
|
||||||
const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE : BLOCK_SIZE / W;
|
const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE : BLOCK_SIZE / W;
|
||||||
@ -1682,16 +1788,12 @@ void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, C
|
|||||||
dataType == execplan::CalpontSystemCatalog::TEXT) &&
|
dataType == execplan::CalpontSystemCatalog::TEXT) &&
|
||||||
!isDictTokenScan(in))
|
!isDictTokenScan(in))
|
||||||
{
|
{
|
||||||
using UT = typename std::conditional<std::is_unsigned<T>::value, T,
|
|
||||||
typename datatypes::make_unsigned<T>::type>::type;
|
|
||||||
filterColumnData<UT, KIND_TEXT>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
|
filterColumnData<UT, KIND_TEXT>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (datatypes::isUnsigned(dataType))
|
if (datatypes::isUnsigned(dataType))
|
||||||
{
|
{
|
||||||
using UT = typename std::conditional<std::is_unsigned<T>::value || datatypes::is_uint128_t<T>::value, T,
|
|
||||||
typename datatypes::make_unsigned<T>::type>::type;
|
|
||||||
filterColumnData<UT, KIND_UNSIGNED>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
|
filterColumnData<UT, KIND_UNSIGNED>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@ class SimdProcessorTypedTest : public testing::Test {
|
|||||||
using IntegralType = T;
|
using IntegralType = T;
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|
||||||
void SetUp() override
|
void SetUp() override
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (C) 2020 MariaDB Corporation
|
Copyright (C) 2020-2022 MariaDB Corporation
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
This program is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU General Public License
|
modify it under the terms of the GNU General Public License
|
||||||
@ -135,6 +135,8 @@ class Charset
|
|||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
const struct charset_info_st* mCharset;
|
const struct charset_info_st* mCharset;
|
||||||
|
private:
|
||||||
|
static constexpr uint flags_ = MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Charset(CHARSET_INFO& cs) : mCharset(&cs)
|
Charset(CHARSET_INFO& cs) : mCharset(&cs)
|
||||||
@ -182,9 +184,31 @@ class Charset
|
|||||||
size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
|
size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
|
||||||
{
|
{
|
||||||
idbassert(mCharset->coll);
|
idbassert(mCharset->coll);
|
||||||
|
|
||||||
return mCharset->coll->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
|
return mCharset->coll->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
|
||||||
}
|
}
|
||||||
|
// The magic check that tells that bytes are mapped to weights as 1:1
|
||||||
|
bool strnxfrmIsValid() const
|
||||||
|
{
|
||||||
|
return (mCharset->state & MY_CS_NON1TO1) == 0;
|
||||||
|
}
|
||||||
|
template<typename T>
|
||||||
|
T strnxfrm(const char* src) const
|
||||||
|
{
|
||||||
|
T ret = 0;
|
||||||
|
size_t len __attribute__((unused)) = mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T),
|
||||||
|
src, sizeof(T), flags_);
|
||||||
|
assert(len <= sizeof(T));
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template<typename T>
|
||||||
|
T strnxfrm(const utils::ConstString &src) const
|
||||||
|
{
|
||||||
|
T ret = 0;
|
||||||
|
size_t len __attribute__((unused)) = mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T),
|
||||||
|
(char*)src.str(), src.length(), flags_);
|
||||||
|
assert(len <= sizeof(T));
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class CollationAwareHasher : public Charset
|
class CollationAwareHasher : public Charset
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2021 Mariadb Corporation.
|
/* Copyright (C) 2021-2022 Mariadb Corporation.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
This program is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU General Public License
|
modify it under the terms of the GNU General Public License
|
||||||
@ -116,6 +116,15 @@ struct StorageToFiltering<T, KIND, typename std::enable_if<KIND != KIND_FLOAT>::
|
|||||||
using type = T;
|
using type = T;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <int i0, int i1, int i2, int i3>
|
||||||
|
static inline vi128_t constant4i() {
|
||||||
|
static const union {
|
||||||
|
int i[4];
|
||||||
|
vi128_t xmm;
|
||||||
|
} u = {{i0,i1,i2,i3}};
|
||||||
|
return u.xmm;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename VT, typename T, typename ENABLE = void>
|
template <typename VT, typename T, typename ENABLE = void>
|
||||||
class SimdFilterProcessor;
|
class SimdFilterProcessor;
|
||||||
|
|
||||||
@ -462,7 +471,7 @@ class SimdFilterProcessor<
|
|||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
class SimdFilterProcessor<VT, CHECK_T,
|
class SimdFilterProcessor<VT, CHECK_T,
|
||||||
typename std::enable_if<std::is_same<VT, vi128_wr>::value && sizeof(CHECK_T) == 8 &&
|
typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, int64_t>::value &&
|
||||||
!std::is_same<CHECK_T, double>::value>::type>
|
!std::is_same<CHECK_T, double>::value>::type>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -569,7 +578,117 @@ class SimdFilterProcessor<VT, CHECK_T,
|
|||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
class SimdFilterProcessor<VT, CHECK_T,
|
class SimdFilterProcessor<VT, CHECK_T,
|
||||||
typename std::enable_if<std::is_same<VT, vi128_wr>::value && sizeof(CHECK_T) == 4 &&
|
typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, uint64_t>::value &&
|
||||||
|
!std::is_same<CHECK_T, double>::value>::type>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
constexpr static const uint16_t vecByteSize = 16U;
|
||||||
|
constexpr static const uint16_t vecBitSize = 128U;
|
||||||
|
using T = typename datatypes::WidthToSIntegralType<sizeof(CHECK_T)>::type;
|
||||||
|
using SimdWrapperType = vi128_wr;
|
||||||
|
using SimdType = vi128_t;
|
||||||
|
using FilterType = T;
|
||||||
|
using StorageType = T;
|
||||||
|
// Mask calculation for int and float types differs.
|
||||||
|
// See corresponding intrinsics algos for details.
|
||||||
|
constexpr static const uint16_t FilterMaskStep = sizeof(T);
|
||||||
|
// Load value
|
||||||
|
MCS_FORCE_INLINE SimdType emptyNullLoadValue(const T fill)
|
||||||
|
{
|
||||||
|
return loadValue(fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType loadValue(const T fill)
|
||||||
|
{
|
||||||
|
return _mm_set_epi64x(fill, fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load from
|
||||||
|
MCS_FORCE_INLINE SimdType loadFrom(const char* from)
|
||||||
|
{
|
||||||
|
return _mm_loadu_si128(reinterpret_cast<const SimdType*>(from));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare
|
||||||
|
MCS_FORCE_INLINE MT cmpGe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGt(y, x) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpGt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
SimdType signVec = constant4i<0,(int32_t)0x80000000,0,(int32_t)0x80000000>();
|
||||||
|
SimdType xFlip = _mm_xor_si128(x, signVec);
|
||||||
|
SimdType yFlip = _mm_xor_si128(y, signVec);
|
||||||
|
return _mm_movemask_epi8(_mm_cmpgt_epi64(xFlip, yFlip));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi64(x, y));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGt(x, y) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGt(y, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi64(x, y)) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysFalse(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysTrue(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// misc
|
||||||
|
MCS_FORCE_INLINE MT convertVectorToBitMask(SimdType& vmask)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(vmask);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType setToZero()
|
||||||
|
{
|
||||||
|
return _mm_setzero_si128();
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpNe(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpEq(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
// store
|
||||||
|
MCS_FORCE_INLINE void storeWMask(SimdType& x, SimdType& vmask, char* dst)
|
||||||
|
{
|
||||||
|
_mm_maskmoveu_si128(x, vmask, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE void store(char* dst, SimdType& x)
|
||||||
|
{
|
||||||
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename VT, typename CHECK_T>
|
||||||
|
class SimdFilterProcessor<VT, CHECK_T,
|
||||||
|
typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, int32_t>::value &&
|
||||||
!std::is_same<CHECK_T, float>::value>::type>
|
!std::is_same<CHECK_T, float>::value>::type>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -674,9 +793,119 @@ class SimdFilterProcessor<VT, CHECK_T,
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename VT, typename CHECK_T>
|
||||||
|
class SimdFilterProcessor<VT, CHECK_T,
|
||||||
|
typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, uint32_t>::value &&
|
||||||
|
!std::is_same<CHECK_T, float>::value>::type>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
constexpr static const uint16_t vecByteSize = 16U;
|
||||||
|
constexpr static const uint16_t vecBitSize = 128U;
|
||||||
|
using T = typename datatypes::WidthToSIntegralType<sizeof(CHECK_T)>::type;
|
||||||
|
using SimdWrapperType = vi128_wr;
|
||||||
|
using SimdType = vi128_t;
|
||||||
|
using FilterType = T;
|
||||||
|
using StorageType = T;
|
||||||
|
// Mask calculation for int and float types differs.
|
||||||
|
// See corresponding intrinsics algos for details.
|
||||||
|
constexpr static const uint16_t FilterMaskStep = sizeof(T);
|
||||||
|
// Load value
|
||||||
|
MCS_FORCE_INLINE SimdType emptyNullLoadValue(const T fill)
|
||||||
|
{
|
||||||
|
return loadValue(fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType loadValue(const T fill)
|
||||||
|
{
|
||||||
|
return _mm_set1_epi32(fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load from
|
||||||
|
MCS_FORCE_INLINE SimdType loadFrom(const char* from)
|
||||||
|
{
|
||||||
|
return _mm_loadu_si128(reinterpret_cast<const SimdType*>(from));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare
|
||||||
|
MCS_FORCE_INLINE MT cmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi32(x, y));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpGe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGt(y, x) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpGt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
SimdType signVec = constant4i<(int32_t)0x80000000,(int32_t)0x80000000,(int32_t)0x80000000,(int32_t)0x80000000>();
|
||||||
|
SimdType xFlip = _mm_xor_si128(x, signVec);
|
||||||
|
SimdType yFlip = _mm_xor_si128(y, signVec);
|
||||||
|
return _mm_movemask_epi8(_mm_cmpgt_epi32(xFlip, yFlip));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGt(x, y) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGt(y, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi32(x, y)) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysFalse(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysTrue(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// misc
|
||||||
|
MCS_FORCE_INLINE MT convertVectorToBitMask(SimdType& vmask)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(vmask);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpNe(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpEq(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType setToZero()
|
||||||
|
{
|
||||||
|
return _mm_setzero_si128();
|
||||||
|
}
|
||||||
|
|
||||||
|
// store
|
||||||
|
MCS_FORCE_INLINE void storeWMask(SimdType& x, SimdType& vmask, char* dst)
|
||||||
|
{
|
||||||
|
_mm_maskmoveu_si128(x, vmask, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE void store(char* dst, SimdType& x)
|
||||||
|
{
|
||||||
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
class SimdFilterProcessor<
|
class SimdFilterProcessor<
|
||||||
VT, CHECK_T, typename std::enable_if<std::is_same<VT, vi128_wr>::value && sizeof(CHECK_T) == 2>::type>
|
VT, CHECK_T, typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, int16_t>::value>::type>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
constexpr static const uint16_t vecByteSize = 16U;
|
constexpr static const uint16_t vecByteSize = 16U;
|
||||||
@ -782,7 +1011,227 @@ class SimdFilterProcessor<
|
|||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
class SimdFilterProcessor<
|
class SimdFilterProcessor<
|
||||||
VT, CHECK_T, typename std::enable_if<std::is_same<VT, vi128_wr>::value && sizeof(CHECK_T) == 1>::type>
|
VT, CHECK_T, typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, uint16_t>::value>::type>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
constexpr static const uint16_t vecByteSize = 16U;
|
||||||
|
constexpr static const uint16_t vecBitSize = 128U;
|
||||||
|
using T = typename datatypes::WidthToSIntegralType<sizeof(CHECK_T)>::type;
|
||||||
|
using SimdWrapperType = simd::vi128_wr;
|
||||||
|
using SimdType = simd::vi128_t;
|
||||||
|
using FilterType = T;
|
||||||
|
using StorageType = T;
|
||||||
|
// Mask calculation for int and float types differs.
|
||||||
|
// See corresponding intrinsics algos for details.
|
||||||
|
constexpr static const uint16_t FilterMaskStep = sizeof(T);
|
||||||
|
// Load value
|
||||||
|
MCS_FORCE_INLINE SimdType emptyNullLoadValue(const T fill)
|
||||||
|
{
|
||||||
|
return loadValue(fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType loadValue(const T fill)
|
||||||
|
{
|
||||||
|
return _mm_set1_epi16(fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load from
|
||||||
|
MCS_FORCE_INLINE SimdType loadFrom(const char* from)
|
||||||
|
{
|
||||||
|
return _mm_loadu_si128(reinterpret_cast<const SimdType*>(from));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare
|
||||||
|
MCS_FORCE_INLINE MT cmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi16(x, y));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpGe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
SimdType maxOfTwo = _mm_max_epu16(x, y); // max(x, y), unsigned
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi16(x, maxOfTwo));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpGt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGe(y, x) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGe(y, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGe(x, y) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi16(x, y)) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysFalse(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysTrue(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// misc
|
||||||
|
MCS_FORCE_INLINE MT convertVectorToBitMask(SimdType& vmask)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(vmask);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpNe(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpEq(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType setToZero()
|
||||||
|
{
|
||||||
|
return _mm_setzero_si128();
|
||||||
|
}
|
||||||
|
|
||||||
|
// store
|
||||||
|
MCS_FORCE_INLINE void storeWMask(SimdType& x, SimdType& vmask, char* dst)
|
||||||
|
{
|
||||||
|
_mm_maskmoveu_si128(x, vmask, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE void store(char* dst, SimdType& x)
|
||||||
|
{
|
||||||
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename VT, typename CHECK_T>
|
||||||
|
class SimdFilterProcessor<
|
||||||
|
VT, CHECK_T, typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, int8_t>::value>::type>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
constexpr static const uint16_t vecByteSize = 16U;
|
||||||
|
constexpr static const uint16_t vecBitSize = 128U;
|
||||||
|
using T = typename datatypes::WidthToSIntegralType<sizeof(CHECK_T)>::type;
|
||||||
|
using SimdWrapperType = vi128_wr;
|
||||||
|
using SimdType = vi128_t;
|
||||||
|
using FilterType = T;
|
||||||
|
using StorageType = T;
|
||||||
|
// Mask calculation for int and float types differs.
|
||||||
|
// See corresponding intrinsics algos for details.
|
||||||
|
constexpr static const uint16_t FilterMaskStep = sizeof(T);
|
||||||
|
// Load value
|
||||||
|
MCS_FORCE_INLINE SimdType emptyNullLoadValue(const T fill)
|
||||||
|
{
|
||||||
|
return loadValue(fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType loadValue(const T fill)
|
||||||
|
{
|
||||||
|
return _mm_set1_epi8(fill);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load from
|
||||||
|
MCS_FORCE_INLINE SimdType loadFrom(const char* from)
|
||||||
|
{
|
||||||
|
return _mm_loadu_si128(reinterpret_cast<const SimdType*>(from));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare
|
||||||
|
MCS_FORCE_INLINE MT cmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpGe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpLt(x, y) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpGt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpgt_epi8(x, y));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpGt(x, y) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpLt(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmplt_epi8(x, y));
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi8(x, y)) ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysFalse(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT cmpAlwaysTrue(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// permute
|
||||||
|
/* TODO Available in AVX-512
|
||||||
|
MCS_FORCE_INLINE SimdType perm8Bits(SimdType& x, SimdType& idx)
|
||||||
|
{
|
||||||
|
return _mm_permutexvar_epi8(x, idx);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
// misc
|
||||||
|
MCS_FORCE_INLINE MT convertVectorToBitMask(SimdType& vmask)
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(vmask);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpNe(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpNe(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE MT nullEmptyCmpEq(SimdType& x, SimdType& y)
|
||||||
|
{
|
||||||
|
return cmpEq(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE SimdType setToZero()
|
||||||
|
{
|
||||||
|
return _mm_setzero_si128();
|
||||||
|
}
|
||||||
|
|
||||||
|
// store
|
||||||
|
MCS_FORCE_INLINE void storeWMask(SimdType& x, SimdType& vmask, char* dst)
|
||||||
|
{
|
||||||
|
_mm_maskmoveu_si128(x, vmask, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
MCS_FORCE_INLINE void store(char* dst, SimdType& x)
|
||||||
|
{
|
||||||
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename VT, typename CHECK_T>
|
||||||
|
class SimdFilterProcessor<
|
||||||
|
VT, CHECK_T, typename std::enable_if<std::is_same<VT, vi128_wr>::value && std::is_same<CHECK_T, uint8_t>::value>::type>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
constexpr static const uint16_t vecByteSize = 16U;
|
constexpr static const uint16_t vecByteSize = 16U;
|
||||||
|
Reference in New Issue
Block a user