Mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git

clang format apply

Leonid Fedorov
2022-01-21 16:43:49 +00:00
parent 6b6411229f
commit 04752ec546
1376 changed files with 393460 additions and 412662 deletions


@@ -17,455 +17,456 @@
#pragma once
#if defined(__x86_64__)
#include <cstdint>
#include <type_traits>
#ifdef __OPTIMIZE__
#include <smmintrin.h>
#include <emmintrin.h>
#define MCS_FORCE_INLINE __attribute__((__always_inline__))
#else
#define __OPTIMIZE__
#include <smmintrin.h>
#include <emmintrin.h>
#undef __OPTIMIZE__
#define MCS_FORCE_INLINE inline
#endif
#include <mcs_datatype.h>
namespace simd
{
using vi128_t = __m128i;
using msk128_t = uint16_t;
using int128_t = __int128;
using MT = uint16_t;
// This ugly wrapper is used to allow __m128i as a template class parameter argument
struct vi128_wr
{
__m128i v;
};
template <typename VT, int WIDTH>
class SimdFilterProcessor
{
};
template <>
class SimdFilterProcessor<vi128_wr, 16>
{
// This is a dummy class that is not currently used.
public:
constexpr static const uint16_t vecByteSize = 16U;
constexpr static const uint16_t vecBitSize = 128U;
using T = int128_t;
using SIMD_WRAPPER_TYPE = simd::vi128_wr;
using SIMD_TYPE = simd::vi128_t;
// Load value
MCS_FORCE_INLINE vi128_t loadValue(const T fill)
{
return _mm_loadu_si128(reinterpret_cast<const vi128_t*>(&fill));
}
// Load from
MCS_FORCE_INLINE vi128_t loadFrom(const char* from)
{
return _mm_loadu_si128(reinterpret_cast<const vi128_t*>(from));
}
MCS_FORCE_INLINE MT cmpDummy(vi128_t& x, vi128_t& y)
{
return 0xFFFF;
}
// Compare
MCS_FORCE_INLINE MT cmpEq(vi128_t& x, vi128_t& y)
{
return cmpDummy(x, y);
}
MCS_FORCE_INLINE MT cmpGe(vi128_t& x, vi128_t& y)
{
return cmpDummy(x, y);
}
MCS_FORCE_INLINE MT cmpGt(vi128_t& x, vi128_t& y)
{
return cmpDummy(x, y);
}
MCS_FORCE_INLINE MT cmpLt(vi128_t& x, vi128_t& y)
{
return cmpDummy(x, y);
}
MCS_FORCE_INLINE MT cmpLe(vi128_t& x, vi128_t& y)
{
return cmpDummy(x, y);
}
MCS_FORCE_INLINE MT cmpNe(vi128_t& x, vi128_t& y)
{
return cmpDummy(x, y);
}
MCS_FORCE_INLINE MT cmpAlwaysFalse(vi128_t& x, vi128_t& y)
{
return 0;
}
// misc
MCS_FORCE_INLINE uint16_t convertVectorToBitMask(vi128_t& vmask)
{
return _mm_movemask_epi8(vmask);
}
MCS_FORCE_INLINE vi128_t setToZero()
{
return _mm_setzero_si128();
}
// store
MCS_FORCE_INLINE void storeWMask(vi128_t& x, vi128_t& vmask, char* dst)
{
_mm_maskmoveu_si128(x, vmask, dst);
}
MCS_FORCE_INLINE void store(char* dst, vi128_t& x)
{
_mm_storeu_si128(reinterpret_cast<vi128_t*>(dst), x);
}
};
template <>
class SimdFilterProcessor<vi128_wr, 8>
{
public:
constexpr static const uint16_t vecByteSize = 16U;
constexpr static const uint16_t vecBitSize = 128U;
using T = datatypes::WidthToSIntegralType<8>::type;
using SIMD_WRAPPER_TYPE = simd::vi128_wr;
using SIMD_TYPE = simd::vi128_t;
// Load value
MCS_FORCE_INLINE vi128_t loadValue(const T fill)
{
return _mm_set_epi64x(fill, fill);
}
// Load from
MCS_FORCE_INLINE vi128_t loadFrom(const char* from)
{
return _mm_loadu_si128(reinterpret_cast<const vi128_t*>(from));
}
// Compare
MCS_FORCE_INLINE MT cmpGe(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_or_si128(_mm_cmpgt_epi64(x, y), _mm_cmpeq_epi64(x, y)));
}
MCS_FORCE_INLINE MT cmpGt(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpgt_epi64(x, y));
}
MCS_FORCE_INLINE MT cmpEq(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi64(x, y));
}
MCS_FORCE_INLINE MT cmpLe(vi128_t& x, vi128_t& y)
{
return cmpGt(x, y) ^ 0xFFFF;
}
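// SSE4.2 has no _mm_cmplt_epi64, so "less than" is derived from the masks
// above: gt implies ne, so ne XOR gt clears exactly the gt lanes, leaving lt.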
MCS_FORCE_INLINE MT cmpLt(vi128_t& x, vi128_t& y)
{
return cmpNe(x, y) ^ cmpGt(x, y);
}
MCS_FORCE_INLINE MT cmpNe(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi64(x, y)) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpAlwaysFalse(vi128_t& x, vi128_t& y)
{
return 0;
}
// misc
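// _mm_movemask_epi8 packs the sign bit of each of the 16 bytes into the low
// 16 bits of the result, so each 8-byte lane contributes 8 identical mask bits.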
MCS_FORCE_INLINE MT convertVectorToBitMask(vi128_t& vmask)
{
return _mm_movemask_epi8(vmask);
}
MCS_FORCE_INLINE vi128_t setToZero()
{
return _mm_setzero_si128();
}
// store
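// _mm_maskmoveu_si128 is a byte-granular masked store: only bytes whose mask
// byte has its most significant bit set are written to dst (non-temporal).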
MCS_FORCE_INLINE void storeWMask(vi128_t& x, vi128_t& vmask, char* dst)
{
_mm_maskmoveu_si128(x, vmask, dst);
}
MCS_FORCE_INLINE void store(char* dst, vi128_t& x)
{
_mm_storeu_si128(reinterpret_cast<vi128_t*>(dst), x);
}
};
template <>
class SimdFilterProcessor<vi128_wr, 4>
{
public:
constexpr static const uint16_t vecByteSize = 16U;
constexpr static const uint16_t vecBitSize = 128U;
using T = datatypes::WidthToSIntegralType<4>::type;
using SIMD_WRAPPER_TYPE = simd::vi128_wr;
using SIMD_TYPE = simd::vi128_t;
// Load value
MCS_FORCE_INLINE vi128_t loadValue(const T fill)
{
return _mm_set1_epi32(fill);
}
// Load from
MCS_FORCE_INLINE vi128_t loadFrom(const char* from)
{
return _mm_loadu_si128(reinterpret_cast<const vi128_t*>(from));
}
// Compare
MCS_FORCE_INLINE MT cmpEq(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi32(x, y));
}
MCS_FORCE_INLINE MT cmpGe(vi128_t& x, vi128_t& y)
{
return cmpLt(x, y) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpGt(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpgt_epi32(x, y));
}
MCS_FORCE_INLINE MT cmpLe(vi128_t& x, vi128_t& y)
{
return cmpGt(x, y) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpLt(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmplt_epi32(x, y));
}
MCS_FORCE_INLINE MT cmpNe(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi32(x, y)) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpAlwaysFalse(vi128_t& x, vi128_t& y)
{
return 0;
}
// misc
MCS_FORCE_INLINE MT convertVectorToBitMask(vi128_t& vmask)
{
return _mm_movemask_epi8(vmask);
}
MCS_FORCE_INLINE vi128_t setToZero()
{
return _mm_setzero_si128();
}
// store
MCS_FORCE_INLINE void storeWMask(vi128_t& x, vi128_t& vmask, char* dst)
{
_mm_maskmoveu_si128(x, vmask, dst);
}
MCS_FORCE_INLINE void store(char* dst, vi128_t& x)
{
_mm_storeu_si128(reinterpret_cast<vi128_t*>(dst), x);
}
};
template <>
class SimdFilterProcessor<vi128_wr, 2>
{
public:
constexpr static const uint16_t vecByteSize = 16U;
constexpr static const uint16_t vecBitSize = 128U;
using T = datatypes::WidthToSIntegralType<2>::type;
using SIMD_WRAPPER_TYPE = simd::vi128_wr;
using SIMD_TYPE = simd::vi128_t;
// Load value
MCS_FORCE_INLINE vi128_t loadValue(const T fill)
{
return _mm_set1_epi16(fill);
}
// Load from
MCS_FORCE_INLINE vi128_t loadFrom(const char* from)
{
return _mm_loadu_si128(reinterpret_cast<const vi128_t*>(from));
}
// Compare
MCS_FORCE_INLINE MT cmpEq(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi16(x, y));
}
MCS_FORCE_INLINE MT cmpGe(vi128_t& x, vi128_t& y)
{
return cmpLt(x, y) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpGt(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpgt_epi16(x, y));
}
MCS_FORCE_INLINE MT cmpLe(vi128_t& x, vi128_t& y)
{
return cmpGt(x, y) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpLt(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmplt_epi16(x, y));
}
MCS_FORCE_INLINE MT cmpNe(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi16(x, y)) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpAlwaysFalse(vi128_t& x, vi128_t& y)
{
return 0;
}
// misc
MCS_FORCE_INLINE MT convertVectorToBitMask(vi128_t& vmask)
{
return _mm_movemask_epi8(vmask);
}
MCS_FORCE_INLINE vi128_t setToZero()
{
return _mm_setzero_si128();
}
// store
MCS_FORCE_INLINE void storeWMask(vi128_t& x, vi128_t& vmask, char* dst)
{
_mm_maskmoveu_si128(x, vmask, dst);
}
MCS_FORCE_INLINE void store(char* dst, vi128_t& x)
{
_mm_storeu_si128(reinterpret_cast<vi128_t*>(dst), x);
}
};
template <>
class SimdFilterProcessor<vi128_wr, 1>
{
public:
constexpr static const uint16_t vecByteSize = 16U;
constexpr static const uint16_t vecBitSize = 128U;
using T = datatypes::WidthToSIntegralType<1>::type;
using SIMD_WRAPPER_TYPE = simd::vi128_wr;
using SIMD_TYPE = simd::vi128_t;
// Load value
MCS_FORCE_INLINE vi128_t loadValue(const T fill)
{
return _mm_set1_epi8(fill);
}
// Load from
MCS_FORCE_INLINE vi128_t loadFrom(const char* from)
{
return _mm_loadu_si128(reinterpret_cast<const vi128_t*>(from));
}
// Compare
MCS_FORCE_INLINE MT cmpEq(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
}
MCS_FORCE_INLINE MT cmpGe(vi128_t& x, vi128_t& y)
{
return cmpLt(x, y) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpGt(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpgt_epi8(x, y));
}
MCS_FORCE_INLINE MT cmpLe(vi128_t& x, vi128_t& y)
{
return cmpGt(x, y) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpLt(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmplt_epi8(x, y));
}
MCS_FORCE_INLINE MT cmpNe(vi128_t& x, vi128_t& y)
{
return _mm_movemask_epi8(_mm_cmpeq_epi8(x, y)) ^ 0xFFFF;
}
MCS_FORCE_INLINE MT cmpAlwaysFalse(vi128_t& x, vi128_t& y)
{
return 0;
}
// permute
/* TODO Available in AVX-512
MCS_FORCE_INLINE vi128_t perm8Bits(vi128_t& x, vi128_t& idx)
{
return _mm_permutexvar_epi8(x, idx);
}
*/
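// Note: _mm_permutexvar_epi8 needs AVX512-VBMI (plus AVX512VL for the 128-bit
// form), which is why it remains a TODO in this SSE-only header.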
// misc
MCS_FORCE_INLINE MT convertVectorToBitMask(vi128_t& vmask)
{
return _mm_movemask_epi8(vmask);
}
MCS_FORCE_INLINE vi128_t setToZero()
{
return _mm_setzero_si128();
}
// store
MCS_FORCE_INLINE void storeWMask(vi128_t& x, vi128_t& vmask, char* dst)
{
_mm_maskmoveu_si128(x, vmask, dst);
}
MCS_FORCE_INLINE void store(char* dst, vi128_t& x)
{
_mm_storeu_si128(reinterpret_cast<vi128_t*>(dst), x);
}
};
} // namespace simd
#endif // if defined(__x86_64__ )
// vim:ts=2 sw=2:
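
For context, a minimal usage sketch (not part of the commit) showing how one of these specializations drives a vectorized filter: broadcast a constant, load a 16-byte chunk, compare, and inspect the resulting 16-bit byte mask. The include path "simd_sse.h" is a guess at this header's file name, and building with -msse4.2 on x86_64 is assumed.

// Minimal sketch, not from the commit: filter int32 values with the WIDTH=4
// specialization. "simd_sse.h" is a hypothetical include path for the header
// shown above.
#include <cstdint>
#include <cstdio>
#include "simd_sse.h"

int main()
{
  using Proc = simd::SimdFilterProcessor<simd::vi128_wr, 4>;
  Proc proc;
  int32_t data[4] = {7, 42, 7, 9};
  simd::vi128_t needle = proc.loadValue(7);  // broadcast 7 into all four lanes
  simd::vi128_t chunk = proc.loadFrom(reinterpret_cast<const char*>(data));
  simd::MT mask = proc.cmpEq(chunk, needle);  // byte-level mask: 4 bits per lane
  for (int i = 0; i < 4; ++i)                 // lane i matched iff its 4-bit group is set
    if ((mask >> (4 * i)) & 0xF)
      std::printf("data[%d] == 7\n", i);
  return 0;
}

Because every comparison funnels through _mm_movemask_epi8, a WIDTH-byte element always contributes WIDTH identical bits to the mask, which is what convertVectorToBitMask and the callers in the scanning code rely on.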