You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-08 14:22:09 +03:00
Vectorizing min/max for KIND_TEXT
This commit is contained in:
@@ -43,6 +43,7 @@ using namespace boost;
|
|||||||
#include "simd_sse.h"
|
#include "simd_sse.h"
|
||||||
#include "simd_arm.h"
|
#include "simd_arm.h"
|
||||||
#include "utils/common/columnwidth.h"
|
#include "utils/common/columnwidth.h"
|
||||||
|
#include "utils/common/bit_cast.h"
|
||||||
|
|
||||||
#include "exceptclasses.h"
|
#include "exceptclasses.h"
|
||||||
|
|
||||||
@@ -119,24 +120,6 @@ inline int compareBlock(const void* a, const void* b)
|
|||||||
return ((*(T*)a) - (*(T*)b));
|
return ((*(T*)a) - (*(T*)b));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class To, class From>
|
|
||||||
std::enable_if_t<
|
|
||||||
sizeof(To) == sizeof(From) &&
|
|
||||||
std::is_trivially_copyable_v<From> &&
|
|
||||||
std::is_trivially_copyable_v<To>,
|
|
||||||
To>
|
|
||||||
// constexpr support needs compiler magic
|
|
||||||
bitCast(const From& src) noexcept
|
|
||||||
{
|
|
||||||
static_assert(std::is_trivially_constructible_v<To>,
|
|
||||||
"This implementation additionally requires "
|
|
||||||
"destination type to be trivially constructible");
|
|
||||||
|
|
||||||
To dst;
|
|
||||||
std::memcpy(&dst, &src, sizeof(To));
|
|
||||||
return dst;
|
|
||||||
}
|
|
||||||
|
|
||||||
// this function is out-of-band, we don't need to inline it
|
// this function is out-of-band, we don't need to inline it
|
||||||
void logIt(int mid, int arg1, const string& arg2 = string())
|
void logIt(int mid, int arg1, const string& arg2 = string())
|
||||||
{
|
{
|
||||||
@@ -1301,30 +1284,28 @@ void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask,
|
|||||||
{
|
{
|
||||||
if (validMinMax)
|
if (validMinMax)
|
||||||
{
|
{
|
||||||
simdMin = simdProcessor.blend(
|
auto byteMask = utils::bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask));
|
||||||
simdMin, dataVec,
|
simdMin = simdProcessor.blend(simdMin, dataVec,
|
||||||
simdProcessor.bwAnd(simdProcessor.cmpGt2(simdMin, dataVec),
|
simdProcessor.bwAnd(simdProcessor.cmpGt2(simdMin, dataVec), byteMask));
|
||||||
bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask))));
|
simdMax = simdProcessor.blend(simdMax, dataVec,
|
||||||
simdMax = simdProcessor.blend(
|
simdProcessor.bwAnd(simdProcessor.cmpGt2(dataVec, simdMax), byteMask));
|
||||||
simdMax, dataVec,
|
|
||||||
simdProcessor.bwAnd(simdProcessor.cmpGt2(dataVec, simdMax),
|
|
||||||
bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask))));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename VT, ENUM_KIND KIND>
|
template <typename VT, typename SimdType>
|
||||||
void scalarUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT& simdPRocessor,
|
void vectorizedTextUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT simdProcessor,
|
||||||
T* dataVecTPtr, T& min, T& max, NewColRequestHeader* in)
|
SimdType& dataVec, SimdType& simdMin, SimdType& simdMax,
|
||||||
|
SimdType& swapedOrderDataVec, SimdType& weightsMin, SimdType& weightsMax)
|
||||||
{
|
{
|
||||||
constexpr const uint16_t filterMaskStep = VT::FilterMaskStep;
|
if (validMinMax)
|
||||||
uint16_t j = 0;
|
|
||||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += filterMaskStep)
|
|
||||||
{
|
{
|
||||||
MT bitMapPosition = 1 << it;
|
auto byteMask = utils::bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask));
|
||||||
if (validMinMax && (nonNullOrEmptyMask & bitMapPosition))
|
auto minComp = simdProcessor.bwAnd(simdProcessor.cmpGt2(weightsMin, swapedOrderDataVec), byteMask);
|
||||||
{
|
auto maxComp = simdProcessor.bwAnd(simdProcessor.cmpGt2(swapedOrderDataVec, weightsMax), byteMask);
|
||||||
updateMinMax<KIND>(min, max, dataVecTPtr[j], in);
|
simdMin = simdProcessor.blend(simdMin, dataVec, minComp);
|
||||||
}
|
weightsMin = simdProcessor.blend(weightsMin, swapedOrderDataVec, minComp);
|
||||||
|
simdMax = simdProcessor.blend(simdMax, dataVec, maxComp);
|
||||||
|
weightsMax = simdProcessor.blend(weightsMax, swapedOrderDataVec, maxComp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1338,12 +1319,21 @@ void extractMinMax(VT& simdProcessor, SimdType simdMin, SimdType simdMax, T& min
|
|||||||
min = *std::min_element(simdMinVec, simdMinVec + size);
|
min = *std::min_element(simdMinVec, simdMinVec + size);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename VT, typename SimdType>
|
template<typename T, typename VT, typename SimdType>
|
||||||
void getInitialSimdMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T min, T max)
|
void extractTextMinMax(VT& simdProcessor, SimdType simdMin, SimdType simdMax, SimdType weightsMin,
|
||||||
|
SimdType weightsMax, T& min, T& max)
|
||||||
{
|
{
|
||||||
simdMin = simdProcessor.loadValue(min);
|
constexpr const uint16_t size = VT::vecByteSize / sizeof(T);
|
||||||
simdMax = simdProcessor.loadValue(max);
|
T* simdMinVec = reinterpret_cast<T*>(&simdMin);
|
||||||
|
T* simdMaxVec = reinterpret_cast<T*>(&simdMax);
|
||||||
|
T* weightsMinVec = reinterpret_cast<T*>(&weightsMin);
|
||||||
|
T* weightsMaxVec = reinterpret_cast<T*>(&weightsMax);
|
||||||
|
auto indMin = std::min_element(weightsMinVec, weightsMinVec + size);
|
||||||
|
auto indMax = std::max_element(weightsMaxVec, weightsMaxVec + size);
|
||||||
|
min = simdMinVec[indMin - weightsMinVec];
|
||||||
|
max = simdMaxVec[indMax - weightsMaxVec];
|
||||||
}
|
}
|
||||||
|
|
||||||
// This routine filters input block in a vectorized manner.
|
// This routine filters input block in a vectorized manner.
|
||||||
// It supports all output types, all input types.
|
// It supports all output types, all input types.
|
||||||
// It doesn't support KIND==TEXT, so upper layers filter this KIND out beforehand.
|
// It doesn't support KIND==TEXT, so upper layers filter this KIND out beforehand.
|
||||||
@@ -1479,11 +1469,14 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
[[maybe_unused]] SimdType simdMin;
|
SimdType simdMin = simdProcessor.loadValue(min);;
|
||||||
[[maybe_unused]] SimdType simdMax;
|
SimdType simdMax = simdProcessor.loadValue(max);;
|
||||||
if constexpr (KIND != KIND_TEXT)
|
[[maybe_unused]] SimdType weightsMin;
|
||||||
|
[[maybe_unused]] SimdType weightsMax;
|
||||||
|
if constexpr (KIND == KIND_TEXT)
|
||||||
{
|
{
|
||||||
getInitialSimdMinMax(simdProcessor, simdMin, simdMax, min, max);
|
weightsMin = simdSwapedOrderDataLoad<KIND, VT, SimdWrapperType, T>(typeHolder, simdProcessor, simdMin).v;
|
||||||
|
weightsMax = simdSwapedOrderDataLoad<KIND, VT, SimdWrapperType, T>(typeHolder, simdProcessor, simdMax).v;
|
||||||
}
|
}
|
||||||
// main loop
|
// main loop
|
||||||
// writeMask tells which values must get into the result. Includes values that matches filters. Can have
|
// writeMask tells which values must get into the result. Includes values that matches filters. Can have
|
||||||
@@ -1539,14 +1532,10 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
in, out, nonNullOrEmptyMask, ridArray);
|
in, out, nonNullOrEmptyMask, ridArray);
|
||||||
|
|
||||||
if constexpr (KIND != KIND_TEXT)
|
if constexpr (KIND != KIND_TEXT)
|
||||||
{
|
|
||||||
vectorizedUpdateMinMax(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVec, simdMin, simdMax);
|
vectorizedUpdateMinMax(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVec, simdMin, simdMax);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
vectorizedTextUpdateMinMax(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVec, simdMin, simdMax,
|
||||||
scalarUpdateMinMax<T, VT, KIND>(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVecTPtr, min, max,
|
swapedOrderDataVec, weightsMin, weightsMax);
|
||||||
in);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate bytes written
|
// Calculate bytes written
|
||||||
uint16_t bytesWritten = valuesWritten * WIDTH;
|
uint16_t bytesWritten = valuesWritten * WIDTH;
|
||||||
@@ -1557,10 +1546,11 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
srcArray += VECTOR_SIZE;
|
srcArray += VECTOR_SIZE;
|
||||||
ridArray += VECTOR_SIZE;
|
ridArray += VECTOR_SIZE;
|
||||||
}
|
}
|
||||||
if constexpr(KIND != KIND_TEXT)
|
if constexpr (KIND != KIND_TEXT)
|
||||||
{
|
|
||||||
extractMinMax(simdProcessor, simdMin, simdMax, min, max);
|
extractMinMax(simdProcessor, simdMin, simdMax, min, max);
|
||||||
}
|
else
|
||||||
|
extractTextMinMax(simdProcessor, simdMin, simdMax, weightsMin, weightsMax, min, max);
|
||||||
|
|
||||||
// Set the number of output values here b/c tail processing can skip this operation.
|
// Set the number of output values here b/c tail processing can skip this operation.
|
||||||
out->NVALS = totalValuesWritten;
|
out->NVALS = totalValuesWritten;
|
||||||
|
|
||||||
|
39
utils/common/bit_cast.h
Normal file
39
utils/common/bit_cast.h
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
/* Copyright (C) 2020 MariaDB Corporation
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; version 2 of
|
||||||
|
the License.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
MA 02110-1301, USA. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <type_traits>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace utils
{
/// @brief Returns a value of type `To` whose bytes are a copy of the bytes of `src`.
///
/// A default-initialized `To` is created and filled with std::memcpy from `src`;
/// no pointer casts are involved. The overload is only available (via
/// std::enable_if_t) when `To` and `From` have the same size and are both
/// trivially copyable.
///
/// @tparam To   destination type; must also be trivially constructible
///              (enforced by the static_assert in the body).
/// @tparam From source type, deduced from the argument.
/// @param src   object whose object representation is copied.
/// @return a `To` holding the same `sizeof(To)` bytes as `src`.
template <class To, class From>
std::enable_if_t<
    sizeof(To) == sizeof(From) && std::is_trivially_copyable_v<From> && std::is_trivially_copyable_v<To>, To>
// constexpr support needs compiler magic
bitCast(const From& src) noexcept
{
  static_assert(std::is_trivially_constructible_v<To>,
                "This implementation additionally requires "
                "destination type to be trivially constructible");

  // Deliberately left uninitialized: every byte is overwritten by the memcpy below.
  To dst;
  std::memcpy(&dst, &src, sizeof(To));
  return dst;
}
}  // namespace utils
|
Reference in New Issue
Block a user