From 231930b71d2bbf998280f47c8e7429d4d552232b Mon Sep 17 00:00:00 2001 From: NTH19 <3310288189@qq.com> Date: Thu, 14 Jul 2022 22:46:23 +0800 Subject: [PATCH] update --- primitives/linux-port/column.cpp | 58 +++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index 40c610edf..a9bbac10a 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -119,6 +119,24 @@ inline int compareBlock(const void* a, const void* b) return ((*(T*)a) - (*(T*)b)); } +template +std::enable_if_t< + sizeof(To) == sizeof(From) && + std::is_trivially_copyable_v && + std::is_trivially_copyable_v, + To> +// constexpr support needs compiler magic +bitCast(const From& src) noexcept +{ + static_assert(std::is_trivially_constructible_v, + "This implementation additionally requires " + "destination type to be trivially constructible"); + + To dst; + std::memcpy(&dst, &src, sizeof(To)); + return dst; +} + // this function is out-of-band, we don't need to inline it void logIt(int mid, int arg1, const string& arg2 = string()) { @@ -925,7 +943,7 @@ inline void writeColValue(uint8_t OutputType, ColResultHeader* out, uint16_t rid } } -#if defined(__x86_64__) || defined(__aarch64__) +#if defined(__x86_64__)|| defined(__aarch64__) template ::type* = nullptr> inline void vectUpdateMinMax(const bool validMinMax, const bool isNonNullOrEmpty, T& Min, T& Max, T curValue, @@ -1222,7 +1240,7 @@ void scalarFiltering( } } -#if defined(__x86_64__) || defined(__aarch64__) +#if defined(__x86_64__)|| defined(__aarch64__) template ::type* = nullptr> inline SIMD_WRAPPER_TYPE simdDataLoad(VT& processor, const T* srcArray, const T* origSrcArray, @@ -1278,13 +1296,19 @@ inline SIMD_WRAPPER_TYPE simdSwapedOrderDataLoad(const ColRequestHeaderDataType } template -void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT& simdProcessor, +void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT simdProcessor, SimdType& dataVec, SimdType& simdMin, SimdType& simdMax) { - if (validMinMax && nonNullOrEmptyMask) + if (validMinMax) { - simdMin = simdProcessor.min(simdMin, dataVec); - simdMax = simdProcessor.max(simdMax, dataVec); + simdMin = simdProcessor.blend( + simdMin, dataVec, + simdProcessor.bwAnd(simdProcessor.cmpGt2(simdMin, dataVec), + bitCast(simd::bitMaskToByteMask16(nonNullOrEmptyMask)))); + simdMax = simdProcessor.blend( + simdMax, dataVec, + simdProcessor.bwAnd(simdProcessor.cmpGt2(dataVec, simdMax), + bitCast(simd::bitMaskToByteMask16(nonNullOrEmptyMask)))); } } @@ -1305,7 +1329,7 @@ void scalarUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT& } template -void extractMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T& min, T& max) +void extractMinMax(VT& simdProcessor, SimdType simdMin, SimdType simdMax, T& min, T& max) { constexpr const uint16_t size = VT::vecByteSize / sizeof(T); T* simdMinVec = reinterpret_cast(&simdMin); @@ -1313,6 +1337,13 @@ void extractMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T& m max = *std::max_element(simdMaxVec, simdMaxVec + size); min = *std::min_element(simdMinVec, simdMinVec + size); } + +template +void getInitialSimdMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T min, T max) +{ + simdMin = simdProcessor.loadValue(min); + simdMax = simdProcessor.loadValue(max); +} // This routine filters input block in a vectorized manner. // It supports all output types, all input types. // It doesn't support KIND==TEXT so upper layers filters this KIND out beforehand. @@ -1448,9 +1479,12 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T* } } } - [[maybe_unused]] SimdType simdMin = simdDataLoad(simdProcessor, srcArray, - origSrcArray, ridArray, 0).v;; - [[maybe_unused]] SimdType simdMax = simdMin; + [[maybe_unused]] SimdType simdMin; + [[maybe_unused]] SimdType simdMax; + if constexpr (KIND != KIND_TEXT) + { + getInitialSimdMinMax(simdProcessor, simdMin, simdMax, min, max); + } // main loop // writeMask tells which values must get into the result. Includes values that matches filters. Can have // NULLs. nonEmptyMask tells which vector coords are not EMPTY magics. nonNullMask tells which vector coords @@ -1671,7 +1705,7 @@ void filterColumnData(NewColRequestHeader* in, ColResultHeader* out, uint16_t* r // Syscat queries mustn't follow vectorized processing path b/c PP must return // all values w/o any filter(even empty values filter) applied. -#if defined(__x86_64__) || defined(__aarch64__) +#if defined(__x86_64__)|| defined(__aarch64__) // Don't use vectorized filtering for text based data types. if (WIDTH < 16 && (KIND != KIND_TEXT || (KIND == KIND_TEXT && in->colType.strnxfrmIsValid()) )) @@ -1903,4 +1937,4 @@ template void primitives::PrimitiveProcessor::columnScanAndFilter(NewCo template void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, ColResultHeader*); -} // namespace primitives +} // namespace primitives \ No newline at end of file