diff --git a/primitives/primproc/columncommand.cpp b/primitives/primproc/columncommand.cpp index d1b122466..62ac4f66f 100644 --- a/primitives/primproc/columncommand.cpp +++ b/primitives/primproc/columncommand.cpp @@ -231,7 +231,11 @@ void ColumnCommand::loadData() { ByteStream::hexbyte h; utils::getEmptyRowValue(colType.colDataType, colType.colWidth, (uint8_t*)&h); - hPtr[idx] = h; + __asm__ volatile("movups %1,%0" + :"=m" ( hPtr[idx] ) // output + :"v"( h ) // input + : "memory" // clobbered + ); } } @@ -328,7 +332,18 @@ void ColumnCommand::process_OT_BOTH() bpp->relRids[i] = *((uint16_t*) &bpp->outputMsg[pos]); pos += 2; - wide128Values[i] = *((int128_t*) &bpp->outputMsg[pos]); + int128_t* int128Ptr = reinterpret_cast(&bpp->outputMsg[pos]); + __asm__ volatile("movdqu %0,%%xmm0;" + : + :"m"( *int128Ptr ) // input + :"xmm0" // clobbered + ); + __asm__ volatile("movups %%xmm0,%0;" + : "=m" (wide128Values[i])// output + : // input + : "memory", "xmm0" // clobbered + ); + pos += 16; } diff --git a/utils/messageqcpp/bytestream.cpp b/utils/messageqcpp/bytestream.cpp index c0fad8554..d42239eb0 100644 --- a/utils/messageqcpp/bytestream.cpp +++ b/utils/messageqcpp/bytestream.cpp @@ -240,7 +240,12 @@ ByteStream& ByteStream::operator<<(const uint128_t& o) if (fBuf == 0 || (fCurInPtr - fBuf + 16U > fMaxLen + ISSOverhead)) growBuf(fMaxLen + BlockSize); - *((uint128_t*) fCurInPtr) = o; + __asm__ volatile("movups %1,%0;" + :"=m" ( *fCurInPtr ) // output + :"v"( o ) // input + : "memory" // clobbered + ); + fCurInPtr += 16; return *this; @@ -251,7 +256,11 @@ ByteStream& ByteStream::operator<<(const int128_t& o) if (fBuf == 0 || (fCurInPtr - fBuf + 16U > fMaxLen + ISSOverhead)) growBuf(fMaxLen + BlockSize); - *((int128_t*) fCurInPtr) = o; + __asm__ volatile("movups %1,%0;" + :"=m" ( *fCurInPtr ) // output + :"v"( o ) // input + : "memory" // clobbered + ); fCurInPtr += 16; return *this; @@ -441,7 +450,16 @@ void ByteStream::peek(uint128_t& o) const if (length() < 16) throw underflow_error("ByteStream>uint128_t: not enough data in stream to fill datatype"); - o = *((uint128_t*) fCurOutPtr); + __asm__ volatile("movdqu %0,%%xmm0;" + : + :"m"( *fCurOutPtr ) // input + :"xmm0" // clobbered + ); + __asm__ volatile("movups %%xmm0,%0;" + : "=m" (o)// output + : // input + : "memory", "xmm0" // clobbered + ); } void ByteStream::peek(int128_t& o) const @@ -450,7 +468,16 @@ void ByteStream::peek(int128_t& o) const if (length() < 16) throw underflow_error("ByteStream>int128_t: not enough data in stream to fill datatype"); - o = *((int128_t*) fCurOutPtr); + __asm__ volatile("movdqu %0,%%xmm0;" + : + :"m"( *fCurOutPtr ) // input + :"xmm0" // clobbered + ); + __asm__ volatile("movups %%xmm0,%0;" + : "=m" (o)// output + : // input + : "memory", "xmm0" // clobbered + ); } void ByteStream::peek(string& s) const diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index 3eccb7b0f..ade220d76 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -151,6 +151,7 @@ public: * push an int128_t onto the end of the stream. The byte order is whatever the native byte order is. */ EXPORT ByteStream& operator<<(const int128_t& o); + /** * push an uint128_t onto the end of the stream. The byte order is whatever the native byte order is. */ diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 748bf3dfa..9064aa8b8 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -848,6 +848,22 @@ inline void Row::setBinaryField_offset(uint8_t* value, uint32_t width, memcpy(&data[offset], value, width); } +template<> +inline void Row::setBinaryField_offset(int128_t* value, uint32_t width, uint32_t offset) +{ + int128_t *dst128Ptr = reinterpret_cast(&data[offset]); + __asm__ volatile("movdqu %0,%%xmm0;" + : + :"m"( *value ) // input + :"xmm0" // clobbered + ); + __asm__ volatile("movups %%xmm0,%0;" + : "=m" (*dst128Ptr)// output + : // input + : "memory", "xmm0" // clobbered + ); +} + inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex) { uint64_t offset;