You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-4313 Very fragile but high speed approach with inline ASM
GCC emits the aligned variants of SIMD instructions, which expect aligned memory blocks; guaranteeing that alignment is hard to implement for now.
This commit is contained in:
@ -231,7 +231,11 @@ void ColumnCommand::loadData()
|
||||
{
|
||||
ByteStream::hexbyte h;
|
||||
utils::getEmptyRowValue(colType.colDataType, colType.colWidth, (uint8_t*)&h);
|
||||
hPtr[idx] = h;
|
||||
__asm__ volatile("movups %1,%0"
|
||||
:"=m" ( hPtr[idx] ) // output
|
||||
:"v"( h ) // input
|
||||
: "memory" // clobbered
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -328,7 +332,18 @@ void ColumnCommand::process_OT_BOTH()
|
||||
|
||||
bpp->relRids[i] = *((uint16_t*) &bpp->outputMsg[pos]);
|
||||
pos += 2;
|
||||
wide128Values[i] = *((int128_t*) &bpp->outputMsg[pos]);
|
||||
int128_t* int128Ptr = reinterpret_cast<int128_t*>(&bpp->outputMsg[pos]);
|
||||
__asm__ volatile("movdqu %0,%%xmm0;"
|
||||
:
|
||||
:"m"( *int128Ptr ) // input
|
||||
:"xmm0" // clobbered
|
||||
);
|
||||
__asm__ volatile("movups %%xmm0,%0;"
|
||||
: "=m" (wide128Values[i])// output
|
||||
: // input
|
||||
: "memory", "xmm0" // clobbered
|
||||
);
|
||||
|
||||
pos += 16;
|
||||
}
|
||||
|
||||
|
@ -240,7 +240,12 @@ ByteStream& ByteStream::operator<<(const uint128_t& o)
|
||||
if (fBuf == 0 || (fCurInPtr - fBuf + 16U > fMaxLen + ISSOverhead))
|
||||
growBuf(fMaxLen + BlockSize);
|
||||
|
||||
*((uint128_t*) fCurInPtr) = o;
|
||||
__asm__ volatile("movups %1,%0;"
|
||||
:"=m" ( *fCurInPtr ) // output
|
||||
:"v"( o ) // input
|
||||
: "memory" // clobbered
|
||||
);
|
||||
|
||||
fCurInPtr += 16;
|
||||
|
||||
return *this;
|
||||
@ -251,7 +256,11 @@ ByteStream& ByteStream::operator<<(const int128_t& o)
|
||||
if (fBuf == 0 || (fCurInPtr - fBuf + 16U > fMaxLen + ISSOverhead))
|
||||
growBuf(fMaxLen + BlockSize);
|
||||
|
||||
*((int128_t*) fCurInPtr) = o;
|
||||
__asm__ volatile("movups %1,%0;"
|
||||
:"=m" ( *fCurInPtr ) // output
|
||||
:"v"( o ) // input
|
||||
: "memory" // clobbered
|
||||
);
|
||||
fCurInPtr += 16;
|
||||
|
||||
return *this;
|
||||
@ -441,7 +450,16 @@ void ByteStream::peek(uint128_t& o) const
|
||||
if (length() < 16)
|
||||
throw underflow_error("ByteStream>uint128_t: not enough data in stream to fill datatype");
|
||||
|
||||
o = *((uint128_t*) fCurOutPtr);
|
||||
__asm__ volatile("movdqu %0,%%xmm0;"
|
||||
:
|
||||
:"m"( *fCurOutPtr ) // input
|
||||
:"xmm0" // clobbered
|
||||
);
|
||||
__asm__ volatile("movups %%xmm0,%0;"
|
||||
: "=m" (o)// output
|
||||
: // input
|
||||
: "memory", "xmm0" // clobbered
|
||||
);
|
||||
}
|
||||
|
||||
void ByteStream::peek(int128_t& o) const
|
||||
@ -450,7 +468,16 @@ void ByteStream::peek(int128_t& o) const
|
||||
if (length() < 16)
|
||||
throw underflow_error("ByteStream>int128_t: not enough data in stream to fill datatype");
|
||||
|
||||
o = *((int128_t*) fCurOutPtr);
|
||||
__asm__ volatile("movdqu %0,%%xmm0;"
|
||||
:
|
||||
:"m"( *fCurOutPtr ) // input
|
||||
:"xmm0" // clobbered
|
||||
);
|
||||
__asm__ volatile("movups %%xmm0,%0;"
|
||||
: "=m" (o)// output
|
||||
: // input
|
||||
: "memory", "xmm0" // clobbered
|
||||
);
|
||||
}
|
||||
|
||||
void ByteStream::peek(string& s) const
|
||||
|
@ -151,6 +151,7 @@ public:
|
||||
* push an int128_t onto the end of the stream. The byte order is whatever the native byte order is.
|
||||
*/
|
||||
EXPORT ByteStream& operator<<(const int128_t& o);
|
||||
|
||||
/**
|
||||
* push a uint128_t onto the end of the stream. The byte order is whatever the native byte order is.
|
||||
*/
|
||||
|
@ -848,6 +848,22 @@ inline void Row::setBinaryField_offset<uint8_t>(uint8_t* value, uint32_t width,
|
||||
memcpy(&data[offset], value, width);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void Row::setBinaryField_offset<int128_t>(int128_t* value, uint32_t width, uint32_t offset)
|
||||
{
|
||||
int128_t *dst128Ptr = reinterpret_cast<int128_t*>(&data[offset]);
|
||||
__asm__ volatile("movdqu %0,%%xmm0;"
|
||||
:
|
||||
:"m"( *value ) // input
|
||||
:"xmm0" // clobbered
|
||||
);
|
||||
__asm__ volatile("movups %%xmm0,%0;"
|
||||
: "=m" (*dst128Ptr)// output
|
||||
: // input
|
||||
: "memory", "xmm0" // clobbered
|
||||
);
|
||||
}
|
||||
|
||||
inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
|
||||
{
|
||||
uint64_t offset;
|
||||
|
Reference in New Issue
Block a user