1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-641 Refactor empty value implementation in writeengine.

This commit is contained in:
Gagan Goel
2020-03-02 13:23:07 -05:00
committed by Roman Nozdrin
parent 97ee1609b2
commit 824615a55b
31 changed files with 418 additions and 812 deletions

View File

@ -34,6 +34,8 @@
using namespace execplan;
#include "emptyvaluemanip.h"
namespace WriteEngine
{
@ -83,110 +85,10 @@ bool BlockOp::calculateRowId(
* emptyVal - the value of empty row
***********************************************************/
// TODO MCOL-641 Add support here
// Produces the empty-row value for a column of the given data type and width.
// NOTE(review): this is a diff hunk shown without +/- markers (hunk header
// "-83,110 +85,10"), so two versions are interleaved below. Presumably the
// uint64_t-returning signature, the local variables and the whole switch are
// the removed code, and the void signature taking a uint8_t* emptyVal plus the
// single utils::getEmptyRowValue() call are what replaces them -- confirm
// against the commit before relying on this reading.
uint64_t BlockOp::getEmptyRowValue(
const CalpontSystemCatalog::ColDataType colDataType, const int width ) const
void BlockOp::getEmptyRowValue(
const CalpontSystemCatalog::ColDataType colDataType, const int width, uint8_t* emptyVal ) const
{
uint64_t emptyVal = 0;
int offset = 0;
switch ( colDataType )
{
case CalpontSystemCatalog::TINYINT :
emptyVal = joblist::TINYINTEMPTYROW;
break;
case CalpontSystemCatalog::SMALLINT:
emptyVal = joblist::SMALLINTEMPTYROW;
break;
case CalpontSystemCatalog::MEDINT :
case CalpontSystemCatalog::INT :
emptyVal = joblist::INTEMPTYROW;
break;
case CalpontSystemCatalog::BIGINT :
emptyVal = joblist::BIGINTEMPTYROW;
break;
case CalpontSystemCatalog::FLOAT :
case CalpontSystemCatalog::UFLOAT :
emptyVal = joblist::FLOATEMPTYROW;
break;
case CalpontSystemCatalog::DOUBLE :
case CalpontSystemCatalog::UDOUBLE :
emptyVal = joblist::DOUBLEEMPTYROW;
break;
case CalpontSystemCatalog::DECIMAL :
case CalpontSystemCatalog::UDECIMAL :
/* if( width <= 4 )
emptyVal = joblist::SMALLINTEMPTYROW;
else
if( width <= 9 )
emptyVal = 0x80000001;
else
if( width <= 18 )
emptyVal = 0x8000000000000001LL;
else
emptyVal = 0xFFFFFFFFFFFFFFFFLL;
*/
// @bug 194 use the correct logic in handling empty value for decimal
// Decimal reuses the integer empty-row constant that matches its storage
// width (1, 2, 4 or 8); anything wider gets BINARYEMPTYROW.
if (width <= 1)
emptyVal = joblist::TINYINTEMPTYROW;
else if ( width <= 2 )
emptyVal = joblist::SMALLINTEMPTYROW;
else if ( width <= 4 )
emptyVal = joblist::INTEMPTYROW;
else if ( width <= 8 )
emptyVal = joblist::BIGINTEMPTYROW;
else
emptyVal = joblist::BINARYEMPTYROW;
break;
case CalpontSystemCatalog::UTINYINT :
emptyVal = joblist::UTINYINTEMPTYROW;
break;
case CalpontSystemCatalog::USMALLINT:
emptyVal = joblist::USMALLINTEMPTYROW;
break;
case CalpontSystemCatalog::UMEDINT :
case CalpontSystemCatalog::UINT :
emptyVal = joblist::UINTEMPTYROW;
break;
case CalpontSystemCatalog::UBIGINT :
emptyVal = joblist::UBIGINTEMPTYROW;
break;
case CalpontSystemCatalog::BINARY :
emptyVal = joblist::BINARYEMPTYROW;
break;
// CHAR, VARCHAR, DATE, DATETIME, TIMESTAMP and every type not listed above
// share one path that picks a CHAR{1,2,4,8}EMPTYROW constant by width.
case CalpontSystemCatalog::CHAR :
case CalpontSystemCatalog::VARCHAR :
case CalpontSystemCatalog::DATE :
case CalpontSystemCatalog::DATETIME :
case CalpontSystemCatalog::TIMESTAMP :
default:
// offset is -1 for VARCHAR and 0 otherwise, so VARCHAR moves to the next
// larger constant one byte of width earlier than the other types.
offset = ( colDataType == CalpontSystemCatalog::VARCHAR ) ? -1 : 0;
emptyVal = joblist::CHAR1EMPTYROW;
if ( width == (2 + offset) )
emptyVal = joblist::CHAR2EMPTYROW;
else if ( width >= (3 + offset) && width <= ( 4 + offset ) )
emptyVal = joblist::CHAR4EMPTYROW;
else if ( width >= (5 + offset) )
emptyVal = joblist::CHAR8EMPTYROW;
break;
}
return emptyVal;
// Hands the type, width and caller-supplied buffer to the shared utils helper
// (declared in emptyvaluemanip.h, presumably -- that include is added by this commit).
utils::getEmptyRowValue(colDataType, width, emptyVal);
}
/***********************************************************
@ -264,7 +166,7 @@ void BlockOp::resetBuf( unsigned char* buf, const int bufSize ) const
***********************************************************/
/* static */
void BlockOp::setEmptyBuf(
unsigned char* buf, const int bufSize, uint64_t emptyVal, const int width )
unsigned char* buf, const int bufSize, uint8_t* emptyVal, const int width )
{
const int ARRAY_COUNT = 128;
const int NBYTES_IN_ARRAY = width * ARRAY_COUNT;
@ -275,10 +177,9 @@ void BlockOp::setEmptyBuf(
// instead of individual values. This reduces the number of calls to
// memcpy().
int w = width > 8 ? 8: width;
for(uint8_t* pos = emptyValArray, * end = pos + NBYTES_IN_ARRAY; pos < end; pos += w) //FIXME for no loop
for(uint8_t* pos = emptyValArray, * end = pos + NBYTES_IN_ARRAY; pos < end; pos += width) //FIXME for no loop
{
memcpy(pos, &emptyVal, w);
memcpy(pos, emptyVal, width);
}
int countFull128 = (bufSize / width) / ARRAY_COUNT;

View File

@ -89,8 +89,9 @@ public:
/**
* @brief Get an empty row value
*/
EXPORT uint64_t getEmptyRowValue(const execplan::CalpontSystemCatalog::ColDataType colDataType,
const int width ) const;
EXPORT void getEmptyRowValue(const execplan::CalpontSystemCatalog::ColDataType colDataType,
const int width,
uint8_t* emptyVal ) const;
/**
* @brief Calculate row id
@ -116,7 +117,7 @@ public:
*/
EXPORT void static setEmptyBuf( unsigned char* buf,
const int bufSize,
uint64_t emptyVal, const int width );
uint8_t* emptyVal, const int width );
/**
* @brief Set a value in a buffer

View File

@ -306,7 +306,8 @@ void BulkRollbackFile::reInitTruncColumnExtent(
}
// Initialize the remainder of the extent after the HWM block
uint64_t emptyVal = fDbFile.getEmptyRowValue( colType, colWidth );
uint8_t* emptyVal = (uint8_t*) alloca(colWidth);
fDbFile.getEmptyRowValue( colType, colWidth, emptyVal );
int rc = fDbFile.reInitPartialColumnExtent( pFile,
startOffset,

View File

@ -374,7 +374,8 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
if (nBlocksToInit > 0)
{
uint64_t emptyVal = fDbFile.getEmptyRowValue( colType, colWidth );
uint8_t* emptyVal = (uint8_t*) alloca(colWidth);
fDbFile.getEmptyRowValue( colType, colWidth, emptyVal );
rc = fDbFile.reInitPartialColumnExtent( pFile,
(chunkPtrs[chunkIndex].first + restoredChunkLen),
nBlocksToInit,

View File

@ -821,7 +821,8 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
// Fills buf with the column's empty-row value: obtains the value for
// fileData's column type and width from fFileOp, then passes it to
// setEmptyBuf() with a size of UNCOMPRESSED_CHUNK_SIZE.
// NOTE(review): diff hunk shown without +/- markers (header "-821,7 +821,8").
// Presumably the uint64_t emptyVal line is the removed code and the
// alloca()/getEmptyRowValue() pair replaces it -- confirm against the commit.
void ChunkManager::initializeColumnChunk(char* buf, CompFileData* fileData)
{
int size = UNCOMPRESSED_CHUNK_SIZE;
uint64_t emptyVal = fFileOp->getEmptyRowValue(fileData->fColDataType, fileData->fColWidth);
// Scratch buffer of fColWidth bytes from alloca(); getEmptyRowValue() receives
// it as its emptyVal argument and setEmptyBuf() is then given the same pointer.
uint8_t* emptyVal = (uint8_t*) alloca(fileData->fColWidth);
fFileOp->getEmptyRowValue(fileData->fColDataType, fileData->fColWidth, emptyVal);
fFileOp->setEmptyBuf((unsigned char*)buf, size, emptyVal, fileData->fColWidth);
}
@ -1342,7 +1343,7 @@ inline int ChunkManager::writeHeader_(CompFileData* fileData, int ptrSecSize)
// For the specified segment file (pFile), read in an abbreviated/compressed
// chunk extent, uncompress, and expand to a full chunk for a full extent.
//------------------------------------------------------------------------------
int ChunkManager::expandAbbrevColumnExtent(IDBDataFile* pFile, uint64_t emptyVal, int width)
int ChunkManager::expandAbbrevColumnExtent(IDBDataFile* pFile, uint8_t* emptyVal, int width)
{
map<IDBDataFile*, CompFileData*>::iterator i = fFilePtrMap.find(pFile);

View File

@ -214,7 +214,7 @@ public:
void cleanUp(const std::map<FID, FID>& columOids);
// @brief Expand an initial column, not dictionary, extent to a full extent.
int expandAbbrevColumnExtent(IDBDataFile* pFile, uint64_t emptyVal, int width);
int expandAbbrevColumnExtent(IDBDataFile* pFile, uint8_t* emptyVal, int width);
// @brief Update column extent
int updateColumnExtent(IDBDataFile* pFile, int addBlockCount);

View File

@ -639,8 +639,6 @@ void Convertor::convertColType(ColStruct* curStruct)
break;
default:
// WIP replace with BINARY
//*internalType = WriteEngine::WR_INT128;
*internalType = WriteEngine::WR_BINARY;
break;
}
@ -710,14 +708,8 @@ void Convertor::convertColType(ColStruct* curStruct)
// check whether width is in sync with the requirement
*width = getCorrectRowWidth(dataType, *width);
// This is the patch for the decimal thing, override
// if (dataType == CalpontSystemCatalog::DECIMAL)
// {
// *internalType = *width <= 4 ?
// WriteEngine::WR_INT : WriteEngine::WR_LONGLONG;
// }
}
/*******************************************************************************
* DESCRIPTION:

View File

@ -161,7 +161,7 @@ int FileOp::createDir( const char* dirName, mode_t mode ) const
* ERR_FILE_CREATE if can not create the file
***********************************************************/
int FileOp::createFile( const char* fileName, int numOfBlock,
uint64_t emptyVal, int width,
uint8_t* emptyVal, int width,
uint16_t dbRoot )
{
IDBDataFile* pFile =
@ -228,7 +228,7 @@ int FileOp::createFile(FID fid,
uint16_t dbRoot,
uint32_t partition,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint64_t emptyVal,
uint8_t* emptyVal,
int width)
{
//std::cout << "Creating file oid: " << fid <<
@ -569,7 +569,7 @@ bool FileOp::existsOIDDir( FID fid ) const
***********************************************************/
int FileOp::extendFile(
OID oid,
uint64_t emptyVal,
uint8_t* emptyVal,
int width,
HWM hwm,
BRM::LBID_t startLbid,
@ -875,7 +875,7 @@ int FileOp::extendFile(
***********************************************************/
int FileOp::addExtentExactFile(
OID oid,
uint64_t emptyVal,
uint8_t* emptyVal,
int width,
int& allocSize,
uint16_t dbRoot,
@ -1045,7 +1045,7 @@ int FileOp::initColumnExtent(
IDBDataFile* pFile,
uint16_t dbRoot,
int nBlocks,
uint64_t emptyVal,
uint8_t* emptyVal,
int width,
bool bNewFile,
bool bExpandExtent,
@ -1225,7 +1225,7 @@ int FileOp::initAbbrevCompColumnExtent(
IDBDataFile* pFile,
uint16_t dbRoot,
int nBlocks,
uint64_t emptyVal,
uint8_t* emptyVal,
int width)
{
// Reserve disk space for optimized abbreviated extent
@ -1285,7 +1285,7 @@ int FileOp::writeInitialCompColumnChunk(
IDBDataFile* pFile,
int nBlocksAllocated,
int nRows,
uint64_t emptyVal,
uint8_t* emptyVal,
int width,
char* hdrs)
{
@ -1366,7 +1366,7 @@ int FileOp::writeInitialCompColumnChunk(
***********************************************************/
int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
int colWidth,
uint64_t emptyVal,
uint8_t* emptyVal,
uint16_t dbRoot,
uint32_t partition,
uint16_t segment,
@ -1671,7 +1671,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
***********************************************************/
int FileOp::expandAbbrevColumnChunk(
IDBDataFile* pFile,
uint64_t emptyVal,
uint8_t* emptyVal,
int colWidth,
const CompChunkPtr& chunkInPtr,
CompChunkPtr& chunkOutPtr )
@ -2036,7 +2036,7 @@ int FileOp::reInitPartialColumnExtent(
IDBDataFile* pFile,
long long startOffset,
int nBlocks,
uint64_t emptyVal,
uint8_t* emptyVal,
int width )
{
int rc = setFileOffset( pFile, startOffset, SEEK_SET );
@ -2845,7 +2845,7 @@ bool FileOp::isDiskSpaceAvail(const std::string& fileName, int nBlocks) const
int FileOp::expandAbbrevColumnExtent(
IDBDataFile* pFile, // FILE ptr to file where abbrev extent is to be expanded
uint16_t dbRoot, // The DBRoot of the file with the abbreviated extent
uint64_t emptyVal,// Empty value to be used in expanding the extent
uint8_t* emptyVal,// Empty value to be used in expanding the extent
int width ) // Width of the column (in bytes)
{
// Based on extent size, see how many blocks to add to fill the extent

View File

@ -92,7 +92,7 @@ public:
int& allocSize,
uint16_t dbRoot, uint32_t partition,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint64_t emptyVal = 0, int width = 1 ) ;
uint8_t* emptyVal, int width = 1 ) ;
/**
@ -100,7 +100,7 @@ public:
* Changed to public for UT.
*/
int createFile( const char* fileName, int fileSize,
uint64_t emptyVal, int width,
uint8_t* emptyVal, int width,
uint16_t dbRoot );
/**
@ -163,7 +163,7 @@ public:
EXPORT virtual int expandAbbrevColumnExtent(
IDBDataFile* pFile,
uint16_t dbRoot,
uint64_t emptyVal,
uint8_t* emptyVal,
int width );
/**
@ -198,7 +198,7 @@ public:
* @param hdrs (in/out) Contents of headers, if file is compressed.
* @return returns NO_ERROR if success.
*/
EXPORT int extendFile(OID oid, uint64_t emptyVal,
EXPORT int extendFile(OID oid, uint8_t* emptyVal,
int width,
HWM hwm,
BRM::LBID_t startLbid,
@ -226,7 +226,7 @@ public:
* @param newFile (out) Indicates if a new file was created for the extent
* @param hdrs (in/out) Contents of headers, if file is compressed.
*/
EXPORT int addExtentExactFile(OID oid, uint64_t emptyVal,
EXPORT int addExtentExactFile(OID oid, uint8_t* emptyVal,
int width,
int& allocSize,
uint16_t dbRoot,
@ -253,7 +253,7 @@ public:
*/
EXPORT int fillCompColumnExtentEmptyChunks(OID oid,
int colWidth,
uint64_t emptyVal,
uint8_t* emptyVal,
uint16_t dbRoot,
uint32_t partition,
uint16_t segment,
@ -433,7 +433,7 @@ public:
EXPORT int reInitPartialColumnExtent( IDBDataFile* pFile,
long long startOffset,
int nBlocks,
uint64_t emptyVal,
uint8_t* emptyVal,
int width );
/**
@ -497,7 +497,7 @@ public:
int initColumnExtent( IDBDataFile* pFile,
uint16_t dbRoot,
int nBlocks,
uint64_t emptyVal,
uint8_t* emptyVal,
int width,
bool bNewFile,
bool bExpandExtent,
@ -519,7 +519,7 @@ private:
FileOp& operator=(const FileOp& rhs);
int expandAbbrevColumnChunk( IDBDataFile* pFile,
uint64_t emptyVal,
uint8_t* emptyVal,
int colWidth,
const compress::CompChunkPtr& chunkInPtr,
compress::CompChunkPtr& chunkOutPt);
@ -527,7 +527,7 @@ private:
int initAbbrevCompColumnExtent( IDBDataFile* pFile,
uint16_t dbRoot,
int nBlocks,
uint64_t emptyVal,
uint8_t* emptyVal,
int width);
static void initDbRootExtentMutexes();
@ -536,7 +536,7 @@ private:
int writeInitialCompColumnChunk( IDBDataFile* pFile,
int nBlocksAllocated,
int nRows,
uint64_t emptyVal,
uint8_t* emptyVal,
int width,
char* hdrs);

View File

@ -57,6 +57,7 @@ typedef uint32_t FID; /** @brief File ID */
typedef uint64_t RID; /** @brief Row ID */
typedef uint32_t TxnID; /** @brief Transaction ID (New)*/
typedef uint32_t HWM; /** @brief high water mark */
typedef unsigned __int128 uint128_t;
/************************************************************************
* Type enumerations
@ -347,7 +348,7 @@ struct JobColumn /** @brief Job Column Structure */
execplan::CalpontSystemCatalog::ColDataType dataType; /** @brief column data type */
ColType weType; /** @brief write engine data type */
std::string typeName; /** @brief data type name */
uint64_t emptyVal; /** @brief default empty value */
uint128_t emptyVal; /** @brief default empty value */
int width; /** @brief column width; for a dictionary column, this is "eventually" the token width */
int definedWidth; /** @brief column width as defined in the table, used for non-dictionary strings */
int dctnryWidth; /** @brief dictionary width */