1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-498 Changes made in accordance with review suggestions.

Add more comments.
    Changed return value for HDFS'es fallocate.
    Removed unnecessary code in ColumnBufferCompressed::writeToFile
    Replaced Nulls with Empties in variable names.
This commit is contained in:
Roman Nozdrin
2019-01-20 21:12:23 +03:00
parent cbdcdb9f10
commit abf7ef80c2
13 changed files with 96 additions and 55 deletions

View File

@ -279,6 +279,12 @@ int BufferedFile::close()
return ret;
}
/**
@brief
The wrapper for fallocate function.
@see
This one is used in shared/we_fileop.cpp to skip expensive file preallocation.
*/
int BufferedFile::fallocate(int mode, off64_t offset, off64_t length)
{
int ret = 0;

View File

@ -187,8 +187,11 @@ public:
virtual time_t mtime() = 0;
/**
* The fallocate() method returns the modification time of the file in
* seconds. Returns -1 on error.
* The fallocate() method preallocates disk space more cheaply than
* a sequential write. fallocate() is supported by a limited number
* of filesystems. This method is implemented for the Un-/BufferedFile
* classes only.
* Returns -1 on error.
*/
virtual int fallocate(int mode, off64_t offset, off64_t length) = 0;

View File

@ -329,6 +329,12 @@ int UnbufferedFile::close()
return ret;
}
/**
@brief
The wrapper for fallocate function.
@see
This one is used in shared/we_fileop.cpp to skip expensive file preallocation.
*/
int UnbufferedFile::fallocate(int mode, off64_t offset, off64_t length)
{
int ret = 0;

View File

@ -317,9 +317,17 @@ int HdfsRdwrFileBuffer::close()
return 0;
}
/**
@brief
The dummy wrapper for the fallocate function.
Which code this method should return is still an open question.
fallocate fails for HDFS because HDFS doesn't use it.
@see
This one is used in shared/we_fileop.cpp to skip expensive file preallocation.
*/
int HdfsRdwrFileBuffer::fallocate(int mode, off64_t offset, off64_t length)
{
return 0;
return -1;
}
}

View File

@ -101,18 +101,19 @@ void ColumnBuffer::resizeAndCopy(int newSize, int startOffset, int endOffset)
//------------------------------------------------------------------------------
// Write data stored up in the output buffer to the segment column file.
// fillUpWEmpties is set when CS finishes with writing to add extra empty
// magics to fill up the block to its boundary.
//------------------------------------------------------------------------------
int ColumnBuffer::writeToFile(int startOffset, int writeSize, bool fillUpWNulls)
int ColumnBuffer::writeToFile(int startOffset, int writeSize, bool fillUpWEmpties)
{
if (writeSize == 0) // skip unnecessary write, if 0 bytes given
return NO_ERROR;
unsigned char *newBuf = NULL;
if ( fillUpWNulls )
if ( fillUpWEmpties )
{
BlockOp blockOp;
//TO DO Use scoped_ptr here
newBuf = new unsigned char[BYTE_PER_BLOCK];
uint64_t EmptyValue = blockOp.getEmptyRowValue(fColInfo->column.dataType,
fColInfo->column.width);
@ -125,19 +126,21 @@ int ColumnBuffer::writeToFile(int startOffset, int writeSize, bool fillUpWNulls)
Stats::startParseEvent(WE_STATS_WRITE_COL);
#endif
size_t nitems;
if ( fillUpWNulls )
if ( fillUpWEmpties )
nitems = fFile->write(newBuf, BYTE_PER_BLOCK) / BYTE_PER_BLOCK;
else
nitems = fFile->write(fBuffer + startOffset, writeSize) / writeSize;
if (nitems != 1)
{
delete newBuf;
return ERR_FILE_WRITE;
}
#ifdef PROFILE
Stats::stopParseEvent(WE_STATS_WRITE_COL);
#endif
//TO DO Use scoped_ptr here
delete newBuf;
return NO_ERROR;
}

View File

@ -107,11 +107,11 @@ public:
*
* @param startOffset The buffer offset from where the write should begin
* @param writeSize The number of bytes to be written to the file
* @param fillUpWNulls The flag to fill the buffer with NULLs up to
* the block boundary.
* @param fillUpWEmpties The flag to fill the buffer with empty magic values
* up to the block boundary.
*/
virtual int writeToFile(int startOffset, int writeSize,
bool fillUpWNulls = false);
bool fillUpWEmpties = false);
protected:

View File

@ -167,16 +167,11 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
// file, and instead buffer up the data to be compressed in 4M chunks before
// writing it out.
//------------------------------------------------------------------------------
int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
bool fillUpWNulls)
int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize)
{
if (writeSize == 0) // skip unnecessary write, if 0 bytes given
return NO_ERROR;
int fillUpWNullsWriteSize = 0;
if (fillUpWNulls)
fillUpWNullsWriteSize = BYTE_PER_BLOCK - writeSize % BYTE_PER_BLOCK;
// If we are starting a new file, we need to reinit the buffer and
// find out what our file offset should be set to.
if (!fToBeCompressedCapacity)
@ -224,7 +219,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
// Expand the compression buffer size if working with an abbrev extent, and
// the bytes we are about to add will overflow the abbreviated extent.
if ((fToBeCompressedCapacity < IDBCompressInterface::UNCOMPRESSED_INBUF_LEN) &&
((fNumBytes + writeSize + fillUpWNullsWriteSize) > fToBeCompressedCapacity) )
((fNumBytes + writeSize) > fToBeCompressedCapacity) )
{
std::ostringstream oss;
oss << "Expanding abbrev to-be-compressed buffer for: OID-" <<
@ -236,7 +231,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
}
if ((fNumBytes + writeSize + fillUpWNullsWriteSize) <= fToBeCompressedCapacity)
if ((fNumBytes + writeSize) <= fToBeCompressedCapacity)
{
if (fLog->isDebug( DEBUG_2 ))
{
@ -247,14 +242,12 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
"; part-" << fColInfo->curCol.dataFile.fPartition <<
"; seg-" << fColInfo->curCol.dataFile.fSegment <<
"; addBytes-" << writeSize <<
"; extraBytes-" << fillUpWNullsWriteSize <<
"; totBytes-" << (fNumBytes + writeSize);
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
}
memcpy(bufOffset, (fBuffer + startOffset), writeSize);
fNumBytes += writeSize;
fNumBytes += fillUpWNullsWriteSize;
}
else // Not enough room to add all the data to the to-be-compressed buffer
{
@ -345,7 +338,6 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
memcpy(bufOffset, (fBuffer + startOffsetX), writeSizeOut);
fNumBytes += writeSizeOut;
fNumBytes += fillUpWNullsWriteSize;
}
startOffsetX += writeSizeOut;

View File

@ -521,7 +521,9 @@ int ColumnBufferManager::writeToFile(int endOffset)
// and the remaining buffer data will be written to the next segment file in
// the DBRoot, partition, segement number sequence.
// This function also catches and handles the case where an abbreviated
// extent needs to be expanded to a full extent on disk.
// extent needs to be expanded to a full extent on disk. When fillUpWEmpties is
// set, CS is finishing its writes and has to fill this block with empty
// magics up to its boundary.
//
// WARNING: This means this function may change the information in the
// ColumnInfo struct that owns this ColumnBufferManager, if a
@ -529,7 +531,7 @@ int ColumnBufferManager::writeToFile(int endOffset)
// internal buffer, or if an abbreviated extent is expanded.
//------------------------------------------------------------------------------
int ColumnBufferManager::writeToFileExtentCheck(
uint32_t startOffset, uint32_t writeSize, bool fillUpWNulls)
uint32_t startOffset, uint32_t writeSize, bool fillUpWEmpties)
{
if (fLog->isDebug( DEBUG_3 ))
@ -571,7 +573,7 @@ int ColumnBufferManager::writeToFileExtentCheck(
if (availableFileSize >= writeSize)
{
int rc = fCBuf->writeToFile(startOffset, writeSize, fillUpWNulls);
int rc = fCBuf->writeToFile(startOffset, writeSize);
if (rc != NO_ERROR)
{
@ -583,9 +585,11 @@ int ColumnBufferManager::writeToFileExtentCheck(
return rc;
}
// MCOL-498 Fill it up to the block size boundary.
if ( fillUpWNulls )
// MCOL-498 Fill this block up to its boundary.
if ( fillUpWEmpties )
{
writeSize = BLOCK_SIZE;
}
fColInfo->updateBytesWrittenCounts( writeSize );
}
@ -628,7 +632,7 @@ int ColumnBufferManager::writeToFileExtentCheck(
}
int writeSize2 = writeSize - writeSize1;
rc = fCBuf->writeToFile(startOffset + writeSize1, writeSize2, fillUpWNulls);
rc = fCBuf->writeToFile(startOffset + writeSize1, writeSize2);
if (rc != NO_ERROR)
{
@ -640,9 +644,11 @@ int ColumnBufferManager::writeToFileExtentCheck(
return rc;
}
// MCOL-498 Fill it up to the block size boundary.
if ( fillUpWNulls )
// MCOL-498 Fill this block up to its boundary.
if ( fillUpWEmpties )
{
writeSize2 = BLOCK_SIZE;
}
fColInfo->updateBytesWrittenCounts( writeSize2 );
}
@ -651,7 +657,8 @@ int ColumnBufferManager::writeToFileExtentCheck(
}
//------------------------------------------------------------------------------
// Flush the contents of internal fCBuf (column buffer) to disk.
// Flush the contents of internal fCBuf (column buffer) to disk. If CS flushes
// fewer than BLOCK_SIZE bytes then it propagates this event down the stack.
//------------------------------------------------------------------------------
int ColumnBufferManager::flush( )
{
@ -676,19 +683,20 @@ int ColumnBufferManager::flush( )
int bufferSize = fCBuf->getSize();
// MCOL-498 There are less the BLOCK_SIZE bytes in the buffer left, so
// MCOL-498 There are fewer than BLOCK_SIZE bytes left in the buffer,
// so propagate this info down the stack to fill the buffer up
// with empty magics.
// Account for circular buffer by making 2 calls to write the data,
// if we are wrapping around at the end of the buffer.
if (fBufFreeOffset < fBufWriteOffset)
{
// The check could be redundant.
bool fillUpWEmpty = ( static_cast<unsigned int>(bufferSize - fBufWriteOffset) >= BLOCK_SIZE )
bool fillUpWEmpties = ( static_cast<unsigned int>(bufferSize - fBufWriteOffset) >= BLOCK_SIZE )
? false : true;
RETURN_ON_ERROR( writeToFileExtentCheck( fBufWriteOffset,
bufferSize - fBufWriteOffset, fillUpWEmpty) );
bufferSize - fBufWriteOffset, fillUpWEmpties) );
fBufWriteOffset = 0;
}
// fill the buffer up with NULLs.
// MCOL-498 fill the buffer up with empty magics.
RETURN_ON_ERROR( writeToFileExtentCheck(
fBufWriteOffset, fBufFreeOffset - fBufWriteOffset, true) );
fBufWriteOffset = fBufFreeOffset;

View File

@ -193,12 +193,12 @@ protected:
* write out the buffer.
* @param startOffset The buffer offset where the write should begin
* @param writeSize The number of bytes to be written to the file
* @param fillUpWNulls The flag to fill the buffer with NULLs up to
* @param fillUpWEmpties The flag to fill the buffer with empty magic values
* up to the block boundary.
* @return success or fail status
*/
virtual int writeToFileExtentCheck(uint32_t startOffset, uint32_t writeSize,
bool fillUpWNulls = false);
bool fillUpWEmpties = false);
//-------------------------------------------------------------------------
// Protected Data Members

View File

@ -259,6 +259,8 @@ int Dctnry::createDctnry( const OID& dctnryOID, int colWidth,
if ( m_dFile != NULL )
{
// MCOL-498 CS doesn't optimize abbreviated extent
// creation.
bool optimizePrealloc = ( flag ) ? false : true;
rc = FileOp::initDctnryExtent( m_dFile,
m_dbRoot,

View File

@ -540,7 +540,9 @@ bool FileOp::existsOIDDir( FID fid ) const
* the applicable column segment file does not exist, it is created.
* If this is the very first file for the specified DBRoot, then the
* partition and segment number must be specified, else the selected
* partition and segment numbers are returned.
* partition and segment numbers are returned. This method tries to
* optimize full extent creation either by skipping disk space
* preallocation (if activated) or via fallocate.
* PARAMETERS:
* oid - OID of the column to be extended
* emptyVal - Empty value to be used for oid
@ -826,6 +828,7 @@ int FileOp::extendFile(
return rc;
// Initialize the contents of the extent.
// MCOL-498 optimize full extent creation.
rc = initColumnExtent( pFile,
dbRoot,
allocSize,
@ -834,7 +837,7 @@ int FileOp::extendFile(
newFile, // new or existing file
false, // don't expand; new extent
false, // add full (not abbreviated) extent
true); // try to use fallocate first
true); // try to optimize extent creation
return rc;
}
@ -1006,6 +1009,10 @@ int FileOp::addExtentExactFile(
* This function can be used to initialize an entirely new extent, or
* to finish initializing an extent that has already been started.
* nBlocks controls how many 8192-byte blocks are to be written out.
* If bOptExtension is set then the method first checks the config for
* DBRootX.Prealloc. If it is disabled then it skips disk space
* preallocation. If not, it tries fallocate first and then
* falls back to a sequential write.
* PARAMETERS:
* pFile (in) - IDBDataFile* of column segment file to be written to
* dbRoot (in) - DBRoot of pFile
@ -1016,7 +1023,7 @@ int FileOp::addExtentExactFile(
* headers will be included "if" it is a compressed file.
* bExpandExtent (in) - Expand existing extent, or initialize a new one
* bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent
* bOptExtension(in) - use fallocate() to extend the file if it is possible.
* bOptExtension(in) - skip or optimize full extent preallocation.
* RETURN:
* returns ERR_FILE_WRITE if an error occurs,
* else returns NO_ERROR.
@ -1045,7 +1052,6 @@ int FileOp::initColumnExtent(
}
// @bug5769 Don't initialize extents or truncate db files on HDFS
// MCOL-498 We don't need sequential segment files if a PM uses SSD either.
if (idbdatafile::IDBPolicy::useHdfs())
{
//@Bug 3219. update the compression header after the extent is expanded.
@ -1102,7 +1108,8 @@ int FileOp::initColumnExtent(
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT);
#endif
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot
// for the dbroot. This check is skipped for abbreviated extents.
// IMO it is better to check a bool than to call a function.
if ( bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
@ -1802,6 +1809,10 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
* This function can be used to initialize an entirely new extent, or
* to finish initializing an extent that has already been started.
* nBlocks controls how many 8192-byte blocks are to be written out.
* If bOptExtension is set then the method first checks the config for
* DBRootX.Prealloc. If it is disabled then it skips disk space
* preallocation. If not, it tries fallocate first and then
* falls back to a sequential write.
* PARAMETERS:
* pFile (in) - IDBDataFile* of column segment file to be written to
* dbRoot (in) - DBRoot of pFile
@ -1809,7 +1820,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
* blockHdrInit(in) - data used to initialize each block
* blockHdrInitSize(in) - number of bytes in blockHdrInit
* bExpandExtent (in) - Expand existing extent, or initialize a new one
* bOptExtension(in) - use fallocate() to extend the file if it is possible.
* bOptExtension(in) - skip or optimize full extent preallocation.
* RETURN:
* returns ERR_FILE_WRITE if an error occurs,
* else returns NO_ERROR.
@ -1825,7 +1836,6 @@ int FileOp::initDctnryExtent(
{
off64_t currFileSize = pFile->size();
// @bug5769 Don't initialize extents or truncate db files on HDFS
// MCOL-498 We don't need sequential segment files if a PM uses SSD either.
if (idbdatafile::IDBPolicy::useHdfs())
{
if (m_compressionType)
@ -1877,7 +1887,8 @@ int FileOp::initDctnryExtent(
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT);
#endif
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot
// for the dbroot. This check is skipped for abbreviated extents.
// IMO it is better to check a bool than to call a function.
if ( bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))

View File

@ -326,7 +326,7 @@ public:
* @param blockHdrInit(in) - data used to initialize each block header
* @param blockHdrInitSize(in) - number of bytes in blockHdrInit
* @param bExpandExtent (in) - Expand existing extent, or initialize new one
* @param bOptExtension (in) - use fallocate() to extend the file if it is possible.
* @param bOptExtension (in) - skip or optimize full extent preallocation
*/
EXPORT int initDctnryExtent( IDBDataFile* pFile,
uint16_t dbRoot,
@ -504,7 +504,7 @@ private:
// bNewFile (in) - Adding extent to new file
// bExpandExtent (in) - Expand existing extent, or initialize new one
// bAbbrevExtent (in) - If adding new extent, is it abbreviated
// bOptExtension(in) - use fallocate() to extend the file if it is possible.
// bOptExtension(in) - skip or optimize full extent preallocation
int initColumnExtent( IDBDataFile* pFile,
uint16_t dbRoot,
int nBlocks,

View File

@ -472,7 +472,9 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent,
if ( rc != NO_ERROR)
return rc;
// MCOL-498 Fill up the first block with empty values.
// MCOL-498 This must be the first block in a new extent, so
// fill the block up to its boundary with empties. Otherwise
// there could be phantom values.
{
uint64_t emptyVal = getEmptyRowValue(column.colDataType, column.colWidth);
setEmptyBuf(buf, BYTE_PER_BLOCK, emptyVal, column.colWidth);
@ -1543,7 +1545,7 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray,
uint64_t emptyVal;
int rc = NO_ERROR;
bool fillUpWEmptyVals = false;
bool fistRowInBlock = false;
bool firstRowInBlock = false;
bool lastRowInBlock = false;
uint16_t rowsInBlock = BYTE_PER_BLOCK / curCol.colWidth;
@ -1565,15 +1567,15 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray,
bDataDirty = false;
// MCOL-498 We got into the next block, so the row is first in that block
// - fill the block up with NULLs.
// - fill the block up with empty magics.
if ( curDataFbo != -1 && !bDelete )
fillUpWEmptyVals = true;
}
// MCOL-498 CS hasn't touched any block yet,
// but the row filled will be the first in the block.
fistRowInBlock = ( !(curRowId % (rowsInBlock)) ) ? true : false;
if( fistRowInBlock && !bDelete )
firstRowInBlock = ( !(curRowId % (rowsInBlock)) ) ? true : false;
if( firstRowInBlock && !bDelete )
fillUpWEmptyVals = true;
curDataFbo = dataFbo;
@ -1585,7 +1587,7 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray,
bDataDirty = true;
}
// This is a awkward way to convert void* and get ith element, I just don't have a good solution for that
// This is an awkward way to convert void* and get the i-th element, I just don't have a good solution for that
// How about pVal = valArray + i*curCol.colWidth?
switch (curCol.colType)
{
@ -1715,7 +1717,7 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray,
if ( rc != NO_ERROR)
return rc;
// MCOL-498 If it was the last row in a block fill the next block with
// MCOL-498 If it was the last row in a block fill the next block with
// empty vals, otherwise next ColumnOp::allocRowId()
// will fail on the next block.
lastRowInBlock = ( rowsInBlock - ( curRowId % rowsInBlock ) == 1 ) ? true : false;