You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-498 Reduced number of blocks created for abbreviated extents
thus reduced IO load when creating a table. Uncompressed abbreviated segment and dicts aren't affected by this b/c CS'es system catalog uses uncompressed dict files. CS now doesn't work with empty dicts files.
This commit is contained in:
@ -18,7 +18,6 @@
|
||||
// $Id: we_fileop.cpp 4737 2013-08-14 20:45:46Z bwilkinson $
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
@ -541,8 +540,8 @@ bool FileOp::existsOIDDir( FID fid ) const
|
||||
* If this is the very first file for the specified DBRoot, then the
|
||||
* partition and segment number must be specified, else the selected
|
||||
* partition and segment numbers are returned. This method tries to
|
||||
* optimize full extents creation either skiping disk space
|
||||
* preallocation(if activated) or via fallocate.
|
||||
* optimize full extents creation skiping disk space
|
||||
* preallocation(if activated).
|
||||
* PARAMETERS:
|
||||
* oid - OID of the column to be extended
|
||||
* emptyVal - Empty value to be used for oid
|
||||
@ -1013,8 +1012,7 @@ int FileOp::addExtentExactFile(
|
||||
* nBlocks controls how many 8192-byte blocks are to be written out.
|
||||
* If bOptExtension is set then method first checks config for
|
||||
* DBRootX.Prealloc. If it is disabled then it skips disk space
|
||||
* preallocation. If not it tries to go with fallocate first then
|
||||
* fallbacks to sequential write.
|
||||
* preallocation.
|
||||
* PARAMETERS:
|
||||
* pFile (in) - IDBDataFile* of column segment file to be written to
|
||||
* dbRoot (in) - DBRoot of pFile
|
||||
@ -1025,7 +1023,7 @@ int FileOp::addExtentExactFile(
|
||||
* headers will be included "if" it is a compressed file.
|
||||
* bExpandExtent (in) - Expand existing extent, or initialize a new one
|
||||
* bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent
|
||||
* bOptExtension(in) - skip or optimize full extent preallocation.
|
||||
* bOptExtension(in) - skip full extent preallocation.
|
||||
* RETURN:
|
||||
* returns ERR_FILE_WRITE if an error occurs,
|
||||
* else returns NO_ERROR.
|
||||
@ -1072,6 +1070,19 @@ int FileOp::initColumnExtent(
|
||||
// Create vector of mutexes used to serialize extent access per DBRoot
|
||||
initDbRootExtentMutexes( );
|
||||
|
||||
// MCOL-498 Skip the huge preallocations if the option is set
|
||||
// for the dbroot. This check is skiped for abbreviated extent.
|
||||
// IMO it is better to check bool then to call a function.
|
||||
if ( bOptExtension )
|
||||
{
|
||||
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
|
||||
? bOptExtension : false;
|
||||
}
|
||||
// Reduce number of blocks allocated for abbreviated extents thus
|
||||
// CS writes less when creates a new table. This couldn't be zero
|
||||
// b/c Snappy compressed file format doesn't tolerate empty files.
|
||||
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 3 : nBlocks;
|
||||
|
||||
// Determine the number of blocks in each call to fwrite(), and the
|
||||
// number of fwrite() calls to make, based on this. In other words,
|
||||
// we put a cap on the "writeSize" so that we don't allocate and write
|
||||
@ -1079,16 +1090,15 @@ int FileOp::initColumnExtent(
|
||||
// expanding an abbreviated 64M extent, we may not have an even
|
||||
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
|
||||
// blocks above and beyond loopCount*MAX_NBLOCKS.
|
||||
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
||||
int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
||||
int loopCount = 1;
|
||||
int remWriteSize = 0;
|
||||
off64_t currFileSize = pFile->size();
|
||||
|
||||
if (nBlocks > MAX_NBLOCKS) // 64M row extent size
|
||||
if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
|
||||
{
|
||||
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
|
||||
loopCount = nBlocks / MAX_NBLOCKS;
|
||||
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS);
|
||||
loopCount = realNBlocks / MAX_NBLOCKS;
|
||||
remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
|
||||
}
|
||||
|
||||
// Allocate a buffer, initialize it, and use it to create the extent
|
||||
@ -1109,39 +1119,13 @@ int FileOp::initColumnExtent(
|
||||
else
|
||||
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT);
|
||||
#endif
|
||||
// MCOL-498 Skip the huge preallocations if the option is set
|
||||
// for the dbroot. This check is skiped for abbreviated extent.
|
||||
// IMO it is better to check bool then to call a function.
|
||||
if ( bOptExtension )
|
||||
// Skip space preallocation if configured so
|
||||
// fallback to sequential write otherwise.
|
||||
// Couldn't avoid preallocation for full extents,
|
||||
// e.g. ADD COLUMN DDL b/c CS has to fill the file
|
||||
// with empty magics.
|
||||
if ( !bOptExtension )
|
||||
{
|
||||
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
|
||||
? bOptExtension : false;
|
||||
}
|
||||
int savedErrno = 0;
|
||||
// MCOL-498 fallocate the abbreviated extent,
|
||||
// fallback to sequential write if fallocate failed
|
||||
// Couldn't use fallocate for full extents, e.g. ADD COLUMN DDL
|
||||
// b/c CS has to fill the file with empty magics.
|
||||
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
|
||||
&& pFile->fallocate(0, currFileSize, writeSize) )
|
||||
)
|
||||
{
|
||||
savedErrno = errno;
|
||||
// Log the failed fallocate() call result
|
||||
if ( bOptExtension )
|
||||
{
|
||||
std::ostringstream oss;
|
||||
std::string errnoMsg;
|
||||
Convertor::mapErrnoToString(savedErrno, errnoMsg);
|
||||
oss << "FileOp::initColumnExtent(): fallocate(" << currFileSize <<
|
||||
", " << writeSize << "): errno = " << savedErrno <<
|
||||
": " << errnoMsg;
|
||||
logging::Message::Args args;
|
||||
args.add(oss.str());
|
||||
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
|
||||
logging::M0006);
|
||||
}
|
||||
|
||||
#ifdef PROFILE
|
||||
Stats::startParseEvent(WE_STATS_INIT_COL_EXTENT);
|
||||
#endif
|
||||
@ -1231,7 +1215,7 @@ int FileOp::initAbbrevCompColumnExtent(
|
||||
uint64_t emptyVal,
|
||||
int width)
|
||||
{
|
||||
// Reserve disk space for full abbreviated extent
|
||||
// Reserve disk space for optimized abbreviated extent
|
||||
int rc = initColumnExtent( pFile,
|
||||
dbRoot,
|
||||
nBlocks,
|
||||
@ -1239,8 +1223,8 @@ int FileOp::initAbbrevCompColumnExtent(
|
||||
width,
|
||||
true, // new file
|
||||
false, // don't expand; add new extent
|
||||
true ); // add abbreviated extent
|
||||
|
||||
true, // add abbreviated extent
|
||||
true); // optimize the initial extent
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
return rc;
|
||||
@ -1815,8 +1799,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
|
||||
* nBlocks controls how many 8192-byte blocks are to be written out.
|
||||
* If bOptExtension is set then method first checks config for
|
||||
* DBRootX.Prealloc. If it is disabled then it skips disk space
|
||||
* preallocation. If not it tries to go with fallocate first then
|
||||
* fallbacks to sequential write.
|
||||
* preallocation.
|
||||
* PARAMETERS:
|
||||
* pFile (in) - IDBDataFile* of column segment file to be written to
|
||||
* dbRoot (in) - DBRoot of pFile
|
||||
@ -1824,7 +1807,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
|
||||
* blockHdrInit(in) - data used to initialize each block
|
||||
* blockHdrInitSize(in) - number of bytes in blockHdrInit
|
||||
* bExpandExtent (in) - Expand existing extent, or initialize a new one
|
||||
* bOptExtension(in) - skip or optimize full extent preallocation.
|
||||
* bOptExtension(in) - skip full extent preallocation.
|
||||
* RETURN:
|
||||
* returns ERR_FILE_WRITE if an error occurs,
|
||||
* else returns NO_ERROR.
|
||||
@ -1838,7 +1821,6 @@ int FileOp::initDctnryExtent(
|
||||
bool bExpandExtent,
|
||||
bool bOptExtension )
|
||||
{
|
||||
off64_t currFileSize = pFile->size();
|
||||
// @bug5769 Don't initialize extents or truncate db files on HDFS
|
||||
if (idbdatafile::IDBPolicy::useHdfs())
|
||||
{
|
||||
@ -1854,6 +1836,21 @@ int FileOp::initDctnryExtent(
|
||||
// Create vector of mutexes used to serialize extent access per DBRoot
|
||||
initDbRootExtentMutexes( );
|
||||
|
||||
// MCOL-498 Skip the huge preallocations if the option is set
|
||||
// for the dbroot. This check is skiped for abbreviated extent.
|
||||
// IMO it is better to check bool then to call a function.
|
||||
// CS uses non-compressed dict files for its system catalog so
|
||||
// CS doesn't optimize non-compressed dict creation.
|
||||
if ( bOptExtension )
|
||||
{
|
||||
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)
|
||||
&& m_compressionType) ? bOptExtension : false;
|
||||
}
|
||||
// Reduce number of blocks allocated for abbreviated extents thus
|
||||
// CS writes less when creates a new table. This couldn't be zero
|
||||
// b/c Snappy compressed file format doesn't tolerate empty files.
|
||||
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 1 : nBlocks;
|
||||
|
||||
// Determine the number of blocks in each call to fwrite(), and the
|
||||
// number of fwrite() calls to make, based on this. In other words,
|
||||
// we put a cap on the "writeSize" so that we don't allocate and write
|
||||
@ -1861,15 +1858,15 @@ int FileOp::initDctnryExtent(
|
||||
// expanding an abbreviated 64M extent, we may not have an even
|
||||
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
|
||||
// blocks above and beyond loopCount*MAX_NBLOCKS.
|
||||
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
||||
int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
||||
int loopCount = 1;
|
||||
int remWriteSize = 0;
|
||||
|
||||
if (nBlocks > MAX_NBLOCKS) // 64M row extent size
|
||||
if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
|
||||
{
|
||||
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
|
||||
loopCount = nBlocks / MAX_NBLOCKS;
|
||||
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS);
|
||||
loopCount = realNBlocks / MAX_NBLOCKS;
|
||||
remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
|
||||
}
|
||||
|
||||
// Allocate a buffer, initialize it, and use it to create the extent
|
||||
@ -1890,36 +1887,13 @@ int FileOp::initDctnryExtent(
|
||||
else
|
||||
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT);
|
||||
#endif
|
||||
// MCOL-498 Skip the huge preallocations if the option is set
|
||||
// for the dbroot. This check is skiped for abbreviated extent.
|
||||
// IMO it is better to check bool then to call a function.
|
||||
if ( bOptExtension )
|
||||
// Skip space preallocation if configured so
|
||||
// fallback to sequential write otherwise.
|
||||
// Couldn't avoid preallocation for full extents,
|
||||
// e.g. ADD COLUMN DDL b/c CS has to fill the file
|
||||
// with empty magics.
|
||||
if ( !bOptExtension )
|
||||
{
|
||||
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
|
||||
? bOptExtension : false;
|
||||
}
|
||||
int savedErrno = 0;
|
||||
// MCOL-498 fallocate the abbreviated extent,
|
||||
// fallback to sequential write if fallocate failed
|
||||
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
|
||||
&& pFile->fallocate(0, currFileSize, writeSize) )
|
||||
)
|
||||
{
|
||||
// MCOL-498 Log the failed fallocate() call result
|
||||
if ( bOptExtension )
|
||||
{
|
||||
std::ostringstream oss;
|
||||
std::string errnoMsg;
|
||||
Convertor::mapErrnoToString(savedErrno, errnoMsg);
|
||||
oss << "FileOp::initDctnryExtent(): fallocate(" << currFileSize <<
|
||||
", " << writeSize << "): errno = " << savedErrno <<
|
||||
": " << errnoMsg;
|
||||
logging::Message::Args args;
|
||||
args.add(oss.str());
|
||||
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
|
||||
logging::M0006);
|
||||
}
|
||||
|
||||
// Allocate buffer, and store in scoped_array to insure it's deletion.
|
||||
// Create scope {...} to manage deletion of writeBuf.
|
||||
{
|
||||
@ -1932,7 +1906,7 @@ int FileOp::initDctnryExtent(
|
||||
|
||||
memset(writeBuf, 0, writeSize);
|
||||
|
||||
for (int i = 0; i < nBlocks; i++)
|
||||
for (int i = 0; i < realNBlocks; i++)
|
||||
{
|
||||
memcpy( writeBuf + (i * BYTE_PER_BLOCK),
|
||||
blockHdrInit,
|
||||
|
Reference in New Issue
Block a user