1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-07 03:22:57 +03:00

MCOL-498 Reduced number of blocks created for abbreviated extents

thus reducing the IO load when creating a table.
    Uncompressed abbreviated segment and dict files aren't affected by
    this because CS's system catalog uses uncompressed dict files. CS
    now doesn't work with empty dict files.
This commit is contained in:
Roman Nozdrin
2019-04-03 10:26:57 +03:00
parent bc3c780e35
commit 22c0c98e61
2 changed files with 61 additions and 88 deletions

View File

@@ -259,16 +259,15 @@ int Dctnry::createDctnry( const OID& dctnryOID, int colWidth,
if ( m_dFile != NULL ) if ( m_dFile != NULL )
{ {
// MCOL-498 CS doesn't optimize abbreviated extent // MCOL-498 CS optimizes abbreviated extent
// creation. // creation.
bool optimizePrealloc = ( flag ) ? false : true;
rc = FileOp::initDctnryExtent( m_dFile, rc = FileOp::initDctnryExtent( m_dFile,
m_dbRoot, m_dbRoot,
totalSize, totalSize,
m_dctnryHeader2, m_dctnryHeader2,
m_totalHdrBytes, m_totalHdrBytes,
false, false,
optimizePrealloc ); true ); // explicitly optimize
if (rc != NO_ERROR) if (rc != NO_ERROR)
{ {
@@ -334,7 +333,7 @@ int Dctnry::expandDctnryExtent()
m_dctnryHeader2, m_dctnryHeader2,
m_totalHdrBytes, m_totalHdrBytes,
true, true,
true ); true ); // explicitly optimize
if (rc != NO_ERROR) if (rc != NO_ERROR)
return rc; return rc;

View File

@@ -18,7 +18,6 @@
// $Id: we_fileop.cpp 4737 2013-08-14 20:45:46Z bwilkinson $ // $Id: we_fileop.cpp 4737 2013-08-14 20:45:46Z bwilkinson $
#include "config.h" #include "config.h"
#include <unistd.h> #include <unistd.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
@@ -541,8 +540,8 @@ bool FileOp::existsOIDDir( FID fid ) const
* If this is the very first file for the specified DBRoot, then the * If this is the very first file for the specified DBRoot, then the
* partition and segment number must be specified, else the selected * partition and segment number must be specified, else the selected
* partition and segment numbers are returned. This method tries to * partition and segment numbers are returned. This method tries to
* optimize full extents creation either skiping disk space * optimize full extents creation skiping disk space
* preallocation(if activated) or via fallocate. * preallocation(if activated).
* PARAMETERS: * PARAMETERS:
* oid - OID of the column to be extended * oid - OID of the column to be extended
* emptyVal - Empty value to be used for oid * emptyVal - Empty value to be used for oid
@@ -1013,8 +1012,7 @@ int FileOp::addExtentExactFile(
* nBlocks controls how many 8192-byte blocks are to be written out. * nBlocks controls how many 8192-byte blocks are to be written out.
* If bOptExtension is set then method first checks config for * If bOptExtension is set then method first checks config for
* DBRootX.Prealloc. If it is disabled then it skips disk space * DBRootX.Prealloc. If it is disabled then it skips disk space
* preallocation. If not it tries to go with fallocate first then * preallocation.
* fallbacks to sequential write.
* PARAMETERS: * PARAMETERS:
* pFile (in) - IDBDataFile* of column segment file to be written to * pFile (in) - IDBDataFile* of column segment file to be written to
* dbRoot (in) - DBRoot of pFile * dbRoot (in) - DBRoot of pFile
@@ -1025,7 +1023,7 @@ int FileOp::addExtentExactFile(
* headers will be included "if" it is a compressed file. * headers will be included "if" it is a compressed file.
* bExpandExtent (in) - Expand existing extent, or initialize a new one * bExpandExtent (in) - Expand existing extent, or initialize a new one
* bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent * bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent
* bOptExtension(in) - skip or optimize full extent preallocation. * bOptExtension(in) - skip full extent preallocation.
* RETURN: * RETURN:
* returns ERR_FILE_WRITE if an error occurs, * returns ERR_FILE_WRITE if an error occurs,
* else returns NO_ERROR. * else returns NO_ERROR.
@@ -1072,6 +1070,19 @@ int FileOp::initColumnExtent(
// Create vector of mutexes used to serialize extent access per DBRoot // Create vector of mutexes used to serialize extent access per DBRoot
initDbRootExtentMutexes( ); initDbRootExtentMutexes( );
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot. This check is skiped for abbreviated extent.
// IMO it is better to check bool then to call a function.
if ( bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
? bOptExtension : false;
}
// Reduce number of blocks allocated for abbreviated extents thus
// CS writes less when creates a new table. This couldn't be zero
// b/c Snappy compressed file format doesn't tolerate empty files.
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 3 : nBlocks;
// Determine the number of blocks in each call to fwrite(), and the // Determine the number of blocks in each call to fwrite(), and the
// number of fwrite() calls to make, based on this. In other words, // number of fwrite() calls to make, based on this. In other words,
// we put a cap on the "writeSize" so that we don't allocate and write // we put a cap on the "writeSize" so that we don't allocate and write
@@ -1079,16 +1090,15 @@ int FileOp::initColumnExtent(
// expanding an abbreviated 64M extent, we may not have an even // expanding an abbreviated 64M extent, we may not have an even
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of // multiple of MAX_NBLOCKS to write; remWriteSize is the number of
// blocks above and beyond loopCount*MAX_NBLOCKS. // blocks above and beyond loopCount*MAX_NBLOCKS.
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
int loopCount = 1; int loopCount = 1;
int remWriteSize = 0; int remWriteSize = 0;
off64_t currFileSize = pFile->size();
if (nBlocks > MAX_NBLOCKS) // 64M row extent size if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
{ {
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK; writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
loopCount = nBlocks / MAX_NBLOCKS; loopCount = realNBlocks / MAX_NBLOCKS;
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS); remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
} }
// Allocate a buffer, initialize it, and use it to create the extent // Allocate a buffer, initialize it, and use it to create the extent
@@ -1109,39 +1119,13 @@ int FileOp::initColumnExtent(
else else
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT); Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT);
#endif #endif
// MCOL-498 Skip the huge preallocations if the option is set // Skip space preallocation if configured so
// for the dbroot. This check is skiped for abbreviated extent. // fallback to sequential write otherwise.
// IMO it is better to check bool then to call a function. // Couldn't avoid preallocation for full extents,
if ( bOptExtension ) // e.g. ADD COLUMN DDL b/c CS has to fill the file
// with empty magics.
if ( !bOptExtension )
{ {
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
? bOptExtension : false;
}
int savedErrno = 0;
// MCOL-498 fallocate the abbreviated extent,
// fallback to sequential write if fallocate failed
// Couldn't use fallocate for full extents, e.g. ADD COLUMN DDL
// b/c CS has to fill the file with empty magics.
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
&& pFile->fallocate(0, currFileSize, writeSize) )
)
{
savedErrno = errno;
// Log the failed fallocate() call result
if ( bOptExtension )
{
std::ostringstream oss;
std::string errnoMsg;
Convertor::mapErrnoToString(savedErrno, errnoMsg);
oss << "FileOp::initColumnExtent(): fallocate(" << currFileSize <<
", " << writeSize << "): errno = " << savedErrno <<
": " << errnoMsg;
logging::Message::Args args;
args.add(oss.str());
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
logging::M0006);
}
#ifdef PROFILE #ifdef PROFILE
Stats::startParseEvent(WE_STATS_INIT_COL_EXTENT); Stats::startParseEvent(WE_STATS_INIT_COL_EXTENT);
#endif #endif
@@ -1231,7 +1215,7 @@ int FileOp::initAbbrevCompColumnExtent(
uint64_t emptyVal, uint64_t emptyVal,
int width) int width)
{ {
// Reserve disk space for full abbreviated extent // Reserve disk space for optimized abbreviated extent
int rc = initColumnExtent( pFile, int rc = initColumnExtent( pFile,
dbRoot, dbRoot,
nBlocks, nBlocks,
@@ -1239,8 +1223,8 @@ int FileOp::initAbbrevCompColumnExtent(
width, width,
true, // new file true, // new file
false, // don't expand; add new extent false, // don't expand; add new extent
true ); // add abbreviated extent true, // add abbreviated extent
true); // optimize the initial extent
if (rc != NO_ERROR) if (rc != NO_ERROR)
{ {
return rc; return rc;
@@ -1815,8 +1799,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
* nBlocks controls how many 8192-byte blocks are to be written out. * nBlocks controls how many 8192-byte blocks are to be written out.
* If bOptExtension is set then method first checks config for * If bOptExtension is set then method first checks config for
* DBRootX.Prealloc. If it is disabled then it skips disk space * DBRootX.Prealloc. If it is disabled then it skips disk space
* preallocation. If not it tries to go with fallocate first then * preallocation.
* fallbacks to sequential write.
* PARAMETERS: * PARAMETERS:
* pFile (in) - IDBDataFile* of column segment file to be written to * pFile (in) - IDBDataFile* of column segment file to be written to
* dbRoot (in) - DBRoot of pFile * dbRoot (in) - DBRoot of pFile
@@ -1824,7 +1807,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
* blockHdrInit(in) - data used to initialize each block * blockHdrInit(in) - data used to initialize each block
* blockHdrInitSize(in) - number of bytes in blockHdrInit * blockHdrInitSize(in) - number of bytes in blockHdrInit
* bExpandExtent (in) - Expand existing extent, or initialize a new one * bExpandExtent (in) - Expand existing extent, or initialize a new one
* bOptExtension(in) - skip or optimize full extent preallocation. * bOptExtension(in) - skip full extent preallocation.
* RETURN: * RETURN:
* returns ERR_FILE_WRITE if an error occurs, * returns ERR_FILE_WRITE if an error occurs,
* else returns NO_ERROR. * else returns NO_ERROR.
@@ -1838,7 +1821,6 @@ int FileOp::initDctnryExtent(
bool bExpandExtent, bool bExpandExtent,
bool bOptExtension ) bool bOptExtension )
{ {
off64_t currFileSize = pFile->size();
// @bug5769 Don't initialize extents or truncate db files on HDFS // @bug5769 Don't initialize extents or truncate db files on HDFS
if (idbdatafile::IDBPolicy::useHdfs()) if (idbdatafile::IDBPolicy::useHdfs())
{ {
@@ -1854,6 +1836,21 @@ int FileOp::initDctnryExtent(
// Create vector of mutexes used to serialize extent access per DBRoot // Create vector of mutexes used to serialize extent access per DBRoot
initDbRootExtentMutexes( ); initDbRootExtentMutexes( );
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot. This check is skiped for abbreviated extent.
// IMO it is better to check bool then to call a function.
// CS uses non-compressed dict files for its system catalog so
// CS doesn't optimize non-compressed dict creation.
if ( bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)
&& m_compressionType) ? bOptExtension : false;
}
// Reduce number of blocks allocated for abbreviated extents thus
// CS writes less when creates a new table. This couldn't be zero
// b/c Snappy compressed file format doesn't tolerate empty files.
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 1 : nBlocks;
// Determine the number of blocks in each call to fwrite(), and the // Determine the number of blocks in each call to fwrite(), and the
// number of fwrite() calls to make, based on this. In other words, // number of fwrite() calls to make, based on this. In other words,
// we put a cap on the "writeSize" so that we don't allocate and write // we put a cap on the "writeSize" so that we don't allocate and write
@@ -1861,15 +1858,15 @@ int FileOp::initDctnryExtent(
// expanding an abbreviated 64M extent, we may not have an even // expanding an abbreviated 64M extent, we may not have an even
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of // multiple of MAX_NBLOCKS to write; remWriteSize is the number of
// blocks above and beyond loopCount*MAX_NBLOCKS. // blocks above and beyond loopCount*MAX_NBLOCKS.
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
int loopCount = 1; int loopCount = 1;
int remWriteSize = 0; int remWriteSize = 0;
if (nBlocks > MAX_NBLOCKS) // 64M row extent size if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
{ {
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK; writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
loopCount = nBlocks / MAX_NBLOCKS; loopCount = realNBlocks / MAX_NBLOCKS;
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS); remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
} }
// Allocate a buffer, initialize it, and use it to create the extent // Allocate a buffer, initialize it, and use it to create the extent
@@ -1890,36 +1887,13 @@ int FileOp::initDctnryExtent(
else else
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT); Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT);
#endif #endif
// MCOL-498 Skip the huge preallocations if the option is set // Skip space preallocation if configured so
// for the dbroot. This check is skiped for abbreviated extent. // fallback to sequential write otherwise.
// IMO it is better to check bool then to call a function. // Couldn't avoid preallocation for full extents,
if ( bOptExtension ) // e.g. ADD COLUMN DDL b/c CS has to fill the file
// with empty magics.
if ( !bOptExtension )
{ {
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
? bOptExtension : false;
}
int savedErrno = 0;
// MCOL-498 fallocate the abbreviated extent,
// fallback to sequential write if fallocate failed
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
&& pFile->fallocate(0, currFileSize, writeSize) )
)
{
// MCOL-498 Log the failed fallocate() call result
if ( bOptExtension )
{
std::ostringstream oss;
std::string errnoMsg;
Convertor::mapErrnoToString(savedErrno, errnoMsg);
oss << "FileOp::initDctnryExtent(): fallocate(" << currFileSize <<
", " << writeSize << "): errno = " << savedErrno <<
": " << errnoMsg;
logging::Message::Args args;
args.add(oss.str());
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
logging::M0006);
}
// Allocate buffer, and store in scoped_array to insure it's deletion. // Allocate buffer, and store in scoped_array to insure it's deletion.
// Create scope {...} to manage deletion of writeBuf. // Create scope {...} to manage deletion of writeBuf.
{ {
@@ -1932,7 +1906,7 @@ int FileOp::initDctnryExtent(
memset(writeBuf, 0, writeSize); memset(writeBuf, 0, writeSize);
for (int i = 0; i < nBlocks; i++) for (int i = 0; i < realNBlocks; i++)
{ {
memcpy( writeBuf + (i * BYTE_PER_BLOCK), memcpy( writeBuf + (i * BYTE_PER_BLOCK),
blockHdrInit, blockHdrInit,