You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-07 03:22:57 +03:00
MCOL-498 Reduced number of blocks created for abbreviated extents
thus reduced IO load when creating a table. Uncompressed abbreviated segment and dicts aren't affected by this b/c CS'es system catalog uses uncompressed dict files. CS now doesn't work with empty dicts files.
This commit is contained in:
@@ -259,16 +259,15 @@ int Dctnry::createDctnry( const OID& dctnryOID, int colWidth,
|
|||||||
|
|
||||||
if ( m_dFile != NULL )
|
if ( m_dFile != NULL )
|
||||||
{
|
{
|
||||||
// MCOL-498 CS doesn't optimize abbreviated extent
|
// MCOL-498 CS optimizes abbreviated extent
|
||||||
// creation.
|
// creation.
|
||||||
bool optimizePrealloc = ( flag ) ? false : true;
|
|
||||||
rc = FileOp::initDctnryExtent( m_dFile,
|
rc = FileOp::initDctnryExtent( m_dFile,
|
||||||
m_dbRoot,
|
m_dbRoot,
|
||||||
totalSize,
|
totalSize,
|
||||||
m_dctnryHeader2,
|
m_dctnryHeader2,
|
||||||
m_totalHdrBytes,
|
m_totalHdrBytes,
|
||||||
false,
|
false,
|
||||||
optimizePrealloc );
|
true ); // explicitly optimize
|
||||||
|
|
||||||
if (rc != NO_ERROR)
|
if (rc != NO_ERROR)
|
||||||
{
|
{
|
||||||
@@ -334,7 +333,7 @@ int Dctnry::expandDctnryExtent()
|
|||||||
m_dctnryHeader2,
|
m_dctnryHeader2,
|
||||||
m_totalHdrBytes,
|
m_totalHdrBytes,
|
||||||
true,
|
true,
|
||||||
true );
|
true ); // explicitly optimize
|
||||||
|
|
||||||
if (rc != NO_ERROR)
|
if (rc != NO_ERROR)
|
||||||
return rc;
|
return rc;
|
||||||
|
@@ -18,7 +18,6 @@
|
|||||||
// $Id: we_fileop.cpp 4737 2013-08-14 20:45:46Z bwilkinson $
|
// $Id: we_fileop.cpp 4737 2013-08-14 20:45:46Z bwilkinson $
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -541,8 +540,8 @@ bool FileOp::existsOIDDir( FID fid ) const
|
|||||||
* If this is the very first file for the specified DBRoot, then the
|
* If this is the very first file for the specified DBRoot, then the
|
||||||
* partition and segment number must be specified, else the selected
|
* partition and segment number must be specified, else the selected
|
||||||
* partition and segment numbers are returned. This method tries to
|
* partition and segment numbers are returned. This method tries to
|
||||||
* optimize full extents creation either skiping disk space
|
* optimize full extents creation skiping disk space
|
||||||
* preallocation(if activated) or via fallocate.
|
* preallocation(if activated).
|
||||||
* PARAMETERS:
|
* PARAMETERS:
|
||||||
* oid - OID of the column to be extended
|
* oid - OID of the column to be extended
|
||||||
* emptyVal - Empty value to be used for oid
|
* emptyVal - Empty value to be used for oid
|
||||||
@@ -1013,8 +1012,7 @@ int FileOp::addExtentExactFile(
|
|||||||
* nBlocks controls how many 8192-byte blocks are to be written out.
|
* nBlocks controls how many 8192-byte blocks are to be written out.
|
||||||
* If bOptExtension is set then method first checks config for
|
* If bOptExtension is set then method first checks config for
|
||||||
* DBRootX.Prealloc. If it is disabled then it skips disk space
|
* DBRootX.Prealloc. If it is disabled then it skips disk space
|
||||||
* preallocation. If not it tries to go with fallocate first then
|
* preallocation.
|
||||||
* fallbacks to sequential write.
|
|
||||||
* PARAMETERS:
|
* PARAMETERS:
|
||||||
* pFile (in) - IDBDataFile* of column segment file to be written to
|
* pFile (in) - IDBDataFile* of column segment file to be written to
|
||||||
* dbRoot (in) - DBRoot of pFile
|
* dbRoot (in) - DBRoot of pFile
|
||||||
@@ -1025,7 +1023,7 @@ int FileOp::addExtentExactFile(
|
|||||||
* headers will be included "if" it is a compressed file.
|
* headers will be included "if" it is a compressed file.
|
||||||
* bExpandExtent (in) - Expand existing extent, or initialize a new one
|
* bExpandExtent (in) - Expand existing extent, or initialize a new one
|
||||||
* bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent
|
* bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent
|
||||||
* bOptExtension(in) - skip or optimize full extent preallocation.
|
* bOptExtension(in) - skip full extent preallocation.
|
||||||
* RETURN:
|
* RETURN:
|
||||||
* returns ERR_FILE_WRITE if an error occurs,
|
* returns ERR_FILE_WRITE if an error occurs,
|
||||||
* else returns NO_ERROR.
|
* else returns NO_ERROR.
|
||||||
@@ -1072,6 +1070,19 @@ int FileOp::initColumnExtent(
|
|||||||
// Create vector of mutexes used to serialize extent access per DBRoot
|
// Create vector of mutexes used to serialize extent access per DBRoot
|
||||||
initDbRootExtentMutexes( );
|
initDbRootExtentMutexes( );
|
||||||
|
|
||||||
|
// MCOL-498 Skip the huge preallocations if the option is set
|
||||||
|
// for the dbroot. This check is skiped for abbreviated extent.
|
||||||
|
// IMO it is better to check bool then to call a function.
|
||||||
|
if ( bOptExtension )
|
||||||
|
{
|
||||||
|
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
|
||||||
|
? bOptExtension : false;
|
||||||
|
}
|
||||||
|
// Reduce number of blocks allocated for abbreviated extents thus
|
||||||
|
// CS writes less when creates a new table. This couldn't be zero
|
||||||
|
// b/c Snappy compressed file format doesn't tolerate empty files.
|
||||||
|
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 3 : nBlocks;
|
||||||
|
|
||||||
// Determine the number of blocks in each call to fwrite(), and the
|
// Determine the number of blocks in each call to fwrite(), and the
|
||||||
// number of fwrite() calls to make, based on this. In other words,
|
// number of fwrite() calls to make, based on this. In other words,
|
||||||
// we put a cap on the "writeSize" so that we don't allocate and write
|
// we put a cap on the "writeSize" so that we don't allocate and write
|
||||||
@@ -1079,16 +1090,15 @@ int FileOp::initColumnExtent(
|
|||||||
// expanding an abbreviated 64M extent, we may not have an even
|
// expanding an abbreviated 64M extent, we may not have an even
|
||||||
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
|
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
|
||||||
// blocks above and beyond loopCount*MAX_NBLOCKS.
|
// blocks above and beyond loopCount*MAX_NBLOCKS.
|
||||||
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
||||||
int loopCount = 1;
|
int loopCount = 1;
|
||||||
int remWriteSize = 0;
|
int remWriteSize = 0;
|
||||||
off64_t currFileSize = pFile->size();
|
|
||||||
|
|
||||||
if (nBlocks > MAX_NBLOCKS) // 64M row extent size
|
if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
|
||||||
{
|
{
|
||||||
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
|
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
|
||||||
loopCount = nBlocks / MAX_NBLOCKS;
|
loopCount = realNBlocks / MAX_NBLOCKS;
|
||||||
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS);
|
remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate a buffer, initialize it, and use it to create the extent
|
// Allocate a buffer, initialize it, and use it to create the extent
|
||||||
@@ -1109,39 +1119,13 @@ int FileOp::initColumnExtent(
|
|||||||
else
|
else
|
||||||
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT);
|
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT);
|
||||||
#endif
|
#endif
|
||||||
// MCOL-498 Skip the huge preallocations if the option is set
|
// Skip space preallocation if configured so
|
||||||
// for the dbroot. This check is skiped for abbreviated extent.
|
// fallback to sequential write otherwise.
|
||||||
// IMO it is better to check bool then to call a function.
|
// Couldn't avoid preallocation for full extents,
|
||||||
if ( bOptExtension )
|
// e.g. ADD COLUMN DDL b/c CS has to fill the file
|
||||||
|
// with empty magics.
|
||||||
|
if ( !bOptExtension )
|
||||||
{
|
{
|
||||||
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
|
|
||||||
? bOptExtension : false;
|
|
||||||
}
|
|
||||||
int savedErrno = 0;
|
|
||||||
// MCOL-498 fallocate the abbreviated extent,
|
|
||||||
// fallback to sequential write if fallocate failed
|
|
||||||
// Couldn't use fallocate for full extents, e.g. ADD COLUMN DDL
|
|
||||||
// b/c CS has to fill the file with empty magics.
|
|
||||||
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
|
|
||||||
&& pFile->fallocate(0, currFileSize, writeSize) )
|
|
||||||
)
|
|
||||||
{
|
|
||||||
savedErrno = errno;
|
|
||||||
// Log the failed fallocate() call result
|
|
||||||
if ( bOptExtension )
|
|
||||||
{
|
|
||||||
std::ostringstream oss;
|
|
||||||
std::string errnoMsg;
|
|
||||||
Convertor::mapErrnoToString(savedErrno, errnoMsg);
|
|
||||||
oss << "FileOp::initColumnExtent(): fallocate(" << currFileSize <<
|
|
||||||
", " << writeSize << "): errno = " << savedErrno <<
|
|
||||||
": " << errnoMsg;
|
|
||||||
logging::Message::Args args;
|
|
||||||
args.add(oss.str());
|
|
||||||
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
|
|
||||||
logging::M0006);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef PROFILE
|
#ifdef PROFILE
|
||||||
Stats::startParseEvent(WE_STATS_INIT_COL_EXTENT);
|
Stats::startParseEvent(WE_STATS_INIT_COL_EXTENT);
|
||||||
#endif
|
#endif
|
||||||
@@ -1231,7 +1215,7 @@ int FileOp::initAbbrevCompColumnExtent(
|
|||||||
uint64_t emptyVal,
|
uint64_t emptyVal,
|
||||||
int width)
|
int width)
|
||||||
{
|
{
|
||||||
// Reserve disk space for full abbreviated extent
|
// Reserve disk space for optimized abbreviated extent
|
||||||
int rc = initColumnExtent( pFile,
|
int rc = initColumnExtent( pFile,
|
||||||
dbRoot,
|
dbRoot,
|
||||||
nBlocks,
|
nBlocks,
|
||||||
@@ -1239,8 +1223,8 @@ int FileOp::initAbbrevCompColumnExtent(
|
|||||||
width,
|
width,
|
||||||
true, // new file
|
true, // new file
|
||||||
false, // don't expand; add new extent
|
false, // don't expand; add new extent
|
||||||
true ); // add abbreviated extent
|
true, // add abbreviated extent
|
||||||
|
true); // optimize the initial extent
|
||||||
if (rc != NO_ERROR)
|
if (rc != NO_ERROR)
|
||||||
{
|
{
|
||||||
return rc;
|
return rc;
|
||||||
@@ -1815,8 +1799,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
|
|||||||
* nBlocks controls how many 8192-byte blocks are to be written out.
|
* nBlocks controls how many 8192-byte blocks are to be written out.
|
||||||
* If bOptExtension is set then method first checks config for
|
* If bOptExtension is set then method first checks config for
|
||||||
* DBRootX.Prealloc. If it is disabled then it skips disk space
|
* DBRootX.Prealloc. If it is disabled then it skips disk space
|
||||||
* preallocation. If not it tries to go with fallocate first then
|
* preallocation.
|
||||||
* fallbacks to sequential write.
|
|
||||||
* PARAMETERS:
|
* PARAMETERS:
|
||||||
* pFile (in) - IDBDataFile* of column segment file to be written to
|
* pFile (in) - IDBDataFile* of column segment file to be written to
|
||||||
* dbRoot (in) - DBRoot of pFile
|
* dbRoot (in) - DBRoot of pFile
|
||||||
@@ -1824,7 +1807,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
|
|||||||
* blockHdrInit(in) - data used to initialize each block
|
* blockHdrInit(in) - data used to initialize each block
|
||||||
* blockHdrInitSize(in) - number of bytes in blockHdrInit
|
* blockHdrInitSize(in) - number of bytes in blockHdrInit
|
||||||
* bExpandExtent (in) - Expand existing extent, or initialize a new one
|
* bExpandExtent (in) - Expand existing extent, or initialize a new one
|
||||||
* bOptExtension(in) - skip or optimize full extent preallocation.
|
* bOptExtension(in) - skip full extent preallocation.
|
||||||
* RETURN:
|
* RETURN:
|
||||||
* returns ERR_FILE_WRITE if an error occurs,
|
* returns ERR_FILE_WRITE if an error occurs,
|
||||||
* else returns NO_ERROR.
|
* else returns NO_ERROR.
|
||||||
@@ -1838,7 +1821,6 @@ int FileOp::initDctnryExtent(
|
|||||||
bool bExpandExtent,
|
bool bExpandExtent,
|
||||||
bool bOptExtension )
|
bool bOptExtension )
|
||||||
{
|
{
|
||||||
off64_t currFileSize = pFile->size();
|
|
||||||
// @bug5769 Don't initialize extents or truncate db files on HDFS
|
// @bug5769 Don't initialize extents or truncate db files on HDFS
|
||||||
if (idbdatafile::IDBPolicy::useHdfs())
|
if (idbdatafile::IDBPolicy::useHdfs())
|
||||||
{
|
{
|
||||||
@@ -1854,6 +1836,21 @@ int FileOp::initDctnryExtent(
|
|||||||
// Create vector of mutexes used to serialize extent access per DBRoot
|
// Create vector of mutexes used to serialize extent access per DBRoot
|
||||||
initDbRootExtentMutexes( );
|
initDbRootExtentMutexes( );
|
||||||
|
|
||||||
|
// MCOL-498 Skip the huge preallocations if the option is set
|
||||||
|
// for the dbroot. This check is skiped for abbreviated extent.
|
||||||
|
// IMO it is better to check bool then to call a function.
|
||||||
|
// CS uses non-compressed dict files for its system catalog so
|
||||||
|
// CS doesn't optimize non-compressed dict creation.
|
||||||
|
if ( bOptExtension )
|
||||||
|
{
|
||||||
|
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)
|
||||||
|
&& m_compressionType) ? bOptExtension : false;
|
||||||
|
}
|
||||||
|
// Reduce number of blocks allocated for abbreviated extents thus
|
||||||
|
// CS writes less when creates a new table. This couldn't be zero
|
||||||
|
// b/c Snappy compressed file format doesn't tolerate empty files.
|
||||||
|
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 1 : nBlocks;
|
||||||
|
|
||||||
// Determine the number of blocks in each call to fwrite(), and the
|
// Determine the number of blocks in each call to fwrite(), and the
|
||||||
// number of fwrite() calls to make, based on this. In other words,
|
// number of fwrite() calls to make, based on this. In other words,
|
||||||
// we put a cap on the "writeSize" so that we don't allocate and write
|
// we put a cap on the "writeSize" so that we don't allocate and write
|
||||||
@@ -1861,15 +1858,15 @@ int FileOp::initDctnryExtent(
|
|||||||
// expanding an abbreviated 64M extent, we may not have an even
|
// expanding an abbreviated 64M extent, we may not have an even
|
||||||
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
|
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
|
||||||
// blocks above and beyond loopCount*MAX_NBLOCKS.
|
// blocks above and beyond loopCount*MAX_NBLOCKS.
|
||||||
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
|
||||||
int loopCount = 1;
|
int loopCount = 1;
|
||||||
int remWriteSize = 0;
|
int remWriteSize = 0;
|
||||||
|
|
||||||
if (nBlocks > MAX_NBLOCKS) // 64M row extent size
|
if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
|
||||||
{
|
{
|
||||||
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
|
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
|
||||||
loopCount = nBlocks / MAX_NBLOCKS;
|
loopCount = realNBlocks / MAX_NBLOCKS;
|
||||||
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS);
|
remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate a buffer, initialize it, and use it to create the extent
|
// Allocate a buffer, initialize it, and use it to create the extent
|
||||||
@@ -1890,36 +1887,13 @@ int FileOp::initDctnryExtent(
|
|||||||
else
|
else
|
||||||
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT);
|
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT);
|
||||||
#endif
|
#endif
|
||||||
// MCOL-498 Skip the huge preallocations if the option is set
|
// Skip space preallocation if configured so
|
||||||
// for the dbroot. This check is skiped for abbreviated extent.
|
// fallback to sequential write otherwise.
|
||||||
// IMO it is better to check bool then to call a function.
|
// Couldn't avoid preallocation for full extents,
|
||||||
if ( bOptExtension )
|
// e.g. ADD COLUMN DDL b/c CS has to fill the file
|
||||||
|
// with empty magics.
|
||||||
|
if ( !bOptExtension )
|
||||||
{
|
{
|
||||||
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
|
|
||||||
? bOptExtension : false;
|
|
||||||
}
|
|
||||||
int savedErrno = 0;
|
|
||||||
// MCOL-498 fallocate the abbreviated extent,
|
|
||||||
// fallback to sequential write if fallocate failed
|
|
||||||
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
|
|
||||||
&& pFile->fallocate(0, currFileSize, writeSize) )
|
|
||||||
)
|
|
||||||
{
|
|
||||||
// MCOL-498 Log the failed fallocate() call result
|
|
||||||
if ( bOptExtension )
|
|
||||||
{
|
|
||||||
std::ostringstream oss;
|
|
||||||
std::string errnoMsg;
|
|
||||||
Convertor::mapErrnoToString(savedErrno, errnoMsg);
|
|
||||||
oss << "FileOp::initDctnryExtent(): fallocate(" << currFileSize <<
|
|
||||||
", " << writeSize << "): errno = " << savedErrno <<
|
|
||||||
": " << errnoMsg;
|
|
||||||
logging::Message::Args args;
|
|
||||||
args.add(oss.str());
|
|
||||||
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
|
|
||||||
logging::M0006);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate buffer, and store in scoped_array to insure it's deletion.
|
// Allocate buffer, and store in scoped_array to insure it's deletion.
|
||||||
// Create scope {...} to manage deletion of writeBuf.
|
// Create scope {...} to manage deletion of writeBuf.
|
||||||
{
|
{
|
||||||
@@ -1932,7 +1906,7 @@ int FileOp::initDctnryExtent(
|
|||||||
|
|
||||||
memset(writeBuf, 0, writeSize);
|
memset(writeBuf, 0, writeSize);
|
||||||
|
|
||||||
for (int i = 0; i < nBlocks; i++)
|
for (int i = 0; i < realNBlocks; i++)
|
||||||
{
|
{
|
||||||
memcpy( writeBuf + (i * BYTE_PER_BLOCK),
|
memcpy( writeBuf + (i * BYTE_PER_BLOCK),
|
||||||
blockHdrInit,
|
blockHdrInit,
|
||||||
|
Reference in New Issue
Block a user