1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-498 Reduced number of blocks created for abbreviated extents

thus reduced IO load when creating a table.
    Uncompressed abbreviated segment and dicts aren't affected by
    this b/c CS'es system catalog uses uncompressed dict files. CS
    now doesn't work with empty dicts files.
This commit is contained in:
Roman Nozdrin
2019-04-03 10:26:57 +03:00
parent bc3c780e35
commit 22c0c98e61
2 changed files with 61 additions and 88 deletions

View File

@ -18,7 +18,6 @@
// $Id: we_fileop.cpp 4737 2013-08-14 20:45:46Z bwilkinson $
#include "config.h"
#include <unistd.h>
#include <stdio.h>
#include <string.h>
@ -541,8 +540,8 @@ bool FileOp::existsOIDDir( FID fid ) const
* If this is the very first file for the specified DBRoot, then the
* partition and segment number must be specified, else the selected
* partition and segment numbers are returned. This method tries to
* optimize full extents creation either skiping disk space
* preallocation(if activated) or via fallocate.
* optimize full extents creation skiping disk space
* preallocation(if activated).
* PARAMETERS:
* oid - OID of the column to be extended
* emptyVal - Empty value to be used for oid
@ -1013,8 +1012,7 @@ int FileOp::addExtentExactFile(
* nBlocks controls how many 8192-byte blocks are to be written out.
* If bOptExtension is set then method first checks config for
* DBRootX.Prealloc. If it is disabled then it skips disk space
* preallocation. If not it tries to go with fallocate first then
* fallbacks to sequential write.
* preallocation.
* PARAMETERS:
* pFile (in) - IDBDataFile* of column segment file to be written to
* dbRoot (in) - DBRoot of pFile
@ -1025,7 +1023,7 @@ int FileOp::addExtentExactFile(
* headers will be included "if" it is a compressed file.
* bExpandExtent (in) - Expand existing extent, or initialize a new one
* bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent
* bOptExtension(in) - skip or optimize full extent preallocation.
* bOptExtension(in) - skip full extent preallocation.
* RETURN:
* returns ERR_FILE_WRITE if an error occurs,
* else returns NO_ERROR.
@ -1072,6 +1070,19 @@ int FileOp::initColumnExtent(
// Create vector of mutexes used to serialize extent access per DBRoot
initDbRootExtentMutexes( );
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot. This check is skiped for abbreviated extent.
// IMO it is better to check bool then to call a function.
if ( bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
? bOptExtension : false;
}
// Reduce number of blocks allocated for abbreviated extents thus
// CS writes less when creates a new table. This couldn't be zero
// b/c Snappy compressed file format doesn't tolerate empty files.
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 3 : nBlocks;
// Determine the number of blocks in each call to fwrite(), and the
// number of fwrite() calls to make, based on this. In other words,
// we put a cap on the "writeSize" so that we don't allocate and write
@ -1079,16 +1090,15 @@ int FileOp::initColumnExtent(
// expanding an abbreviated 64M extent, we may not have an even
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
// blocks above and beyond loopCount*MAX_NBLOCKS.
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
int loopCount = 1;
int remWriteSize = 0;
off64_t currFileSize = pFile->size();
if (nBlocks > MAX_NBLOCKS) // 64M row extent size
if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
{
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
loopCount = nBlocks / MAX_NBLOCKS;
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS);
loopCount = realNBlocks / MAX_NBLOCKS;
remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
}
// Allocate a buffer, initialize it, and use it to create the extent
@ -1109,39 +1119,13 @@ int FileOp::initColumnExtent(
else
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT);
#endif
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot. This check is skiped for abbreviated extent.
// IMO it is better to check bool then to call a function.
if ( bOptExtension )
// Skip space preallocation if configured so
// fallback to sequential write otherwise.
// Couldn't avoid preallocation for full extents,
// e.g. ADD COLUMN DDL b/c CS has to fill the file
// with empty magics.
if ( !bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
? bOptExtension : false;
}
int savedErrno = 0;
// MCOL-498 fallocate the abbreviated extent,
// fallback to sequential write if fallocate failed
// Couldn't use fallocate for full extents, e.g. ADD COLUMN DDL
// b/c CS has to fill the file with empty magics.
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
&& pFile->fallocate(0, currFileSize, writeSize) )
)
{
savedErrno = errno;
// Log the failed fallocate() call result
if ( bOptExtension )
{
std::ostringstream oss;
std::string errnoMsg;
Convertor::mapErrnoToString(savedErrno, errnoMsg);
oss << "FileOp::initColumnExtent(): fallocate(" << currFileSize <<
", " << writeSize << "): errno = " << savedErrno <<
": " << errnoMsg;
logging::Message::Args args;
args.add(oss.str());
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
logging::M0006);
}
#ifdef PROFILE
Stats::startParseEvent(WE_STATS_INIT_COL_EXTENT);
#endif
@ -1231,7 +1215,7 @@ int FileOp::initAbbrevCompColumnExtent(
uint64_t emptyVal,
int width)
{
// Reserve disk space for full abbreviated extent
// Reserve disk space for optimized abbreviated extent
int rc = initColumnExtent( pFile,
dbRoot,
nBlocks,
@ -1239,8 +1223,8 @@ int FileOp::initAbbrevCompColumnExtent(
width,
true, // new file
false, // don't expand; add new extent
true ); // add abbreviated extent
true, // add abbreviated extent
true); // optimize the initial extent
if (rc != NO_ERROR)
{
return rc;
@ -1815,8 +1799,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
* nBlocks controls how many 8192-byte blocks are to be written out.
* If bOptExtension is set then method first checks config for
* DBRootX.Prealloc. If it is disabled then it skips disk space
* preallocation. If not it tries to go with fallocate first then
* fallbacks to sequential write.
* preallocation.
* PARAMETERS:
* pFile (in) - IDBDataFile* of column segment file to be written to
* dbRoot (in) - DBRoot of pFile
@ -1824,7 +1807,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
* blockHdrInit(in) - data used to initialize each block
* blockHdrInitSize(in) - number of bytes in blockHdrInit
* bExpandExtent (in) - Expand existing extent, or initialize a new one
* bOptExtension(in) - skip or optimize full extent preallocation.
* bOptExtension(in) - skip full extent preallocation.
* RETURN:
* returns ERR_FILE_WRITE if an error occurs,
* else returns NO_ERROR.
@ -1838,7 +1821,6 @@ int FileOp::initDctnryExtent(
bool bExpandExtent,
bool bOptExtension )
{
off64_t currFileSize = pFile->size();
// @bug5769 Don't initialize extents or truncate db files on HDFS
if (idbdatafile::IDBPolicy::useHdfs())
{
@ -1854,6 +1836,21 @@ int FileOp::initDctnryExtent(
// Create vector of mutexes used to serialize extent access per DBRoot
initDbRootExtentMutexes( );
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot. This check is skiped for abbreviated extent.
// IMO it is better to check bool then to call a function.
// CS uses non-compressed dict files for its system catalog so
// CS doesn't optimize non-compressed dict creation.
if ( bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)
&& m_compressionType) ? bOptExtension : false;
}
// Reduce number of blocks allocated for abbreviated extents thus
// CS writes less when creates a new table. This couldn't be zero
// b/c Snappy compressed file format doesn't tolerate empty files.
int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 1 : nBlocks;
// Determine the number of blocks in each call to fwrite(), and the
// number of fwrite() calls to make, based on this. In other words,
// we put a cap on the "writeSize" so that we don't allocate and write
@ -1861,15 +1858,15 @@ int FileOp::initDctnryExtent(
// expanding an abbreviated 64M extent, we may not have an even
// multiple of MAX_NBLOCKS to write; remWriteSize is the number of
// blocks above and beyond loopCount*MAX_NBLOCKS.
int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size
int loopCount = 1;
int remWriteSize = 0;
if (nBlocks > MAX_NBLOCKS) // 64M row extent size
if (realNBlocks > MAX_NBLOCKS) // 64M row extent size
{
writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK;
loopCount = nBlocks / MAX_NBLOCKS;
remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS);
loopCount = realNBlocks / MAX_NBLOCKS;
remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS);
}
// Allocate a buffer, initialize it, and use it to create the extent
@ -1890,36 +1887,13 @@ int FileOp::initDctnryExtent(
else
Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT);
#endif
// MCOL-498 Skip the huge preallocations if the option is set
// for the dbroot. This check is skiped for abbreviated extent.
// IMO it is better to check bool then to call a function.
if ( bOptExtension )
// Skip space preallocation if configured so
// fallback to sequential write otherwise.
// Couldn't avoid preallocation for full extents,
// e.g. ADD COLUMN DDL b/c CS has to fill the file
// with empty magics.
if ( !bOptExtension )
{
bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot))
? bOptExtension : false;
}
int savedErrno = 0;
// MCOL-498 fallocate the abbreviated extent,
// fallback to sequential write if fallocate failed
if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK
&& pFile->fallocate(0, currFileSize, writeSize) )
)
{
// MCOL-498 Log the failed fallocate() call result
if ( bOptExtension )
{
std::ostringstream oss;
std::string errnoMsg;
Convertor::mapErrnoToString(savedErrno, errnoMsg);
oss << "FileOp::initDctnryExtent(): fallocate(" << currFileSize <<
", " << writeSize << "): errno = " << savedErrno <<
": " << errnoMsg;
logging::Message::Args args;
args.add(oss.str());
SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO,
logging::M0006);
}
// Allocate buffer, and store in scoped_array to insure it's deletion.
// Create scope {...} to manage deletion of writeBuf.
{
@ -1932,7 +1906,7 @@ int FileOp::initDctnryExtent(
memset(writeBuf, 0, writeSize);
for (int i = 0; i < nBlocks; i++)
for (int i = 0; i < realNBlocks; i++)
{
memcpy( writeBuf + (i * BYTE_PER_BLOCK),
blockHdrInit,