diff --git a/writeengine/dictionary/we_dctnry.cpp b/writeengine/dictionary/we_dctnry.cpp index 7e63bc952..dfc5622b2 100644 --- a/writeengine/dictionary/we_dctnry.cpp +++ b/writeengine/dictionary/we_dctnry.cpp @@ -259,16 +259,15 @@ int Dctnry::createDctnry( const OID& dctnryOID, int colWidth, if ( m_dFile != NULL ) { - // MCOL-498 CS doesn't optimize abbreviated extent + // MCOL-498 CS optimizes abbreviated extent // creation. - bool optimizePrealloc = ( flag ) ? false : true; rc = FileOp::initDctnryExtent( m_dFile, m_dbRoot, totalSize, m_dctnryHeader2, m_totalHdrBytes, false, - optimizePrealloc ); + true ); // explicitly optimize if (rc != NO_ERROR) { @@ -334,7 +333,7 @@ int Dctnry::expandDctnryExtent() m_dctnryHeader2, m_totalHdrBytes, true, - true ); + true ); // explicitly optimize if (rc != NO_ERROR) return rc; diff --git a/writeengine/shared/we_fileop.cpp b/writeengine/shared/we_fileop.cpp index bc176da6f..784f8b4d9 100644 --- a/writeengine/shared/we_fileop.cpp +++ b/writeengine/shared/we_fileop.cpp @@ -18,7 +18,6 @@ // $Id: we_fileop.cpp 4737 2013-08-14 20:45:46Z bwilkinson $ #include "config.h" - #include #include #include @@ -541,8 +540,8 @@ bool FileOp::existsOIDDir( FID fid ) const * If this is the very first file for the specified DBRoot, then the * partition and segment number must be specified, else the selected * partition and segment numbers are returned. This method tries to - * optimize full extents creation either skiping disk space - * preallocation(if activated) or via fallocate. + * optimize full extents creation skiping disk space + * preallocation(if activated). * PARAMETERS: * oid - OID of the column to be extended * emptyVal - Empty value to be used for oid @@ -1013,8 +1012,7 @@ int FileOp::addExtentExactFile( * nBlocks controls how many 8192-byte blocks are to be written out. * If bOptExtension is set then method first checks config for * DBRootX.Prealloc. If it is disabled then it skips disk space - * preallocation. If not it tries to go with fallocate first then - * fallbacks to sequential write. + * preallocation. * PARAMETERS: * pFile (in) - IDBDataFile* of column segment file to be written to * dbRoot (in) - DBRoot of pFile @@ -1025,7 +1023,7 @@ int FileOp::addExtentExactFile( * headers will be included "if" it is a compressed file. * bExpandExtent (in) - Expand existing extent, or initialize a new one * bAbbrevExtent(in) - if creating new extent, is it an abbreviated extent - * bOptExtension(in) - skip or optimize full extent preallocation. + * bOptExtension(in) - skip full extent preallocation. * RETURN: * returns ERR_FILE_WRITE if an error occurs, * else returns NO_ERROR. @@ -1072,6 +1070,19 @@ int FileOp::initColumnExtent( // Create vector of mutexes used to serialize extent access per DBRoot initDbRootExtentMutexes( ); + // MCOL-498 Skip the huge preallocations if the option is set + // for the dbroot. This check is skiped for abbreviated extent. + // IMO it is better to check bool then to call a function. + if ( bOptExtension ) + { + bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)) + ? bOptExtension : false; + } + // Reduce number of blocks allocated for abbreviated extents thus + // CS writes less when creates a new table. This couldn't be zero + // b/c Snappy compressed file format doesn't tolerate empty files. + int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 3 : nBlocks; + // Determine the number of blocks in each call to fwrite(), and the // number of fwrite() calls to make, based on this. In other words, // we put a cap on the "writeSize" so that we don't allocate and write @@ -1079,16 +1090,15 @@ int FileOp::initColumnExtent( // expanding an abbreviated 64M extent, we may not have an even // multiple of MAX_NBLOCKS to write; remWriteSize is the number of // blocks above and beyond loopCount*MAX_NBLOCKS. - int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size + int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size int loopCount = 1; int remWriteSize = 0; - off64_t currFileSize = pFile->size(); - if (nBlocks > MAX_NBLOCKS) // 64M row extent size + if (realNBlocks > MAX_NBLOCKS) // 64M row extent size { writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK; - loopCount = nBlocks / MAX_NBLOCKS; - remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS); + loopCount = realNBlocks / MAX_NBLOCKS; + remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS); } // Allocate a buffer, initialize it, and use it to create the extent @@ -1109,39 +1119,13 @@ int FileOp::initColumnExtent( else Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT); #endif - // MCOL-498 Skip the huge preallocations if the option is set - // for the dbroot. This check is skiped for abbreviated extent. - // IMO it is better to check bool then to call a function. - if ( bOptExtension ) + // Skip space preallocation if configured so + // fallback to sequential write otherwise. + // Couldn't avoid preallocation for full extents, + // e.g. ADD COLUMN DDL b/c CS has to fill the file + // with empty magics. + if ( !bOptExtension ) { - bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)) - ? bOptExtension : false; - } - int savedErrno = 0; - // MCOL-498 fallocate the abbreviated extent, - // fallback to sequential write if fallocate failed - // Couldn't use fallocate for full extents, e.g. ADD COLUMN DDL - // b/c CS has to fill the file with empty magics. - if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK - && pFile->fallocate(0, currFileSize, writeSize) ) - ) - { - savedErrno = errno; - // Log the failed fallocate() call result - if ( bOptExtension ) - { - std::ostringstream oss; - std::string errnoMsg; - Convertor::mapErrnoToString(savedErrno, errnoMsg); - oss << "FileOp::initColumnExtent(): fallocate(" << currFileSize << - ", " << writeSize << "): errno = " << savedErrno << - ": " << errnoMsg; - logging::Message::Args args; - args.add(oss.str()); - SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO, - logging::M0006); - } - #ifdef PROFILE Stats::startParseEvent(WE_STATS_INIT_COL_EXTENT); #endif @@ -1231,7 +1215,7 @@ int FileOp::initAbbrevCompColumnExtent( uint64_t emptyVal, int width) { - // Reserve disk space for full abbreviated extent + // Reserve disk space for optimized abbreviated extent int rc = initColumnExtent( pFile, dbRoot, nBlocks, @@ -1239,8 +1223,8 @@ int FileOp::initAbbrevCompColumnExtent( width, true, // new file false, // don't expand; add new extent - true ); // add abbreviated extent - + true, // add abbreviated extent + true); // optimize the initial extent if (rc != NO_ERROR) { return rc; @@ -1815,8 +1799,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr, * nBlocks controls how many 8192-byte blocks are to be written out. * If bOptExtension is set then method first checks config for * DBRootX.Prealloc. If it is disabled then it skips disk space - * preallocation. If not it tries to go with fallocate first then - * fallbacks to sequential write. + * preallocation. * PARAMETERS: * pFile (in) - IDBDataFile* of column segment file to be written to * dbRoot (in) - DBRoot of pFile @@ -1824,7 +1807,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr, * blockHdrInit(in) - data used to initialize each block * blockHdrInitSize(in) - number of bytes in blockHdrInit * bExpandExtent (in) - Expand existing extent, or initialize a new one - * bOptExtension(in) - skip or optimize full extent preallocation. + * bOptExtension(in) - skip full extent preallocation. * RETURN: * returns ERR_FILE_WRITE if an error occurs, * else returns NO_ERROR. @@ -1838,7 +1821,6 @@ int FileOp::initDctnryExtent( bool bExpandExtent, bool bOptExtension ) { - off64_t currFileSize = pFile->size(); // @bug5769 Don't initialize extents or truncate db files on HDFS if (idbdatafile::IDBPolicy::useHdfs()) { @@ -1854,6 +1836,21 @@ int FileOp::initDctnryExtent( // Create vector of mutexes used to serialize extent access per DBRoot initDbRootExtentMutexes( ); + // MCOL-498 Skip the huge preallocations if the option is set + // for the dbroot. This check is skiped for abbreviated extent. + // IMO it is better to check bool then to call a function. + // CS uses non-compressed dict files for its system catalog so + // CS doesn't optimize non-compressed dict creation. + if ( bOptExtension ) + { + bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot) + && m_compressionType) ? bOptExtension : false; + } + // Reduce number of blocks allocated for abbreviated extents thus + // CS writes less when creates a new table. This couldn't be zero + // b/c Snappy compressed file format doesn't tolerate empty files. + int realNBlocks = ( bOptExtension && nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK ) ? 1 : nBlocks; + // Determine the number of blocks in each call to fwrite(), and the // number of fwrite() calls to make, based on this. In other words, // we put a cap on the "writeSize" so that we don't allocate and write @@ -1861,15 +1858,15 @@ int FileOp::initDctnryExtent( // expanding an abbreviated 64M extent, we may not have an even // multiple of MAX_NBLOCKS to write; remWriteSize is the number of // blocks above and beyond loopCount*MAX_NBLOCKS. - int writeSize = nBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size + int writeSize = realNBlocks * BYTE_PER_BLOCK; // 1M and 8M row extent size int loopCount = 1; int remWriteSize = 0; - if (nBlocks > MAX_NBLOCKS) // 64M row extent size + if (realNBlocks > MAX_NBLOCKS) // 64M row extent size { writeSize = MAX_NBLOCKS * BYTE_PER_BLOCK; - loopCount = nBlocks / MAX_NBLOCKS; - remWriteSize = nBlocks - (loopCount * MAX_NBLOCKS); + loopCount = realNBlocks / MAX_NBLOCKS; + remWriteSize = realNBlocks - (loopCount * MAX_NBLOCKS); } // Allocate a buffer, initialize it, and use it to create the extent @@ -1890,36 +1887,13 @@ int FileOp::initDctnryExtent( else Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT); #endif - // MCOL-498 Skip the huge preallocations if the option is set - // for the dbroot. This check is skiped for abbreviated extent. - // IMO it is better to check bool then to call a function. - if ( bOptExtension ) + // Skip space preallocation if configured so + // fallback to sequential write otherwise. + // Couldn't avoid preallocation for full extents, + // e.g. ADD COLUMN DDL b/c CS has to fill the file + // with empty magics. + if ( !bOptExtension ) { - bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)) - ? bOptExtension : false; - } - int savedErrno = 0; - // MCOL-498 fallocate the abbreviated extent, - // fallback to sequential write if fallocate failed - if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK - && pFile->fallocate(0, currFileSize, writeSize) ) - ) - { - // MCOL-498 Log the failed fallocate() call result - if ( bOptExtension ) - { - std::ostringstream oss; - std::string errnoMsg; - Convertor::mapErrnoToString(savedErrno, errnoMsg); - oss << "FileOp::initDctnryExtent(): fallocate(" << currFileSize << - ", " << writeSize << "): errno = " << savedErrno << - ": " << errnoMsg; - logging::Message::Args args; - args.add(oss.str()); - SimpleSysLog::instance()->logMsg(args, logging::LOG_TYPE_INFO, - logging::M0006); - } - // Allocate buffer, and store in scoped_array to insure it's deletion. // Create scope {...} to manage deletion of writeBuf. { @@ -1932,7 +1906,7 @@ int FileOp::initDctnryExtent( memset(writeBuf, 0, writeSize); - for (int i = 0; i < nBlocks; i++) + for (int i = 0; i < realNBlocks; i++) { memcpy( writeBuf + (i * BYTE_PER_BLOCK), blockHdrInit,