diff --git a/oam/etc/Columnstore.xml b/oam/etc/Columnstore.xml index 60bc2d9fc..b5610bf11 100644 --- a/oam/etc/Columnstore.xml +++ b/oam/etc/Columnstore.xml @@ -102,162 +102,130 @@ 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON 0.0.0.0 8620 - ON C diff --git a/utils/idbdatafile/IDBPolicy.cpp b/utils/idbdatafile/IDBPolicy.cpp index a8c194918..6d06b9b30 100644 --- a/utils/idbdatafile/IDBPolicy.cpp +++ b/utils/idbdatafile/IDBPolicy.cpp @@ -18,14 +18,13 @@ #include #include #include -#include #include #include -#include // to_upper #include #include "configcpp.h" // for Config #include "oamcache.h" +#include "liboamcpp.h" #include "IDBPolicy.h" #include "PosixFileSystem.h" //#include "HdfsFileSystem.h" @@ -50,7 +49,7 @@ int64_t IDBPolicy::s_hdfsRdwrBufferMaxSize = 0; std::string IDBPolicy::s_hdfsRdwrScratch; bool IDBPolicy::s_configed = false; boost::mutex IDBPolicy::s_mutex; -bool IDBPolicy::s_PreallocSpace = true; +std::vector IDBPolicy::s_PreallocSpace; void IDBPolicy::init( bool bEnableLogging, bool bUseRdwrMemBuffer, const string& hdfsRdwrScratch, int64_t hdfsRdwrBufferMaxSize ) { @@ -184,10 +183,12 @@ void IDBPolicy::configIDBPolicy() bool idblog = false; string idblogstr = cf->getConfig("SystemConfig", "DataFileLog"); - if ( idblogstr.length() != 0 ) + // Must be faster. 
+ if ( idblogstr.size() == 2 + && ( idblogstr[0] == 'O' || idblogstr[0] == 'o' ) + && ( idblogstr[1] == 'N' || idblogstr[1] == 'n' )) { - boost::to_upper(idblogstr); - idblog = ( idblogstr == "ON" ); + idblog = true; } //-------------------------------------------------------------------------- @@ -227,26 +228,73 @@ void IDBPolicy::configIDBPolicy() string scratch = cf->getConfig("SystemConfig", "hdfsRdwrScratch"); string hdfsRdwrScratch = tmpDir + scratch; - // MCOL-498. Set the PMSX.PreallocSpace knob, where X is a PM number, - // to disable file space preallocation. The feature is used in the FileOp code - // and is enabled by default for a backward compatibility. - oam::OamCache* oamcache = oam::OamCache::makeOamCache(); - int PMId = oamcache->getLocalPMId(); - char configSectionPref[] = "PMS"; - char configSection[sizeof(configSectionPref)+oam::MAX_MODULE_ID_SIZE]; - ::memset(configSection, 0, sizeof(configSection)); - sprintf(configSection, "%s%d", configSectionPref, PMId); - string PreallocSpace = cf->getConfig(configSection, "PreallocSpace"); - - if ( PreallocSpace.length() != 0 ) - { - boost::to_upper(PreallocSpace); - s_PreallocSpace = ( PreallocSpace != "OFF" ); - } + // MCOL-498. Use DBRootX.PreallocSpace to disable + // dbroots file space preallocation. + // The feature is used in the FileOp code and enabled by default. 
+ char configSectionPref[] = "DBRoot"; + int confSectionLen = sizeof(configSectionPref)+oam::MAX_MODULE_ID_SIZE; + char configSection[confSectionLen]; IDBPolicy::init( idblog, bUseRdwrMemBuffer, hdfsRdwrScratch, hdfsRdwrBufferMaxSize ); - s_configed = true; + + oam::OamCache* oamcache = oam::OamCache::makeOamCache(); + int PMId = oamcache->getLocalPMId(); + + oam::OamCache::PMDbrootsMap_t pmDbrootsMap; + pmDbrootsMap.reset(new oam::OamCache::PMDbrootsMap_t::element_type()); + oam::systemStorageInfo_t t; + oam::Oam oamInst; + t = oamInst.getStorageConfig(); + + oam::DeviceDBRootList moduledbrootlist = boost::get<2>(t); + oam::DeviceDBRootList::iterator pt = moduledbrootlist.begin(); + + while ( pt != moduledbrootlist.end() && (*pt).DeviceID != PMId) + { + pt++; + continue; + } + + // CS could return here b/c it initialised this singleton and + // there is no DBRootX sections in XML. + if (pt == moduledbrootlist.end()) + { + return; + } + + oam::DBRootConfigList &dbRootVec = (*pt).dbrootConfigList; + s_PreallocSpace.reserve(dbRootVec.size()>>1); + { + int rc; + oam::DBRootConfigList::iterator dbRootIter = dbRootVec.begin(); + for(; dbRootIter != dbRootVec.end(); dbRootIter++) + { + ::memset(configSection + sizeof(configSectionPref), 0, oam::MAX_MODULE_ID_SIZE); + rc = snprintf(configSection, confSectionLen, "%s%d", configSectionPref, *dbRootIter); + // gcc 8.2 warnings + if ( rc < 0 || rc >= confSectionLen) + { + ostringstream oss; + oss << "IDBPolicy::configIDBPolicy: failed to parse DBRootX section."; + throw runtime_error(oss.str()); + } + string setting = cf->getConfig(configSection, "PreallocSpace"); + + if ( setting.length() != 0 ) + { + if ( setting.size() == 3 + && ( setting[0] == 'O' || setting[0] == 'o' ) + && ( setting[1] == 'F' || setting[1] == 'f' ) + && ( setting[2] == 'F' || setting[2] == 'f' ) + ) + { + // int into uint16_t implicit conversion + s_PreallocSpace.push_back(*dbRootIter); + } + } + } + } } } diff --git a/utils/idbdatafile/IDBPolicy.h 
b/utils/idbdatafile/IDBPolicy.h index 5212f3b11..0e28d5004 100644 --- a/utils/idbdatafile/IDBPolicy.h +++ b/utils/idbdatafile/IDBPolicy.h @@ -19,6 +19,7 @@ #define IDBPOLICY_H_ #include +#include #include #include @@ -81,9 +82,9 @@ public: static bool useHdfs(); /** - * Accessor method that returns whether or not HDFS is enabled + * Checks for disk space preallocation feature status for a dbroot */ - static bool PreallocSpace(); + static bool PreallocSpace(uint16_t dbRoot); /** * Accessor method that returns whether to use HDFS memory buffers @@ -139,7 +140,7 @@ private: static bool isLocalFile( const std::string& path ); static bool s_usehdfs; - static bool s_PreallocSpace; + static std::vector s_PreallocSpace; static bool s_bUseRdwrMemBuffer; static std::string s_hdfsRdwrScratch; static int64_t s_hdfsRdwrBufferMaxSize; @@ -159,10 +160,13 @@ bool IDBPolicy::useHdfs() return s_usehdfs; } +// MCOL-498 Looking for dbRoot in the List set in configIDBPolicy. inline -bool IDBPolicy::PreallocSpace() +bool IDBPolicy::PreallocSpace(uint16_t dbRoot) { - return s_PreallocSpace; + std::vector::iterator dbRootIter = + find(s_PreallocSpace.begin(), s_PreallocSpace.end(), dbRoot); + return dbRootIter != s_PreallocSpace.end(); } inline diff --git a/writeengine/dictionary/we_dctnry.cpp b/writeengine/dictionary/we_dctnry.cpp index c5461d4e5..c7cfea1d5 100644 --- a/writeengine/dictionary/we_dctnry.cpp +++ b/writeengine/dictionary/we_dctnry.cpp @@ -259,12 +259,14 @@ int Dctnry::createDctnry( const OID& dctnryOID, int colWidth, if ( m_dFile != NULL ) { + bool optimizePrealloc = ( flag ) ? 
false : true; rc = FileOp::initDctnryExtent( m_dFile, m_dbRoot, totalSize, m_dctnryHeader2, m_totalHdrBytes, - false ); + false, + optimizePrealloc ); if (rc != NO_ERROR) { diff --git a/writeengine/server/we_dmlcommandproc.cpp b/writeengine/server/we_dmlcommandproc.cpp index a6c5b025c..5da10433e 100644 --- a/writeengine/server/we_dmlcommandproc.cpp +++ b/writeengine/server/we_dmlcommandproc.cpp @@ -102,13 +102,13 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std: bs >> tmp32; uint32_t dbroot = tmp32; - cout << "processSingleInsert received bytestream length " << bs.length() << endl; + //cout << "processSingleInsert received bytestream length " << bs.length() << endl; messageqcpp::ByteStream::byte packageType; bs >> packageType; insertPkg.read( bs); uint32_t sessionId = insertPkg.get_SessionID(); - cout << " processSingleInsert for session " << sessionId << endl; + //cout << " processSingleInsert for session " << sessionId << endl; DMLTable* tablePtr = insertPkg.get_Table(); RowList rows = tablePtr->get_RowList(); diff --git a/writeengine/shared/we_define.h b/writeengine/shared/we_define.h index c26b926fd..8c3e85fe9 100644 --- a/writeengine/shared/we_define.h +++ b/writeengine/shared/we_define.h @@ -45,7 +45,8 @@ const short ROW_PER_BYTE = 8; // Rows/byte in bitmap file const int BYTE_PER_BLOCK = 8192; // Num bytes per data block const int BYTE_PER_SUBBLOCK = 256; // Num bytes per sub block const int ENTRY_PER_SUBBLOCK = 32; // Num entries per sub block -const int INITIAL_EXTENT_ROWS_TO_DISK = 256 * 1024; +const int INITIAL_EXTENT_ROWS_TO_DISK = 256 * 1024; // Used for initial number of blocks calculation +const int MAX_INITIAL_EXTENT_BLOCKS_TO_DISK = 256; // Number of blocks in abbrev extent for 8byte col. 
// Num rows reserved to disk for 'initial' extent const int FILE_NAME_SIZE = 200; // Max size of file name const long long MAX_ALLOW_ERROR_COUNT = 100000; //Max allowable error count diff --git a/writeengine/shared/we_fileop.cpp b/writeengine/shared/we_fileop.cpp index e028705f1..d079700dd 100644 --- a/writeengine/shared/we_fileop.cpp +++ b/writeengine/shared/we_fileop.cpp @@ -1046,7 +1046,7 @@ int FileOp::initColumnExtent( // @bug5769 Don't initialize extents or truncate db files on HDFS // MCOL-498 We don't need sequential segment files if a PM uses SSD either. - if (idbdatafile::IDBPolicy::useHdfs() || !idbdatafile::IDBPolicy::PreallocSpace()) + if (idbdatafile::IDBPolicy::useHdfs()) { //@Bug 3219. update the compression header after the extent is expanded. if ((!bNewFile) && (m_compressionType) && (bExpandExtent)) @@ -1101,10 +1101,19 @@ int FileOp::initColumnExtent( else Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_COL_EXTENT); #endif - + // MCOL-498 Skip the huge preallocations if the option is set + // for the dbroot + if ( bOptExtension ) + { + bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)) + ? bOptExtension : false; + } int savedErrno = 0; - // MCOL-498 Try to preallocate the space, fallback to write if fallocate has failed - if ( !bOptExtension || ( nBlocks < 300 && pFile->fallocate(0, currFileSize, writeSize) )) + // MCOL-498 fallocate the abbreviated extent, + // fallback to sequential write if fallocate failed + if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK + && pFile->fallocate(0, currFileSize, writeSize) ) + ) { savedErrno = errno; // Log the failed fallocate() call result @@ -1817,7 +1826,7 @@ int FileOp::initDctnryExtent( off64_t currFileSize = pFile->size(); // @bug5769 Don't initialize extents or truncate db files on HDFS // MCOL-498 We don't need sequential segment files if a PM uses SSD either. 
- if (idbdatafile::IDBPolicy::useHdfs() || !idbdatafile::IDBPolicy::PreallocSpace()) + if (idbdatafile::IDBPolicy::useHdfs()) { if (m_compressionType) updateDctnryExtent(pFile, nBlocks); @@ -1867,12 +1876,21 @@ int FileOp::initDctnryExtent( else Stats::stopParseEvent(WE_STATS_WAIT_TO_CREATE_DCT_EXTENT); #endif - int savedErrno = 0; - // MCOL-498 Try to preallocate the space, fallback to write if fallocate - // has failed - //if (!bOptExtension || pFile->fallocate(0, currFileSize, writeSize)) + // MCOL-498 Skip the huge preallocations if the option is set + // for the dbroot + if ( bOptExtension ) { - // Log the failed fallocate() call result + bOptExtension = (idbdatafile::IDBPolicy::PreallocSpace(dbRoot)) + ? bOptExtension : false; + } + int savedErrno = 0; + // MCOL-498 fallocate the abbreviated extent, + // fallback to sequential write if fallocate failed + if ( !bOptExtension || ( nBlocks <= MAX_INITIAL_EXTENT_BLOCKS_TO_DISK + && pFile->fallocate(0, currFileSize, writeSize) ) + ) + { + // MCOL-498 Log the failed fallocate() call result if ( bOptExtension ) { std::ostringstream oss; @@ -1935,23 +1953,22 @@ int FileOp::initDctnryExtent( return ERR_FILE_WRITE; } } - } - - if (m_compressionType) - updateDctnryExtent(pFile, nBlocks); - - // Synchronize to avoid write buffer pile up too much, which could cause - // controllernode to timeout later when it needs to save a snapshot. - pFile->flush(); + // CS doesn't account flush timings. 
#ifdef PROFILE - - if (bExpandExtent) - Stats::stopParseEvent(WE_STATS_EXPAND_DCT_EXTENT); - else - Stats::stopParseEvent(WE_STATS_CREATE_DCT_EXTENT); + if (bExpandExtent) + Stats::stopParseEvent(WE_STATS_EXPAND_DCT_EXTENT); + else + Stats::stopParseEvent(WE_STATS_CREATE_DCT_EXTENT); #endif - } + } + } // preallocation fallback end + // MCOL-498 CS has to set a number of blocks in the chunk header + if ( m_compressionType ) + { + updateDctnryExtent(pFile, nBlocks); + } + pFile->flush(); } return NO_ERROR; diff --git a/writeengine/wrapper/we_colop.cpp b/writeengine/wrapper/we_colop.cpp index 067453b87..48d9983a0 100644 --- a/writeengine/wrapper/we_colop.cpp +++ b/writeengine/wrapper/we_colop.cpp @@ -1527,7 +1527,7 @@ void ColumnOp::setColParam(Column& column, * rowIdArray - the array of row id, for performance purpose, I am assuming the rowIdArray is sorted * valArray - the array of row values * oldValArray - the array of old value - * bDelete - yet + * bDelete - yet. The flag must be useless. * RETURN: * NO_ERROR if success, other number otherwise ***********************************************************/ @@ -1571,7 +1571,7 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray, } // MCOL-498 CS hasn't touched any block yet, - // but the row fill be the first in the block. + // but the row filled will be the first in the block. fistRowInBlock = ( !(curRowId % (rowsInBlock)) ) ? 
true : false; if( fistRowInBlock && !bDelete ) fillUpWEmptyVals = true; @@ -1708,8 +1708,6 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray, if ( writeSize ) setEmptyBuf( dataBuf + dataBio + curCol.colWidth, writeSize, emptyVal, curCol.colWidth ); - //fillUpWEmptyVals = false; - //fistRowInBlock = false; } rc = saveBlock(curCol.dataFile.pFile, dataBuf, curDataFbo); @@ -1726,7 +1724,7 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray, if( !fillUpWEmptyVals ) emptyVal = getEmptyRowValue(curCol.colDataType, curCol.colWidth); // MCOL-498 Skip if this is the last block in an extent. - if ( curDataFbo != MAX_NBLOCKS - 1) + if ( curDataFbo % MAX_NBLOCKS != MAX_NBLOCKS - 1 ) { rc = saveBlock(curCol.dataFile.pFile, dataBuf, curDataFbo); if ( rc != NO_ERROR)