You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-31 18:30:33 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			1972 lines
		
	
	
		
			71 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1972 lines
		
	
	
		
			71 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (C) 2014 InfiniDB, Inc.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA. */
 | |
| 
 | |
| /******************************************************************************
 | |
| * $Id: we_columninfo.cpp 4737 2013-08-14 20:45:46Z bwilkinson $
 | |
| *
 | |
| *******************************************************************************/
 | |
| 
 | |
| #include <cstdlib>
 | |
| #include <sstream>
 | |
| #include <unistd.h>
 | |
| //#define NDEBUG
 | |
| //#include <cassert>
 | |
| #include <cctype>
 | |
| 
 | |
| #include "we_columninfo.h"
 | |
| #include "we_log.h"
 | |
| #include "we_stats.h"
 | |
| #include "we_colopbulk.h"
 | |
| #include "brmtypes.h"
 | |
| #include "we_columnautoinc.h"
 | |
| #include "we_dbrootextenttracker.h"
 | |
| #include "we_brmreporter.h"
 | |
| 
 | |
| #include "we_tableinfo.h"
 | |
| #include "IDBDataFile.h"
 | |
| using namespace idbdatafile;
 | |
| 
 | |
| namespace
 | |
| {
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Do a fast ascii-hex-string to binary data conversion. This is done in-place.
 | |
| // We take bytes 1 and 2 and put them back into byte 1; 3 and 4 into 2; etc.
 | |
| // The length is adjusted by 1/2 and returned to the caller as the new length.
 | |
| // If any invalid hex characters are present in the string (not 0-9,A-F, or
 | |
| // a-f), then the string is considered invalid, and a null token will be used.
 | |
| //------------------------------------------------------------------------------
 | |
| unsigned int compactVarBinary(char* charTmpBuf, int fieldLength)
 | |
| {
 | |
|     unsigned char* p = reinterpret_cast<unsigned char*>(charTmpBuf);
 | |
|     char* f = charTmpBuf;
 | |
|     char v = '\0';
 | |
| 
 | |
|     for (int i = 0; i < fieldLength / 2; i++, p++)
 | |
|     {
 | |
|         // Store even number byte in high order 4 bits of next output byte
 | |
|         v = *f;
 | |
| 
 | |
|         if (!isxdigit(v))
 | |
|             return WriteEngine::COLPOSPAIR_NULL_TOKEN_OFFSET;
 | |
| 
 | |
|         if (v <= '9')
 | |
|             *p = v - '0';
 | |
|         else if (v <= 'F')
 | |
|             *p = v - 'A' + 10;
 | |
|         else //if (v <= 'f')
 | |
|             *p = v - 'a' + 10;
 | |
| 
 | |
|         *p <<= 4;
 | |
|         f++;
 | |
| 
 | |
|         // Store odd number byte in low order 4 bite of next output byte
 | |
|         v = *f;
 | |
| 
 | |
|         if (!isxdigit(v))
 | |
|             return WriteEngine::COLPOSPAIR_NULL_TOKEN_OFFSET;
 | |
| 
 | |
|         if (v <= '9')
 | |
|             *p |= v - '0';
 | |
|         else if (v <= 'F')
 | |
|             *p |= v - 'A' + 10;
 | |
|         else //if (v <= 'f')
 | |
|             *p |= v - 'a' + 10;
 | |
| 
 | |
|         f++;
 | |
|     }
 | |
| 
 | |
| // Changed our mind and decided to have the read thread reject rows with
 | |
| // incomplete (odd length) varbinary fields, so the following check is not
 | |
| // necessary.  We should only get to this function with an even fieldLength.
 | |
| #if 0
 | |
| 
 | |
|     // Handle case where input data field has "odd" byte length.
 | |
|     // Store last input byte in high order 4 bits of additional output byte,
 | |
|     // and leave the low order bits set to 0.
 | |
|     if ((fieldLength & 1) == 1)
 | |
|     {
 | |
|         v = *f;
 | |
| 
 | |
|         if (!isxdigit(v))
 | |
|             return WriteEngine::COLPOSPAIR_NULL_TOKEN_OFFSET;
 | |
| 
 | |
|         if (v <= '9')
 | |
|             *p = v - '0';
 | |
|         else if (v <= 'F')
 | |
|             *p = v - 'A' + 10;
 | |
|         else //if (v <= 'f')
 | |
|             *p = v - 'a' + 10;
 | |
| 
 | |
|         *p <<= 4;
 | |
| 
 | |
|         fieldLength++;
 | |
|     }
 | |
| 
 | |
| #endif
 | |
| 
 | |
|     return (fieldLength / 2);
 | |
| }
 | |
| 
 | |
| }
 | |
| 
 | |
| namespace WriteEngine
 | |
| {
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // ColumnInfo constructor
 | |
| //------------------------------------------------------------------------------
 | |
| ColumnInfo::ColumnInfo(Log*             logger,
 | |
|                        int              idIn,
 | |
|                        const JobColumn& columnIn,
 | |
|                        DBRootExtentTracker* pDBRootExtTrk,
 | |
|                        TableInfo* pTableInfo) :
 | |
|     id(idIn),
 | |
|     lastProcessingTime(0),
 | |
| #ifdef PROFILE
 | |
|     totalProcessingTime(0),
 | |
| #endif
 | |
|     fColBufferMgr(0),
 | |
|     availFileSize(0),
 | |
|     fileSize(0),
 | |
|     fLog(logger),
 | |
|     fDelayedFileStartBlksSkipped(0),
 | |
|     fSavedLbid(0),
 | |
|     fSizeWrittenStart(0),
 | |
|     fSizeWritten(0),
 | |
|     fLastInputRowInCurrentExtent(0),
 | |
|     fLoadingAbbreviatedExtent(false),
 | |
|     fColExtInf(0),
 | |
|     fMaxNumRowsPerSegFile(0),
 | |
|     fStore(0),
 | |
|     fAutoIncLastValue(0),
 | |
|     fSaturatedRowCnt(0),
 | |
|     fpTableInfo(pTableInfo),
 | |
|     fAutoIncMgr(0),
 | |
|     fDbRootExtTrk(pDBRootExtTrk),
 | |
|     fColWidthFactor(1),
 | |
|     fDelayedFileCreation(INITIAL_DBFILE_STAT_FILE_EXISTS),
 | |
|     fRowsPerExtent(0)
 | |
| {
 | |
|     column = columnIn;
 | |
| 
 | |
|     fRowsPerExtent = BRMWrapper::getInstance()->getExtentRows();
 | |
| 
 | |
|     // Allocate a ColExtInfBase object for those types that won't track
 | |
|     // min/max CasualPartition info; this is a stub class that won't do
 | |
|     // anything.
 | |
|     switch ( column.weType )
 | |
|     {
 | |
|         case WriteEngine::WR_FLOAT:
 | |
|         case WriteEngine::WR_DOUBLE:
 | |
|         case WriteEngine::WR_VARBINARY: // treat like char dictionary for now
 | |
|         case WriteEngine::WR_TOKEN:
 | |
|         {
 | |
|             fColExtInf = new ColExtInfBase( );
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         case WriteEngine::WR_CHAR:
 | |
|         {
 | |
|             if (column.colType == COL_TYPE_DICT)
 | |
|             {
 | |
|                 fColExtInf = new ColExtInfBase( );
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 fColExtInf = new ColExtInf(column.mapOid, logger);
 | |
|             }
 | |
| 
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         case WriteEngine::WR_SHORT:
 | |
|         case WriteEngine::WR_BYTE:
 | |
|         case WriteEngine::WR_LONGLONG:
 | |
|         case WriteEngine::WR_INT:
 | |
|         case WriteEngine::WR_USHORT:
 | |
|         case WriteEngine::WR_UBYTE:
 | |
|         case WriteEngine::WR_ULONGLONG:
 | |
|         case WriteEngine::WR_UINT:
 | |
|         default:
 | |
|         {
 | |
|             fColExtInf = new ColExtInf(column.mapOid, logger);
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     colOp.reset(new ColumnOpBulk(logger, column.compressionType));
 | |
| 
 | |
|     fMaxNumRowsPerSegFile = fRowsPerExtent *
 | |
|                             Config::getExtentsPerSegmentFile();
 | |
| 
 | |
|     // Create auto-increment object to manage auto-increment next-value
 | |
|     if (column.autoIncFlag)
 | |
|     {
 | |
|         fAutoIncMgr = new ColumnAutoIncIncremental(logger);
 | |
|         // formerly used ColumnAutoIncJob for Shared Everything
 | |
|         // fAutoIncMgr = new ColumnAutoIncJob(logger);
 | |
|     }
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // ColumnInfo destructor
 | |
| //------------------------------------------------------------------------------
 | |
| ColumnInfo::~ColumnInfo()
 | |
| {
 | |
|     clearMemory();
 | |
| 
 | |
|     // Closing dictionary file also updates the extent map; which we
 | |
|     // don't want to do if we are aborting the job.  Besides, the
 | |
|     // application code should be closing the dictionary as needed,
 | |
|     // instead of relying on the destructor, so disabled this code.
 | |
|     //if(fStore != NULL)
 | |
|     //{
 | |
|     //    fStore->closeDctnryStore();
 | |
|     //    delete fStore;
 | |
|     //}
 | |
| 
 | |
|     if (fColExtInf)
 | |
|         delete fColExtInf;
 | |
| 
 | |
|     if (fAutoIncMgr)
 | |
|         delete fAutoIncMgr;
 | |
| 
 | |
|     if (fDbRootExtTrk)
 | |
|         delete fDbRootExtTrk;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Clear memory consumed by this ColumnInfo object.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::clearMemory( )
 | |
| {
 | |
|     if (fColBufferMgr)
 | |
|     {
 | |
|         delete fColBufferMgr;
 | |
|         fColBufferMgr = 0;
 | |
|     }
 | |
| 
 | |
|     fDictBlocks.clear();
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // If at the start of the job, We have encountered a PM that has no DB file for
 | |
| // this column, or whose HWM extent is disabled; then this function is called
 | |
| // to setup delayed file creation.
 | |
| // A starting DB file will be created if/when we determine that we have rows
 | |
| // to be processed.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::setupDelayedFileCreation(
 | |
|     uint16_t dbRoot,
 | |
|     uint32_t partition,
 | |
|     uint16_t segment,
 | |
|     HWM       hwm,
 | |
|     bool      bEmptyPM )
 | |
| {
 | |
|     if (bEmptyPM)
 | |
|         fDelayedFileCreation = INITIAL_DBFILE_STAT_CREATE_FILE_ON_EMPTY;
 | |
|     else
 | |
|         fDelayedFileCreation = INITIAL_DBFILE_STAT_CREATE_FILE_ON_DISABLED;
 | |
| 
 | |
|     fDelayedFileStartBlksSkipped = hwm;
 | |
|     fSavedLbid = INVALID_LBID;
 | |
| 
 | |
|     colOp->initColumn ( curCol );
 | |
|     colOp->setColParam( curCol, id,
 | |
|                         column.width,
 | |
|                         column.dataType,
 | |
|                         column.weType,
 | |
|                         column.mapOid,
 | |
|                         column.compressionType,
 | |
|                         dbRoot, partition, segment );
 | |
| }
 | |
| 
 | |
//------------------------------------------------------------------------------
// Create a DB file as part of delayed file creation.  See setupDelayedFile-
// Creation for an explanation.
//
// Sequence: allocate a BRM column extent, extend the physical column file,
// create the corresponding dictionary store file (if this is a dictionary
// column), then open/setup the initial column extent for loading.  Any
// failure transitions fDelayedFileCreation to the error state so later
// callers fail fast instead of retrying.
//
// @param tableName Name of the table (for setupInitialColumnExtent).
// @return NO_ERROR on success (or if the file already exists); else an
//         error code, after logging and setting the error state.
//------------------------------------------------------------------------------
int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName )
{
    int rc = NO_ERROR;

    // For optimization sake, we use a separate mutex (fDelayedFileCreateMutex)
    // exclusively reserved to be used as the gatekeeper to this function.
    // No sense in waiting for a fColMutex lock, when 99.99% of the time,
    // all we need to do is check fDelayedFileCreation, see that it's value
    // is INITIAL_DBFILE_STAT_FILE_EXISTS, and exit the function.
    boost::mutex::scoped_lock lock(fDelayedFileCreateMutex);

    if (fDelayedFileCreation == INITIAL_DBFILE_STAT_FILE_EXISTS)
        return NO_ERROR;

    // Don't try creating extent again if we are already in error state with a
    // previous thread failing to create this extent.
    if (fDelayedFileCreation == INITIAL_DBFILE_STAT_ERROR_STATE)
    {
        rc = ERR_FILE_CREATE;
        std::ostringstream oss;
        oss << "Previous attempt failed to create initial dbroot" <<
            curCol.dataFile.fDbRoot <<
            " extent for column file OID-" << column.mapOid <<
            "; dbroot-"       << curCol.dataFile.fDbRoot     <<
            "; partition-"    << curCol.dataFile.fPartition;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        return rc;
    }

    // Once we get this far, we go ahead and acquire a fColMutex lock.  The
    // fDelayedFileCreateMutex lock might suffice, but better to explicitly
    // lock fColMutex since we are modifying attributes that we typically
    // change within the scope of a fColMutex lock.
    boost::mutex::scoped_lock lock2(fColMutex);

    // Target location was staged by setupDelayedFileCreation().
    uint16_t dbRoot      = curCol.dataFile.fDbRoot;
    uint32_t partition   = curCol.dataFile.fPartition;

    // We don't have a file on this PM, so we create an initial file
    ColumnOpBulk tempColOp(fLog, column.compressionType);

    bool         createLeaveFileOpen = false;
    IDBDataFile* createPFile      = 0;
    uint16_t     createDbRoot     = dbRoot;
    uint32_t     createPartition  = partition;
    uint16_t     createSegment    = 0;
    std::string  createSegFile;
    HWM          createHwm        = 0;    //output
    BRM::LBID_t  createStartLbid  = 0;    //output
    bool         createNewFile    = true; //output
    int          createAllocSize  = 0;    //output
    char*        createHdrs       = 0;    //output

    // Step 1: allocate the extent in the BRM extent map.
    std::string allocErrMsg;
    rc = fpTableInfo->allocateBRMColumnExtent( curCol.dataFile.fid,
            createDbRoot,
            createPartition,
            createSegment,
            createStartLbid,
            createAllocSize,
            createHwm,
            allocErrMsg );

    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "Error creating initial dbroot" << dbRoot <<
            " BRM extent for OID-" << column.mapOid <<
            "; dbroot-"     << dbRoot    <<
            "; partition-"  << partition <<
            "; " << ec.errorString(rc)   <<
            "; " << allocErrMsg;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
        return rc;
    }

    uint16_t segment = createSegment;
    partition        = createPartition;  // update our partition variable in
    // case extent was added to a different
    // partition than we intended
    BRM::LBID_t lbid = createStartLbid;

    // Step 2: extend the physical column file to match the BRM extent.
    rc = tempColOp.extendColumn(
             curCol,
             createLeaveFileOpen,
             createHwm,
             createStartLbid,
             createAllocSize,
             createDbRoot,
             createPartition,
             createSegment,
             createSegFile,
             createPFile,
             createNewFile,
             createHdrs);

    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "Error adding initial dbroot" << dbRoot <<
            " extent to column file OID-" << column.mapOid <<
            "; dbroot-"       << dbRoot    <<
            "; partition-"    << partition <<
            "; segment-"      << segment   <<
            "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
        return rc;
    }

    // We don't have a file on this PM (or HWM extent is disabled), so we
    // create a new file to load
    std::ostringstream oss1;

    if (fDelayedFileCreation == INITIAL_DBFILE_STAT_CREATE_FILE_ON_EMPTY)
        oss1 << "PM empty; Creating starting column extent";
    else
        oss1 << "HWM extent disabled; Creating starting column extent";

    oss1 << " on DBRoot-" << createDbRoot <<
         " for OID-" << column.mapOid   <<
         "; part-"   << createPartition <<
         "; seg-"    << createSegment   <<
         "; hwm-"    << createHwm       <<
         "; LBID-"   << createStartLbid <<
         "; file-"   << createSegFile;
    fLog->logMsg( oss1.str(), MSGLVL_INFO2 );

    // Step 3: create corresponding dictionary store file if applicable
    if (column.colType == COL_TYPE_DICT)
    {
        std::ostringstream oss;
        oss << "Creating starting dictionary extent on dbroot" << dbRoot <<
            " (segment " << segment <<
            ") for dictionary OID " << column.dctnry.dctnryOid;
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );
        BRM::LBID_t dLbid;
        Dctnry* tempD = 0;

        // Select compressed vs uncompressed dictionary implementation.
        if (column.dctnry.fCompressionType != 0)
        {
            DctnryCompress1* tempD1;
            tempD1 = new DctnryCompress1;
            tempD1->setMaxActiveChunkNum(1);
            tempD1->setBulkFlag(true);
            tempD = tempD1;
        }
        else
        {
            tempD = new DctnryCompress0;
        }

        // scoped_ptr guarantees tempD is freed on every return path below.
        boost::scoped_ptr<Dctnry> refDctnry(tempD);
        rc = tempD->createDctnry(
                 column.dctnry.dctnryOid,
                 column.dctnryWidth,
                 dbRoot,
                 partition,
                 segment,
                 dLbid,
                 true); // creating the store file

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "Error creating initial dbroot" << dbRoot <<
                " extent for dictionary file OID-" <<
                column.dctnry.dctnryOid        <<
                "; dbroot-"       << dbRoot    <<
                "; partition-"    << partition <<
                "; segment-"      << segment   <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
            fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
            return rc;
        }

        rc = tempD->closeDctnry();

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "Error creating/closing initial dbroot" <<
                dbRoot << " extent for dictionary file OID-" <<
                column.dctnry.dctnryOid        <<
                "; partition-"    << partition <<
                "; segment-"      << segment   <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
            fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
            return rc;
        }
    } // end of dictionary column processing

    // Check for special case: where we skip initial blk(s) at the start of
    // the "very" 1st file on each PM.
    // We are checking to see if the PM is empty, "and" if the partition is 0.
    // The PM could be empty if all the existing files on the PM were dropped
    // or disabled, but we don't want/need to do block skipping in this case;
    // so we also check to see if the partition number is 0, denoting the 1st
    // extent for the PM.
    // (The reason we are skipping blocks in partition 0, is because import
    // does this with the partition 0, segment 0 file created by DDL.
    // We skip blocks on the other PMs, so that the 1st file created on each
    // PM will employ the same block skipping.)
    HWM hwm = 0;

    if ((fDelayedFileCreation == INITIAL_DBFILE_STAT_CREATE_FILE_ON_EMPTY) &&
            (partition == 0))
    {
        hwm = fDelayedFileStartBlksSkipped;
    }

    // Step 4: open/setup the initial column extent so loading can begin.
    rc = setupInitialColumnExtent(
             dbRoot, partition, segment,
             tableName, lbid, hwm, hwm, false, true );

    if (rc == NO_ERROR)
        fDelayedFileCreation = INITIAL_DBFILE_STAT_FILE_EXISTS;
    else
        fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;

    return rc;
}
 | |
| 
 | |
//------------------------------------------------------------------------------
// Add an extent for this column.  The next segment file in the DBRoot,
// partition, segment number rotation will be selected for the extent.
//
// Sequence: record and close the current segment file, verify the local
// DBRoot list has not changed, consume any partially-filled extents the
// DBRoot tracker reports (filling them with empty values), then allocate
// a brand new extent via extendColumnNewExtent().
//
// @param saveLBIDForCP Pass-through to extendColumnNewExtent(); when true,
//                      the new extent's LBID is saved with the Casual
//                      Partition (min/max) info.
// @return NO_ERROR on success, else an error code (already logged).
//
// NOTE: no mutex lock is employed here.  It is assumed that the calling
//       application code is taking care of this, if it is needed.
//------------------------------------------------------------------------------
int ColumnInfo::extendColumn( bool saveLBIDForCP )
{
    //..We assume the applicable file is already open, so...
    //  the HWM of the current segment file should be set to reference the
    //  last block in the current file (as specified in curCol.dataFile.pFile).
    //
    //  Prior to adding compression, we used ftell() to set HWM, but that
    //  would not work for compressed data.  Code now assumes that if we
    //  are adding an extent, that fSizeWritten is a multiple of blksize,
    //  which it should be.  If we are adding an extent, fSizeWritten should
    //  point to the last byte of a full extent boundary.
    HWM hwm = (fSizeWritten / BYTE_PER_BLOCK) - 1;

    //..Save info about the current segment column file, and close that file.
    addToSegFileList( curCol.dataFile, hwm );

    // Close current segment column file prior to adding extent to next seg file
    int rc = closeColumnFile( true, false );

    if (rc != NO_ERROR)
    {
        std::ostringstream oss;
        oss << "extendColumn: error closing extent in "    <<
            "column OID-" << curCol.dataFile.fid        <<
            "; DBRoot-"   << curCol.dataFile.fDbRoot    <<
            "; part-"     << curCol.dataFile.fPartition <<
            "; seg-"      << curCol.dataFile.fSegment   <<
            "; hwm-"      << hwm;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

        return rc;
    }

    // Call Config::initConfigCache() to force the Config class
    // to reload config cache "if" the config file has changed.
    Config::initConfigCache();

    // Fail the job if the DBRoot-to-PM assignment changed mid-load; our
    // cached rotation information would no longer be valid.
    bool bChangeFlag = Config::hasLocalDBRootListChanged();

    //if (fLog->isDebug( DEBUG_1 ))
    //{
    //  std::ostringstream oss;
    //  oss << "Checking DBRootListChangeFlag: " << bChangeFlag;
    //  fLog->logMsg( oss.str(), MSGLVL_INFO2 );
    //}
    if (bChangeFlag)
    {
        rc = ERR_BULK_DBROOT_CHANGE;

        WErrorCodes ec;
        std::ostringstream oss;
        oss << "extendColumn: DBRoots changed; " <<
            ec.errorString( rc );
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

        return rc;
    }

    //..Declare variables used to advance to the next extent
    uint16_t    dbRootNext   = 0;
    uint32_t    partitionNext = 0;
    uint16_t    segmentNext  = 0;
    HWM         hwmNext      = 0;
    BRM::LBID_t startLbid;

    //..When we finish an extent, we typically should be advancing to the next
    //  DBRoot to create a "new" extent.  But "if" the user has moved a DBRoot
    //  from another PM to this PM, then we may have a partial extent that we
    //  need to fill up.  Here's where we just fill out such partially filled
    //  extents with empty values, until we can get back to a "normal" full
    //  extent boundary case.
    bool bAllocNewExtent = false;

    while (!bAllocNewExtent)
    {
        //..If we have a DBRoot Tracker, then use that to determine next DBRoot
        //  to rotate to, else the old legacy BRM extent allocator will assign,
        //  if we pass in a dbroot of 0.
        bAllocNewExtent = true;

        if (fDbRootExtTrk)
        {
            // nextSegFile() returns false when the next slot is a partially
            // filled (old) extent that must be padded out first.
            bAllocNewExtent = fDbRootExtTrk->nextSegFile(
                                  dbRootNext, partitionNext, segmentNext, hwmNext, startLbid );
        }

        // If our next extent is a partial extent, then fill out that extent
        // to the next full extent boundary, and round up HWM accordingly.
        if (!bAllocNewExtent)
        {
            rc = extendColumnOldExtent( dbRootNext,
                                        partitionNext, segmentNext, hwmNext );

            if (rc != NO_ERROR)
                return rc;
        }
    }

    // Once we are back on a "normal" full extent boundary, we add a new extent
    // to resume adding rows.
    rc = extendColumnNewExtent( saveLBIDForCP, dbRootNext, partitionNext );

    return rc;
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Add a new extent to this column, at the specified DBRoot.  Partition may be
 | |
| // used if DBRoot is empty.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::extendColumnNewExtent(
 | |
|     bool     saveLBIDForCP,
 | |
|     uint16_t dbRootNew,
 | |
|     uint32_t partitionNew )
 | |
| {
 | |
|     //..Declare variables used to advance to the next extent
 | |
|     IDBDataFile* pFileNew     = 0;
 | |
|     HWM         hwmNew       = 0;
 | |
|     bool        newFile      = false;
 | |
|     std::string segFileNew;
 | |
| 
 | |
|     uint16_t    segmentNew   = 0;
 | |
|     BRM::LBID_t startLbid;
 | |
| 
 | |
|     char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
 | |
| 
 | |
|     // Extend the column by adding an extent to the next
 | |
|     // DBRoot, partition, and segment file in the rotation
 | |
|     int allocsize = 0;
 | |
|     std::string allocErrMsg;
 | |
|     int rc = fpTableInfo->allocateBRMColumnExtent( curCol.dataFile.fid,
 | |
|              dbRootNew,
 | |
|              partitionNew,
 | |
|              segmentNew,
 | |
|              startLbid,
 | |
|              allocsize,
 | |
|              hwmNew,
 | |
|              allocErrMsg );
 | |
| 
 | |
|     if (rc != NO_ERROR)
 | |
|     {
 | |
|         WErrorCodes ec;
 | |
|         std::ostringstream oss;
 | |
|         oss << "extendColumnNewExtent: error creating BRM extent after " <<
 | |
|             "column OID-" << curCol.dataFile.fid          <<
 | |
|             "; DBRoot-"   << curCol.dataFile.fDbRoot      <<
 | |
|             "; part-"     << curCol.dataFile.fPartition   <<
 | |
|             "; seg-"      << curCol.dataFile.fSegment;
 | |
| 
 | |
|         oss << "; newDBRoot-" << dbRootNew                <<
 | |
|             "; newpart-"      << partitionNew             <<
 | |
|             "; " << ec.errorString(rc)                    <<
 | |
|             "; " << allocErrMsg;
 | |
|         fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
 | |
|         fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());
 | |
| 
 | |
|         return rc;
 | |
|     }
 | |
| 
 | |
|     rc = colOp->extendColumn ( curCol,
 | |
|                                true,  // leave file open
 | |
|                                hwmNew,
 | |
|                                startLbid,
 | |
|                                allocsize,
 | |
|                                dbRootNew,
 | |
|                                partitionNew,
 | |
|                                segmentNew,
 | |
|                                segFileNew,
 | |
|                                pFileNew,
 | |
|                                newFile,
 | |
|                                hdr );
 | |
| 
 | |
|     if (rc != NO_ERROR)
 | |
|     {
 | |
|         WErrorCodes ec;
 | |
|         std::ostringstream oss;
 | |
|         oss << "extendColumnNewExtent: error adding file extent after " <<
 | |
|             "column OID-" << curCol.dataFile.fid          <<
 | |
|             "; DBRoot-"   << curCol.dataFile.fDbRoot      <<
 | |
|             "; part-"     << curCol.dataFile.fPartition   <<
 | |
|             "; seg-"      << curCol.dataFile.fSegment;
 | |
| 
 | |
|         oss << "; newDBRoot-" << dbRootNew                <<
 | |
|             "; newpart-"      << partitionNew             <<
 | |
|             "; newseg-"       << segmentNew               <<
 | |
|             "; fbo-"          << hwmNew                   <<
 | |
|             "; " << ec.errorString(rc);
 | |
|         fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
 | |
|         fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());
 | |
| 
 | |
|         if (pFileNew)
 | |
|             colOp->closeFile( pFileNew ); // clean up loose ends
 | |
| 
 | |
|         return rc;
 | |
|     }
 | |
| 
 | |
|     std::ostringstream oss;
 | |
|     oss << "Add column extent OID-" << curCol.dataFile.fid <<
 | |
|         "; DBRoot-" << dbRootNew    <<
 | |
|         "; part-"   << partitionNew <<
 | |
|         "; seg-"    << segmentNew   <<
 | |
|         "; hwm-"    << hwmNew       <<
 | |
|         "; LBID-"   << startLbid    <<
 | |
|         "; file-"   << segFileNew;
 | |
|     fLog->logMsg( oss.str(), MSGLVL_INFO2 );
 | |
| 
 | |
|     // Save the LBID with our CP extent info, so that we can update extent map
 | |
|     if (saveLBIDForCP)
 | |
|     {
 | |
|         int rcLBID = fColExtInf->updateEntryLbid( startLbid );
 | |
| 
 | |
|         // If error occurs, we log WARNING, but we don't fail the job.
 | |
|         if (rcLBID != NO_ERROR)
 | |
|         {
 | |
|             WErrorCodes ec;
 | |
|             std::ostringstream oss;
 | |
|             oss << "updateEntryLbid failed for OID-" << curCol.dataFile.fid <<
 | |
|                 "; LBID-" << startLbid <<
 | |
|                 "; CasualPartition info may become invalid; " <<
 | |
|                 ec.errorString(rcLBID);
 | |
|             fLog->logMsg( oss.str(), rcLBID, MSGLVL_WARNING );
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     //..Reset data members to reflect where we are in the newly
 | |
|     //  opened column segment file.  The file may be a new file, or we may
 | |
|     //  be adding an extent to an existing column segment file.
 | |
|     curCol.dataFile.hwm          = hwmNew;
 | |
|     curCol.dataFile.pFile        = pFileNew;
 | |
|     curCol.dataFile.fPartition   = partitionNew;
 | |
|     curCol.dataFile.fSegment     = segmentNew;
 | |
|     curCol.dataFile.fDbRoot      = dbRootNew;
 | |
|     curCol.dataFile.fSegFileName = segFileNew;
 | |
| 
 | |
|     rc = resetFileOffsetsNewExtent(hdr);
 | |
| 
 | |
|     if (rc != NO_ERROR)
 | |
|     {
 | |
|         std::ostringstream oss;
 | |
|         oss << "extendColumnNewExtent: error moving to new extent in " <<
 | |
|             "column OID-" << curCol.dataFile.fid           <<
 | |
|             "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
 | |
|             "; part-"     << curCol.dataFile.fPartition    <<
 | |
|             "; seg-"      << curCol.dataFile.fSegment      <<
 | |
|             "; hwm-"      << curCol.dataFile.hwm;
 | |
|         fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
 | |
| 
 | |
|         if (pFileNew)
 | |
|             closeColumnFile( false, true ); // clean up loose ends
 | |
| 
 | |
|         return rc;
 | |
|     }
 | |
| 
 | |
|     if (fLog->isDebug( DEBUG_1 ))
 | |
|     {
 | |
|         std::ostringstream oss2;
 | |
|         oss2 << "Extent added to column OID-" << curCol.dataFile.fid <<
 | |
|              "; DBRoot-" << dbRootNew    <<
 | |
|              "; part-"   << partitionNew <<
 | |
|              "; seg-"    << segmentNew   <<
 | |
|              "; begByte-" << fSizeWritten <<
 | |
|              "; endByte-" << fileSize     <<
 | |
|              "; freeBytes-" << availFileSize;
 | |
|         fLog->logMsg( oss2.str(), MSGLVL_INFO2 );
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Fill out existing partial extent to extent boundary, so that we can resume
 | |
| // inserting rows on an extent boundary basis.  This use case should only take
 | |
| // place when a DBRoot with a partial extent has been moved from one PM to
 | |
| // another.
 | |
| //------------------------------------------------------------------------------
 | |
int ColumnInfo::extendColumnOldExtent(
    uint16_t    dbRootNext,
    uint32_t    partitionNext,
    uint16_t    segmentNext,
    HWM         hwmNext )
{
    // Number of blocks making up one full extent for this column's width.
    const unsigned int BLKS_PER_EXTENT =
        (fRowsPerExtent * column.width) / BYTE_PER_BLOCK;
    HWM hwmNextExtentBoundary = hwmNext;

    // Round up HWM to the end of the current extent
    unsigned int nBlks = hwmNext + 1;
    unsigned int nRem  = nBlks % BLKS_PER_EXTENT;

    if (nRem > 0)
        hwmNextExtentBoundary = nBlks - nRem + BLKS_PER_EXTENT - 1;
    else
        hwmNextExtentBoundary = nBlks - 1;

    std::ostringstream oss;
    oss << "Padding partial extent to extent boundary in OID-" <<
        curCol.dataFile.fid <<
        "; DBRoot-" << dbRootNext    <<
        "; part-"   << partitionNext <<
        "; seg-"    << segmentNext   <<
        "; oldhwm-" << hwmNext       <<
        "; newhwm-" << hwmNextExtentBoundary;
    fLog->logMsg( oss.str(), MSGLVL_INFO2 );

    // File size (in bytes) is used below to detect whether this segment file
    // still holds an abbreviated (not yet fully expanded) first extent.
    long long fileSizeBytes;
    int rc = colOp->getFileSize( curCol.dataFile.fid,
                                 dbRootNext, partitionNext, segmentNext, fileSizeBytes);

    if (rc != NO_ERROR)
    {
        // NOTE(review): this message reports curCol.dataFile fields, which at
        // this point still describe the previous extent location, not the
        // dbRootNext/partitionNext/segmentNext being padded — confirm intended.
        std::ostringstream oss;
        oss << "extendColumnOldExtent: error padding partial extent for " <<
            "column OID-" << curCol.dataFile.fid           <<
            "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
            "; part-"     << curCol.dataFile.fPartition    <<
            "; seg-"      << curCol.dataFile.fSegment      <<
            "; hwm-"      << curCol.dataFile.hwm;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

        return rc;
    }

    // Repoint curCol at the segment file being padded.  pFile is left null
    // because this function does not leave the file open; the HWM is set to
    // the rounded-up extent boundary computed above.
    curCol.dataFile.pFile        = 0;
    curCol.dataFile.fDbRoot      = dbRootNext;
    curCol.dataFile.fPartition   = partitionNext;
    curCol.dataFile.fSegment     = segmentNext;
    curCol.dataFile.hwm          = hwmNextExtentBoundary;
    curCol.dataFile.fSegFileName.clear();

    // See if we have an abbreviated extent that needs to be expanded on disk
    if (fileSizeBytes == (long long)INITIAL_EXTENT_ROWS_TO_DISK * column.width)
    {
        std::string segFile;

        // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
        IDBDataFile* pFile = colOp->openFile( curCol,
                                              dbRootNext, partitionNext, segmentNext, segFile, true );

        if ( !pFile )
        {
            std::ostringstream oss;
            rc = ERR_FILE_OPEN;
            oss << "extendColumnOldExtent: error padding partial extent for " <<
                "column OID-" << curCol.dataFile.fid           <<
                "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
                "; part-"     << curCol.dataFile.fPartition    <<
                "; seg-"      << curCol.dataFile.fSegment      <<
                "; hwm-"      << curCol.dataFile.hwm;
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

            return rc;
        }

        // Grow the abbreviated extent out to a full extent, filling the new
        // blocks with the column's empty value.
        rc = colOp->expandAbbrevColumnExtent( pFile, dbRootNext,
                                              column.emptyVal, column.width);

        if (rc != NO_ERROR)
        {
            std::ostringstream oss;
            oss << "extendColumnOldExtent: error padding partial extent for " <<
                "column OID-" << curCol.dataFile.fid           <<
                "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
                "; part-"     << curCol.dataFile.fPartition    <<
                "; seg-"      << curCol.dataFile.fSegment      <<
                "; hwm-"      << curCol.dataFile.hwm;
            fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
            fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());

            colOp->closeFile( pFile );

            return rc;
        }

        colOp->closeFile( pFile );
    }

    // Record the padded HWM so the ExtentMap can be updated at end of job.
    addToSegFileList( curCol.dataFile, hwmNextExtentBoundary );

    return NO_ERROR;
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| //  Either add or update the File object, so that it has the updated HWM.
 | |
| //  We will access this info to update the HWM in the ExtentMap at the end
 | |
| //  of the import.
 | |
| //  dmc-could optimize later by changing fSegFileUpdateList from a vector
 | |
| //  to a map or hashtable with a key consisting of partition and segment.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::addToSegFileList( File& dataFile, HWM hwm )
 | |
| {
 | |
|     bool foundFlag = false;
 | |
| 
 | |
|     for (unsigned int i = 0; i < fSegFileUpdateList.size(); i++)
 | |
|     {
 | |
|         if ((fSegFileUpdateList[i].fPartition == dataFile.fPartition) &&
 | |
|                 (fSegFileUpdateList[i].fSegment   == dataFile.fSegment))
 | |
|         {
 | |
|             if (fLog->isDebug( DEBUG_1 ))
 | |
|             {
 | |
|                 std::ostringstream oss3;
 | |
|                 oss3 << "Updating HWM list"
 | |
|                      "; column OID-" << dataFile.fid    <<
 | |
|                      "; DBRoot-" << dataFile.fDbRoot    <<
 | |
|                      "; part-"   << dataFile.fPartition <<
 | |
|                      "; seg-"    << dataFile.fSegment   <<
 | |
|                      "; oldhwm-" << fSegFileUpdateList[i].hwm  <<
 | |
|                      "; newhwm-" << hwm;
 | |
|                 fLog->logMsg( oss3.str(), MSGLVL_INFO2 );
 | |
|             }
 | |
| 
 | |
|             fSegFileUpdateList[i].hwm = hwm;
 | |
|             foundFlag = true;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (!foundFlag)
 | |
|     {
 | |
|         if (fLog->isDebug( DEBUG_1 ))
 | |
|         {
 | |
|             std::ostringstream oss3;
 | |
|             oss3 << "Adding to HWM list" <<
 | |
|                  "; column OID-" << dataFile.fid    <<
 | |
|                  "; DBRoot-" << dataFile.fDbRoot    <<
 | |
|                  "; part-"   << dataFile.fPartition <<
 | |
|                  "; seg-"    << dataFile.fSegment   <<
 | |
|                  "; hwm-"    << hwm;
 | |
|             fLog->logMsg( oss3.str(), MSGLVL_INFO2 );
 | |
|         }
 | |
| 
 | |
|         dataFile.hwm = hwm;
 | |
|         fSegFileUpdateList.push_back( dataFile );
 | |
|     }
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Reset file offset data member attributes when we start working on the next
 | |
| // extent.
 | |
| //------------------------------------------------------------------------------
 | |
int ColumnInfo::resetFileOffsetsNewExtent(const char* /*hdr*/)
{
    // Recompute the logical (uncompressed) file size implied by the new HWM;
    // 'false' => treat as a full (non-abbreviated) extent.
    setFileSize( curCol.dataFile.hwm, false );

    // Byte offset of the HWM block; this is where writing resumes.
    long long byteOffset = (long long)curCol.dataFile.hwm *
                           (long long)BYTE_PER_BLOCK;
    fSizeWritten      = byteOffset;
    fSizeWrittenStart = fSizeWritten;
    availFileSize     = fileSize - fSizeWritten;

    // If we are adding an extent as part of preliminary block skipping, then
    // we won't have a ColumnBufferManager object yet, but that's okay, because
    // we are only adding the empty extent at this point.
    if (fColBufferMgr)
    {
        // Tell the buffer manager about the new file/HWM before seeking, so
        // its state matches the physical file position set just below.
        RETURN_ON_ERROR( fColBufferMgr->setDbFile(
                             curCol.dataFile.pFile, curCol.dataFile.hwm, 0) );

        RETURN_ON_ERROR( colOp->setFileOffset(curCol.dataFile.pFile,
                                              byteOffset) );
    }

    return NO_ERROR;
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Set current size of file in raw (uncompressed) bytes, given the specified
 | |
| // hwm.  abbrevFlag indicates whether this is a fixed size abbreviated extent.
 | |
| // For unabbreviated extents the "logical" file size is calculated by rounding
 | |
| // the hwm up to the nearest multiple of the extent size.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::setFileSize( HWM hwm, int abbrevFlag )
 | |
| {
 | |
|     // Must be an abbreviated extent if there is only 1 compressed chunk in
 | |
|     // the db file.  Even a 1-byte column would have 2 4MB chunks for an 8M
 | |
|     // row column extent.
 | |
|     if (abbrevFlag)
 | |
|     {
 | |
|         fileSize = (INITIAL_EXTENT_ROWS_TO_DISK * curCol.colWidth);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         const unsigned int ROWS_PER_EXTENT = fRowsPerExtent;
 | |
| 
 | |
|         long long nRows = ((long long)(hwm + 1) * (long long)BYTE_PER_BLOCK) /
 | |
|                           (long long)curCol.colWidth;
 | |
|         long long nRem  = nRows % ROWS_PER_EXTENT;
 | |
| 
 | |
|         if (nRem == 0)
 | |
|         {
 | |
|             fileSize = nRows * curCol.colWidth;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             fileSize = (nRows - nRem + ROWS_PER_EXTENT) * curCol.colWidth;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // If we are dealing with the first extent in the first segment file for this
 | |
| // column, and the segment file is still equal to 256K rows, then we set the
 | |
| // fLoadingAbbreviatedExtent flag.  This tells us (later on) that we are dealing
 | |
| // with an abbreviated extent that still needs to be expanded and filled, before
 | |
| // we start adding new extents.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::setAbbrevExtentCheck( )
 | |
| {
 | |
| // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated?
 | |
|     if ((curCol.dataFile.fPartition == 0) &&
 | |
|             (curCol.dataFile.fSegment   == 0))
 | |
|     {
 | |
|         if (fileSize == (INITIAL_EXTENT_ROWS_TO_DISK * curCol.colWidth))
 | |
|         {
 | |
|             fLoadingAbbreviatedExtent = true;
 | |
| 
 | |
|             if (fLog->isDebug( DEBUG_1 ))
 | |
|             {
 | |
|                 std::ostringstream oss;
 | |
|                 oss << "Importing into abbreviated extent, column OID-" <<
 | |
|                     curCol.dataFile.fid   <<
 | |
|                     "; DBRoot-"   << curCol.dataFile.fDbRoot    <<
 | |
|                     "; part-"     << curCol.dataFile.fPartition <<
 | |
|                     "; seg-"      << curCol.dataFile.fSegment   <<
 | |
|                     "; fileSize-" << fileSize;
 | |
|                 fLog->logMsg( oss.str(), MSGLVL_INFO2 );
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // If this is an abbreviated extent, we expand the extent to a full extent on
 | |
| // disk, by initializing the necessary number of remaining blocks.
 | |
| // bRetainFilePos flag controls whether the current file position is retained
 | |
| // upon return from this function; else the file will be positioned at the end
 | |
| // of the file.
 | |
| //------------------------------------------------------------------------------
 | |
int ColumnInfo::expandAbbrevExtent( bool bRetainFilePos )
{
    if (fLoadingAbbreviatedExtent)
    {
        off64_t oldOffset = 0;

        // Remember where we are, so we can seek back after the expansion.
        if (bRetainFilePos)
        {
            oldOffset = curCol.dataFile.pFile->tell();
        }

        // New blocks are appended, so position at end-of-file first.
        colOp->setFileOffset( curCol.dataFile.pFile, 0, SEEK_END );

        std::ostringstream oss;
        oss << "Expanding first extent to column OID-" << curCol.dataFile.fid <<
            "; DBRoot-" << curCol.dataFile.fDbRoot    <<
            "; part-"   << curCol.dataFile.fPartition <<
            "; seg-"    << curCol.dataFile.fSegment   <<
            "; file-"   << curCol.dataFile.fSegFileName;
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );

        int rc = colOp->expandAbbrevExtent ( curCol );

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "expandAbbrevExtent: error expanding extent for " <<
                "OID-"      << curCol.dataFile.fid        <<
                "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                "; part-"   << curCol.dataFile.fPartition <<
                "; seg-"    << curCol.dataFile.fSegment   <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
            fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());
            return rc;
        }

        // Update available file size to reflect disk space added by expanding
        // the extent.
        // NOTE(review): setFileSize() takes a block-number hwm; here it is
        // passed the old byte size divided by BYTE_PER_BLOCK (a block count) —
        // confirm the off-by-one-block rounding is intended.
        long long fileSizeBeforeExpand = fileSize;
        setFileSize( (fileSizeBeforeExpand / BYTE_PER_BLOCK), false );
        availFileSize += (fileSize - fileSizeBeforeExpand);

        // Restore offset back to where we were before expanding the extent
        if (bRetainFilePos)
        {
            rc = colOp->setFileOffset(curCol.dataFile.pFile, oldOffset, SEEK_SET);

            if (rc != NO_ERROR)
            {
                WErrorCodes ec;
                std::ostringstream oss;
                oss << "expandAbbrevExtent: error seeking to new extent for " <<
                    "OID-"      << curCol.dataFile.fid        <<
                    "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                    "; part-"   << curCol.dataFile.fPartition <<
                    "; seg-"    << curCol.dataFile.fSegment   <<
                    "; " << ec.errorString(rc);
                fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
                fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());
                return rc;
            }
        }

        // We only use abbreviated extents for the very first extent.  So after
        // expanding a col's abbreviated extent, we should disable this check.
        fLoadingAbbreviatedExtent = false;
    }

    return NO_ERROR;
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Close the current Column file.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::closeColumnFile(bool /*bCompletingExtent*/, bool /*bAbort*/)
 | |
| {
 | |
|     if ( curCol.dataFile.pFile )
 | |
|     {
 | |
|         colOp->closeFile( curCol.dataFile.pFile );
 | |
|         curCol.dataFile.pFile = 0;
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Initialize fLastInputRowInCurrentExtent used in detecting when a Read Buffer
 | |
| // is crossing an extent boundary, so that we can accurately track the min/max
 | |
| // for each extent as the Read buffers are parsed.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::lastInputRowInExtentInit( bool bIsNewExtent )
 | |
| {
 | |
|     // Reworked initial block skipping for compression:
 | |
|     const unsigned int ROWS_PER_EXTENT = fRowsPerExtent;
 | |
|     RID numRowsLeftInExtent = 0;
 | |
|     RID numRowsWritten = fSizeWritten / curCol.colWidth;
 | |
| 
 | |
|     if ((numRowsWritten % ROWS_PER_EXTENT) != 0)
 | |
|         numRowsLeftInExtent = ROWS_PER_EXTENT -
 | |
|                               (numRowsWritten % ROWS_PER_EXTENT);
 | |
| 
 | |
|     bool bRoomToAddToOriginalExtent = true;
 | |
| 
 | |
|     if (fSizeWritten > 0)
 | |
|     {
 | |
|         // Handle edge case; if numRowsLeftInExtent comes out to be 0, then
 | |
|         // current extent is full.  In this case we first bump up row count
 | |
|         // by a full extent before we subtract by 1 to get the last row number
 | |
|         // in extent.
 | |
|         if (numRowsLeftInExtent == 0)
 | |
|         {
 | |
|             numRowsLeftInExtent = ROWS_PER_EXTENT;;
 | |
|             bRoomToAddToOriginalExtent = false;
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         // Starting new file with empty extent, so set row count to full extent
 | |
|         numRowsLeftInExtent = ROWS_PER_EXTENT;
 | |
|     }
 | |
| 
 | |
|     fLastInputRowInCurrentExtent = numRowsLeftInExtent - 1;
 | |
| 
 | |
|     // If we have a pre-existing extent that we are going to add rows to,
 | |
|     // then we need to add that extent to our ColExtInf object, so that we
 | |
|     // can update the CP min/max at the end of the bulk load job.
 | |
|     if ( bRoomToAddToOriginalExtent )
 | |
|     {
 | |
|         fColExtInf->addFirstEntry(fLastInputRowInCurrentExtent,
 | |
|                                   fSavedLbid,
 | |
|                                   bIsNewExtent );
 | |
|     }
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Increment fLastRIDInExtent to the end of the next extent.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::lastInputRowInExtentInc( )
 | |
| {
 | |
|     fLastInputRowInCurrentExtent += fRowsPerExtent;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Parsing is complete for this column.  Flush pending data.  Close the current
 | |
| // segment file, and corresponding dictionary store file (if applicable).  Also
 | |
| // clears memory taken up by this ColumnInfo object.
 | |
| //------------------------------------------------------------------------------
 | |
int ColumnInfo::finishParsing( )
{
    int rc = NO_ERROR;

    // Close the dctnry file handle.
    // Done before taking fColMutex; closeDctnryStore(false) manages its own
    // state for the dictionary store file.
    if (fStore)
    {
        rc = closeDctnryStore(false);

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "finishParsing: close dictionary file error with column " <<
                column.colName << "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR);
            return rc;
        }
    }

    // We don't need the mutex to protect against concurrent access by other
    // threads, since by the time we get to this point, this is the last
    // thread working on this column.  But, we use the mutex to insure that
    // we see the latest state that may have been set by another parsing thread
    // working with the same column.
    boost::mutex::scoped_lock lock(fColMutex);

    // Force the flushing of remaining data in the output buffer
    // (must happen before the column file is closed below).
    if (fColBufferMgr)
    {
        rc = fColBufferMgr->flush( );

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "finishParsing: flush error with column " << column.colName <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR);
            return rc;
        }
    }

    // Close the column file
    rc = closeColumnFile(false, false);

    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "finishParsing: close column file error with column " <<
            column.colName << "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR);
        return rc;
    }

    // Release memory held by this ColumnInfo object now that parsing is done.
    clearMemory();

    return NO_ERROR;
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Store updated column information in BRMReporter for this column at EOJ;
 | |
| // so that Extent Map CP information and HWM's can be updated.
 | |
| // Bug2117-Src code from this function was factored over from we_tableinfo.cpp.
 | |
| //
 | |
| // We use mutex because this function is called by "one" of the parsing threads
 | |
| // when parsing is complete for all the columns from this column's table.
 | |
| // We use the mutex to insure that this parsing thread, which ends up being
 | |
| // responsible for updating BRM for this column, is getting the most up to
 | |
| // date values in fSegFileUpdateList, fSizeWritten, etc which may have been
 | |
| // set by another parsing thread.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::getBRMUpdateInfo( BRMReporter& brmReporter )
 | |
| {
 | |
|     boost::mutex::scoped_lock lock(fColMutex);
 | |
|     // Useful for debugging
 | |
|     //printCPInfo(column);
 | |
| 
 | |
|     int entriesAdded = getHWMInfoForBRM( brmReporter );
 | |
| 
 | |
|     // If we added any rows (HWM update count > 0), then update corresponding CP
 | |
|     if (entriesAdded > 0)
 | |
|         getCPInfoForBRM( brmReporter );
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Get updated Casual Partition (CP) information for BRM for this column at EOJ.
 | |
| //------------------------------------------------------------------------------
 | |
void ColumnInfo::getCPInfoForBRM( BRMReporter& brmReporter )
{
    // Delegate to the ColExtInf object, which accumulated the per-extent
    // Casual Partition (min/max) data for this column during parsing.
    fColExtInf->getCPInfoForBRM(column, brmReporter);
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Get updated HWM information for BRM for this column at EOJ.
 | |
| // Returns count of the number of HWM entries added to the BRMReporter.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::getHWMInfoForBRM( BRMReporter& brmReporter )
 | |
| {
 | |
|     //..If we wrote out any data to the last segment file, then
 | |
|     //  update HWM for the current (last) segment file we were writing to.
 | |
| 
 | |
|     //Bug1374 - Update HWM when data added to file
 | |
|     if ( fSizeWritten > fSizeWrittenStart )
 | |
|     {
 | |
|         //Bug1372.
 | |
|         HWM hwm = (fSizeWritten - 1) / BYTE_PER_BLOCK;
 | |
| 
 | |
|         addToSegFileList( curCol.dataFile, hwm );
 | |
|     }
 | |
| 
 | |
|     int entriesAdded = 0;
 | |
| 
 | |
|     //..Update HWM for each segment file we touched, including the last one
 | |
|     for (unsigned int iseg = 0;
 | |
|             iseg < fSegFileUpdateList.size(); iseg++)
 | |
|     {
 | |
|         // Log for now; may control with debug flag later
 | |
|         //if (fLog->isDebug( DEBUG_1 ))
 | |
|         {
 | |
|             std::ostringstream oss;
 | |
|             oss << "Saving HWM update for OID-"                       <<
 | |
|                 fSegFileUpdateList[iseg].fid                         <<
 | |
|                 "; hwm-"       << fSegFileUpdateList[iseg].hwm        <<
 | |
|                 "; DBRoot-"    << fSegFileUpdateList[iseg].fDbRoot    <<
 | |
|                 "; partition-" << fSegFileUpdateList[iseg].fPartition <<
 | |
|                 "; segment-"   << fSegFileUpdateList[iseg].fSegment;
 | |
| 
 | |
|             fLog->logMsg( oss.str(), MSGLVL_INFO2 );
 | |
|         }
 | |
| 
 | |
|         BRM::BulkSetHWMArg hwmArg;
 | |
|         hwmArg.oid     = fSegFileUpdateList[iseg].fid;
 | |
|         hwmArg.partNum = fSegFileUpdateList[iseg].fPartition;
 | |
|         hwmArg.segNum  = fSegFileUpdateList[iseg].fSegment;
 | |
|         hwmArg.hwm     = fSegFileUpdateList[iseg].hwm;
 | |
|         brmReporter.addToHWMInfo( hwmArg );
 | |
| 
 | |
|         // Save list of modified db column files
 | |
|         BRM::FileInfo aFile;
 | |
|         aFile.oid          = fSegFileUpdateList[iseg].fid;
 | |
|         aFile.partitionNum = fSegFileUpdateList[iseg].fPartition;
 | |
|         aFile.segmentNum   = fSegFileUpdateList[iseg].fSegment;
 | |
|         aFile.dbRoot       = fSegFileUpdateList[iseg].fDbRoot;
 | |
|         aFile.compType     = curCol.compressionType;
 | |
|         brmReporter.addToFileInfo( aFile );
 | |
| 
 | |
|         // Save list of corresponding modified db dictionary store files
 | |
|         if (column.colType == COL_TYPE_DICT)
 | |
|         {
 | |
|             BRM::FileInfo dFile;
 | |
|             dFile.oid          = column.dctnry.dctnryOid;
 | |
|             dFile.partitionNum = fSegFileUpdateList[iseg].fPartition;
 | |
|             dFile.segmentNum   = fSegFileUpdateList[iseg].fSegment;
 | |
|             dFile.dbRoot       = fSegFileUpdateList[iseg].fDbRoot;
 | |
|             dFile.compType     = curCol.compressionType;
 | |
|             brmReporter.addToDctnryFileInfo( dFile );
 | |
|         }
 | |
| 
 | |
|         entriesAdded++;
 | |
|     }
 | |
| 
 | |
|     fSegFileUpdateList.clear(); // don't need vector anymore, so release memory
 | |
| 
 | |
|     return entriesAdded;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Setup initial extent we will begin loading at start of import.
 | |
| // DBRoot, partition, segment, etc for the starting extent are specified.
 | |
| // If block skipping is causing us to advance to the next extent, then we
 | |
| // set things up to point to the last block in the current extent.  When we
 | |
| // start adding rows, we will automatically advance to the next extent.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::setupInitialColumnExtent(
 | |
|     uint16_t   dbRoot,               // dbroot of starting extent
 | |
|     uint32_t   partition,            // partition number of starting extent
 | |
|     uint16_t   segment,              // segment number of starting extent
 | |
|     const std::string& tblName,       // name of table containing this column
 | |
|     BRM::LBID_t lbid,                 // starting LBID for starting extent
 | |
|     HWM         oldHwm,               // original HWM
 | |
|     HWM         hwm,                  // new projected HWM after block skipping
 | |
|     bool        bSkippedToNewExtent,  // blk skipping to next extent
 | |
|     bool        bIsNewExtent )        // treat as new extent (for CP updates)
 | |
| {
 | |
|     // Init the ColumnInfo object
 | |
|     colOp->initColumn( curCol );
 | |
|     colOp->setColParam( curCol, id,
 | |
|                         column.width,
 | |
|                         column.dataType,
 | |
|                         column.weType,
 | |
|                         column.mapOid,
 | |
|                         column.compressionType,
 | |
|                         dbRoot, partition, segment );
 | |
| 
 | |
|     // Open the column file
 | |
|     if (!colOp->exists(column.mapOid, dbRoot, partition, segment) )
 | |
|     {
 | |
|         std::ostringstream oss;
 | |
|         oss << "Column file does not exist for OID-" << column.mapOid <<
 | |
|             "; DBRoot-"    << dbRoot    <<
 | |
|             "; partition-" << partition <<
 | |
|             "; segment-"   << segment;
 | |
|         fLog->logMsg( oss.str(), ERR_FILE_NOT_EXIST, MSGLVL_ERROR );
 | |
|         return ERR_FILE_NOT_EXIST;
 | |
|     }
 | |
| 
 | |
|     std::string segFile;
 | |
|     bool useTmpSuffix = false;
 | |
| 
 | |
|     if (!bIsNewExtent)
 | |
|         useTmpSuffix = true;
 | |
| 
 | |
|     // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
 | |
|     int rc = colOp->openColumnFile( curCol, segFile, useTmpSuffix );
 | |
| 
 | |
|     if (rc != NO_ERROR)
 | |
|     {
 | |
|         WErrorCodes ec;
 | |
|         std::ostringstream oss;
 | |
|         oss << "Error opening column file for OID-" << column.mapOid <<
 | |
|             "; DBRoot-"    << dbRoot    <<
 | |
|             "; partition-" << partition <<
 | |
|             "; segment-"   << segment   <<
 | |
|             "; filename-"  << segFile   <<
 | |
|             "; " << ec.errorString(rc);
 | |
|         fLog->logMsg( oss.str(), ERR_FILE_OPEN, MSGLVL_ERROR );
 | |
|         return ERR_FILE_OPEN;
 | |
|     }
 | |
| 
 | |
|     std::ostringstream oss1;
 | |
|     oss1 << "Initializing import: "    <<
 | |
|          "Table-"      << tblName       <<
 | |
|          "; Col-"      << column.colName;
 | |
| 
 | |
|     if (curCol.compressionType)
 | |
|         oss1          << " (compressed)";
 | |
| 
 | |
|     oss1 <<  "; OID-" << column.mapOid <<
 | |
|          "; hwm-"      << hwm;
 | |
| 
 | |
|     if (bSkippedToNewExtent)
 | |
|         oss1          << " (full; load into next extent)";
 | |
| 
 | |
|     oss1 << "; file-" << curCol.dataFile.fSegFileName;
 | |
|     fLog->logMsg( oss1.str(), MSGLVL_INFO2 );
 | |
| 
 | |
|     if (column.colType == COL_TYPE_DICT)
 | |
|     {
 | |
|         RETURN_ON_ERROR( openDctnryStore( true ) );
 | |
|     }
 | |
| 
 | |
|     fSavedLbid = lbid;
 | |
| 
 | |
|     if (bSkippedToNewExtent)
 | |
|         oldHwm = hwm;
 | |
| 
 | |
|     rc = setupInitialColumnFile(oldHwm, hwm);
 | |
| 
 | |
|     if (rc != NO_ERROR)
 | |
|     {
 | |
|         WErrorCodes ec;
 | |
|         std::ostringstream oss;
 | |
|         oss << "Error reading/positioning column file for OID-" <<
 | |
|             column.mapOid <<
 | |
|             "; DBRoot-"    << dbRoot    <<
 | |
|             "; partition-" << partition <<
 | |
|             "; segment-"   << segment   <<
 | |
|             "; filename-"  << segFile   <<
 | |
|             "; " << ec.errorString(rc);
 | |
|         fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
 | |
|         return rc;
 | |
|     }
 | |
| 
 | |
|     // Reworked initial block skipping for compression:
 | |
|     // Block skipping is causing us to wrap up this extent.  We consider
 | |
|     // the current extent to be full, so we "pretend" to fill out the
 | |
|     // last block by adding 8192 bytes to the bytes written count.
 | |
|     // This will help trigger the addition of a new extent when we
 | |
|     // try to store the first section of rows to the db.
 | |
|     if (bSkippedToNewExtent)
 | |
|     {
 | |
|         updateBytesWrittenCounts( BYTE_PER_BLOCK );
 | |
|         fSizeWrittenStart = fSizeWritten;
 | |
|     }
 | |
| 
 | |
|     // Reworked initial block skipping for compression:
 | |
|     // This initializes CP stats for first extent regardless of whether
 | |
|     // we end up adding rows to this extent, or initial block skipping
 | |
|     // ultimately causes us to start with a new extent.
 | |
|     lastInputRowInExtentInit( bIsNewExtent );
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
//------------------------------------------------------------------------------
// Prepare the initial column segment file for import.
//
// oldHwm - HWM before initial block skipping (per the caller, set equal to
//          hwm when block skipping moved us to the next extent)
// hwm    - projected HWM after block skipping; the file is positioned at this
//          block before rows are appended
//
// Side effects: allocates fColBufferMgr, and initializes fileSize,
// fSizeWritten, fSizeWrittenStart, and availFileSize from the current file.
// Returns NO_ERROR on success, else the error code of the failing step.
//------------------------------------------------------------------------------
int ColumnInfo::setupInitialColumnFile( HWM oldHwm, HWM hwm )
{
    // Initialize the output buffer manager for the column.
    // Dictionary columns buffer fixed-width token entries (width 8 here);
    // other columns buffer entries of the column's own width.
    if (column.colType == COL_TYPE_DICT)
    {
        fColBufferMgr = new ColumnBufferManagerDctnry(this, 8, fLog, 0);
    }
    else
    {
        fColBufferMgr = new ColumnBufferManager(this, column.width, fLog, 0);
    }

    // Attach the already-opened segment file to the buffer manager at hwm.
    RETURN_ON_ERROR( fColBufferMgr->setDbFile(curCol.dataFile.pFile, hwm, 0) );

    RETURN_ON_ERROR( colOp->getFileSize(curCol.dataFile.pFile, fileSize) );

    // See if dealing with abbreviated extent that will need expanding.
    // This only applies to the first extent of the first segment file.
    setAbbrevExtentCheck();

    // If we are dealing with initial extent, see if block skipping has
    // exceeded disk allocation, in which case we expand to a full extent.
    if (isAbbrevExtent())
    {
        unsigned int numBlksForFirstExtent =
            (INITIAL_EXTENT_ROWS_TO_DISK * column.width) / BYTE_PER_BLOCK;

        // Expand only when the old HWM was inside the abbreviated allocation
        // but the new HWM crosses past it.
        if ( ((oldHwm + 1) <= numBlksForFirstExtent) &&
                ((hwm + 1   ) >  numBlksForFirstExtent) )
        {
            RETURN_ON_ERROR( expandAbbrevExtent(false) );
        }
    }

    // Seek till the HWM lbid.
    // Store the current allocated file size in availFileSize.
    long long byteOffset = (long long)hwm * (long long)BYTE_PER_BLOCK;
    RETURN_ON_ERROR( colOp->setFileOffset(curCol.dataFile.pFile, byteOffset) );

    fSizeWritten      = byteOffset;
    fSizeWrittenStart = fSizeWritten;
    availFileSize     = fileSize - fSizeWritten;

    if (fLog->isDebug( DEBUG_1 ))
    {
        std::ostringstream oss;
        oss << "Init raw data offsets in column file OID-" <<
            curCol.dataFile.fid <<
            "; DBRoot-" << curCol.dataFile.fDbRoot    <<
            "; part-"   << curCol.dataFile.fPartition <<
            "; seg-"    << curCol.dataFile.fSegment   <<
            "; begByte-" << fSizeWritten <<
            "; endByte-" << fileSize     <<
            "; freeBytes-" << availFileSize;
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );
    }

    return NO_ERROR;
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Update the number of bytes in the file, and the free space still remaining.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::updateBytesWrittenCounts( unsigned int numBytesWritten )
 | |
| {
 | |
|     availFileSize = availFileSize - numBytesWritten;
 | |
|     fSizeWritten  = fSizeWritten   + numBytesWritten;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Tell whether the current column segment file being managed by ColumnInfo,
 | |
| // has filled up all its extents with data.
 | |
| //------------------------------------------------------------------------------
 | |
| bool ColumnInfo::isFileComplete() const
 | |
| {
 | |
|     if ((fSizeWritten / column.width) >= fMaxNumRowsPerSegFile)
 | |
|         return true;
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Initialize last used auto-increment value from the current "next"
 | |
| // auto-increment value taken from the system catalog (or BRM).
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::initAutoInc( const std::string& fullTableName )
 | |
| {
 | |
|     int rc = fAutoIncMgr->init( fullTableName, this );
 | |
| 
 | |
|     return rc;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Reserves the requested number of auto-increment numbers (autoIncCount).
 | |
| // The starting value of the reserved block of numbers is returned in nextValue.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::reserveAutoIncNums(uint32_t autoIncCount, uint64_t& nextValue )
 | |
| {
 | |
|     int rc = fAutoIncMgr->reserveNextRange( autoIncCount, nextValue );
 | |
| 
 | |
|     return rc;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Finished using auto-increment.  Current value can be committed back to the
 | |
| // system catalog (or BRM).
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::finishAutoInc( )
 | |
| {
 | |
|     int rc = fAutoIncMgr->finish( );
 | |
| 
 | |
|     return rc;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Get current dbroot, partition, segment, and HWM for this column.
 | |
| //
 | |
| // We use mutex because this function is called by "one" of the parsing threads
 | |
| // when parsing is complete for all the columns from this column's table.
 | |
| // We use the mutex to insure that this parsing thread, which ends up being
 | |
| // responsible for wrapping up this column, is getting the most up to
 | |
| // date values for dbroot, partition, segment, and HWM which may have been
 | |
| // set by another parsing thread.
 | |
| //------------------------------------------------------------------------------
 | |
| void ColumnInfo::getSegFileInfo( DBRootExtentInfo& fileInfo )
 | |
| {
 | |
|     boost::mutex::scoped_lock lock(fColMutex);
 | |
|     fileInfo.fDbRoot    = curCol.dataFile.fDbRoot;
 | |
|     fileInfo.fPartition = curCol.dataFile.fPartition;
 | |
|     fileInfo.fSegment   = curCol.dataFile.fSegment;
 | |
| 
 | |
|     if (fSizeWritten > 0)
 | |
|         fileInfo.fLocalHwm = (fSizeWritten - 1) / BYTE_PER_BLOCK;
 | |
|     else
 | |
|         fileInfo.fLocalHwm = 0;
 | |
| }
 | |
| 
 | |
//------------------------------------------------------------------------------
// Open a new or existing Dictionary store file based on the DBRoot,
// partition, and segment settings in curCol.dataFile.
//
// bMustExist - if true, the store file is assumed/required to exist and is
//              opened directly; if false, the file is opened when present,
//              else a new store file is created and then opened.
//
// Side effects: allocates fStore (deleted again via closeDctnryStore() on any
// failure) and may append the current LBID to fDictBlocks.
// Returns NO_ERROR on success, else the failing operation's error code.
//------------------------------------------------------------------------------
int ColumnInfo::openDctnryStore( bool bMustExist )
{
    int rc = NO_ERROR;

    // Pick the dictionary implementation by compression type; the compressed
    // variant is configured for bulk load (one active chunk, bulk flag on).
    if ( column.dctnry.fCompressionType != 0)
    {
        DctnryCompress1* dctnryCompress1 = new DctnryCompress1;
        dctnryCompress1->setMaxActiveChunkNum(1);
        dctnryCompress1->setBulkFlag(true);
        fStore = dctnryCompress1;
    }
    else
    {
        fStore = new DctnryCompress0;
    }

    fStore->setLogger(fLog);
    fStore->setColWidth( column.dctnryWidth );

    // Forward the column's default string value, if one was declared.
    if (column.fWithDefault)
        fStore->setDefault( column.fDefaultChr );

    fStore->setImportDataMode( fpTableInfo->getImportDataMode() );

    // If we are in the process of adding an extent to this column,
    // and the extent we are adding is the first extent for the
    // relevant column segment file, then the corresponding dictionary
    // store file will not exist, in which case we must create
    // the store file, else we open the applicable store file.
    if ( (bMustExist) ||
            (colOp->exists(column.dctnry.dctnryOid,
                           curCol.dataFile.fDbRoot,
                           curCol.dataFile.fPartition,
                           curCol.dataFile.fSegment)) )
    {
        // Save HWM chunk (for compressed files) if this seg file calls for it
        // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
        bool useTmpSuffixDctnry = false;
        RETURN_ON_ERROR( saveDctnryStoreHWMChunk( useTmpSuffixDctnry ) );

        // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
        rc = fStore->openDctnry(
                 column.dctnry.dctnryOid,
                 curCol.dataFile.fDbRoot,
                 curCol.dataFile.fPartition,
                 curCol.dataFile.fSegment,
                 useTmpSuffixDctnry );

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "openDctnryStore: error opening existing store file for " <<
                "OID-"      << column.dctnry.dctnryOid    <<
                "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                "; part-"   << curCol.dataFile.fPartition <<
                "; seg-"    << curCol.dataFile.fSegment   <<
                "; tmpFlag-" << useTmpSuffixDctnry         <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

            // Ignore return code from closing file; already in error state
            closeDctnryStore(true); // clean up loose ends
            return rc;
        }

        // Remember the current LBID of the opened store file; presumably used
        // later for block bookkeeping -- confirm against fDictBlocks readers.
        if (INVALID_LBID != fStore->getCurLbid())
            fDictBlocks.push_back(fStore->getCurLbid());

        std::ostringstream oss;
        oss << "Opening existing store file for " << column.colName <<
            "; OID-"    << column.dctnry.dctnryOid    <<
            "; DBRoot-" << curCol.dataFile.fDbRoot    <<
            "; part-"   << curCol.dataFile.fPartition <<
            "; seg-"    << curCol.dataFile.fSegment   <<
            "; hwm-"    << fStore->getHWM()           <<
            "; file-"   << fStore->getFileName();
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );
    }
    else
    {
        // Store file is missing and we are allowed to create it.
        BRM::LBID_t startLbid;
        rc = fStore->createDctnry(
                 column.dctnry.dctnryOid,
                 column.dctnryWidth,      //@bug 3313 - pass string col width
                 curCol.dataFile.fDbRoot,
                 curCol.dataFile.fPartition,
                 curCol.dataFile.fSegment,
                 startLbid);

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "openDctnryStore: error creating new store file for " <<
                "OID-"      << column.dctnry.dctnryOid    <<
                "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                "; part-"   << curCol.dataFile.fPartition <<
                "; seg-"    << curCol.dataFile.fSegment   <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
            fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());

            // Ignore return code from closing file; already in error state
            closeDctnryStore(true); // clean up loose ends
            return rc;
        }

        // Newly created file: open it without the *.tmp backup suffix.
        rc = fStore->openDctnry(
                 column.dctnry.dctnryOid,
                 curCol.dataFile.fDbRoot,
                 curCol.dataFile.fPartition,
                 curCol.dataFile.fSegment,
                 false );

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "openDctnryStore: error opening new store file for " <<
                "OID-"      << column.dctnry.dctnryOid    <<
                "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                "; part-"   << curCol.dataFile.fPartition <<
                "; seg-"    << curCol.dataFile.fSegment   <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

            // Ignore return code from closing file; already in error state
            closeDctnryStore(true); // clean up loose ends
            return rc;
        }

        std::ostringstream oss;
        oss << "Opening new store file for " << column.colName <<
            "; OID-"      << column.dctnry.dctnryOid    <<
            "; DBRoot-"   << curCol.dataFile.fDbRoot    <<
            "; part-"     << curCol.dataFile.fPartition <<
            "; seg-"      << curCol.dataFile.fSegment   <<
            "; file-"     << fStore->getFileName();
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );
    }

    return rc;
}
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Close the current Dictionary store file.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::closeDctnryStore(bool bAbort)
 | |
| {
 | |
|     int rc = NO_ERROR;
 | |
| 
 | |
|     if (fStore)
 | |
|     {
 | |
|         if (bAbort)
 | |
|             rc = fStore->closeDctnryOnly();
 | |
|         else
 | |
|             rc = fStore->closeDctnry();
 | |
| 
 | |
|         if (rc != NO_ERROR)
 | |
|         {
 | |
|             WErrorCodes ec;
 | |
|             std::ostringstream oss;
 | |
|             oss << "closeDctnryStore: error closing store file for " <<
 | |
|                 "OID-"    << column.dctnry.dctnryOid <<
 | |
|                 "; file-" << fStore->getFileName()   <<
 | |
|                 "; " << ec.errorString(rc);
 | |
|             fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
 | |
|         }
 | |
| 
 | |
|         delete fStore;
 | |
|         fStore = 0;
 | |
|     }
 | |
| 
 | |
|     return rc;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Update dictionary store file with specified strings, and return the assigned
 | |
| // tokens (tokenbuf) to be stored in the corresponding column token file.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::updateDctnryStore(char* buf,
 | |
|                                   ColPosPair** pos,
 | |
|                                   const int totalRow,
 | |
|                                   char* tokenBuf)
 | |
| {
 | |
|     long long truncCount = 0;    // No. of rows with truncated values
 | |
| 
 | |
|     // If this is a VARBINARY column; convert the ascii hex string into binary
 | |
|     //  data and fix the length (it's now only half as long).
 | |
|     // Should be safe to modify pos and buf arrays outside a mutex, as no other
 | |
|     // thread should be accessing the strings from the same buffer, for this
 | |
|     // column.
 | |
|     // This only applies to default text mode.  This step is bypassed for
 | |
|     // binary imports, because in that case, the data is already true binary.
 | |
|     if (((curCol.colType == WR_VARBINARY) || (curCol.colType == WR_BLOB)) &&
 | |
|             (fpTableInfo->getImportDataMode() == IMPORT_DATA_TEXT))
 | |
|     {
 | |
| #ifdef PROFILE
 | |
|         Stats::startParseEvent(WE_STATS_COMPACT_VARBINARY);
 | |
| #endif
 | |
| 
 | |
|         for (int i = 0; i < totalRow; i++)
 | |
|         {
 | |
|             pos[i][id].offset =
 | |
|                 compactVarBinary(buf + pos[i][id].start, pos[i][id].offset);
 | |
|         }
 | |
| 
 | |
| #ifdef PROFILE
 | |
|         Stats::startParseEvent(WE_STATS_COMPACT_VARBINARY);
 | |
| #endif
 | |
|     }
 | |
| 
 | |
| #ifdef PROFILE
 | |
|     Stats::startParseEvent(WE_STATS_WAIT_TO_PARSE_DCT);
 | |
| #endif
 | |
|     boost::mutex::scoped_lock lock(fDictionaryMutex);
 | |
| #ifdef PROFILE
 | |
|     Stats::stopParseEvent(WE_STATS_WAIT_TO_PARSE_DCT);
 | |
| #endif
 | |
| 
 | |
|     int rc = fStore->insertDctnry( buf, pos, totalRow, id, tokenBuf, truncCount );
 | |
| 
 | |
|     if (rc != NO_ERROR)
 | |
|     {
 | |
|         WErrorCodes ec;
 | |
|         std::ostringstream oss;
 | |
|         oss << "updateDctnryStore: error adding rows to store file for " <<
 | |
|             "OID-"      << column.dctnry.dctnryOid    <<
 | |
|             "; DBRoot-" << curCol.dataFile.fDbRoot    <<
 | |
|             "; part-"   << curCol.dataFile.fPartition <<
 | |
|             "; seg-"    << curCol.dataFile.fSegment   <<
 | |
|             "; " << ec.errorString(rc);
 | |
|         fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
 | |
|         fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());
 | |
|         return rc;
 | |
|     }
 | |
| 
 | |
|     incSaturatedCnt( truncCount );
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // No action necessary for uncompressed dictionary files
 | |
| //------------------------------------------------------------------------------
 | |
| // @bug 5572 - HDFS usage: add flag used to control *.tmp file usage
 | |
| int ColumnInfo::saveDctnryStoreHWMChunk(bool& needBackup)
 | |
| {
 | |
|     needBackup = false;
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // Truncate specified dictionary store file for this column.
 | |
| // Only applies to compressed columns.
 | |
| //------------------------------------------------------------------------------
 | |
| int ColumnInfo::truncateDctnryStore(
 | |
|     OID /*dctnryOid*/, uint16_t /*root*/, uint32_t /*pNum*/, uint16_t /*sNum*/)
 | |
| const
 | |
| {
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| //------------------------------------------------------------------------------
 | |
| // utility to convert a Status enumeration to a string
 | |
| //------------------------------------------------------------------------------
 | |
| /* static */
 | |
| void ColumnInfo::convertStatusToString(
 | |
|     WriteEngine::Status status,
 | |
|     std::string& statusString )
 | |
| {
 | |
|     static std::string statusStringParseComplete("PARSE_COMPLETE");
 | |
|     static std::string statusStringReadComplete ("READ_COMPLETE");
 | |
|     static std::string statusStringReadProgress ("READ_PROGRESS");
 | |
|     static std::string statusStringNew          ("NEW");
 | |
|     static std::string statusStringErr          ("ERR");
 | |
|     static std::string statusStringUnknown      ("OTHER");
 | |
| 
 | |
|     switch (status)
 | |
|     {
 | |
|         case PARSE_COMPLETE:
 | |
|             statusString = statusStringParseComplete;
 | |
|             break;
 | |
| 
 | |
|         case READ_COMPLETE:
 | |
|             statusString = statusStringReadComplete;
 | |
|             break;
 | |
| 
 | |
|         case READ_PROGRESS:
 | |
|             statusString = statusStringReadProgress;
 | |
|             break;
 | |
| 
 | |
|         case NEW:
 | |
|             statusString = statusStringNew;
 | |
|             break;
 | |
| 
 | |
|         case ERR:
 | |
|             statusString = statusStringErr;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             statusString = statusStringUnknown;
 | |
|             break;
 | |
|     }
 | |
| }
 | |
| 
 | |
| }
 |