mariadb-columnstore-engine/writeengine/bulk/we_columninfo.cpp

/* Copyright (C) 2014 InfiniDB, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA. */

/******************************************************************************
* $Id: we_columninfo.cpp 4737 2013-08-14 20:45:46Z bwilkinson $
*
*******************************************************************************/

#include <cstdlib>
#include <sstream>
#include <unistd.h>
//#define NDEBUG
//#include <cassert>
#include <cctype>

#include "we_columninfo.h"
#include "we_log.h"
#include "we_stats.h"
#include "we_colopbulk.h"
#include "brmtypes.h"
#include "we_columnautoinc.h"
#include "we_dbrootextenttracker.h"
#include "we_brmreporter.h"

#include "we_tableinfo.h"
#include "IDBDataFile.h"
using namespace idbdatafile;

namespace
{

//------------------------------------------------------------------------------
// Convert a single ascii hex digit to its 4-bit binary value.
// The caller must already have verified the character with isxdigit().
//------------------------------------------------------------------------------
inline unsigned char hexNibble(unsigned char c)
{
    if (c <= '9')
        return static_cast<unsigned char>(c - '0');
    if (c <= 'F')
        return static_cast<unsigned char>(c - 'A' + 10);
    return static_cast<unsigned char>(c - 'a' + 10); // 'a'..'f'
}

//------------------------------------------------------------------------------
// Do a fast ascii-hex-string to binary data conversion. This is done in-place.
// We take bytes 1 and 2 and put them back into byte 1; 3 and 4 into 2; etc.
// The first character of each pair supplies the high order 4 bits of the
// output byte, and the second character supplies the low order 4 bits.
//
// charTmpBuf  - (in/out) holds fieldLength ascii hex characters on input, and
//               the packed binary bytes on output.
// fieldLength - number of input characters.  Only complete pairs are
//               converted (fieldLength / 2 output bytes).
//
// Returns the new length (fieldLength / 2).  If any invalid hex character is
// encountered (not 0-9, A-F, or a-f), conversion stops at that point (leaving
// charTmpBuf partially converted) and COLPOSPAIR_NULL_TOKEN_OFFSET is returned
// so that a null token will be used.
//------------------------------------------------------------------------------
unsigned int compactVarBinary(char* charTmpBuf, int fieldLength)
{
    unsigned char*       p = reinterpret_cast<unsigned char*>(charTmpBuf);
    const unsigned char* f = p;

    for (int i = 0; i < fieldLength / 2; i++, p++)
    {
        // Input is read as unsigned char: passing a negative value (any byte
        // >= 0x80 on platforms where char is signed) to isxdigit() is
        // undefined behavior.

        // Store even number byte in high order 4 bits of next output byte.
        // The output position never runs ahead of the input position, so the
        // character is always read before its slot can be overwritten.
        unsigned char v = *f++;
        if (!isxdigit(v))
            return WriteEngine::COLPOSPAIR_NULL_TOKEN_OFFSET;
        *p = static_cast<unsigned char>(hexNibble(v) << 4);

        // Store odd number byte in low order 4 bits of next output byte
        v = *f++;
        if (!isxdigit(v))
            return WriteEngine::COLPOSPAIR_NULL_TOKEN_OFFSET;
        *p |= hexNibble(v);
    }

    // The read thread rejects rows with incomplete (odd length) varbinary
    // fields, so no special handling of a trailing unpaired character is
    // needed here.  We should only get to this function with an even
    // fieldLength.
    return (fieldLength / 2);
}

}

namespace WriteEngine
{

//------------------------------------------------------------------------------
// ColumnInfo constructor
//
// logger        - Log object used by this column and the helpers it creates
// idIn          - value stored in the "id" member
// columnIn      - job column description; copied into the "column" member
// pDBRootExtTrk - DBRoot extent tracker; deleted by the ColumnInfo destructor
// pTableInfo    - TableInfo pointer stored in fpTableInfo
//------------------------------------------------------------------------------
ColumnInfo::ColumnInfo(Log*                 logger,
                       int                  idIn,
                       const JobColumn&     columnIn,
                       DBRootExtentTracker* pDBRootExtTrk,
                       TableInfo*           pTableInfo) :
    id(idIn),
    lastProcessingTime(0),
#ifdef PROFILE
    totalProcessingTime(0),
#endif
    fColBufferMgr(0),
    availFileSize(0),
    fileSize(0),
    fLog(logger),
    fDelayedFileStartBlksSkipped(0),
    fSavedLbid(0),
    fSizeWrittenStart(0),
    fSizeWritten(0),
    fLastInputRowInCurrentExtent(0),
    fLoadingAbbreviatedExtent(false),
    fColExtInf(0),
    fMaxNumRowsPerSegFile(0),
    fStore(0),
    fAutoIncLastValue(0),
    fSaturatedRowCnt(0),
    fpTableInfo(pTableInfo),
    fAutoIncMgr(0),
    fDbRootExtTrk(pDBRootExtTrk),
    fColWidthFactor(1),
    fDelayedFileCreation(INITIAL_DBFILE_STAT_FILE_EXISTS),
    fRowsPerExtent(0)
{
    column         = columnIn;
    fRowsPerExtent = BRMWrapper::getInstance()->getExtentRows();

    // Decide whether this column tracks min/max CasualPartition info.
    // Float, double, varbinary (treated like a char dictionary for now),
    // token, and dictionary-backed char columns do not; every other type
    // (including any type not listed here) does.
    bool bTrackCasualPartition = true;
    switch ( column.weType )
    {
        case WriteEngine::WR_FLOAT:
        case WriteEngine::WR_DOUBLE:
        case WriteEngine::WR_VARBINARY:
        case WriteEngine::WR_TOKEN:
            bTrackCasualPartition = false;
            break;

        case WriteEngine::WR_CHAR:
            bTrackCasualPartition = (column.colType != COL_TYPE_DICT);
            break;

        default:
            break;
    }

    // ColExtInfBase is a stub class that won't do anything; it is used for
    // the types that do not track CasualPartition info.
    if (bTrackCasualPartition)
        fColExtInf = new ColExtInf(column.mapOid, logger);
    else
        fColExtInf = new ColExtInfBase( );

    colOp.reset(new ColumnOpBulk(logger, column.compressionType));

    fMaxNumRowsPerSegFile =
        fRowsPerExtent * Config::getExtentsPerSegmentFile();

    // Create auto-increment object to manage auto-increment next-value
    // (formerly used ColumnAutoIncJob for Shared Everything).
    if (column.autoIncFlag)
        fAutoIncMgr = new ColumnAutoIncIncremental(logger);
}

//------------------------------------------------------------------------------
// ColumnInfo destructor
// Releases the column buffer manager and dictionary blocks (via
// clearMemory), then the extent info, auto-increment manager, and DBRoot
// extent tracker objects.
//------------------------------------------------------------------------------
ColumnInfo::~ColumnInfo()
{
    clearMemory();

    // The dictionary store (fStore) is deliberately neither closed nor
    // deleted here.  Closing the dictionary file also updates the extent
    // map, which we don't want to do if we are aborting the job.  The
    // application code should be closing the dictionary as needed, instead
    // of relying on the destructor.

    // Deleting a null pointer is a no-op, so no null checks are needed.
    delete fColExtInf;
    delete fAutoIncMgr;
    delete fDbRootExtTrk;
}

//------------------------------------------------------------------------------
// Release the memory held by this ColumnInfo object: the column buffer
// manager is deleted (and its pointer reset), and the list of dictionary
// blocks is emptied.
//------------------------------------------------------------------------------
void ColumnInfo::clearMemory( )
{
    // delete on a null pointer does nothing, so this is safe to call
    // repeatedly.
    delete fColBufferMgr;
    fColBufferMgr = 0;

    fDictBlocks.clear();
}

//------------------------------------------------------------------------------
// Called at the start of the job when we have encountered a PM that has no
// DB file for this column, or whose HWM extent is disabled.  Records that the
// starting DB file still has to be created; the file itself is created
// if/when we determine that we have rows to be processed.
//
// dbRoot, partition, segment - location stored into curCol by setColParam
// hwm      - saved in fDelayedFileStartBlksSkipped
// bEmptyPM - true selects the "create file on empty PM" state; false selects
//            the "create file on disabled HWM extent" state
//------------------------------------------------------------------------------
void ColumnInfo::setupDelayedFileCreation(
    uint16_t dbRoot,
    uint32_t partition,
    uint16_t segment,
    HWM       hwm,
    bool      bEmptyPM )
{
    fDelayedFileCreation = bEmptyPM ?
        INITIAL_DBFILE_STAT_CREATE_FILE_ON_EMPTY :
        INITIAL_DBFILE_STAT_CREATE_FILE_ON_DISABLED;

    fDelayedFileStartBlksSkipped = hwm;
    fSavedLbid                   = INVALID_LBID;

    // Initialize curCol with this column's attributes and target location.
    colOp->initColumn ( curCol );
    colOp->setColParam( curCol, id,
                        column.width, column.dataType, column.weType,
                        column.mapOid, column.compressionType,
                        dbRoot, partition, segment );
}

//------------------------------------------------------------------------------
// Create a DB file as part of delayed file creation.  See setupDelayedFile-
// Creation for an explanation.
//
// Does nothing (returns NO_ERROR) if fDelayedFileCreation indicates that the
// file already exists.  Otherwise a BRM column extent is allocated, the
// column segment file is extended/created, the matching dictionary store file
// is created (dictionary columns only), and setupInitialColumnExtent() is
// called for the new extent.
//
// tableName - passed through to setupInitialColumnExtent().
//
// Returns NO_ERROR on success, ERR_FILE_CREATE if an earlier call already
// failed, else the error code of the step that failed.  Any failure leaves
// fDelayedFileCreation set to INITIAL_DBFILE_STAT_ERROR_STATE, so that later
// callers fail fast instead of retrying.
//------------------------------------------------------------------------------
int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName )
{
    int rc = NO_ERROR;

    // For optimization sake, we use a separate mutex (fDelayedFileCreateMutex)
    // exclusively reserved to be used as the gatekeeper to this function.
    // No sense in waiting for a fColMutex lock, when 99.99% of the time,
    // all we need to do is check fDelayedFileCreation, see that its value
    // is INITIAL_DBFILE_STAT_FILE_EXISTS, and exit the function.
    boost::mutex::scoped_lock lock(fDelayedFileCreateMutex);

    if (fDelayedFileCreation == INITIAL_DBFILE_STAT_FILE_EXISTS)
        return NO_ERROR;

    // Don't try creating extent again if we are already in error state with a
    // previous thread failing to create this extent.
    if (fDelayedFileCreation == INITIAL_DBFILE_STAT_ERROR_STATE)
    {
        rc = ERR_FILE_CREATE;
        std::ostringstream oss;
        oss << "Previous attempt failed to create initial dbroot" <<
            curCol.dataFile.fDbRoot <<
            " extent for column file OID-" << column.mapOid <<
            "; dbroot-"       << curCol.dataFile.fDbRoot     <<
            "; partition-"    << curCol.dataFile.fPartition;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        return rc;
    }

    // Once we get this far, we go ahead and acquire a fColMutex lock.  The
    // fDelayedFileCreateMutex lock might suffice, but better to explicitly
    // lock fColMutex since we are modifying attributes that we typically
    // change within the scope of a fColMutex lock.
    boost::mutex::scoped_lock lock2(fColMutex);

    // Target location recorded earlier in curCol (see
    // setupDelayedFileCreation).
    uint16_t dbRoot      = curCol.dataFile.fDbRoot;
    uint32_t partition   = curCol.dataFile.fPartition;

    // We don't have a file on this PM, so we create an initial file
    ColumnOpBulk tempColOp(fLog, column.compressionType);

    // Arguments for the extent allocation and file extension calls below.
    // Those marked "output" carry no input value.
    bool         createLeaveFileOpen = false;
    IDBDataFile* createPFile      = 0;
    uint16_t     createDbRoot     = dbRoot;
    uint32_t     createPartition  = partition;
    uint16_t     createSegment    = 0;
    std::string  createSegFile;
    HWM          createHwm        = 0;    //output
    BRM::LBID_t  createStartLbid  = 0;    //output
    bool         createNewFile    = true; //output
    int          createAllocSize  = 0;    //output
    char*        createHdrs       = 0;    //output

    // Step 1: allocate the extent in BRM.  createPartition and createSegment
    // are read back after the call, since the extent may end up in a
    // different partition than requested.
    std::string allocErrMsg;
    rc = fpTableInfo->allocateBRMColumnExtent( curCol.dataFile.fid,
        createDbRoot,
        createPartition,
        createSegment,
        createStartLbid,
        createAllocSize,
        createHwm,
        allocErrMsg );
    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "Error creating initial dbroot" << dbRoot <<
            " BRM extent for OID-" << column.mapOid <<
            "; dbroot-"     << dbRoot    <<
            "; partition-"  << partition <<
            "; " << ec.errorString(rc)   <<
            "; " << allocErrMsg;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
        return rc;
    }
    uint16_t segment = createSegment;
    partition        = createPartition;  // update our partition variable in
                                         // case extent was added to a different
                                         // partition than we intended
    BRM::LBID_t lbid = createStartLbid;

    // Step 2: add the extent to the column segment file on disk, using the
    // temporary ColumnOpBulk object.  createLeaveFileOpen is false here.
    rc = tempColOp.extendColumn(
        curCol,
        createLeaveFileOpen,
        createHwm,
        createStartLbid,
        createAllocSize,
        createDbRoot,
        createPartition,
        createSegment,
        createSegFile,
        createPFile,
        createNewFile,
        createHdrs);

    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "Error adding initial dbroot" << dbRoot <<
            " extent to column file OID-" << column.mapOid <<
            "; dbroot-"       << dbRoot    <<
            "; partition-"    << partition <<
            "; segment-"      << segment   <<
            "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
        return rc;
    }

    // We don't have a file on this PM (or HWM extent is disabled), so we
    // create a new file to load
    std::ostringstream oss1;
    if (fDelayedFileCreation == INITIAL_DBFILE_STAT_CREATE_FILE_ON_EMPTY)
        oss1 << "PM empty; Creating starting column extent";
    else
        oss1 << "HWM extent disabled; Creating starting column extent";
    oss1 << " on DBRoot-" << createDbRoot <<
        " for OID-" << column.mapOid   <<
        "; part-"   << createPartition <<
        "; seg-"    << createSegment   <<
        "; hwm-"    << createHwm       <<
        "; LBID-"   << createStartLbid <<
        "; file-"   << createSegFile;
    fLog->logMsg( oss1.str(), MSGLVL_INFO2 );

    // Step 3: Create corresponding dictionary store file if applicable
    if(column.colType == COL_TYPE_DICT)
    {
        std::ostringstream oss;
        oss << "Creating starting dictionary extent on dbroot"<<dbRoot <<
            " (segment " << segment <<
            ") for dictionary OID " << column.dctnry.dctnryOid;
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );
        BRM::LBID_t dLbid; // passed to createDctnry; not read afterwards here
        Dctnry* tempD = 0;

        // A nonzero dictionary compression type selects DctnryCompress1;
        // otherwise DctnryCompress0 is used.
        if (column.dctnry.fCompressionType != 0)
        {
            DctnryCompress1* tempD1;
            tempD1 = new DctnryCompress1;
            tempD1->setMaxActiveChunkNum(1);
            tempD1->setBulkFlag(true);
            tempD = tempD1;
        }
        else
        {
            tempD = new DctnryCompress0;
        }

        // scoped_ptr takes ownership, so the temporary dictionary object is
        // deleted on every exit path out of this block.
        boost::scoped_ptr<Dctnry> refDctnry(tempD);
        rc = tempD->createDctnry(
            column.dctnry.dctnryOid,
            column.dctnryWidth,
            dbRoot,
            partition,
            segment,
            dLbid,
            true); // creating the store file
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "Error creating initial dbroot" << dbRoot <<
                " extent for dictionary file OID-" <<
                column.dctnry.dctnryOid        <<
                "; dbroot-"       << dbRoot    <<
                "; partition-"    << partition <<
                "; segment-"      << segment   <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
            fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
            return rc;
        }

        // The store file only needs to exist at this point, so the temporary
        // dictionary object is closed right away.
        rc = tempD->closeDctnry();
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "Error creating/closing initial dbroot" <<
                dbRoot << " extent for dictionary file OID-" <<
                column.dctnry.dctnryOid        <<
                "; partition-"    << partition <<
                "; segment-"      << segment   <<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
            fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;
            return rc;
        }
    } // end of dictionary column processing

    // Check for special case: where we skip initial blk(s) at the start of
    // the "very" 1st file on each PM.
    // We are checking to see if the PM is empty, "and" if the partition is 0.
    // The PM could be empty if all the existing files on the PM were dropped
    // or disabled, but we don't want/need to do block skipping in this case;
    // so we also check to see if the partition number is 0, denoting the 1st
    // extent for the PM.
    // (The reason we are skipping blocks in partition 0, is because import
    // does this with the partition 0, segment 0 file created by DDL.
    // We skip blocks on the other PMs, so that the 1st file created on each
    // PM will employ the same block skipping.)
    HWM hwm = 0;
    if ((fDelayedFileCreation == INITIAL_DBFILE_STAT_CREATE_FILE_ON_EMPTY) &&
        (partition == 0))
    {
        hwm = fDelayedFileStartBlksSkipped;
    }

    // Step 4: set up this column to load into the new extent.  Note that the
    // original dbRoot is passed here, together with the (possibly updated)
    // partition and segment.
    rc = setupInitialColumnExtent(
        dbRoot, partition, segment,
        tableName, lbid, hwm, hwm, false, true );

    // Record the final outcome so that subsequent callers either return
    // immediately (file exists) or fail fast (error state).
    if (rc == NO_ERROR)
        fDelayedFileCreation = INITIAL_DBFILE_STAT_FILE_EXISTS;
    else
        fDelayedFileCreation = INITIAL_DBFILE_STAT_ERROR_STATE;

    return rc;
}

//------------------------------------------------------------------------------
// Add an extent for this column.  The next segment file in the DBRoot,
// partition, segment number rotation will be selected for the extent.
//
// NOTE: no mutex lock is employed here.  It is assumed that the calling
//       application code is taking care of this, if it is needed.
//------------------------------------------------------------------------------
int ColumnInfo::extendColumn( bool saveLBIDForCP )
{
    //..We assume the applicable file is already open, so...
    //  the HWM of the current segment file should be set to reference the
    //  last block in the current file (as specified in curCol.dataFile.pFile).
    //
    //  Prior to adding compression, we used ftell() to set HWM, but that
    //  would not work for compressed data.  Code now assumes that if we
    //  are adding an extent, that fSizeWritten is a multiple of blksize,
    //  which it should be.  If we are adding an extent, fSizeWritten should
    //  point to the last byte of a full extent boundary.
    HWM hwm = (fSizeWritten / BYTE_PER_BLOCK) - 1;

    //..Save info about the current segment column file, and close that file.
    addToSegFileList( curCol.dataFile, hwm );

    // Close current segment column file prior to adding extent to next seg file
    int rc = closeColumnFile( true, false );
    if (rc != NO_ERROR)
    {
        std::ostringstream oss;
        oss << "extendColumn: error closing extent in "    <<
               "column OID-" << curCol.dataFile.fid        <<
               "; DBRoot-"   << curCol.dataFile.fDbRoot    <<
               "; part-"     << curCol.dataFile.fPartition <<
               "; seg-"      << curCol.dataFile.fSegment   <<
               "; hwm-"      << hwm;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

        return rc;
    }

    // Call Config::initConfigCache() to force the Config class
    // to reload config cache "if" the config file has changed.
    Config::initConfigCache();

    bool bChangeFlag = Config::hasLocalDBRootListChanged();
    //if (fLog->isDebug( DEBUG_1 ))
    //{
    //  std::ostringstream oss;
    //  oss << "Checking DBRootListChangeFlag: " << bChangeFlag;
    //  fLog->logMsg( oss.str(), MSGLVL_INFO2 );
    //}
    if (bChangeFlag)
    {
        rc = ERR_BULK_DBROOT_CHANGE;

        WErrorCodes ec;
        std::ostringstream oss;
        oss << "extendColumn: DBRoots changed; " <<
            ec.errorString( rc );
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

        return rc;
    }

    //..Declare variables used to advance to the next extent
    uint16_t    dbRootNext   = 0;
    uint32_t    partitionNext= 0;
    uint16_t    segmentNext  = 0;
    HWM         hwmNext      = 0;
    BRM::LBID_t startLbid;

    //..When we finish an extent, we typically should be advancing to the next
    //  DBRoot to create a "new" extent.  But "if" the user has moved a DBRoot
    //  from another PM to this PM, then we may have a partial extent that we
    //  need to fill up.  Here's where we just fill out such partially filled
    //  extents with empty values, until we can get back to a "normal" full
    //  extent boundary case.
    bool bAllocNewExtent = false;
    while (!bAllocNewExtent)
    {
        //..If we have a DBRoot Tracker, then use that to determine next DBRoot
        //  to rotate to, else the old legacy BRM extent allocator will assign,
        //  if we pass in a dbroot of 0.
        bAllocNewExtent = true;
        if (fDbRootExtTrk)
        {
            bAllocNewExtent = fDbRootExtTrk->nextSegFile(
                dbRootNext, partitionNext, segmentNext, hwmNext, startLbid );
        }

        // If our next extent is a partial extent, then fill out that extent
        // to the next full extent boundary, and round up HWM accordingly.
        if (!bAllocNewExtent)
        {
            rc = extendColumnOldExtent( dbRootNext,
                partitionNext, segmentNext, hwmNext );
            if (rc != NO_ERROR)
                return rc;
        }
    }

    // Once we are back on a "normal" full extent boundary, we add a new extent
    // to resume adding rows.
    rc = extendColumnNewExtent( saveLBIDForCP, dbRootNext, partitionNext );

    return rc;
}

//------------------------------------------------------------------------------
// Add a new extent to this column, at the specified DBRoot.  Partition may be
// used if DBRoot is empty.
//------------------------------------------------------------------------------
int ColumnInfo::extendColumnNewExtent(
    bool     saveLBIDForCP,
    uint16_t dbRootNew,
    uint32_t partitionNew )
{
    //..Declare variables used to advance to the next extent
    IDBDataFile* pFileNew     = 0;
    HWM         hwmNew       = 0;
    bool        newFile      = false;
    std::string segFileNew;

    uint16_t    segmentNew   = 0;
    BRM::LBID_t startLbid;

    char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];

    // Extend the column by adding an extent to the next
    // DBRoot, partition, and segment file in the rotation
    int allocsize = 0;
    std::string allocErrMsg;
    int rc = fpTableInfo->allocateBRMColumnExtent( curCol.dataFile.fid,
        dbRootNew,
        partitionNew,
        segmentNew,
        startLbid,
        allocsize,
        hwmNew,
        allocErrMsg );
    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "extendColumnNewExtent: error creating BRM extent after " <<
            "column OID-" << curCol.dataFile.fid          <<
            "; DBRoot-"   << curCol.dataFile.fDbRoot      <<
            "; part-"     << curCol.dataFile.fPartition   <<
            "; seg-"      << curCol.dataFile.fSegment;

        oss << "; newDBRoot-" << dbRootNew                <<
            "; newpart-"      << partitionNew             <<
            "; " << ec.errorString(rc)                    <<
            "; " << allocErrMsg;
        fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
        fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());

        return rc;
    }

    rc = colOp->extendColumn ( curCol,
        true,  // leave file open
        hwmNew,
        startLbid,
        allocsize,
        dbRootNew,
        partitionNew,
        segmentNew,
        segFileNew,
        pFileNew,
        newFile,
        hdr );
    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "extendColumnNewExtent: error adding file extent after " <<
            "column OID-" << curCol.dataFile.fid          <<
            "; DBRoot-"   << curCol.dataFile.fDbRoot      <<
            "; part-"     << curCol.dataFile.fPartition   <<
            "; seg-"      << curCol.dataFile.fSegment;

        oss << "; newDBRoot-" << dbRootNew                <<
            "; newpart-"      << partitionNew             <<
            "; newseg-"       << segmentNew               <<
            "; fbo-"          << hwmNew                   <<
            "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
        fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());

        if (pFileNew)
            colOp->closeFile( pFileNew ); // clean up loose ends

        return rc;
    }

    std::ostringstream oss;
    oss << "Add column extent OID-" << curCol.dataFile.fid <<
        "; DBRoot-" << dbRootNew    <<
        "; part-"   << partitionNew <<
        "; seg-"    << segmentNew   <<
        "; hwm-"    << hwmNew       <<
        "; LBID-"   << startLbid    <<
        "; file-"   << segFileNew;
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );

    // Save the LBID with our CP extent info, so that we can update extent map
    if (saveLBIDForCP)
    {
        int rcLBID = fColExtInf->updateEntryLbid( startLbid );

        // If error occurs, we log WARNING, but we don't fail the job.
        if (rcLBID != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "updateEntryLbid failed for OID-" << curCol.dataFile.fid <<
                   "; LBID-" << startLbid <<
                   "; CasualPartition info may become invalid; " <<
                   ec.errorString(rcLBID);
            fLog->logMsg( oss.str(), rcLBID, MSGLVL_WARNING );
        }
    }

    //..Reset data members to reflect where we are in the newly
    //  opened column segment file.  The file may be a new file, or we may
    //  be adding an extent to an existing column segment file.
    curCol.dataFile.hwm          = hwmNew;
    curCol.dataFile.pFile        = pFileNew;
    curCol.dataFile.fPartition   = partitionNew;
    curCol.dataFile.fSegment     = segmentNew;
    curCol.dataFile.fDbRoot      = dbRootNew;
    curCol.dataFile.fSegFileName = segFileNew;

    rc = resetFileOffsetsNewExtent(hdr);
    if (rc != NO_ERROR)
    {
        std::ostringstream oss;
        oss << "extendColumnNewExtent: error moving to new extent in " <<
               "column OID-" << curCol.dataFile.fid           <<
               "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
               "; part-"     << curCol.dataFile.fPartition    <<
               "; seg-"      << curCol.dataFile.fSegment      <<
               "; hwm-"      << curCol.dataFile.hwm;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

        if (pFileNew)
            closeColumnFile( false, true ); // clean up loose ends

        return rc;
    }

    if (fLog->isDebug( DEBUG_1 ))
    {
        std::ostringstream oss2;
        oss2 << "Extent added to column OID-" << curCol.dataFile.fid <<
            "; DBRoot-" << dbRootNew    <<
            "; part-"   << partitionNew <<
            "; seg-"    << segmentNew   <<
            "; begByte-"<< fSizeWritten <<
            "; endByte-"<< fileSize     <<
            "; freeBytes-" << availFileSize;
        fLog->logMsg( oss2.str(), MSGLVL_INFO2 );
    }

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Fill out existing partial extent to extent boundary, so that we can resume
// inserting rows on an extent boundary basis.  This use case should only take
// place when a DBRoot with a partial extent has been moved from one PM to
// another.
//------------------------------------------------------------------------------
int ColumnInfo::extendColumnOldExtent(
    uint16_t    dbRootNext,
    uint32_t    partitionNext,
    uint16_t    segmentNext,
    HWM         hwmNext )
{
    const unsigned int BLKS_PER_EXTENT =
        (fRowsPerExtent * column.width)/BYTE_PER_BLOCK;
    HWM hwmNextExtentBoundary = hwmNext;

    // Round up HWM to the end of the current extent
    unsigned int nBlks = hwmNext + 1;
    unsigned int nRem  = nBlks % BLKS_PER_EXTENT;
    if (nRem > 0)
        hwmNextExtentBoundary = nBlks - nRem + BLKS_PER_EXTENT - 1;
    else
        hwmNextExtentBoundary = nBlks - 1;

    std::ostringstream oss;
    oss << "Padding partial extent to extent boundary in OID-" <<
           curCol.dataFile.fid <<
        "; DBRoot-" << dbRootNext    <<
        "; part-"   << partitionNext <<
        "; seg-"    << segmentNext   <<
        "; oldhwm-" << hwmNext       <<
        "; newhwm-" << hwmNextExtentBoundary;
    fLog->logMsg( oss.str(), MSGLVL_INFO2 );

    long long fileSizeBytes;
    int rc = colOp->getFileSize( curCol.dataFile.fid,
        dbRootNext, partitionNext, segmentNext, fileSizeBytes);
    if (rc != NO_ERROR)
    {
        std::ostringstream oss;
        oss << "extendColumnOldExtent: error padding partial extent for " <<
               "column OID-" << curCol.dataFile.fid           <<
               "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
               "; part-"     << curCol.dataFile.fPartition    <<
               "; seg-"      << curCol.dataFile.fSegment      <<
               "; hwm-"      << curCol.dataFile.hwm;
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

        return rc;
    }

    curCol.dataFile.pFile        = 0;
    curCol.dataFile.fDbRoot      = dbRootNext;
    curCol.dataFile.fPartition   = partitionNext;
    curCol.dataFile.fSegment     = segmentNext;
    curCol.dataFile.hwm          = hwmNextExtentBoundary;
    curCol.dataFile.fSegFileName.clear();

    // See if we have an abbreviated extent that needs to be expanded on disk
    if (fileSizeBytes == (long long)INITIAL_EXTENT_ROWS_TO_DISK * column.width)
    {
        std::string segFile;

        // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
        IDBDataFile* pFile = colOp->openFile( curCol,
            dbRootNext, partitionNext, segmentNext, segFile, true );
        if ( !pFile )
        {
            std::ostringstream oss;
            rc = ERR_FILE_OPEN;
            oss << "extendColumnOldExtent: error padding partial extent for " <<
                   "column OID-" << curCol.dataFile.fid           <<
                   "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
                   "; part-"     << curCol.dataFile.fPartition    <<
                   "; seg-"      << curCol.dataFile.fSegment      <<
                   "; hwm-"      << curCol.dataFile.hwm;
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

            return rc;
        }

        rc = colOp->expandAbbrevColumnExtent( pFile, dbRootNext,
            column.emptyVal, column.width);
        if (rc != NO_ERROR)
        {
            std::ostringstream oss;
            oss << "extendColumnOldExtent: error padding partial extent for " <<
                   "column OID-" << curCol.dataFile.fid           <<
                   "; DBRoot-"   << curCol.dataFile.fDbRoot       <<
                   "; part-"     << curCol.dataFile.fPartition    <<
                   "; seg-"      << curCol.dataFile.fSegment      <<
                   "; hwm-"      << curCol.dataFile.hwm;
            fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
            fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());

            colOp->closeFile( pFile );

            return rc;
        }

        colOp->closeFile( pFile );
    }

    addToSegFileList( curCol.dataFile, hwmNextExtentBoundary );

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Record the HWM for a segment file in fSegFileUpdateList.  If the list already
// holds an entry with the same partition and segment, that entry's HWM is
// overwritten; otherwise dataFile (stamped with the new HWM) is appended.
// The list is accessed at the end of the import to update the HWM in the
// ExtentMap.
// dmc-could optimize later by changing fSegFileUpdateList from a vector
// to a map or hashtable with a key consisting of partition and segment.
//
// dataFile - segment file to record; its hwm member is overwritten with "hwm"
//            when a new entry has to be appended
// hwm      - high water mark to be recorded for the segment file
//------------------------------------------------------------------------------
void ColumnInfo::addToSegFileList( File& dataFile, HWM hwm )
{
    // First choice: refresh an entry we already have for this part/seg.
    for (size_t idx = 0; idx < fSegFileUpdateList.size(); ++idx)
    {
        File& entry = fSegFileUpdateList[idx];

        if ((entry.fPartition != dataFile.fPartition) ||
            (entry.fSegment   != dataFile.fSegment))
        {
            continue;
        }

        if (fLog->isDebug( DEBUG_1 ))
        {
            std::ostringstream msg;
            msg << "Updating HWM list" << "; column OID-" << dataFile.fid
                << "; DBRoot-" << dataFile.fDbRoot
                << "; part-"   << dataFile.fPartition
                << "; seg-"    << dataFile.fSegment
                << "; oldhwm-" << entry.hwm
                << "; newhwm-" << hwm;
            fLog->logMsg( msg.str(), MSGLVL_INFO2 );
        }

        entry.hwm = hwm;
        return;
    }

    // No entry yet for this part/seg, so append one.
    if (fLog->isDebug( DEBUG_1 ))
    {
        std::ostringstream msg;
        msg << "Adding to HWM list" << "; column OID-" << dataFile.fid
            << "; DBRoot-" << dataFile.fDbRoot
            << "; part-"   << dataFile.fPartition
            << "; seg-"    << dataFile.fSegment
            << "; hwm-"    << hwm;
        fLog->logMsg( msg.str(), MSGLVL_INFO2 );
    }

    dataFile.hwm = hwm;
    fSegFileUpdateList.push_back( dataFile );
}

//------------------------------------------------------------------------------
// Reset the file size/offset bookkeeping (fileSize, fSizeWritten,
// fSizeWrittenStart, availFileSize) when we start working on the next extent,
// based on the HWM stored in curCol.dataFile.  If an output buffer manager
// exists, it is pointed at the current file and the file is positioned at the
// HWM byte offset.
//
// Returns NO_ERROR, or the error reported through RETURN_ON_ERROR.
//------------------------------------------------------------------------------
int ColumnInfo::resetFileOffsetsNewExtent(const char* /*hdr*/)
{
    // Logical size is that of full (unabbreviated) extents.
    setFileSize( curCol.dataFile.hwm, false );

    const long long hwmByteOffset = (long long)curCol.dataFile.hwm *
                                    (long long)BYTE_PER_BLOCK;
    fSizeWritten      = hwmByteOffset;
    fSizeWrittenStart = fSizeWritten;
    availFileSize     = fileSize - fSizeWritten;

    // If we are adding an extent as part of preliminary block skipping, then
    // we won't have a ColumnBufferManager object yet, but that's okay, because
    // we are only adding the empty extent at this point.
    if (!fColBufferMgr)
        return NO_ERROR;

    RETURN_ON_ERROR( fColBufferMgr->setDbFile(
                     curCol.dataFile.pFile, curCol.dataFile.hwm, 0) );

    RETURN_ON_ERROR( colOp->setFileOffset(curCol.dataFile.pFile,
                     hwmByteOffset) );

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Compute the current size of the file in raw (uncompressed) bytes and store
// it in fileSize.
//
// hwm        - high water mark (block number) the size is derived from; only
//              used when abbrevFlag is zero
// abbrevFlag - nonzero: the file is a fixed size abbreviated extent of
//              INITIAL_EXTENT_ROWS_TO_DISK rows;
//              zero: the "logical" size is the number of rows held by blocks
//              0 through hwm, rounded up to a whole number of extents.
//------------------------------------------------------------------------------
void ColumnInfo::setFileSize( HWM hwm, int abbrevFlag )
{
    if (abbrevFlag)
    {
        // Abbreviated extent: size is fixed by the initial row allocation.
        fileSize = (INITIAL_EXTENT_ROWS_TO_DISK * curCol.colWidth);
        return;
    }

    const unsigned int rowsPerExtent = fRowsPerExtent;

    // Rows that fit into blocks 0 through hwm.
    long long rowCount = ((long long)(hwm+1) * (long long)BYTE_PER_BLOCK) /
                          (long long)curCol.colWidth;

    // A partially filled trailing extent counts as a full extent.
    const long long rowsInLastExtent = rowCount % rowsPerExtent;
    if (rowsInLastExtent != 0)
    {
        rowCount = rowCount - rowsInLastExtent + rowsPerExtent;
    }

    fileSize = rowCount * curCol.colWidth;
}

//------------------------------------------------------------------------------
// Decide whether we are loading into an abbreviated extent.  The flag
// fLoadingAbbreviatedExtent is set when the current file is partition 0,
// segment 0, and fileSize still equals the abbreviated extent size
// (INITIAL_EXTENT_ROWS_TO_DISK rows).  The flag tells us (later on) that the
// extent still needs to be expanded and filled before new extents are added.
// The flag is never cleared here.
//------------------------------------------------------------------------------
void ColumnInfo::setAbbrevExtentCheck( )
{
// DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated?
    const bool bFirstSegFile = (curCol.dataFile.fPartition == 0) &&
                               (curCol.dataFile.fSegment   == 0);
    if (!bFirstSegFile)
        return;

    if (fileSize != (INITIAL_EXTENT_ROWS_TO_DISK * curCol.colWidth))
        return;

    fLoadingAbbreviatedExtent = true;

    if (fLog->isDebug( DEBUG_1 ))
    {
        std::ostringstream msg;
        msg << "Importing into abbreviated extent, column OID-"
            << curCol.dataFile.fid
            << "; DBRoot-"   << curCol.dataFile.fDbRoot
            << "; part-"     << curCol.dataFile.fPartition
            << "; seg-"      << curCol.dataFile.fSegment
            << "; fileSize-" << fileSize;
        fLog->logMsg( msg.str(), MSGLVL_INFO2 );
    }
}

//------------------------------------------------------------------------------
// If this is an abbreviated extent, we expand the extent to a full extent on
// disk, by initializing the necessary number of remaining blocks.
// Does nothing (and returns NO_ERROR) if fLoadingAbbreviatedExtent is not set.
//
// bRetainFilePos - true:  the file position in effect on entry is restored
//                         before returning;
//                  false: the file position is not restored (the file is
//                         positioned at end of file before expanding).
//
// Returns NO_ERROR on success; else the error code from the failed seek or
// expand operation, after logging it and adding it to the BRMReporter.
//------------------------------------------------------------------------------
int ColumnInfo::expandAbbrevExtent( bool bRetainFilePos )
{
    if (!fLoadingAbbreviatedExtent)
        return NO_ERROR;

    off64_t oldOffset = 0;
    if (bRetainFilePos)
    {
        oldOffset = curCol.dataFile.pFile->tell();
    }

    // Position at end of file before expanding.  A failed seek is reported
    // instead of being ignored; expanding from an unexpected file position
    // could presumably overwrite existing data (confirm against ColumnOp).
    int rc = colOp->setFileOffset( curCol.dataFile.pFile, 0, SEEK_END );
    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream ossSeek;
        ossSeek << "expandAbbrevExtent: error seeking to end of file for " <<
               "OID-"      << curCol.dataFile.fid        <<
               "; DBRoot-" << curCol.dataFile.fDbRoot    <<
               "; part-"   << curCol.dataFile.fPartition <<
               "; seg-"    << curCol.dataFile.fSegment   <<
               "; " << ec.errorString(rc);
        fLog->logMsg( ossSeek.str(), rc, MSGLVL_CRITICAL );
        fpTableInfo->fBRMReporter.addToErrMsgEntry(ossSeek.str());
        return rc;
    }

    std::ostringstream oss;
    oss << "Expanding first extent to column OID-" << curCol.dataFile.fid <<
        "; DBRoot-" << curCol.dataFile.fDbRoot    <<
        "; part-"   << curCol.dataFile.fPartition <<
        "; seg-"    << curCol.dataFile.fSegment   <<
        "; file-"   << curCol.dataFile.fSegFileName;
    fLog->logMsg( oss.str(), MSGLVL_INFO2 );

    rc = colOp->expandAbbrevExtent ( curCol );
    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream ossExpand;
        ossExpand << "expandAbbrevExtent: error expanding extent for " <<
               "OID-"      << curCol.dataFile.fid        <<
               "; DBRoot-" << curCol.dataFile.fDbRoot    <<
               "; part-"   << curCol.dataFile.fPartition <<
               "; seg-"    << curCol.dataFile.fSegment   <<
               "; " << ec.errorString(rc);
        fLog->logMsg( ossExpand.str(), rc, MSGLVL_CRITICAL );
        fpTableInfo->fBRMReporter.addToErrMsgEntry(ossExpand.str());
        return rc;
    }

    // Update available file size to reflect disk space added by expanding
    // the extent.
    long long fileSizeBeforeExpand = fileSize;
    setFileSize( (fileSizeBeforeExpand/BYTE_PER_BLOCK), false );
    availFileSize += (fileSize - fileSizeBeforeExpand);

    // Restore offset back to where we were before expanding the extent
    if (bRetainFilePos)
    {
        rc = colOp->setFileOffset(curCol.dataFile.pFile,oldOffset,SEEK_SET);
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream ossRestore;
            ossRestore <<
                   "expandAbbrevExtent: error seeking to new extent for " <<
                   "OID-"      << curCol.dataFile.fid        <<
                   "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                   "; part-"   << curCol.dataFile.fPartition <<
                   "; seg-"    << curCol.dataFile.fSegment   <<
                   "; " << ec.errorString(rc);
            fLog->logMsg( ossRestore.str(), rc, MSGLVL_CRITICAL );
            fpTableInfo->fBRMReporter.addToErrMsgEntry(ossRestore.str());
            return rc;
        }
    }

    // We only use abbreviated extents for the very first extent.  So after
    // expanding a col's abbreviated extent, we should disable this check.
    fLoadingAbbreviatedExtent = false;

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Close the current Column file (if one is open) and reset the file handle in
// curCol.dataFile.  Both arguments are unused here.  Always returns NO_ERROR.
//------------------------------------------------------------------------------
int ColumnInfo::closeColumnFile(bool /*bCompletingExtent*/, bool /*bAbort*/)
{
    if ( !curCol.dataFile.pFile )
        return NO_ERROR;

    colOp->closeFile( curCol.dataFile.pFile );
    curCol.dataFile.pFile = 0;

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Initialize fLastInputRowInCurrentExtent used in detecting when a Read Buffer
// is crossing an extent boundary, so that we can accurately track the min/max
// for each extent as the Read buffers are parsed.
//------------------------------------------------------------------------------
void ColumnInfo::lastInputRowInExtentInit( bool bIsNewExtent )
{
    // Reworked initial block skipping for compression:
    const unsigned int ROWS_PER_EXTENT = fRowsPerExtent;
    RID numRowsLeftInExtent = 0;
    RID numRowsWritten = fSizeWritten / curCol.colWidth;
    if ((numRowsWritten % ROWS_PER_EXTENT) != 0)
        numRowsLeftInExtent = ROWS_PER_EXTENT -
            (numRowsWritten % ROWS_PER_EXTENT);

    bool bRoomToAddToOriginalExtent = true;
    if (fSizeWritten > 0)
    {
        // Handle edge case; if numRowsLeftInExtent comes out to be 0, then
        // current extent is full.  In this case we first bump up row count
        // by a full extent before we subtract by 1 to get the last row number
        // in extent.
        if (numRowsLeftInExtent == 0)
        {
            numRowsLeftInExtent = ROWS_PER_EXTENT;;
            bRoomToAddToOriginalExtent = false;
        }
    }
    else
    {
        // Starting new file with empty extent, so set row count to full extent
        numRowsLeftInExtent = ROWS_PER_EXTENT;
    }

    fLastInputRowInCurrentExtent = numRowsLeftInExtent - 1;

    // If we have a pre-existing extent that we are going to add rows to,
    // then we need to add that extent to our ColExtInf object, so that we
    // can update the CP min/max at the end of the bulk load job.
    if ( bRoomToAddToOriginalExtent )
    {
        fColExtInf->addFirstEntry(fLastInputRowInCurrentExtent,
                                  fSavedLbid,
                                  bIsNewExtent );
    }
}

//------------------------------------------------------------------------------
// Advance fLastInputRowInCurrentExtent by one extent's worth of rows
// (fRowsPerExtent), so that it refers to the end of the next extent.
//------------------------------------------------------------------------------
void ColumnInfo::lastInputRowInExtentInc( )
{
    fLastInputRowInCurrentExtent =
        fLastInputRowInCurrentExtent + fRowsPerExtent;
}

//------------------------------------------------------------------------------
// Parsing is complete for this column.  Flush pending data.  Close the current
// segment file, and corresponding dictionary store file (if applicable).  Also
// clears memory taken up by this ColumnInfo object.
//------------------------------------------------------------------------------
int ColumnInfo::finishParsing( )
{
    int rc = NO_ERROR;

    // Close the dctnry file handle.
    if (fStore)
    {
        rc = closeDctnryStore(false);
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "finishParsing: close dictionary file error with column " <<
                column.colName << "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR);
            return rc;
        }
    }

    // We don't need the mutex to protect against concurrent access by other
    // threads, since by the time we get to this point, this is the last
    // thread working on this column.  But, we use the mutex to insure that
    // we see the latest state that may have been set by another parsing thread
    // working with the same column.
    boost::mutex::scoped_lock lock(fColMutex);

    // Force the flushing of remaining data in the output buffer
    if (fColBufferMgr)
    {
        rc = fColBufferMgr->flush( );
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "finishParsing: flush error with column " << column.colName<<
                "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR);
            return rc;
        }
    }

    // Close the column file
    rc = closeColumnFile(false,false);
    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "finishParsing: close column file error with column " <<
            column.colName << "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR);
        return rc;
    }

    clearMemory();

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Store updated column information in BRMReporter for this column at EOJ;
// so that Extent Map CP information and HWM's can be updated.
// Bug2117-Src code from this function was factored over from we_tableinfo.cpp.
//
// We use mutex because this function is called by "one" of the parsing threads
// when parsing is complete for all the columns from this column's table.
// We use the mutex to insure that this parsing thread, which ends up being
// responsible for updating BRM for this column, is getting the most up to
// date values in fSegFileUpdateList, fSizeWritten, etc which may have been
// set by another parsing thread.
//
// brmReporter - collector that receives the HWM (and, if any HWM entries were
//               added, the CP) information for this column.
//------------------------------------------------------------------------------
void ColumnInfo::getBRMUpdateInfo( BRMReporter& brmReporter )
{
    boost::mutex::scoped_lock lock(fColMutex);
    // Useful for debugging
    //printCPInfo(column);

    // CP info is only reported if we added any rows (HWM update count > 0)
    const int hwmEntryCount = getHWMInfoForBRM( brmReporter );
    if (hwmEntryCount <= 0)
        return;

    getCPInfoForBRM( brmReporter );
}

//------------------------------------------------------------------------------
// Get updated Casual Partition (CP) information for BRM for this column at EOJ.
// Delegates to the column's extent info object (fColExtInf), which adds the
// information to brmReporter.
//------------------------------------------------------------------------------
void ColumnInfo::getCPInfoForBRM( BRMReporter& brmReporter )
{
    fColExtInf->getCPInfoForBRM( column, brmReporter );
}

//------------------------------------------------------------------------------
// Get updated HWM information for BRM for this column at EOJ.
// For every segment file in fSegFileUpdateList, the HWM and the file identity
// are added to brmReporter; for dictionary columns the corresponding
// dictionary store file is added as well.  fSegFileUpdateList is emptied
// before returning.
//
// Returns count of the number of HWM entries added to the BRMReporter.
//------------------------------------------------------------------------------
int ColumnInfo::getHWMInfoForBRM( BRMReporter& brmReporter )
{
    //..If we wrote out any data to the last segment file, then
    //  update HWM for the current (last) segment file we were writing to.

    //Bug1374 - Update HWM when data added to file
    if ( fSizeWritten > fSizeWrittenStart )
    {
        //Bug1372.
        const HWM lastHwm = (fSizeWritten - 1)/BYTE_PER_BLOCK;

        addToSegFileList( curCol.dataFile, lastHwm );
    }

    const bool bDctnryColumn = (column.colType == COL_TYPE_DICT);
    int numEntries = 0;

    //..Update HWM for each segment file we touched, including the last one
    for (size_t idx = 0; idx < fSegFileUpdateList.size(); ++idx)
    {
        const File& segFile = fSegFileUpdateList[idx];

        // Log for now; may control with debug flag later
        //if (fLog->isDebug( DEBUG_1 ))
        {
            std::ostringstream msg;
            msg << "Saving HWM update for OID-" << segFile.fid
                << "; hwm-"       << segFile.hwm
                << "; DBRoot-"    << segFile.fDbRoot
                << "; partition-" << segFile.fPartition
                << "; segment-"   << segFile.fSegment;

            fLog->logMsg( msg.str(), MSGLVL_INFO2 );
        }

        BRM::BulkSetHWMArg hwmEntry;
        hwmEntry.oid     = segFile.fid;
        hwmEntry.partNum = segFile.fPartition;
        hwmEntry.segNum  = segFile.fSegment;
        hwmEntry.hwm     = segFile.hwm;
        brmReporter.addToHWMInfo( hwmEntry );

        // Save list of modified db column files
        BRM::FileInfo colFile;
        colFile.oid          = segFile.fid;
        colFile.partitionNum = segFile.fPartition;
        colFile.segmentNum   = segFile.fSegment;
        colFile.dbRoot       = segFile.fDbRoot;
        colFile.compType     = curCol.compressionType;
        brmReporter.addToFileInfo( colFile );

        // Save list of corresponding modified db dictionary store files
        if (bDctnryColumn)
        {
            BRM::FileInfo storeFile;
            storeFile.oid          = column.dctnry.dctnryOid;
            storeFile.partitionNum = segFile.fPartition;
            storeFile.segmentNum   = segFile.fSegment;
            storeFile.dbRoot       = segFile.fDbRoot;
            storeFile.compType     = curCol.compressionType;
            brmReporter.addToDctnryFileInfo( storeFile );
        }

        ++numEntries;
    }

    fSegFileUpdateList.clear(); // don't need vector anymore, so release memory

    return numEntries;
}

//------------------------------------------------------------------------------
// Setup initial extent we will begin loading at start of import.
// DBRoot, partition, segment, etc for the starting extent are specified.
// If block skipping is causing us to advance to the next extent, then we
// set things up to point to the last block in the current extent.  When we
// start adding rows, we will automatically advance to the next extent.
//------------------------------------------------------------------------------
int ColumnInfo::setupInitialColumnExtent(
    uint16_t   dbRoot,               // dbroot of starting extent
    uint32_t   partition,            // partition number of starting extent
    uint16_t   segment,              // segment number of starting extent
    const std::string& tblName,       // name of table containing this column
    BRM::LBID_t lbid,                 // starting LBID for starting extent
    HWM         oldHwm,               // original HWM
    HWM         hwm,                  // new projected HWM after block skipping
    bool        bSkippedToNewExtent,  // blk skipping to next extent
    bool        bIsNewExtent )        // treat as new extent (for CP updates)
{
    // Init the ColumnInfo object
    colOp->initColumn( curCol );
    colOp->setColParam( curCol, id,
        column.width,
        column.dataType,
        column.weType,
        column.mapOid,
        column.compressionType,
        dbRoot, partition, segment );

    // Open the column file
    if(!colOp->exists(column.mapOid, dbRoot, partition, segment) )
    {
        std::ostringstream oss;
        oss << "Column file does not exist for OID-" << column.mapOid <<
           "; DBRoot-"    << dbRoot    <<
           "; partition-" << partition <<
           "; segment-"   << segment;
        fLog->logMsg( oss.str(), ERR_FILE_NOT_EXIST, MSGLVL_ERROR );
        return ERR_FILE_NOT_EXIST;
    }

    std::string segFile;
    bool useTmpSuffix = false;
    if (!bIsNewExtent)
        useTmpSuffix = true;

    // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
    int rc = colOp->openColumnFile( curCol, segFile, useTmpSuffix );
    if(rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "Error opening column file for OID-" << column.mapOid <<
            "; DBRoot-"    << dbRoot    <<
            "; partition-" << partition <<
            "; segment-"   << segment   <<
            "; filename-"  << segFile   <<
            "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), ERR_FILE_OPEN, MSGLVL_ERROR );
        return ERR_FILE_OPEN;
    }

    std::ostringstream oss1;
    oss1 << "Initializing import: "    <<
        "Table-"      << tblName       <<
        "; Col-"      << column.colName;
    if (curCol.compressionType)
        oss1          << " (compressed)";
    oss1 <<  "; OID-" << column.mapOid <<
        "; hwm-"      << hwm;
    if (bSkippedToNewExtent)
        oss1          << " (full; load into next extent)";
    oss1 << "; file-" << curCol.dataFile.fSegFileName;
    fLog->logMsg( oss1.str(), MSGLVL_INFO2 );

    if(column.colType == COL_TYPE_DICT)
    {
        RETURN_ON_ERROR( openDctnryStore( true ) );
    }

    fSavedLbid = lbid;

    if (bSkippedToNewExtent)
        oldHwm = hwm;
    rc = setupInitialColumnFile(oldHwm, hwm);
    if (rc != NO_ERROR)
    {
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "Error reading/positioning column file for OID-" <<
            column.mapOid <<
            "; DBRoot-"    << dbRoot    <<
            "; partition-" << partition <<
            "; segment-"   << segment   <<
            "; filename-"  << segFile   <<
            "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        return rc;
    }

    // Reworked initial block skipping for compression:
    // Block skipping is causing us to wrap up this extent.  We consider
    // the current extent to be full, so we "pretend" to fill out the
    // last block by adding 8192 bytes to the bytes written count.
    // This will help trigger the addition of a new extent when we
    // try to store the first section of rows to the db.
    if (bSkippedToNewExtent)
    {
        updateBytesWrittenCounts( BYTE_PER_BLOCK );
        fSizeWrittenStart = fSizeWritten;
    }

    // Reworked initial block skipping for compression:
    // This initializes CP stats for first extent regardless of whether
    // we end up adding rows to this extent, or initial block skipping
    // ultimately causes us to start with a new extent.
    lastInputRowInExtentInit( bIsNewExtent );

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Prepare the initial column segment file for import: create the output
// buffer manager, determine the file size, expand an abbreviated extent if
// block skipping has gone beyond it, and position the file at the HWM.
//
// oldHwm - original HWM of the file
// hwm    - HWM at which loading will start (after any block skipping)
//
// Returns NO_ERROR, or the error reported through RETURN_ON_ERROR.
//------------------------------------------------------------------------------
int ColumnInfo::setupInitialColumnFile( HWM oldHwm, HWM hwm )
{
    // Initialize the output buffer manager for the column.  Dictionary
    // columns are given a fixed width of 8 in place of column.width.
    if (column.colType != COL_TYPE_DICT)
    {
        fColBufferMgr = new ColumnBufferManager(this, column.width, fLog, 0);
    }
    else
    {
        fColBufferMgr = new ColumnBufferManagerDctnry(this, 8, fLog, 0);
    }
    RETURN_ON_ERROR( fColBufferMgr->setDbFile(curCol.dataFile.pFile,hwm,0) );

    RETURN_ON_ERROR( colOp->getFileSize(curCol.dataFile.pFile, fileSize) );

    // See if dealing with abbreviated extent that will need expanding.
    // This only applies to the first extent of the first segment file.
    setAbbrevExtentCheck();

    // If we are dealing with initial extent, see if block skipping has
    // exceeded disk allocation, in which case we expand to a full extent.
    if (isAbbrevExtent())
    {
        const unsigned int firstExtentBlkCount =
          (INITIAL_EXTENT_ROWS_TO_DISK*column.width) / BYTE_PER_BLOCK;
        const bool bOldHwmWithinExtent = ((oldHwm+1) <= firstExtentBlkCount);
        const bool bNewHwmPastExtent   = ((hwm+1   ) >  firstExtentBlkCount);

        if (bOldHwmWithinExtent && bNewHwmPastExtent)
        {
            RETURN_ON_ERROR( expandAbbrevExtent(false) );
        }
    }

    // Seek till the HWM lbid.
    // Store the current allocated file size in availFileSize.
    const long long hwmByteOffset = (long long)hwm * (long long)BYTE_PER_BLOCK;
    RETURN_ON_ERROR( colOp->setFileOffset(curCol.dataFile.pFile,
                     hwmByteOffset) );

    fSizeWritten      = hwmByteOffset;
    fSizeWrittenStart = fSizeWritten;
    availFileSize     = fileSize - fSizeWritten;

    if (fLog->isDebug( DEBUG_1 ))
    {
        std::ostringstream msg;
        msg << "Init raw data offsets in column file OID-"
            << curCol.dataFile.fid
            << "; DBRoot-"    << curCol.dataFile.fDbRoot
            << "; part-"      << curCol.dataFile.fPartition
            << "; seg-"       << curCol.dataFile.fSegment
            << "; begByte-"   << fSizeWritten
            << "; endByte-"   << fileSize
            << "; freeBytes-" << availFileSize;
        fLog->logMsg( msg.str(), MSGLVL_INFO2 );
    }

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Account for numBytesWritten additional bytes in the file: the bytes-written
// count (fSizeWritten) grows and the free space still remaining
// (availFileSize) shrinks by that amount.
//------------------------------------------------------------------------------
void ColumnInfo::updateBytesWrittenCounts( unsigned int numBytesWritten )
{
    availFileSize -= numBytesWritten;
    fSizeWritten  += numBytesWritten;
}

//------------------------------------------------------------------------------
// Tell whether the current column segment file being managed by ColumnInfo,
// has filled up all its extents with data; i.e. whether the number of rows
// written (fSizeWritten / column.width) has reached fMaxNumRowsPerSegFile.
//------------------------------------------------------------------------------
bool ColumnInfo::isFileComplete() const
{
    return ((fSizeWritten / column.width) >= fMaxNumRowsPerSegFile);
}

//------------------------------------------------------------------------------
// Initialize last used auto-increment value from the current "next"
// auto-increment value taken from the system catalog (or BRM).
// Delegates to the auto-increment manager; returns its return code.
//
// fullTableName - table name passed on to the auto-increment manager
//------------------------------------------------------------------------------
int ColumnInfo::initAutoInc( const std::string& fullTableName )
{
    return fAutoIncMgr->init( fullTableName, this );
}

//------------------------------------------------------------------------------
// Reserves the requested number of auto-increment numbers (autoIncCount).
// The starting value of the reserved block of numbers is returned in nextValue.
// Delegates to the auto-increment manager; returns its return code.
//------------------------------------------------------------------------------
int ColumnInfo::reserveAutoIncNums(uint32_t autoIncCount, uint64_t& nextValue )
{
    return fAutoIncMgr->reserveNextRange( autoIncCount, nextValue );
}

//------------------------------------------------------------------------------
// Finished using auto-increment.  Current value can be committed back to the
// system catalog (or BRM).
// Delegates to the auto-increment manager; returns its return code.
//------------------------------------------------------------------------------
int ColumnInfo::finishAutoInc( )
{
    return fAutoIncMgr->finish( );
}

//------------------------------------------------------------------------------
// Get current dbroot, partition, segment, and HWM for this column.
// The HWM is the block holding the last byte written, or 0 if nothing has
// been written.
//
// We use mutex because this function is called by "one" of the parsing threads
// when parsing is complete for all the columns from this column's table.
// We use the mutex to insure that this parsing thread, which ends up being
// responsible for wrapping up this column, is getting the most up to
// date values for dbroot, partition, segment, and HWM which may have been
// set by another parsing thread.
//
// fileInfo - output; its fDbRoot, fPartition, fSegment and fLocalHwm members
//            are overwritten.
//------------------------------------------------------------------------------
void ColumnInfo::getSegFileInfo( DBRootExtentInfo& fileInfo )
{
    boost::mutex::scoped_lock lock(fColMutex);

    fileInfo.fDbRoot    = curCol.dataFile.fDbRoot;
    fileInfo.fPartition = curCol.dataFile.fPartition;
    fileInfo.fSegment   = curCol.dataFile.fSegment;

    // Default to block 0; override once at least one byte has been written.
    fileInfo.fLocalHwm  = 0;
    if (fSizeWritten > 0)
    {
        fileInfo.fLocalHwm = (fSizeWritten - 1)/BYTE_PER_BLOCK;
    }
}

//------------------------------------------------------------------------------
// Open a new or existing Dictionary store file based on the DBRoot,
// partition, and segment settings in curCol.dataFile.
//
// A new dictionary object is allocated and assigned to fStore, configured
// (logger, column width, optional default value, import data mode), and then
// either opened (existing store file) or created and opened (new store file).
//
// bMustExist - true:  the store file is assumed to exist and is opened
//                     without checking for its existence;
//              false: the store file is opened if it exists, else it is
//                     created first and then opened.
//
// Returns NO_ERROR on success.  If creating or opening the store file fails,
// the error is logged, the store is closed through closeDctnryStore(true)
// and the error code is returned.
// NOTE(review): an error from saveDctnryStoreHWMChunk() returns through
// RETURN_ON_ERROR without closing fStore here - presumably the caller's
// cleanup handles that; confirm.
//------------------------------------------------------------------------------
int ColumnInfo::openDctnryStore( bool bMustExist )
{
    int rc = NO_ERROR;

    // Pick the dictionary implementation based on the compression type.
    if ( column.dctnry.fCompressionType != 0)
    {
        DctnryCompress1* dctnryCompress1 = new DctnryCompress1;
        dctnryCompress1->setMaxActiveChunkNum(1);
        dctnryCompress1->setBulkFlag(true);
        fStore = dctnryCompress1;
    }
    else
    {
        fStore = new DctnryCompress0;
    }

    // Common configuration of the store object
    fStore->setLogger(fLog);
    fStore->setColWidth( column.dctnryWidth );
    if (column.fWithDefault)
        fStore->setDefault( column.fDefaultChr );
    fStore->setImportDataMode( fpTableInfo->getImportDataMode() );

    // If we are in the process of adding an extent to this column,
    // and the extent we are adding is the first extent for the
    // relevant column segment file, then the corresponding dictionary
    // store file will not exist, in which case we must create
    // the store file, else we open the applicable store file.
    if ( (bMustExist) ||
       (colOp->exists(column.dctnry.dctnryOid,
        curCol.dataFile.fDbRoot,
        curCol.dataFile.fPartition,
        curCol.dataFile.fSegment)) )
    {
        // Save HWM chunk (for compressed files) if this seg file calls for it
        // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
        // (useTmpSuffixDctnry is presumably updated by reference by
        // saveDctnryStoreHWMChunk - verify against its declaration)
        bool useTmpSuffixDctnry = false;
        RETURN_ON_ERROR( saveDctnryStoreHWMChunk( useTmpSuffixDctnry ) );

        // @bug 5572 - HDFS usage: incorporate *.tmp file backup flag
        rc = fStore->openDctnry(
            column.dctnry.dctnryOid,
            curCol.dataFile.fDbRoot,
            curCol.dataFile.fPartition,
            curCol.dataFile.fSegment,
            useTmpSuffixDctnry );
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "openDctnryStore: error opening existing store file for " <<
                   "OID-"      << column.dctnry.dctnryOid    <<
                   "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                   "; part-"   << curCol.dataFile.fPartition <<
                   "; seg-"    << curCol.dataFile.fSegment   <<
                   "; tmpFlag-"<< useTmpSuffixDctnry         <<
                   "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

            // Ignore return code from closing file; already in error state
            closeDctnryStore(true); // clean up loose ends
            return rc;
        }

        // Remember the store's current LBID (if valid) in fDictBlocks
        if (INVALID_LBID != fStore->getCurLbid())
            fDictBlocks.push_back(fStore->getCurLbid());

        std::ostringstream oss;
        oss << "Opening existing store file for " << column.colName <<
               "; OID-"    << column.dctnry.dctnryOid    <<
               "; DBRoot-" << curCol.dataFile.fDbRoot    <<
               "; part-"   << curCol.dataFile.fPartition <<
               "; seg-"    << curCol.dataFile.fSegment   <<
               "; hwm-"    << fStore->getHWM()           <<
               "; file-"   << fStore->getFileName();
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );
    }
    else
    {
        // Store file does not exist yet: create it, then open it.
        // startLbid is passed to createDctnry() and is not used afterwards.
        BRM::LBID_t startLbid;
        rc = fStore->createDctnry(
            column.dctnry.dctnryOid,
            column.dctnryWidth,      //@bug 3313 - pass string col width
            curCol.dataFile.fDbRoot,
            curCol.dataFile.fPartition,
            curCol.dataFile.fSegment,
            startLbid);
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "openDctnryStore: error creating new store file for " <<
                   "OID-"      << column.dctnry.dctnryOid    <<
                   "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                   "; part-"   << curCol.dataFile.fPartition <<
                   "; seg-"    << curCol.dataFile.fSegment   <<
                   "; " << ec.errorString(rc);
            // A create failure is logged as critical and also recorded in
            // the table's BRMReporter.
            fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
            fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());

            // Ignore return code from closing file; already in error state
            closeDctnryStore(true); // clean up loose ends
            return rc;
        }

        // Open the newly created store file (tmp suffix flag is false here)
        rc = fStore->openDctnry(
            column.dctnry.dctnryOid,
            curCol.dataFile.fDbRoot,
            curCol.dataFile.fPartition,
            curCol.dataFile.fSegment,
            false );
        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "openDctnryStore: error opening new store file for " <<
                   "OID-"      << column.dctnry.dctnryOid    <<
                   "; DBRoot-" << curCol.dataFile.fDbRoot    <<
                   "; part-"   << curCol.dataFile.fPartition <<
                   "; seg-"    << curCol.dataFile.fSegment   <<
                   "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );

            // Ignore return code from closing file; already in error state
            closeDctnryStore(true); // clean up loose ends
            return rc;
        }

        std::ostringstream oss;
        oss << "Opening new store file for " << column.colName <<
               "; OID-"      << column.dctnry.dctnryOid    <<
               "; DBRoot-"   << curCol.dataFile.fDbRoot    <<
               "; part-"     << curCol.dataFile.fPartition <<
               "; seg-"      << curCol.dataFile.fSegment   <<
               "; file-"     << fStore->getFileName();
        fLog->logMsg( oss.str(), MSGLVL_INFO2 );
    }

    return rc;
}

//------------------------------------------------------------------------------
// Close the current Dictionary store file.
//------------------------------------------------------------------------------
int ColumnInfo::closeDctnryStore(bool bAbort)
{
    int rc = NO_ERROR;

    if (fStore)
    {
        if (bAbort)
            rc = fStore->closeDctnryOnly();
        else
            rc = fStore->closeDctnry();

        if (rc != NO_ERROR)
        {
            WErrorCodes ec;
            std::ostringstream oss;
            oss << "closeDctnryStore: error closing store file for " <<
                   "OID-"    << column.dctnry.dctnryOid <<
                   "; file-" << fStore->getFileName()   <<
                   "; " << ec.errorString(rc);
            fLog->logMsg( oss.str(), rc, MSGLVL_ERROR );
        }

        delete fStore;
        fStore = 0;
    }

    return rc;
}

//------------------------------------------------------------------------------
// Update dictionary store file with specified strings, and return the assigned
// tokens (tokenbuf) to be stored in the corresponding column token file.
//------------------------------------------------------------------------------
// @param buf       Character buffer holding the field data for the rows.
// @param pos       Per-row, per-column start/offset entries into buf.  For
//                  VARBINARY/BLOB text imports, the offset (length) entry for
//                  this column is rewritten in place after the hex string is
//                  compacted to binary.
// @param totalRow  Number of rows to process.
// @param tokenBuf  Passed to insertDctnry() to receive the assigned tokens.
// @return NO_ERROR on success; otherwise the error code from insertDctnry().
int ColumnInfo::updateDctnryStore(char* buf,
    ColPosPair ** pos,
    const int totalRow,
    char* tokenBuf)
{
    long long truncCount = 0;    // No. of rows with truncated values

    // If this is a VARBINARY column; convert the ascii hex string into binary
    //  data and fix the length (it's now only half as long).
    // Should be safe to modify pos and buf arrays outside a mutex, as no other
    // thread should be accessing the strings from the same buffer, for this
    // column.
    // This only applies to default text mode.  This step is bypassed for
    // binary imports, because in that case, the data is already true binary.
    if (((curCol.colType == WR_VARBINARY) || (curCol.colType == WR_BLOB)) &&
        (fpTableInfo->getImportDataMode() == IMPORT_DATA_TEXT))
    {
#ifdef PROFILE
        Stats::startParseEvent(WE_STATS_COMPACT_VARBINARY);
#endif
        for (int i = 0; i < totalRow; i++)
        {
            // compactVarBinary() converts in place and returns the new length
            pos[i][id].offset =
                compactVarBinary(buf + pos[i][id].start, pos[i][id].offset);
        }
#ifdef PROFILE
        // End the timing interval started before the loop (this must be a
        // stop call; a second start call would leave the interval open).
        Stats::stopParseEvent(WE_STATS_COMPACT_VARBINARY);
#endif
    }

    // Time spent waiting for the dictionary mutex is tracked separately.
    // The lock is held until this function returns.
#ifdef PROFILE
    Stats::startParseEvent(WE_STATS_WAIT_TO_PARSE_DCT);
#endif
    boost::mutex::scoped_lock lock(fDictionaryMutex);
#ifdef PROFILE
    Stats::stopParseEvent(WE_STATS_WAIT_TO_PARSE_DCT);
#endif

    int rc = fStore->insertDctnry( buf, pos, totalRow, id, tokenBuf, truncCount );
    if (rc != NO_ERROR)
    {
        // Log the failure and also record it with the BRM reporter
        WErrorCodes ec;
        std::ostringstream oss;
        oss << "updateDctnryStore: error adding rows to store file for " <<
               "OID-"      << column.dctnry.dctnryOid    <<
               "; DBRoot-" << curCol.dataFile.fDbRoot    <<
               "; part-"   << curCol.dataFile.fPartition <<
               "; seg-"    << curCol.dataFile.fSegment   <<
               "; " << ec.errorString(rc);
        fLog->logMsg( oss.str(), rc, MSGLVL_CRITICAL );
        fpTableInfo->fBRMReporter.addToErrMsgEntry(oss.str());
        return rc;
    }

    // Add the truncated-row count reported by insertDctnry() to this column's
    // saturated count.
    incSaturatedCnt( truncCount );

    return NO_ERROR;
}

//------------------------------------------------------------------------------
// No action necessary for uncompressed dictionary files
//------------------------------------------------------------------------------
// @bug 5572 - HDFS usage: add flag used to control *.tmp file usage
// @param needBackup  (out) always set to false by this implementation.
// @return always NO_ERROR.
int ColumnInfo::saveDctnryStoreHWMChunk(bool& needBackup)
{
    // This implementation saves nothing, so it reports that no backup is needed
    needBackup = false;
    return NO_ERROR;
}

//------------------------------------------------------------------------------
// Truncate specified dictionary store file for this column.
// Only applies to compressed columns.
//------------------------------------------------------------------------------
// This implementation is a no-op: all four arguments (dictionary OID, DBRoot,
// partition number, segment number) are unnamed and ignored.
// NOTE(review): presumably a compressed-column variant supplies the real
// truncation logic elsewhere - confirm against the class hierarchy.
// @return always NO_ERROR.
int ColumnInfo::truncateDctnryStore(
    OID /*dctnryOid*/, uint16_t /*root*/, uint32_t /*pNum*/, uint16_t /*sNum*/)
    const
{
    return NO_ERROR;
}

//------------------------------------------------------------------------------
// utility to convert a Status enumeration to a string
//------------------------------------------------------------------------------
/* static */
// @param status        Status value to translate.
// @param statusString  (out) overwritten with the text for status; any value
//                      not listed in the switch yields "OTHER".
void ColumnInfo::convertStatusToString(
    WriteEngine::Status status,
    std::string& statusString )
{
    // Start with the fallback text; replaced below for recognized values
    const char* label = "OTHER";

    switch (status)
    {
        case PARSE_COMPLETE:
            label = "PARSE_COMPLETE";
            break;
        case READ_COMPLETE:
            label = "READ_COMPLETE";
            break;
        case READ_PROGRESS:
            label = "READ_PROGRESS";
            break;
        case NEW:
            label = "NEW";
            break;
        case ERR:
            label = "ERR";
            break;
        default:
            break;
    }

    statusString = label;
}

}