1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-25 21:17:42 +03:00
Files
mariadb-columnstore-engine/writeengine/bulk/we_bulkload.h
2016-01-06 14:08:59 -06:00

419 lines
16 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/*******************************************************************************
* $Id: we_bulkload.h 4631 2013-05-02 15:21:09Z dcathey $
*
*******************************************************************************/
/** @file */
#ifndef _WE_BULKLOAD_H_
#define _WE_BULKLOAD_H_
#ifndef _MSC_VER
#include <pthread.h>
#endif
#include <fstream>
#include <string>
#include <vector>
#include <sys/time.h>
#include <we_log.h>
#include <we_colop.h>
#include <we_xmljob.h>
#include <we_convertor.h>
#include <writeengine.h>
#include <we_brm.h>
#include "we_tableinfo.h"
#include "brmtypes.h"
#include "boost/ptr_container/ptr_vector.hpp"
#include <boost/thread/thread.hpp>
#include <boost/thread/mutex.hpp>
#include <boost/bind.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/uuid/uuid.hpp>
#if 0 //defined(_MSC_VER) && defined(WE_BULKLOAD_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
/** Namespace WriteEngine */
namespace WriteEngine
{
/**
* Class BulkLoad
*
* Declares the state and operations for one bulk load job: loading the job
* description, pre-processing each table, and running groups of read and
* parse worker threads. Derives from FileOp.
*
* Simple accessors and mutators are defined inline after the class.
*/
class BulkLoad : public FileOp
{
public:
/**
* @brief BulkLoad constructor
*/
EXPORT BulkLoad();
/**
* @brief BulkLoad destructor
*/
EXPORT ~BulkLoad();
/**
* @brief Load job information
*/
EXPORT int loadJobInfo( const std::string& fullFileName,
bool bUseTempJobFile,
const std::string& systemLang,
int argc,
char** argv,
bool bLogInfo2ToConsole,
bool bValidateColumnList );
/**
* @brief Pre process jobs to validate and assign values to the job structure
*/
int preProcess(Job& job, int tableNo, TableInfo* tableInfo);
/**
* @brief Print job information
*/
void printJob( );
/**
* @brief Process job
*/
EXPORT int processJob( );
/**
* @brief Set Debug level for this BulkLoad object and any data members
*/
void setAllDebug( DebugLevel level );
/**
* @brief Update next autoincrement value for specified OID.
* @param columnOID oid of autoincrement column to be updated
* @param nextValue next autoincrement value to assign for columnOID
*/
static int updateNextValue(OID columnOID, uint64_t nextValue);
// Accessors and mutators
void addToCmdLineImportFileList(const std::string& importFile);
const std::string& getAlternateImportDir( ) const;
const std::string& getErrorDir ( ) const;
const std::string& getJobDir ( ) const;
const std::string& getSchema ( ) const;
const std::string& getTempJobDir ( ) const;
bool getTruncationAsError ( ) const;
BulkModeType getBulkLoadMode ( ) const;
bool getContinue ( ) const;
// Returns a copy of the job UUID (fUUID)
boost::uuids::uuid getJobUUID ( ) const { return fUUID; }
EXPORT int setAlternateImportDir( const std::string& loadDir,
std::string& errMsg);
void setImportDataMode ( ImportDataMode importMode );
void setColDelimiter ( char delim );
void setBulkLoadMode ( BulkModeType bulkMode,
const std::string& rptFileName );
void setEnclosedByChar ( char enChar);
void setEscapeChar ( char esChar);
void setKeepRbMetaFiles ( bool keepMeta );
void setMaxErrorCount ( unsigned int maxErrors );
void setNoOfParseThreads ( int parseThreads );
void setNoOfReadThreads ( int readThreads );
void setNullStringMode ( bool bMode );
void setParseErrorOnTable (int tableId, bool lockParseMutex);
void setParserNum ( int parser );
void setProcessName ( const std::string& processName );
void setReadBufferCount ( int noOfReadBuffers );
void setReadBufferSize ( int readBufferSize );
void setTxnID ( BRM::TxnID txnID );
void setVbufReadSize ( int vbufReadSize );
void setTruncationAsError ( bool bTruncationAsError );
void setJobUUID ( const std::string& jobUUID );
void setErrorDir ( const std::string& errorDir );
// Timer functions
void startTimer ( );
void stopTimer ( );
double getTotalRunTime ( ) const;
// Set/get the per-object flag that disables the table lock wait timeout
void disableTimeOut ( const bool disableTimeOut);
bool disableTimeOut ( ) const;
// Set/get the static (class-wide) flag that disables output to console
static void disableConsoleOutput ( const bool noConsoleOutput)
{ fNoConsoleOutput = noConsoleOutput; }
static bool disableConsoleOutput ( )
{ return fNoConsoleOutput; }
// Add error message into appropriate BRM updater
static bool addErrorMsg2BrmUpdater(const std::string& tablename, const std::ostringstream& oss);
void setDefaultJobUUID ( );
private:
//--------------------------------------------------------------------------
// Private Data Members
//--------------------------------------------------------------------------
XMLJob fJobInfo; // current job information
boost::scoped_ptr<ColumnOp> fColOp; // column operation
std::string fRootDir; // job process root directory
std::string fJobFileName; // job description file name
Log fLog; // logger
int fNumOfParser; // total number of parsers
char fColDelim; // delimits col values within a row
int fNoOfBuffers; // Number of read buffers
int fBufferSize; // Read buffer size
int fFileVbufSize; // Internal file system buffer size
long long fMaxErrors; // Max allowable errors per job
std::string fAlternateImportDir; // Alternate bulk import directory
std::string fErrorDir; // Optional dir where error records are recorded
std::string fProcessName; // Application process name
static boost::ptr_vector<TableInfo> fTableInfo;// Vector of Table information
int fNoOfParseThreads; // Number of parse threads
int fNoOfReadThreads; // Number of read threads
boost::thread_group fReadThreads; // Read thread group
boost::thread_group fParseThreads; // Parse thread group
boost::mutex fReadMutex; // Manages table selection by each
// read thread
boost::mutex fParseMutex; // Manages table/buffer/column
// selection by each parsing thread
BRM::TxnID fTxnID; // TransID acquired from SessionMgr
bool fKeepRbMetaFiles; // Keep/delete bulkRB metadata files
bool fNullStringMode; // Treat "NULL" as NULL value
char fEnclosedByChar; // Char used to enclose column value
char fEscapeChar; // Escape char within enclosed value
timeval fStartTime; // job start time
timeval fEndTime; // job end time
double fTotalTime; // elapsed time for current phase
std::vector<std::string> fCmdLineImportFiles; // Import Files from cmd line
BulkModeType fBulkMode; // Distributed bulk mode (1,2, or 3)
std::string fBRMRptFileName; // Name of distributed mode rpt file
bool fbTruncationAsError; // Treat string truncation as error
ImportDataMode fImportDataMode; // Importing text or binary data
bool fbContinue; // true while read and parse are running
//
static boost::mutex* fDDLMutex; // Ensure only 1 DDL op at a time
EXPORT static const std::string DIR_BULK_JOB; // Bulk job directory
EXPORT static const std::string DIR_BULK_TEMP_JOB;// Dir for tmp job files
static const std::string DIR_BULK_IMPORT; // Bulk job import dir
static const std::string DIR_BULK_LOG; // Bulk job log directory
bool fDisableTimeOut; // disable timeout when waiting for table lock
boost::uuids::uuid fUUID; // job UUID
static bool fNoConsoleOutput; // disable output to console
//--------------------------------------------------------------------------
// Private Functions
//--------------------------------------------------------------------------
// Spawn the worker threads.
void spawnWorkers();
// Checks if all tables have the status set
bool allTablesDone(Status status);
// Lock the table for read. Called by the read thread.
int lockTableForRead(int id);
// Get column for parsing. Called by the parse thread.
// tableId, columnId and myParseBuffer are output parameters (non-const refs).
// @bug 2099 - Temporary hack to diagnose deadlock. Added report parm below.
bool lockColumnForParse(int id, // thread id
int &tableId, // selected table id
int &columnId, // selected column id
int &myParseBuffer, // selected parse buffer
bool report);
// Map specified DBRoot to its first segment file number
int mapDBRootToFirstSegment(OID columnOid,
uint16_t dbRoot,
uint16_t& segment);
// The thread method for the read thread.
void read(int id);
// The thread method for the parse thread.
void parse(int id);
// Sleep method
void sleepMS(long int ms);
// Initialize auto-increment column for specified schema and table.
int preProcessAutoInc(
const std::string& fullTableName,// schema.table
ColumnInfo* colInfo); // ColumnInfo associated with AI column
// Determine starting HWM and LBID after block skipping added to HWM
int preProcessHwmLbid( const ColumnInfo* info,
int minWidth,
uint32_t partition,
uint16_t segment,
HWM& hwm,
BRM::LBID_t& lbid,
bool& bSkippedToNewExtent);
// Rollback any tables that are left in a locked state at EOJ.
int rollbackLockedTables( );
// Rollback a table left in a locked state.
int rollbackLockedTable( TableInfo& tableInfo );
// Save metadata info required for shared-nothing bulk rollback.
int saveBulkRollbackMetaData( Job& job, // current job
TableInfo* tableInfo, // TableInfo for table of interest
const std::vector<DBRootExtentInfo>& segFileInfo, //vector seg file info
const std::vector<BRM::EmDbRootHWMInfo_v>& dbRootHWMInfoPM);
// Manage/validate the list of 1 or more import data files
int manageImportDataFileList(Job& job, // current job
int tableNo, // table number of current job
TableInfo* tableInfo); // TableInfo for table of interest
// Break up list of file names into a vector of filename strings
int buildImportDataFileList(
const std::string& location,
const std::string& filename,
std::vector<std::string>& importFileNames);
};
//------------------------------------------------------------------------------
// Inline functions
//------------------------------------------------------------------------------
inline void BulkLoad::addToCmdLineImportFileList(const std::string& importFile){
fCmdLineImportFiles.push_back( importFile ); }
inline const std::string& BulkLoad::getAlternateImportDir( ) const {
return fAlternateImportDir; }
inline const std::string& BulkLoad::getErrorDir( ) const {
return fErrorDir; }
// Return the class-wide bulk job directory constant (DIR_BULK_JOB).
inline const std::string& BulkLoad::getJobDir( ) const
{
    return BulkLoad::DIR_BULK_JOB;
}
inline const std::string& BulkLoad::getSchema( ) const {
return fJobInfo.getJob().schema; }
// Return the class-wide temporary job file directory constant
// (DIR_BULK_TEMP_JOB).
inline const std::string& BulkLoad::getTempJobDir( ) const
{
    return BulkLoad::DIR_BULK_TEMP_JOB;
}
inline bool BulkLoad::getTruncationAsError ( ) const {
return fbTruncationAsError; }
// Return the currently configured bulk load mode.
inline BulkModeType BulkLoad::getBulkLoadMode ( ) const
{
    return this->fBulkMode;
}
inline bool BulkLoad::getContinue ( ) const {
return fbContinue; }
// Write the job description to the log: the brief form by default, the
// full form when debug level DEBUG_1 is enabled.
inline void BulkLoad::printJob()
{
    if (!isDebug(DEBUG_1))
    {
        fJobInfo.printJobInfoBrief(fLog);
        return;
    }

    fJobInfo.printJobInfo(fLog);
}
// Apply the given debug level to this object first, then to its logger.
inline void BulkLoad::setAllDebug( DebugLevel level )
{
    setDebugLevel( level );
    this->fLog.setDebugLevel( level );
}
inline void BulkLoad::setColDelimiter( char delim ) {
fColDelim = delim; }
// Store the bulk load mode together with the name of the report file
// associated with that mode.
inline void BulkLoad::setBulkLoadMode( BulkModeType bulkMode,
                                       const std::string& rptFileName )
{
    this->fBulkMode       = bulkMode;
    this->fBRMRptFileName = rptFileName;
}
inline void BulkLoad::setEnclosedByChar( char enChar ) {
fEnclosedByChar = enChar; }
inline void BulkLoad::setEscapeChar( char esChar ) {
fEscapeChar = esChar; }
// Store the import data mode (text or binary input data).
inline void BulkLoad::setImportDataMode(ImportDataMode importMode)
{
    this->fImportDataMode = importMode;
}
inline void BulkLoad::setKeepRbMetaFiles( bool keepMeta ) {
fKeepRbMetaFiles = keepMeta; }
// Mutator takes an unsigned int, but we store in a long long, because...
// TableInfo which eventually needs this attribute, takes an unsigned int,
// but we want to be able to init to -1, to indicate when it has not been set.
inline void BulkLoad::setMaxErrorCount( unsigned int maxErrors ) {
fMaxErrors = maxErrors; }
inline void BulkLoad::setNoOfParseThreads(int parseThreads ) {
fNoOfParseThreads = parseThreads; }
inline void BulkLoad::setNoOfReadThreads( int readThreads ) {
fNoOfReadThreads = readThreads; }
inline void BulkLoad::setNullStringMode( bool bMode ) {
fNullStringMode = bMode; }
inline void BulkLoad::setParserNum( int parser ) {
fNumOfParser = parser; }
inline void BulkLoad::setProcessName( const std::string& processName ) {
fProcessName = processName; }
inline void BulkLoad::setReadBufferCount( int noOfReadBuffers ) {
fNoOfBuffers = noOfReadBuffers; }
inline void BulkLoad::setReadBufferSize( int readBufferSize ) {
fBufferSize = readBufferSize; }
// Store the transaction ID to be used by this job.
inline void BulkLoad::setTxnID( BRM::TxnID txnID )
{
    this->fTxnID = txnID;
}
inline void BulkLoad::setVbufReadSize( int vbufReadSize ) {
fFileVbufSize = vbufReadSize; }
inline void BulkLoad::setTruncationAsError(bool bTruncationAsError) {
fbTruncationAsError = bTruncationAsError; }
inline void BulkLoad::setErrorDir( const std::string& errorDir ) {
fErrorDir = errorDir; }
inline void BulkLoad::startTimer( ) {
gettimeofday( &fStartTime, 0 ); }
// Capture the current wall-clock time as the job end time and store the
// elapsed time since startTimer(), in seconds, in fTotalTime.
inline void BulkLoad::stopTimer()
{
    gettimeofday( &fEndTime, 0 );

    // Convert each timestamp to fractional seconds before subtracting.
    const double endSeconds =
        fEndTime.tv_sec + (fEndTime.tv_usec / 1000000.0);
    const double startSeconds =
        fStartTime.tv_sec + (fStartTime.tv_usec / 1000000.0);

    fTotalTime = endSeconds - startSeconds;
}
inline double BulkLoad::getTotalRunTime() const {
return fTotalTime; }
inline void BulkLoad::disableTimeOut( const bool disableTimeOut) {
fDisableTimeOut = disableTimeOut; }
inline bool BulkLoad::disableTimeOut() const {
return fDisableTimeOut; }
} // end of namespace
#undef EXPORT
#endif // _WE_BULKLOAD_H_