You've already forked mariadb-columnstore-engine
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
feature(cpimport): MCOL-5164 ignore all errors (-e all)
This commit is contained in:
committed by
Leonid Fedorov
parent
7dca1da8f2
commit
1ce46b5e0b
@ -530,14 +530,14 @@ int BulkLoad::preProcess(Job& job, int tableNo, std::shared_ptr<TableInfo>& tabl
|
||||
if (pwd)
|
||||
tableInfo->setUIDGID(pwd->pw_uid, pwd->pw_gid);
|
||||
|
||||
if (fMaxErrors != -1)
|
||||
if (fMaxErrors != MAX_ERRORS_DEFAULT)
|
||||
tableInfo->setMaxErrorRows(fMaxErrors);
|
||||
else
|
||||
tableInfo->setMaxErrorRows(job.jobTableList[tableNo].maxErrNum);
|
||||
|
||||
// @bug 3929: cpimport.bin error messaging using up too much memory.
|
||||
// Validate that max allowed error count is within valid range
|
||||
long long maxErrNum = tableInfo->getMaxErrorRows();
|
||||
int maxErrNum = tableInfo->getMaxErrorRows();
|
||||
|
||||
if (maxErrNum > MAX_ALLOW_ERROR_COUNT)
|
||||
{
|
||||
|
@ -129,7 +129,7 @@ class BulkLoad : public FileOp
|
||||
void setEscapeChar(char esChar);
|
||||
void setSkipRows(size_t skipRows);
|
||||
void setKeepRbMetaFiles(bool keepMeta);
|
||||
void setMaxErrorCount(unsigned int maxErrors);
|
||||
void setMaxErrorCount(int maxErrors);
|
||||
void setNoOfParseThreads(int parseThreads);
|
||||
void setNoOfReadThreads(int readThreads);
|
||||
void setNullStringMode(bool bMode);
|
||||
@ -184,13 +184,13 @@ class BulkLoad : public FileOp
|
||||
|
||||
Log fLog; // logger
|
||||
|
||||
int fNumOfParser; // total number of parser
|
||||
int fNumOfParser{0}; // total number of parser
|
||||
char fColDelim{0}; // delimits col values within a row
|
||||
|
||||
int fNoOfBuffers{-1}; // Number of read buffers
|
||||
int fBufferSize{-1}; // Read buffer size
|
||||
int fFileVbufSize{-1}; // Internal file system buffer size
|
||||
long long fMaxErrors{-1}; // Max allowable errors per job
|
||||
long long fMaxErrors{MAX_ERRORS_DEFAULT}; // Max allowable errors per job
|
||||
std::string fAlternateImportDir; // Alternate bulk import directory
|
||||
std::string fErrorDir; // Opt. where error records record
|
||||
std::string fProcessName; // Application process name
|
||||
@ -429,10 +429,7 @@ inline void BulkLoad::setKeepRbMetaFiles(bool keepMeta)
|
||||
fKeepRbMetaFiles = keepMeta;
|
||||
}
|
||||
|
||||
// Mutator takes an unsigned int, but we store in a long long, because...
|
||||
// TableInfo which eventually needs this attribute, takes an unsigned int,
|
||||
// but we want to be able to init to -1, to indicate when it has not been set.
|
||||
inline void BulkLoad::setMaxErrorCount(unsigned int maxErrors)
|
||||
inline void BulkLoad::setMaxErrorCount(int maxErrors)
|
||||
{
|
||||
fMaxErrors = maxErrors;
|
||||
}
|
||||
|
@ -2049,7 +2049,7 @@ int BulkLoadBuffer::parseDictSection(ColumnInfo& columnInfo, int tokenPos, RID s
|
||||
int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length,
|
||||
size_t* parse_length, size_t& skipRows, RID& totalReadRows,
|
||||
RID& correctTotalRows, const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||
unsigned int allowedErrCntThisCall)
|
||||
int allowedErrCntThisCall)
|
||||
{
|
||||
boost::mutex::scoped_lock lock(fSyncUpdatesBLB);
|
||||
reset();
|
||||
@ -2153,7 +2153,7 @@ int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const ch
|
||||
int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows,
|
||||
RID& totalReadRows, RID& correctTotalRows,
|
||||
const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||
unsigned int allowedErrCntThisCall)
|
||||
int allowedErrCntThisCall)
|
||||
{
|
||||
boost::mutex::scoped_lock lock(fSyncUpdatesBLB);
|
||||
reset();
|
||||
@ -2277,7 +2277,7 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand
|
||||
// depending on whether the user has enabled the "enclosed by" feature.
|
||||
//------------------------------------------------------------------------------
|
||||
void BulkLoadBuffer::tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||
unsigned int allowedErrCntThisCall, size_t& skipRows)
|
||||
int allowedErrCntThisCall, size_t& skipRows)
|
||||
{
|
||||
unsigned offset = 0; // length of field
|
||||
unsigned curCol = 0; // dest db column counter within a row
|
||||
@ -2789,7 +2789,7 @@ void BulkLoadBuffer::tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||
// Quit if we exceed max allowable errors for this call.
|
||||
// We set lastRowHead = p, so that the code that follows this
|
||||
// loop won't try to save any data in fOverflowBuf.
|
||||
if (errorCount > allowedErrCntThisCall)
|
||||
if (allowedErrCntThisCall != MAX_ERRORS_ALL && errorCount > static_cast<unsigned>(allowedErrCntThisCall))
|
||||
{
|
||||
lastRowHead = p + 1;
|
||||
p++;
|
||||
@ -2928,7 +2928,7 @@ void BulkLoadBuffer::resizeTokenArray()
|
||||
// then tokenize() will stop reading data and exit.
|
||||
//------------------------------------------------------------------------------
|
||||
int BulkLoadBuffer::tokenizeBinary(const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||
unsigned int allowedErrCntThisCall, bool bEndOfData)
|
||||
int allowedErrCntThisCall, bool bEndOfData)
|
||||
{
|
||||
unsigned curCol = 0; // dest db column counter within a row
|
||||
unsigned curRowNum = 0; // "total" number of rows read during this call
|
||||
@ -3082,7 +3082,7 @@ int BulkLoadBuffer::tokenizeBinary(const boost::ptr_vector<ColumnInfo>& columnsI
|
||||
errorCount++;
|
||||
|
||||
// Quit if we exceed max allowable errors for this call
|
||||
if (errorCount > allowedErrCntThisCall)
|
||||
if (allowedErrCntThisCall != MAX_ERRORS_ALL && errorCount > static_cast<unsigned>(allowedErrCntThisCall))
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -215,12 +215,12 @@ class BulkLoadBuffer
|
||||
|
||||
/** @brief tokenize the buffer contents and fill up the token array.
|
||||
*/
|
||||
void tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall,
|
||||
void tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo, int allowedErrCntThisCall,
|
||||
size_t& skipRows);
|
||||
|
||||
/** @brief Binary tokenization of the buffer, and fill up the token array.
|
||||
*/
|
||||
int tokenizeBinary(const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall,
|
||||
int tokenizeBinary(const boost::ptr_vector<ColumnInfo>& columnsInfo, int allowedErrCntThisCall,
|
||||
bool bEndOfData);
|
||||
|
||||
/** @brief Determine if specified value is NULL or not.
|
||||
@ -275,13 +275,13 @@ class BulkLoadBuffer
|
||||
|
||||
int fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length,
|
||||
size_t* parse_length, size_t& skipRows, RID& totalReadRows, RID& correctTotalRows,
|
||||
const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall);
|
||||
const boost::ptr_vector<ColumnInfo>& columnsInfo, int allowedErrCntThisCall);
|
||||
|
||||
/** @brief Read the table data into the buffer
|
||||
*/
|
||||
int fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows, RID& totalRows,
|
||||
RID& correctTotalRows, const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||
unsigned int allowedErrCntThisCall);
|
||||
int allowedErrCntThisCall);
|
||||
|
||||
/** @brief Get the overflow size
|
||||
*/
|
||||
|
@ -70,8 +70,7 @@ WECmdArgs::WECmdArgs(int argc, char** argv)
|
||||
DECLARE_INT_ARG("read-buffer-size,c", fReadBufSize, 1, INT_MAX,
|
||||
"Application read buffer size (in bytes)")
|
||||
DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message")
|
||||
DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX,
|
||||
"Maximum number of allowable error per table per PM")
|
||||
("max-errors,e", po::value<string>(), "Maximum number (or 'all') of allowable error per table per PM")
|
||||
("file-path,f", po::value<string>(&fPmFilePath),
|
||||
"Data file directory path. Default is current working directory.\n"
|
||||
"\tIn Mode 1, represents the local input file path.\n"
|
||||
@ -304,6 +303,24 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
||||
fAllowMissingColumn = true;
|
||||
}
|
||||
}
|
||||
if (vm.contains("max-errors"))
|
||||
{
|
||||
auto optarg= vm["max-errors"].as<string>();
|
||||
if (optarg == "all")
|
||||
{
|
||||
fMaxErrors = MAX_ERRORS_ALL;
|
||||
}
|
||||
else
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg.c_str(), nullptr, 10);
|
||||
if (errno != 0 || lValue < 0 || lValue > INT_MAX)
|
||||
{
|
||||
startupError("Option --max-errors/-e is invalid or out of range");
|
||||
}
|
||||
fMaxErrors = lValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (fArgMode != -1)
|
||||
fMode = fArgMode; // BUG 4210
|
||||
@ -337,10 +354,7 @@ void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string
|
||||
|
||||
curJob.setReadBufferCount(fIOReadBufSize);
|
||||
curJob.setReadBufferSize(fReadBufSize);
|
||||
if (fMaxErrors >= 0)
|
||||
{
|
||||
curJob.setMaxErrorCount(fMaxErrors);
|
||||
}
|
||||
curJob.setMaxErrorCount(fMaxErrors);
|
||||
if (!fPmFilePath.empty())
|
||||
{
|
||||
importPath = fPmFilePath;
|
||||
|
@ -91,7 +91,7 @@ private:
|
||||
|
||||
int fNoOfReadThrds{1}; // No. of read buffers
|
||||
int fDebugLvl{0}; // Debug level
|
||||
int fMaxErrors{-1}; // Max allowable errors
|
||||
int fMaxErrors{MAX_ERRORS_DEFAULT}; // Max allowable errors
|
||||
int fReadBufSize{-1}; // Read buffer size
|
||||
int fIOReadBufSize{-1}; // I/O read buffer size
|
||||
int fSetBufSize{0}; // Buff size w/setvbuf
|
||||
|
@ -412,7 +412,11 @@ int TableInfo::readTableData()
|
||||
// We keep a running total of read errors; fMaxErrorRows specifies
|
||||
// the error limit. Here's where we see how many more errors we
|
||||
// still have below the limit, and we pass this to fillFromFile().
|
||||
unsigned allowedErrCntThisCall = ((fMaxErrorRows > fTotalErrRows) ? (fMaxErrorRows - fTotalErrRows) : 0);
|
||||
int allowedErrCntThisCall;
|
||||
if (fMaxErrorRows == MAX_ERRORS_ALL)
|
||||
allowedErrCntThisCall = MAX_ERRORS_ALL;
|
||||
else
|
||||
allowedErrCntThisCall = static_cast<unsigned>(fMaxErrorRows) > fTotalErrRows ? fMaxErrorRows - fTotalErrRows : 0;
|
||||
|
||||
// Fill in the specified buffer.
|
||||
// fTotalReadRowsPerInputFile is ongoing total number of rows read,
|
||||
@ -485,7 +489,7 @@ int TableInfo::readTableData()
|
||||
writeErrorList(&fBuffers[readBufNo].getErrorRows(), &fBuffers[readBufNo].getExactErrorRows(), false);
|
||||
fBuffers[readBufNo].clearErrRows();
|
||||
|
||||
if (fTotalErrRows > fMaxErrorRows)
|
||||
if (fMaxErrorRows != MAX_ERRORS_ALL && fTotalErrRows > static_cast<unsigned>(fMaxErrorRows))
|
||||
{
|
||||
// flush the reject data file and output the rejected rows
|
||||
// flush err file and output the rejected row id and the reason.
|
||||
|
@ -85,7 +85,7 @@ class TableInfo : public WeUIDGID
|
||||
// for this table. Is volatile to
|
||||
// ensure parser & reader threads
|
||||
// see the latest value.
|
||||
unsigned fMaxErrorRows; // Maximum error rows
|
||||
int fMaxErrorRows; // Maximum error rows
|
||||
int fLastBufferId; // Id of the last buffer
|
||||
char* fFileBuffer; // File buffer passed to setvbuf()
|
||||
int fCurrentParseBuffer; // Id of leading current buffer being
|
||||
@ -298,7 +298,7 @@ class TableInfo : public WeUIDGID
|
||||
|
||||
/** @brief Get the number of maximum allowed error rows
|
||||
*/
|
||||
unsigned getMaxErrorRows() const;
|
||||
int getMaxErrorRows() const;
|
||||
|
||||
/** @brief retrieve the truncation as error setting for this
|
||||
* import. When set, this causes char and varchar strings
|
||||
@ -309,7 +309,7 @@ class TableInfo : public WeUIDGID
|
||||
|
||||
/** @brief set the maximum number of error rows allowed
|
||||
*/
|
||||
void setMaxErrorRows(const unsigned int maxErrorRows);
|
||||
void setMaxErrorRows(int maxErrorRows);
|
||||
|
||||
/** @brief Set mode to treat "NULL" string as NULL value or not.
|
||||
*/
|
||||
@ -513,7 +513,7 @@ inline Status TableInfo::getStatusTI() const
|
||||
return fStatusTI;
|
||||
}
|
||||
|
||||
inline unsigned TableInfo::getMaxErrorRows() const
|
||||
inline int TableInfo::getMaxErrorRows() const
|
||||
{
|
||||
return fMaxErrorRows;
|
||||
}
|
||||
@ -630,7 +630,7 @@ inline void TableInfo::setLoadFilesInput(bool bReadFromStdin, bool bReadFromS3,
|
||||
fS3Region = s3region;
|
||||
}
|
||||
|
||||
inline void TableInfo::setMaxErrorRows(const unsigned int maxErrorRows)
|
||||
inline void TableInfo::setMaxErrorRows(int maxErrorRows)
|
||||
{
|
||||
fMaxErrorRows = maxErrorRows;
|
||||
}
|
||||
|
Reference in New Issue
Block a user