You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-01 06:21:41 +03:00
MCOL-4957 Fix performance slowdown for processing TIMESTAMP columns.
Part 1: As part of MCOL-3776, to address a synchronization issue while accessing the fTimeZone member of the Func class, mutex locks were added to its accessor and mutator methods. However, this slows down processing of TIMESTAMP columns in PrimProc significantly, as all threads across all concurrently running queries serialize on the mutex. This is because PrimProc has only a single global object of the functor class (a class derived from Func in utils/funcexp/functor.h) for a given function name. To fix this problem: (1) We remove fTimeZone as a member of the Func-derived classes (hence removing the mutexes) and instead use the fOperationType member of the FunctionColumn class to propagate the timezone values down to the individual functor processing functions such as FunctionColumn::getStrVal(), FunctionColumn::getIntVal(), etc. (2) To achieve (1), a timezone member is added to the execplan::CalpontSystemCatalog::ColType class. Part 2: Several functors in the Funcexp code call the dataconvert::gmtSecToMySQLTime() and dataconvert::mySQLTimeToGmtSec() functions to convert between seconds since the Unix epoch and a broken-down time representation. These functions in turn call the C library function localtime_r(), which currently has a known bug of holding a global lock via a call to __tz_convert. This significantly reduces performance in multi-threaded applications where multiple threads concurrently call localtime_r(). (More details on the bug: https://sourceware.org/bugzilla/show_bug.cgi?id=16145) This bug in localtime_r() caused processing of the functors in PrimProc to slow down significantly, since query execution causes the functor code to run in a multi-threaded manner. As a fix, we remove the calls to localtime_r() from gmtSecToMySQLTime() and mySQLTimeToGmtSec() by performing the timezone-to-offset conversion (done in dataconvert::timeZoneToOffset()) during execution plan creation in the plugin. 
Note that localtime_r() is only called when the time_zone system variable is set to "SYSTEM". This fix also required changing the timezone type from a std::string to a long (an offset in seconds relative to UTC; for example, -18000 for EST, which is UTC-5:00) across the system.
This commit is contained in:
@ -648,12 +648,12 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob
|
||||
std::string timeZone = optarg;
|
||||
long offset;
|
||||
|
||||
if (timeZone != "SYSTEM" && dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
|
||||
if (dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
|
||||
{
|
||||
startupError(std::string("Value for option -T is invalid"), true);
|
||||
}
|
||||
|
||||
curJob.setTimeZone(timeZone);
|
||||
curJob.setTimeZone(offset);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -159,7 +159,7 @@ BulkLoad::BulkLoad()
|
||||
, fbContinue(false)
|
||||
, fDisableTimeOut(false)
|
||||
, fUUID(boost::uuids::nil_generator()())
|
||||
, fTimeZone("SYSTEM")
|
||||
, fTimeZone(dataconvert::systemTimeZoneOffset())
|
||||
, fUsername("mysql") // MCOL-4328 default file owner
|
||||
{
|
||||
fTableInfo.clear();
|
||||
|
@ -107,7 +107,7 @@ class BulkLoad : public FileOp
|
||||
void addToCmdLineImportFileList(const std::string& importFile);
|
||||
const std::string& getAlternateImportDir() const;
|
||||
const std::string& getErrorDir() const;
|
||||
const std::string& getTimeZone() const;
|
||||
long getTimeZone() const;
|
||||
const std::string& getJobDir() const;
|
||||
const std::string& getSchema() const;
|
||||
const std::string& getTempJobDir() const;
|
||||
@ -145,7 +145,7 @@ class BulkLoad : public FileOp
|
||||
void setTruncationAsError(bool bTruncationAsError);
|
||||
void setJobUUID(const std::string& jobUUID);
|
||||
void setErrorDir(const std::string& errorDir);
|
||||
void setTimeZone(const std::string& timeZone);
|
||||
void setTimeZone(long timeZone);
|
||||
void setS3Key(const std::string& key);
|
||||
void setS3Secret(const std::string& secret);
|
||||
void setS3Bucket(const std::string& bucket);
|
||||
@ -229,7 +229,9 @@ class BulkLoad : public FileOp
|
||||
bool fDisableTimeOut; // disable timeout when waiting for table lock
|
||||
boost::uuids::uuid fUUID; // job UUID
|
||||
static bool fNoConsoleOutput; // disable output to console
|
||||
std::string fTimeZone; // Timezone to use for TIMESTAMP data type
|
||||
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
|
||||
// to use for TIMESTAMP data type. For example,
|
||||
// for EST which is UTC-5:00, offset will be -18000s.
|
||||
std::string fS3Key; // S3 Key
|
||||
std::string fS3Secret; // S3 Secret
|
||||
std::string fS3Host; // S3 Host
|
||||
@ -318,7 +320,7 @@ inline const std::string& BulkLoad::getErrorDir() const
|
||||
return fErrorDir;
|
||||
}
|
||||
|
||||
inline const std::string& BulkLoad::getTimeZone() const
|
||||
inline long BulkLoad::getTimeZone() const
|
||||
{
|
||||
return fTimeZone;
|
||||
}
|
||||
@ -486,7 +488,7 @@ inline void BulkLoad::setErrorDir(const std::string& errorDir)
|
||||
fErrorDir = errorDir;
|
||||
}
|
||||
|
||||
inline void BulkLoad::setTimeZone(const std::string& timeZone)
|
||||
inline void BulkLoad::setTimeZone(long timeZone)
|
||||
{
|
||||
fTimeZone = timeZone;
|
||||
}
|
||||
|
@ -139,7 +139,7 @@ BulkLoadBuffer::BulkLoadBuffer(unsigned numberOfCols, unsigned bufferSize, Log*
|
||||
, fTableName(tableName)
|
||||
, fbTruncationAsError(false)
|
||||
, fImportDataMode(IMPORT_DATA_TEXT)
|
||||
, fTimeZone("SYSTEM")
|
||||
, fTimeZone(dataconvert::systemTimeZoneOffset())
|
||||
, fFixedBinaryRecLen(0)
|
||||
{
|
||||
fData = new char[bufferSize];
|
||||
|
@ -152,7 +152,9 @@ class BulkLoadBuffer
|
||||
// for db cols (omits default cols)
|
||||
bool fbTruncationAsError; // Treat string truncation as error
|
||||
ImportDataMode fImportDataMode; // Import data in text or binary mode
|
||||
std::string fTimeZone; // Timezone used by TIMESTAMP datatype
|
||||
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
|
||||
// to use for TIMESTAMP data type. For example,
|
||||
// for EST which is UTC-5:00, offset will be -18000s.
|
||||
unsigned int fFixedBinaryRecLen; // Fixed rec len used in binary mode
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
@ -388,7 +390,7 @@ class BulkLoadBuffer
|
||||
|
||||
/** @brief set timezone.
|
||||
*/
|
||||
void setTimeZone(const std::string& timeZone)
|
||||
void setTimeZone(long timeZone)
|
||||
{
|
||||
fTimeZone = timeZone;
|
||||
}
|
||||
|
@ -146,7 +146,7 @@ TableInfo::TableInfo(Log* logger, const BRM::TxnID txnID, const string& processN
|
||||
, fKeepRbMetaFile(bKeepRbMetaFile)
|
||||
, fbTruncationAsError(false)
|
||||
, fImportDataMode(IMPORT_DATA_TEXT)
|
||||
, fTimeZone("SYSTEM")
|
||||
, fTimeZone(dataconvert::systemTimeZoneOffset())
|
||||
, fTableLocked(false)
|
||||
, fReadFromStdin(false)
|
||||
, fReadFromS3(false)
|
||||
|
@ -127,7 +127,9 @@ class TableInfo : public WeUIDGID
|
||||
// data file
|
||||
bool fbTruncationAsError; // Treat string truncation as error
|
||||
ImportDataMode fImportDataMode; // Import data in text or binary mode
|
||||
std::string fTimeZone; // Timezone used by TIMESTAMP data type
|
||||
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
|
||||
// to use for TIMESTAMP data type. For example,
|
||||
// for EST which is UTC-5:00, offset will be -18000s.
|
||||
|
||||
volatile bool fTableLocked; // Do we have db table lock
|
||||
|
||||
@ -254,7 +256,7 @@ class TableInfo : public WeUIDGID
|
||||
|
||||
/** @brief Get timezone.
|
||||
*/
|
||||
const std::string& getTimeZone() const;
|
||||
long getTimeZone() const;
|
||||
|
||||
/** @brief Get number of buffers
|
||||
*/
|
||||
@ -315,7 +317,7 @@ class TableInfo : public WeUIDGID
|
||||
|
||||
/** @brief Set timezone.
|
||||
*/
|
||||
void setTimeZone(const std::string& timeZone);
|
||||
void setTimeZone(long timeZone);
|
||||
|
||||
/** @brief Enable distributed mode, saving BRM updates in rptFileName
|
||||
*/
|
||||
@ -481,7 +483,7 @@ inline ImportDataMode TableInfo::getImportDataMode() const
|
||||
return fImportDataMode;
|
||||
}
|
||||
|
||||
inline const std::string& TableInfo::getTimeZone() const
|
||||
inline long TableInfo::getTimeZone() const
|
||||
{
|
||||
return fTimeZone;
|
||||
}
|
||||
@ -582,7 +584,7 @@ inline void TableInfo::setImportDataMode(ImportDataMode importMode)
|
||||
fImportDataMode = importMode;
|
||||
}
|
||||
|
||||
inline void TableInfo::setTimeZone(const std::string& timeZone)
|
||||
inline void TableInfo::setTimeZone(long timeZone)
|
||||
{
|
||||
fTimeZone = timeZone;
|
||||
}
|
||||
|
@ -3552,7 +3552,7 @@ uint8_t WE_DDLCommandProc::fillNewColumn(ByteStream& bs, std::string& err)
|
||||
int dataWidth, scale, precision, compressionType, refColWidth, refCompressionType;
|
||||
string defaultValStr;
|
||||
ColTuple defaultVal;
|
||||
string timeZone;
|
||||
long timeZone;
|
||||
|
||||
bs >> tmp32;
|
||||
txnID = tmp32;
|
||||
@ -3581,7 +3581,9 @@ uint8_t WE_DDLCommandProc::fillNewColumn(ByteStream& bs, std::string& err)
|
||||
refColWidth = tmp32;
|
||||
bs >> tmp8;
|
||||
refCompressionType = tmp8;
|
||||
bs >> timeZone;
|
||||
messageqcpp::ByteStream::octbyte timeZoneTemp;
|
||||
bs >> timeZoneTemp;
|
||||
timeZone = timeZoneTemp;
|
||||
// Find the fill in value
|
||||
bool isNULL = false;
|
||||
|
||||
|
@ -2681,7 +2681,7 @@ uint8_t WE_DMLCommandProc::processUpdate(messageqcpp::ByteStream& bs, std::strin
|
||||
CalpontSystemCatalog::OID oid = 0;
|
||||
CalpontSystemCatalog::ROPair tableRO;
|
||||
|
||||
std::string timeZone = cpackages[txnId].get_TimeZone();
|
||||
long timeZone = cpackages[txnId].get_TimeZone();
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -71,7 +71,11 @@ const long long columnstore_precision[19] = {0,
|
||||
//------------------------------------------------------------------------------
|
||||
// Constructor
|
||||
//------------------------------------------------------------------------------
|
||||
XMLJob::XMLJob() : fDebugLevel(DEBUG_0), fDeleteTempFile(false), fValidateColList(true), fTimeZone("SYSTEM")
|
||||
XMLJob::XMLJob()
|
||||
: fDebugLevel(DEBUG_0)
|
||||
, fDeleteTempFile(false)
|
||||
, fValidateColList(true)
|
||||
, fTimeZone(dataconvert::systemTimeZoneOffset())
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -116,7 +116,7 @@ class XMLJob : public XMLOp
|
||||
/**
|
||||
* @brief Set timezone
|
||||
*/
|
||||
void setTimeZone(const std::string& timeZone)
|
||||
void setTimeZone(long timeZone)
|
||||
{
|
||||
fTimeZone = timeZone;
|
||||
}
|
||||
@ -144,7 +144,9 @@ class XMLJob : public XMLOp
|
||||
JobColList fDefaultColumns; // temporary list of default cols
|
||||
// for table node being processed
|
||||
bool fValidateColList; // Validate all cols have XML tag
|
||||
std::string fTimeZone; // Timezone used for TIMESTAMP datatype
|
||||
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
|
||||
// to use for TIMESTAMP data type. For example,
|
||||
// for EST which is UTC-5:00, offset will be -18000s.
|
||||
};
|
||||
|
||||
} // namespace WriteEngine
|
||||
|
Reference in New Issue
Block a user