1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-01 06:21:41 +03:00

MCOL-4957 Fix performance slowdown for processing TIMESTAMP columns.

Part 1:
 As part of MCOL-3776, to address a synchronization issue while accessing
 the fTimeZone member of the Func class, mutex locks were added to its
 accessor and mutator methods. However, this slows down processing
 of TIMESTAMP columns in PrimProc significantly, as all threads across
 all concurrently running queries serialize on the mutex. This
 is because PrimProc has only a single global object of the functor
 class (a class derived from Func in utils/funcexp/functor.h) for a given
 function name. To fix this problem:

   (1) We remove fTimeZone as a member of the Func-derived classes
   (hence removing the mutexes) and instead use the fOperationType
   member of the FunctionColumn class to propagate the timezone values
   down to the individual functor processing functions such as
   FunctionColumn::getStrVal(), FunctionColumn::getIntVal(), etc.

   (2) To achieve (1), a timezone member is added to the
   execplan::CalpontSystemCatalog::ColType class.

Part 2:
 Several functors in the Funcexp code call the dataconvert::gmtSecToMySQLTime()
 and dataconvert::mySQLTimeToGmtSec() functions to convert between seconds
 since the Unix epoch and a broken-down time representation. These functions in
 turn call the C library function localtime_r(), which currently has a known bug
 of holding a global lock via a call to __tz_convert. This significantly reduces
 performance in multi-threaded applications where multiple threads concurrently
 call localtime_r(). More details on the bug:
   https://sourceware.org/bugzilla/show_bug.cgi?id=16145

 This bug in localtime_r() caused processing of the functors in PrimProc to
 slow down significantly, since query execution causes functor code to be
 processed in a multi-threaded manner.

 As a fix, we remove the calls to localtime_r() from gmtSecToMySQLTime()
 and mySQLTimeToGmtSec() by performing the timezone-to-offset conversion
 (done in dataconvert::timeZoneToOffset()) during the execution plan
 creation in the plugin. Note that localtime_r() is only called when the
 time_zone system variable is set to "SYSTEM".

 This fix also required changing the timezone type from a std::string to
 a long across the system.
This commit is contained in:
Gagan Goel
2022-02-09 19:03:00 -05:00
parent f67a37bcae
commit 973e5024d8
120 changed files with 1022 additions and 695 deletions

View File

@ -648,12 +648,12 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob
std::string timeZone = optarg;
long offset;
if (timeZone != "SYSTEM" && dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
if (dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
{
startupError(std::string("Value for option -T is invalid"), true);
}
curJob.setTimeZone(timeZone);
curJob.setTimeZone(offset);
break;
}

View File

@ -159,7 +159,7 @@ BulkLoad::BulkLoad()
, fbContinue(false)
, fDisableTimeOut(false)
, fUUID(boost::uuids::nil_generator()())
, fTimeZone("SYSTEM")
, fTimeZone(dataconvert::systemTimeZoneOffset())
, fUsername("mysql") // MCOL-4328 default file owner
{
fTableInfo.clear();

View File

@ -107,7 +107,7 @@ class BulkLoad : public FileOp
void addToCmdLineImportFileList(const std::string& importFile);
const std::string& getAlternateImportDir() const;
const std::string& getErrorDir() const;
const std::string& getTimeZone() const;
long getTimeZone() const;
const std::string& getJobDir() const;
const std::string& getSchema() const;
const std::string& getTempJobDir() const;
@ -145,7 +145,7 @@ class BulkLoad : public FileOp
void setTruncationAsError(bool bTruncationAsError);
void setJobUUID(const std::string& jobUUID);
void setErrorDir(const std::string& errorDir);
void setTimeZone(const std::string& timeZone);
void setTimeZone(long timeZone);
void setS3Key(const std::string& key);
void setS3Secret(const std::string& secret);
void setS3Bucket(const std::string& bucket);
@ -229,7 +229,9 @@ class BulkLoad : public FileOp
bool fDisableTimeOut; // disable timeout when waiting for table lock
boost::uuids::uuid fUUID; // job UUID
static bool fNoConsoleOutput; // disable output to console
std::string fTimeZone; // Timezone to use for TIMESTAMP data type
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
// to use for TIMESTAMP data type. For example,
// for EST which is UTC-5:00, offset will be -18000s.
std::string fS3Key; // S3 Key
std::string fS3Secret; // S3 Secret
std::string fS3Host; // S3 Host
@ -318,7 +320,7 @@ inline const std::string& BulkLoad::getErrorDir() const
return fErrorDir;
}
inline const std::string& BulkLoad::getTimeZone() const
inline long BulkLoad::getTimeZone() const
{
return fTimeZone;
}
@ -486,7 +488,7 @@ inline void BulkLoad::setErrorDir(const std::string& errorDir)
fErrorDir = errorDir;
}
inline void BulkLoad::setTimeZone(const std::string& timeZone)
inline void BulkLoad::setTimeZone(long timeZone)
{
fTimeZone = timeZone;
}

View File

@ -139,7 +139,7 @@ BulkLoadBuffer::BulkLoadBuffer(unsigned numberOfCols, unsigned bufferSize, Log*
, fTableName(tableName)
, fbTruncationAsError(false)
, fImportDataMode(IMPORT_DATA_TEXT)
, fTimeZone("SYSTEM")
, fTimeZone(dataconvert::systemTimeZoneOffset())
, fFixedBinaryRecLen(0)
{
fData = new char[bufferSize];

View File

@ -152,7 +152,9 @@ class BulkLoadBuffer
// for db cols (omits default cols)
bool fbTruncationAsError; // Treat string truncation as error
ImportDataMode fImportDataMode; // Import data in text or binary mode
std::string fTimeZone; // Timezone used by TIMESTAMP datatype
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
// to use for TIMESTAMP data type. For example,
// for EST which is UTC-5:00, offset will be -18000s.
unsigned int fFixedBinaryRecLen; // Fixed rec len used in binary mode
//--------------------------------------------------------------------------
@ -388,7 +390,7 @@ class BulkLoadBuffer
/** @brief set timezone.
*/
void setTimeZone(const std::string& timeZone)
void setTimeZone(long timeZone)
{
fTimeZone = timeZone;
}

View File

@ -146,7 +146,7 @@ TableInfo::TableInfo(Log* logger, const BRM::TxnID txnID, const string& processN
, fKeepRbMetaFile(bKeepRbMetaFile)
, fbTruncationAsError(false)
, fImportDataMode(IMPORT_DATA_TEXT)
, fTimeZone("SYSTEM")
, fTimeZone(dataconvert::systemTimeZoneOffset())
, fTableLocked(false)
, fReadFromStdin(false)
, fReadFromS3(false)

View File

@ -127,7 +127,9 @@ class TableInfo : public WeUIDGID
// data file
bool fbTruncationAsError; // Treat string truncation as error
ImportDataMode fImportDataMode; // Import data in text or binary mode
std::string fTimeZone; // Timezone used by TIMESTAMP data type
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
// to use for TIMESTAMP data type. For example,
// for EST which is UTC-5:00, offset will be -18000s.
volatile bool fTableLocked; // Do we have db table lock
@ -254,7 +256,7 @@ class TableInfo : public WeUIDGID
/** @brief Get timezone.
*/
const std::string& getTimeZone() const;
long getTimeZone() const;
/** @brief Get number of buffers
*/
@ -315,7 +317,7 @@ class TableInfo : public WeUIDGID
/** @brief Set timezone.
*/
void setTimeZone(const std::string& timeZone);
void setTimeZone(long timeZone);
/** @brief Enable distributed mode, saving BRM updates in rptFileName
*/
@ -481,7 +483,7 @@ inline ImportDataMode TableInfo::getImportDataMode() const
return fImportDataMode;
}
inline const std::string& TableInfo::getTimeZone() const
inline long TableInfo::getTimeZone() const
{
return fTimeZone;
}
@ -582,7 +584,7 @@ inline void TableInfo::setImportDataMode(ImportDataMode importMode)
fImportDataMode = importMode;
}
inline void TableInfo::setTimeZone(const std::string& timeZone)
inline void TableInfo::setTimeZone(long timeZone)
{
fTimeZone = timeZone;
}

View File

@ -3552,7 +3552,7 @@ uint8_t WE_DDLCommandProc::fillNewColumn(ByteStream& bs, std::string& err)
int dataWidth, scale, precision, compressionType, refColWidth, refCompressionType;
string defaultValStr;
ColTuple defaultVal;
string timeZone;
long timeZone;
bs >> tmp32;
txnID = tmp32;
@ -3581,7 +3581,9 @@ uint8_t WE_DDLCommandProc::fillNewColumn(ByteStream& bs, std::string& err)
refColWidth = tmp32;
bs >> tmp8;
refCompressionType = tmp8;
bs >> timeZone;
messageqcpp::ByteStream::octbyte timeZoneTemp;
bs >> timeZoneTemp;
timeZone = timeZoneTemp;
// Find the fill in value
bool isNULL = false;

View File

@ -2681,7 +2681,7 @@ uint8_t WE_DMLCommandProc::processUpdate(messageqcpp::ByteStream& bs, std::strin
CalpontSystemCatalog::OID oid = 0;
CalpontSystemCatalog::ROPair tableRO;
std::string timeZone = cpackages[txnId].get_TimeZone();
long timeZone = cpackages[txnId].get_TimeZone();
try
{

View File

@ -71,7 +71,11 @@ const long long columnstore_precision[19] = {0,
//------------------------------------------------------------------------------
// Constructor
//------------------------------------------------------------------------------
XMLJob::XMLJob() : fDebugLevel(DEBUG_0), fDeleteTempFile(false), fValidateColList(true), fTimeZone("SYSTEM")
XMLJob::XMLJob()
: fDebugLevel(DEBUG_0)
, fDeleteTempFile(false)
, fValidateColList(true)
, fTimeZone(dataconvert::systemTimeZoneOffset())
{
}

View File

@ -116,7 +116,7 @@ class XMLJob : public XMLOp
/**
* @brief Set timezone
*/
void setTimeZone(const std::string& timeZone)
void setTimeZone(long timeZone)
{
fTimeZone = timeZone;
}
@ -144,7 +144,9 @@ class XMLJob : public XMLOp
JobColList fDefaultColumns; // temporary list of default cols
// for table node being processed
bool fValidateColList; // Validate all cols have XML tag
std::string fTimeZone; // Timezone used for TIMESTAMP datatype
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
// to use for TIMESTAMP data type. For example,
// for EST which is UTC-5:00, offset will be -18000s.
};
} // namespace WriteEngine