1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4912 This patch introduces an Extent Map index to improve EM scalability

The EM scalability project has two parts: phase1 and phase2.
        This is phase1, which brings an EM index to speed up (from O(n) down
        to the speed of boost::unordered_map) EM lookups that search for a
        <dbroot, oid, partition> tuple in order to turn it into an LBID,
        e.g. most bulk insertion meta info operations.
        The basis is boost::shared_managed_object, where EMIndex is
        stored. Whilst it is not debug-friendly, it allows nested
        structs to be put into shmem. EMIndex has 3 tiers. Top-down description:
        a vector of dbroots, a map of oids to partition vectors, and partition
        vectors that hold EM indices.
        Certain EM methods now query the index before they do an EM run.
        EMIndex has a separate shmem file with the fixed id
        MCS-shm-00060001.
This commit is contained in:
Roman Nozdrin
2022-03-30 08:57:05 +00:00
committed by Leonid Fedorov
parent fb3eaabd29
commit 4c26e4f960
25 changed files with 3498 additions and 459 deletions

View File

@ -916,16 +916,24 @@ void getTableOID(const std::string& xmlGenSchema, const std::string& xmlGenTable
void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJobIdStr,
const std::string& xmlGenSchema, const std::string& xmlGenTable,
const std::string& alternateImportDir, const std::string& S3Bucket,
boost::filesystem::path& sFileName)
const std::string& tableOIDStr, boost::filesystem::path& sFileName)
{
// Construct the job description file name
std::string xmlErrMsg;
int rc = 0;
std::string tableOIDStr;
getTableOID(xmlGenSchema, xmlGenTable, tableOIDStr);
std::string localTableOIDStr;
if (tableOIDStr.empty())
{
getTableOID(xmlGenSchema, xmlGenTable, localTableOIDStr);
}
else
{
localTableOIDStr = tableOIDStr;
}
rc = XMLJob::genJobXMLFileName(std::string(), tempJobDir, sJobIdStr,
true, // using temp job xml file
xmlGenSchema, xmlGenTable, sFileName, xmlErrMsg, tableOIDStr);
xmlGenSchema, xmlGenTable, sFileName, xmlErrMsg, localTableOIDStr);
if (rc != NO_ERROR)
{
@ -945,7 +953,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
{
genProc.startXMLFile();
execplan::CalpontSystemCatalog::TableName tbl(xmlGenSchema, xmlGenTable);
genProc.makeTableData(tbl);
genProc.makeTableData(tbl, localTableOIDStr);
if (!genProc.makeColumnData(tbl))
{
@ -1222,9 +1230,9 @@ int main(int argc, char** argv)
if (!xmlGenSchema.empty()) // create temporary job file name
{
// If JobID is not provided, then default to the table OID
std::string tableOIDStr{""};
if (sJobIdStr.empty())
{
std::string tableOIDStr;
getTableOID(xmlGenSchema, xmlGenTable, tableOIDStr);
if (!(BulkLoad::disableConsoleOutput()))
@ -1239,7 +1247,7 @@ int main(int argc, char** argv)
bUseTempJobFile = true;
constructTempXmlFile(curJob.getTempJobDir(), sJobIdStr, xmlGenSchema, xmlGenTable,
curJob.getAlternateImportDir(), curJob.getS3Bucket(), sFileName);
curJob.getAlternateImportDir(), curJob.getS3Bucket(), tableOIDStr, sFileName);
}
else // create user's persistent job file name
{

View File

@ -168,7 +168,21 @@ void XMLGenProc::startXMLFile()
// makeTableData
// Create XML tag for a table.
//------------------------------------------------------------------------------
// Convenience overload used only by colxml: resolves the table OID through the
// system catalog and forwards to the two-argument makeTableData(). The first
// tableRID() call can be comparatively slow; subsequent calls re-use data from
// the CalpontSystemCatalog cache.
void XMLGenProc::makeTableData(const CalpontSystemCatalog::TableName& table)
{
  boost::shared_ptr<CalpontSystemCatalog> systemCatalog =
      CalpontSystemCatalog::makeCalpontSystemCatalog(BULK_SYSCAT_SESSION_ID);
  systemCatalog->identity(CalpontSystemCatalog::EC);

  // tableRID() may take a long time when the EM is large.
  const auto tableOid = systemCatalog->tableRID(table).objnum;

  // Render the OID as text for the string-based overload.
  std::ostringstream oidText;
  oidText << tableOid;
  makeTableData(table, oidText.str());
}
void XMLGenProc::makeTableData(const CalpontSystemCatalog::TableName& table, const std::string& tableOIDStr)
{
static unsigned kount;
@ -180,11 +194,8 @@ void XMLGenProc::makeTableData(const CalpontSystemCatalog::TableName& table)
{
try
{
boost::shared_ptr<CalpontSystemCatalog> cat =
CalpontSystemCatalog::makeCalpontSystemCatalog(BULK_SYSCAT_SESSION_ID);
cat->identity(CalpontSystemCatalog::EC);
xmlTextWriterWriteFormatAttribute(fWriter, BAD_CAST xmlTagTable[TAG_TBL_OID], "%d",
cat->tableRID(table).objnum);
xmlTextWriterWriteFormatAttribute(fWriter, BAD_CAST xmlTagTable[TAG_TBL_OID], "%s",
tableOIDStr.c_str());
}
catch (std::exception& ex)
{

View File

@ -75,6 +75,9 @@ class XMLGenProc
*
* @param table Name of table for which the table tag is to be generated.
*/
EXPORT void makeTableData(const execplan::CalpontSystemCatalog::TableName& table,
const std::string& tableOIDStr);
EXPORT void makeTableData(const execplan::CalpontSystemCatalog::TableName& table);
/** @brief Creates column tags for the specified table.

View File

@ -381,11 +381,11 @@ void XMLJob::setJobData(xmlNode* pNode, const xmlTag tag, bool bExpectContent, X
if (tagType == TYPE_INT)
bSuccess = getNodeContent(pNode, &intVal, TYPE_INT);
else // longlong
if (tagType == TYPE_LONGLONG)
bSuccess = getNodeContent(pNode, &llVal, TYPE_LONGLONG);
else // char
if (tagType == TYPE_LONGLONG)
bSuccess = getNodeContent(pNode, &llVal, TYPE_LONGLONG);
else // char
if (tagType == TYPE_CHAR)
bSuccess = getNodeContentStr(pNode, bufString);
bSuccess = getNodeContentStr(pNode, bufString);
if (!bSuccess)
return;
@ -1194,7 +1194,8 @@ void XMLJob::validateAllColumnsHaveTags(const execplan::CalpontSystemCatalog::RI
/* static */
int XMLJob::genJobXMLFileName(const string& sXMLJobDir, const string& jobDir, const string& jobId,
bool bTempFile, const string& schemaName, const string& tableName,
boost::filesystem::path& xmlFilePath, string& errMsg, std::string& tableOIDStr)
boost::filesystem::path& xmlFilePath, string& errMsg,
const std::string& tableOIDStr)
{
// get full file directory path for XML job description file
if (sXMLJobDir.empty())

View File

@ -74,7 +74,7 @@ class XMLJob : public XMLOp
EXPORT static int genJobXMLFileName(const std::string& sXMLJobDir, const std::string& jobDir,
const std::string& jobId, bool bTempFile, const std::string& schemaName,
const std::string& tableName, boost::filesystem::path& xmlDirPath,
std::string& errMsg, std::string& tableOIDStr);
std::string& errMsg, const std::string& tableOIDStr);
/**
* @brief Get job structure