You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-06 16:09:29 +03:00
feat: mcs_analyze_table_bloat
This commit is contained in:
@@ -1306,23 +1306,22 @@ void CommandPackageProcessor::analyzePartitionBloat(const dmlpackage::CalpontDML
|
||||
|
||||
// Create a FunctionColumn for idbPartition(aux)
|
||||
// parms: psueducolumn dbroot, segmentdir, segment
|
||||
SPTP sptp2;
|
||||
FunctionColumn* fc = new FunctionColumn();
|
||||
fc->functionName("idbpartition");
|
||||
fc->sessionID(fSessionID);
|
||||
fc->expressionId(0);
|
||||
funcexp::FunctionParm parms;
|
||||
PseudoColumn* dbroot = new PseudoColumn(*auxCol, PSEUDO_DBROOT, fSessionID);
|
||||
sptp2.reset(new ParseTree(dbroot));
|
||||
parms.push_back(sptp2);
|
||||
sptp.reset(new ParseTree(dbroot));
|
||||
parms.push_back(sptp);
|
||||
|
||||
PseudoColumn* pp = new PseudoColumn(*auxCol, PSEUDO_SEGMENTDIR, fSessionID);
|
||||
sptp2.reset(new ParseTree(pp));
|
||||
parms.push_back(sptp2);
|
||||
sptp.reset(new ParseTree(pp));
|
||||
parms.push_back(sptp);
|
||||
|
||||
PseudoColumn* seg = new PseudoColumn(*auxCol, PSEUDO_SEGMENT, fSessionID);
|
||||
sptp2.reset(new ParseTree(seg));
|
||||
parms.push_back(sptp2);
|
||||
sptp.reset(new ParseTree(seg));
|
||||
parms.push_back(sptp);
|
||||
|
||||
fc->functionParms(parms);
|
||||
|
||||
@@ -1367,9 +1366,9 @@ void CommandPackageProcessor::analyzePartitionBloat(const dmlpackage::CalpontDML
|
||||
|
||||
for (auto it = sysDataList.begin(); it != sysDataList.end(); it++)
|
||||
{
|
||||
if (it == sysDataList.begin()) {
|
||||
if ((*it)->ColumnOID() == static_cast<int>(countAuxCol->expressionId())) {
|
||||
countAux = (*it)->GetData(0);
|
||||
} else {
|
||||
} else if ((*it)->ColumnOID() == static_cast<int>(countCaseCol->expressionId())) {
|
||||
countAuxDeleted = (*it)->GetData(0);
|
||||
}
|
||||
}
|
||||
@@ -1392,4 +1391,232 @@ void CommandPackageProcessor::analyzePartitionBloat(const dmlpackage::CalpontDML
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Reports, per partition, the percentage of rows whose AUX column equals 1.
 *
 * Builds and runs the equivalent of
 *   SELECT idbPartition(aux), COUNT(aux) AS count_aux,
 *          COUNT(CASE aux WHEN 1 THEN 1 END) AS count_aux_deleted
 *   FROM <table> GROUP BY idbPartition(aux);
 * and formats the rows as "<partition>: <pct>%" entries separated by ", ".
 *
 * @param cpackage DML package supplying the schema and table name to analyze.
 * @param result   Receives the formatted text (or a failure / "no AUX column"
 *                 message) in result.bloatAnalysis.
 *
 * Exceptions derived from std::exception are caught and converted into a
 * failure message; nothing is rethrown for them.
 */
void CommandPackageProcessor::analyzeTableBloat(const dmlpackage::CalpontDMLPackage& cpackage,
                                                DMLPackageProcessor::DMLResult& result)
{
  boost::shared_ptr<CalpontSystemCatalog> systemCatalogPtr =
      CalpontSystemCatalog::makeCalpontSystemCatalog(fSessionID);
  systemCatalogPtr->identity(CalpontSystemCatalog::EC);
  CalpontSystemCatalog::TableName tableName;
  tableName.schema = cpackage.get_SchemaName();
  tableName.table = cpackage.get_TableName();

  std::ostringstream analysisResults;
  bool bErrFlag = false;
  std::string errorMsg;

  try
  {
    // Get AUX column OID for the table
    CalpontSystemCatalog::OID auxColumnOid = systemCatalogPtr->tableAUXColumnOID(tableName);

    // OIDs of 3000 or below are treated as "this table has no AUX column".
    if (auxColumnOid <= 3000)
    {
      analysisResults << "Table " << tableName.toString()
                      << " does not have an AUX column for bloat analysis.";
      result.bloatAnalysis = analysisResults.str();
      return;
    }

    CalpontSelectExecutionPlan csep;
    CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList;
    CalpontSelectExecutionPlan::ColumnMap colMap;

    // The prototype columns below are held by shared_ptr so they are released on
    // every exit path (including exceptions); only clones are handed to the plan.

    // Create the base SimpleColumn for 'aux'
    boost::shared_ptr<SimpleColumn> auxCol(
        new SimpleColumn(tableName.schema, tableName.table, "aux", fSessionID));
    auxCol->alias("aux");
    auxCol->setOID();
    CalpontSystemCatalog::ColType auxColType;
    auxColType.colDataType = CalpontSystemCatalog::INT;
    auxColType.colWidth = 4;
    auxCol->resultType(auxColType);

    // Create the COUNT(aux) AS count_aux aggregate column
    boost::shared_ptr<AggregateColumn> countAuxCol(new AggregateColumn(fSessionID));
    countAuxCol->alias("count_aux");
    countAuxCol->aggOp(AggregateColumn::COUNT);
    countAuxCol->functionName("count");
    countAuxCol->expressionId(1);
    CalpontSystemCatalog::ColType countAuxColType;
    countAuxColType.colDataType = CalpontSystemCatalog::INT;
    countAuxColType.colWidth = 4;
    countAuxCol->resultType(countAuxColType);

    SRCP auxSRCP(auxCol->clone());
    countAuxCol->aggParms().push_back(auxSRCP);

    // Create the CASE aux WHEN 1 THEN 1 END expression
    boost::shared_ptr<FunctionColumn> caseCol(new FunctionColumn());
    caseCol->functionName("case_simple");  // Use case_simple for expression comparison
    caseCol->sessionID(fSessionID);
    caseCol->expressionId(2);
    caseCol->alias("case_aux_deleted");

    // Set the result type for the CASE expression
    CalpontSystemCatalog::ColType caseColType;
    caseColType.colDataType = CalpontSystemCatalog::INT;
    caseColType.colWidth = 4;
    caseCol->resultType(caseColType);

    // Create the WHEN value: 1
    ConstantColumn* whenValue = new ConstantColumn("1", ConstantColumn::NUM);
    whenValue->sessionID(fSessionID);

    // Create the THEN result: 1
    ConstantColumn* thenResult = new ConstantColumn("1", ConstantColumn::NUM);
    thenResult->sessionID(fSessionID);

    // Build the function parameters for CASE: <expr>, <when>, <then>
    funcexp::FunctionParm funcParms;
    SPTP sptp;

    sptp.reset(new ParseTree(auxCol->clone()));
    funcParms.push_back(sptp);

    sptp.reset(new ParseTree(whenValue));
    funcParms.push_back(sptp);

    sptp.reset(new ParseTree(thenResult));
    funcParms.push_back(sptp);

    caseCol->functionParms(funcParms);

    // Create the COUNT(CASE aux WHEN 1 THEN 1 END) AS count_aux_deleted aggregate column
    boost::shared_ptr<AggregateColumn> countCaseCol(new AggregateColumn(fSessionID));
    countCaseCol->alias("count_aux_deleted");
    countCaseCol->aggOp(AggregateColumn::COUNT);
    countCaseCol->functionName("count");
    countCaseCol->expressionId(3);
    CalpontSystemCatalog::ColType countCaseColType;
    countCaseColType.colDataType = CalpontSystemCatalog::INT;
    countCaseColType.colWidth = 4;
    countCaseCol->resultType(countCaseColType);

    SRCP caseSRCP(caseCol->clone());
    countCaseCol->aggParms().push_back(caseSRCP);

    // Add the base 'aux' column to ColumnMap (used for reference resolution).
    // Note: the aggregate results do NOT go in ColumnMap.
    // "aux" is inserted three times, once per reference: COUNT(aux), the CASE
    // expression, and idbPartition(aux).
    const std::string auxKey = tableName.schema + "." + tableName.table + "." + "aux";
    colMap.insert(CMVT_(auxKey, auxSRCP));
    auxSRCP.reset(auxCol->clone());
    colMap.insert(CMVT_(auxKey, auxSRCP));
    auxSRCP.reset(auxCol->clone());
    colMap.insert(CMVT_(auxKey, auxSRCP));

    // Add both COUNT columns to ReturnedColumnList (what gets returned by SELECT)
    SRCP countSRCP(countAuxCol->clone());
    returnedColumnList.push_back(countSRCP);
    SRCP countCaseSRCP(countCaseCol->clone());
    returnedColumnList.push_back(countCaseSRCP);

    // Create a FunctionColumn for idbPartition(aux)
    // parms: pseudocolumn dbroot, segmentdir, segment
    boost::shared_ptr<FunctionColumn> fc(new FunctionColumn());
    fc->functionName("idbpartition");
    fc->alias("idbPartition(aux)");
    fc->sessionID(fSessionID);
    fc->expressionId(0);
    funcexp::FunctionParm parms;

    PseudoColumn* dbroot = new PseudoColumn(*auxCol, PSEUDO_DBROOT, fSessionID);
    sptp.reset(new ParseTree(dbroot));
    parms.push_back(sptp);

    PseudoColumn* pp = new PseudoColumn(*auxCol, PSEUDO_SEGMENTDIR, fSessionID);
    sptp.reset(new ParseTree(pp));
    parms.push_back(sptp);

    PseudoColumn* seg = new PseudoColumn(*auxCol, PSEUDO_SEGMENT, fSessionID);
    sptp.reset(new ParseTree(seg));
    parms.push_back(sptp);

    fc->functionParms(parms);

    CalpontSystemCatalog::ColType resultType;
    resultType.colDataType = CalpontSystemCatalog::VARCHAR;
    resultType.colWidth = 256;
    fc->resultType(resultType);

    // The functor is only needed to derive the operation type, so a local
    // object is enough and cannot leak if operationType() throws.
    funcexp::Func_idbpartition idbpartition;
    fc->operationType(idbpartition.operationType(parms, fc->resultType()));

    SRCP fcSRCP(fc->clone());
    returnedColumnList.push_back(fcSRCP);

    csep.columnMapNonStatic(colMap);
    csep.returnedCols(returnedColumnList);

    // Set the group by
    CalpontSelectExecutionPlan::GroupByColumnList groupByList;
    groupByList.push_back(fcSRCP);
    csep.groupByCols(groupByList);

    // Set the session ID, transaction ID and version Id
    BRM::QueryContext verID;
    verID = fSessionManager.verID();
    csep.verID(verID);
    csep.sessionID(fSessionID);
    BRM::TxnID txnID;
    txnID = fSessionManager.getTxnID(fSessionID);
    csep.txnID(txnID.id);

    // Set the table list
    CalpontSelectExecutionPlan::TableList tablelist;
    tablelist.push_back(make_aliastable(tableName.schema, tableName.table, ""));
    csep.tableList(tablelist);
    csep.schemaName(tableName.schema, 0);
    csep.tableName(tableName.table, 0);

    // Send CSEP to ExeMgr
    CalpontSystemCatalog::NJLSysDataList sysDataList;
    systemCatalogPtr->getQueryData(csep, sysDataList);

    // Locate each result column by matching its ColumnOID against the
    // expression id assigned above.
    const size_t notFound = static_cast<size_t>(-1);
    size_t countAuxIndex = notFound;
    size_t countAuxDeletedIndex = notFound;
    size_t idbPartitionIndex = notFound;

    for (size_t i = 0; i < sysDataList.sysDataVec.size(); ++i)
    {
      const int columnOid = sysDataList.sysDataVec[i]->ColumnOID();

      if (columnOid == static_cast<int>(countAuxCol->expressionId()))
      {
        countAuxIndex = i;
      }
      else if (columnOid == static_cast<int>(countCaseCol->expressionId()))
      {
        countAuxDeletedIndex = i;
      }
      else if (columnOid == static_cast<int>(fc->expressionId()))
      {
        idbPartitionIndex = i;
      }
    }

    if (countAuxIndex != notFound && countAuxDeletedIndex != notFound && idbPartitionIndex != notFound)
    {
      const int rowCount = sysDataList.sysDataVec[idbPartitionIndex]->dataCount();

      for (int i = 0; i < rowCount; i++)
      {
        int64_t countAux = sysDataList.sysDataVec[countAuxIndex]->GetData(i);
        int64_t countAuxDeleted = sysDataList.sysDataVec[countAuxDeletedIndex]->GetData(i);
        std::string idbPartition = sysDataList.sysDataVec[idbPartitionIndex]->GetStringData(i).toString();

        // A zero total would otherwise print "nan%" / "inf%"; report 0 instead.
        const double deletedPct =
            (countAux != 0) ? (static_cast<double>(countAuxDeleted) / countAux) * 100 : 0.0;

        if (i > 0)
        {
          analysisResults << ", ";
        }

        analysisResults << idbPartition << ": " << std::fixed << std::setprecision(2) << deletedPct << "%";
      }
    }

    result.bloatAnalysis = analysisResults.str();
  }
  catch (std::exception& ex)
  {
    bErrFlag = true;
    errorMsg = ex.what();
  }

  if (bErrFlag)
  {
    std::ostringstream oss;
    oss << "Table bloat analysis failed for table " << tableName.toString() << ": " << errorMsg;
    result.bloatAnalysis = oss.str();
  }
}
|
||||
|
||||
} // namespace dmlpackageprocessor
|
||||
|
||||
@@ -54,6 +54,7 @@ class CommandPackageProcessor : public DMLPackageProcessor
|
||||
void clearTableLock(uint64_t uniqueId, const dmlpackage::CalpontDMLPackage& cpackage, DMLResult& result);
|
||||
void establishTableLockToClear(uint64_t tableLockID, BRM::TableLockInfo& lockInfo);
|
||||
void analyzePartitionBloat(const dmlpackage::CalpontDMLPackage& cpackage, DMLPackageProcessor::DMLResult& result);
|
||||
void analyzeTableBloat(const dmlpackage::CalpontDMLPackage& cpackage, DMLPackageProcessor::DMLResult& result);
|
||||
DMLResult processPackageInternal(dmlpackage::CalpontDMLPackage& cpackage) override;
|
||||
|
||||
// Tracks active cleartablelock commands by storing set of table lock IDs
|
||||
|
||||
@@ -1181,7 +1181,7 @@ extern "C"
|
||||
}
|
||||
|
||||
initid->maybe_null = 0;
|
||||
initid->max_length = 3;
|
||||
initid->max_length = 255;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1230,4 +1230,63 @@ extern "C"
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Shared init routine for the table-bloat UDF.
 *
 * Accepts the call only when exactly two arguments are supplied and both are
 * of type STRING_RESULT. On success the result is declared non-nullable with a
 * maximum length of 255 and 0 is returned; otherwise an error text naming
 * `funcname` is written to `message` and 1 is returned.
 */
my_bool analyze_table_bloat_init(UDF_INIT* initid, UDF_ARGS* args, char* message, const char* funcname)
{
  const bool hasTwoStringArgs = args->arg_count == 2 &&
                                args->arg_type[0] == STRING_RESULT &&
                                args->arg_type[1] == STRING_RESULT;

  if (hasTwoStringArgs)
  {
    initid->maybe_null = 0;
    initid->max_length = 255;
    return 0;
  }

  sprintf(message, "%s() requires two string arguments", funcname);
  return 1;
}
|
||||
|
||||
/**
 * UDF init entry point for mcs_analyze_table_bloat.
 *
 * Delegates argument validation to analyze_table_bloat_init(), passing the
 * name that is inserted into the error text on failure.
 */
my_bool mcs_analyze_table_bloat_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
  static const char reportedName[] = "MCSANALYZETABLEBLOAT";
  return analyze_table_bloat_init(initid, args, message, reportedName);
}
|
||||
|
||||
/**
 * @brief UDF body: returns the bloat analysis text for (schema, table).
 *
 * @param args    args[0] is the schema name, args[1] the table name; each is
 *                read as a pointer plus length and need not be NUL-terminated.
 * @param result  Server-provided output buffer; the text is copied into it.
 * @param length  Receives the number of bytes written to `result`.
 * @return `result`.
 *
 * The text is truncated to 255 bytes, the size the UDF interface guarantees
 * for `result` and the max_length declared by the init routine. Returning a
 * longer string would need a buffer owned by the UDF itself.
 */
const char* mcs_analyze_table_bloat(UDF_INIT* /*initid*/, UDF_ARGS* args, char* result,
                                    unsigned long* length, char* /*is_null*/, char* /*error*/)
{
  // Capacity of the server-provided result buffer.
  static const unsigned long kResultCapacity = 255;

  std::string analysisResult;

  if (args->args[0] == nullptr || args->args[1] == nullptr)
  {
    // A SQL NULL argument arrives as a null pointer; building a std::string
    // from it would be undefined behaviour, so report it instead.
    analysisResult = "MCSANALYZETABLEBLOAT() schema and table arguments must not be NULL";
  }
  else
  {
    THD* thd = current_thd;

    if (get_fe_conn_info_ptr() == nullptr)
    {
      set_fe_conn_info_ptr((void*)new cal_connection_info());
      thd_set_ha_data(thd, mcs_hton, get_fe_conn_info_ptr());
    }

    cal_connection_info* ci = reinterpret_cast<cal_connection_info*>(get_fe_conn_info_ptr());
    execplan::CalpontSystemCatalog::TableName tableName;

    // Use the explicit lengths: string arguments are not guaranteed to be
    // NUL-terminated.
    tableName.schema.assign(args->args[0], args->lengths[0]);
    tableName.table.assign(args->args[1], args->lengths[1]);

    if (lower_case_table_names)
    {
      boost::algorithm::to_lower(tableName.schema);
      boost::algorithm::to_lower(tableName.table);
    }

    if (!ci->dmlProc)
    {
      ci->dmlProc = new MessageQueueClient("DMLProc");
    }

    analysisResult = ha_mcs_impl_analyze_table_bloat(*ci, tableName);
  }

  // Never write past the end of the server's buffer.
  const unsigned long fullLength = static_cast<unsigned long>(analysisResult.length());
  const unsigned long copyLength = (fullLength < kResultCapacity) ? fullLength : kResultCapacity;

  memcpy(result, analysisResult.data(), copyLength);
  *length = copyLength;
  return result;
}
|
||||
|
||||
/**
 * UDF deinit entry point for mcs_analyze_table_bloat.
 *
 * Intentionally a no-op: the function body performs no work.
 */
void mcs_analyze_table_bloat_deinit(UDF_INIT* /*initid*/)
{
  // Nothing to do.
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -1077,3 +1077,76 @@ std::string ha_mcs_impl_analyze_partition_bloat(cal_impl_if::cal_connection_info
|
||||
return analysisResult;
|
||||
}
|
||||
|
||||
|
||||
std::string ha_mcs_impl_analyze_table_bloat(cal_impl_if::cal_connection_info& ci,
|
||||
execplan::CalpontSystemCatalog::TableName& tablename)
|
||||
{
|
||||
THD* thd = current_thd;
|
||||
ulong sessionID = tid2sid(thd->thread_id);
|
||||
CalpontDMLPackage* pDMLPackage;
|
||||
std::string dmlStatement("ANALYZETABLEBLOAT");
|
||||
VendorDMLStatement cmdStmt(dmlStatement, DML_COMMAND, sessionID);
|
||||
pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackageFromMysqlBuffer(cmdStmt);
|
||||
|
||||
if (lower_case_table_names)
|
||||
{
|
||||
boost::algorithm::to_lower(tablename.schema);
|
||||
boost::algorithm::to_lower(tablename.table);
|
||||
}
|
||||
pDMLPackage->set_SchemaName(tablename.schema);
|
||||
pDMLPackage->set_TableName(tablename.table);
|
||||
|
||||
ByteStream bytestream;
|
||||
bytestream << static_cast<uint32_t>(sessionID);
|
||||
pDMLPackage->write(bytestream);
|
||||
delete pDMLPackage;
|
||||
|
||||
ByteStream::byte b = 0;
|
||||
ByteStream::octbyte rows;
|
||||
std::string errorMsg;
|
||||
std::string analysisResult;
|
||||
|
||||
try
|
||||
{
|
||||
ci.dmlProc->write(bytestream);
|
||||
bytestream = ci.dmlProc->read();
|
||||
|
||||
if (bytestream.length() == 0)
|
||||
{
|
||||
thd->get_stmt_da()->set_overwrite_status(true);
|
||||
thd->raise_error_printf(ER_INTERNAL_ERROR, "Lost connection to DMLProc [9]");
|
||||
}
|
||||
else
|
||||
{
|
||||
bytestream >> b;
|
||||
bytestream >> rows;
|
||||
bytestream >> errorMsg;
|
||||
|
||||
// Skip tableLockInfo, queryStats, extendedStats, miniStats (not used for this command)
|
||||
std::string tmp;
|
||||
bytestream >> tmp;
|
||||
bytestream >> tmp;
|
||||
bytestream >> tmp;
|
||||
bytestream >> tmp;
|
||||
|
||||
// Read the bloatAnalysis result
|
||||
bytestream >> analysisResult;
|
||||
}
|
||||
}
|
||||
catch (runtime_error&)
|
||||
{
|
||||
thd->get_stmt_da()->set_overwrite_status(true);
|
||||
thd->raise_error_printf(ER_INTERNAL_ERROR, "Lost connection to DMLProc [10]");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
thd->get_stmt_da()->set_overwrite_status(true);
|
||||
thd->raise_error_printf(ER_INTERNAL_ERROR, "Caught unknown error");
|
||||
}
|
||||
|
||||
if (b != 0)
|
||||
analysisResult = errorMsg;
|
||||
|
||||
return analysisResult;
|
||||
}
|
||||
|
||||
|
||||
@@ -78,4 +78,6 @@ extern std::string ha_mcs_impl_cleartablelock(cal_impl_if::cal_connection_info&
|
||||
extern std::string ha_mcs_impl_analyze_partition_bloat(cal_impl_if::cal_connection_info& ci,
|
||||
execplan::CalpontSystemCatalog::TableName& tablename,
|
||||
const std::string& partition);
|
||||
extern std::string ha_mcs_impl_analyze_table_bloat(cal_impl_if::cal_connection_info& ci,
|
||||
execplan::CalpontSystemCatalog::TableName& tablename);
|
||||
#endif
|
||||
|
||||
@@ -57,6 +57,7 @@ CREATE OR REPLACE FUNCTION mcssystemprimary RETURNS INTEGER SONAME 'ha_columnsto
|
||||
CREATE OR REPLACE FUNCTION mcs_emindex_size RETURNS INTEGER SONAME 'ha_columnstore.so';
|
||||
CREATE OR REPLACE FUNCTION mcs_emindex_free RETURNS INTEGER SONAME 'ha_columnstore.so';
|
||||
CREATE OR REPLACE FUNCTION mcs_analyze_partition_bloat RETURNS STRING SONAME 'ha_columnstore.so';
|
||||
CREATE OR REPLACE FUNCTION mcs_analyze_table_bloat RETURNS STRING SONAME 'ha_columnstore.so';
|
||||
CREATE OR REPLACE FUNCTION columnstore_dataload RETURNS STRING SONAME 'ha_columnstore.so';
|
||||
CREATE OR REPLACE AGGREGATE FUNCTION regr_avgx RETURNS REAL SONAME 'libregr_mysql.so';
|
||||
CREATE OR REPLACE AGGREGATE FUNCTION regr_avgy RETURNS REAL SONAME 'libregr_mysql.so';
|
||||
|
||||
Reference in New Issue
Block a user