1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-02 06:13:16 +03:00

feat: add vacuum_partition functionality with initialization and execution logic

This commit is contained in:
Amr Elmohamady
2025-09-27 23:55:14 +03:00
committed by drrtuy
parent 899f0f6aae
commit 3d2e61a637
18 changed files with 832 additions and 11 deletions

View File

@@ -34,6 +34,7 @@
#include <string.h>
#include <vector>
#include <sstream>
#include <stdexcept>
#include <boost/filesystem.hpp>
#include <boost/uuid/uuid.hpp>
@@ -48,6 +49,7 @@
#include "we_config.h"
#include "we_dbrootextenttracker.h"
#include "writeengine.h"
#include "extentmap.h"
#include "sys/time.h"
#include "sys/types.h"
#include "dataconvert.h"
@@ -630,14 +632,26 @@ int BulkLoad::preProcess(Job& job, int tableNo, std::shared_ptr<TableInfo>& tabl
if (i == 0) // select starting DBRoot/segment for column[0]
{
std::string trkErrMsg;
rc =
pDBRootExtentTracker->selectFirstSegFile(dbRootExtent, bNoStartExtentOnThisPM, bEmptyPM, trkErrMsg);
if (rc != NO_ERROR)
if (hasTargetPartition()) // Use specified target partition
{
fLog.logMsg(trkErrMsg, rc, MSGLVL_ERROR);
return rc;
rc = setTargetDBRootExtent(dbRootExtent, curJobCol.mapOid);
if (rc != NO_ERROR)
{
fLog.logMsg("Failed to set target partition", rc, MSGLVL_ERROR);
return rc;
}
}
else // Use automatic selection
{
std::string trkErrMsg;
rc =
pDBRootExtentTracker->selectFirstSegFile(dbRootExtent, bNoStartExtentOnThisPM, bEmptyPM, trkErrMsg);
if (rc != NO_ERROR)
{
fLog.logMsg(trkErrMsg, rc, MSGLVL_ERROR);
return rc;
}
}
}
else // select starting DBRoot/segment based on column[0] selection
@@ -1613,4 +1627,132 @@ void BulkLoad::setDefaultJobUUID()
fUUID = boost::uuids::random_generator()();
}
//------------------------------------------------------------------------------
// Set DBRootExtentInfo from target partition triple
//
// Asks BRM for the extents of columnOid on the target DBRoot and selects the
// first one whose partition and segment numbers match fTargetPartition.
//
// dbRootExtent (out) - receives the target partition, DBRoot, segment,
//                      starting LBID, local HWM and extent state. It is written
//                      only when NO_ERROR is returned; every error path leaves
//                      it untouched.
// columnOid    (in)  - OID of the column whose extents are searched.
//
// return - NO_ERROR on success;
//          ERR_INVALID_PARAM if no target partition has been set, or the
//          matching extent is neither available nor out-of-service;
//          the BRM return code if the extent lookup fails;
//          ERR_BRM_LOOKUP_START_LBID if no extent matches the target
//          partition/segment or an exception is thrown during the lookup.
//------------------------------------------------------------------------------
int BulkLoad::setTargetDBRootExtent(DBRootExtentInfo& dbRootExtent, OID columnOid)
{
  if (!fHasTargetPartition)
    return ERR_INVALID_PARAM;

  try
  {
    // Get all extents for this OID on the target DBRoot
    std::vector<BRM::EMEntry> extents;
    const int rc = BRMWrapper::getInstance()->getExtents_dbroot(columnOid, extents, fTargetPartition.dbroot);

    if (rc != 0)
    {
      fLog.logMsg("Failed to get extents for target DBRoot", rc, MSGLVL_ERROR);
      return rc;
    }

    // Find the first extent with matching partition and segment.
    // NOTE(review): if a segment file can hold several extents, the first match
    // is not necessarily the one carrying the HWM - confirm first-match is the
    // intended choice for pre-created target partitions.
    const BRM::EMEntry* target = nullptr;

    for (const auto& extent : extents)
    {
      if (extent.partitionNum == fTargetPartition.pp && extent.segmentNum == fTargetPartition.seg)
      {
        target = &extent;
        break;
      }
    }

    if (target == nullptr)
    {
      // This shouldn't happen for maintenance tasks using pre-created partitions
      fLog.logMsg("Target partition not found - this may indicate the partition was not pre-created",
                  ERR_BRM_LOOKUP_START_LBID, MSGLVL_ERROR);
      return ERR_BRM_LOOKUP_START_LBID;
    }

    // Only available and out-of-service extents are accepted; both are loaded
    // the same way, they differ only in the message that is logged.
    if (target->status == BRM::EXTENTOUTOFSERVICE)
    {
      // Hidden partition created by createHiddenStripeColumnExtents; it is
      // already allocated, so it can be written to directly.
      fLog.logMsg("Found hidden (out-of-service) target partition - ready for data loading", NO_ERROR,
                  MSGLVL_INFO1);
    }
    else if (target->status == BRM::EXTENTAVAILABLE)
    {
      fLog.logMsg("Found available target partition", NO_ERROR, MSGLVL_INFO1);
    }
    else
    {
      // Any other status is treated as an error
      fLog.logMsg("Target partition exists but has invalid status: " + std::to_string(target->status),
                  ERR_INVALID_PARAM, MSGLVL_ERROR);
      return ERR_INVALID_PARAM;
    }

    fLog.logMsg("Using target partition: " + fTargetPartition.toString(), NO_ERROR, MSGLVL_INFO1);

    // Everything validated: publish the result in one place so the caller never
    // sees a half-populated DBRootExtentInfo.
    dbRootExtent.fPartition = fTargetPartition.pp;    // physical partition
    dbRootExtent.fDbRoot = fTargetPartition.dbroot;   // DBRoot
    dbRootExtent.fSegment = fTargetPartition.seg;     // segment
    dbRootExtent.fStartLbid = target->range.start;
    dbRootExtent.fLocalHwm = target->HWM;
    dbRootExtent.fState = DBROOT_EXTENT_PARTIAL_EXTENT;
    // The DBRoot's total block count is not computed here; it is reported as 0.
    dbRootExtent.fDBRootTotalBlocks = 0;

    return NO_ERROR;
  }
  catch (const std::exception& e)
  {
    fLog.logMsg(std::string("Failed to setup target partition: ") + e.what(), ERR_BRM_LOOKUP_START_LBID,
                MSGLVL_ERROR);
    return ERR_BRM_LOOKUP_START_LBID;
  }
}
//------------------------------------------------------------------------------
// Parse and set target partition triple using LogicalPartition
//------------------------------------------------------------------------------
void BulkLoad::setTargetPartitionTriple(const std::string& partitionTriple)
{
fHasTargetPartition = false;
if (partitionTriple.empty())
return;
try
{
// Use the existing LogicalPartition parsing
std::istringstream iss(partitionTriple);
iss >> fTargetPartition;
if (iss.fail())
{
throw std::runtime_error("Failed to parse partition triple format");
}
// Basic validation - LogicalPartition uses (uint16_t)-1 and (uint32_t)-1 as invalid values
if (fTargetPartition.pp == (uint32_t)-1 ||
fTargetPartition.seg == (uint16_t)-1 ||
fTargetPartition.dbroot == (uint16_t)-1)
{
throw std::runtime_error("Invalid partition values detected");
}
fHasTargetPartition = true;
}
catch (const std::exception& e)
{
std::ostringstream oss;
oss << "Failed to parse partition triple '" << partitionTriple << "': " << e.what();
throw std::runtime_error(oss.str());
}
}
} // namespace WriteEngine

View File

@@ -38,6 +38,7 @@
#include "we_tableinfo.h"
#include "brmtypes.h"
#include "logicalpartition.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include "boost/ptr_container/ptr_vector.hpp"
@@ -150,6 +151,11 @@ class BulkLoad : public FileOp
void setS3Host(const std::string& host);
void setS3Region(const std::string& region);
void setUsername(const std::string& username);
void setTargetPartitionTriple(const std::string& partitionTriple);
const BRM::LogicalPartition& getTargetPartition() const;
bool hasTargetPartition() const;
// Timer functions
void startTimer();
void stopTimer();
@@ -237,6 +243,8 @@ class BulkLoad : public FileOp
std::string fS3Bucket; // S3 Bucket
std::string fS3Region; // S3 Region
std::string fUsername{"mysql"}; // data files owner name mysql by default
BRM::LogicalPartition fTargetPartition; // Target partition for -a flag
bool fHasTargetPartition{false}; // Whether target partition is specified
//--------------------------------------------------------------------------
// Private Functions
@@ -261,6 +269,9 @@ class BulkLoad : public FileOp
// Map specified DBRoot to it's first segment file number
int mapDBRootToFirstSegment(OID columnOid, uint16_t dbRoot, uint16_t& segment);
// Set DBRootExtentInfo from target partition triple
int setTargetDBRootExtent(DBRootExtentInfo& dbRootExtent, OID columnOid);
// The thread method for the read thread.
void read(int id);
@@ -524,6 +535,16 @@ inline void BulkLoad::setUsername(const std::string& username)
fUsername = username;
}
// Returns a reference to the stored target partition (fTargetPartition).
// The value is meaningful only when hasTargetPartition() returns true.
inline const BRM::LogicalPartition& BulkLoad::getTargetPartition() const
{
return fTargetPartition;
}
// Returns true when a target partition has been specified, i.e. the
// fHasTargetPartition flag is set.
inline bool BulkLoad::hasTargetPartition() const
{
return fHasTargetPartition;
}
inline void BulkLoad::startTimer()
{
gettimeofday(&fStartTime, nullptr);

View File

@@ -134,6 +134,8 @@ WECmdArgs::WECmdArgs(int argc, char** argv)
"Directory for the output .err and .bad files")
("job-uuid,u", po::value<string>(&fUUID), "import job UUID")
("username,U", po::value<string>(&fUsername), "Username of the files owner.")
("target-partition,a", po::value<string>(&fTargetPartitionTriple),
"Target partition triple in format Directory.Segment.DBRoot ")
("dbname", po::value<string>(), "Name of the database to load")
("table", po::value<string>(), "Name of table to load")
("load-file", po::value<string>(),
@@ -186,7 +188,7 @@ void WECmdArgs::usage() const
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
"[-d debugLevel] [-i] "
<< endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone] [-a Directory.Segment.DBRoot]" << endl
<< " [-U username]" << endl
<< endl;
@@ -201,7 +203,7 @@ void WECmdArgs::usage() const
"[-d debugLevel] [-i] "
<< endl
<< " [-p path] [-l loadFile]" << endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone] [-a Directory.Segment.DBRoot]" << endl
<< " [-U username]" << endl
<< endl;
@@ -212,7 +214,9 @@ void WECmdArgs::usage() const
<< " Example2: Some column values are enclosed within double quotes." << endl
<< " " << fPrgmName << " -j 3000 -E '\"'" << endl
<< " Example3: Import a nation table without a Job XML file" << endl
<< " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl;
<< " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl
<< " Example4: Import to specific partition (maintenance tasks)" << endl
<< " " << fPrgmName << " -j 302 -a 100.0.1 tpch nation nation.tbl" << endl;
exit(1);
}
@@ -410,6 +414,12 @@ void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string
curJob.setUsername(fUsername);
}
curJob.setSkipRows(fSkipRows);
// Set target partition if specified
if (!fTargetPartitionTriple.empty())
{
curJob.setTargetPartitionTriple(fTargetPartitionTriple);
}
curJob.setDefaultJobUUID();

View File

@@ -116,6 +116,7 @@ private:
bool fDisableTableLockTimeOut{false};
bool fSilent{false};
std::string fModuleIDandPID;
std::string fTargetPartitionTriple; // Directory.Segment.DBRoot for -a flag
std::string fReportFilename;
bool fKeepRollbackMetaData{false};