You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-02 06:13:16 +03:00
feat: add vacuum_partition functionality with initialization and execution logic
This commit is contained in:
@@ -34,6 +34,7 @@
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
@@ -48,6 +49,7 @@
|
||||
#include "we_config.h"
|
||||
#include "we_dbrootextenttracker.h"
|
||||
#include "writeengine.h"
|
||||
#include "extentmap.h"
|
||||
#include "sys/time.h"
|
||||
#include "sys/types.h"
|
||||
#include "dataconvert.h"
|
||||
@@ -630,14 +632,26 @@ int BulkLoad::preProcess(Job& job, int tableNo, std::shared_ptr<TableInfo>& tabl
|
||||
|
||||
if (i == 0) // select starting DBRoot/segment for column[0]
|
||||
{
|
||||
std::string trkErrMsg;
|
||||
rc =
|
||||
pDBRootExtentTracker->selectFirstSegFile(dbRootExtent, bNoStartExtentOnThisPM, bEmptyPM, trkErrMsg);
|
||||
|
||||
if (rc != NO_ERROR)
|
||||
if (hasTargetPartition()) // Use specified target partition
|
||||
{
|
||||
fLog.logMsg(trkErrMsg, rc, MSGLVL_ERROR);
|
||||
return rc;
|
||||
rc = setTargetDBRootExtent(dbRootExtent, curJobCol.mapOid);
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
fLog.logMsg("Failed to set target partition", rc, MSGLVL_ERROR);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
else // Use automatic selection
|
||||
{
|
||||
std::string trkErrMsg;
|
||||
rc =
|
||||
pDBRootExtentTracker->selectFirstSegFile(dbRootExtent, bNoStartExtentOnThisPM, bEmptyPM, trkErrMsg);
|
||||
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
fLog.logMsg(trkErrMsg, rc, MSGLVL_ERROR);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
else // select starting DBRoot/segment based on column[0] selection
|
||||
@@ -1613,4 +1627,132 @@ void BulkLoad::setDefaultJobUUID()
|
||||
fUUID = boost::uuids::random_generator()();
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Set DBRootExtentInfo from target partition triple
|
||||
//------------------------------------------------------------------------------
|
||||
int BulkLoad::setTargetDBRootExtent(DBRootExtentInfo& dbRootExtent, OID columnOid)
|
||||
{
|
||||
if (!fHasTargetPartition)
|
||||
return ERR_INVALID_PARAM;
|
||||
|
||||
// Set the basic partition info from target
|
||||
dbRootExtent.fPartition = fTargetPartition.pp; // physical partition
|
||||
dbRootExtent.fDbRoot = fTargetPartition.dbroot; // DBRoot
|
||||
dbRootExtent.fSegment = fTargetPartition.seg; // segment
|
||||
|
||||
// Get extent information from BRM for this specific partition
|
||||
std::vector<BRM::EMEntry> extents;
|
||||
|
||||
try
|
||||
{
|
||||
// Get all extents for this OID on the target DBRoot
|
||||
int rc = BRMWrapper::getInstance()->getExtents_dbroot(columnOid, extents, fTargetPartition.dbroot);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
fLog.logMsg("Failed to get extents for target DBRoot", rc, MSGLVL_ERROR);
|
||||
return rc;
|
||||
}
|
||||
|
||||
// Find extent with matching partition and segment
|
||||
bool extentFound = false;
|
||||
for (const auto& extent : extents)
|
||||
{
|
||||
if (extent.partitionNum == fTargetPartition.pp &&
|
||||
extent.segmentNum == fTargetPartition.seg)
|
||||
{
|
||||
dbRootExtent.fStartLbid = extent.range.start;
|
||||
dbRootExtent.fLocalHwm = extent.HWM;
|
||||
|
||||
// Handle different extent states
|
||||
if (extent.status == 2) // EXTENTOUTOFSERVICE
|
||||
{
|
||||
// This is a hidden partition created by createHiddenStripeColumnExtents
|
||||
// We can write to it directly since it's already allocated
|
||||
dbRootExtent.fState = DBROOT_EXTENT_PARTIAL_EXTENT;
|
||||
fLog.logMsg("Found hidden (out-of-service) target partition - ready for data loading",
|
||||
NO_ERROR, MSGLVL_INFO1);
|
||||
}
|
||||
else if (extent.status == 0) // EXTENTAVAILABLE
|
||||
{
|
||||
// This is a normal available extent
|
||||
dbRootExtent.fState = DBROOT_EXTENT_PARTIAL_EXTENT;
|
||||
fLog.logMsg("Found available target partition", NO_ERROR, MSGLVL_INFO1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Other status (e.g., invalid) - handle as error
|
||||
fLog.logMsg("Target partition exists but has invalid status: " +
|
||||
std::to_string(extent.status), ERR_INVALID_PARAM, MSGLVL_ERROR);
|
||||
return ERR_INVALID_PARAM;
|
||||
}
|
||||
|
||||
extentFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!extentFound)
|
||||
{
|
||||
// This shouldn't happen for maintenance tasks using pre-created partitions
|
||||
fLog.logMsg("Target partition not found - this may indicate the partition was not pre-created",
|
||||
ERR_BRM_LOOKUP_START_LBID, MSGLVL_ERROR);
|
||||
return ERR_BRM_LOOKUP_START_LBID;
|
||||
}
|
||||
|
||||
// Get total blocks for this DBRoot (approximate)
|
||||
dbRootExtent.fDBRootTotalBlocks = 0;
|
||||
|
||||
fLog.logMsg("Using target partition: " + fTargetPartition.toString(),
|
||||
NO_ERROR, MSGLVL_INFO1);
|
||||
|
||||
return NO_ERROR;
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
fLog.logMsg(std::string("Failed to setup target partition: ") + e.what(),
|
||||
ERR_BRM_LOOKUP_START_LBID, MSGLVL_ERROR);
|
||||
return ERR_BRM_LOOKUP_START_LBID;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Parse and set target partition triple using LogicalPartition
|
||||
//------------------------------------------------------------------------------
|
||||
void BulkLoad::setTargetPartitionTriple(const std::string& partitionTriple)
|
||||
{
|
||||
fHasTargetPartition = false;
|
||||
|
||||
if (partitionTriple.empty())
|
||||
return;
|
||||
|
||||
try
|
||||
{
|
||||
// Use the existing LogicalPartition parsing
|
||||
std::istringstream iss(partitionTriple);
|
||||
iss >> fTargetPartition;
|
||||
|
||||
if (iss.fail())
|
||||
{
|
||||
throw std::runtime_error("Failed to parse partition triple format");
|
||||
}
|
||||
|
||||
// Basic validation - LogicalPartition uses (uint16_t)-1 and (uint32_t)-1 as invalid values
|
||||
if (fTargetPartition.pp == (uint32_t)-1 ||
|
||||
fTargetPartition.seg == (uint16_t)-1 ||
|
||||
fTargetPartition.dbroot == (uint16_t)-1)
|
||||
{
|
||||
throw std::runtime_error("Invalid partition values detected");
|
||||
}
|
||||
|
||||
fHasTargetPartition = true;
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Failed to parse partition triple '" << partitionTriple << "': " << e.what();
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace WriteEngine
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
|
||||
#include "we_tableinfo.h"
|
||||
#include "brmtypes.h"
|
||||
#include "logicalpartition.h"
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#include "boost/ptr_container/ptr_vector.hpp"
|
||||
@@ -150,6 +151,11 @@ class BulkLoad : public FileOp
|
||||
void setS3Host(const std::string& host);
|
||||
void setS3Region(const std::string& region);
|
||||
void setUsername(const std::string& username);
|
||||
|
||||
void setTargetPartitionTriple(const std::string& partitionTriple);
|
||||
const BRM::LogicalPartition& getTargetPartition() const;
|
||||
bool hasTargetPartition() const;
|
||||
|
||||
// Timer functions
|
||||
void startTimer();
|
||||
void stopTimer();
|
||||
@@ -237,6 +243,8 @@ class BulkLoad : public FileOp
|
||||
std::string fS3Bucket; // S3 Bucket
|
||||
std::string fS3Region; // S3 Region
|
||||
std::string fUsername{"mysql"}; // data files owner name mysql by default
|
||||
BRM::LogicalPartition fTargetPartition; // Target partition for -a flag
|
||||
bool fHasTargetPartition{false}; // Whether target partition is specified
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Private Functions
|
||||
@@ -261,6 +269,9 @@ class BulkLoad : public FileOp
|
||||
|
||||
// Map specified DBRoot to its first segment file number
|
||||
int mapDBRootToFirstSegment(OID columnOid, uint16_t dbRoot, uint16_t& segment);
|
||||
|
||||
// Set DBRootExtentInfo from target partition triple
|
||||
int setTargetDBRootExtent(DBRootExtentInfo& dbRootExtent, OID columnOid);
|
||||
|
||||
// The thread method for the read thread.
|
||||
void read(int id);
|
||||
@@ -524,6 +535,16 @@ inline void BulkLoad::setUsername(const std::string& username)
|
||||
fUsername = username;
|
||||
}
|
||||
|
||||
// Returns the stored target partition.  The value is only meaningful when
// hasTargetPartition() reports true.
inline const BRM::LogicalPartition& BulkLoad::getTargetPartition() const
{
  return this->fTargetPartition;
}
|
||||
|
||||
inline bool BulkLoad::hasTargetPartition() const
|
||||
{
|
||||
return fHasTargetPartition;
|
||||
}
|
||||
|
||||
inline void BulkLoad::startTimer()
|
||||
{
|
||||
gettimeofday(&fStartTime, nullptr);
|
||||
|
||||
@@ -134,6 +134,8 @@ WECmdArgs::WECmdArgs(int argc, char** argv)
|
||||
"Directory for the output .err and .bad files")
|
||||
("job-uuid,u", po::value<string>(&fUUID), "import job UUID")
|
||||
("username,U", po::value<string>(&fUsername), "Username of the files owner.")
|
||||
("target-partition,a", po::value<string>(&fTargetPartitionTriple),
|
||||
"Target partition triple in format Directory.Segment.DBRoot ")
|
||||
("dbname", po::value<string>(), "Name of the database to load")
|
||||
("table", po::value<string>(), "Name of table to load")
|
||||
("load-file", po::value<string>(),
|
||||
@@ -186,7 +188,7 @@ void WECmdArgs::usage() const
|
||||
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
|
||||
"[-d debugLevel] [-i] "
|
||||
<< endl
|
||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone] [-a Directory.Segment.DBRoot]" << endl
|
||||
<< " [-U username]" << endl
|
||||
<< endl;
|
||||
|
||||
@@ -201,7 +203,7 @@ void WECmdArgs::usage() const
|
||||
"[-d debugLevel] [-i] "
|
||||
<< endl
|
||||
<< " [-p path] [-l loadFile]" << endl
|
||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone] [-a Directory.Segment.DBRoot]" << endl
|
||||
<< " [-U username]" << endl
|
||||
<< endl;
|
||||
|
||||
@@ -212,7 +214,9 @@ void WECmdArgs::usage() const
|
||||
<< " Example2: Some column values are enclosed within double quotes." << endl
|
||||
<< " " << fPrgmName << " -j 3000 -E '\"'" << endl
|
||||
<< " Example3: Import a nation table without a Job XML file" << endl
|
||||
<< " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl;
|
||||
<< " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl
|
||||
<< " Example4: Import to specific partition (maintenance tasks)" << endl
|
||||
<< " " << fPrgmName << " -j 302 -a 100.0.1 tpch nation nation.tbl" << endl;
|
||||
|
||||
exit(1);
|
||||
}
|
||||
@@ -410,6 +414,12 @@ void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string
|
||||
curJob.setUsername(fUsername);
|
||||
}
|
||||
curJob.setSkipRows(fSkipRows);
|
||||
|
||||
// Set target partition if specified
|
||||
if (!fTargetPartitionTriple.empty())
|
||||
{
|
||||
curJob.setTargetPartitionTriple(fTargetPartitionTriple);
|
||||
}
|
||||
|
||||
curJob.setDefaultJobUUID();
|
||||
|
||||
|
||||
@@ -116,6 +116,7 @@ private:
|
||||
bool fDisableTableLockTimeOut{false};
|
||||
bool fSilent{false};
|
||||
std::string fModuleIDandPID;
|
||||
std::string fTargetPartitionTriple; // Directory.Segment.DBRoot for -a flag
|
||||
|
||||
std::string fReportFilename;
|
||||
bool fKeepRollbackMetaData{false};
|
||||
|
||||
Reference in New Issue
Block a user