1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-28 19:54:55 +03:00

feat: add vacuum_partition functionality with initialization and execution logic

This commit is contained in:
Amr Elmohamady
2025-09-27 23:55:14 +03:00
committed by drrtuy
parent 899f0f6aae
commit 3d2e61a637
18 changed files with 832 additions and 11 deletions

View File

@@ -1230,6 +1230,67 @@ extern "C"
{
}
// Shared argument validation for the vacuum-partition UDF.
//
// Accepts exactly three arguments, all of STRING_RESULT type. On a mismatch an
// error naming `funcname` is written into `message` and 1 is returned. On
// success the result is declared non-nullable with a maximum length of 255
// and 0 is returned.
my_bool vacuum_partition_init(UDF_INIT* initid, UDF_ARGS* args, char* message, const char* funcname)
{
  bool argsOk = (args->arg_count == 3);

  // Only inspect the type array once the count is known to be three.
  for (unsigned idx = 0; argsOk && idx < 3; ++idx)
  {
    argsOk = (args->arg_type[idx] == STRING_RESULT);
  }

  if (!argsOk)
  {
    sprintf(message, "%s() requires three string arguments", funcname);
    return 1;
  }

  initid->max_length = 255;
  initid->maybe_null = 0;
  return 0;
}
// UDF init hook for mcs_vacuum_partition(): forwards to the shared validator,
// supplying the function name that appears in its error message.
my_bool mcs_vacuum_partition_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
  const char* const udfName = "MCSVACUUMPARTITION";
  const my_bool status = vacuum_partition_init(initid, args, message, udfName);
  return status;
}
// UDF body for mcs_vacuum_partition(schema, table, partition).
//
// Builds the table name and partition string from the three string arguments,
// makes sure the per-connection info object and the DMLProc client exist, and
// returns the text produced by ha_mcs_impl_vacuum_partition().
//
// @param args    UDF arguments; all three are STRING_RESULT (checked in init)
// @param result  caller-supplied output buffer
// @param length  (out) number of bytes written to `result`
// @return `result`
//
// UDF string arguments carry an explicit length and are not guaranteed to be
// NUL-terminated, and an SQL NULL arrives as a null pointer, so both are
// handled explicitly. The reply is truncated to the 255 bytes advertised as
// max_length by the init hook so the copy cannot overrun `result`.
const char* mcs_vacuum_partition(UDF_INIT* /*initid*/, UDF_ARGS* args, char* result,
                                 unsigned long* length, char* /*is_null*/, char* /*error*/)
{
  // Must match initid->max_length set in vacuum_partition_init().
  const unsigned long kMaxResultLen = 255;

  std::string vacuumResult;

  if (args->args[0] == NULL || args->args[1] == NULL || args->args[2] == NULL)
  {
    // The init hook declared the result non-nullable, so report via the text.
    vacuumResult = "Error: schema, table and partition arguments must not be NULL";
  }
  else
  {
    THD* thd = current_thd;

    if (get_fe_conn_info_ptr() == NULL)
    {
      set_fe_conn_info_ptr((void*)new cal_connection_info());
      thd_set_ha_data(thd, mcs_hton, get_fe_conn_info_ptr());
    }

    cal_connection_info* ci = reinterpret_cast<cal_connection_info*>(get_fe_conn_info_ptr());

    execplan::CalpontSystemCatalog::TableName tableName;
    tableName.schema.assign(args->args[0], args->lengths[0]);
    tableName.table.assign(args->args[1], args->lengths[1]);
    const std::string partition(args->args[2], args->lengths[2]);

    if (lower_case_table_names)
    {
      boost::algorithm::to_lower(tableName.schema);
      boost::algorithm::to_lower(tableName.table);
    }

    if (!ci->dmlProc)
    {
      ci->dmlProc = new MessageQueueClient("DMLProc");
    }

    vacuumResult = ha_mcs_impl_vacuum_partition(*ci, tableName, partition);
  }

  const unsigned long copyLen =
      (vacuumResult.length() > kMaxResultLen) ? kMaxResultLen : vacuumResult.length();
  memcpy(result, vacuumResult.data(), copyLen);
  *length = copyLen;
  return result;
}
// UDF deinit hook for mcs_vacuum_partition(). Intentionally empty: the init
// hook stores nothing in `initid` that would need to be released here.
void mcs_vacuum_partition_deinit(UDF_INIT* /*initid*/)
{
}
my_bool analyze_table_bloat_init(UDF_INIT* initid, UDF_ARGS* args, char* message, const char* funcname)
{
if (args->arg_count != 2 ||

View File

@@ -28,6 +28,7 @@
#include <unordered.h>
#include <fstream>
#include <sstream>
#include <vector>
#include <cerrno>
#include <cstring>
@@ -1077,6 +1078,158 @@ std::string ha_mcs_impl_analyze_partition_bloat(cal_impl_if::cal_connection_info
return analysisResult;
}
// Parses one field of a partition triplet.
// Succeeds only when `text` is a non-empty run of decimal digits whose value
// does not exceed `maxValue`; the parsed number is stored in `value`.
static bool parseVacuumTripletField(const std::string& text, unsigned long maxValue,
                                    unsigned long& value)
{
  if (text.empty())
    return false;

  // Reject signs, whitespace and trailing garbage that std::stoul would accept.
  for (const char ch : text)
  {
    if (ch < '0' || ch > '9')
      return false;
  }

  try
  {
    value = std::stoul(text);
  }
  catch (const std::exception&)
  {
    return false;  // does not fit in unsigned long
  }

  return value <= maxValue;
}

// Creates a hidden (out of service) stripe of extents for every column of a
// table, to be used as the destination partition of a vacuum operation.
//
// @param ci         connection info (not used by this function)
// @param tablename  schema/table whose columns get new extents
// @param partition  triplet string in the form "dbroot.segment.partition"
// @return a human readable status line; failures are reported as text that
//         starts with "Error:" rather than by throwing.
//
// Each triplet field is range-checked before narrowing (dbroot and segment are
// 16-bit, partition is 32-bit), so an out-of-range or malformed value is
// rejected instead of silently wrapping to a different partition.
std::string ha_mcs_impl_vacuum_partition(cal_impl_if::cal_connection_info& ci,
                                         execplan::CalpontSystemCatalog::TableName& tablename,
                                         const std::string& partition)
{
  THD* thd = current_thd;
  ulong sessionID = tid2sid(thd->thread_id);
  std::string result;

  try
  {
    // Parse partition triplet string: "dbroot.segment.partition"
    std::istringstream ss(partition);
    std::string item;
    std::vector<std::string> tokens;

    while (std::getline(ss, item, '.'))
    {
      tokens.push_back(item);
    }

    // std::getline drops an empty final field, so "1.2.3." must be rejected explicitly.
    if (tokens.size() != 3 || partition.back() == '.')
    {
      return "Error: Invalid partition triplet format. Expected: dbroot.segment.partition";
    }

    unsigned long dbRootValue = 0;
    unsigned long segmentValue = 0;
    unsigned long partitionValue = 0;

    if (!parseVacuumTripletField(tokens[0], 65535UL, dbRootValue) ||
        !parseVacuumTripletField(tokens[1], 65535UL, segmentValue) ||
        !parseVacuumTripletField(tokens[2], 4294967295UL, partitionValue))
    {
      return "Error: Invalid numeric values in partition triplet";
    }

    uint16_t dbRoot = static_cast<uint16_t>(dbRootValue);
    uint16_t segmentNum = static_cast<uint16_t>(segmentValue);
    uint32_t partitionNum = static_cast<uint32_t>(partitionValue);

    // Get system catalog
    boost::shared_ptr<execplan::CalpontSystemCatalog> systemCatalogPtr =
        execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(sessionID);
    systemCatalogPtr->identity(execplan::CalpontSystemCatalog::EC);

    // Get table information
    execplan::CalpontSystemCatalog::TableName sysCatalogTableName;
    sysCatalogTableName.schema = tablename.schema;
    sysCatalogTableName.table = tablename.table;

    // Check if table exists
    execplan::CalpontSystemCatalog::RIDList ridList;

    try
    {
      ridList = systemCatalogPtr->columnRIDs(sysCatalogTableName);
    }
    catch (const std::exception& ex)
    {
      return std::string("Error: Table not found - ") + ex.what();
    }

    if (ridList.empty())
    {
      return "Error: Table has no columns";
    }

    // Get AUX column OID
    execplan::CalpontSystemCatalog::OID auxColumnOid =
        systemCatalogPtr->tableAUXColumnOID(sysCatalogTableName);

    // Build column list for createHiddenStripeColumnExtents
    std::vector<BRM::CreateStripeColumnExtentsArgIn> cols;

    // Add regular columns
    for (const auto& roPair : ridList)
    {
      BRM::CreateStripeColumnExtentsArgIn colArg;
      colArg.oid = roPair.objnum;

      execplan::CalpontSystemCatalog::ColType colType = systemCatalogPtr->colType(roPair.objnum);
      colArg.colDataType = colType.colDataType;

      // Set width based on whether it's a dictionary column
      if (colType.ddn.dictOID > 3000)
      {
        colArg.width = 8;  // Dictionary columns use 8-byte tokens
      }
      else
      {
        colArg.width = colType.colWidth;
      }

      cols.push_back(colArg);
    }

    // Add AUX column if it exists
    if (auxColumnOid > 3000)
    {
      BRM::CreateStripeColumnExtentsArgIn auxColArg;
      auxColArg.oid = auxColumnOid;

      execplan::CalpontSystemCatalog::ColType auxColType = systemCatalogPtr->colType(auxColumnOid);
      auxColArg.colDataType = auxColType.colDataType;
      auxColArg.width = auxColType.colWidth;
      cols.push_back(auxColArg);
    }

    std::vector<BRM::CreateStripeColumnExtentsArgOut> extents;

    // Store original values for comparison
    uint16_t originalSegmentNum = segmentNum;
    uint32_t originalPartitionNum = partitionNum;

    // Call createHiddenStripeColumnExtents; partitionNum/segmentNum are updated
    // with the values BRM actually assigned.
    BRM::DBRM dbrm;
    int rc = dbrm.createHiddenStripeColumnExtents(cols, dbRoot, partitionNum, segmentNum, extents);

    if (rc != 0)
    {
      std::ostringstream oss;
      oss << "Error: Failed to create hidden stripe column extents, error code: " << rc;
      return oss.str();
    }

    // Build success message
    std::ostringstream successMsg;
    successMsg << "Successfully created hidden partition on DBRoot " << dbRoot;

    // Check if the function used our requested values or assigned different ones
    if (partitionNum != originalPartitionNum || segmentNum != originalSegmentNum)
    {
      successMsg << " (requested: " << originalPartitionNum << "." << originalSegmentNum
                 << ", assigned: " << partitionNum << "." << segmentNum << ")";
    }
    else
    {
      successMsg << " at partition " << partitionNum << ", segment " << segmentNum;
    }

    successMsg << " with " << extents.size() << " extents";
    result = successMsg.str();
  }
  catch (const std::exception& ex)
  {
    std::ostringstream errorMsg;
    errorMsg << "Error: " << ex.what();
    result = errorMsg.str();
  }
  catch (...)
  {
    result = "Error: Unknown exception occurred during vacuum partition operation";
  }

  return result;
}
std::string ha_mcs_impl_analyze_table_bloat(cal_impl_if::cal_connection_info& ci,
execplan::CalpontSystemCatalog::TableName& tablename)

View File

@@ -80,4 +80,7 @@ extern std::string ha_mcs_impl_analyze_partition_bloat(cal_impl_if::cal_connecti
const std::string& partition);
extern std::string ha_mcs_impl_analyze_table_bloat(cal_impl_if::cal_connection_info& ci,
execplan::CalpontSystemCatalog::TableName& tablename);
extern std::string ha_mcs_impl_vacuum_partition(cal_impl_if::cal_connection_info& ci,
execplan::CalpontSystemCatalog::TableName& tablename,
const std::string& partition);
#endif

View File

@@ -58,6 +58,7 @@ CREATE OR REPLACE FUNCTION mcs_emindex_size RETURNS INTEGER SONAME 'ha_columnsto
CREATE OR REPLACE FUNCTION mcs_emindex_free RETURNS INTEGER SONAME 'ha_columnstore.so';
CREATE OR REPLACE FUNCTION mcs_analyze_partition_bloat RETURNS STRING SONAME 'ha_columnstore.so';
CREATE OR REPLACE FUNCTION mcs_analyze_table_bloat RETURNS STRING SONAME 'ha_columnstore.so';
CREATE OR REPLACE FUNCTION mcs_vacuum_partition RETURNS STRING SONAME 'ha_columnstore.so';
CREATE OR REPLACE FUNCTION columnstore_dataload RETURNS STRING SONAME 'ha_columnstore.so';
CREATE OR REPLACE AGGREGATE FUNCTION regr_avgx RETURNS REAL SONAME 'libregr_mysql.so';
CREATE OR REPLACE AGGREGATE FUNCTION regr_avgy RETURNS REAL SONAME 'libregr_mysql.so';

View File

@@ -481,6 +481,8 @@ const uint8_t MARK_ALL_PARTITION_FOR_DELETION = 41;
const uint8_t CREATE_COLUMN_EXTENT_EXACT_FILE = 42;
const uint8_t DELETE_DBROOT = 43;
const uint8_t CREATE_STRIPE_COLUMN_EXTENTS = 44;
const uint8_t CREATE_HIDDEN_STRIPE_COLUMN_EXTENTS = 107;
const uint8_t MAKE_PARTITION_VISIBLE = 108;
/* SessionManager interface */
const uint8_t VER_ID = 45;

View File

@@ -991,6 +991,114 @@ int DBRM::createStripeColumnExtents(const std::vector<CreateStripeColumnExtentsA
return 0;
}
//------------------------------------------------------------------------------
// Send a request to create hidden stripe column extents.
//
// Request layout: command byte, serialized `cols`, dbRoot, partitionNum.
// segmentNum is not sent; it is only filled in from the reply.
// Reply layout: error byte, then (on success) partitionNum, segmentNum and the
// serialized extent list.
//
// Returns the send_recv() status if it is not ERR_OK, ERR_NETWORK for an empty
// reply, the reply's error byte if it is non-zero, ERR_FAILURE if unpacking the
// reply throws, and 0 on success.
//------------------------------------------------------------------------------
int DBRM::createHiddenStripeColumnExtents(const std::vector<CreateStripeColumnExtentsArgIn>& cols, uint16_t dbRoot,
uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents) DBRM_THROW
{
#ifdef BRM_INFO
if (fDebug)
{
TRACER_WRITELATER("createHiddenStripeColumnExtents");
TRACER_WRITE;
}
#endif
ByteStream command, response;
uint8_t err;
uint16_t tmp16;
uint32_t tmp32;
// Build and send the request.
command << CREATE_HIDDEN_STRIPE_COLUMN_EXTENTS;
serializeInlineVector(command, cols);
command << dbRoot << partitionNum;
err = send_recv(command, response);
if (err != ERR_OK)
return err;
if (response.length() == 0)
return ERR_NETWORK;
try
{
// The output arguments are only touched once the error byte says success.
response >> err;
if (err != 0)
return (int)err;
response >> tmp32;
partitionNum = tmp32;
response >> tmp16;
segmentNum = tmp16;
deserializeInlineVector(response, extents);
}
catch (exception& e)
{
cerr << e.what() << endl;
return ERR_FAILURE;
}
// NOTE(review): CHECK_EMPTY is defined elsewhere; presumably it rejects
// trailing bytes left in the reply - confirm against the macro.
CHECK_EMPTY(response);
return 0;
}
//------------------------------------------------------------------------------
// Send a request to make a hidden partition visible.
//
// Request layout: command byte, OID count as uint32_t, each OID, dbRoot,
// partitionNum. The reply carries a single error byte.
//
// Returns the send_recv() status if it is not ERR_OK, ERR_NETWORK for an empty
// reply, the reply's error byte if it is non-zero, ERR_FAILURE if reading the
// reply throws, and 0 on success.
//------------------------------------------------------------------------------
int DBRM::makePartitionVisible(const std::set<OID_t>& oids, uint16_t dbRoot, uint32_t partitionNum) DBRM_THROW
{
#ifdef BRM_INFO
if (fDebug)
{
TRACER_WRITELATER("makePartitionVisible");
TRACER_WRITE;
}
#endif
ByteStream command, response;
uint8_t err;
command << MAKE_PARTITION_VISIBLE;
// The set is sent length-prefixed so the receiver knows how many OIDs follow.
command << static_cast<uint32_t>(oids.size());
for (const auto& oid : oids)
{
command << oid;
}
command << dbRoot << partitionNum;
err = send_recv(command, response);
if (err != ERR_OK)
return err;
if (response.length() == 0)
return ERR_NETWORK;
try
{
response >> err;
if (err != 0)
return (int)err;
}
catch (exception& e)
{
cerr << e.what() << endl;
return ERR_FAILURE;
}
// NOTE(review): CHECK_EMPTY is defined elsewhere; presumably it rejects
// trailing bytes left in the reply - confirm against the macro.
CHECK_EMPTY(response);
return 0;
}
//------------------------------------------------------------------------------
// Send a request to create a column extent for the specified OID and DBRoot.
//------------------------------------------------------------------------------

View File

@@ -229,6 +229,35 @@ class DBRM
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents) DBRM_THROW;
/** @brief Allocate a "stripe" of hidden (out of service) extents for columns in a table (in DBRoot)
*
* Creates a new hidden partition that is not visible to normal query operations.
* The partition can be used as a destination for data movement operations like VACUUM.
* All extents are initially marked with EXTENTOUTOFSERVICE status.
*
* @param cols (in) List of column OIDs and column widths
* @param dbRoot (in) DBRoot for requested extents.
* @param partitionNum (in/out) Partition number in file path.
* @param segmentNum (out) Segment number selected for new extents.
* @param extents (out) list of lbids, numBlks, and fbo for new extents
* @return 0 on success, otherwise a non-zero BRM error code (for example
* ERR_NETWORK for an empty reply or ERR_FAILURE if the reply cannot be unpacked).
* The output arguments are only updated on success.
*/
EXPORT int createHiddenStripeColumnExtents(const std::vector<CreateStripeColumnExtentsArgIn>& cols,
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents) DBRM_THROW;
/** @brief Make a hidden partition visible to normal query operations
*
* Changes the status of all extents in the specified partition from
* EXTENTOUTOFSERVICE to EXTENTAVAILABLE, making them visible to queries.
* This is used to make partitions created with createHiddenStripeColumnExtents
* visible after data movement operations like VACUUM are complete.
*
* @param oids (in) Set of column OIDs in the partition
* @param dbRoot (in) The DBRoot containing the partition
* @param partitionNum (in) The partition number to make visible
* @return 0 on success, otherwise a non-zero BRM error code (for example
* ERR_NETWORK for an empty reply or ERR_FAILURE if the reply cannot be read).
*/
EXPORT int makePartitionVisible(const std::set<OID_t>& oids, uint16_t dbRoot, uint32_t partitionNum) DBRM_THROW;
/** @brief Allocate an extent for a column file
*
* Allocate a column extent for the specified OID and DBRoot.

View File

@@ -2593,6 +2593,93 @@ void ExtentMap::createStripeColumnExtents(const vector<CreateStripeColumnExtents
}
}
//------------------------------------------------------------------------------
// Creates a "stripe" of hidden extents for columns in a table (in DBRoot).
// These extents are marked as EXTENTOUTOFSERVICE and are not visible to
// normal query operations until makePartitionVisible() is called.
//------------------------------------------------------------------------------
void ExtentMap::createHiddenStripeColumnExtents(const vector<CreateStripeColumnExtentsArgIn>& cols,
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
vector<CreateStripeColumnExtentsArgOut>& extents)
{
LBID_t startLbid;
int allocSize;
uint32_t startBlkOffset;
grabEMEntryTable(WRITE);
grabEMIndex(WRITE);
grabFreeList(WRITE);
OID_t baselineOID = -1;
uint16_t baselineSegmentNum = -1;
uint32_t baselinePartNum = -1;
for (uint32_t i = 0; i < cols.size(); i++)
{
createColumnExtent_DBroot(cols[i].oid, cols[i].width, dbRoot, cols[i].colDataType, partitionNum,
segmentNum, startLbid, allocSize, startBlkOffset, false);
auto emIter = fExtentMapRBTree->find(startLbid);
if (emIter != fExtentMapRBTree->end())
{
makeUndoRecordRBTree(UndoRecordType::DEFAULT, emIter->second);
emIter->second.status = EXTENTOUTOFSERVICE;
}
if (i == 0)
{
baselineOID = cols[i].oid;
baselinePartNum = partitionNum;
baselineSegmentNum = segmentNum;
}
else if ((partitionNum != baselinePartNum) || (segmentNum != baselineSegmentNum))
{
ostringstream oss;
oss << "ExtentMap::createHiddenStripeColumnExtents(): "
"Inconsistent segment extent creation: "
<< "DBRoot: " << dbRoot << "OID1: " << baselineOID << "; Part#: " << baselinePartNum
<< "; Seg#: " << baselineSegmentNum << " <versus> OID2: " << cols[i].oid
<< "; Part#: " << partitionNum << "; Seg#: " << segmentNum;
log(oss.str(), logging::LOG_TYPE_CRITICAL);
throw invalid_argument(oss.str());
}
CreateStripeColumnExtentsArgOut extentInfo;
extentInfo.startLbid = startLbid;
extentInfo.allocSize = allocSize;
extentInfo.startBlkOffset = startBlkOffset;
extents.push_back(extentInfo);
}
}
//------------------------------------------------------------------------------
// Makes a hidden partition visible to normal query operations by changing
// the status of all extents from EXTENTOUTOFSERVICE to EXTENTAVAILABLE.
//------------------------------------------------------------------------------
void ExtentMap::makePartitionVisible(const set<OID_t>& oids, uint16_t dbRoot, uint32_t partitionNum)
{
grabEMEntryTable(WRITE);
for (const auto& oid : oids)
{
const auto lbids = fPExtMapIndexImpl_->find(dbRoot, oid);
auto emIterators = getEmIteratorsByLbids(lbids);
for (auto& emIter : emIterators)
{
EMEntry& emEntry = emIter->second;
if (emEntry.partitionNum == partitionNum && emEntry.dbRoot == dbRoot &&
emEntry.status == EXTENTOUTOFSERVICE)
{
makeUndoRecordRBTree(UndoRecordType::DEFAULT, emEntry);
emEntry.status = EXTENTAVAILABLE;
}
}
}
releaseEMEntryTable(WRITE);
}
//------------------------------------------------------------------------------
// Creates an extent for a column file on the specified DBRoot. This is the
// external API function referenced by the dbrm wrapper class.

View File

@@ -639,6 +639,39 @@ class ExtentMap : public Undoable
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents);
/** @brief Allocate a "stripe" of hidden extents for columns in a table (in DBRoot)
*
* Creates a new hidden partition that is not visible to normal query operations.
* The partition can be used as a destination for data movement operations like VACUUM.
* All extents are initially marked with EXTENTOUTOFSERVICE status and remain
* hidden even when HWM is set, unlike normal extents which become visible
* when HWM is set.
*
* The entry-table, index and free-list write locks are taken by this call.
*
* @param cols (in) List of column OIDs and column widths
* @param dbRoot (in) DBRoot for requested extents.
* @param partitionNum (in/out) Partition number in file path.
* If allocating OID's first extent for this DBRoot, then
* partitionNum is input, else it is an output arg.
* @param segmentNum (out) Segment number selected for new extents.
* @param extents (out) list of lbids, numBlks, and fbo for new extents
* @throw std::invalid_argument if the columns do not all end up in the same
* partition and segment.
*/
EXPORT void createHiddenStripeColumnExtents(const std::vector<CreateStripeColumnExtentsArgIn>& cols,
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents);
/** @brief Make a hidden partition visible to normal query operations
*
* Changes the status of all extents in the specified partition from
* EXTENTOUTOFSERVICE to EXTENTAVAILABLE, making them visible to queries.
* This is used to make partitions created with createHiddenStripeColumnExtents
* visible after data movement operations like VACUUM are complete.
*
* Extents of the given OIDs that are on a different DBRoot or partition, or
* whose status is not EXTENTOUTOFSERVICE, are left untouched.
*
* @param oids (in) Set of column OIDs in the partition
* @param dbRoot (in) The DBRoot containing the partition
* @param partitionNum (in) The partition number to make visible
*/
EXPORT void makePartitionVisible(const std::set<OID_t>& oids, uint16_t dbRoot, uint32_t partitionNum);
/** @brief Allocates an extent for a column file
*
* Allocates an extent for the specified OID and DBroot.

View File

@@ -293,6 +293,10 @@ void SlaveComm::processCommand(ByteStream& msg)
{
case CREATE_STRIPE_COLUMN_EXTENTS: do_createStripeColumnExtents(msg); break;
case CREATE_HIDDEN_STRIPE_COLUMN_EXTENTS: do_createHiddenStripeColumnExtents(msg); break;
case MAKE_PARTITION_VISIBLE: do_makePartitionVisible(msg); break;
case CREATE_COLUMN_EXTENT_DBROOT: do_createColumnExtent_DBroot(msg); break;
case CREATE_COLUMN_EXTENT_EXACT_FILE: do_createColumnExtentExactFile(msg); break;
@@ -432,6 +436,113 @@ void SlaveComm::do_createStripeColumnExtents(ByteStream& msg)
doSaveDelta = true;
}
//------------------------------------------------------------------------------
// Process a request to create hidden stripe column extents.
//
// Request payload: serialized column list, dbRoot (16-bit), partitionNum
// (32-bit). Reply: error byte, followed on success by partitionNum,
// segmentNum and the serialized extent list.
//
// In printOnly mode the request is only dumped to stdout and nothing is
// executed or replied.
//------------------------------------------------------------------------------
void SlaveComm::do_createHiddenStripeColumnExtents(ByteStream& msg)
{
  int err;
  uint16_t tmp16;
  // Must be 32 bits wide: the sender streams partitionNum as a uint32_t, and
  // reading it into a 16-bit variable would truncate it and leave bytes unread.
  uint32_t tmp32;
  uint16_t dbRoot;
  uint32_t partitionNum;
  uint16_t segmentNum;
  std::vector<CreateStripeColumnExtentsArgIn> cols;
  std::vector<CreateStripeColumnExtentsArgOut> extents;
  ByteStream reply;

#ifdef BRM_VERBOSE
  cerr << "WorkerComm: do_createHiddenStripeColumnExtents()" << endl;
#endif

  deserializeInlineVector(msg, cols);
  msg >> tmp16;
  dbRoot = tmp16;
  msg >> tmp32;
  partitionNum = tmp32;

  if (printOnly)
  {
    cout << "createHiddenStripeColumnExtents(). " << "DBRoot=" << dbRoot << "; Part#=" << partitionNum << endl;

    for (uint32_t i = 0; i < cols.size(); i++)
      cout << "HiddenStripeColExt arg " << i + 1 << ": oid=" << cols[i].oid << " width=" << cols[i].width << endl;

    return;
  }

  err = slave->createHiddenStripeColumnExtents(cols, dbRoot, partitionNum, segmentNum, extents);
  reply << (uint8_t)err;

  // The result fields are only meaningful (and only sent) on success.
  if (err == ERR_OK)
  {
    reply << partitionNum;
    reply << segmentNum;
    serializeInlineVector(reply, extents);
  }

#ifdef BRM_VERBOSE
  cerr << "WorkerComm: do_createHiddenStripeColumnExtents() err code is " << err << endl;
#endif

  if (!standalone)
    master.write(reply);

  // see bug 3596. Need to make sure a snapshot file exists.
  if ((cols.size() > 0) && (cols[0].oid < 3000))
    takeSnapshot = true;
  else
    doSaveDelta = true;
}
//------------------------------------------------------------------------------
// Process a request to make a hidden partition visible.
//
// Request payload: OID count (32-bit), that many OIDs, dbRoot, partitionNum.
// Reply: a single error byte. In printOnly mode the request is only dumped to
// stdout and nothing is executed or replied.
//------------------------------------------------------------------------------
void SlaveComm::do_makePartitionVisible(ByteStream& msg)
{
  ByteStream reply;
  std::set<OID_t> oids;
  uint32_t oidCount;
  uint16_t dbRoot;
  uint32_t partitionNum;

#ifdef BRM_VERBOSE
  cerr << "WorkerComm: do_makePartitionVisible()" << endl;
#endif

  // Unpack the length-prefixed OID list, then the partition coordinates.
  msg >> oidCount;

  for (uint32_t remaining = oidCount; remaining != 0; --remaining)
  {
    OID_t oid;
    msg >> oid;
    oids.insert(oid);
  }

  msg >> dbRoot;
  msg >> partitionNum;

  if (printOnly)
  {
    cout << "makePartitionVisible(). " << "DBRoot=" << dbRoot << "; Part#=" << partitionNum << "; OIDs: ";

    for (const auto& oid : oids)
    {
      cout << oid << " ";
    }

    cout << endl;
    return;
  }

  const int err = slave->makePartitionVisible(oids, dbRoot, partitionNum);
  reply << static_cast<uint8_t>(err);

#ifdef BRM_VERBOSE
  cerr << "WorkerComm: do_makePartitionVisible() err code is " << err << endl;
#endif

  if (!standalone)
  {
    master.write(reply);
  }

  doSaveDelta = true;
}
//------------------------------------------------------------------------------
// Process a request to create a column extent for a specific OID and DBRoot.
//------------------------------------------------------------------------------

View File

@@ -72,6 +72,8 @@ class SlaveComm
void processCommand(messageqcpp::ByteStream& msg);
void do_createStripeColumnExtents(messageqcpp::ByteStream& msg);
void do_createHiddenStripeColumnExtents(messageqcpp::ByteStream& msg);
void do_makePartitionVisible(messageqcpp::ByteStream& msg);
void do_createColumnExtent_DBroot(messageqcpp::ByteStream& msg);
void do_createColumnExtentExactFile(messageqcpp::ByteStream& msg);
void do_createDictStoreExtent(messageqcpp::ByteStream& msg);

View File

@@ -95,6 +95,44 @@ int SlaveDBRMNode::createStripeColumnExtents(const std::vector<CreateStripeColum
return 0;
}
//------------------------------------------------------------------------------
// Create hidden stripe column extents
//------------------------------------------------------------------------------
int SlaveDBRMNode::createHiddenStripeColumnExtents(const std::vector<CreateStripeColumnExtentsArgIn>& cols,
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents) throw()
{
try
{
em.createHiddenStripeColumnExtents(cols, dbRoot, partitionNum, segmentNum, extents);
}
catch (exception& e)
{
cerr << e.what() << endl;
return -1;
}
return 0;
}
//------------------------------------------------------------------------------
// Make a hidden partition visible
//------------------------------------------------------------------------------
int SlaveDBRMNode::makePartitionVisible(const std::set<OID_t>& oids, uint16_t dbRoot, uint32_t partitionNum) throw()
{
try
{
em.makePartitionVisible(oids, dbRoot, partitionNum);
}
catch (exception& e)
{
cerr << e.what() << endl;
return -1;
}
return 0;
}
//------------------------------------------------------------------------------
// Create an extent for the specified OID and DBRoot.
//------------------------------------------------------------------------------

View File

@@ -104,6 +104,25 @@ class SlaveDBRMNode
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents) throw();
/** @brief Allocate a "stripe" of hidden (out of service) extents for columns in a table
*
* Allocate a "stripe" of hidden (out of service) extents for the specified columns and DBRoot.
* Exceptions raised by the extent map are caught, printed to stderr and
* reported through the return value.
* @param cols (in) List of column OIDs and column widths
* @param dbRoot (in) DBRoot for requested extents.
* @param partitionNum (in/out) Partition number in file path.
* If allocating OID's first extent for this DBRoot, then
* partitionNum is input, else it is an output arg.
* @param segmentNum (out) Segment number selected for new extents.
* @param extents (out) list of lbids, numBlks, and fbo for new extents
* @return 0 on success, -1 on error */
EXPORT int createHiddenStripeColumnExtents(const std::vector<CreateStripeColumnExtentsArgIn>& cols,
uint16_t dbRoot, uint32_t& partitionNum, uint16_t& segmentNum,
std::vector<CreateStripeColumnExtentsArgOut>& extents) throw();
/** @brief Make a hidden partition visible to normal query operations
*
* Forwards to the extent map. Exceptions raised there are caught, printed to
* stderr and reported through the return value.
* @param oids (in) Set of column OIDs in the partition
* @param dbRoot (in) The DBRoot containing the partition
* @param partitionNum (in) The partition number to make visible
* @return 0 on success, -1 on error
*/
EXPORT int makePartitionVisible(const std::set<OID_t>& oids, uint16_t dbRoot, uint32_t partitionNum) throw();
/** @brief Allocate extent in the specified segment file
*
* Allocate column extent for the exact segment file specified by the

View File

@@ -34,6 +34,7 @@
#include <string.h>
#include <vector>
#include <sstream>
#include <stdexcept>
#include <boost/filesystem.hpp>
#include <boost/uuid/uuid.hpp>
@@ -48,6 +49,7 @@
#include "we_config.h"
#include "we_dbrootextenttracker.h"
#include "writeengine.h"
#include "extentmap.h"
#include "sys/time.h"
#include "sys/types.h"
#include "dataconvert.h"
@@ -630,14 +632,26 @@ int BulkLoad::preProcess(Job& job, int tableNo, std::shared_ptr<TableInfo>& tabl
if (i == 0) // select starting DBRoot/segment for column[0]
{
std::string trkErrMsg;
rc =
pDBRootExtentTracker->selectFirstSegFile(dbRootExtent, bNoStartExtentOnThisPM, bEmptyPM, trkErrMsg);
if (rc != NO_ERROR)
if (hasTargetPartition()) // Use specified target partition
{
fLog.logMsg(trkErrMsg, rc, MSGLVL_ERROR);
return rc;
rc = setTargetDBRootExtent(dbRootExtent, curJobCol.mapOid);
if (rc != NO_ERROR)
{
fLog.logMsg("Failed to set target partition", rc, MSGLVL_ERROR);
return rc;
}
}
else // Use automatic selection
{
std::string trkErrMsg;
rc =
pDBRootExtentTracker->selectFirstSegFile(dbRootExtent, bNoStartExtentOnThisPM, bEmptyPM, trkErrMsg);
if (rc != NO_ERROR)
{
fLog.logMsg(trkErrMsg, rc, MSGLVL_ERROR);
return rc;
}
}
}
else // select starting DBRoot/segment based on column[0] selection
@@ -1613,4 +1627,132 @@ void BulkLoad::setDefaultJobUUID()
fUUID = boost::uuids::random_generator()();
}
//------------------------------------------------------------------------------
// Fill `dbRootExtent` from the target partition triple instead of letting the
// extent tracker choose a starting segment file.
//
// Looks up the extents of `columnOid` on the target DBRoot and uses the first
// one whose partition and segment match the target.
//
// Returns NO_ERROR on success, ERR_INVALID_PARAM if no target partition is set
// or the matching extent has an unexpected status, the BRM return code if the
// extent lookup fails, and ERR_BRM_LOOKUP_START_LBID if no matching extent
// exists or an exception is raised.
//------------------------------------------------------------------------------
int BulkLoad::setTargetDBRootExtent(DBRootExtentInfo& dbRootExtent, OID columnOid)
{
  if (!fHasTargetPartition)
  {
    return ERR_INVALID_PARAM;
  }

  // Partition coordinates come straight from the parsed triple.
  dbRootExtent.fDbRoot = fTargetPartition.dbroot;
  dbRootExtent.fSegment = fTargetPartition.seg;
  dbRootExtent.fPartition = fTargetPartition.pp;

  std::vector<BRM::EMEntry> dbRootExtents;

  try
  {
    // Fetch every extent of this OID on the target DBRoot.
    const int brmRc =
        BRMWrapper::getInstance()->getExtents_dbroot(columnOid, dbRootExtents, fTargetPartition.dbroot);

    if (brmRc != 0)
    {
      fLog.logMsg("Failed to get extents for target DBRoot", brmRc, MSGLVL_ERROR);
      return brmRc;
    }

    // First extent in the target partition/segment, if any.
    const BRM::EMEntry* match = nullptr;

    for (const auto& candidate : dbRootExtents)
    {
      if (candidate.partitionNum == fTargetPartition.pp && candidate.segmentNum == fTargetPartition.seg)
      {
        match = &candidate;
        break;
      }
    }

    if (match == nullptr)
    {
      // This shouldn't happen for maintenance tasks using pre-created partitions
      fLog.logMsg("Target partition not found - this may indicate the partition was not pre-created",
                  ERR_BRM_LOOKUP_START_LBID, MSGLVL_ERROR);
      return ERR_BRM_LOOKUP_START_LBID;
    }

    dbRootExtent.fStartLbid = match->range.start;
    dbRootExtent.fLocalHwm = match->HWM;

    switch (match->status)
    {
      case 2:  // EXTENTOUTOFSERVICE
        // Hidden partition created by createHiddenStripeColumnExtents; it is
        // already allocated, so it can be written to directly.
        dbRootExtent.fState = DBROOT_EXTENT_PARTIAL_EXTENT;
        fLog.logMsg("Found hidden (out-of-service) target partition - ready for data loading", NO_ERROR,
                    MSGLVL_INFO1);
        break;

      case 0:  // EXTENTAVAILABLE
        dbRootExtent.fState = DBROOT_EXTENT_PARTIAL_EXTENT;
        fLog.logMsg("Found available target partition", NO_ERROR, MSGLVL_INFO1);
        break;

      default:
        // Any other status is treated as an error.
        fLog.logMsg("Target partition exists but has invalid status: " + std::to_string(match->status),
                    ERR_INVALID_PARAM, MSGLVL_ERROR);
        return ERR_INVALID_PARAM;
    }

    // Total blocks for this DBRoot are not computed here.
    dbRootExtent.fDBRootTotalBlocks = 0;

    fLog.logMsg("Using target partition: " + fTargetPartition.toString(), NO_ERROR, MSGLVL_INFO1);
    return NO_ERROR;
  }
  catch (const std::exception& e)
  {
    fLog.logMsg(std::string("Failed to setup target partition: ") + e.what(), ERR_BRM_LOOKUP_START_LBID,
                MSGLVL_ERROR);
    return ERR_BRM_LOOKUP_START_LBID;
  }
}
//------------------------------------------------------------------------------
// Parse and store the target partition triple using LogicalPartition's stream
// extraction.
//
// An empty string clears the target partition and returns. Otherwise the
// triple is parsed into fTargetPartition and fHasTargetPartition is set only
// if parsing succeeds and no field holds the all-ones "invalid" value.
// Throws std::runtime_error, with the offending input in the message, when the
// triple cannot be parsed or validated.
//------------------------------------------------------------------------------
void BulkLoad::setTargetPartitionTriple(const std::string& partitionTriple)
{
  fHasTargetPartition = false;

  if (!partitionTriple.empty())
  {
    try
    {
      std::istringstream tripleStream(partitionTriple);
      tripleStream >> fTargetPartition;

      if (tripleStream.fail())
      {
        throw std::runtime_error("Failed to parse partition triple format");
      }

      // LogicalPartition uses (uint16_t)-1 and (uint32_t)-1 as invalid values
      const bool hasInvalidField = fTargetPartition.pp == static_cast<uint32_t>(-1) ||
                                   fTargetPartition.seg == static_cast<uint16_t>(-1) ||
                                   fTargetPartition.dbroot == static_cast<uint16_t>(-1);

      if (hasInvalidField)
      {
        throw std::runtime_error("Invalid partition values detected");
      }

      fHasTargetPartition = true;
    }
    catch (const std::exception& e)
    {
      // Re-throw with the original input for context.
      throw std::runtime_error("Failed to parse partition triple '" + partitionTriple + "': " + e.what());
    }
  }
}
} // namespace WriteEngine

View File

@@ -38,6 +38,7 @@
#include "we_tableinfo.h"
#include "brmtypes.h"
#include "logicalpartition.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include "boost/ptr_container/ptr_vector.hpp"
@@ -150,6 +151,11 @@ class BulkLoad : public FileOp
void setS3Host(const std::string& host);
void setS3Region(const std::string& region);
void setUsername(const std::string& username);
void setTargetPartitionTriple(const std::string& partitionTriple);
const BRM::LogicalPartition& getTargetPartition() const;
bool hasTargetPartition() const;
// Timer functions
void startTimer();
void stopTimer();
@@ -237,6 +243,8 @@ class BulkLoad : public FileOp
std::string fS3Bucket; // S3 Bucket
std::string fS3Region; // S3 Region
std::string fUsername{"mysql"}; // data files owner name mysql by default
BRM::LogicalPartition fTargetPartition; // Target partition for -a flag
bool fHasTargetPartition{false}; // Whether target partition is specified
//--------------------------------------------------------------------------
// Private Functions
@@ -261,6 +269,9 @@ class BulkLoad : public FileOp
// Map specified DBRoot to it's first segment file number
int mapDBRootToFirstSegment(OID columnOid, uint16_t dbRoot, uint16_t& segment);
// Set DBRootExtentInfo from target partition triple
int setTargetDBRootExtent(DBRootExtentInfo& dbRootExtent, OID columnOid);
// The thread method for the read thread.
void read(int id);
@@ -524,6 +535,16 @@ inline void BulkLoad::setUsername(const std::string& username)
fUsername = username;
}
// Returns the stored target partition. The value is only validated when
// hasTargetPartition() reports true.
inline const BRM::LogicalPartition& BulkLoad::getTargetPartition() const
{
return fTargetPartition;
}
// Reports whether a target partition has been specified for this job.
inline bool BulkLoad::hasTargetPartition() const
{
return fHasTargetPartition;
}
inline void BulkLoad::startTimer()
{
gettimeofday(&fStartTime, nullptr);

View File

@@ -134,6 +134,8 @@ WECmdArgs::WECmdArgs(int argc, char** argv)
"Directory for the output .err and .bad files")
("job-uuid,u", po::value<string>(&fUUID), "import job UUID")
("username,U", po::value<string>(&fUsername), "Username of the files owner.")
("target-partition,a", po::value<string>(&fTargetPartitionTriple),
"Target partition triple in format Directory.Segment.DBRoot ")
("dbname", po::value<string>(), "Name of the database to load")
("table", po::value<string>(), "Name of table to load")
("load-file", po::value<string>(),
@@ -186,7 +188,7 @@ void WECmdArgs::usage() const
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
"[-d debugLevel] [-i] "
<< endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone] [-a Directory.Segment.DBRoot]" << endl
<< " [-U username]" << endl
<< endl;
@@ -201,7 +203,7 @@ void WECmdArgs::usage() const
"[-d debugLevel] [-i] "
<< endl
<< " [-p path] [-l loadFile]" << endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
<< " [-D] [-N] [-L rejectDir] [-T timeZone] [-a Directory.Segment.DBRoot]" << endl
<< " [-U username]" << endl
<< endl;
@@ -212,7 +214,9 @@ void WECmdArgs::usage() const
<< " Example2: Some column values are enclosed within double quotes." << endl
<< " " << fPrgmName << " -j 3000 -E '\"'" << endl
<< " Example3: Import a nation table without a Job XML file" << endl
<< " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl;
<< " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl
<< " Example4: Import to specific partition (maintenance tasks)" << endl
<< " " << fPrgmName << " -j 302 -a 100.0.1 tpch nation nation.tbl" << endl;
exit(1);
}
@@ -410,6 +414,12 @@ void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string
curJob.setUsername(fUsername);
}
curJob.setSkipRows(fSkipRows);
// Set target partition if specified
if (!fTargetPartitionTriple.empty())
{
curJob.setTargetPartitionTriple(fTargetPartitionTriple);
}
curJob.setDefaultJobUUID();

View File

@@ -116,6 +116,7 @@ private:
bool fDisableTableLockTimeOut{false};
bool fSilent{false};
std::string fModuleIDandPID;
std::string fTargetPartitionTriple; // Directory.Segment.DBRoot for -a flag
std::string fReportFilename;
bool fKeepRollbackMetaData{false};