mariadb-columnstore-engine (mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git)
MCOL-4694 Add the JobList.DECConnectionsPerQuery setting to allow
distributing jobs among the DEC connections from the ExeMgr (EM) to the
PrimProc servers (PPs)

commit 736b9b81fc, parent 5155a08a67, committed by Roman Nozdrin
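Overview of the change: ResourceManager gains an fDECConnectionsPerQuery value that DistributedEngineComm (DEC) picks up in Setup(). Each query's message-queue entry (MQE) is now constructed with an interleaved starting connection id derived from its queue key, and writeToClient() asks the MQE for the next connection id instead of stepping a per-PM counter inline. The setting is read from the JobList section of Columnstore.xml; a minimal sketch of how it might be set, where the value 2 and the surrounding element layout are illustrative assumptions rather than something shown in this commit:

<Columnstore>
    <JobList>
        <!-- How many DEC connections a single query interleaves across.
             Absent or 0 falls back to the per-PrimProc connection count
             (getPsConnectionsPerPrimProc(); see the resourcemanager.cpp
             hunk below). -->
        <DECConnectionsPerQuery>2</DECConnectionsPerQuery>
    </JobList>
</Columnstore>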
@@ -243,6 +243,7 @@ void DistributedEngineComm::Setup()
     uint32_t newPmCount = fRm->getPsCount();
     throttleThreshold = fRm->getDECThrottleThreshold();
     tbpsThreadCount = fRm->getJlNumScanReceiveThreads();
+    fDECConnectionsPerQuery = fRm->getDECConnectionsPerQuery();
     unsigned numConnections = getNumConnections();
     oam::Oam oam;
     ModuleTypeConfig moduletypeconfig;
@@ -458,7 +459,8 @@ void DistributedEngineComm::addQueue(uint32_t key, bool sendACKs)
 
     boost::mutex* lock = new boost::mutex();
     condition* cond = new condition();
-    boost::shared_ptr<MQE> mqe(new MQE(pmCount));
+    uint32_t firstPMInterleavedConnectionId = key % (fPmConnections.size() / pmCount) * fDECConnectionsPerQuery * pmCount % fPmConnections.size();
+    boost::shared_ptr<MQE> mqe(new MQE(pmCount, firstPMInterleavedConnectionId));
     mqe->queue = StepMsgQueue(lock, cond);
     mqe->sendACKs = sendACKs;
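For orientation: % and * share precedence and bind left to right in C++, so the expression above is ((key % (connections / pmCount)) * fDECConnectionsPerQuery * pmCount) % connections, which lands successive queue keys on different groups of per-PM connections. A self-contained sketch of that arithmetic, using assumed sizes (8 pooled connections, 2 PMs, 2 connections per query) that are not taken from the commit:

#include <cstddef>
#include <cstdint>
#include <iostream>

int main()
{
    // Assumed values for illustration only.
    const std::size_t numConnections = 8;        // stands in for fPmConnections.size()
    const std::uint32_t pmCount = 2;
    const std::uint32_t connectionsPerQuery = 2; // stands in for fDECConnectionsPerQuery

    for (std::uint32_t key = 0; key < 6; ++key)
    {
        // Same left-to-right evaluation as in addQueue():
        // ((key % (numConnections / pmCount)) * connectionsPerQuery * pmCount) % numConnections
        std::size_t firstId =
            key % (numConnections / pmCount) * connectionsPerQuery * pmCount % numConnections;
        // Prints 0, 4, 0, 4, 0, 4: alternating keys start on different connection groups.
        std::cout << "queue key " << key << " -> first connection " << firstId << '\n';
    }
    return 0;
}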
@@ -972,30 +974,31 @@ void DistributedEngineComm::doHasBigMsgs(boost::shared_ptr<MQE> mqe, uint64_t ta
     mqe->targetQueueSize = targetSize;
 }
 
-int DistributedEngineComm::writeToClient(size_t index, const ByteStream& bs, uint32_t sender, bool doInterleaving)
+int DistributedEngineComm::writeToClient(size_t aPMIndex, const ByteStream& bs, uint32_t senderUniqueID, bool doInterleaving)
 {
     boost::mutex::scoped_lock lk(fMlock, boost::defer_lock_t());
     MessageQueueMap::iterator it;
     // Keep mqe's stats from being freed early
     boost::shared_ptr<MQE> mqe;
     Stats* senderStats = NULL;
-    uint32_t interleaver = 0;
 
     if (fPmConnections.size() == 0)
         return 0;
 
-    if (sender != numeric_limits<uint32_t>::max())
+    uint32_t connectionId = aPMIndex;
+
+    if (senderUniqueID != numeric_limits<uint32_t>::max())
     {
         lk.lock();
-        it = fSessionMessages.find(sender);
+        it = fSessionMessages.find(senderUniqueID);
 
         if (it != fSessionMessages.end())
         {
             mqe = it->second;
             senderStats = &(mqe->stats);
-            if (doInterleaving)
-                interleaver = it->second->interleaver[index % it->second->pmCount]++;
+            size_t pmIndex = aPMIndex % mqe->pmCount;
+            connectionId = it->second->getNextConnectionId(pmIndex,
+                fPmConnections.size(),
+                fDECConnectionsPerQuery);
         }
 
         lk.unlock();
@@ -1003,14 +1006,11 @@ int DistributedEngineComm::writeToClient(size_t index, const ByteStream& bs, uin
 
     try
     {
-        if (doInterleaving)
-            index = (index + (interleaver * pmCount)) % fPmConnections.size();
-
-        ClientList::value_type client = fPmConnections[index];
+        ClientList::value_type client = fPmConnections[connectionId];
 
         if (!client->isAvailable()) return 0;
 
-        boost::mutex::scoped_lock lk(*(fWlock[index]));
+        boost::mutex::scoped_lock lk(*(fWlock[connectionId]));
         client->write(bs, NULL, senderStats);
         return 0;
     }
@@ -1114,13 +1114,40 @@ Stats DistributedEngineComm::getNetworkStats(uint32_t uniqueID)
     return empty;
 }
 
-DistributedEngineComm::MQE::MQE(uint32_t pCount) : ackSocketIndex(0), pmCount(pCount), hasBigMsgs(false),
-    targetQueueSize(targetRecvQueueSize)
+DistributedEngineComm::MQE::MQE(const uint32_t pCount, const uint32_t initialInterleaverValue)
+    : ackSocketIndex(0), pmCount(pCount), hasBigMsgs(false), targetQueueSize(targetRecvQueueSize)
 {
     unackedWork.reset(new volatile uint32_t[pmCount]);
     interleaver.reset(new uint32_t[pmCount]);
     memset((void*) unackedWork.get(), 0, pmCount * sizeof(uint32_t));
-    memset((void*) interleaver.get(), 0, pmCount * sizeof(uint32_t));
+    uint32_t interleaverValue = initialInterleaverValue;
+    initialConnectionId = initialInterleaverValue;
+
+    for (size_t pmId = 0; pmId < pmCount; ++pmId)
+    {
+        interleaver.get()[pmId] = interleaverValue++;
+    }
+}
+
+// Here is the assumed connections distribution schema in a pmConnections vector.
+// (PM1-PM2...-PMX)-(PM1-PM2..-PMX)...
+uint32_t DistributedEngineComm::MQE::getNextConnectionId(const size_t pmIndex,
+    const size_t pmConnectionsNumber,
+    const uint32_t DECConnectionsPerQuery)
+{
+    uint32_t nextConnectionId = (interleaver[pmIndex] + pmCount) % pmConnectionsNumber;
+
+    // Wrap around the connection id.
+    if ((nextConnectionId - pmIndex) % DECConnectionsPerQuery == 0)
+    {
+        // The sum must be < connections vector size.
+        nextConnectionId = initialConnectionId + pmIndex;
+        interleaver[pmIndex] = nextConnectionId;
+    }
+    else
+    {
+        interleaver[pmIndex] = nextConnectionId;
+    }
+
+    return nextConnectionId;
+}
 
 }
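To see the walk getNextConnectionId() produces, here is a standalone replay of the constructor and the stepping logic, under assumed values that are not from the commit: 2 PMs, 8 pooled connections laid out (PM1-PM2)(PM1-PM2)..., DECConnectionsPerQuery = 4, and a query whose initial connection id is 0.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Minimal replay of MQE's interleaver state and getNextConnectionId() above;
// names and member layout are simplified for the sketch.
struct MiniMQE
{
    uint32_t pmCount;
    uint32_t initialConnectionId;
    std::vector<uint32_t> interleaver;

    MiniMQE(uint32_t pCount, uint32_t initialInterleaverValue)
        : pmCount(pCount), initialConnectionId(initialInterleaverValue), interleaver(pCount)
    {
        // Same seeding as the MQE constructor: consecutive ids per PM.
        for (size_t pmId = 0; pmId < pmCount; ++pmId)
            interleaver[pmId] = initialInterleaverValue + pmId;
    }

    uint32_t getNextConnectionId(size_t pmIndex, size_t pmConnectionsNumber,
                                 uint32_t DECConnectionsPerQuery)
    {
        // Step forward by pmCount so we stay on the same PM's column.
        uint32_t next = (interleaver[pmIndex] + pmCount) % pmConnectionsNumber;
        // Wrap back to this query's first connection for the PM.
        if ((next - pmIndex) % DECConnectionsPerQuery == 0)
            next = initialConnectionId + pmIndex;
        interleaver[pmIndex] = next;
        return next;
    }
};

int main()
{
    MiniMQE mqe(2, 0);  // 2 PMs; this query starts at connection 0
    for (size_t pm = 0; pm < 2; ++pm)
    {
        std::cout << "PM " << pm << ":";
        for (int i = 0; i < 5; ++i)
            std::cout << ' ' << mqe.getNextConnectionId(pm, 8, 4);
        std::cout << '\n';  // PM 0: 2 0 2 0 2   PM 1: 3 1 3 1 3
    }
    return 0;
}

With these numbers PM 0 alternates between connections 2 and 0 and PM 1 between 3 and 1, so the query as a whole stays on 4 of the 8 pooled connections, matching DECConnectionsPerQuery.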
@@ -217,12 +217,16 @@ private:
     /* To keep some state associated with the connection. These aren't copyable. */
     struct MQE : public boost::noncopyable
     {
-        MQE(uint32_t pmCount);
+        MQE(const uint32_t pmCount, const uint32_t initialInterleaverValue);
+        uint32_t getNextConnectionId(const size_t pmIndex,
+            const size_t pmConnectionsNumber,
+            const uint32_t DECConnectionsPerQuery);
         messageqcpp::Stats stats;
         StepMsgQueue queue;
         uint32_t ackSocketIndex;
         boost::scoped_array<volatile uint32_t> unackedWork;
         boost::scoped_array<uint32_t> interleaver;
+        uint32_t initialConnectionId;
         uint32_t pmCount;
         // non-BPP primitives don't do ACKs
         bool sendACKs;
@@ -286,6 +290,7 @@ private:
     static const uint32_t targetRecvQueueSize = 50000000;
     static const uint32_t disableThreshold = 10000000;
     uint32_t tbpsThreadCount;
+    uint32_t fDECConnectionsPerQuery;
 
     void sendAcks(uint32_t uniqueID, const std::vector<messageqcpp::SBS>& msgs,
         boost::shared_ptr<MQE> mqe, size_t qSize);
@@ -152,6 +152,11 @@ ResourceManager::ResourceManager(bool runningInExeMgr) :
     if (temp > 0)
         fJlNumScanReceiveThreads = temp;
 
+    fDECConnectionsPerQuery = getUintVal(fJobListStr, "DECConnectionsPerQuery", 0);
+    fDECConnectionsPerQuery = (fDECConnectionsPerQuery)
+        ? fDECConnectionsPerQuery
+        : getPsConnectionsPerPrimProc();
+
     temp = getIntVal(fTupleWSDLStr, "NumThreads", -1);
 
     if (temp > 0)
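The fallback semantics: the getUintVal() call passes 0 as the default, so an absent JobList.DECConnectionsPerQuery reads as 0, and the ternary then substitutes getPsConnectionsPerPrimProc(), the existing per-PrimProc connection count. A tiny sketch of that resolution, with assumed numbers:

#include <cstdint>
#include <iostream>

// Mirrors the ternary above: 0 (unset) falls back to the per-PrimProc count.
uint64_t resolveDECConnectionsPerQuery(uint64_t configured, uint64_t psConnectionsPerPrimProc)
{
    return configured ? configured : psConnectionsPerPrimProc;
}

int main()
{
    std::cout << resolveDECConnectionsPerQuery(0, 2) << '\n';  // unset -> 2
    std::cout << resolveDECConnectionsPerQuery(4, 2) << '\n';  // explicit -> 4
    return 0;
}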
@@ -128,6 +128,7 @@ const uint64_t defaultDECThrottleThreshold = 200000000; // ~200 MB
 const uint8_t defaultUseCpimport = 1;
 
 const bool defaultAllowDiskAggregation = false;
+
 /** @brief ResourceManager
  * Returns requested values from Config
  *
@@ -179,11 +180,16 @@ public:
         return getIntVal(fExeMgrStr, "ExecQueueSize", defaultEMExecQueueSize);
     }
 
     bool getAllowDiskAggregation() const
     {
         return fAllowedDiskAggregation;
     }
 
+    uint64_t getDECConnectionsPerQuery() const
+    {
+        return fDECConnectionsPerQuery;
+    }
+
     int getHjMaxBuckets() const
     {
         return getUintVal(fHashJoinStr, "MaxBuckets", defaultHJMaxBuckets);
@@ -615,6 +621,7 @@ private:
     bool isExeMgr;
     bool fUseHdfs;
     bool fAllowedDiskAggregation{false};
+    uint64_t fDECConnectionsPerQuery;
 };