mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
fix(threadpool): MCOL-5565 queries stuck in FairThreadScheduler. (#3100)
Meta Primitive Jobs, e.g. ADD_JOINER and LAST_JOINER, get stuck in the Fair scheduler when there is no out-of-band scheduler. Add the OOB scheduler back to remedy the issue.
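The change, in outline: meta primitive jobs (ADD_JOINER, LAST_JOINER, aborts) are routed to a dedicated out-of-band (OOB) PriorityThreadPool instead of the FairThreadPool, so they can no longer sit behind data jobs in the fair queue. A minimal compilable sketch of that routing idea; Pool, Command, and isMeta are illustrative stand-ins, not the real ColumnStore classes:

// Hypothetical sketch of the dispatch split this commit restores.
#include <functional>
#include <iostream>
#include <queue>

enum class Command { ADD_JOINER, LAST_JOINER, BATCH_PRIMITIVE_RUN };

struct Pool  // stand-in for FairThreadPool / PriorityThreadPool
{
  std::queue<std::function<void()>> jobs;
  void addJob(std::function<void()> f) { jobs.push(std::move(f)); }
};

bool isMeta(Command c)
{
  return c == Command::ADD_JOINER || c == Command::LAST_JOINER;
}

// Meta jobs must not wait behind data jobs in the fair scheduler;
// without the OOB pool they could be starved and wedge the queries
// that depend on them (the MCOL-5565 symptom).
void dispatch(Command c, Pool& fairPool, Pool& oobPool)
{
  (isMeta(c) ? oobPool : fairPool).addJob([c] { (void)c; /* run primitive */ });
}

int main()
{
  Pool fair, oob;
  dispatch(Command::ADD_JOINER, fair, oob);           // lands in oob
  dispatch(Command::BATCH_PRIMITIVE_RUN, fair, oob);  // lands in fair
  std::cout << "oob=" << oob.jobs.size() << " fair=" << fair.jobs.size() << '\n';
}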
@@ -1211,6 +1211,7 @@ struct BPPHandler
      }

      fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(key);
+     fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(key);
    }

    scoped.unlock();
@@ -1324,16 +1325,21 @@ struct BPPHandler
      }
      else
      {
        bs.rewind();

        if (posix_time::second_clock::universal_time() > dieTime)
        {
          std::cout << "doAbort: job for key " << key << " has been killed." << std::endl;
          return 0;
        }
        else
        {
          bs.rewind();
          return -1;
        }
      }

      scoped.unlock();
      fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(key);
+     fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(key);
      return 0;
    }
@@ -1356,7 +1362,10 @@ struct BPPHandler
        return 0;
      }
      else
      {
        bs.rewind();
        return -1;
      }
    }

    void createBPP(ByteStream& bs)
@@ -1404,7 +1413,6 @@ struct BPPHandler
      bppKeys.push_back(key);
      bool newInsert;
      newInsert = bppMap.insert(pair<uint32_t, SBPPV>(key, bppv)).second;
      // cout << "creating BPP # " << key << endl;
      scoped.unlock();

      if (!newInsert)
@@ -1422,10 +1430,7 @@ struct BPPHandler
    inline SBPPV grabBPPs(uint32_t uniqueID)
    {
      BPPMap::iterator it;
      /*
          uint32_t failCount = 0;
          uint32_t maxFailCount = (fatal ? 500 : 5000);
      */

      SBPPV ret;

      boost::mutex::scoped_lock scoped(bppLock);
@@ -1435,24 +1440,6 @@ struct BPPHandler
        return it->second;
      else
        return SBPPV();

-     /*
-         do
-         {
-             if (++failCount == maxFailCount) {
-                 //cout << "grabBPPs couldn't find the BPPs for " << uniqueID << endl;
-                 return ret;
-                 //throw logic_error("grabBPPs couldn't find the unique ID");
-             }
-             scoped.unlock();
-             usleep(5000);
-             scoped.lock();
-             it = bppMap.find(uniqueID);
-         } while (it == bppMap.end());
-
-         ret = it->second;
-         return ret;
-     */
    }

    inline shared_mutex& getDJLock(uint32_t uniqueID)
@@ -1490,6 +1477,7 @@ struct BPPHandler
      buf = bs.buf();
      /* the uniqueID is after the ISMPacketHeader, sessionID, and stepID */
      uniqueID = *((const uint32_t*)&buf[sizeof(ISMPacketHeader) + 2 * sizeof(uint32_t)]);

      bppv = grabBPPs(uniqueID);

      if (bppv)
@@ -1501,7 +1489,10 @@ struct BPPHandler
      else
      {
        if (posix_time::second_clock::universal_time() > dieTime)
        {
          cout << "addJoinerToBPP: job for id " << uniqueID << " has been killed." << endl;
          return 0;
        }
        else
          return -1;
      }
@@ -1519,20 +1510,22 @@ struct BPPHandler
      buf = bs.buf();
      /* the uniqueID is after the ISMPacketHeader, sessionID, and stepID */
      uniqueID = *((const uint32_t*)&buf[sizeof(ISMPacketHeader) + 2 * sizeof(uint32_t)]);

      bppv = grabBPPs(uniqueID);

      if (!bppv)
      {
        // cout << "got a lastJoiner msg for an unknown obj " << uniqueID << endl;
        if (posix_time::second_clock::universal_time() > dieTime)
        {
          cout << "LastJoiner: job for id " << uniqueID << " has been killed." << endl;
          return 0;
        }
        else
        {
          return -1;
        }
      }

      boost::unique_lock<shared_mutex> lk(getDJLock(uniqueID));

      for (i = 0; i < bppv->get().size(); i++)
      {
        err = bppv->get()[i]->endOfJoiner();
@@ -1540,32 +1533,26 @@ struct BPPHandler
        if (err == -1)
        {
          if (posix_time::second_clock::universal_time() > dieTime)
          {
            cout << "LastJoiner: job for id " << uniqueID
                 << " has been killed waiting for joiner messages for too long." << endl;
            return 0;
          }
          else
            return -1;
        }
      }
      bppv->get()[0]->doneSendingJoinerData();

      /* Note: some of the duplicate/run/join sync was moved to the BPPV class to do
         more intelligent scheduling. Once the join data is received, BPPV will
         start letting jobs run and create more BPP instances on demand. */

      atomicops::atomicMb(); // make sure the joinDataReceived assignment doesn't migrate upward...
      bppv->joinDataReceived = true;
      return 0;
    }

    int destroyBPP(ByteStream& bs, const posix_time::ptime& dieTime)
    {
      // This is a corner case that damages bs so its length becomes less than a header length.
      // The damaged bs doesn't pass the if that checks bs at least has header + 3x int32_t.
      // The if block below works around the issue.
      if (posix_time::second_clock::universal_time() > dieTime)
      {
        return 0;
      }

      uint32_t uniqueID, sessionID, stepID;
      BPPMap::iterator it;
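A side note on the lastJoiner path above: per the in-code comment, atomicops::atomicMb() is a memory barrier that keeps the joinDataReceived = true store from becoming visible before the joiner data it guards. A standard-C++ sketch of the same publish/consume pattern; the names are hypothetical, not ColumnStore code:

#include <atomic>

std::atomic<bool> joinDataReceived{false};
int joinerState = 0;  // stands in for the join data being published

void publisher()
{
  joinerState = 42;  // write all joiner data first
  // Release fence: the data writes above cannot sink below the flag
  // store; this plays the role of atomicops::atomicMb() in the diff.
  std::atomic_thread_fence(std::memory_order_release);
  joinDataReceived.store(true, std::memory_order_relaxed);
}

void consumer()
{
  if (joinDataReceived.load(std::memory_order_relaxed))
  {
    std::atomic_thread_fence(std::memory_order_acquire);
    // joinerState == 42 is guaranteed to be visible here.
  }
}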
@@ -1608,39 +1595,33 @@ struct BPPHandler
      {
        // MCOL-5.  On ubuntu, a crash was happening.  Checking
        // joinDataReceived here fixes it.
-       // We're not ready for a destroy.  Reschedule.
+       // We're not ready for a destroy.  Reschedule to wait
+       // for all joiners to arrive.
+       // TODO there might be no joiners if the query is canceled.
+       // The memory will leak.
+       // Rewind to the beginning of ByteStream buf b/c of the advance above.
        bs.rewind();
        return -1;
      }
    }
    else
    {
      // cout << "got a destroy for an unknown obj " << uniqueID << endl;
      bs.rewind();

      if (posix_time::second_clock::universal_time() > dieTime)
      {
        // XXXPAT: going to let this fall through and delete jobs for
        // uniqueID if there are any. Not clear what the downside is.
        /*
        lk.unlock();
        deleteDJLock(uniqueID);
        return 0;
        */
        cout << "destroyBPP: job for id " << uniqueID << " and sessionID " << sessionID << " has been killed."
             << endl;
        // If for some reason there are jobs for this uniqueID that arrived later
        // they won't leave PP thread pool staying there forever.
      }
      else
      {
        bs.rewind();
        return -1;
      }
    }

    // cout << " destroy: new size is " << bppMap.size() << endl;
    /*
        if (sessionID & 0x80000000)
            cerr << "destroyed BPP instances for sessionID " << (int)
                 (sessionID ^ 0x80000000) << " stepID "<< stepID << " (syscat)\n";
        else
            cerr << "destroyed BPP instances for sessionID " << sessionID <<
                 " stepID "<< stepID << endl;
    */
    fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(uniqueID);
+   fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(uniqueID);
    lk.unlock();
    deleteDJLock(uniqueID);
    return 0;
@@ -1709,7 +1690,10 @@ class DictionaryOp : public FairThreadPool::Functor
        bs->rewind();

        if (posix_time::second_clock::universal_time() > dieTime)
        {
          cout << "DictionaryOp::operator(): job has been killed." << endl;
          return 0;
        }
      }

      return ret;
@@ -1787,7 +1771,10 @@ class DestroyEqualityFilter : public DictionaryOp
        return 0;
      }
      else
      {
        bs->rewind();
        return -1;
      }
    }
  };
@@ -1925,7 +1912,8 @@ struct ReadThread
    }

    static void dispatchPrimitive(SBS sbs, boost::shared_ptr<BPPHandler>& fBPPHandler,
-                                 boost::shared_ptr<threadpool::FairThreadPool>& procPoolPtr,
+                                 boost::shared_ptr<threadpool::FairThreadPool> procPool,
+                                 std::shared_ptr<threadpool::PriorityThreadPool> OOBProcPool,
                                  SP_UM_IOSOCK& outIos, SP_UM_MUTEX& writeLock, const uint32_t processorThreads,
                                  const bool ptTrace)
    {
@@ -1947,6 +1935,7 @@ struct ReadThread
        const uint32_t uniqueID = *((uint32_t*)&buf[pos + 10]);
        const uint32_t weight = threadpool::MetaJobsInitialWeight;
        const uint32_t priority = 0;

        uint32_t id = 0;
        boost::shared_ptr<FairThreadPool::Functor> functor;
        if (ismHdr->Command == DICT_CREATE_EQUALITY_FILTER)
@@ -1980,8 +1969,8 @@ struct ReadThread
          id = fBPPHandler->getUniqueID(sbs, ismHdr->Command);
          functor.reset(new BPPHandler::Abort(fBPPHandler, sbs));
        }
-       FairThreadPool::Job job(uniqueID, stepID, txnId, functor, weight, priority, id);
-       procPoolPtr->addJob(job);
+       PriorityThreadPool::Job job(uniqueID, stepID, txnId, functor, weight, priority, id);
+       OOBProcPool->addJob(job);
        break;
      }
@@ -2022,10 +2011,18 @@ struct ReadThread
          txnId = *((uint32_t*)&buf[pos + 2]);
          stepID = *((uint32_t*)&buf[pos + 6]);
          uniqueID = *((uint32_t*)&buf[pos + 10]);
-         weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]);
+         weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]) + 100000;
        }
+       if (hdr && hdr->flags & IS_SYSCAT)
+       {
+         PriorityThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
+         OOBProcPool->addJob(job);
+       }
+       else
+       {
+         FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
+         procPool->addJob(job);
+       }
-       FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
-       procPoolPtr->addJob(job);

        break;
      }
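The dispatch hunk above pulls txnId, stepID, uniqueID, and a weight out of the raw message buffer at fixed offsets, bumps the computed weight by a constant 100000, and routes system-catalog jobs (IS_SYSCAT) to the OOB pool while everything else goes through the fair queue. A small sketch of that parse-and-route shape; the offsets, flag value, and names are illustrative only, not the real ISMPacketHeader layout:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

constexpr uint32_t IS_SYSCAT_FLAG = 0x1;  // assumed value, for illustration

uint32_t readU32(const uint8_t* buf, size_t off)
{
  uint32_t v;
  std::memcpy(&v, buf + off, sizeof(v));  // avoids the unaligned-cast UB risk
  return v;
}

int main()
{
  std::vector<uint8_t> msg(32, 0);
  msg[0] = IS_SYSCAT_FLAG;  // pretend header flags live at offset 0 (little-endian host)
  const uint32_t flags = readU32(msg.data(), 0);
  // Syscat jobs bypass the fair queue, as in the hunk above.
  std::cout << ((flags & IS_SYSCAT_FLAG) ? "OOB pool" : "fair pool") << '\n';
}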
@@ -2049,7 +2046,8 @@ struct ReadThread
    void operator()()
    {
      utils::setThreadName("PPReadThread");
-     boost::shared_ptr<threadpool::FairThreadPool> procPoolPtr = fPrimitiveServerPtr->getProcessorThreadPool();
+     auto procPool = fPrimitiveServerPtr->getProcessorThreadPool();
+     auto OOBProcPool = fPrimitiveServerPtr->getOOBProcessorThreadPool();
      SBS bs;
      UmSocketSelector* pUmSocketSelector = UmSocketSelector::instance();
@@ -2140,7 +2138,7 @@ struct ReadThread

            default: break;
          }
-         dispatchPrimitive(bs, fBPPHandler, procPoolPtr, outIos, writeLock,
+         dispatchPrimitive(bs, fBPPHandler, procPool, OOBProcPool, outIos, writeLock,
                            fPrimitiveServerPtr->ProcessorThreads(), fPrimitiveServerPtr->PTTrace());
        }
        else // bs.length() == 0
@@ -2282,6 +2280,9 @@ PrimitiveServer::PrimitiveServer(int serverThreads, int serverQueueSize, int pro

    fProcessorPool.reset(new threadpool::FairThreadPool(fProcessorWeight, highPriorityThreads,
                                                        medPriorityThreads, lowPriorityThreads, 0));
+   // We're not using either the priority or the job-clustering features, just need a threadpool
+   // that can reschedule jobs, and an unlimited non-blocking queue
+   fOOBPool.reset(new threadpool::PriorityThreadPool(1, 5, 0, 0, 1));

    asyncCounter = 0;
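Throughout these handlers the return code drives rescheduling: 0 means the job is finished (or was killed past its dieTime), while -1 after bs.rewind() asks the pool to run the functor again later. A toy loop in that spirit; nothing here is the real FairThreadPool or PriorityThreadPool API:

#include <deque>
#include <functional>

// A functor returns 0 when done, -1 to be re-queued and retried later.
// A real pool would delay retries; this toy loop just cycles the queue.
void runUntilDrained(std::deque<std::function<int()>> jobs)
{
  while (!jobs.empty())
  {
    auto job = std::move(jobs.front());
    jobs.pop_front();
    if (job() == -1)
      jobs.push_back(std::move(job));  // reschedule instead of blocking
  }
}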
@@ -2338,12 +2339,13 @@ void PrimitiveServer::start(Service* service, utils::USpaceSpinLock& startupRace
    // These empty SPs have "same-host" messaging semantics.
    SP_UM_IOSOCK outIos(nullptr);
    SP_UM_MUTEX writeLock(nullptr);
-   auto procPoolPtr = this->getProcessorThreadPool();
+   auto procPool = this->getProcessorThreadPool();
+   auto OOBProcPool = this->getOOBProcessorThreadPool();
    boost::shared_ptr<BPPHandler> fBPPHandler(new BPPHandler(this));
    for (;;)
    {
      joblist::DistributedEngineComm::SBSVector primitiveMsgs;
-     for (auto& sbs : exeMgrDecPtr->readLocalQueueMessagesOrWait(primitiveMsgs))
+     for (auto sbs : exeMgrDecPtr->readLocalQueueMessagesOrWait(primitiveMsgs))
      {
        if (sbs->length() == 0)
        {
@@ -2352,7 +2354,7 @@ void PrimitiveServer::start(Service* service, utils::USpaceSpinLock& startupRace
        }
        idbassert(sbs->length() >= sizeof(ISMPacketHeader));

-       ReadThread::dispatchPrimitive(sbs, fBPPHandler, procPoolPtr, outIos, writeLock,
+       ReadThread::dispatchPrimitive(sbs, fBPPHandler, procPool, OOBProcPool, outIos, writeLock,
                                      this->ProcessorThreads(), this->PTTrace());
      }
    }
@@ -2369,7 +2371,6 @@ BPPV::BPPV(PrimitiveServer* ps)
    sendThread->setProcessorPool(ps->getProcessorThreadPool());
    v.reserve(BPPCount);
    pos = 0;
    joinDataReceived = false;
  }

  BPPV::~BPPV()
@@ -2409,27 +2410,6 @@ boost::shared_ptr<BatchPrimitiveProcessor> BPPV::next()
    uint32_t size = v.size();
    uint32_t i = 0;

-   #if 0
-
-   // This block of code scans for the first available BPP instance,
-   // makes BPPSeeder reschedule it if none are available. Relies on BPP instance
-   // being preallocated.
-   for (i = 0; i < size; i++)
-   {
-     uint32_t index = (i + pos) % size;
-
-     if (!(v[index]->busy()))
-     {
-       pos = (index + 1) % size;
-       v[index]->busy(true);
-       return v[index];
-     }
-   }
-
-   // They're all busy, make threadpool reschedule the job
-   return boost::shared_ptr<BatchPrimitiveProcessor>();
-   #endif
-
    // This block of code creates BPP instances if/when they are needed

    // don't use a processor thread when it will just block, reschedule it instead
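The deleted #if 0 block and the surviving comments describe the same contract from two sides: BPPV::next() hands out an idle instance when one exists and otherwise signals the thread pool to reschedule the job rather than block a processor thread. A minimal sketch of that contract; Processor is a stand-in, not the real BatchPrimitiveProcessor:

#include <memory>
#include <vector>

struct Processor  // stand-in for BatchPrimitiveProcessor
{
  bool busy = false;
};

// Returns an empty shared_ptr when every instance is busy; by convention the
// caller re-queues the job instead of blocking a pool thread on it.
std::shared_ptr<Processor> next(std::vector<std::shared_ptr<Processor>>& v)
{
  for (auto& p : v)
  {
    if (!p->busy)
    {
      p->busy = true;
      return p;
    }
  }
  return {};  // all busy: tell the scheduler to reschedule this job
}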