fix(join, threadpool): MCOL-5565: MCOL-5636: MCOL-5645: port from develop-23.02 to [develop] (#3128)
* fix(threadpool): MCOL-5565 queries stuck in FairThreadScheduler. (#3100) Meta primitive jobs, e.g. ADD_JOINER and LAST_JOINER, got stuck in the Fair scheduler when there was no out-of-band scheduler. Add the OOB scheduler back to remedy the issue.

* fix(messageqcpp): MCOL-5636 same-node communication crashed when transmitting PP errors to EM because error messaging used a socket that was a nullptr. (#3106)

* fix(threadpool): MCOL-5645 an erroneous threadpool Job ctor implicitly set the socket shared_ptr to nullptr, causing a SIGABRT when the threadpool returned an error. (#3125)

---------

Co-authored-by: drrtuy <roman.nozdrin@mariadb.com>
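The second and third fixes both reduce to a Job whose output-socket shared_ptr can silently be nullptr, so the error path dereferences a null pointer. Below is a minimal standalone sketch of that failure mode and of the explicit-socket/guarded shape the fix restores; the Sock and Job types are illustrative stand-ins, not the actual ColumnStore classes.

```cpp
// Illustrative only: simplified stand-ins for the threadpool Job and its
// output socket; not the actual ColumnStore classes.
#include <iostream>
#include <memory>
#include <string>

struct Sock
{
  void write(const std::string& msg) { std::cout << "sent: " << msg << '\n'; }
};

struct Job
{
  // "Erroneous" ctor shape: sock_ is left default-constructed, i.e. a null
  // shared_ptr -- the kind of problem MCOL-5645 describes.
  explicit Job(int id) : id_(id) {}

  // Fixed shape: the caller passes the socket errors should be written to.
  Job(int id, std::shared_ptr<Sock> sock) : id_(id), sock_(std::move(sock)) {}

  void sendError(const std::string& what)
  {
    if (!sock_)  // without this guard (or a non-null socket) we'd crash here
    {
      std::cerr << "job " << id_ << ": no socket, dropping error: " << what << '\n';
      return;
    }
    sock_->write(what);
  }

  int id_;
  std::shared_ptr<Sock> sock_;  // nullptr unless explicitly provided
};

int main()
{
  Job bad(1);                            // socket silently nullptr
  bad.sendError("primitive failed");     // safe only because of the guard
  Job good(2, std::make_shared<Sock>());
  good.sendError("primitive failed");    // error actually reaches the peer
  return 0;
}
```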
@@ -59,6 +59,7 @@ using namespace BRM;
#include "writeengine.h"

#include "messagequeue.h"
#include "samenodepseudosocket.h"
using namespace messageqcpp;

#include "blockrequestprocessor.h"
@@ -106,8 +107,6 @@ using namespace threadpool;
#define O_NOATIME 0
#endif

typedef tr1::unordered_set<BRM::OID_t> USOID;

// make global for blockcache
//
static const char* statsName = {"pm"};
@@ -1019,7 +1018,7 @@ class DictScanJob : public threadpool::FairThreadPool::Functor
DictScanJob(SP_UM_IOSOCK ios, SBS bs, SP_UM_MUTEX writeLock);
virtual ~DictScanJob();

void write(const SBS&);
void write(const SBS);
int operator()();
void catchHandler(const std::string& ex, uint32_t id, uint16_t code = logging::primitiveServerErr);
void sendErrorMsg(uint32_t id, uint16_t code);
@@ -1041,15 +1040,14 @@ DictScanJob::~DictScanJob()
{
}

void DictScanJob::write(const SBS& sbs)
void DictScanJob::write(const SBS sbs)
{
// Here is the fast path for local EM to PM interaction. PM puts into the
// input EM DEC queue directly.
// !sock has a 'same host connection' semantics here.
if (!fIos)
// !fWriteLock has a 'same host connection' semantics here.
if (!fWriteLock)
{
auto* exeMgrDecPtr = exemgr::globServiceExeMgr->getDec();
exeMgrDecPtr->addDataToOutput(sbs);
fIos->write(sbs);
return;
}
boost::mutex::scoped_lock lk(*fWriteLock);
@@ -1209,6 +1207,7 @@ struct BPPHandler
}

fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(key);
fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(key);
}

scoped.unlock();
@@ -1322,16 +1321,21 @@ struct BPPHandler
}
else
{
bs.rewind();

if (posix_time::second_clock::universal_time() > dieTime)
{
std::cout << "doAbort: job for key " << key << " has been killed." << std::endl;
return 0;
}
else
{
bs.rewind();
return -1;
}
}

scoped.unlock();
fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(key);
fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(key);
return 0;
}
@@ -1354,7 +1358,10 @@ struct BPPHandler
return 0;
}
else
{
bs.rewind();
return -1;
}
}

void createBPP(ByteStream& bs)
@@ -1402,7 +1409,6 @@ struct BPPHandler
bppKeys.push_back(key);
bool newInsert;
newInsert = bppMap.insert(pair<uint32_t, SBPPV>(key, bppv)).second;
// cout << "creating BPP # " << key << endl;
scoped.unlock();

if (!newInsert)
@@ -1420,10 +1426,7 @@ struct BPPHandler
inline SBPPV grabBPPs(uint32_t uniqueID)
{
BPPMap::iterator it;
/*
uint32_t failCount = 0;
uint32_t maxFailCount = (fatal ? 500 : 5000);
*/

SBPPV ret;

boost::mutex::scoped_lock scoped(bppLock);
@@ -1433,24 +1436,6 @@ struct BPPHandler
return it->second;
else
return SBPPV();

/*
do
{
if (++failCount == maxFailCount) {
//cout << "grabBPPs couldn't find the BPPs for " << uniqueID << endl;
return ret;
//throw logic_error("grabBPPs couldn't find the unique ID");
}
scoped.unlock();
usleep(5000);
scoped.lock();
it = bppMap.find(uniqueID);
} while (it == bppMap.end());

ret = it->second;
return ret;
*/
}

inline shared_mutex& getDJLock(uint32_t uniqueID)
@@ -1488,6 +1473,7 @@ struct BPPHandler
buf = bs.buf();
/* the uniqueID is after the ISMPacketHeader, sessionID, and stepID */
uniqueID = *((const uint32_t*)&buf[sizeof(ISMPacketHeader) + 2 * sizeof(uint32_t)]);

bppv = grabBPPs(uniqueID);

if (bppv)
@@ -1499,7 +1485,10 @@ struct BPPHandler
else
{
if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "addJoinerToBPP: job for id " << uniqueID << " has been killed." << endl;
return 0;
}
else
return -1;
}
@@ -1517,20 +1506,22 @@ struct BPPHandler
buf = bs.buf();
/* the uniqueID is after the ISMPacketHeader, sessionID, and stepID */
uniqueID = *((const uint32_t*)&buf[sizeof(ISMPacketHeader) + 2 * sizeof(uint32_t)]);

bppv = grabBPPs(uniqueID);

if (!bppv)
{
// cout << "got a lastJoiner msg for an unknown obj " << uniqueID << endl;
if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "LastJoiner: job for id " << uniqueID << " has been killed." << endl;
return 0;
}
else
{
return -1;
}
}

boost::unique_lock<shared_mutex> lk(getDJLock(uniqueID));

for (i = 0; i < bppv->get().size(); i++)
{
err = bppv->get()[i]->endOfJoiner();
@@ -1538,32 +1529,26 @@ struct BPPHandler
if (err == -1)
{
if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "LastJoiner: job for id " << uniqueID
<< " has been killed waiting for joiner messages for too long." << endl;
return 0;
}
else
return -1;
}
}
bppv->get()[0]->doneSendingJoinerData();

/* Note: some of the duplicate/run/join sync was moved to the BPPV class to do
more intelligent scheduling. Once the join data is received, BPPV will
start letting jobs run and create more BPP instances on demand. */

atomicops::atomicMb(); // make sure the joinDataReceived assignment doesn't migrate upward...
bppv->joinDataReceived = true;
return 0;
}

int destroyBPP(ByteStream& bs, const posix_time::ptime& dieTime)
{
// This is a corner case that damages bs so its length becomes less than a header length.
// The damaged bs doesn't pass the if that checks bs at least has header + 3x int32_t.
// The if block below works around the issue.
if (posix_time::second_clock::universal_time() > dieTime)
{
return 0;
}

uint32_t uniqueID, sessionID, stepID;
BPPMap::iterator it;
if (bs.length() < sizeof(ISMPacketHeader) + sizeof(sessionID) + sizeof(stepID) + sizeof(uniqueID))
@@ -1603,39 +1588,33 @@ struct BPPHandler
{
// MCOL-5. On ubuntu, a crash was happening. Checking
// joinDataReceived here fixes it.
// We're not ready for a destroy. Reschedule.
// We're not ready for a destroy. Reschedule to wait
// for all joiners to arrive.
// TODO there might be no joiners if the query is canceled.
// The memory will leak.
// Rewind to the beginning of ByteStream buf b/c of the advance above.
bs.rewind();
return -1;
}
}
else
{
// cout << "got a destroy for an unknown obj " << uniqueID << endl;
bs.rewind();

if (posix_time::second_clock::universal_time() > dieTime)
{
// XXXPAT: going to let this fall through and delete jobs for
// uniqueID if there are any. Not clear what the downside is.
/*
lk.unlock();
deleteDJLock(uniqueID);
return 0;
*/
cout << "destroyBPP: job for id " << uniqueID << " and sessionID " << sessionID << " has been killed."
<< endl;
// If for some reason there are jobs for this uniqueID that arrived later
// they won't leave PP thread pool staying there forever.
}
else
{
bs.rewind();
return -1;
}
}

// cout << " destroy: new size is " << bppMap.size() << endl;
/*
if (sessionID & 0x80000000)
cerr << "destroyed BPP instances for sessionID " << (int)
(sessionID ^ 0x80000000) << " stepID "<< stepID << " (syscat)\n";
else
cerr << "destroyed BPP instances for sessionID " << sessionID <<
" stepID "<< stepID << endl;
*/
fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(uniqueID);
fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(uniqueID);
lk.unlock();
deleteDJLock(uniqueID);
return 0;
@@ -1704,7 +1683,10 @@ class DictionaryOp : public FairThreadPool::Functor
bs->rewind();

if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "DictionaryOp::operator(): job has been killed." << endl;
return 0;
}
}

return ret;
@@ -1782,7 +1764,10 @@ class DestroyEqualityFilter : public DictionaryOp
return 0;
}
else
{
bs->rewind();
return -1;
}
}
};
@@ -1920,7 +1905,8 @@ struct ReadThread
}

static void dispatchPrimitive(SBS sbs, boost::shared_ptr<BPPHandler>& fBPPHandler,
boost::shared_ptr<threadpool::FairThreadPool>& procPoolPtr,
boost::shared_ptr<threadpool::FairThreadPool> procPool,
std::shared_ptr<threadpool::PriorityThreadPool> OOBProcPool,
SP_UM_IOSOCK& outIos, SP_UM_MUTEX& writeLock, const uint32_t processorThreads,
const bool ptTrace)
{
@@ -1942,6 +1928,7 @@ struct ReadThread
const uint32_t uniqueID = *((uint32_t*)&buf[pos + 10]);
const uint32_t weight = threadpool::MetaJobsInitialWeight;
const uint32_t priority = 0;

uint32_t id = 0;
boost::shared_ptr<FairThreadPool::Functor> functor;
if (ismHdr->Command == DICT_CREATE_EQUALITY_FILTER)
@@ -1975,8 +1962,8 @@ struct ReadThread
id = fBPPHandler->getUniqueID(sbs, ismHdr->Command);
functor.reset(new BPPHandler::Abort(fBPPHandler, sbs));
}
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, weight, priority, id);
procPoolPtr->addJob(job);
PriorityThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
OOBProcPool->addJob(job);
break;
}

@@ -2017,10 +2004,18 @@ struct ReadThread
txnId = *((uint32_t*)&buf[pos + 2]);
stepID = *((uint32_t*)&buf[pos + 6]);
uniqueID = *((uint32_t*)&buf[pos + 10]);
weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]);
weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]) + 100000;
}
if (hdr && hdr->flags & IS_SYSCAT)
{
PriorityThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
OOBProcPool->addJob(job);
}
else
{
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
procPool->addJob(job);
}
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
procPoolPtr->addJob(job);

break;
}
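The hunk above is where the MCOL-5565 remedy shows up in dispatch: meta and system-catalog work now goes to the out-of-band pool so it cannot be starved behind fair-scheduled primitive jobs. A rough sketch of that routing decision, with invented queue types standing in for PriorityThreadPool and FairThreadPool, might look like:

```cpp
// Sketch of the dispatch split; the Pool/Job types here are invented
// stand-ins, whereas the real code builds PriorityThreadPool::Job vs
// FairThreadPool::Job and hands them to the respective pools.
#include <cstdint>
#include <deque>
#include <iostream>

struct Job { uint32_t uniqueID; uint32_t weight; };

struct Pool
{
  const char* name;
  std::deque<Job> q;
  void addJob(const Job& j)
  {
    q.push_back(j);
    std::cout << name << " <- job " << j.uniqueID << " (weight " << j.weight << ")\n";
  }
};

// Meta/syscat work goes out-of-band so a long fair-scheduler backlog
// cannot starve it; everything else stays on the fair pool.
void dispatch(const Job& j, bool isSysCat, Pool& oobPool, Pool& fairPool)
{
  if (isSysCat)
    oobPool.addJob(j);
  else
    fairPool.addJob(j);
}

int main()
{
  Pool oob{"OOBProcPool", {}};
  Pool fair{"procPool", {}};
  dispatch({42, 100000}, /*isSysCat=*/true, oob, fair);
  dispatch({43, 512}, /*isSysCat=*/false, oob, fair);
}
```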
@@ -2044,7 +2039,8 @@ struct ReadThread
void operator()()
{
utils::setThreadName("PPReadThread");
boost::shared_ptr<threadpool::FairThreadPool> procPoolPtr = fPrimitiveServerPtr->getProcessorThreadPool();
auto procPool = fPrimitiveServerPtr->getProcessorThreadPool();
auto OOBProcPool = fPrimitiveServerPtr->getOOBProcessorThreadPool();
SBS bs;
UmSocketSelector* pUmSocketSelector = UmSocketSelector::instance();

@@ -2135,7 +2131,7 @@ struct ReadThread

default: break;
}
dispatchPrimitive(bs, fBPPHandler, procPoolPtr, outIos, writeLock,
dispatchPrimitive(bs, fBPPHandler, procPool, OOBProcPool, outIos, writeLock,
fPrimitiveServerPtr->ProcessorThreads(), fPrimitiveServerPtr->PTTrace());
}
else // bs.length() == 0
@@ -2277,6 +2273,9 @@ PrimitiveServer::PrimitiveServer(int serverThreads, int serverQueueSize, int pro

fProcessorPool.reset(new threadpool::FairThreadPool(fProcessorWeight, highPriorityThreads,
medPriorityThreads, lowPriorityThreads, 0));
// We're not using either the priority or the job-clustering features, just need a threadpool
// that can reschedule jobs, and an unlimited non-blocking queue
fOOBPool.reset(new threadpool::PriorityThreadPool(1, 5, 0, 0, 1));

asyncCounter = 0;

@@ -2330,15 +2329,18 @@ void PrimitiveServer::start(Service* service, utils::USpaceSpinLock& startupRace
sleep(1);
exeMgrDecPtr = (exemgr::globServiceExeMgr) ? exemgr::globServiceExeMgr->getDec() : nullptr;
}
// These empty SPs have "same-host" messaging semantics.
SP_UM_IOSOCK outIos(nullptr);
// This is a pseudo socket that puts data into DEC queue directly.
// It can be used for PP to EM communication only.
SP_UM_IOSOCK outIos(new IOSocket(new SameNodePseudoSocket(exeMgrDecPtr)));
// This empty SP transmits "same-host" messaging semantics.
SP_UM_MUTEX writeLock(nullptr);
auto procPoolPtr = this->getProcessorThreadPool();
auto procPool = this->getProcessorThreadPool();
auto OOBProcPool = this->getOOBProcessorThreadPool();
boost::shared_ptr<BPPHandler> fBPPHandler(new BPPHandler(this));
for (;;)
{
joblist::DistributedEngineComm::SBSVector primitiveMsgs;
for (auto& sbs : exeMgrDecPtr->readLocalQueueMessagesOrWait(primitiveMsgs))
for (auto sbs : exeMgrDecPtr->readLocalQueueMessagesOrWait(primitiveMsgs))
{
if (sbs->length() == 0)
{
@@ -2347,7 +2349,7 @@ void PrimitiveServer::start(Service* service, utils::USpaceSpinLock& startupRace
}
idbassert(sbs->length() >= sizeof(ISMPacketHeader));

ReadThread::dispatchPrimitive(sbs, fBPPHandler, procPoolPtr, outIos, writeLock,
ReadThread::dispatchPrimitive(sbs, fBPPHandler, procPool, OOBProcPool, outIos, writeLock,
this->ProcessorThreads(), this->PTTrace());
}
}
@@ -2364,7 +2366,6 @@ BPPV::BPPV(PrimitiveServer* ps)
sendThread->setProcessorPool(ps->getProcessorThreadPool());
v.reserve(BPPCount);
pos = 0;
joinDataReceived = false;
}

BPPV::~BPPV()
@@ -2404,27 +2405,6 @@ boost::shared_ptr<BatchPrimitiveProcessor> BPPV::next()
uint32_t size = v.size();
uint32_t i = 0;

#if 0

// This block of code scans for the first available BPP instance,
// makes BPPSeeder reschedule it if none are available. Relies on BPP instance
// being preallocated.
for (i = 0; i < size; i++)
{
uint32_t index = (i + pos) % size;

if (!(v[index]->busy()))
{
pos = (index + 1) % size;
v[index]->busy(true);
return v[index];
}
}

// They're all busy, make threadpool reschedule the job
return boost::shared_ptr<BatchPrimitiveProcessor>();
#endif

// This block of code creates BPP instances if/when they are needed

// don't use a processor thread when it will just block, reschedule it instead
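For the same-node path (the @@ -2330 hunk), outIos is no longer a null shared_ptr: it wraps a SameNodePseudoSocket whose writes land directly in the local DEC queue, so errors written through the ordinary socket interface no longer dereference a nullptr. A toy version of that idea, assuming a simple queue-backed interface rather than the real IOSocket/DEC API, could be:

```cpp
// Toy illustration of a "pseudo socket": same write() interface as a network
// socket, but the payload is appended to an in-process queue instead of being
// sent over the wire. The Socket/LocalQueue types here are invented.
#include <iostream>
#include <memory>
#include <queue>
#include <string>

using Message = std::string;

struct Socket
{
  virtual ~Socket() = default;
  virtual void write(const Message& m) = 0;
};

// Local queue standing in for the DEC input queue on the same host.
struct LocalQueue
{
  std::queue<Message> q;
  void push(const Message& m) { q.push(m); }
};

struct SameHostPseudoSocket : Socket
{
  explicit SameHostPseudoSocket(LocalQueue& dec) : dec_(dec) {}
  void write(const Message& m) override { dec_.push(m); }  // no network hop
  LocalQueue& dec_;
};

int main()
{
  LocalQueue dec;
  std::shared_ptr<Socket> out = std::make_shared<SameHostPseudoSocket>(dec);
  out->write("error: primitive step failed");  // safe even on the error path
  std::cout << "messages queued locally: " << dec.q.size() << '\n';
}
```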
|