
fix(join, threadpool): MCOL-5565: MCOL-5636: MCOL-5645: port from develop-23.02 to [develop] (#3128)

* fix(threadpool): MCOL-5565 queries stuck in FairThreadScheduler. (#3100)

Meta primitive jobs, e.g. ADD_JOINER and LAST_JOINER, got stuck
	in the fair scheduler when no out-of-band scheduler was present. Add the OOB
	scheduler back to remedy the issue.

* fix(messageqcpp): MCOL-5636 same-node communication crashed when transmitting PP errors to EM because error messaging used a socket that was a nullptr. (#3106)

* fix(threadpool): MCOL-5645 erroneous threadpool Job ctor implicitly sets the socket shared_ptr to nullptr, causing SIGABRT when the threadpool returns an error (#3125); a sketch of this null-socket hazard appears below, just before the diff.
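
To make the first fix concrete: control messages such as ADD_JOINER and LAST_JOINER must never wait behind heavy scan work in the fair scheduler, so they are routed to a small dedicated out-of-band pool. The following is only a minimal sketch of that routing decision; the Job and Pool types are hypothetical stand-ins, not ColumnStore's actual FairThreadPool/PriorityThreadPool interfaces.

#include <cstdint>
#include <functional>

struct Job
{
  uint32_t uniqueID;
  std::function<int()> work;  // returns -1 to ask the pool to reschedule the job
};

struct Pool
{
  virtual ~Pool() = default;
  virtual void addJob(const Job& job) = 0;
};

// fairPool: weight-based scheduler shared by the data-scanning primitives.
// oobPool: small dedicated pool reserved for control (meta) messages.
void dispatch(const Job& job, bool isMetaJob, Pool& fairPool, Pool& oobPool)
{
  if (isMetaJob)
    oobPool.addJob(job);   // ADD_JOINER, LAST_JOINER, aborts: never starved
  else
    fairPool.addJob(job);  // regular scan/aggregation primitives
}

// Trivial pool that runs jobs inline, just to make the sketch executable.
struct ImmediatePool : Pool
{
  void addJob(const Job& job) override
  {
    if (job.work)
      job.work();
  }
};

int main()
{
  ImmediatePool fair, oob;
  Job meta{1, [] { return 0; }};  // an ADD_JOINER-style control job
  Job scan{2, [] { return 0; }};  // a heavy scan primitive
  dispatch(meta, /*isMetaJob=*/true, fair, oob);
  dispatch(scan, /*isMetaJob=*/false, fair, oob);
}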

---------

Co-authored-by: drrtuy <roman.nozdrin@mariadb.com>
Leonid Fedorov
2024-02-13 19:01:16 +03:00
committed by GitHub
parent fcd46ab00a
commit 83c2408f8d
15 changed files with 410 additions and 317 deletions
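
Before the diff itself, a minimal sketch of the null-socket hazard behind the second and third fixes (MCOL-5636/MCOL-5645), under the assumption that the crash comes from writing an error through a default-constructed shared_ptr. The Socket, SameNodePseudoSocket stand-in, and reportError names are illustrative, not the engine's real messageqcpp types; the point is that the output socket must either be guarded or replaced with a same-node pseudo socket that delivers straight to the local queue.

#include <iostream>
#include <memory>
#include <string>

struct Socket
{
  virtual ~Socket() = default;
  virtual void write(const std::string& msg) = 0;
};

// Stand-in for the "same host" path: deliver straight to the local consumer
// queue instead of going over a real network socket.
struct SameNodePseudoSocket : Socket
{
  void write(const std::string& msg) override
  {
    std::cout << "delivered locally: " << msg << "\n";
  }
};

struct Job
{
  std::shared_ptr<Socket> sock;  // must not be left default-constructed (nullptr)
};

void reportError(const Job& job, const std::string& what)
{
  if (!job.sock)  // without this guard a nullptr socket crashes the process
    return;       // (or fall back to a local error log)
  job.sock->write(what);
}

int main()
{
  Job ok{std::make_shared<SameNodePseudoSocket>()};
  Job broken{};  // sock is nullptr, as with the buggy implicit ctor default
  reportError(ok, "row group failed");
  reportError(broken, "row group failed");  // safely ignored thanks to the guard
}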


@@ -59,6 +59,7 @@ using namespace BRM;
#include "writeengine.h"
#include "messagequeue.h"
#include "samenodepseudosocket.h"
using namespace messageqcpp;
#include "blockrequestprocessor.h"
@@ -106,8 +107,6 @@ using namespace threadpool;
#define O_NOATIME 0
#endif
typedef tr1::unordered_set<BRM::OID_t> USOID;
// make global for blockcache
//
static const char* statsName = {"pm"};
@@ -1019,7 +1018,7 @@ class DictScanJob : public threadpool::FairThreadPool::Functor
DictScanJob(SP_UM_IOSOCK ios, SBS bs, SP_UM_MUTEX writeLock);
virtual ~DictScanJob();
void write(const SBS&);
void write(const SBS);
int operator()();
void catchHandler(const std::string& ex, uint32_t id, uint16_t code = logging::primitiveServerErr);
void sendErrorMsg(uint32_t id, uint16_t code);
@@ -1041,15 +1040,14 @@ DictScanJob::~DictScanJob()
{
}
void DictScanJob::write(const SBS& sbs)
void DictScanJob::write(const SBS sbs)
{
// Here is the fast path for local EM to PM interaction. PM puts into the
// input EM DEC queue directly.
// !sock has a 'same host connection' semantics here.
if (!fIos)
// !fWriteLock has a 'same host connection' semantics here.
if (!fWriteLock)
{
auto* exeMgrDecPtr = exemgr::globServiceExeMgr->getDec();
exeMgrDecPtr->addDataToOutput(sbs);
fIos->write(sbs);
return;
}
boost::mutex::scoped_lock lk(*fWriteLock);
@@ -1209,6 +1207,7 @@ struct BPPHandler
}
fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(key);
fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(key);
}
scoped.unlock();
@@ -1322,16 +1321,21 @@ struct BPPHandler
}
else
{
bs.rewind();
if (posix_time::second_clock::universal_time() > dieTime)
{
std::cout << "doAbort: job for key " << key << " has been killed." << std::endl;
return 0;
}
else
{
bs.rewind();
return -1;
}
}
scoped.unlock();
fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(key);
fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(key);
return 0;
}
@@ -1354,7 +1358,10 @@ struct BPPHandler
return 0;
}
else
{
bs.rewind();
return -1;
}
}
void createBPP(ByteStream& bs)
@@ -1402,7 +1409,6 @@ struct BPPHandler
bppKeys.push_back(key);
bool newInsert;
newInsert = bppMap.insert(pair<uint32_t, SBPPV>(key, bppv)).second;
// cout << "creating BPP # " << key << endl;
scoped.unlock();
if (!newInsert)
@@ -1420,10 +1426,7 @@ struct BPPHandler
inline SBPPV grabBPPs(uint32_t uniqueID)
{
BPPMap::iterator it;
/*
uint32_t failCount = 0;
uint32_t maxFailCount = (fatal ? 500 : 5000);
*/
SBPPV ret;
boost::mutex::scoped_lock scoped(bppLock);
@@ -1433,24 +1436,6 @@ struct BPPHandler
return it->second;
else
return SBPPV();
/*
do
{
if (++failCount == maxFailCount) {
//cout << "grabBPPs couldn't find the BPPs for " << uniqueID << endl;
return ret;
//throw logic_error("grabBPPs couldn't find the unique ID");
}
scoped.unlock();
usleep(5000);
scoped.lock();
it = bppMap.find(uniqueID);
} while (it == bppMap.end());
ret = it->second;
return ret;
*/
}
inline shared_mutex& getDJLock(uint32_t uniqueID)
@@ -1488,6 +1473,7 @@ struct BPPHandler
buf = bs.buf();
/* the uniqueID is after the ISMPacketHeader, sessionID, and stepID */
uniqueID = *((const uint32_t*)&buf[sizeof(ISMPacketHeader) + 2 * sizeof(uint32_t)]);
bppv = grabBPPs(uniqueID);
if (bppv)
@@ -1499,7 +1485,10 @@
else
{
if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "addJoinerToBPP: job for id " << uniqueID << " has been killed." << endl;
return 0;
}
else
return -1;
}
@@ -1517,20 +1506,22 @@ struct BPPHandler
buf = bs.buf();
/* the uniqueID is after the ISMPacketHeader, sessionID, and stepID */
uniqueID = *((const uint32_t*)&buf[sizeof(ISMPacketHeader) + 2 * sizeof(uint32_t)]);
bppv = grabBPPs(uniqueID);
if (!bppv)
{
// cout << "got a lastJoiner msg for an unknown obj " << uniqueID << endl;
if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "LastJoiner: job for id " << uniqueID << " has been killed." << endl;
return 0;
}
else
{
return -1;
}
}
boost::unique_lock<shared_mutex> lk(getDJLock(uniqueID));
for (i = 0; i < bppv->get().size(); i++)
{
err = bppv->get()[i]->endOfJoiner();
@@ -1538,32 +1529,26 @@ struct BPPHandler
if (err == -1)
{
if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "LastJoiner: job for id " << uniqueID
<< " has been killed waiting for joiner messages for too long." << endl;
return 0;
}
else
return -1;
}
}
bppv->get()[0]->doneSendingJoinerData();
/* Note: some of the duplicate/run/join sync was moved to the BPPV class to do
more intelligent scheduling. Once the join data is received, BPPV will
start letting jobs run and create more BPP instances on demand. */
atomicops::atomicMb(); // make sure the joinDataReceived assignment doesn't migrate upward...
bppv->joinDataReceived = true;
return 0;
}
int destroyBPP(ByteStream& bs, const posix_time::ptime& dieTime)
{
// This is a corner case that damages bs so its length becomes less than a header length.
// The damaged bs doesn't pass the if that checks bs at least has header + 3x int32_t.
// The if block below works around the issue.
if (posix_time::second_clock::universal_time() > dieTime)
{
return 0;
}
uint32_t uniqueID, sessionID, stepID;
BPPMap::iterator it;
if (bs.length() < sizeof(ISMPacketHeader) + sizeof(sessionID) + sizeof(stepID) + sizeof(uniqueID))
@@ -1603,39 +1588,33 @@ struct BPPHandler
{
// MCOL-5. On ubuntu, a crash was happening. Checking
// joinDataReceived here fixes it.
// We're not ready for a destroy. Reschedule.
// We're not ready for a destroy. Reschedule to wait
// for all joiners to arrive.
// TODO there might be no joiners if the query is canceled.
// The memory will leak.
// Rewind to the beginning of ByteStream buf b/c of the advance above.
bs.rewind();
return -1;
}
}
else
{
// cout << "got a destroy for an unknown obj " << uniqueID << endl;
bs.rewind();
if (posix_time::second_clock::universal_time() > dieTime)
{
// XXXPAT: going to let this fall through and delete jobs for
// uniqueID if there are any. Not clear what the downside is.
/*
lk.unlock();
deleteDJLock(uniqueID);
return 0;
*/
cout << "destroyBPP: job for id " << uniqueID << " and sessionID " << sessionID << " has been killed."
<< endl;
// If for some reason there are jobs for this uniqueID that arrived later
// they won't leave PP thread pool staying there forever.
}
else
{
bs.rewind();
return -1;
}
}
// cout << " destroy: new size is " << bppMap.size() << endl;
/*
if (sessionID & 0x80000000)
cerr << "destroyed BPP instances for sessionID " << (int)
(sessionID ^ 0x80000000) << " stepID "<< stepID << " (syscat)\n";
else
cerr << "destroyed BPP instances for sessionID " << sessionID <<
" stepID "<< stepID << endl;
*/
fPrimitiveServerPtr->getProcessorThreadPool()->removeJobs(uniqueID);
fPrimitiveServerPtr->getOOBProcessorThreadPool()->removeJobs(uniqueID);
lk.unlock();
deleteDJLock(uniqueID);
return 0;
@@ -1704,7 +1683,10 @@ class DictionaryOp : public FairThreadPool::Functor
bs->rewind();
if (posix_time::second_clock::universal_time() > dieTime)
{
cout << "DictionaryOp::operator(): job has been killed." << endl;
return 0;
}
}
return ret;
@@ -1782,7 +1764,10 @@ class DestroyEqualityFilter : public DictionaryOp
return 0;
}
else
{
bs->rewind();
return -1;
}
}
};
@@ -1920,7 +1905,8 @@ struct ReadThread
}
static void dispatchPrimitive(SBS sbs, boost::shared_ptr<BPPHandler>& fBPPHandler,
boost::shared_ptr<threadpool::FairThreadPool>& procPoolPtr,
boost::shared_ptr<threadpool::FairThreadPool> procPool,
std::shared_ptr<threadpool::PriorityThreadPool> OOBProcPool,
SP_UM_IOSOCK& outIos, SP_UM_MUTEX& writeLock, const uint32_t processorThreads,
const bool ptTrace)
{
@@ -1942,6 +1928,7 @@ struct ReadThread
const uint32_t uniqueID = *((uint32_t*)&buf[pos + 10]);
const uint32_t weight = threadpool::MetaJobsInitialWeight;
const uint32_t priority = 0;
uint32_t id = 0;
boost::shared_ptr<FairThreadPool::Functor> functor;
if (ismHdr->Command == DICT_CREATE_EQUALITY_FILTER)
@@ -1975,8 +1962,8 @@
id = fBPPHandler->getUniqueID(sbs, ismHdr->Command);
functor.reset(new BPPHandler::Abort(fBPPHandler, sbs));
}
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, weight, priority, id);
procPoolPtr->addJob(job);
PriorityThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
OOBProcPool->addJob(job);
break;
}
@@ -2017,10 +2004,18 @@
txnId = *((uint32_t*)&buf[pos + 2]);
stepID = *((uint32_t*)&buf[pos + 6]);
uniqueID = *((uint32_t*)&buf[pos + 10]);
weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]);
weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]) + 100000;
}
if (hdr && hdr->flags & IS_SYSCAT)
{
PriorityThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
OOBProcPool->addJob(job);
}
else
{
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
procPool->addJob(job);
}
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
procPoolPtr->addJob(job);
break;
}
@@ -2044,7 +2039,8 @@ struct ReadThread
void operator()()
{
utils::setThreadName("PPReadThread");
boost::shared_ptr<threadpool::FairThreadPool> procPoolPtr = fPrimitiveServerPtr->getProcessorThreadPool();
auto procPool = fPrimitiveServerPtr->getProcessorThreadPool();
auto OOBProcPool = fPrimitiveServerPtr->getOOBProcessorThreadPool();
SBS bs;
UmSocketSelector* pUmSocketSelector = UmSocketSelector::instance();
@@ -2135,7 +2131,7 @@
default: break;
}
dispatchPrimitive(bs, fBPPHandler, procPoolPtr, outIos, writeLock,
dispatchPrimitive(bs, fBPPHandler, procPool, OOBProcPool, outIos, writeLock,
fPrimitiveServerPtr->ProcessorThreads(), fPrimitiveServerPtr->PTTrace());
}
else // bs.length() == 0
@@ -2277,6 +2273,9 @@ PrimitiveServer::PrimitiveServer(int serverThreads, int serverQueueSize, int pro
fProcessorPool.reset(new threadpool::FairThreadPool(fProcessorWeight, highPriorityThreads,
medPriorityThreads, lowPriorityThreads, 0));
// We're not using either the priority or the job-clustering features, just need a threadpool
// that can reschedule jobs, and an unlimited non-blocking queue
fOOBPool.reset(new threadpool::PriorityThreadPool(1, 5, 0, 0, 1));
asyncCounter = 0;
@@ -2330,15 +2329,18 @@ void PrimitiveServer::start(Service* service, utils::USpaceSpinLock& startupRace
sleep(1);
exeMgrDecPtr = (exemgr::globServiceExeMgr) ? exemgr::globServiceExeMgr->getDec() : nullptr;
}
// These empty SPs have "same-host" messaging semantics.
SP_UM_IOSOCK outIos(nullptr);
// This is a pseudo socket that puts data into DEC queue directly.
// It can be used for PP to EM communication only.
SP_UM_IOSOCK outIos(new IOSocket(new SameNodePseudoSocket(exeMgrDecPtr)));
// This empty SP transmits "same-host" messaging semantics.
SP_UM_MUTEX writeLock(nullptr);
auto procPoolPtr = this->getProcessorThreadPool();
auto procPool = this->getProcessorThreadPool();
auto OOBProcPool = this->getOOBProcessorThreadPool();
boost::shared_ptr<BPPHandler> fBPPHandler(new BPPHandler(this));
for (;;)
{
joblist::DistributedEngineComm::SBSVector primitiveMsgs;
for (auto& sbs : exeMgrDecPtr->readLocalQueueMessagesOrWait(primitiveMsgs))
for (auto sbs : exeMgrDecPtr->readLocalQueueMessagesOrWait(primitiveMsgs))
{
if (sbs->length() == 0)
{
@@ -2347,7 +2349,7 @@ void PrimitiveServer::start(Service* service, utils::USpaceSpinLock& startupRace
}
idbassert(sbs->length() >= sizeof(ISMPacketHeader));
ReadThread::dispatchPrimitive(sbs, fBPPHandler, procPoolPtr, outIos, writeLock,
ReadThread::dispatchPrimitive(sbs, fBPPHandler, procPool, OOBProcPool, outIos, writeLock,
this->ProcessorThreads(), this->PTTrace());
}
}
@@ -2364,7 +2366,6 @@ BPPV::BPPV(PrimitiveServer* ps)
sendThread->setProcessorPool(ps->getProcessorThreadPool());
v.reserve(BPPCount);
pos = 0;
joinDataReceived = false;
}
BPPV::~BPPV()
@@ -2404,27 +2405,6 @@ boost::shared_ptr<BatchPrimitiveProcessor> BPPV::next()
uint32_t size = v.size();
uint32_t i = 0;
#if 0
// This block of code scans for the first available BPP instance,
// makes BPPSeeder reschedule it if none are available. Relies on BPP instance
// being preallocated.
for (i = 0; i < size; i++)
{
uint32_t index = (i + pos) % size;
if (!(v[index]->busy()))
{
pos = (index + 1) % size;
v[index]->busy(true);
return v[index];
}
}
// They're all busy, make threadpool reschedule the job
return boost::shared_ptr<BatchPrimitiveProcessor>();
#endif
// This block of code creates BPP instances if/when they are needed
// don't use a processor thread when it will just block, reschedule it instead