You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-5166 This patch adds support for in-memory communication b/w EM to PP via a shared queue in DEC class
JobList low-level code relateod to primitive jobs now uses shared pointers instead of ByteStream refs talking to DEC b/c same-node EM-PP communication now goes over a queue in DEC instead of a network hop. PP now has a separate thread that processes the primitive job messages from that DEC queue.
This commit is contained in:
committed by
Leonid Fedorov
parent
9ef16c6ded
commit
a9d8924683
@ -21,13 +21,16 @@
|
||||
*
|
||||
*
|
||||
***********************************************************************/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
#define _LARGEFILE64_SOURCE
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
|
||||
//#define NDEBUG
|
||||
#include <cassert>
|
||||
#include <boost/thread.hpp>
|
||||
@ -49,7 +52,8 @@ using namespace std;
|
||||
#include <boost/scoped_array.hpp>
|
||||
#include <boost/thread.hpp>
|
||||
using namespace boost;
|
||||
|
||||
#include "distributedenginecomm.h"
|
||||
#include "serviceexemgr.h"
|
||||
#include "primproc.h"
|
||||
#include "primitiveserver.h"
|
||||
#include "primitivemsg.h"
|
||||
@ -1055,7 +1059,7 @@ class DictScanJob : public threadpool::FairThreadPool::Functor
|
||||
DictScanJob(SP_UM_IOSOCK ios, SBS bs, SP_UM_MUTEX writeLock);
|
||||
virtual ~DictScanJob();
|
||||
|
||||
void write(const ByteStream&);
|
||||
void write(const SBS&);
|
||||
int operator()();
|
||||
void catchHandler(const std::string& ex, uint32_t id, uint16_t code = logging::primitiveServerErr);
|
||||
void sendErrorMsg(uint32_t id, uint16_t code);
|
||||
@ -1077,17 +1081,27 @@ DictScanJob::~DictScanJob()
|
||||
{
|
||||
}
|
||||
|
||||
void DictScanJob::write(const ByteStream& bs)
|
||||
void DictScanJob::write(const SBS& sbs)
|
||||
{
|
||||
// Here is the fast path for local EM to PM interaction. PM puts into the
|
||||
// input EM DEC queue directly.
|
||||
// !sock has a 'same host connection' semantics here.
|
||||
if (!fIos)
|
||||
{
|
||||
auto* exeMgrDecPtr = exemgr::globServiceExeMgr->getDec();
|
||||
exeMgrDecPtr->addDataToOutput(sbs);
|
||||
return;
|
||||
}
|
||||
boost::mutex::scoped_lock lk(*fWriteLock);
|
||||
fIos->write(bs);
|
||||
fIos->write(*sbs);
|
||||
}
|
||||
|
||||
int DictScanJob::operator()()
|
||||
{
|
||||
utils::setThreadName("PPDictScanJob");
|
||||
uint8_t data[DATA_BLOCK_SIZE];
|
||||
uint32_t output_buf_size = MAX_BUFFER_SIZE;
|
||||
// Reducing this buffer one might face unrelated issues in DictScanStep.
|
||||
const uint32_t output_buf_size = MAX_BUFFER_SIZE;
|
||||
uint32_t session;
|
||||
uint32_t uniqueId = 0;
|
||||
bool wasBlockInCache = false;
|
||||
@ -1095,7 +1109,6 @@ int DictScanJob::operator()()
|
||||
uint16_t runCount;
|
||||
|
||||
boost::shared_ptr<DictEqualityFilter> eqFilter;
|
||||
ByteStream results(output_buf_size);
|
||||
TokenByScanRequestHeader* cmd;
|
||||
PrimitiveProcessor pproc(gDebugLevel);
|
||||
TokenByScanResultHeader* output;
|
||||
@ -1114,7 +1127,6 @@ int DictScanJob::operator()()
|
||||
session = cmd->Hdr.SessionID;
|
||||
uniqueId = cmd->Hdr.UniqueID;
|
||||
runCount = cmd->Count;
|
||||
output = (TokenByScanResultHeader*)results.getInputPtr();
|
||||
#ifdef VALGRIND
|
||||
memset(output, 0, sizeof(TokenByScanResultHeader));
|
||||
#endif
|
||||
@ -1145,6 +1157,9 @@ int DictScanJob::operator()()
|
||||
|
||||
for (uint16_t i = 0; i < runCount; ++i)
|
||||
{
|
||||
SBS results(new ByteStream(output_buf_size));
|
||||
output = (TokenByScanResultHeader*)results->getInputPtr();
|
||||
|
||||
loadBlock(cmd->LBID, verInfo, cmd->Hdr.TransactionID, cmd->CompType, data, &wasBlockInCache,
|
||||
&blocksRead, fLBIDTraceOn, session);
|
||||
pproc.setBlockPtr((int*)data);
|
||||
@ -1155,9 +1170,8 @@ int DictScanJob::operator()()
|
||||
else
|
||||
output->PhysicalIO += blocksRead;
|
||||
|
||||
results.advanceInputPtr(output->NBYTES);
|
||||
results->advanceInputPtr(output->NBYTES);
|
||||
write(results);
|
||||
results.restart();
|
||||
cmd->LBID++;
|
||||
}
|
||||
|
||||
@ -1199,9 +1213,9 @@ void DictScanJob::sendErrorMsg(uint32_t id, uint16_t code)
|
||||
ism.Status = code;
|
||||
ph.UniqueID = id;
|
||||
|
||||
ByteStream msg(sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader));
|
||||
msg.append((uint8_t*)&ism, sizeof(ism));
|
||||
msg.append((uint8_t*)&ph, sizeof(ph));
|
||||
SBS msg(new ByteStream(sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)));
|
||||
msg->append((uint8_t*)&ism, sizeof(ism));
|
||||
msg->append((uint8_t*)&ph, sizeof(ph));
|
||||
|
||||
write(msg);
|
||||
}
|
||||
@ -1940,6 +1954,128 @@ struct ReadThread
|
||||
ios->write(buildCacheOpResp(0));
|
||||
}
|
||||
|
||||
static void dispatchPrimitive(SBS sbs, boost::shared_ptr<BPPHandler>& fBPPHandler,
|
||||
boost::shared_ptr<threadpool::FairThreadPool>& procPoolPtr,
|
||||
SP_UM_IOSOCK& outIos, SP_UM_MUTEX& writeLock, const uint32_t processorThreads,
|
||||
const bool ptTrace)
|
||||
{
|
||||
const ISMPacketHeader* ismHdr = reinterpret_cast<const ISMPacketHeader*>(sbs->buf());
|
||||
switch (ismHdr->Command)
|
||||
{
|
||||
case DICT_CREATE_EQUALITY_FILTER:
|
||||
case DICT_DESTROY_EQUALITY_FILTER:
|
||||
case BATCH_PRIMITIVE_CREATE:
|
||||
case BATCH_PRIMITIVE_ADD_JOINER:
|
||||
case BATCH_PRIMITIVE_END_JOINER:
|
||||
case BATCH_PRIMITIVE_DESTROY:
|
||||
case BATCH_PRIMITIVE_ABORT:
|
||||
{
|
||||
const uint8_t* buf = sbs->buf();
|
||||
uint32_t pos = sizeof(ISMPacketHeader) - 2;
|
||||
const uint32_t txnId = *((uint32_t*)&buf[pos + 2]);
|
||||
const uint32_t stepID = *((uint32_t*)&buf[pos + 6]);
|
||||
const uint32_t uniqueID = *((uint32_t*)&buf[pos + 10]);
|
||||
const uint32_t weight = 1;
|
||||
const uint32_t priority = 0;
|
||||
uint32_t id = 0;
|
||||
boost::shared_ptr<FairThreadPool::Functor> functor;
|
||||
if (ismHdr->Command == DICT_CREATE_EQUALITY_FILTER)
|
||||
{
|
||||
functor.reset(new CreateEqualityFilter(sbs));
|
||||
}
|
||||
else if (ismHdr->Command == DICT_DESTROY_EQUALITY_FILTER)
|
||||
{
|
||||
functor.reset(new DestroyEqualityFilter(sbs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_CREATE)
|
||||
{
|
||||
functor.reset(new BPPHandler::Create(fBPPHandler, sbs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_ADD_JOINER)
|
||||
{
|
||||
functor.reset(new BPPHandler::AddJoiner(fBPPHandler, sbs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_END_JOINER)
|
||||
{
|
||||
id = fBPPHandler->getUniqueID(sbs, ismHdr->Command);
|
||||
functor.reset(new BPPHandler::LastJoiner(fBPPHandler, sbs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_DESTROY)
|
||||
{
|
||||
id = fBPPHandler->getUniqueID(sbs, ismHdr->Command);
|
||||
functor.reset(new BPPHandler::Destroy(fBPPHandler, sbs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_ABORT)
|
||||
{
|
||||
id = fBPPHandler->getUniqueID(sbs, ismHdr->Command);
|
||||
functor.reset(new BPPHandler::Abort(fBPPHandler, sbs));
|
||||
}
|
||||
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, weight, priority, id);
|
||||
procPoolPtr->addJob(job);
|
||||
break;
|
||||
}
|
||||
|
||||
case DICT_TOKEN_BY_SCAN_COMPARE:
|
||||
case BATCH_PRIMITIVE_RUN:
|
||||
{
|
||||
TokenByScanRequestHeader* hdr = nullptr;
|
||||
boost::shared_ptr<FairThreadPool::Functor> functor;
|
||||
uint32_t id = 0;
|
||||
uint32_t weight = 0;
|
||||
uint32_t priority = 0;
|
||||
uint32_t txnId = 0;
|
||||
uint32_t stepID = 0;
|
||||
uint32_t uniqueID = 0;
|
||||
|
||||
if (ismHdr->Command == DICT_TOKEN_BY_SCAN_COMPARE)
|
||||
{
|
||||
idbassert(sbs->length() >= sizeof(TokenByScanRequestHeader));
|
||||
hdr = (TokenByScanRequestHeader*)ismHdr;
|
||||
functor.reset(new DictScanJob(outIos, sbs, writeLock));
|
||||
id = hdr->Hdr.UniqueID;
|
||||
weight = LOGICAL_BLOCK_RIDS;
|
||||
priority = hdr->Hdr.Priority;
|
||||
const uint8_t* buf = sbs->buf();
|
||||
const uint32_t pos = sizeof(ISMPacketHeader) - 2;
|
||||
txnId = *((uint32_t*)&buf[pos + 2]);
|
||||
stepID = *((uint32_t*)&buf[pos + 6]);
|
||||
uniqueID = *((uint32_t*)&buf[pos + 10]);
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_RUN)
|
||||
{
|
||||
functor.reset(new BPPSeeder(sbs, writeLock, outIos, processorThreads, ptTrace));
|
||||
BPPSeeder* bpps = dynamic_cast<BPPSeeder*>(functor.get());
|
||||
id = bpps->getID();
|
||||
priority = bpps->priority();
|
||||
const uint8_t* buf = sbs->buf();
|
||||
const uint32_t pos = sizeof(ISMPacketHeader) - 2;
|
||||
txnId = *((uint32_t*)&buf[pos + 2]);
|
||||
stepID = *((uint32_t*)&buf[pos + 6]);
|
||||
uniqueID = *((uint32_t*)&buf[pos + 10]);
|
||||
weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]);
|
||||
}
|
||||
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
|
||||
procPoolPtr->addJob(job);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BATCH_PRIMITIVE_ACK:
|
||||
{
|
||||
fBPPHandler->doAck(*sbs);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
std::ostringstream os;
|
||||
Logger log;
|
||||
os << "unknown primitive cmd: " << ismHdr->Command;
|
||||
log.logMessage(os.str());
|
||||
break;
|
||||
}
|
||||
} // the switch stmt
|
||||
}
|
||||
|
||||
void operator()()
|
||||
{
|
||||
utils::setThreadName("PPReadThread");
|
||||
@ -1994,9 +2130,6 @@ struct ReadThread
|
||||
idbassert(bs->length() >= sizeof(ISMPacketHeader));
|
||||
|
||||
const ISMPacketHeader* ismHdr = reinterpret_cast<const ISMPacketHeader*>(bs->buf());
|
||||
// uint64_t someVal = ismHdr->Command;
|
||||
// std::cout << " PP read thread Command " << someVal << std::endl;
|
||||
|
||||
/* This switch is for the OOB commands */
|
||||
switch (ismHdr->Command)
|
||||
{
|
||||
@ -2037,139 +2170,8 @@ struct ReadThread
|
||||
|
||||
default: break;
|
||||
}
|
||||
|
||||
switch (ismHdr->Command)
|
||||
{
|
||||
case DICT_CREATE_EQUALITY_FILTER:
|
||||
case DICT_DESTROY_EQUALITY_FILTER:
|
||||
case BATCH_PRIMITIVE_CREATE:
|
||||
case BATCH_PRIMITIVE_ADD_JOINER:
|
||||
case BATCH_PRIMITIVE_END_JOINER:
|
||||
case BATCH_PRIMITIVE_DESTROY:
|
||||
case BATCH_PRIMITIVE_ABORT:
|
||||
{
|
||||
const uint8_t* buf = bs->buf();
|
||||
uint32_t pos = sizeof(ISMPacketHeader) - 2;
|
||||
const uint32_t txnId = *((uint32_t*)&buf[pos + 2]);
|
||||
const uint32_t stepID = *((uint32_t*)&buf[pos + 6]);
|
||||
const uint32_t uniqueID = *((uint32_t*)&buf[pos + 10]);
|
||||
const uint32_t weight = 1;
|
||||
const uint32_t priority = 0;
|
||||
uint32_t id = 0;
|
||||
boost::shared_ptr<FairThreadPool::Functor> functor;
|
||||
if (ismHdr->Command == DICT_CREATE_EQUALITY_FILTER)
|
||||
{
|
||||
functor.reset(new CreateEqualityFilter(bs));
|
||||
}
|
||||
else if (ismHdr->Command == DICT_DESTROY_EQUALITY_FILTER)
|
||||
{
|
||||
functor.reset(new DestroyEqualityFilter(bs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_CREATE)
|
||||
{
|
||||
functor.reset(new BPPHandler::Create(fBPPHandler, bs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_ADD_JOINER)
|
||||
{
|
||||
functor.reset(new BPPHandler::AddJoiner(fBPPHandler, bs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_END_JOINER)
|
||||
{
|
||||
id = fBPPHandler->getUniqueID(bs, ismHdr->Command);
|
||||
functor.reset(new BPPHandler::LastJoiner(fBPPHandler, bs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_DESTROY)
|
||||
{
|
||||
id = fBPPHandler->getUniqueID(bs, ismHdr->Command);
|
||||
functor.reset(new BPPHandler::Destroy(fBPPHandler, bs));
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_ABORT)
|
||||
{
|
||||
id = fBPPHandler->getUniqueID(bs, ismHdr->Command);
|
||||
functor.reset(new BPPHandler::Abort(fBPPHandler, bs));
|
||||
}
|
||||
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
|
||||
procPoolPtr->addJob(job);
|
||||
break;
|
||||
}
|
||||
|
||||
case DICT_TOKEN_BY_SCAN_COMPARE:
|
||||
case BATCH_PRIMITIVE_RUN:
|
||||
{
|
||||
TokenByScanRequestHeader* hdr = nullptr;
|
||||
boost::shared_ptr<FairThreadPool::Functor> functor;
|
||||
uint32_t id = 0;
|
||||
uint32_t weight = 0;
|
||||
uint32_t priority = 0;
|
||||
uint32_t txnId = 0;
|
||||
uint32_t stepID = 0;
|
||||
uint32_t uniqueID = 0;
|
||||
|
||||
if (bRotateDest)
|
||||
{
|
||||
if (!pUmSocketSelector->nextIOSocket(fIos, outIos, writeLock))
|
||||
{
|
||||
// If we ever fall into this part of the
|
||||
// code we have a "bug" of some sort.
|
||||
// See handleUmSockSelErr() for more info.
|
||||
// We reset ios and mutex to defaults.
|
||||
handleUmSockSelErr(string("default cmd"));
|
||||
outIos = outIosDefault;
|
||||
writeLock = writeLockDefault;
|
||||
pUmSocketSelector->delConnection(fIos);
|
||||
bRotateDest = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (ismHdr->Command == DICT_TOKEN_BY_SCAN_COMPARE)
|
||||
{
|
||||
idbassert(bs->length() >= sizeof(TokenByScanRequestHeader));
|
||||
hdr = (TokenByScanRequestHeader*)ismHdr;
|
||||
functor.reset(new DictScanJob(outIos, bs, writeLock));
|
||||
id = hdr->Hdr.UniqueID;
|
||||
weight = LOGICAL_BLOCK_RIDS;
|
||||
priority = hdr->Hdr.Priority;
|
||||
const uint8_t* buf = bs->buf();
|
||||
const uint32_t pos = sizeof(ISMPacketHeader) - 2;
|
||||
txnId = *((uint32_t*)&buf[pos + 2]);
|
||||
stepID = *((uint32_t*)&buf[pos + 6]);
|
||||
uniqueID = *((uint32_t*)&buf[pos + 10]);
|
||||
}
|
||||
else if (ismHdr->Command == BATCH_PRIMITIVE_RUN)
|
||||
{
|
||||
functor.reset(new BPPSeeder(bs, writeLock, outIos,
|
||||
fPrimitiveServerPtr->ProcessorThreads(),
|
||||
fPrimitiveServerPtr->PTTrace()));
|
||||
BPPSeeder* bpps = dynamic_cast<BPPSeeder*>(functor.get());
|
||||
id = bpps->getID();
|
||||
priority = bpps->priority();
|
||||
const uint8_t* buf = bs->buf();
|
||||
const uint32_t pos = sizeof(ISMPacketHeader) - 2;
|
||||
txnId = *((uint32_t*)&buf[pos + 2]);
|
||||
stepID = *((uint32_t*)&buf[pos + 6]);
|
||||
uniqueID = *((uint32_t*)&buf[pos + 10]);
|
||||
weight = ismHdr->Size + *((uint32_t*)&buf[pos + 18]);
|
||||
}
|
||||
FairThreadPool::Job job(uniqueID, stepID, txnId, functor, outIos, weight, priority, id);
|
||||
procPoolPtr->addJob(job);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BATCH_PRIMITIVE_ACK:
|
||||
{
|
||||
fBPPHandler->doAck(*bs);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
std::ostringstream os;
|
||||
Logger log;
|
||||
os << "unknown primitive cmd: " << ismHdr->Command;
|
||||
log.logMessage(os.str());
|
||||
break;
|
||||
}
|
||||
} // the switch stmt
|
||||
dispatchPrimitive(bs, fBPPHandler, procPoolPtr, outIos, writeLock,
|
||||
fPrimitiveServerPtr->ProcessorThreads(), fPrimitiveServerPtr->PTTrace());
|
||||
}
|
||||
else // bs.length() == 0
|
||||
{
|
||||
@ -2213,8 +2215,6 @@ struct ReadThread
|
||||
boost::shared_ptr<BPPHandler> fBPPHandler;
|
||||
};
|
||||
|
||||
/** @brief accept a primitive command from the user module
|
||||
*/
|
||||
struct ServerThread
|
||||
{
|
||||
ServerThread(string serverName, PrimitiveServer* ps) : fServerName(serverName), fPrimitiveServerPtr(ps)
|
||||
@ -2351,9 +2351,43 @@ void PrimitiveServer::start(Service* service, utils::USpaceSpinLock& startupRace
|
||||
|
||||
fServerpool.invoke(ServerThread(oss.str(), this));
|
||||
}
|
||||
|
||||
startupRaceLock.release();
|
||||
service->NotifyServiceStarted();
|
||||
|
||||
std::thread sameHostServerThread(
|
||||
[this]()
|
||||
{
|
||||
utils::setThreadName("PPSHServerThr");
|
||||
auto* exeMgrDecPtr = exemgr::globServiceExeMgr->getDec();
|
||||
while (!exeMgrDecPtr)
|
||||
{
|
||||
sleep(1);
|
||||
exeMgrDecPtr = exemgr::globServiceExeMgr->getDec();
|
||||
}
|
||||
// These empty SPs have "same-host" messaging semantics.
|
||||
SP_UM_IOSOCK outIos(nullptr);
|
||||
SP_UM_MUTEX writeLock(nullptr);
|
||||
auto procPoolPtr = this->getProcessorThreadPool();
|
||||
boost::shared_ptr<BPPHandler> fBPPHandler(new BPPHandler(this));
|
||||
for (;;)
|
||||
{
|
||||
joblist::DistributedEngineComm::SBSVector primitiveMsgs;
|
||||
for (auto& sbs : exeMgrDecPtr->readLocalQueueMessagesOrWait(primitiveMsgs))
|
||||
{
|
||||
if (sbs->length() == 0)
|
||||
{
|
||||
std::cout << "PPSHServerThr got an empty ByteStream." << std::endl;
|
||||
continue;
|
||||
}
|
||||
idbassert(sbs->length() >= sizeof(ISMPacketHeader));
|
||||
|
||||
ReadThread::dispatchPrimitive(sbs, fBPPHandler, procPoolPtr, outIos, writeLock,
|
||||
this->ProcessorThreads(), this->PTTrace());
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
fServerpool.wait();
|
||||
|
||||
cerr << "PrimitiveServer::start() exiting!" << endl;
|
||||
|
Reference in New Issue
Block a user