1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00

fix(DEC):MCOL-5805,5808 to resolve UM-only node crash inside DEC when there is no local PP to send the local requests to. (#3350)

* Revert "fix(DEC): MCOL-5602 fixing potentially endless loop in DEC (#3049)"

This reverts commit 1d416bc6ed.

* fix(DEC):MCOL-5805,5808 to resolve UM-only node crash inside DEC when there is no local PP to send the local requests to.
This commit is contained in:
drrtuy
2024-11-11 18:31:15 +00:00
committed by GitHub
parent 4cba8301d5
commit eaba4d33b4
3 changed files with 102 additions and 78 deletions

View File

@@ -69,6 +69,11 @@ class Config;
*/
namespace joblist
{
constexpr uint32_t defaultLocalConnectionId()
{
return std::numeric_limits<uint32_t>::max();
}
class DECEventListener
{
public:
@@ -225,7 +230,6 @@ class DistributedEngineComm
// A queue of ByteStreams coming in from PrimProc heading for a JobStep
typedef ThreadSafeQueue<messageqcpp::SBS> StepMsgQueue;
using AtomicSizeVec = std::vector<std::atomic<size_t>>;
// Creates a ByteStream as a command for Primitive Server and initializes it with a given `command`,
// `uniqueID` and `size`.
@@ -240,7 +244,7 @@ class DistributedEngineComm
messageqcpp::Stats stats;
StepMsgQueue queue;
uint32_t ackSocketIndex;
AtomicSizeVec unackedWork;
boost::scoped_array<volatile uint32_t> unackedWork;
boost::scoped_array<uint32_t> interleaver;
uint32_t initialConnectionId;
uint32_t pmCount;
@@ -288,7 +292,7 @@ class DistributedEngineComm
std::mutex fMlock; // sessionMessages mutex
std::vector<std::shared_ptr<std::mutex>> fWlock; // PrimProc socket write mutexes
bool fBusy;
std::atomic<uint32_t> pmCount;
volatile uint32_t pmCount;
boost::mutex fOnErrMutex; // to lock function scope to reset pmconnections under error condition
boost::mutex fSetupMutex;
@@ -310,13 +314,13 @@ class DistributedEngineComm
void sendAcks(uint32_t uniqueID, const std::vector<messageqcpp::SBS>& msgs, boost::shared_ptr<MQE> mqe,
size_t qSize);
size_t subsMsgCounterAndRotatePM(boost::shared_ptr<MQE> mqe, const size_t maxAck, uint32_t* sockIndex);
void nextPMToACK(boost::shared_ptr<MQE> mqe, uint32_t maxAck, uint32_t* sockIndex, uint16_t* numToAck);
void setFlowControl(bool enable, uint32_t uniqueID, boost::shared_ptr<MQE> mqe);
void doHasBigMsgs(boost::shared_ptr<MQE> mqe, uint64_t targetSize);
boost::mutex ackLock;
// localConnectionId_ is set running Setup() method
uint32_t localConnectionId_ = std::numeric_limits<uint32_t>::max();
uint32_t localConnectionId_ = defaultLocalConnectionId();
std::vector<struct in_addr> localNetIfaceSins_;
std::mutex inMemoryEM2PPExchMutex_;
std::condition_variable inMemoryEM2PPExchCV_;