// Provenance (converted from scraped page residue so the header stays valid C++):
// mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
// mariadb-columnstore-engine/dbcon/joblist/distributedenginecomm.h
// 2023-03-02 15:59:42 +00:00
// 322 lines, 9.4 KiB, C++
/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
//
// $Id: distributedenginecomm.h 9655 2013-06-25 23:08:13Z xlou $
//
// C++ Interface: distributedenginecomm
//
// Description:
//
//
// Author: <pfigg@calpont.com>, (C) 2006
//
// Copyright: See COPYING file that comes with this distribution
//
//
/** @file */
#pragma once
#include <boost/thread.hpp>
#include <boost/thread/condition.hpp>
#include <boost/scoped_array.hpp>
#include <condition_variable>
#include <ifaddrs.h>
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <queue>
#include <vector>
#include "bytestream.h"
#include "primitivemsg.h"
#include "threadsafequeue.h"
#include "rwlock_local.h"
#include "resourcemanager.h"
#include "messagequeue.h"
class TestDistributedEngineComm;
#define EXPORT
namespace messageqcpp
{
class MessageQueueClient;
}
namespace config
{
class Config;
}
/**
* Namespace
*/
namespace joblist
{
/** @brief Listener interface for DistributedEngineComm events.
 *
 * Implementations register themselves through
 * DistributedEngineComm::addDECEventListener() and are called back when a
 * new PM connection comes online.
 */
class DECEventListener
{
public:
// '= default' replaces the former empty body '{};' (redundant definition
// plus a stray semicolon); behavior is identical.
virtual ~DECEventListener() = default;
/** @brief Do whatever needs to be done to init the new PM.
 *
 * @param newConnectionNumber index of the newly established PM connection.
 */
virtual void newPMOnline(uint32_t newConnectionNumber) = 0;
};
/**
 * class DistributedEngineComm
 *
 * Singleton broker for primitive-message traffic between this process and the
 * PM (PrimProc) servers.  Outbound messages are written over a pool of
 * MessageQueueClient connections (fPmConnections); inbound responses are read
 * by per-connection listener threads (Listen()) and demultiplexed into
 * per-jobstep queues registered with addQueue() and drained with the read*()
 * methods, keyed by a uint32_t uniqueID.  Also implements receive-side flow
 * control via ACK messages for BPP traffic (see MQE and sendAcks()).
 */
class DistributedEngineComm
{
using SharedPtrEMSock = boost::shared_ptr<messageqcpp::IOSocket>;
public:
using SBSVector = std::vector<messageqcpp::SBS>;
/**
 * Constructors
 */
EXPORT virtual ~DistributedEngineComm();
/** @brief Singleton accessor; rm/isExeMgr are only meaningful on the call
 * that constructs the instance. */
EXPORT static DistributedEngineComm* instance(ResourceManager* rm, bool isExeMgr = false);
/** @brief delete the static instance
 * This has the effect of causing the connection to be rebuilt
 */
EXPORT static void reset();
/** @brief currently a nop
 *
 */
EXPORT int Open()
{
return 0;
}
/** @brief Register an inbound message queue under 'key'.
 * @param sendACKs true for traffic that participates in flow control (BPP). */
EXPORT void addQueue(uint32_t key, bool sendACKs = false);
/** @brief Remove the queue registered under 'key'. */
EXPORT void removeQueue(uint32_t key);
/** @brief Shut down the queue registered under 'key'. */
EXPORT void shutdownQueue(uint32_t key);
/** @brief read a primitive response
 *
 * Returns the next message in the inbound queue for session sessionId and step stepId.
 * @todo See if we can't save a copy by returning a const&
 */
EXPORT const messageqcpp::ByteStream read(uint32_t key);
/** @brief read a primitve response
 *
 * Returns the next message in the inbound queue for session sessionId and step stepId.
 * @param bs A pointer to the ByteStream to fill in.
 * @note: saves a copy vs read(uint32_t, uint32_t).
 */
EXPORT void read(uint32_t key, messageqcpp::SBS&);
/** @brief read a primitve response
 *
 * Returns the next message in the inbound queue for session sessionId and step stepId.
 */
EXPORT void read_all(uint32_t key, std::vector<messageqcpp::SBS>& v);
/** reads queuesize/divisor msgs; *flowControlOn (if non-NULL) reports whether
 * flow control is currently engaged for this queue */
EXPORT void read_some(uint32_t key, uint32_t divisor, std::vector<messageqcpp::SBS>& v,
bool* flowControlOn = NULL);
/** @brief Write a primitive message
 *
 * Writes a primitive message to a primitive server. Msg needs to conatin an ISMPacketHeader. The
 * LBID is extracted from the ISMPacketHeader and used to determine the actual P/M to send to.
 */
EXPORT int32_t write(uint32_t key, const messageqcpp::SBS& msg);
// EXPORT void throttledWrite(const messageqcpp::ByteStream& msg);
/** @brief Special write function for use only by newPMOnline event handlers
 */
EXPORT void write(messageqcpp::ByteStream& msg, uint32_t connection);
/** @brief Shutdown this object
 *
 * Closes all the connections created during Setup() and cleans up other stuff.
 */
EXPORT int Close();
/** @brief Start listening for primitive responses
 *
 * Starts the current thread listening on the client socket for primitive response messages. Will not return
 * until busy() returns false or a zero-length response is received.
 */
EXPORT void Listen(boost::shared_ptr<messageqcpp::MessageQueueClient> client, uint32_t connIndex);
/** @brief set/unset busy flag
 *
 * Set or unset the busy flag so Listen() can return.
 */
EXPORT void makeBusy(bool b)
{
fBusy = b;
}
/** @brief fBusy accessor
 *
 */
EXPORT bool Busy() const
{
return fBusy;
}
/** @brief Returns the size of the queue for the specified jobstep
 */
EXPORT uint32_t size(uint32_t key);
/** @brief Establish the connections to the PM servers. */
EXPORT int32_t Setup();
/** @brief Register a listener to be notified of DEC events (e.g. new PM online). */
EXPORT void addDECEventListener(DECEventListener*);
/** @brief Deregister a previously added event listener. */
EXPORT void removeDECEventListener(DECEventListener*);
/** @brief Number of PM server connections currently held. */
uint64_t connectedPmServers() const
{
return fPmConnections.size();
}
/** @brief Number of PMs (not connections) known to this DEC. */
uint32_t getPmCount() const
{
return pmCount;
}
/** @brief Total connections = PM count x connections-per-PrimProc
 * (the multiplier applies only when running as ExeMgr). */
unsigned getNumConnections() const
{
unsigned cpp = (fIsExeMgr ? fRm->getPsConnectionsPerPrimProc() : 1);
return fRm->getPsCount() * cpp;
}
/** @brief true when this DEC was constructed for the ExeMgr process. */
bool isExeMgrDEC() const
{
return fIsExeMgr;
}
/** @brief Checks whether 'client' connects to one of this host's own network
 * interfaces (compared against localNetIfaceSins_). */
template <typename T>
bool clientAtTheSameHost(T& client) const;
/** @brief Populate localNetIfaceSins_ with this host's interface addresses. */
void getLocalNetIfacesSins();
/** @brief Network statistics accumulated for the queue registered under uniqueID. */
messageqcpp::Stats getNetworkStats(uint32_t uniqueID);
/** @brief Push an inbound message into the matching jobstep queue. */
void addDataToOutput(messageqcpp::SBS sbs);
/** @brief Drain the local (in-memory EM->PP) queue, blocking until messages arrive. */
SBSVector& readLocalQueueMessagesOrWait(SBSVector&);
friend class ::TestDistributedEngineComm;
private:
typedef std::vector<boost::thread*> ReaderList;
typedef std::vector<boost::shared_ptr<messageqcpp::MessageQueueClient>> ClientList;
// A queue of ByteStreams coming in from PrimProc heading for a JobStep
typedef ThreadSafeQueue<messageqcpp::SBS> StepMsgQueue;
/* To keep some state associated with the connection. These aren't copyable. */
struct MQE : public boost::noncopyable
{
MQE(const uint32_t pmCount, const uint32_t initialInterleaverValue);
// Picks the next connection id for a PM, interleaving across that PM's connections.
uint32_t getNextConnectionId(const size_t pmIndex, const size_t pmConnectionsNumber,
const uint32_t DECConnectionsPerQuery);
messageqcpp::Stats stats;
StepMsgQueue queue;
// Index of the socket to send the next ACK on.
uint32_t ackSocketIndex;
// Per-PM counters of outstanding (un-ACKed) work; volatile, written by listener threads.
boost::scoped_array<volatile uint32_t> unackedWork;
// Per-PM next-connection table used by getNextConnectionId().
boost::scoped_array<uint32_t> interleaver;
uint32_t initialConnectionId;
uint32_t pmCount;
// non-BPP primitives don't do ACKs
bool sendACKs;
// This var will allow us to toggle flow control for BPP instances when
// the UM is keeping up. Send -1 as the ACK value to disable flow control
// on the PM side, positive value to reenable it. Not yet impl'd on the UM side.
bool throttled;
// This var signifies that the PM can return msgs big enough to keep toggling
// FC on and off. We force FC on in that case and maintain a larger buffer.
bool hasBigMsgs;
uint64_t targetQueueSize;
};
// The mapping of session ids to StepMsgQueueLists
typedef std::map<unsigned, boost::shared_ptr<MQE>> MessageQueueMap;
explicit DistributedEngineComm(ResourceManager* rm, bool isExeMgr);
// Spawns a reader thread that runs Listen() on the given client connection.
void StartClientListener(boost::shared_ptr<messageqcpp::MessageQueueClient> cl, uint32_t connIndex);
/** @brief Add a message to the queue
 *
 */
void addDataToOutput(messageqcpp::SBS, uint32_t connIndex, messageqcpp::Stats* statsToAdd);
/** @brief Writes data to the client at the index
 *
 * Continues trying to write data to the client at the next index until all clients have been tried.
 */
int writeToClient(size_t index, const messageqcpp::SBS& bs,
uint32_t senderID = std::numeric_limits<uint32_t>::max(), bool doInterleaving = false);
// Enqueue onto inMemoryEM2PPExchQueue_ and notify readLocalQueueMessagesOrWait().
void pushToTheLocalQueueAndNotifyRecv(const messageqcpp::SBS& bs);
// Singleton storage for instance()/reset().
static DistributedEngineComm* fInstance;
ResourceManager* fRm;
ClientList fPmConnections; // all the pm servers
ReaderList fPmReader; // all the reader threads for the pm servers
MessageQueueMap
fSessionMessages; // place to put messages from the pm server to be returned by the Read method
std::mutex fMlock; // sessionMessages mutex
std::vector<std::shared_ptr<std::mutex>> fWlock; // PrimProc socket write mutexes
bool fBusy;
// NOTE(review): volatile is not a substitute for atomic here; presumably
// guarded by the surrounding mutexes -- confirm in the .cpp before changing.
volatile uint32_t pmCount;
boost::mutex fOnErrMutex; // to lock function scope to reset pmconnections under error condition
boost::mutex fSetupMutex;
// event listener data
std::vector<DECEventListener*> eventListeners;
boost::mutex eventListenerLock;
ClientList newClients; // connections added after startup (new PMs), with their write mutexes below
std::vector<std::shared_ptr<std::mutex>> newLocks;
bool fIsExeMgr;
// send-side throttling vars
uint64_t throttleThreshold;
static const uint32_t targetRecvQueueSize = 50000000;
static const uint32_t disableThreshold = 10000000;
uint32_t tbpsThreadCount;
uint32_t fDECConnectionsPerQuery;
// Flow control: acknowledge consumed msgs for the queue registered under uniqueID.
void sendAcks(uint32_t uniqueID, const std::vector<messageqcpp::SBS>& msgs, boost::shared_ptr<MQE> mqe,
size_t qSize);
// Chooses which PM to ACK next and how many messages to acknowledge.
void nextPMToACK(boost::shared_ptr<MQE> mqe, uint32_t maxAck, uint32_t* sockIndex, uint16_t* numToAck);
// Enable/disable PM-side flow control for the given queue (see MQE::throttled).
void setFlowControl(bool enable, uint32_t uniqueID, boost::shared_ptr<MQE> mqe);
// Force flow control on and grow the target queue size when big messages are seen.
void doHasBigMsgs(boost::shared_ptr<MQE> mqe, uint64_t targetSize);
boost::mutex ackLock;
// Addresses of this host's network interfaces; filled by getLocalNetIfacesSins().
std::vector<struct in_addr> localNetIfaceSins_;
// In-memory EM->PP exchange: queue plus its mutex/condvar used by
// pushToTheLocalQueueAndNotifyRecv() and readLocalQueueMessagesOrWait().
std::mutex inMemoryEM2PPExchMutex_;
std::condition_variable inMemoryEM2PPExchCV_;
std::queue<messageqcpp::SBS> inMemoryEM2PPExchQueue_;
};
} // namespace joblist
#undef EXPORT