You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-11-03 17:13:17 +03:00 
			
		
		
		
	* fix(threadpool): MCOL-5565 queries stuck in FairThreadScheduler. (#3100) Meta Primitive Jobs, e.g. ADD_JOINER, LAST_JOINER, get stuck in the Fair scheduler without an out-of-band scheduler. Add the OOB scheduler back to remedy the issue. * fix(messageqcpp): MCOL-5636 same-node communication crashes transmitting PP errors to EM b/c error messaging leveraged a socket that was a nullptr. (#3106) * fix(threadpool): MCOL-5645 erroneous threadpool Job ctor implicitly sets socket shared_ptr to nullptr, causing SIGABRT when the threadpool returns an error (#3125) --------- Co-authored-by: drrtuy <roman.nozdrin@mariadb.com>
		
			
				
	
	
		
			311 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			311 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* Copyright (c) 2022 MariaDB Corporation
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or
 | 
						|
   modify it under the terms of the GNU General Public License
 | 
						|
   as published by the Free Software Foundation; version 2 of
 | 
						|
   the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
						|
   MA 02110-1301, USA. */
 | 
						|
 | 
						|
#include <atomic>
 | 
						|
#include <stdexcept>
 | 
						|
#include <unistd.h>
 | 
						|
#include <exception>
 | 
						|
using namespace std;
 | 
						|
 | 
						|
#include "messageobj.h"
 | 
						|
#include "messagelog.h"
 | 
						|
#include "threadnaming.h"
 | 
						|
using namespace logging;
 | 
						|
 | 
						|
#include "fair_threadpool.h"
 | 
						|
using namespace boost;
 | 
						|
 | 
						|
#include "dbcon/joblist/primitivemsg.h"
 | 
						|
 | 
						|
namespace threadpool
 | 
						|
{
 | 
						|
// Builds the pool and immediately spawns highThreads + midThreads + lowThreads
// detached worker threads. Every worker is started with Priority::HIGH; the three
// counts only contribute to the total.
//
// targetWeightPerRun  stored in weightPerRun
// highThreads/midThreads/lowThreads  summed to get the number of workers
// ID                  stored in id
FairThreadPool::FairThreadPool(uint targetWeightPerRun, uint highThreads, uint midThreads, uint lowThreads,
                               uint ID)
 : weightPerRun(targetWeightPerRun), id(ID), stopExtra_(false)
{
  const size_t totalWorkers = highThreads + midThreads + lowThreads;

  size_t spawned = 0;
  while (spawned < totalWorkers)
  {
    // The thread group keeps the thread object; the worker itself runs detached.
    boost::thread* worker = threads.create_thread(ThreadHelper(this, PriorityThreadPool::Priority::HIGH));
    worker->detach();
    ++spawned;
  }

  std::cout << "FairThreadPool started " << totalWorkers << " thread/-s.\n";

  // Current and expected worker counts; addJob() compares them to re-create
  // workers that exited because of an exception.
  threadCounts_.store(totalWorkers, std::memory_order_relaxed);
  defaultThreadCounts = totalWorkers;
}
 | 
						|
 | 
						|
// Destroying the pool only raises the stop flag via stop(); the detached worker
// threads are not joined here.
FairThreadPool::~FairThreadPool()
{
  this->stop();
}
 | 
						|
 | 
						|
void FairThreadPool::addJob(const Job& job)
 | 
						|
{
 | 
						|
  boost::thread* newThread;
 | 
						|
  std::unique_lock<std::mutex> lk(mutex, std::defer_lock_t());
 | 
						|
 | 
						|
  // Create any missing threads
 | 
						|
  if (defaultThreadCounts != threadCounts_.load(std::memory_order_relaxed))
 | 
						|
  {
 | 
						|
    newThread = threads.create_thread(ThreadHelper(this, PriorityThreadPool::Priority::HIGH));
 | 
						|
    newThread->detach();
 | 
						|
    threadCounts_.fetch_add(1, std::memory_order_relaxed);
 | 
						|
  }
 | 
						|
 | 
						|
  lk.lock();
 | 
						|
  // If some threads have blocked (because of output queue full)
 | 
						|
  // Temporarily add some extra worker threads to make up for the blocked threads.
 | 
						|
  if (blockedThreads_ > extraThreads_)
 | 
						|
  {
 | 
						|
    stopExtra_ = false;
 | 
						|
    newThread = threads.create_thread(ThreadHelper(this, PriorityThreadPool::Priority::EXTRA));
 | 
						|
    newThread->detach();
 | 
						|
    ++extraThreads_;
 | 
						|
  }
 | 
						|
  else if (blockedThreads_ == 0)
 | 
						|
  {
 | 
						|
    // Release the temporary threads -- some threads have become unblocked.
 | 
						|
    stopExtra_ = true;
 | 
						|
  }
 | 
						|
 | 
						|
  auto jobsListMapIter = txn2JobsListMap_.find(job.txnIdx_);
 | 
						|
  if (jobsListMapIter == txn2JobsListMap_.end())  // there is no txn in the map
 | 
						|
  {
 | 
						|
    ThreadPoolJobsList* jobsList = new ThreadPoolJobsList;
 | 
						|
    jobsList->push_back(job);
 | 
						|
    txn2JobsListMap_[job.txnIdx_] = jobsList;
 | 
						|
    weightedTxnsQueue_.push({job.weight_, job.txnIdx_});
 | 
						|
  }
 | 
						|
  else  // txn is in the map
 | 
						|
  {
 | 
						|
    if (jobsListMapIter->second->empty())  // there are no jobs for the txn
 | 
						|
    {
 | 
						|
      weightedTxnsQueue_.push({job.weight_, job.txnIdx_});
 | 
						|
    }
 | 
						|
    jobsListMapIter->second->push_back(job);
 | 
						|
  }
 | 
						|
 | 
						|
  newJob.notify_one();
 | 
						|
}
 | 
						|
 | 
						|
void FairThreadPool::removeJobs(uint32_t id)
 | 
						|
{
 | 
						|
  std::unique_lock<std::mutex> lk(mutex);
 | 
						|
 | 
						|
  auto txnJobsMapIter = txn2JobsListMap_.begin();
 | 
						|
  while (txnJobsMapIter != txn2JobsListMap_.end())
 | 
						|
  {
 | 
						|
    auto& txnJobsMapPair = *txnJobsMapIter;
 | 
						|
    ThreadPoolJobsList* txnJobsList = txnJobsMapPair.second;
 | 
						|
    // txnJobsList must not be nullptr
 | 
						|
    if (txnJobsList && txnJobsList->empty())
 | 
						|
    {
 | 
						|
      txnJobsMapIter = txn2JobsListMap_.erase(txnJobsMapIter);
 | 
						|
      delete txnJobsList;
 | 
						|
      continue;
 | 
						|
      // There is no clean-up for PQ. It will happen later in threadFcn
 | 
						|
    }
 | 
						|
    auto job = txnJobsList->begin();
 | 
						|
    while (job != txnJobsList->end())
 | 
						|
    {
 | 
						|
      if (job->id_ == id)
 | 
						|
      {
 | 
						|
        job = txnJobsList->erase(job);  // update the job iter
 | 
						|
        continue;                       // go-on skiping job iter increment
 | 
						|
      }
 | 
						|
      ++job;
 | 
						|
    }
 | 
						|
 | 
						|
    if (txnJobsList->empty())
 | 
						|
    {
 | 
						|
      txnJobsMapIter = txn2JobsListMap_.erase(txnJobsMapIter);
 | 
						|
      delete txnJobsList;
 | 
						|
      continue;
 | 
						|
      // There is no clean-up for PQ. It will happen later in threadFcn
 | 
						|
    }
 | 
						|
    ++txnJobsMapIter;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
// Worker thread body.
//
// Until stop_ is set, the loop
//   1. waits on newJob while the weighted transactions queue is empty;
//   2. takes the transaction at the top of the queue, dropping queue entries whose
//      transaction is missing from txn2JobsListMap_ or has no pending jobs
//      (the empty jobs list is freed and its map entry erased);
//   3. pops the front job of that transaction and, if the transaction still has
//      jobs, pushes it back into the queue with its weight increased by the
//      weight of the job just taken;
//   4. runs the job functor without holding the mutex; when the functor returns
//      true the job is re-queued through addJob() with an increased weight.
//
// preferredQueue is only compared against Priority::EXTRA: an EXTRA worker that
// finds the queue empty while stopExtra_ is set decrements extraThreads_ and exits.
//
// Any exception escaping the loop ends the thread: threadCounts_ is decremented,
// the exception is logged (unless NOLOGGING is defined) and, if a job functor was
// running, an error message is sent with error_handling::sendErrorMsg().
void FairThreadPool::threadFcn(const PriorityThreadPool::Priority preferredQueue)
{
  utils::setThreadName("Idle");
  RunListT runList(1);  // This is a vector to allow to grab multiple jobs
  bool running = false;  // true only while a job functor is executing

  try
  {
    while (!stop_.load(std::memory_order_relaxed))
    {
      runList.clear();  // remove the job taken on the previous iteration
      std::unique_lock<std::mutex> lk(mutex);

      if (weightedTxnsQueue_.empty())
      {
        // If this is an EXTRA thread due to other threads blocking, and all blockers are unblocked,
        // we don't want this one any more.
        if (preferredQueue == PriorityThreadPool::Priority::EXTRA && stopExtra_)
        {
          --extraThreads_;
          return;
        }

        newJob.wait(lk);
        continue;  // lk is released at the end of this iteration and re-taken at the top of the loop
      }

      WeightedTxnT weightedTxn = weightedTxnsQueue_.top();
      auto txnAndJobListPair = txn2JobsListMap_.find(weightedTxn.second);

      // Looking for a non-empty jobsList in a loop.
      // The loop stops early when the PQ becomes empty (no jobs in this thread pool).
      while (txnAndJobListPair == txn2JobsListMap_.end() || txnAndJobListPair->second->empty())
      {
        // JobList is empty. This can happen when this method pops the last Job.
        if (txnAndJobListPair != txn2JobsListMap_.end())
        {
          delete txnAndJobListPair->second;
          // !txnAndJobListPair is invalidated after this!
          txn2JobsListMap_.erase(txnAndJobListPair);
        }

        weightedTxnsQueue_.pop();  // remove the stale txn from the PQ

        if (weightedTxnsQueue_.empty())
        {
          break;
        }

        weightedTxn = weightedTxnsQueue_.top();
        txnAndJobListPair = txn2JobsListMap_.find(weightedTxn.second);
      }

      if (weightedTxnsQueue_.empty())
      {
        newJob.wait(lk);
        continue;
      }

      // We have a non-empty jobsList at this point.
      // Remove the txn from the queue first to add it back later.
      weightedTxnsQueue_.pop();
      TransactionIdxT txnIdx = txnAndJobListPair->first;
      ThreadPoolJobsList* jobsList = txnAndJobListPair->second;
      runList.push_back(jobsList->front());

      jobsList->pop_front();

      // Add the jobList back into the PQ adding some weight to it.
      // Current algo doesn't reduce total txn weight if the job is rescheduled.
      if (!jobsList->empty())
      {
        weightedTxnsQueue_.push({weightedTxn.first + runList[0].weight_, txnIdx});
      }

      lk.unlock();

      running = true;
      jobsRunning_.fetch_add(1, std::memory_order_relaxed);
      const bool rescheduleJob = (*(runList[0].functor_))();  // run the functor
      jobsRunning_.fetch_sub(1, std::memory_order_relaxed);
      running = false;

      utils::setThreadName("Idle");

      if (rescheduleJob)
      {
        // to avoid excessive CPU usage waiting for data from storage
        usleep(500);
        // Double a non-zero weight; a zero weight gets RescheduleWeightIncrement.
        runList[0].weight_ += (runList[0].weight_) ? runList[0].weight_ : RescheduleWeightIncrement;
        addJob(runList[0]);
      }
    }
  }
  catch (const std::exception& ex)
  {
    // The functor threw before jobsRunning_ could be decremented in the loop.
    if (running)
    {
      jobsRunning_.fetch_sub(1, std::memory_order_relaxed);
    }

    // Log the exception and exit this thread
    try
    {
      threadCounts_.fetch_sub(1, std::memory_order_relaxed);
#ifndef NOLOGGING
      logging::Message::Args args;
      logging::Message message(5);
      args.add("threadFcn: Caught exception: ");
      args.add(ex.what());

      message.format(args);

      logging::LoggingID lid(22);
      logging::MessageLog ml(lid);

      ml.logErrorMessage(message);
#endif

      if (running)
      {
        error_handling::sendErrorMsg(logging::primitiveServerErr, runList[0].uniqueID_, runList[0].stepID_,
                                     runList[0].sock_);
      }
    }
    catch (...)
    {
      std::cout << "FairThreadPool::threadFcn(): std::exception - double exception: failed to send an error"
                << std::endl;
    }
  }
  catch (...)
  {
    try
    {
      // The functor threw before jobsRunning_ could be decremented in the loop.
      if (running)
      {
        jobsRunning_.fetch_sub(1, std::memory_order_relaxed);
      }

      threadCounts_.fetch_sub(1, std::memory_order_relaxed);
#ifndef NOLOGGING
      logging::Message::Args args;
      logging::Message message(6);
      args.add("threadFcn: Caught unknown exception!");

      message.format(args);

      logging::LoggingID lid(22);
      logging::MessageLog ml(lid);

      ml.logErrorMessage(message);
#endif

      if (running)
      {
        error_handling::sendErrorMsg(logging::primitiveServerErr, runList[0].uniqueID_, runList[0].stepID_,
                                     runList[0].sock_);
      }
    }
    catch (...)
    {
      std::cout << "FairThreadPool::threadFcn(): ... exception - double exception: failed to send an error"
                << std::endl;
    }
  }
}
 | 
						|
 | 
						|
void FairThreadPool::stop()
 | 
						|
{
 | 
						|
  stop_.store(true, std::memory_order_relaxed);
 | 
						|
}
 | 
						|
 | 
						|
}  // namespace threadpool
 |