1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-4912 This patch introduces an Extent Map index to improve EM scalability

The EM scalability project has two parts: phase1 and phase2.
        This is phase1, which brings an EM index to speed up (from O(n) down
        to the speed of boost::unordered_map) EM lookups that search for a
        <dbroot, oid, partition> tuple to turn it into an LBID,
        e.g. most bulk insertion meta info operations.
        The basis is boost::interprocess::managed_shared_memory, where EMIndex is
        stored. Whilst it is not debug-friendly, it allows putting
        nested structs into shmem. EMIndex has 3 tiers. Top-down description:
        vector of dbroots, map of oids to partition vectors, partition
        vectors that have EM indices.
        Separate EM methods now query the index before they do an EM run.
        EMIndex has a separate shmem file with the fixed id
        MCS-shm-00060001.
This commit is contained in:
Roman Nozdrin
2022-03-30 08:57:05 +00:00
committed by Leonid Fedorov
parent fb3eaabd29
commit 4c26e4f960
25 changed files with 3498 additions and 459 deletions

View File

@ -40,7 +40,15 @@ namespace bi = boost::interprocess;
namespace BRM
{
BRMShmImpl::BRMShmImpl(unsigned key, off_t size, bool readOnly) : fKey(key), fSize(size), fReadOnly(readOnly)
// Upper bound for the attach-retry loop in BRMManagedShmImpl's constructor;
// the loop re-throws once `tries` reaches this value.
const constexpr uint32_t ShmCreateMaxRetries = 10;
// Argument handed to usleep() between two attach attempts (usleep() takes microseconds).
const constexpr unsigned int NapTimer = 500000;
BRMShmImplParent::BRMShmImplParent(unsigned key, off_t size, bool readOnly)
: fKey(key), fSize(size), fReadOnly(readOnly){};
BRMShmImplParent::~BRMShmImplParent(){};
BRMShmImpl::BRMShmImpl(unsigned key, off_t size, bool readOnly) : BRMShmImplParent(key, size, readOnly)
{
string keyName = ShmKeys::keyToName(fKey);
@ -238,5 +246,167 @@ void BRMShmImpl::destroy()
bi::shared_memory_object::remove(oldName.c_str());
}
/** @brief Attaches to, or creates, the managed shared memory segment named after `key`.
 *
 * When `size` is 0 the constructor only tries to attach to an existing segment. It
 * retries up to ShmCreateMaxRetries times, pausing NapTimer between attempts, and
 * re-throws the bi::interprocess_exception once the retries are exhausted.
 * When `size` is non-zero it tries to create the segment; if the segment already
 * exists it attaches to it instead (read-only when `readOnly` is set).
 * On success fShmSegment points to the mapped segment and fSize holds its size.
 *
 * @param key      shared memory key, converted to a name with ShmKeys::keyToName()
 * @param size     requested segment size, or 0 to attach only
 * @param readOnly open an already existing segment read-only
 */
BRMManagedShmImpl::BRMManagedShmImpl(unsigned key, off_t size, bool readOnly)
 : BRMShmImplParent(key, size, readOnly)
{
  string keyName = ShmKeys::keyToName(fKey);
  off_t curSize = 0;

  // Attach-only path: the loop body either returns, or throws and is retried.
  for (uint32_t tries = 0; fSize == 0 && tries <= ShmCreateMaxRetries; ++tries)
  {
    try
    {
      auto* shmSegment = new boost::interprocess::managed_shared_memory(bi::open_only, keyName.c_str());
      curSize = shmSegment->get_size();

      if (curSize == 0)
      {
        // Treat an empty segment like a failed attach so that it is retried below.
        delete shmSegment;
        throw bi::interprocess_exception("shared memory segment size is 0.");
      }
      else
      {
        fShmSegment = shmSegment;
        fSize = curSize;
        return;
      }
    }
    catch (bi::interprocess_exception&)
    {
      if (tries == ShmCreateMaxRetries)
      {
        log("BRMManagedShmImpl::BRMManagedShmImpl(): re-creating shared memory segment\
b/c of its size == 0. Re-throw.");
        throw;
      }

      cerr << "BRMManagedShmImpl::BRMManagedShmImpl(): re-creating shared memory segment\
b/c of its size == 0"
           << endl;
      usleep(NapTimer);
    }
  }

  try
  {
    bi::permissions perms;
    perms.set_unrestricted();
    fShmSegment = new bi::managed_shared_memory(bi::create_only, keyName.c_str(), fSize,
                                                0,  // use a default address to map the segment
                                                perms);
    // fSize == 0 on any process startup but managed_shared_memory ctor throws
    // so control flow doesn't get here.
    idbassert(fSize > 0);
  }
  catch (bi::interprocess_exception& b)
  {
    // Anything but "already exists" is a genuine creation failure.
    if (b.get_error_code() != bi::already_exists_error)
    {
      ostringstream o;
      o << "BRM caught an exception creating a shared memory segment: " << b.what();
      log(o.str());
      throw;
    }

    // The segment already exists: attach to it instead of creating it.
    bi::managed_shared_memory* shmSegment = nullptr;

    try
    {
      if (fReadOnly)
        shmSegment = new bi::managed_shared_memory(bi::open_read_only, keyName.c_str());
      else
        shmSegment = new bi::managed_shared_memory(bi::open_only, keyName.c_str());
    }
    catch (exception& e)
    {
      // Report the attach failure itself (e), not the earlier "already exists" error (b).
      ostringstream o;
      o << "BRM caught an exception attaching to a shared memory segment (" << keyName << "): " << e.what();
      log(o.str());
      throw;
    }

    off_t curSize = shmSegment->get_size();
    idbassert(curSize > 0);
    idbassert(curSize >= fSize);
    fShmSegment = shmSegment;
    fSize = curSize;
  }
}
/** @brief Enlarges the managed segment so that its total size becomes `newSize`.
 *
 * Nothing happens when `newSize` is not larger than the current size or when no
 * segment is mapped. Otherwise the segment is unmapped, grown by the difference
 * and mapped again with open_only.
 *
 * @param newSize requested total size of the segment
 * @return 0 on success or when there was nothing to do, -1 when
 *         bi::managed_shared_memory::grow() reported failure (fSize is then left
 *         unchanged and the segment is mapped again at its old size).
 */
int BRMManagedShmImpl::grow(off_t newSize)
{
  if (newSize <= fSize || !fShmSegment)
    return 0;

  const auto keyName = ShmKeys::keyToName(fKey);
  const auto incSize = newSize - fSize;

  // Call destructor to unmap the segment. Reset the pointer immediately so that
  // ~BRMManagedShmImpl() cannot delete it a second time if anything below throws.
  delete fShmSegment;
  fShmSegment = nullptr;

  // Grow the segment; the call reports success through its bool result.
  const bool grown = bi::managed_shared_memory::grow(keyName.c_str(), incSize);

  // Map the segment again whether or not it grew.
  // Open only with the assumption ::grow() can be called on read-write shmem.
  fShmSegment = new bi::managed_shared_memory(bi::open_only, keyName.c_str());

  if (!grown)
    return -1;

  // Update size.
  fSize = newSize;
  return 0;
}
// Dummy method that has no references in the code.
// It ignores both arguments and always returns 0.
int BRMManagedShmImpl::clear(unsigned newKey, off_t newSize)
{
return 0;
}
// Switches the object to read-only mode by re-mapping the segment read-only.
// Pointers and references taken from the previous mapping have to be refreshed
// by the caller. Does nothing when the object is already read-only.
void BRMManagedShmImpl::setReadOnly()
{
  if (!fReadOnly)
  {
    remap(true);
    fReadOnly = true;
  }
}
// Exchanges the state of two implementations: the managed segments are swapped
// through bi::managed_shared_memory::swap(), then key, size and access mode.
void BRMManagedShmImpl::swap(BRMManagedShmImpl& rhs)
{
  bi::managed_shared_memory& otherSegment = *rhs.fShmSegment;
  fShmSegment->swap(otherSegment);

  std::swap(fKey, rhs.fKey);
  std::swap(fSize, rhs.fSize);
  std::swap(fReadOnly, rhs.fReadOnly);
}
// The method was copied from non-managed shmem impl class
// and it has no refences in MCS 6.x code.
void BRMManagedShmImpl::destroy()
{
string keyName = ShmKeys::keyToName(fKey);
try
{
bi::shared_memory_object::remove(keyName.c_str());
}
catch (bi::interprocess_exception& b)
{
std::ostringstream o;
o << "BRMManagedShmImpl::destroy caught an exception removing a managed shared memory segment: "
<< b.what();
log(o.str());
throw;
}
}
void BRMManagedShmImpl::remap(const bool readOnly)
{
delete fShmSegment;
fShmSegment = nullptr;
string keyName = ShmKeys::keyToName(fKey);
if (readOnly)
fShmSegment = new bi::managed_shared_memory(bi::open_read_only, keyName.c_str());
else
fShmSegment = new bi::managed_shared_memory(bi::open_only, keyName.c_str());
}
} // namespace BRM

View File

@ -30,17 +30,18 @@
//#define NDEBUG
#include <cassert>
#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/mapped_region.hpp>
namespace bi = boost::interprocess;
namespace BRM
{
class BRMShmImpl
class BRMShmImplParent
{
public:
BRMShmImpl(unsigned key, off_t size, bool readOnly = false);
~BRMShmImpl()
{
}
BRMShmImplParent(unsigned key, off_t size, bool readOnly = false);
virtual ~BRMShmImplParent();
inline unsigned key() const
{
@ -55,23 +56,63 @@ class BRMShmImpl
return fReadOnly;
}
void setReadOnly();
int grow(unsigned newKey, off_t newSize);
int clear(unsigned newKey, off_t newSize);
void swap(BRMShmImpl& rhs);
void destroy();
boost::interprocess::shared_memory_object fShmobj;
boost::interprocess::mapped_region fMapreg;
private:
BRMShmImpl(const BRMShmImpl& rhs);
BRMShmImpl& operator=(const BRMShmImpl& rhs);
virtual void setReadOnly() = 0;
virtual int clear(unsigned newKey, off_t newSize) = 0;
virtual void destroy() = 0;
protected:
unsigned fKey;
off_t fSize;
bool fReadOnly;
};
class BRMShmImpl : public BRMShmImplParent
{
public:
BRMShmImpl(unsigned key, off_t size, bool readOnly = false);
BRMShmImpl(const BRMShmImpl& rhs) = delete;
BRMShmImpl& operator=(const BRMShmImpl& rhs) = delete;
~BRMShmImpl()
{
}
int clear(unsigned newKey, off_t newSize) override;
void destroy() override;
void setReadOnly() override;
int grow(unsigned newKey, off_t newSize);
void swap(BRMShmImpl& rhs);
bi::shared_memory_object fShmobj;
bi::mapped_region fMapreg;
};
class BRMManagedShmImpl : public BRMShmImplParent
{
public:
BRMManagedShmImpl(unsigned key, off_t size, bool readOnly = false);
BRMManagedShmImpl(const BRMManagedShmImpl& rhs) = delete;
BRMManagedShmImpl& operator=(const BRMManagedShmImpl& rhs) = delete;
~BRMManagedShmImpl()
{
delete fShmSegment;
}
int clear(unsigned newKey, off_t newSize) override;
void destroy() override;
void setReadOnly() override;
int grow(off_t newSize);
void remap(const bool readOnly = false);
void swap(BRMManagedShmImpl& rhs);
bi::managed_shared_memory* getManagedSegment()
{
assert(fShmSegment);
return fShmSegment;
}
private:
bi::managed_shared_memory* fShmSegment;
};
} // namespace BRM

View File

@ -98,7 +98,7 @@ DBRM::DBRM(const DBRM& brm)
throw logic_error("DBRM: Don't use the copy constructor.");
}
DBRM::~DBRM() throw()
DBRM::~DBRM()
{
if (msgClient != NULL)
MessageQueueClientPool::releaseInstance(msgClient);
@ -461,7 +461,7 @@ int DBRM::markExtentsInvalid(const vector<LBID_t>& lbids,
}
template <typename T>
int DBRM::getExtentMaxMin(const LBID_t lbid, T& max, T& min, int32_t& seqNum) throw()
int DBRM::getExtentMaxMin(const LBID_t lbid, T& max, T& min, int32_t& seqNum)
{
#ifdef BRM_INFO
@ -489,7 +489,7 @@ int DBRM::getExtentMaxMin(const LBID_t lbid, T& max, T& min, int32_t& seqNum) th
}
}
int DBRM::getExtentCPMaxMin(const LBID_t lbid, CPMaxMin& cpMaxMin) throw()
int DBRM::getExtentCPMaxMin(const LBID_t lbid, CPMaxMin& cpMaxMin)
{
try
{
@ -4555,10 +4555,19 @@ void DBRM::invalidateUncommittedExtentLBIDs(execplan::CalpontSystemCatalog::SCN
setExtentsMaxMin(cpInfos);
}
template int DBRM::getExtentMaxMin<int128_t>(const LBID_t lbid, int128_t& max, int128_t& min,
int32_t& seqNum) throw();
// Thin wrapper: forwards to em->EMIndexShmemSize() and returns its result unchanged.
size_t DBRM::EMIndexShmemSize()
{
return em->EMIndexShmemSize();
}
template int DBRM::getExtentMaxMin<int64_t>(const LBID_t lbid, int64_t& max, int64_t& min,
int32_t& seqNum) throw();
// Thin wrapper: forwards to em->EMIndexShmemFree() and returns its result unchanged.
size_t DBRM::EMIndexShmemFree()
{
return em->EMIndexShmemFree();
}
template int DBRM::getExtentMaxMin<int128_t>(const LBID_t lbid, int128_t& max, int128_t& min,
int32_t& seqNum);
template int DBRM::getExtentMaxMin<int64_t>(const LBID_t lbid, int64_t& max, int64_t& min, int32_t& seqNum);
} // namespace BRM

View File

@ -103,7 +103,7 @@ class DBRM
// The param noBRMFcns suppresses init of the ExtentMap, VSS, VBBM, and CopyLocks.
// It can speed up init if the caller only needs the other structures.
EXPORT DBRM(bool noBRMFcns = false);
EXPORT ~DBRM() throw();
EXPORT ~DBRM();
EXPORT static void refreshShm()
{
@ -780,12 +780,12 @@ class DBRM
const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colDataTypes)
DBRM_THROW;
template <typename T>
EXPORT int getExtentMaxMin(const LBID_t lbid, T& max, T& min, int32_t& seqNum) throw();
EXPORT int getExtentMaxMin(const LBID_t lbid, T& max, T& min, int32_t& seqNum);
EXPORT int setExtentMaxMin(const LBID_t lbid, const int64_t max, const int64_t min,
const int32_t seqNum) DBRM_THROW;
EXPORT int getExtentCPMaxMin(const LBID_t lbid, CPMaxMin& cpMaxMin) throw();
EXPORT int getExtentCPMaxMin(const LBID_t lbid, CPMaxMin& cpMaxMin);
/** @brief Updates the max and min casual partitioning info for the passed extents.
*
@ -984,6 +984,9 @@ class DBRM
EXPORT void invalidateUncommittedExtentLBIDs(execplan::CalpontSystemCatalog::SCN txnid, bool allExtents,
std::vector<LBID_t>* plbidList = NULL);
size_t EMIndexShmemSize();
size_t EMIndexShmemFree();
private:
DBRM(const DBRM& brm);
DBRM& operator=(const DBRM& brm);

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
/* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2016-2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@ -29,15 +30,18 @@
#include <sys/types.h>
#include <vector>
#include <set>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include <mutex>
//#define NDEBUG
#include <cassert>
#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/interprocess/mapped_region.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/unordered_map.hpp>
#include <boost/functional/hash.hpp> //boost::hash
#include "shmkeys.h"
#include "brmtypes.h"
@ -63,6 +67,8 @@
#define EXPORT
#endif
namespace bi = boost::interprocess;
namespace oam
{
typedef std::vector<uint16_t> DBRootConfigList;
@ -75,6 +81,15 @@ class IDBDataFile;
namespace BRM
{
// Aliases for the integral types that identify an extent.
// PartitionNumberT and DBRootT are the types of EMEntry::partitionNum and EMEntry::dbRoot.
using PartitionNumberT = uint32_t;
using DBRootT = uint16_t;
using SegmentT = uint16_t;
// The two halves of LastIndEmptyIndEmptyInd, the pair returned by
// ExtentMap::_createExtentCommonSearch().
// NOTE(review): presumably "index of the last extent" and "index of an empty EM entry" - confirm.
using LastExtentIndexT = int;
using EmptyEMEntry = int;
using HighestOffset = uint32_t;
using LastIndEmptyIndEmptyInd = std::pair<LastExtentIndexT, EmptyEMEntry>;
// A list of dbroot ids; the return type of ExtentMap::getAllDbRoots().
using DBRootVec = std::vector<DBRootT>;
// assumed column width when calculating dictionary store extent size
#define DICT_COL_WIDTH 8
@ -98,8 +113,6 @@ const char CP_INVALID = 0;
const char CP_UPDATING = 1;
const char CP_VALID = 2;
// The _v4 structs are defined below for upgrading extent map
// from v4 to v5; see ExtentMap::loadVersion4or5 for details.
struct EMCasualPartition_struct_v4
{
RangePartitionData_t hi_val; // This needs to be reinterpreted as unsigned for uint64_t column types.
@ -113,16 +126,15 @@ struct EMPartition_struct_v4
{
EMCasualPartition_struct_v4 cprange;
};
struct EMEntry_v4
{
InlineLBIDRange range;
int fileID;
uint32_t blockOffset;
HWM_t HWM;
uint32_t partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
uint16_t dbRoot; // starts at 1 to match Columnstore.xml
PartitionNumberT partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
DBRootT dbRoot; // starts at 1 to match Columnstore.xml
uint16_t colWid;
int16_t status; // extent avail for query or not, or out of service
EMPartition_struct_v4 partition;
@ -151,7 +163,7 @@ struct EMCasualPartition_struct
EXPORT EMCasualPartition_struct(const EMCasualPartition_struct& em);
EXPORT EMCasualPartition_struct& operator=(const EMCasualPartition_struct& em);
};
typedef EMCasualPartition_struct EMCasualPartition_t;
using EMCasualPartition_t = EMCasualPartition_struct;
struct EMPartition_struct
{
@ -165,9 +177,9 @@ struct EMEntry
int fileID;
uint32_t blockOffset;
HWM_t HWM;
uint32_t partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
uint16_t dbRoot; // starts at 1 to match Columnstore.xml
PartitionNumberT partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
DBRootT dbRoot; // starts at 1 to match Columnstore.xml
uint16_t colWid;
int16_t status; // extent avail for query or not, or out of service
EMPartition_t partition;
@ -319,6 +331,146 @@ class FreeListImpl
static FreeListImpl* fInstance;
};
// Allocator plumbing: every container below allocates from the segment manager
// of a bi::managed_shared_memory.
using ShmSegmentManagerT = bi::managed_shared_memory::segment_manager;
using ShmVoidAllocator = bi::allocator<void, ShmSegmentManagerT>;
// Innermost level: a vector of Extent Map indices.
using ExtentMapIdxT = size_t;
using ExtentMapIdxTAlloc = bi::allocator<ExtentMapIdxT, ShmSegmentManagerT>;
using PartitionNumberTAlloc = bi::allocator<PartitionNumberT, ShmSegmentManagerT>;
using ExtentMapIndicesT = std::vector<ExtentMapIdxT, ExtentMapIdxTAlloc>;
// Third tier: partition number -> vector of Extent Map indices.
using PartitionIndexContainerKeyT = PartitionNumberT;
using PartitionIndexContainerValT = std::pair<const PartitionIndexContainerKeyT, ExtentMapIndicesT>;
using PartitionIndexContainerValTAlloc = bi::allocator<PartitionIndexContainerValT, ShmSegmentManagerT>;
// Can't use std::unordered_map presumably b/c the map's pointer type doesn't use offset_type as boost::u_map
// does
using PartitionIndexContainerT =
boost::unordered_map<PartitionIndexContainerKeyT, ExtentMapIndicesT,
boost::hash<PartitionIndexContainerKeyT>, std::equal_to<PartitionIndexContainerKeyT>,
PartitionIndexContainerValTAlloc>;
// Second tier: OID -> partition container.
using OIDIndexContainerKeyT = OID_t;
using OIDIndexContainerValT = std::pair<const OIDIndexContainerKeyT, PartitionIndexContainerT>;
using OIDIndexContainerValTAlloc = bi::allocator<OIDIndexContainerValT, ShmSegmentManagerT>;
using OIDIndexContainerT =
boost::unordered_map<OIDIndexContainerKeyT, PartitionIndexContainerT, boost::hash<OIDIndexContainerKeyT>,
std::equal_to<OIDIndexContainerKeyT>, OIDIndexContainerValTAlloc>;
// Top tier: a vector of OID containers.
// NOTE(review): presumably indexed by dbroot - confirm against ExtentMapIndexImpl::find().
using DBRootIndexTAlloc = bi::allocator<OIDIndexContainerT, ShmSegmentManagerT>;
using DBRootIndexContainerT = std::vector<OIDIndexContainerT, DBRootIndexTAlloc>;
using ExtentMapIndex = DBRootIndexContainerT;
// Result of a lookup: a plain std::vector with the default allocator.
using ExtentMapIndexFindResult = std::vector<ExtentMapIdxT>;
// Pair of flags returned by the insert methods.
// NOTE(review): judging by the name, (inserted, shmem key updated) - confirm in the .cpp.
using InsertUpdateShmemKeyPair = std::pair<bool, bool>;
/** @brief Wrapper around the managed shared memory segment that stores the
 * Extent Map index.
 *
 * The constructor is private; instances are obtained through
 * makeExtentMapIndexImpl(). The class is non-copyable.
 */
class ExtentMapIndexImpl
{
 public:
  ~ExtentMapIndexImpl()
  {
  }

  static ExtentMapIndexImpl* makeExtentMapIndexImpl(unsigned key, off_t size, bool readOnly = false);
  /// Destroys the cached instance, if any, and resets the pointer.
  static void refreshShm()
  {
    if (fInstance_)
    {
      delete fInstance_;
      fInstance_ = nullptr;
    }
  }

  /** @brief Estimates the initial size of the managed shmem for an index that
   * covers `numberOfExtents` extents.
   *
   * The multipliers and constants are theoretical values that were checked
   * against customer data.
   */
  static size_t estimateEMIndexSize(uint32_t numberOfExtents)
  {
    // These are just educated guess values to calculate initial
    // managed shmem size.
    constexpr const size_t tablesNumber_ = 100ULL;
    constexpr const size_t columnsNumber_ = 200ULL;
    constexpr const size_t dbRootsNumber_ = 3ULL;
    constexpr const size_t filesInPartition_ = 4ULL;
    constexpr const size_t extentsInPartition_ = filesInPartition_ * 2;
    // Widen first: numberOfExtents and emIdentUnitSize_ are both 32-bit, so their
    // product would otherwise be computed modulo 2^32 before being added up.
    const size_t extents = numberOfExtents;
    return extents * emIdentUnitSize_ + extents / extentsInPartition_ * partitionContainerUnitSize_ +
           dbRootsNumber_ * tablesNumber_ * columnsNumber_;
  }

  bool growIfNeeded(const size_t memoryNeeded);

  /// Grows the underlying segment to `size`; asserts that the call succeeded.
  inline void grow(off_t size)
  {
    int rc = fBRMManagedShmMemImpl_.grow(size);
    idbassert(rc == 0);
  }
  // After this call one needs to refresh any refs or ptrs sourced
  // from this shmem.
  inline void makeReadOnly()
  {
    fBRMManagedShmMemImpl_.setReadOnly();
  }

  inline void swapout(BRMManagedShmImpl& rhs)
  {
    fBRMManagedShmMemImpl_.swap(rhs);
  }

  inline unsigned key() const
  {
    return fBRMManagedShmMemImpl_.key();
  }

  /// Size reported by the managed segment itself.
  // NOTE(review): get_size() is narrowed to unsigned here - confirm that is intended.
  unsigned getShmemSize()
  {
    return fBRMManagedShmMemImpl_.getManagedSegment()->get_size();
  }

  /// Free memory reported by the managed segment.
  size_t getShmemFree()
  {
    return fBRMManagedShmMemImpl_.getManagedSegment()->get_free_memory();
  }

  /// Size as tracked by the BRMManagedShmImpl wrapper.
  unsigned getShmemImplSize()
  {
    return fBRMManagedShmMemImpl_.size();
  }

  void createExtentMapIndexIfNeeded();
  ExtentMapIndex* get();

  // Insertion, one method (plus wrapper) per index tier.
  InsertUpdateShmemKeyPair insert(const EMEntry& emEntry, const size_t emIdx);
  InsertUpdateShmemKeyPair insert2ndLayerWrapper(OIDIndexContainerT& oids, const EMEntry& emEntry,
                                                 const size_t emIdx, const bool aShmemHasGrown);
  InsertUpdateShmemKeyPair insert2ndLayer(OIDIndexContainerT& oids, const EMEntry& emEntry,
                                          const size_t emIdx, const bool aShmemHasGrown);
  InsertUpdateShmemKeyPair insert3dLayerWrapper(PartitionIndexContainerT& partitions, const EMEntry& emEntry,
                                                const size_t emIdx, const bool aShmemHasGrown);
  InsertUpdateShmemKeyPair insert3dLayer(PartitionIndexContainerT& partitions, const EMEntry& emEntry,
                                         const size_t emIdx, const bool aShmemHasGrown);

  // Lookup by <dbroot, oid, partition> or by <dbroot, oid>.
  ExtentMapIndexFindResult find(const DBRootT dbroot, const OID_t oid,
                                const PartitionNumberT partitionNumber);
  ExtentMapIndexFindResult find(const DBRootT dbroot, const OID_t oid);
  ExtentMapIndexFindResult search2ndLayer(OIDIndexContainerT& oids, const OID_t oid,
                                          const PartitionNumberT partitionNumber);
  ExtentMapIndexFindResult search2ndLayer(OIDIndexContainerT& oids, const OID_t oid);
  ExtentMapIndexFindResult search3dLayer(PartitionIndexContainerT& partitions,
                                         const PartitionNumberT partitionNumber);

  // Removal at dbroot, OID and single-entry granularity.
  void deleteDbRoot(const DBRootT dbroot);
  void deleteOID(const DBRootT dbroot, const OID_t oid);
  void deleteEMEntry(const EMEntry& emEntry, const ExtentMapIdxT emIdent);

 private:
  BRMManagedShmImpl fBRMManagedShmMemImpl_;

  ExtentMapIndexImpl(unsigned key, off_t size, bool readOnly = false);
  // Non-copyable: the object wraps a shared memory mapping.
  ExtentMapIndexImpl(const ExtentMapIndexImpl& rhs) = delete;
  ExtentMapIndexImpl& operator=(const ExtentMapIndexImpl& rhs) = delete;

  static std::mutex fInstanceMutex_;
  static ExtentMapIndexImpl* fInstance_;
  static const constexpr uint32_t dbRootContainerUnitSize_ = 64ULL;
  static const constexpr uint32_t oidContainerUnitSize_ = 352ULL;        // 2 * map overhead
  static const constexpr uint32_t partitionContainerUnitSize_ = 368ULL;  // single map overhead
  static const constexpr uint32_t emIdentUnitSize_ = sizeof(uint64_t);
  static const constexpr uint32_t extraUnits_ = 2;
  static const constexpr size_t freeSpaceThreshold_ = 256 * 1024;
};
/** @brief This class encapsulates the extent map functionality of the system
*
* This class encapsulates the extent map functionality of the system. It
@ -345,7 +497,7 @@ class ExtentMap : public Undoable
*/
EXPORT void load(const std::string& filename, bool fixFL = false);
/** @brief Loads the ExtentMap entries from a binayr blob.
/** @brief Loads the ExtentMap entries from a binary blob.
*
* Loads the ExtentMap entries from a file. This will
* clear out any existing entries. The intention is that before
@ -886,6 +1038,9 @@ class ExtentMap : public Undoable
EXPORT void dumpTo(std::ostream& os);
EXPORT const bool* getEMLockStatus();
EXPORT const bool* getEMFLLockStatus();
EXPORT const bool* getEMIndexLockStatus();
size_t EMIndexShmemSize();
size_t EMIndexShmemFree();
#ifdef BRM_DEBUG
EXPORT void printEM() const;
@ -895,11 +1050,11 @@ class ExtentMap : public Undoable
#endif
private:
static const size_t EM_INCREMENT_ROWS = 100;
static const size_t EM_INITIAL_SIZE = EM_INCREMENT_ROWS * 10 * sizeof(EMEntry);
static const size_t EM_INCREMENT = EM_INCREMENT_ROWS * sizeof(EMEntry);
static const size_t EM_FREELIST_INITIAL_SIZE = 50 * sizeof(InlineLBIDRange);
static const size_t EM_FREELIST_INCREMENT = 50 * sizeof(InlineLBIDRange);
static const constexpr size_t EM_INCREMENT_ROWS = 100;
static const constexpr size_t EM_INITIAL_SIZE = EM_INCREMENT_ROWS * 10 * sizeof(EMEntry);
static const constexpr size_t EM_INCREMENT = EM_INCREMENT_ROWS * sizeof(EMEntry);
static const constexpr size_t EM_FREELIST_INITIAL_SIZE = 50 * sizeof(InlineLBIDRange);
static const constexpr size_t EM_FREELIST_INCREMENT = 50 * sizeof(InlineLBIDRange);
ExtentMap(const ExtentMap& em);
ExtentMap& operator=(const ExtentMap& em);
@ -910,6 +1065,7 @@ class ExtentMap : public Undoable
key_t fCurrentFLShmkey;
MSTEntry* fEMShminfo;
MSTEntry* fFLShminfo;
MSTEntry* fEMIndexShminfo;
const MasterSegmentTable fMST;
bool r_only;
typedef std::tr1::unordered_map<int, oam::DBRootConfigList*> PmDbRootMap_t;
@ -917,8 +1073,9 @@ class ExtentMap : public Undoable
time_t fCacheTime; // timestamp associated with config cache
int numUndoRecords;
bool flLocked, emLocked;
static boost::mutex mutex; // @bug5355 - made mutex static
bool flLocked, emLocked, emIndexLocked;
static boost::mutex mutex; // @bug5355 - made mutex static
static boost::mutex emIndexMutex;
boost::mutex fConfigCacheMutex; // protect access to Config Cache
enum OPS
@ -930,6 +1087,12 @@ class ExtentMap : public Undoable
OPS EMLock, FLLock;
LastIndEmptyIndEmptyInd _createExtentCommonSearch(const OID_t OID, const DBRootT dbRoot,
const PartitionNumberT partitionNum,
const SegmentT segmentNum);
void logAndSetEMIndexReadOnly(const std::string& funcName);
LBID_t _createColumnExtent_DBroot(uint32_t size, int OID, uint32_t colWidth, uint16_t dbRoot,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t& partitionNum, uint16_t& segmentNum, uint32_t& startBlockOffset);
@ -941,24 +1104,32 @@ class ExtentMap : public Undoable
uint16_t segmentNum);
template <typename T>
bool isValidCPRange(const T& max, const T& min, execplan::CalpontSystemCatalog::ColDataType type) const;
void deleteExtent(int emIndex);
void deleteExtent(const int emIndex, const bool clearEMIndex = true);
LBID_t getLBIDsFromFreeList(uint32_t size);
void reserveLBIDRange(LBID_t start, uint8_t size); // used by load() to allocate pre-existing LBIDs
key_t chooseEMShmkey(); // see the code for how keys are segmented
key_t chooseFLShmkey(); // see the code for how keys are segmented
key_t chooseEMShmkey();
key_t chooseFLShmkey();
key_t chooseEMIndexShmkey();
key_t getInitialEMIndexShmkey() const;
// see the code for how keys are segmented
key_t chooseShmkey(const MSTEntry* masterTableEntry, const uint32_t keyRangeBase) const;
void grabEMEntryTable(OPS op);
void grabFreeList(OPS op);
void grabEMIndex(OPS op);
void releaseEMEntryTable(OPS op);
void releaseFreeList(OPS op);
void releaseEMIndex(OPS op);
void growEMShmseg(size_t nrows = 0);
void growFLShmseg();
void growEMIndexShmseg(const size_t suggestedSize = 0);
void finishChanges();
EXPORT unsigned getFilesPerColumnPartition();
unsigned getExtentsPerSegmentFile();
unsigned getDbRootCount();
void getPmDbRoots(int pm, std::vector<int>& dbRootList);
DBRootVec getAllDbRoots();
void checkReloadConfig();
ShmKeys fShmKeys;
@ -979,6 +1150,7 @@ class ExtentMap : public Undoable
ExtentMapImpl* fPExtMapImpl;
FreeListImpl* fPFreeListImpl;
ExtentMapIndexImpl* fPExtMapIndexImpl_;
};
inline std::ostream& operator<<(std::ostream& os, ExtentMap& rhs)

View File

@ -1,4 +1,5 @@
/* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2016-2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@ -21,6 +22,7 @@
* third, lock or unlock it
*/
#include <string>
#include <iostream>
#include <stdlib.h>
#include <rwlock.h>
@ -32,10 +34,15 @@ char* name;
void usage()
{
cout << "Usage " << name << " which_lock_to_use which_side_to_use lock_or_unlock\n"
<< " which_lock_to_use: 1=VSS 2=ExtentMap 3=FreeList 4=VBBM 5=CopyLocks\n"
<< " which_side_to_use: r|w (read or write)\n"
<< " lock_or_unlock: l|u (lock or unlock)\n";
std::cout << "Usage " << name << " which_lock_to_use which_side_to_use lock_or_unlock" << std::endl;
size_t lockId = 0;
for (auto& lockName : RWLockNames)
{
std::cout << " " << lockId++ << "=" << lockName << " ";
}
std::cout << std::endl
<< " which_side_to_use: r|w (read or write)" << std::endl
<< " lock_or_unlock: l|u (lock or unlock)" << std::endl;
exit(1);
}
@ -54,10 +61,21 @@ int main(int argc, char** argv)
if (strlen(argv[1]) != 1 || strlen(argv[2]) != 1 || strlen(argv[3]) != 1)
usage();
which_lock = atoi(argv[1]);
if (which_lock < 1 || which_lock > 5)
try
{
which_lock = std::stoi(argv[1]);
}
catch (std::exception const& e)
{
std::cerr << "Cannot convert the lock id: " << e.what() << std::endl;
usage();
}
if (which_lock >= RWLockNames.size())
usage();
size_t minLockId = (which_lock > 0) ? which_lock : 1;
size_t maxLockId = (which_lock > 0) ? which_lock : RWLockNames.size() - 1;
if (argv[2][0] == 'r')
which_side = 0;
@ -73,17 +91,28 @@ int main(int argc, char** argv)
else
usage();
rwlock = new RWLock(0x10000 * which_lock);
for (size_t i = minLockId; i <= maxLockId; ++i)
{
// Key the lock by the loop index: when which_lock == 0 the loop walks lock ids
// 1 .. RWLockNames.size() - 1, and each iteration must open its own lock.
rwlock = new RWLock(0x10000 * i);
if (which_side == 0)
if (lock_unlock == 0)
rwlock->read_lock();
if (which_side == 0)
{
if (lock_unlock == 0)
rwlock->read_lock();
else
rwlock->read_unlock();
}
else if (lock_unlock == 0)
{
rwlock->write_lock();
}
else
rwlock->read_unlock();
else if (lock_unlock == 0)
rwlock->write_lock();
else
rwlock->write_unlock();
{
rwlock->write_unlock();
}
delete rwlock;
}
return 0;
}

View File

@ -1,4 +1,5 @@
/* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2016-2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@ -22,6 +23,7 @@
*/
#include <iostream>
#include <string>
#include <stdlib.h>
#include <rwlock.h>
@ -32,14 +34,18 @@ char* name;
void usage()
{
cout << "Usage " << name << " which_lock_to_use which_side_to_use lock_or_unlock\n"
<< " which_lock_to_use: 1=VSS 2=ExtentMap 3=FreeList 4=VBBM 5=CopyLocks\n";
std::cout << "Usage " << name << " which_lock_to_use:" << std::endl;
size_t lockId = 0;
for (auto& lockName : RWLockNames)
{
std::cout << " " << lockId++ << "=" << lockName << std::endl;
}
exit(1);
}
int main(int argc, char** argv)
{
uint32_t which_lock; // 1-5
uint32_t which_lock; // 0-6
RWLock* rwlock;
LockState state;
@ -51,18 +57,35 @@ int main(int argc, char** argv)
if (strlen(argv[1]) != 1)
usage();
which_lock = atoi(argv[1]);
try
{
which_lock = std::stoi(argv[1]);
}
catch (std::exception const& e)
{
std::cerr << "Cannot convert the lock id: " << e.what() << std::endl;
usage();
}
if (which_lock < 1 || which_lock > 5)
if (which_lock >= RWLockNames.size())
usage();
rwlock = new RWLock(0x10000 * which_lock);
state = rwlock->getLockState();
cout << "readers = " << state.reading << endl
<< "writers = " << state.writing << endl
<< "readers waiting = " << state.readerswaiting << endl
<< "writers waiting = " << state.writerswaiting << endl
<< "mutex locked = " << (int)state.mutexLocked << endl;
size_t minLockId = (which_lock > 0) ? which_lock : 1;
size_t maxLockId = (which_lock > 0) ? which_lock : RWLockNames.size() - 1;
for (size_t i = minLockId; i <= maxLockId; ++i)
{
rwlock = new RWLock(0x10000 * i);
state = rwlock->getLockState();
cout << RWLockNames[i] << " RWLock" << std::endl
<< " readers = " << state.reading << std::endl
<< " writers = " << state.writing << std::endl
<< " readers waiting = " << state.readerswaiting << std::endl
<< " writers waiting = " << state.writerswaiting << std::endl
<< " mutex locked = " << (int)state.mutexLocked << std::endl;
delete rwlock;
}
return 0;
}

View File

@ -138,6 +138,7 @@ MasterSegmentTable::MasterSegmentTable()
RWLockKeys[2] = fShmKeys.KEYRANGE_VBBM_BASE;
RWLockKeys[3] = fShmKeys.KEYRANGE_VSS_BASE;
RWLockKeys[4] = fShmKeys.KEYRANGE_CL_BASE;
RWLockKeys[5] = fShmKeys.KEYRANGE_EXTENTMAP_INDEX_BASE;
try
{

View File

@ -109,8 +109,10 @@ class MasterSegmentTable
static const int VSSSegment = 3;
/// specifies the copy lock segment
static const int CLSegment = 4;
/// specifies the EM Index segment
static const int EMIndex = 5;
/// the number of tables currently defined
static const int nTables = 5;
static const int nTables = 6;
/** @brief This function gets the specified table.
*

View File

@ -50,6 +50,7 @@ ShmKeys::ShmKeys()
KEYRANGE_EMFREELIST_BASE = 0x30000 | (BRM_UID << 20);
KEYRANGE_VBBM_BASE = 0x40000 | (BRM_UID << 20);
KEYRANGE_CL_BASE = 0x50000 | (BRM_UID << 20);
KEYRANGE_EXTENTMAP_INDEX_BASE = 0x60000 | (BRM_UID << 20);
MST_SYSVKEY = 0xff000000 | BRM_UID;
PROCESSSTATUS_SYSVKEY = 0xfd000000 | BRM_UID;
SYSTEMSTATUS_SYSVKEY = 0xfc000000 | BRM_UID;
@ -62,7 +63,7 @@ ShmKeys::ShmKeys()
string ShmKeys::keyToName(unsigned key)
{
ostringstream oss;
oss << "InfiniDB-shm-";
oss << "MCS-shm-";
oss << setw(8) << setfill('0') << hex << key;
return oss.str();
}

View File

@ -55,6 +55,7 @@ struct ShmKeys
uint32_t KEYRANGE_EMFREELIST_BASE;
uint32_t KEYRANGE_VBBM_BASE;
uint32_t KEYRANGE_VSS_BASE;
uint32_t KEYRANGE_EXTENTMAP_INDEX_BASE;
/****** Fixed location assignments *******/
uint32_t MST_SYSVKEY;

View File

@ -1492,6 +1492,11 @@ const bool* SlaveDBRMNode::getEMLockStatus()
return em.getEMLockStatus();
}
// Thin wrapper: returns the pointer handed out by em.getEMIndexLockStatus().
const bool* SlaveDBRMNode::getEMIndexLockStatus()
{
return em.getEMIndexLockStatus();
}
const bool* SlaveDBRMNode::getVBBMLockStatus()
{
return &locked[0];

View File

@ -461,6 +461,7 @@ class SlaveDBRMNode
EXPORT const bool* getEMFLLockStatus();
EXPORT const bool* getEMLockStatus();
EXPORT const bool* getEMIndexLockStatus();
EXPORT const bool* getVBBMLockStatus();
EXPORT const bool* getVSSLockStatus();

View File

@ -147,6 +147,8 @@ int ServiceWorkerNode::Child()
monitorThreads.create_thread(RWLockMonitor(&die, slave.getEMLockStatus(), keys.KEYRANGE_EXTENTMAP_BASE));
monitorThreads.create_thread(RWLockMonitor(&die, slave.getVBBMLockStatus(), keys.KEYRANGE_VBBM_BASE));
monitorThreads.create_thread(RWLockMonitor(&die, slave.getVSSLockStatus(), keys.KEYRANGE_VSS_BASE));
monitorThreads.create_thread(
RWLockMonitor(&die, slave.getEMIndexLockStatus(), keys.KEYRANGE_EXTENTMAP_INDEX_BASE));
try
{