1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-27 21:01:50 +03:00

MCOL-4912 This patch introduces an Extent Map index to improve EM scalability

The EM scalability project has two parts: phase 1 and phase 2.
        This is phase 1, which brings an EM index to speed up (from O(n) down
        to the speed of boost::unordered_map) EM lookups that search for a
        <dbroot, oid, partition> tuple to turn it into an LBID,
        e.g. most bulk insertion meta info operations.
        The basis is boost::interprocess::managed_shared_memory, where EMIndex is
        stored. Whilst it is not debug-friendly, it allows nested structs to be
        put into shmem. EMIndex has 3 tiers. Top-down description:
        a vector of dbroots, a map of oids to partition vectors, and partition
        vectors that hold EM indices.
        Individual EM methods now query the index before they scan the EM.
        EMIndex has a separate shmem file with the fixed id
        MCS-shm-00060001.
This commit is contained in:
Roman Nozdrin
2022-03-30 08:57:05 +00:00
committed by Leonid Fedorov
parent fb3eaabd29
commit 4c26e4f960
25 changed files with 3498 additions and 459 deletions

View File

@ -1,4 +1,5 @@
/* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2016-2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@ -29,15 +30,18 @@
#include <sys/types.h>
#include <vector>
#include <set>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include <mutex>
//#define NDEBUG
#include <cassert>
#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/interprocess/mapped_region.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/unordered_map.hpp>
#include <boost/functional/hash.hpp> //boost::hash
#include "shmkeys.h"
#include "brmtypes.h"
@ -63,6 +67,8 @@
#define EXPORT
#endif
namespace bi = boost::interprocess;
namespace oam
{
typedef std::vector<uint16_t> DBRootConfigList;
@ -75,6 +81,15 @@ class IDBDataFile;
namespace BRM
{
// Identifier types shared by the extent map entries and the extent map index.
// 16-bit identifiers.
using DBRootT = uint16_t;
using SegmentT = uint16_t;
// 32-bit quantities.
using PartitionNumberT = uint32_t;
using HighestOffset = uint32_t;
// Signed positions; presumably slots in the extent map table - confirm against the callers.
using EmptyEMEntry = int;
using LastExtentIndexT = int;
// Composite types built from the aliases above.
using DBRootVec = std::vector<DBRootT>;
// Pair ordered as (LastExtentIndexT, EmptyEMEntry).
using LastIndEmptyIndEmptyInd = std::pair<LastExtentIndexT, EmptyEMEntry>;
// assumed column width when calculating dictionary store extent size
#define DICT_COL_WIDTH 8
@ -98,8 +113,6 @@ const char CP_INVALID = 0;
const char CP_UPDATING = 1;
const char CP_VALID = 2;
// The _v4 structs are defined below for upgrading extent map
// from v4 to v5; see ExtentMap::loadVersion4or5 for details.
struct EMCasualPartition_struct_v4
{
RangePartitionData_t hi_val; // This needs to be reinterpreted as unsigned for uint64_t column types.
@ -113,16 +126,15 @@ struct EMPartition_struct_v4
{
EMCasualPartition_struct_v4 cprange;
};
struct EMEntry_v4
{
InlineLBIDRange range;
int fileID;
uint32_t blockOffset;
HWM_t HWM;
uint32_t partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
uint16_t dbRoot; // starts at 1 to match Columnstore.xml
PartitionNumberT partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
DBRootT dbRoot; // starts at 1 to match Columnstore.xml
uint16_t colWid;
int16_t status; // extent avail for query or not, or out of service
EMPartition_struct_v4 partition;
@ -151,7 +163,7 @@ struct EMCasualPartition_struct
EXPORT EMCasualPartition_struct(const EMCasualPartition_struct& em);
EXPORT EMCasualPartition_struct& operator=(const EMCasualPartition_struct& em);
};
typedef EMCasualPartition_struct EMCasualPartition_t;
using EMCasualPartition_t = EMCasualPartition_struct;
struct EMPartition_struct
{
@ -165,9 +177,9 @@ struct EMEntry
int fileID;
uint32_t blockOffset;
HWM_t HWM;
uint32_t partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
uint16_t dbRoot; // starts at 1 to match Columnstore.xml
PartitionNumberT partitionNum; // starts at 0
uint16_t segmentNum; // starts at 0
DBRootT dbRoot; // starts at 1 to match Columnstore.xml
uint16_t colWid;
int16_t status; // extent avail for query or not, or out of service
EMPartition_t partition;
@ -319,6 +331,146 @@ class FreeListImpl
static FreeListImpl* fInstance;
};
// Type aliases for the Extent Map index. Every container that takes a bi::allocator below
// allocates through the segment manager of a boost::interprocess managed shared memory segment.
using ShmSegmentManagerT = bi::managed_shared_memory::segment_manager;
using ShmVoidAllocator = bi::allocator<void, ShmSegmentManagerT>;
// Element stored at the lowest tier; presumably a position in the extent map table - confirm.
using ExtentMapIdxT = size_t;
using ExtentMapIdxTAlloc = bi::allocator<ExtentMapIdxT, ShmSegmentManagerT>;
using PartitionNumberTAlloc = bi::allocator<PartitionNumberT, ShmSegmentManagerT>;
// Vector of extent map indices, allocated through the shmem allocator.
using ExtentMapIndicesT = std::vector<ExtentMapIdxT, ExtentMapIdxTAlloc>;
// Partition tier: partition number -> vector of extent map indices.
using PartitionIndexContainerKeyT = PartitionNumberT;
using PartitionIndexContainerValT = std::pair<const PartitionIndexContainerKeyT, ExtentMapIndicesT>;
using PartitionIndexContainerValTAlloc = bi::allocator<PartitionIndexContainerValT, ShmSegmentManagerT>;
// std::unordered_map can't be used here, presumably because its pointer type does not use
// offset_type the way boost::unordered_map does.
using PartitionIndexContainerT =
boost::unordered_map<PartitionIndexContainerKeyT, ExtentMapIndicesT,
boost::hash<PartitionIndexContainerKeyT>, std::equal_to<PartitionIndexContainerKeyT>,
PartitionIndexContainerValTAlloc>;
// OID tier: OID -> partition-tier map.
using OIDIndexContainerKeyT = OID_t;
using OIDIndexContainerValT = std::pair<const OIDIndexContainerKeyT, PartitionIndexContainerT>;
using OIDIndexContainerValTAlloc = bi::allocator<OIDIndexContainerValT, ShmSegmentManagerT>;
using OIDIndexContainerT =
boost::unordered_map<OIDIndexContainerKeyT, PartitionIndexContainerT, boost::hash<OIDIndexContainerKeyT>,
std::equal_to<OIDIndexContainerKeyT>, OIDIndexContainerValTAlloc>;
// Top tier: a vector of OID-tier maps (presumably one slot per dbroot - confirm the indexing scheme).
using DBRootIndexTAlloc = bi::allocator<OIDIndexContainerT, ShmSegmentManagerT>;
using DBRootIndexContainerT = std::vector<OIDIndexContainerT, DBRootIndexTAlloc>;
using ExtentMapIndex = DBRootIndexContainerT;
// Lookup result: uses the default std allocator, unlike the index containers above.
using ExtentMapIndexFindResult = std::vector<ExtentMapIdxT>;
// Result of the insert* methods; NOTE(review): the meaning of each bool is not visible here,
// presumably (inserted, shmem has grown) - confirm against the implementation.
using InsertUpdateShmemKeyPair = std::pair<bool, bool>;
/** @brief Accessor for the Extent Map index that lives in managed shared memory.
 *
 * The index is an ExtentMapIndex: a vector of per-dbroot OID maps, each mapping an OID to a
 * partition map, each mapping a partition number to a vector of Extent Map indices.
 * Instances are obtained through makeExtentMapIndexImpl(); the constructors are private.
 */
class ExtentMapIndexImpl
{
 public:
  ~ExtentMapIndexImpl() = default;

  /** @brief Factory for the accessor; defined out of line.
   *
   * @param key shmem key of the index segment
   * @param size requested segment size
   * @param readOnly whether the segment is to be used read-only
   */
  static ExtentMapIndexImpl* makeExtentMapIndexImpl(unsigned key, off_t size, bool readOnly = false);

  /** @brief Destroys the cached instance, if any, and resets the cached pointer.
   *
   * NOTE(review): this inline body does not take fInstanceMutex_ itself.
   */
  static void refreshShm()
  {
    if (fInstance_)
    {
      delete fInstance_;
      fInstance_ = nullptr;
    }
  }

  /** @brief Estimates the initial managed shmem size for an index over numberOfExtents extents.
   *
   * The multipliers and constants here are theoretical estimates that were
   * tested using a customer's data.
   *
   * @param numberOfExtents number of extents the index is expected to hold
   * @return estimated size for the managed shmem segment
   */
  static size_t estimateEMIndexSize(uint32_t numberOfExtents)
  {
    // These are just educated guess values to calculate the initial
    // managed shmem size.
    constexpr size_t tablesNumber_ = 100ULL;
    constexpr size_t columnsNumber_ = 200ULL;
    constexpr size_t dbRootsNumber_ = 3ULL;
    constexpr size_t filesInPartition_ = 4ULL;
    constexpr size_t extentsInPartition_ = filesInPartition_ * 2;
    // Widen before multiplying: both numberOfExtents and emIdentUnitSize_ are uint32_t, so the
    // product would otherwise be computed in 32 bits and wrap for large extent counts.
    const size_t extents = numberOfExtents;
    return extents * emIdentUnitSize_ + extents / extentsInPartition_ * partitionContainerUnitSize_ +
           dbRootsNumber_ * tablesNumber_ * columnsNumber_;
  }

  bool growIfNeeded(const size_t memoryNeeded);

  /** @brief Grows the underlying managed shmem; asserts that the grow call returned 0. */
  void grow(off_t size)
  {
    int rc = fBRMManagedShmMemImpl_.grow(size);
    idbassert(rc == 0);
  }

  // After this call one needs to refresh any refs or ptrs sourced
  // from this shmem.
  void makeReadOnly()
  {
    fBRMManagedShmMemImpl_.setReadOnly();
  }

  /** @brief Swaps the underlying managed shmem implementation with rhs. */
  void swapout(BRMManagedShmImpl& rhs)
  {
    fBRMManagedShmMemImpl_.swap(rhs);
  }

  /** @brief Returns the key reported by the underlying managed shmem implementation. */
  unsigned key() const
  {
    return fBRMManagedShmMemImpl_.key();
  }

  /** @brief Returns the size reported by the managed segment.
   *
   * NOTE(review): the value is converted to unsigned; presumably get_size() is wider,
   * in which case very large segments would be truncated - confirm.
   */
  unsigned getShmemSize()
  {
    return fBRMManagedShmMemImpl_.getManagedSegment()->get_size();
  }

  /** @brief Returns the free memory reported by the managed segment. */
  size_t getShmemFree()
  {
    return fBRMManagedShmMemImpl_.getManagedSegment()->get_free_memory();
  }

  /** @brief Returns the size reported by the managed shmem implementation wrapper. */
  unsigned getShmemImplSize()
  {
    return fBRMManagedShmMemImpl_.size();
  }

  void createExtentMapIndexIfNeeded();
  ExtentMapIndex* get();

  // Insertion entry point and its per-tier helpers (2nd layer: OID map, 3rd layer: partition map).
  // NOTE(review): the meaning of the returned bool pair is defined by the implementation.
  InsertUpdateShmemKeyPair insert(const EMEntry& emEntry, const size_t emIdx);
  InsertUpdateShmemKeyPair insert2ndLayerWrapper(OIDIndexContainerT& oids, const EMEntry& emEntry,
                                                 const size_t emIdx, const bool aShmemHasGrown);
  InsertUpdateShmemKeyPair insert2ndLayer(OIDIndexContainerT& oids, const EMEntry& emEntry,
                                          const size_t emIdx, const bool aShmemHasGrown);
  InsertUpdateShmemKeyPair insert3dLayerWrapper(PartitionIndexContainerT& partitions, const EMEntry& emEntry,
                                                const size_t emIdx, const bool aShmemHasGrown);
  InsertUpdateShmemKeyPair insert3dLayer(PartitionIndexContainerT& partitions, const EMEntry& emEntry,
                                         const size_t emIdx, const bool aShmemHasGrown);

  // Lookups by <dbroot, oid, partition> or by <dbroot, oid>, plus their per-tier helpers.
  ExtentMapIndexFindResult find(const DBRootT dbroot, const OID_t oid,
                                const PartitionNumberT partitionNumber);
  ExtentMapIndexFindResult find(const DBRootT dbroot, const OID_t oid);
  ExtentMapIndexFindResult search2ndLayer(OIDIndexContainerT& oids, const OID_t oid,
                                          const PartitionNumberT partitionNumber);
  ExtentMapIndexFindResult search2ndLayer(OIDIndexContainerT& oids, const OID_t oid);
  ExtentMapIndexFindResult search3dLayer(PartitionIndexContainerT& partitions,
                                         const PartitionNumberT partitionNumber);

  // Removal at dbroot, OID and single-entry granularity.
  void deleteDbRoot(const DBRootT dbroot);
  void deleteOID(const DBRootT dbroot, const OID_t oid);
  void deleteEMEntry(const EMEntry& emEntry, const ExtentMapIdxT emIdent);

 private:
  BRMManagedShmImpl fBRMManagedShmMemImpl_;

  ExtentMapIndexImpl(unsigned key, off_t size, bool readOnly = false);
  // Copy operations are private: the class cannot be copied from outside.
  ExtentMapIndexImpl(const ExtentMapIndexImpl& rhs);
  ExtentMapIndexImpl& operator=(const ExtentMapIndexImpl& rhs);

  static std::mutex fInstanceMutex_;
  static ExtentMapIndexImpl* fInstance_;

  // Per-unit size constants; estimateEMIndexSize() uses emIdentUnitSize_ and
  // partitionContainerUnitSize_.
  static const constexpr uint32_t dbRootContainerUnitSize_ = 64ULL;
  static const constexpr uint32_t oidContainerUnitSize_ = 352ULL;        // 2 * map overhead
  static const constexpr uint32_t partitionContainerUnitSize_ = 368ULL;  // single map overhead
  static const constexpr uint32_t emIdentUnitSize_ = sizeof(uint64_t);
  static const constexpr uint32_t extraUnits_ = 2;
  // Presumably the free-space level, in bytes, below which the segment is grown - confirm.
  static const constexpr size_t freeSpaceThreshold_ = 256 * 1024;
};
/** @brief This class encapsulates the extent map functionality of the system
*
* This class encapsulates the extent map functionality of the system. It
@ -345,7 +497,7 @@ class ExtentMap : public Undoable
*/
EXPORT void load(const std::string& filename, bool fixFL = false);
/** @brief Loads the ExtentMap entries from a binayr blob.
/** @brief Loads the ExtentMap entries from a binary blob.
*
* Loads the ExtentMap entries from a file. This will
* clear out any existing entries. The intention is that before
@ -886,6 +1038,9 @@ class ExtentMap : public Undoable
EXPORT void dumpTo(std::ostream& os);
EXPORT const bool* getEMLockStatus();
EXPORT const bool* getEMFLLockStatus();
EXPORT const bool* getEMIndexLockStatus();
size_t EMIndexShmemSize();
size_t EMIndexShmemFree();
#ifdef BRM_DEBUG
EXPORT void printEM() const;
@ -895,11 +1050,11 @@ class ExtentMap : public Undoable
#endif
private:
static const size_t EM_INCREMENT_ROWS = 100;
static const size_t EM_INITIAL_SIZE = EM_INCREMENT_ROWS * 10 * sizeof(EMEntry);
static const size_t EM_INCREMENT = EM_INCREMENT_ROWS * sizeof(EMEntry);
static const size_t EM_FREELIST_INITIAL_SIZE = 50 * sizeof(InlineLBIDRange);
static const size_t EM_FREELIST_INCREMENT = 50 * sizeof(InlineLBIDRange);
static const constexpr size_t EM_INCREMENT_ROWS = 100;
static const constexpr size_t EM_INITIAL_SIZE = EM_INCREMENT_ROWS * 10 * sizeof(EMEntry);
static const constexpr size_t EM_INCREMENT = EM_INCREMENT_ROWS * sizeof(EMEntry);
static const constexpr size_t EM_FREELIST_INITIAL_SIZE = 50 * sizeof(InlineLBIDRange);
static const constexpr size_t EM_FREELIST_INCREMENT = 50 * sizeof(InlineLBIDRange);
ExtentMap(const ExtentMap& em);
ExtentMap& operator=(const ExtentMap& em);
@ -910,6 +1065,7 @@ class ExtentMap : public Undoable
key_t fCurrentFLShmkey;
MSTEntry* fEMShminfo;
MSTEntry* fFLShminfo;
MSTEntry* fEMIndexShminfo;
const MasterSegmentTable fMST;
bool r_only;
typedef std::tr1::unordered_map<int, oam::DBRootConfigList*> PmDbRootMap_t;
@ -917,8 +1073,9 @@ class ExtentMap : public Undoable
time_t fCacheTime; // timestamp associated with config cache
int numUndoRecords;
bool flLocked, emLocked;
static boost::mutex mutex; // @bug5355 - made mutex static
bool flLocked, emLocked, emIndexLocked;
static boost::mutex mutex; // @bug5355 - made mutex static
static boost::mutex emIndexMutex;
boost::mutex fConfigCacheMutex; // protect access to Config Cache
enum OPS
@ -930,6 +1087,12 @@ class ExtentMap : public Undoable
OPS EMLock, FLLock;
LastIndEmptyIndEmptyInd _createExtentCommonSearch(const OID_t OID, const DBRootT dbRoot,
const PartitionNumberT partitionNum,
const SegmentT segmentNum);
void logAndSetEMIndexReadOnly(const std::string& funcName);
LBID_t _createColumnExtent_DBroot(uint32_t size, int OID, uint32_t colWidth, uint16_t dbRoot,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t& partitionNum, uint16_t& segmentNum, uint32_t& startBlockOffset);
@ -941,24 +1104,32 @@ class ExtentMap : public Undoable
uint16_t segmentNum);
template <typename T>
bool isValidCPRange(const T& max, const T& min, execplan::CalpontSystemCatalog::ColDataType type) const;
void deleteExtent(int emIndex);
void deleteExtent(const int emIndex, const bool clearEMIndex = true);
LBID_t getLBIDsFromFreeList(uint32_t size);
void reserveLBIDRange(LBID_t start, uint8_t size); // used by load() to allocate pre-existing LBIDs
key_t chooseEMShmkey(); // see the code for how keys are segmented
key_t chooseFLShmkey(); // see the code for how keys are segmented
key_t chooseEMShmkey();
key_t chooseFLShmkey();
key_t chooseEMIndexShmkey();
key_t getInitialEMIndexShmkey() const;
// see the code for how keys are segmented
key_t chooseShmkey(const MSTEntry* masterTableEntry, const uint32_t keyRangeBase) const;
void grabEMEntryTable(OPS op);
void grabFreeList(OPS op);
void grabEMIndex(OPS op);
void releaseEMEntryTable(OPS op);
void releaseFreeList(OPS op);
void releaseEMIndex(OPS op);
void growEMShmseg(size_t nrows = 0);
void growFLShmseg();
void growEMIndexShmseg(const size_t suggestedSize = 0);
void finishChanges();
EXPORT unsigned getFilesPerColumnPartition();
unsigned getExtentsPerSegmentFile();
unsigned getDbRootCount();
void getPmDbRoots(int pm, std::vector<int>& dbRootList);
DBRootVec getAllDbRoots();
void checkReloadConfig();
ShmKeys fShmKeys;
@ -979,6 +1150,7 @@ class ExtentMap : public Undoable
ExtentMapImpl* fPExtMapImpl;
FreeListImpl* fPFreeListImpl;
ExtentMapIndexImpl* fPExtMapIndexImpl_;
};
inline std::ostream& operator<<(std::ostream& os, ExtentMap& rhs)