mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-06-06 08:21:01 +03:00
6023 lines
194 KiB
C++
6023 lines
194 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
/*****************************************************************************
|
|
* $Id: extentmap.cpp 1936 2013-07-09 22:10:29Z dhall $
|
|
*
|
|
****************************************************************************/
|
|
|
|
#include <iostream>
|
|
#include <sys/types.h>
|
|
#include <sys/time.h>
|
|
#include <sys/stat.h>
|
|
#include <cstdlib>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <stdexcept>
|
|
#include <algorithm>
|
|
#include <ios>
|
|
#include <cerrno>
|
|
#include <sstream>
|
|
#include <vector>
|
|
#include <limits>
|
|
#include <boost/scoped_array.hpp>
|
|
#include <boost/scoped_ptr.hpp>
|
|
#include <boost/thread.hpp>
|
|
#ifndef _MSC_VER
|
|
#include <tr1/unordered_set>
|
|
#else
|
|
#include <unordered_set>
|
|
#endif
|
|
|
|
#include <boost/interprocess/shared_memory_object.hpp>
|
|
#include <boost/interprocess/mapped_region.hpp>
|
|
namespace bi = boost::interprocess;
|
|
|
|
#include "liboamcpp.h"
|
|
#include "brmtypes.h"
|
|
#include "configcpp.h"
|
|
#include "rwlock.h"
|
|
#include "calpontsystemcatalog.h"
|
|
#include "mastersegmenttable.h"
|
|
#include "blocksize.h"
|
|
#include "dataconvert.h"
|
|
#include "oamcache.h"
|
|
#include "IDBDataFile.h"
|
|
#include "IDBPolicy.h"
|
|
#ifdef BRM_INFO
|
|
#include "tracer.h"
|
|
#include "configcpp.h"
|
|
#endif
|
|
|
|
#define EXTENTMAP_DLLEXPORT
|
|
#include "extentmap.h"
|
|
#undef EXTENTMAP_DLLEXPORT
|
|
|
|
#define EM_MAX_SEQNUM 2000000000
|
|
#define MAX_IO_RETRIES 10
|
|
#define EM_MAGIC_V1 0x76f78b1c
|
|
#define EM_MAGIC_V2 0x76f78b1d
|
|
#define EM_MAGIC_V3 0x76f78b1e
|
|
#define EM_MAGIC_V4 0x76f78b1f
|
|
|
|
#ifndef NDEBUG
|
|
#define ASSERT(x) \
|
|
if (!(x)) { \
|
|
cerr << "assertion at file " << __FILE__ << " line " << __LINE__ << " failed" << endl; \
|
|
throw logic_error("assertion failed"); \
|
|
}
|
|
#else
|
|
#define ASSERT(x)
|
|
#endif
|
|
|
|
using namespace std;
|
|
using namespace boost;
|
|
using namespace logging;
|
|
using namespace idbdatafile;
|
|
|
|
namespace
|
|
{
|
|
unsigned ExtentSize = 0; // dmc-need to deprecate
|
|
unsigned ExtentRows = 0;
|
|
unsigned filesPerColumnPartition = 0;
|
|
unsigned extentsPerSegmentFile = 0;
|
|
|
|
// Increment CP sequence (version) number, and wrap-around when applicable
|
|
inline void incSeqNum(int32_t& seqNum)
|
|
{
|
|
seqNum++;
|
|
|
|
if (seqNum > EM_MAX_SEQNUM)
|
|
seqNum = 0;
|
|
}
|
|
|
|
}
|
|
|
|
namespace BRM
|
|
{
|
|
|
|
//------------------------------------------------------------------------------
|
|
// EMCasualPartition_struct methods
|
|
//------------------------------------------------------------------------------
|
|
|
|
EMCasualPartition_struct::EMCasualPartition_struct()
|
|
{
|
|
lo_val = numeric_limits<int64_t>::min();
|
|
hi_val = numeric_limits<int64_t>::max();
|
|
sequenceNum = 0;
|
|
isValid = CP_INVALID;
|
|
}
|
|
|
|
EMCasualPartition_struct::EMCasualPartition_struct(const int64_t lo, const int64_t hi, const int32_t seqNum)
|
|
{
|
|
lo_val = lo;
|
|
hi_val = hi;
|
|
sequenceNum = seqNum;
|
|
isValid = CP_INVALID;
|
|
}
|
|
|
|
EMCasualPartition_struct::EMCasualPartition_struct(const EMCasualPartition_struct& em)
|
|
{
|
|
lo_val = em.lo_val;
|
|
hi_val = em.hi_val;
|
|
sequenceNum = em.sequenceNum;
|
|
isValid = em.isValid;
|
|
}
|
|
|
|
EMCasualPartition_struct& EMCasualPartition_struct::operator= (const EMCasualPartition_struct& em)
|
|
{
|
|
lo_val = em.lo_val;
|
|
hi_val = em.hi_val;
|
|
sequenceNum = em.sequenceNum;
|
|
isValid = em.isValid;
|
|
return *this;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Version 4 EmEntry methods
|
|
//------------------------------------------------------------------------------
|
|
|
|
EMEntry::EMEntry()
|
|
{
|
|
fileID = 0;
|
|
blockOffset = 0;
|
|
HWM = 0;
|
|
partitionNum = 0;
|
|
segmentNum = 0;
|
|
dbRoot = 0;
|
|
colWid = 0;
|
|
status = 0;
|
|
}
|
|
|
|
EMEntry::EMEntry(const EMEntry& e)
|
|
{
|
|
range.start = e.range.start;
|
|
range.size = e.range.size;
|
|
fileID = e.fileID;
|
|
blockOffset = e.blockOffset;
|
|
HWM = e.HWM;
|
|
partition = e.partition;
|
|
partitionNum = e.partitionNum;
|
|
segmentNum = e.segmentNum;
|
|
dbRoot = e.dbRoot;
|
|
colWid = e.colWid;
|
|
status = e.status;
|
|
}
|
|
|
|
EMEntry& EMEntry::operator= (const EMEntry& e)
|
|
{
|
|
range.start = e.range.start;
|
|
range.size = e.range.size;
|
|
fileID = e.fileID;
|
|
blockOffset = e.blockOffset;
|
|
HWM = e.HWM;
|
|
partition = e.partition;
|
|
partitionNum = e.partitionNum;
|
|
segmentNum = e.segmentNum;
|
|
colWid = e.colWid;
|
|
dbRoot = e.dbRoot;
|
|
status = e.status;
|
|
return *this;
|
|
}
|
|
|
|
bool EMEntry::operator< (const EMEntry& e) const
|
|
{
|
|
if (range.start < e.range.start)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/*static*/
|
|
boost::mutex ExtentMapImpl::fInstanceMutex;
|
|
boost::mutex ExtentMap::mutex;
|
|
|
|
/*static*/
|
|
ExtentMapImpl* ExtentMapImpl::fInstance = 0;
|
|
|
|
/*static*/
|
|
ExtentMapImpl* ExtentMapImpl::makeExtentMapImpl(unsigned key, off_t size, bool readOnly)
|
|
{
|
|
boost::mutex::scoped_lock lk(fInstanceMutex);
|
|
|
|
if (fInstance)
|
|
{
|
|
if (key != fInstance->fExtMap.key())
|
|
{
|
|
BRMShmImpl newShm(key, 0);
|
|
fInstance->swapout(newShm);
|
|
}
|
|
|
|
ASSERT(key == fInstance->fExtMap.key());
|
|
return fInstance;
|
|
}
|
|
|
|
fInstance = new ExtentMapImpl(key, size, readOnly);
|
|
|
|
return fInstance;
|
|
}
|
|
|
|
ExtentMapImpl::ExtentMapImpl(unsigned key, off_t size, bool readOnly) :
|
|
fExtMap(key, size, readOnly)
|
|
{
|
|
}
|
|
|
|
/*static*/
|
|
boost::mutex FreeListImpl::fInstanceMutex;
|
|
|
|
/*static*/
|
|
FreeListImpl* FreeListImpl::fInstance = 0;
|
|
|
|
/*static*/
|
|
FreeListImpl* FreeListImpl::makeFreeListImpl(unsigned key, off_t size, bool readOnly)
|
|
{
|
|
boost::mutex::scoped_lock lk(fInstanceMutex);
|
|
|
|
if (fInstance)
|
|
{
|
|
if (key != fInstance->fFreeList.key())
|
|
{
|
|
BRMShmImpl newShm(key, 0);
|
|
fInstance->swapout(newShm);
|
|
}
|
|
|
|
ASSERT(key == fInstance->fFreeList.key());
|
|
return fInstance;
|
|
}
|
|
|
|
fInstance = new FreeListImpl(key, size, readOnly);
|
|
|
|
return fInstance;
|
|
}
|
|
|
|
FreeListImpl::FreeListImpl(unsigned key, off_t size, bool readOnly) :
|
|
fFreeList(key, size, readOnly)
|
|
{
|
|
}
|
|
|
|
ExtentMap::ExtentMap()
|
|
{
|
|
fExtentMap = NULL;
|
|
fFreeList = NULL;
|
|
fCurrentEMShmkey = -1;
|
|
fCurrentFLShmkey = -1;
|
|
fEMShminfo = NULL;
|
|
fFLShminfo = NULL;
|
|
r_only = false;
|
|
flLocked = false;
|
|
emLocked = false;
|
|
fPExtMapImpl = 0;
|
|
fPFreeListImpl = 0;
|
|
|
|
#ifdef BRM_INFO
|
|
fDebug = ("Y" == config::Config::makeConfig()->getConfig("DBRM", "Debug"));
|
|
#endif
|
|
}
|
|
|
|
ExtentMap::~ExtentMap()
|
|
{
|
|
PmDbRootMap_t::iterator iter = fPmDbRootMap.begin();
|
|
PmDbRootMap_t::iterator end = fPmDbRootMap.end();
|
|
|
|
while (iter != end)
|
|
{
|
|
delete iter->second;
|
|
iter->second = 0;
|
|
++iter;
|
|
}
|
|
|
|
fPmDbRootMap.clear();
|
|
}
|
|
|
|
// Casual Partioning support
|
|
//
|
|
|
|
/**
|
|
* @brief mark the max/min values of an extent as invalid
|
|
*
|
|
* mark the extent containing the lbid as invalid and
|
|
* increment the sequenceNum value. If the lbid is found
|
|
* in the extent map a 0 is returned otherwise a 1.
|
|
*
|
|
**/
|
|
|
|
int ExtentMap::_markInvalid(const LBID_t lbid, const execplan::CalpontSystemCatalog::ColDataType colDataType)
|
|
{
|
|
int entries;
|
|
int i;
|
|
LBID_t lastBlock;
|
|
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
lastBlock = fExtentMap[i].range.start +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
|
|
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
fExtentMap[i].partition.cprange.isValid = CP_UPDATING;
|
|
|
|
if (isUnsigned(colDataType))
|
|
{
|
|
fExtentMap[i].partition.cprange.lo_val = numeric_limits<uint64_t>::max();
|
|
fExtentMap[i].partition.cprange.hi_val = 0;
|
|
}
|
|
else
|
|
{
|
|
fExtentMap[i].partition.cprange.lo_val = numeric_limits<int64_t>::max();
|
|
fExtentMap[i].partition.cprange.hi_val = numeric_limits<int64_t>::min();
|
|
}
|
|
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
#ifdef BRM_DEBUG
|
|
ostringstream os;
|
|
os << "ExtentMap::_markInvalid(): casual partitioning update: firstLBID=" <<
|
|
fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
|
|
fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
|
|
" min=" << fExtentMap[i].partition.cprange.lo_val <<
|
|
" max=" << fExtentMap[i].partition.cprange.hi_val <<
|
|
"seq=" << fExtentMap[i].partition.cprange.sequenceNum;
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
#endif
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
throw logic_error("ExtentMap::markInvalid(): lbid isn't allocated");
|
|
}
|
|
|
|
int ExtentMap::markInvalid(const LBID_t lbid,
|
|
const execplan::CalpontSystemCatalog::ColDataType colDataType)
|
|
{
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (lbid < 0)
|
|
throw invalid_argument("ExtentMap::markInvalid(): lbid must be >= 0");
|
|
|
|
#endif
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("_markInvalid");
|
|
TRACER_ADDINPUT(lbid);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef BRM_DEBUG
|
|
ostringstream os;
|
|
os << "ExtentMap::markInvalid(" << lbid << "," << colDataType << ")";
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
return _markInvalid(lbid, colDataType);
|
|
}
|
|
|
|
/**
|
|
* @brief calls markInvalid(LBID_t lbid) for each extent containing any lbid in vector<LBID_t>& lbids
|
|
*
|
|
**/
|
|
|
|
int ExtentMap::markInvalid(const vector<LBID_t>& lbids,
|
|
const vector<execplan::CalpontSystemCatalog::ColDataType>& colDataTypes)
|
|
{
|
|
uint32_t i, size = lbids.size();
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
for (i = 0; i < size; ++i)
|
|
if (lbids[i] < 0)
|
|
throw invalid_argument("ExtentMap::markInvalid(vector): all lbids must be >= 0");
|
|
|
|
#endif
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("_markInvalid");
|
|
TRACER_ADDINPUT(size);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
|
|
// XXXPAT: what's the proper return code when one and only one fails?
|
|
for (i = 0; i < size; ++i)
|
|
{
|
|
#ifdef BRM_DEBUG
|
|
ostringstream os;
|
|
os << "ExtentMap::markInvalid() lbids[" << i << "]=" << lbids[i] <<
|
|
" colDataTypes[" << i << "]=" << colDataTypes[i];
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
#endif
|
|
|
|
try
|
|
{
|
|
_markInvalid(lbids[i], colDataTypes[i]);
|
|
}
|
|
catch (std::exception& e)
|
|
{
|
|
cerr << "ExtentMap::markInvalid(vector): warning! lbid " << lbids[i] <<
|
|
" caused " << e.what() << endl;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* @brief set the max/min values for the extent if the seqNum matches the extents sequenceNum
|
|
*
|
|
* reset the lbid's hi_val to max and lo_val to min
|
|
* the seqNum matches the ExtentMap.sequenceNum. Then increments
|
|
* the current sequenceNum value by 1. If the sequenceNum does not
|
|
* match the seqNum value do not update the lbid's max/min values
|
|
* or increment the sequenceNum value and return a -1.
|
|
|
|
**/
|
|
|
|
int ExtentMap::setMaxMin(const LBID_t lbid,
|
|
const int64_t max,
|
|
const int64_t min,
|
|
const int32_t seqNum,
|
|
bool firstNode)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("updateMaxMin");
|
|
TRACER_ADDINPUT(lbid);
|
|
TRACER_ADDINPUT(max);
|
|
TRACER_ADDINPUT(min);
|
|
TRACER_ADDINPUT(seqNum);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
int entries;
|
|
int i;
|
|
LBID_t lastBlock;
|
|
int32_t curSequence;
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (lbid < 0)
|
|
throw invalid_argument("ExtentMap::setMaxMin(): lbid must be >= 0");
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
lastBlock = fExtentMap[i].range.start +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
|
|
curSequence = fExtentMap[i].partition.cprange.sequenceNum;
|
|
|
|
if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
|
|
{
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (firstNode)
|
|
{
|
|
ostringstream os;
|
|
os << "ExtentMap::setMaxMin(): casual partitioning update: firstLBID=" <<
|
|
fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
|
|
fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
|
|
" min=" << min << " max=" << max << "seq=" << seqNum;
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
#endif
|
|
|
|
if (curSequence == seqNum)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
fExtentMap[i].partition.cprange.hi_val = max;
|
|
fExtentMap[i].partition.cprange.lo_val = min;
|
|
fExtentMap[i].partition.cprange.isValid = CP_VALID;
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
return 0;
|
|
}
|
|
//special val to indicate a reset--used by editem -c.
|
|
//Also used by COMMIT and ROLLBACK to invalidate CP.
|
|
else if (seqNum == -1)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
// We set hi_val and lo_val to correct values for signed or unsigned
|
|
// during the markinvalid step, which sets the invalid variable to CP_UPDATING.
|
|
// During this step (seqNum == -1), the min and max passed in are not reliable
|
|
// and should not be used.
|
|
fExtentMap[i].partition.cprange.isValid = CP_INVALID;
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (emLocked)
|
|
releaseEMEntryTable(WRITE);
|
|
|
|
throw logic_error("ExtentMap::setMaxMin(): lbid isn't allocated");
|
|
// return -1;
|
|
}
|
|
|
|
// @bug 1970. Added updateExtentsMaxMin function.
|
|
// @note - The key passed in the map must the the first LBID in the extent.
|
|
void ExtentMap::setExtentsMaxMin(const CPMaxMinMap_t& cpMap, bool firstNode, bool useLock)
|
|
{
|
|
CPMaxMinMap_t::const_iterator it;
|
|
|
|
#ifdef BRM_DEBUG
|
|
log("ExtentMap::setExtentsMaxMin()", logging::LOG_TYPE_DEBUG);
|
|
|
|
for (it = cpMap.begin(); it != cpMap.end(); ++it)
|
|
{
|
|
ostringstream os;
|
|
os << "FirstLBID=" << it->first <<
|
|
" min=" << it->second.min <<
|
|
" max=" << it->second.max <<
|
|
" seq=" << it->second.seqNum;
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("setExtentsMaxMin");
|
|
|
|
for (it = cpMap.begin(); it != cpMap.end(); ++it)
|
|
{
|
|
TRACER_ADDINPUT((*it).first);
|
|
TRACER_ADDINPUT((*it).second.max);
|
|
TRACER_ADDINPUT((*it).second.min);
|
|
TRACER_ADDINPUT((*it).second.seqNum);
|
|
TRACER_WRITE;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
int entries;
|
|
int i;
|
|
int32_t curSequence;
|
|
const int32_t extentsToUpdate = cpMap.size();
|
|
int32_t extentsUpdated = 0;
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (extentsToUpdate <= 0)
|
|
throw invalid_argument("ExtentMap::setExtentsMaxMin(): cpMap must be populated");
|
|
|
|
#endif
|
|
|
|
if (useLock)
|
|
grabEMEntryTable(WRITE);
|
|
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
it = cpMap.find(fExtentMap[i].range.start);
|
|
|
|
if (it != cpMap.end())
|
|
{
|
|
curSequence = fExtentMap[i].partition.cprange.sequenceNum;
|
|
|
|
if (curSequence == it->second.seqNum &&
|
|
fExtentMap[i].partition.cprange.isValid == CP_INVALID)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
fExtentMap[i].partition.cprange.hi_val = it->second.max;
|
|
fExtentMap[i].partition.cprange.lo_val = it->second.min;
|
|
fExtentMap[i].partition.cprange.isValid = CP_VALID;
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
extentsUpdated++;
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (firstNode)
|
|
{
|
|
ostringstream os;
|
|
os << "ExtentMap::setExtentsMaxMin(): casual partitioning update: firstLBID=" <<
|
|
fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
|
|
fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
|
|
" min=" << it->second.min << " max=" <<
|
|
it->second.max << " seq=" <<
|
|
it->second.seqNum;
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
#endif
|
|
}
|
|
//special val to indicate a reset -- ignore the min/max
|
|
else if (it->second.seqNum == -1)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
// We set hi_val and lo_val to correct values for signed or unsigned
|
|
// during the markinvalid step, which sets the invalid variable to CP_UPDATING.
|
|
// During this step (seqNum == -1), the min and max passed in are not reliable
|
|
// and should not be used.
|
|
fExtentMap[i].partition.cprange.isValid = CP_INVALID;
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
extentsUpdated++;
|
|
}
|
|
//special val to indicate a reset -- assign the min/max
|
|
else if (it->second.seqNum == -2)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
fExtentMap[i].partition.cprange.hi_val = it->second.max;
|
|
fExtentMap[i].partition.cprange.lo_val = it->second.min;
|
|
fExtentMap[i].partition.cprange.isValid = CP_INVALID;
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
extentsUpdated++;
|
|
}
|
|
// else sequence has changed since start of the query. Don't update the EM entry.
|
|
else
|
|
{
|
|
extentsUpdated++;
|
|
}
|
|
|
|
if (extentsUpdated == extentsToUpdate)
|
|
{
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
throw logic_error("ExtentMap::setExtentsMaxMin(): lbid isn't allocated");
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// @bug 1970. Added mergeExtentsMaxMin to merge CP info for list of extents.
|
|
// @note - The key passed in the map must the starting LBID in the extent.
|
|
// Used by cpimport to update extentmap casual partition min/max.
|
|
// NULL or empty values should not be passed in as min/max values.
|
|
// seqNum in the input struct is not currently used.
|
|
//
|
|
// Note that DML calls markInvalid() to flag an extent as CP_UPDATING and incre-
|
|
// ments the sequence number prior to any change, and then marks the extent as
|
|
// CP_INVALID at transaction's end.
|
|
// Since cpimport locks the entire table prior to making any changes, it is
|
|
// assumed that the state of an extent will not be changed (by anyone else)
|
|
// during an import; so cpimport does not employ the intermediate CP_UPDATING
|
|
// state that DML uses. cpimport just waits till the end of the job and incre-
|
|
// ments the sequence number and changes the state to CP_INVALID at that time.
|
|
// We may want/need to reconsider this at some point.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::mergeExtentsMaxMin(CPMaxMinMergeMap_t& cpMap, bool useLock)
|
|
{
|
|
CPMaxMinMergeMap_t::const_iterator it;
|
|
|
|
#ifdef BRM_DEBUG
|
|
log("ExtentMap::mergeExtentsMaxMin()", logging::LOG_TYPE_DEBUG);
|
|
|
|
for (it = cpMap.begin(); it != cpMap.end(); ++it)
|
|
{
|
|
ostringstream os;
|
|
os << "FirstLBID=" << it->first <<
|
|
" min=" << it->second.min <<
|
|
" max=" << it->second.max <<
|
|
" seq=" << it->second.seqNum <<
|
|
" typ: " << (*it).second.type <<
|
|
" new: " << (*it).second.newExtent;
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITENOW("mergeExtentsMaxMin");
|
|
unsigned int count = 1;
|
|
|
|
for (it = cpMap.begin(); it != cpMap.end(); ++it)
|
|
{
|
|
ostringstream oss;
|
|
oss << " " << count <<
|
|
". LBID: " << (*it).first <<
|
|
"; max: " << (*it).second.max <<
|
|
"; min: " << (*it).second.min <<
|
|
"; seq: " << (*it).second.seqNum <<
|
|
"; typ: " << (*it).second.type <<
|
|
"; new: " << (*it).second.newExtent;
|
|
TRACER_WRITEDIRECT(oss.str());
|
|
count++;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
const int32_t extentsToMerge = cpMap.size();
|
|
int32_t extentsMerged = 0;
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (extentsToMerge <= 0)
|
|
throw invalid_argument("ExtentMap::mergeExtentsMaxMin(): "
|
|
"cpMap must be populated");
|
|
|
|
#endif
|
|
|
|
if (useLock)
|
|
grabEMEntryTable(WRITE);
|
|
|
|
int entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0; i < entries; i++) // loop through all extents
|
|
{
|
|
if (fExtentMap[i].range.size != 0) // find eligible extents
|
|
{
|
|
it = cpMap.find(fExtentMap[i].range.start);
|
|
|
|
if (it != cpMap.end())
|
|
{
|
|
#ifdef BRM_DEBUG
|
|
ostringstream os;
|
|
os << "ExtentMap::mergeExtentsMaxMin(): casual partitioning update: firstLBID=" <<
|
|
fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
|
|
fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
|
|
" hi_val=" << fExtentMap[i].partition.cprange.hi_val <<
|
|
" lo_val=" << fExtentMap[i].partition.cprange.lo_val <<
|
|
" min=" << it->second.min << " max=" << it->second.max <<
|
|
" seq=" << it->second.seqNum;
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
#endif
|
|
|
|
switch (fExtentMap[i].partition.cprange.isValid)
|
|
{
|
|
// Merge input min/max with current min/max
|
|
case CP_VALID:
|
|
{
|
|
if (!isValidCPRange( it->second.max,
|
|
it->second.min,
|
|
it->second.type ))
|
|
{
|
|
break;
|
|
}
|
|
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
|
|
// We check the validity of the current min/max,
|
|
// because isValid could be CP_VALID for an extent
|
|
// having all NULL values, in which case the current
|
|
// min/max needs to be set instead of merged.
|
|
|
|
if (isValidCPRange(
|
|
fExtentMap[i].partition.cprange.hi_val,
|
|
fExtentMap[i].partition.cprange.lo_val,
|
|
it->second.type))
|
|
{
|
|
// Swap byte order to do binary string comparison
|
|
if (isCharType(it->second.type))
|
|
{
|
|
int64_t newMinVal =
|
|
static_cast<int64_t>( uint64ToStr(
|
|
static_cast<uint64_t>(it->second.min)));
|
|
int64_t newMaxVal =
|
|
static_cast<int64_t>( uint64ToStr(
|
|
static_cast<uint64_t>(it->second.max)));
|
|
int64_t oldMinVal =
|
|
static_cast<int64_t>( uint64ToStr(
|
|
static_cast<uint64_t>(
|
|
fExtentMap[i].partition.cprange.lo_val)) );
|
|
int64_t oldMaxVal =
|
|
static_cast<int64_t>( uint64ToStr(
|
|
static_cast<uint64_t>(
|
|
fExtentMap[i].partition.cprange.hi_val)) );
|
|
|
|
if (newMinVal < oldMinVal)
|
|
fExtentMap[i].partition.cprange.lo_val =
|
|
it->second.min;
|
|
|
|
if (newMaxVal > oldMaxVal)
|
|
fExtentMap[i].partition.cprange.hi_val =
|
|
it->second.max;
|
|
}
|
|
else if (isUnsigned(it->second.type))
|
|
{
|
|
if (static_cast<uint64_t>(it->second.min) <
|
|
static_cast<uint64_t>(fExtentMap[i].partition.cprange.lo_val))
|
|
{
|
|
fExtentMap[i].partition.cprange.lo_val =
|
|
it->second.min;
|
|
}
|
|
|
|
if (static_cast<uint64_t>(it->second.max) >
|
|
static_cast<uint64_t>(fExtentMap[i].partition.cprange.hi_val))
|
|
{
|
|
fExtentMap[i].partition.cprange.hi_val =
|
|
it->second.max;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (it->second.min <
|
|
fExtentMap[i].partition.cprange.lo_val)
|
|
fExtentMap[i].partition.cprange.lo_val =
|
|
it->second.min;
|
|
|
|
if (it->second.max >
|
|
fExtentMap[i].partition.cprange.hi_val)
|
|
fExtentMap[i].partition.cprange.hi_val =
|
|
it->second.max;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fExtentMap[i].partition.cprange.lo_val =
|
|
it->second.min;
|
|
fExtentMap[i].partition.cprange.hi_val =
|
|
it->second.max;
|
|
}
|
|
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
|
|
break;
|
|
}
|
|
|
|
// DML is updating; just increment seqnum.
|
|
// This case is here for completeness. Table lock should
|
|
// prevent this state from occurring (see notes at top of
|
|
// this function)
|
|
case CP_UPDATING:
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
|
|
break;
|
|
}
|
|
|
|
// Reset min/max to new min/max only "if" we can treat this
|
|
// as a new extent, else leave the extent marked as INVALID
|
|
case CP_INVALID:
|
|
default:
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
|
|
|
|
if (it->second.newExtent)
|
|
{
|
|
if (isValidCPRange( it->second.max,
|
|
it->second.min,
|
|
it->second.type ))
|
|
{
|
|
fExtentMap[i].partition.cprange.lo_val =
|
|
it->second.min;
|
|
fExtentMap[i].partition.cprange.hi_val =
|
|
it->second.max;
|
|
}
|
|
|
|
// Even if invalid range; we set state to CP_VALID,
|
|
// because the extent is valid, it is just empty.
|
|
fExtentMap[i].partition.cprange.isValid = CP_VALID;
|
|
}
|
|
|
|
incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
|
|
break;
|
|
}
|
|
} // switch on isValid state
|
|
|
|
extentsMerged++;
|
|
|
|
if (extentsMerged == extentsToMerge)
|
|
{
|
|
return; // Leave when all extents in map are matched
|
|
}
|
|
|
|
// Deleting objects from map, may speed up successive searches
|
|
cpMap.erase( it );
|
|
|
|
} // found a matching extent in the Map
|
|
} // extent map range size != 0
|
|
} // end of loop through extent map
|
|
|
|
throw logic_error("ExtentMap::mergeExtentsMaxMin(): lbid not found");
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Use this function to see if the range is a valid min/max range or not.
|
|
// Range is considered invalid if min or max, are NULL (min()), or EMPTY
|
|
// (min()+1). For unsigned types NULL is max() and EMPTY is max()-1.
|
|
//------------------------------------------------------------------------------
|
|
bool ExtentMap::isValidCPRange(int64_t max, int64_t min, execplan::CalpontSystemCatalog::ColDataType type) const
|
|
{
|
|
if (isUnsigned(type))
|
|
{
|
|
if ( (static_cast<uint64_t>(min) >= (numeric_limits<uint64_t>::max() - 1)) ||
|
|
(static_cast<uint64_t>(max) >= (numeric_limits<uint64_t>::max() - 1)) )
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ( (min <= (numeric_limits<int64_t>::min() + 1)) ||
|
|
(max <= (numeric_limits<int64_t>::min() + 1)) )
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* @brief retrieve the hi_val and lo_val or sequenceNum of the extent containing the LBID lbid.
|
|
*
|
|
* For the extent containing the LBID lbid, return the max/min values if the extent range values
|
|
* are valid and a -1 in the seqNum parameter. If the range values are flaged as invalid
|
|
* return the sequenceNum of the extent and the max/min values as -1.
|
|
**/
|
|
|
|
int ExtentMap::getMaxMin(const LBID_t lbid,
|
|
int64_t& max,
|
|
int64_t& min,
|
|
int32_t& seqNum)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getMaxMin");
|
|
TRACER_ADDINPUT(lbid);
|
|
TRACER_ADDOUTPUT(max);
|
|
TRACER_ADDOUTPUT(min);
|
|
TRACER_ADDOUTPUT(seqNum);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
max = numeric_limits<uint64_t>::max();
|
|
min = 0;
|
|
seqNum *= (-1);
|
|
int entries;
|
|
int i;
|
|
LBID_t lastBlock;
|
|
int isValid = CP_INVALID;
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (lbid < 0)
|
|
throw invalid_argument("ExtentMap::getMaxMin(): lbid must be >= 0");
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(READ);
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
lastBlock = fExtentMap[i].range.start +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
|
|
|
|
if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
|
|
{
|
|
max = fExtentMap[i].partition.cprange.hi_val;
|
|
min = fExtentMap[i].partition.cprange.lo_val;
|
|
seqNum = fExtentMap[i].partition.cprange.sequenceNum;
|
|
isValid = fExtentMap[i].partition.cprange.isValid;
|
|
releaseEMEntryTable(READ);
|
|
return isValid;
|
|
}
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
throw logic_error("ExtentMap::getMaxMin(): that lbid isn't allocated");
|
|
// return -1;
|
|
}
|
|
|
|
/* Removes a range from the freelist. Used by load() */
|
|
void ExtentMap::reserveLBIDRange(LBID_t start, uint8_t size)
|
|
{
|
|
int i;
|
|
int flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
LBID_t lastLBID = start + (size * 1024) - 1;
|
|
int32_t freeIndex = -1;
|
|
|
|
/* Find a range the request intersects. There should be one and only one. */
|
|
for (i = 0; i < flEntries; i++)
|
|
{
|
|
LBID_t eLastLBID;
|
|
|
|
// while scanning, grab the first free slot
|
|
if (fFreeList[i].size == 0)
|
|
{
|
|
if (freeIndex == -1)
|
|
freeIndex = i;
|
|
|
|
continue;
|
|
}
|
|
|
|
eLastLBID = fFreeList[i].start + (((int64_t) fFreeList[i].size) * 1024) - 1;
|
|
|
|
/* if it's at the front... */
|
|
if (start == fFreeList[i].start)
|
|
{
|
|
/* if the request is larger than the freelist entry -> implies an extent
|
|
* overlap. This is debugging code. */
|
|
//idbassert(size > fFreeList[i].size);
|
|
makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
|
|
fFreeList[i].start += size * 1024;
|
|
fFreeList[i].size -= size;
|
|
|
|
if (fFreeList[i].size == 0)
|
|
{
|
|
makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
|
|
fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
|
|
}
|
|
|
|
break;
|
|
}
|
|
/* if it's at the back... */
|
|
else if (eLastLBID == lastLBID)
|
|
{
|
|
makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
|
|
fFreeList[i].size -= size;
|
|
|
|
if (fFreeList[i].size == 0)
|
|
{
|
|
makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
|
|
fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
|
|
}
|
|
|
|
break;
|
|
/* This entry won't be the same size as the request or the first
|
|
* clause would have run instead.
|
|
*/
|
|
}
|
|
/* if it's in the middle... */
|
|
/* break it into two elements */
|
|
else if (fFreeList[i].start < start && eLastLBID > lastLBID)
|
|
{
|
|
if (freeIndex == -1)
|
|
{
|
|
if (fFLShminfo->currentSize == fFLShminfo->allocdSize)
|
|
{
|
|
growFLShmseg();
|
|
freeIndex = flEntries;
|
|
}
|
|
else
|
|
for (freeIndex = i + 1; freeIndex < flEntries; freeIndex++)
|
|
if (fFreeList[freeIndex].size == 0)
|
|
break;
|
|
|
|
#ifdef BRM_DEBUG
|
|
idbassert(nextIndex < flEntries);
|
|
#endif
|
|
}
|
|
|
|
makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
|
|
makeUndoRecord(&fFreeList[freeIndex], sizeof(InlineLBIDRange));
|
|
makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
|
|
fFreeList[i].size = (start - fFreeList[i].start) / 1024;
|
|
fFreeList[freeIndex].start = start + (size * 1024);
|
|
fFreeList[freeIndex].size = (eLastLBID - lastLBID) / 1024;
|
|
fFLShminfo->currentSize += sizeof(InlineLBIDRange);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
The file layout looks like this:
|
|
|
|
EM Magic (32-bits)
|
|
number of EM entries (32-bits)
|
|
number of FL entries (32-bits)
|
|
EMEntry
|
|
... (* numEM)
|
|
struct InlineLBIDRange
|
|
... (* numFL)
|
|
*/
|
|
|
|
void ExtentMap::loadVersion4(ifstream& in)
|
|
{
|
|
int emNumElements, flNumElements;
|
|
|
|
in.read((char*) &emNumElements, sizeof(int));
|
|
in.read((char*) &flNumElements, sizeof(int));
|
|
idbassert(emNumElements > 0);
|
|
|
|
void *fExtentMapPtr = static_cast<void*>(fExtentMap);
|
|
memset(fExtentMapPtr, 0, fEMShminfo->allocdSize);
|
|
fEMShminfo->currentSize = 0;
|
|
|
|
// init the free list
|
|
memset(fFreeList, 0, fFLShminfo->allocdSize);
|
|
fFreeList[0].size = (1 << 26); // 2^36 LBIDs
|
|
fFLShminfo->currentSize = sizeof(InlineLBIDRange);
|
|
|
|
// @Bug 3498
|
|
// Calculate how big an extent map we're going to need and allocate it in one call
|
|
if ((fEMShminfo->allocdSize / sizeof(EMEntry)) < (unsigned)emNumElements)
|
|
{
|
|
size_t nrows = emNumElements;
|
|
|
|
//Round up to the nearest EM_INCREMENT_ROWS
|
|
if ((nrows % EM_INCREMENT_ROWS) != 0)
|
|
{
|
|
nrows /= EM_INCREMENT_ROWS;
|
|
nrows++;
|
|
nrows *= EM_INCREMENT_ROWS;
|
|
}
|
|
|
|
growEMShmseg(nrows);
|
|
}
|
|
|
|
for (int i = 0; i < emNumElements; i++)
|
|
{
|
|
in.read((char*) &fExtentMap[i], sizeof(EMEntry));
|
|
reserveLBIDRange(fExtentMap[i].range.start, fExtentMap[i].range.size);
|
|
|
|
//@bug 1911 - verify status value is valid
|
|
if (fExtentMap[i].status < EXTENTSTATUSMIN ||
|
|
fExtentMap[i].status > EXTENTSTATUSMAX)
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
|
|
fEMShminfo->currentSize = emNumElements * sizeof(EMEntry);
|
|
|
|
#ifdef DUMP_EXTENT_MAP
|
|
EMEntry* emSrc = fExtentMap;
|
|
cout << "lbid\tsz\toid\tfbo\thwm\tpart#\tseg#\tDBRoot\twid\tst\thi\tlo\tsq\tv" << endl;
|
|
|
|
for (int i = 0; i < emNumElements; i++)
|
|
{
|
|
cout <<
|
|
emSrc[i].start
|
|
<< '\t' << emSrc[i].size
|
|
<< '\t' << emSrc[i].fileID
|
|
<< '\t' << emSrc[i].blockOffset
|
|
<< '\t' << emSrc[i].HWM
|
|
<< '\t' << emSrc[i].partitionNum
|
|
<< '\t' << emSrc[i].segmentNum
|
|
<< '\t' << emSrc[i].dbRoot
|
|
<< '\t' << emSrc[i].status
|
|
<< '\t' << emSrc[i].partition.cprange.hi_val
|
|
<< '\t' << emSrc[i].partition.cprange.lo_val
|
|
<< '\t' << emSrc[i].partition.cprange.sequenceNum
|
|
<< '\t' << (int)(emSrc[i].partition.cprange.isValid)
|
|
<< endl;
|
|
}
|
|
|
|
cout << "Free list entries:" << endl;
|
|
cout << "start\tsize" << endl;
|
|
|
|
for (int i = 0; i < flNumElements; i++)
|
|
cout << fFreeList[i].start << '\t' << fFreeList[i].size << endl;
|
|
|
|
#endif
|
|
}
|
|
|
|
void ExtentMap::loadVersion4(IDBDataFile* in)
|
|
{
|
|
int emNumElements = 0, flNumElements = 0;
|
|
|
|
int nbytes = 0;
|
|
nbytes += in->read((char*) &emNumElements, sizeof(int));
|
|
nbytes += in->read((char*) &flNumElements, sizeof(int));
|
|
idbassert(emNumElements > 0);
|
|
|
|
if ((size_t) nbytes != sizeof(int) + sizeof(int))
|
|
{
|
|
log_errno("ExtentMap::loadVersion4(): read ");
|
|
throw runtime_error("ExtentMap::loadVersion4(): read failed. Check the error log.");
|
|
}
|
|
|
|
void *fExtentMapPtr = static_cast<void*>(fExtentMap);
|
|
memset(fExtentMapPtr, 0, fEMShminfo->allocdSize);
|
|
fEMShminfo->currentSize = 0;
|
|
|
|
// init the free list
|
|
memset(fFreeList, 0, fFLShminfo->allocdSize);
|
|
fFreeList[0].size = (1 << 26); // 2^36 LBIDs
|
|
fFLShminfo->currentSize = sizeof(InlineLBIDRange);
|
|
|
|
// @Bug 3498
|
|
// Calculate how big an extent map we're going to need and allocate it in one call
|
|
if ((fEMShminfo->allocdSize / sizeof(EMEntry)) < (unsigned)emNumElements)
|
|
{
|
|
size_t nrows = emNumElements;
|
|
|
|
//Round up to the nearest EM_INCREMENT_ROWS
|
|
if ((nrows % EM_INCREMENT_ROWS) != 0)
|
|
{
|
|
nrows /= EM_INCREMENT_ROWS;
|
|
nrows++;
|
|
nrows *= EM_INCREMENT_ROWS;
|
|
}
|
|
|
|
growEMShmseg(nrows);
|
|
}
|
|
|
|
for (int i = 0; i < emNumElements; i++)
|
|
{
|
|
if (in->read((char*) &fExtentMap[i], sizeof(EMEntry)) != sizeof(EMEntry))
|
|
{
|
|
log_errno("ExtentMap::loadVersion4(): read ");
|
|
throw runtime_error("ExtentMap::loadVersion4(): read failed. Check the error log.");
|
|
}
|
|
|
|
reserveLBIDRange(fExtentMap[i].range.start, fExtentMap[i].range.size);
|
|
|
|
//@bug 1911 - verify status value is valid
|
|
if (fExtentMap[i].status < EXTENTSTATUSMIN ||
|
|
fExtentMap[i].status > EXTENTSTATUSMAX)
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
|
|
fEMShminfo->currentSize = emNumElements * sizeof(EMEntry);
|
|
|
|
#ifdef DUMP_EXTENT_MAP
|
|
EMEntry* emSrc = fExtentMap;
|
|
cout << "lbid\tsz\toid\tfbo\thwm\tpart#\tseg#\tDBRoot\twid\tst\thi\tlo\tsq\tv" << endl;
|
|
|
|
for (int i = 0; i < emNumElements; i++)
|
|
{
|
|
cout <<
|
|
emSrc[i].start
|
|
<< '\t' << emSrc[i].size
|
|
<< '\t' << emSrc[i].fileID
|
|
<< '\t' << emSrc[i].blockOffset
|
|
<< '\t' << emSrc[i].HWM
|
|
<< '\t' << emSrc[i].partitionNum
|
|
<< '\t' << emSrc[i].segmentNum
|
|
<< '\t' << emSrc[i].dbRoot
|
|
<< '\t' << emSrc[i].status
|
|
<< '\t' << emSrc[i].partition.cprange.hi_val
|
|
<< '\t' << emSrc[i].partition.cprange.lo_val
|
|
<< '\t' << emSrc[i].partition.cprange.sequenceNum
|
|
<< '\t' << (int)(emSrc[i].partition.cprange.isValid)
|
|
<< endl;
|
|
}
|
|
|
|
cout << "Free list entries:" << endl;
|
|
cout << "start\tsize" << endl;
|
|
|
|
for (int i = 0; i < flNumElements; i++)
|
|
cout << fFreeList[i].start << '\t' << fFreeList[i].size << endl;
|
|
|
|
#endif
|
|
}
|
|
|
|
void ExtentMap::load(const string& filename, bool fixFL)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("load");
|
|
TRACER_ADDSTRINPUT(filename);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
|
|
try
|
|
{
|
|
grabFreeList(WRITE);
|
|
}
|
|
catch (...)
|
|
{
|
|
releaseEMEntryTable(WRITE);
|
|
throw;
|
|
}
|
|
|
|
if (IDBPolicy::useHdfs())
|
|
{
|
|
const char* filename_p = filename.c_str();
|
|
scoped_ptr<IDBDataFile> in(IDBDataFile::open(
|
|
IDBPolicy::getType(filename_p, IDBPolicy::WRITEENG),
|
|
filename_p, "r", 0));
|
|
|
|
if (!in)
|
|
{
|
|
log_errno("ExtentMap::load(): open");
|
|
releaseFreeList(WRITE);
|
|
releaseEMEntryTable(WRITE);
|
|
throw ios_base::failure("ExtentMap::load(): open failed. Check the error log.");
|
|
}
|
|
|
|
try
|
|
{
|
|
int emVersion = 0;
|
|
int bytes = in->read((char*) &emVersion, sizeof(int));
|
|
|
|
if (bytes == (int) sizeof(int) && emVersion == EM_MAGIC_V4)
|
|
loadVersion4(in.get());
|
|
else
|
|
{
|
|
log("ExtentMap::load(): That file is not a valid ExtentMap image");
|
|
throw runtime_error("ExtentMap::load(): That file is not a valid ExtentMap image");
|
|
}
|
|
}
|
|
catch (...)
|
|
{
|
|
releaseFreeList(WRITE);
|
|
releaseEMEntryTable(WRITE);
|
|
throw;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ifstream in;
|
|
in.open(filename.c_str(), ios_base::in | ios_base::binary);
|
|
|
|
if (!in)
|
|
{
|
|
log_errno("ExtentMap::load(): open");
|
|
releaseFreeList(WRITE);
|
|
releaseEMEntryTable(WRITE);
|
|
throw ios_base::failure("ExtentMap::load(): open failed. Check the error log.");
|
|
}
|
|
|
|
in.exceptions(ios_base::badbit | ios_base::failbit);
|
|
|
|
try
|
|
{
|
|
int emVersion;
|
|
in.read((char*) &emVersion, sizeof(int));
|
|
|
|
if (emVersion == EM_MAGIC_V4)
|
|
loadVersion4(in);
|
|
else
|
|
{
|
|
log("ExtentMap::load(): That file is not a valid ExtentMap image");
|
|
throw runtime_error("ExtentMap::load(): That file is not a valid ExtentMap image");
|
|
}
|
|
}
|
|
catch (...)
|
|
{
|
|
in.close();
|
|
releaseFreeList(WRITE);
|
|
releaseEMEntryTable(WRITE);
|
|
throw;
|
|
}
|
|
|
|
in.close();
|
|
}
|
|
|
|
releaseFreeList(WRITE);
|
|
releaseEMEntryTable(WRITE);
|
|
// checkConsistency();
|
|
}
|
|
|
|
void ExtentMap::save(const string& filename)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("save");
|
|
TRACER_ADDSTRINPUT(filename);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
int allocdSize, loadSize[3], i;
|
|
mode_t utmp;
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
try
|
|
{
|
|
grabFreeList(READ);
|
|
}
|
|
catch (...)
|
|
{
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
|
|
if (fEMShminfo->currentSize == 0)
|
|
{
|
|
log("ExtentMap::save(): got request to save an empty BRM");
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw runtime_error("ExtentMap::save(): got request to save an empty BRM");
|
|
}
|
|
|
|
if (IDBPolicy::useHdfs())
|
|
{
|
|
utmp = ::umask(0);
|
|
const char* filename_p = filename.c_str();
|
|
scoped_ptr<IDBDataFile> out(IDBDataFile::open(
|
|
IDBPolicy::getType(filename_p, IDBPolicy::WRITEENG),
|
|
filename_p, "wb", IDBDataFile::USE_VBUF));
|
|
::umask(utmp);
|
|
|
|
if (!out)
|
|
{
|
|
log_errno("ExtentMap::save(): open");
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw ios_base::failure("ExtentMap::save(): open failed. Check the error log.");
|
|
}
|
|
|
|
loadSize[0] = EM_MAGIC_V4;
|
|
loadSize[1] = fEMShminfo->currentSize / sizeof(EMEntry);
|
|
loadSize[2] = fFLShminfo->allocdSize / sizeof(InlineLBIDRange); // needs to send all entries
|
|
|
|
int bytes = 0;
|
|
|
|
try
|
|
{
|
|
const int wsize = 3 * sizeof(int);
|
|
bytes = out->write((char*)loadSize, wsize);
|
|
|
|
if (bytes != wsize)
|
|
throw ios_base::failure("ExtentMap::save(): write failed. Check the error log.");
|
|
}
|
|
catch (...)
|
|
{
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
|
|
allocdSize = fEMShminfo->allocdSize / sizeof(EMEntry);
|
|
const int emEntrySize = sizeof(EMEntry);
|
|
|
|
for (i = 0; i < allocdSize; i++)
|
|
{
|
|
if (fExtentMap[i].range.size > 0)
|
|
{
|
|
try
|
|
{
|
|
bytes = out->write((char*) &fExtentMap[i], emEntrySize);
|
|
|
|
if (bytes != emEntrySize)
|
|
throw ios_base::failure("ExtentMap::save(): write failed. Check the error log.");
|
|
}
|
|
catch (...)
|
|
{
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
}
|
|
}
|
|
|
|
allocdSize = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
const int inlineLbidRangeSize = sizeof(InlineLBIDRange);
|
|
|
|
for (i = 0; i < allocdSize; i++)
|
|
{
|
|
// if (fFreeList[i].size > 0) {
|
|
try
|
|
{
|
|
int bytes = out->write((char*) &fFreeList[i], inlineLbidRangeSize);
|
|
|
|
if (bytes != inlineLbidRangeSize)
|
|
throw ios_base::failure("ExtentMap::save(): write failed. Check the error log.");
|
|
}
|
|
catch (...)
|
|
{
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
|
|
// }
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ofstream out;
|
|
|
|
// Make em writes to disk use a buffer size of StrmBufSize bytes (instead of the default 8K)
|
|
const unsigned StrmBufSize = 1 * 1024 * 1024;
|
|
scoped_array<char> buf(new char[StrmBufSize]);
|
|
out.rdbuf()->pubsetbuf(buf.get(), StrmBufSize);
|
|
|
|
utmp = ::umask(0);
|
|
out.open(filename.c_str(), ios_base::out | ios_base::binary);
|
|
::umask(utmp);
|
|
|
|
if (!out)
|
|
{
|
|
log_errno("ExtentMap::save(): open");
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw ios_base::failure("ExtentMap::save(): open failed. Check the error log.");
|
|
}
|
|
|
|
out.exceptions(ios_base::badbit);
|
|
|
|
loadSize[0] = EM_MAGIC_V4;
|
|
loadSize[1] = fEMShminfo->currentSize / sizeof(EMEntry);
|
|
loadSize[2] = fFLShminfo->allocdSize / sizeof(InlineLBIDRange); // needs to send all entries
|
|
|
|
try
|
|
{
|
|
out.write((char*)loadSize, 3 * sizeof(int));
|
|
}
|
|
catch (...)
|
|
{
|
|
out.close();
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
|
|
allocdSize = fEMShminfo->allocdSize / sizeof(EMEntry);
|
|
|
|
for (i = 0; i < allocdSize; i++)
|
|
{
|
|
if (fExtentMap[i].range.size > 0)
|
|
{
|
|
try
|
|
{
|
|
out.write((char*) &fExtentMap[i], sizeof(EMEntry));
|
|
}
|
|
catch (...)
|
|
{
|
|
out.close();
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
}
|
|
}
|
|
|
|
allocdSize = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
|
|
for (i = 0; i < allocdSize; i++)
|
|
{
|
|
// if (fFreeList[i].size > 0) {
|
|
try
|
|
{
|
|
out.write((char*) &fFreeList[i], sizeof(InlineLBIDRange));
|
|
}
|
|
catch (...)
|
|
{
|
|
out.close();
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
|
|
// }
|
|
}
|
|
|
|
out.close();
|
|
}
|
|
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
}
|
|
|
|
/* always returns holding the EM lock, and with the EM seg mapped */
|
|
void ExtentMap::grabEMEntryTable(OPS op)
|
|
{
|
|
mutex::scoped_lock lk(mutex);
|
|
|
|
if (op == READ)
|
|
fEMShminfo = fMST.getTable_read(MasterSegmentTable::EMTable);
|
|
else
|
|
{
|
|
fEMShminfo = fMST.getTable_write(MasterSegmentTable::EMTable);
|
|
emLocked = true;
|
|
}
|
|
|
|
if (!fPExtMapImpl || fPExtMapImpl->key() != (unsigned)fEMShminfo->tableShmkey)
|
|
{
|
|
if (fExtentMap != NULL)
|
|
{
|
|
fExtentMap = NULL;
|
|
}
|
|
|
|
if (fEMShminfo->allocdSize == 0)
|
|
{
|
|
if (op == READ)
|
|
{
|
|
fMST.getTable_upgrade(MasterSegmentTable::EMTable);
|
|
emLocked = true;
|
|
|
|
if (fEMShminfo->allocdSize == 0)
|
|
growEMShmseg();
|
|
|
|
emLocked = false; // has to be done holding the write lock
|
|
fMST.getTable_downgrade(MasterSegmentTable::EMTable);
|
|
}
|
|
else
|
|
growEMShmseg();
|
|
}
|
|
else
|
|
{
|
|
fPExtMapImpl = ExtentMapImpl::makeExtentMapImpl(fEMShminfo->tableShmkey, 0);
|
|
ASSERT(fPExtMapImpl);
|
|
|
|
if (r_only)
|
|
fPExtMapImpl->makeReadOnly();
|
|
|
|
fExtentMap = fPExtMapImpl->get();
|
|
|
|
if (fExtentMap == NULL)
|
|
{
|
|
log_errno("ExtentMap::grabEMEntryTable(): shmat");
|
|
throw runtime_error("ExtentMap::grabEMEntryTable(): shmat failed. Check the error log.");
|
|
}
|
|
}
|
|
}
|
|
else
|
|
fExtentMap = fPExtMapImpl->get();
|
|
}
|
|
|
|
/* always returns holding the FL lock */
|
|
void ExtentMap::grabFreeList(OPS op)
|
|
{
|
|
mutex::scoped_lock lk(mutex, defer_lock);
|
|
|
|
if (op == READ)
|
|
{
|
|
fFLShminfo = fMST.getTable_read(MasterSegmentTable::EMFreeList);
|
|
lk.lock();
|
|
}
|
|
else
|
|
{
|
|
fFLShminfo = fMST.getTable_write(MasterSegmentTable::EMFreeList);
|
|
flLocked = true;
|
|
}
|
|
|
|
if (!fPFreeListImpl || fPFreeListImpl->key() != (unsigned)fFLShminfo->tableShmkey)
|
|
{
|
|
if (fFreeList != NULL)
|
|
{
|
|
fFreeList = NULL;
|
|
}
|
|
|
|
if (fFLShminfo->allocdSize == 0)
|
|
{
|
|
if (op == READ)
|
|
{
|
|
lk.unlock();
|
|
fMST.getTable_upgrade(MasterSegmentTable::EMFreeList);
|
|
flLocked = true;
|
|
|
|
if (fFLShminfo->allocdSize == 0)
|
|
growFLShmseg();
|
|
|
|
flLocked = false; // has to be done holding the write lock
|
|
fMST.getTable_downgrade(MasterSegmentTable::EMFreeList);
|
|
}
|
|
else
|
|
growFLShmseg();
|
|
}
|
|
else
|
|
{
|
|
fPFreeListImpl = FreeListImpl::makeFreeListImpl(fFLShminfo->tableShmkey, 0);
|
|
ASSERT(fPFreeListImpl);
|
|
|
|
if (r_only)
|
|
fPFreeListImpl->makeReadOnly();
|
|
|
|
fFreeList = fPFreeListImpl->get();
|
|
|
|
if (fFreeList == NULL)
|
|
{
|
|
log_errno("ExtentMap::grabFreeList(): shmat");
|
|
throw runtime_error("ExtentMap::grabFreeList(): shmat failed. Check the error log.");
|
|
}
|
|
|
|
if (op == READ)
|
|
lk.unlock();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fFreeList = fPFreeListImpl->get();
|
|
|
|
if (op == READ)
|
|
lk.unlock();
|
|
}
|
|
}
|
|
|
|
void ExtentMap::releaseEMEntryTable(OPS op)
|
|
{
|
|
if (op == READ)
|
|
fMST.releaseTable_read(MasterSegmentTable::EMTable);
|
|
else
|
|
{
|
|
/*
|
|
Note: Technically we should mark it unlocked after it's unlocked,
|
|
however, that's a race condition. The only reason the up operation
|
|
here will fail is if the underlying semaphore doesn't exist anymore
|
|
or there is a locking logic error somewhere else. Either way,
|
|
declaring the EM unlocked here is OK. Same with all similar assignments.
|
|
*/
|
|
emLocked = false;
|
|
fMST.releaseTable_write(MasterSegmentTable::EMTable);
|
|
}
|
|
}
|
|
|
|
void ExtentMap::releaseFreeList(OPS op)
|
|
{
|
|
if (op == READ)
|
|
fMST.releaseTable_read(MasterSegmentTable::EMFreeList);
|
|
else
|
|
{
|
|
flLocked = false;
|
|
fMST.releaseTable_write(MasterSegmentTable::EMFreeList);
|
|
}
|
|
}
|
|
|
|
key_t ExtentMap::chooseEMShmkey()
|
|
{
|
|
int fixedKeys = 1;
|
|
key_t ret;
|
|
|
|
if (fEMShminfo->tableShmkey + 1 == (key_t) (fShmKeys.KEYRANGE_EXTENTMAP_BASE +
|
|
fShmKeys.KEYRANGE_SIZE - 1) || (unsigned)fEMShminfo->tableShmkey < fShmKeys.KEYRANGE_EXTENTMAP_BASE)
|
|
ret = fShmKeys.KEYRANGE_EXTENTMAP_BASE + fixedKeys;
|
|
else
|
|
ret = fEMShminfo->tableShmkey + 1;
|
|
|
|
return ret;
|
|
}
|
|
|
|
key_t ExtentMap::chooseFLShmkey()
|
|
{
|
|
int fixedKeys = 1, ret;
|
|
|
|
if (fFLShminfo->tableShmkey + 1 == (key_t) (fShmKeys.KEYRANGE_EMFREELIST_BASE +
|
|
fShmKeys.KEYRANGE_SIZE - 1) || (unsigned)fFLShminfo->tableShmkey < fShmKeys.KEYRANGE_EMFREELIST_BASE)
|
|
ret = fShmKeys.KEYRANGE_EMFREELIST_BASE + fixedKeys;
|
|
else
|
|
ret = fFLShminfo->tableShmkey + 1;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Must be called holding the EM write lock
|
|
Returns with the new shmseg mapped */
|
|
void ExtentMap::growEMShmseg(size_t nrows)
|
|
{
|
|
size_t allocSize;
|
|
key_t newshmkey;
|
|
|
|
if (fEMShminfo->allocdSize == 0)
|
|
allocSize = EM_INITIAL_SIZE;
|
|
else
|
|
allocSize = fEMShminfo->allocdSize + EM_INCREMENT;
|
|
|
|
newshmkey = chooseEMShmkey();
|
|
ASSERT((allocSize == EM_INITIAL_SIZE && !fPExtMapImpl) || fPExtMapImpl);
|
|
|
|
//Use the larger of the calculated value or the specified value
|
|
allocSize = max(allocSize, nrows * sizeof(EMEntry));
|
|
|
|
if (!fPExtMapImpl)
|
|
{
|
|
fPExtMapImpl = ExtentMapImpl::makeExtentMapImpl(newshmkey, allocSize, r_only);
|
|
}
|
|
else
|
|
{
|
|
fPExtMapImpl->grow(newshmkey, allocSize);
|
|
}
|
|
|
|
fEMShminfo->tableShmkey = newshmkey;
|
|
fEMShminfo->allocdSize = allocSize;
|
|
|
|
if (r_only)
|
|
fPExtMapImpl->makeReadOnly();
|
|
|
|
fExtentMap = fPExtMapImpl->get();
|
|
}
|
|
|
|
/* Must be called holding the FL lock
|
|
Returns with the new shmseg mapped */
|
|
void ExtentMap::growFLShmseg()
|
|
{
|
|
size_t allocSize;
|
|
key_t newshmkey;
|
|
|
|
if (fFLShminfo->allocdSize == 0)
|
|
allocSize = EM_FREELIST_INITIAL_SIZE;
|
|
else
|
|
allocSize = fFLShminfo->allocdSize + EM_FREELIST_INCREMENT;
|
|
|
|
newshmkey = chooseFLShmkey();
|
|
ASSERT((allocSize == EM_FREELIST_INITIAL_SIZE && !fPFreeListImpl) || fPFreeListImpl);
|
|
|
|
if (!fPFreeListImpl)
|
|
fPFreeListImpl = FreeListImpl::makeFreeListImpl(newshmkey, allocSize, false);
|
|
else
|
|
fPFreeListImpl->grow(newshmkey, allocSize);
|
|
|
|
fFLShminfo->tableShmkey = newshmkey;
|
|
fFreeList = fPFreeListImpl->get();
|
|
|
|
// init freelist entry
|
|
if (fFLShminfo->allocdSize == 0)
|
|
{
|
|
fFreeList->size = (1ULL << 36) / 1024;
|
|
fFLShminfo->currentSize = sizeof(InlineLBIDRange);
|
|
}
|
|
|
|
fFLShminfo->allocdSize = allocSize;
|
|
|
|
if (r_only)
|
|
fPFreeListImpl->makeReadOnly();
|
|
|
|
fFreeList = fPFreeListImpl->get();
|
|
}
|
|
|
|
// @bug 1509. Added new version of lookup that returns the first and last lbid for the extent that contains the
|
|
// given lbid.
|
|
int ExtentMap::lookup(LBID_t lbid, LBID_t& firstLbid, LBID_t& lastLbid)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("lookup");
|
|
TRACER_ADDINPUT(lbid);
|
|
TRACER_ADDOUTPUT(firstLbid);
|
|
TRACER_ADDOUTPUT(lastLbid);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
int entries, i;
|
|
LBID_t lastBlock;
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
//printEM();
|
|
if (lbid < 0)
|
|
{
|
|
log("ExtentMap::lookup(): lbid must be >= 0", logging::LOG_TYPE_DEBUG);
|
|
cout << "ExtentMap::lookup(): lbid must be >= 0. Lbid passed was " << lbid << endl;
|
|
throw invalid_argument("ExtentMap::lookup(): lbid must be >= 0");
|
|
}
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(READ);
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
lastBlock = fExtentMap[i].range.start +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
|
|
|
|
if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
|
|
{
|
|
firstLbid = fExtentMap[i].range.start;
|
|
lastLbid = lastBlock;
|
|
releaseEMEntryTable(READ);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
return -1;
|
|
}
|
|
|
|
// @bug 1055+. New functions added for multiple files per OID enhancement.
|
|
int ExtentMap::lookupLocal(LBID_t lbid, int& OID, uint16_t& dbRoot, uint32_t& partitionNum, uint16_t& segmentNum, uint32_t& fileBlockOffset)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("lookupLocal");
|
|
TRACER_ADDINPUT(lbid);
|
|
TRACER_ADDOUTPUT(OID);
|
|
TRACER_ADDSHORTOUTPUT(dbRoot);
|
|
TRACER_ADDOUTPUT(partitionNum);
|
|
TRACER_ADDSHORTOUTPUT(segmentNum);
|
|
TRACER_ADDOUTPUT(fileBlockOffset);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
#ifdef EM_AS_A_TABLE_POC__
|
|
|
|
if (lbid >= (1LL << 54))
|
|
{
|
|
OID = 1084;
|
|
dbRoot = 1;
|
|
partitionNum = 0;
|
|
segmentNum = 0;
|
|
fileBlockOffset = 0;
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
int entries, i, offset;
|
|
LBID_t lastBlock;
|
|
|
|
if (lbid < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::lookupLocal(): invalid lbid requested: " << lbid;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
lastBlock = fExtentMap[i].range.start +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
|
|
|
|
if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
|
|
{
|
|
OID = fExtentMap[i].fileID;
|
|
dbRoot = fExtentMap[i].dbRoot;
|
|
segmentNum = fExtentMap[i].segmentNum;
|
|
partitionNum = fExtentMap[i].partitionNum;
|
|
|
|
// TODO: Offset logic.
|
|
offset = lbid - fExtentMap[i].range.start;
|
|
fileBlockOffset = fExtentMap[i].blockOffset + offset;
|
|
|
|
releaseEMEntryTable(READ);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
return -1;
|
|
}
|
|
|
|
int ExtentMap::lookupLocal(int OID, uint32_t partitionNum, uint16_t segmentNum, uint32_t fileBlockOffset, LBID_t& LBID)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("lookupLocal");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDINPUT(fileBlockOffset);
|
|
TRACER_ADDOUTPUT(LBID);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
int entries, i, offset;
|
|
|
|
if (OID < 0 || fileBlockOffset < 0)
|
|
{
|
|
log("ExtentMap::lookup(): OID and FBO must be >= 0", logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument("ExtentMap::lookup(): OID and FBO must be >= 0");
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
|
|
// TODO: Blockoffset logic.
|
|
if (fExtentMap[i].range.size != 0 &&
|
|
fExtentMap[i].fileID == OID &&
|
|
fExtentMap[i].partitionNum == partitionNum &&
|
|
fExtentMap[i].segmentNum == segmentNum &&
|
|
fExtentMap[i].blockOffset <= fileBlockOffset &&
|
|
fileBlockOffset <= (fExtentMap[i].blockOffset +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1))
|
|
{
|
|
|
|
offset = fileBlockOffset - fExtentMap[i].blockOffset;
|
|
LBID = fExtentMap[i].range.start + offset;
|
|
releaseEMEntryTable(READ);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
return -1;
|
|
}
|
|
|
|
int ExtentMap::lookupLocal_DBroot(int OID, uint16_t dbroot, uint32_t partitionNum, uint16_t segmentNum,
|
|
uint32_t fileBlockOffset, LBID_t& LBID)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("lookupLocal");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDINPUT(fileBlockOffset);
|
|
TRACER_ADDOUTPUT(LBID);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
int entries, i, offset;
|
|
|
|
if (OID < 0 || fileBlockOffset < 0)
|
|
{
|
|
log("ExtentMap::lookup(): OID and FBO must be >= 0", logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument("ExtentMap::lookup(): OID and FBO must be >= 0");
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
|
|
// TODO: Blockoffset logic.
|
|
if (fExtentMap[i].range.size != 0 &&
|
|
fExtentMap[i].fileID == OID &&
|
|
fExtentMap[i].dbRoot == dbroot &&
|
|
fExtentMap[i].partitionNum == partitionNum &&
|
|
fExtentMap[i].segmentNum == segmentNum &&
|
|
fExtentMap[i].blockOffset <= fileBlockOffset &&
|
|
fileBlockOffset <= (fExtentMap[i].blockOffset +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1))
|
|
{
|
|
|
|
offset = fileBlockOffset - fExtentMap[i].blockOffset;
|
|
LBID = fExtentMap[i].range.start + offset;
|
|
releaseEMEntryTable(READ);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
return -1;
|
|
}
|
|
|
|
// @bug 1055-.
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Lookup/return starting LBID for the specified OID, partition, segment, and
|
|
// file block offset.
|
|
//------------------------------------------------------------------------------
|
|
int ExtentMap::lookupLocalStartLbid(int OID,
|
|
uint32_t partitionNum,
|
|
uint16_t segmentNum,
|
|
uint32_t fileBlockOffset,
|
|
LBID_t& LBID)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("lookupLocalStartLbid");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDINPUT(fileBlockOffset);
|
|
TRACER_ADDOUTPUT(LBID);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
int entries, i;
|
|
|
|
if (OID < 0 || fileBlockOffset < 0)
|
|
{
|
|
log("ExtentMap::lookupLocalStartLbid(): OID and FBO must be >= 0",
|
|
logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument("ExtentMap::lookupLocalStartLbid(): "
|
|
"OID and FBO must be >= 0");
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < entries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0 &&
|
|
fExtentMap[i].fileID == OID &&
|
|
fExtentMap[i].partitionNum == partitionNum &&
|
|
fExtentMap[i].segmentNum == segmentNum &&
|
|
fExtentMap[i].blockOffset <= fileBlockOffset &&
|
|
fileBlockOffset <= (fExtentMap[i].blockOffset +
|
|
(static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1))
|
|
{
|
|
LBID = fExtentMap[i].range.start;
|
|
releaseEMEntryTable(READ);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
|
|
return -1;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Creates a "stripe" of column extents across a table, for the specified
|
|
// columns and DBRoot.
|
|
// cols - Vector of columns OIDs and widths to be allocated
|
|
// dbRoot - DBRoot to be used for new extents
|
|
// partitionNum - when creating the first extent for a column (on dbRoot),
|
|
// partitionNum must be specified as an input argument.
|
|
// If not the first extent on dbRoot, then partitionNum
|
|
// for the new extents will be assigned and returned, based
|
|
// on the current last extent for dbRoot.
|
|
// output:
|
|
// partitionNum - Partition number for new extents
|
|
// segmentNum - Segment number for new exents
|
|
// extents - starting Lbid, numBlocks, and FBO for new extents
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::createStripeColumnExtents(
|
|
const vector<CreateStripeColumnExtentsArgIn>& cols,
|
|
uint16_t dbRoot,
|
|
uint32_t& partitionNum,
|
|
uint16_t& segmentNum,
|
|
vector<CreateStripeColumnExtentsArgOut>& extents)
|
|
{
|
|
LBID_t startLbid;
|
|
int allocSize;
|
|
uint32_t startBlkOffset;
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
OID_t baselineOID = -1;
|
|
uint16_t baselineSegmentNum = -1;
|
|
uint32_t baselinePartNum = -1;
|
|
|
|
for (uint32_t i = 0; i < cols.size(); i++)
|
|
{
|
|
createColumnExtent_DBroot(
|
|
cols[i].oid,
|
|
cols[i].width,
|
|
dbRoot,
|
|
cols[i].colDataType,
|
|
partitionNum,
|
|
segmentNum,
|
|
startLbid,
|
|
allocSize,
|
|
startBlkOffset,
|
|
false);
|
|
|
|
if (i == 0)
|
|
{
|
|
baselineOID = cols[i].oid;
|
|
baselineSegmentNum = segmentNum;
|
|
baselinePartNum = partitionNum;
|
|
}
|
|
else
|
|
{
|
|
if ((segmentNum != baselineSegmentNum) ||
|
|
(partitionNum != baselinePartNum))
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::createStripeColumnExtents(): "
|
|
"Inconsistent segment extent creation: " <<
|
|
"DBRoot: " << dbRoot <<
|
|
"OID1: " << baselineOID <<
|
|
"; Part#: " << baselinePartNum <<
|
|
"; Seg#: " << baselineSegmentNum <<
|
|
" <versus> OID2: " << cols[i].oid <<
|
|
"; Part#: " << partitionNum <<
|
|
"; Seg#: " << segmentNum;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
}
|
|
|
|
CreateStripeColumnExtentsArgOut extentInfo;
|
|
extentInfo.startLbid = startLbid;
|
|
extentInfo.allocSize = allocSize;
|
|
extentInfo.startBlkOffset = startBlkOffset;
|
|
extents.push_back( extentInfo );
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Creates an extent for a column file on the specified DBRoot. This is the
|
|
// external API function referenced by the dbrm wrapper class.
|
|
// required input:
|
|
// OID - column OID for which the extent is to be created
|
|
// colWidth - width of column in bytes
|
|
// dbRoot - DBRoot where extent is to be added
|
|
// partitionNum - when creating the first extent for a column (on dbRoot),
|
|
// partitionNum must be specified as an input argument.
|
|
// If not the first extent on dbRoot, then partitionNum
|
|
// for the new extent will be assigned and returned, based
|
|
// on the current last extent for dbRoot.
|
|
// useLock - Grab ExtentMap and FreeList WRITE lock to perform work
|
|
// output:
|
|
// partitionNum - partition number for the new extent
|
|
// segmentNum - segment number for the new extent
|
|
// lbid - starting LBID of the created extent
|
|
// allocdsize - number of LBIDs allocated
|
|
// startBlockOffset-starting block of the created extent
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::createColumnExtent_DBroot(int OID,
|
|
uint32_t colWidth,
|
|
uint16_t dbRoot,
|
|
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
|
uint32_t& partitionNum,
|
|
uint16_t& segmentNum,
|
|
LBID_t& lbid,
|
|
int& allocdsize,
|
|
uint32_t& startBlockOffset,
|
|
bool useLock) // defaults to true
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("createColumnExtent_DBroot");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(colWidth);
|
|
TRACER_ADDSHORTINPUT(dbRoot);
|
|
TRACER_ADDOUTPUT(partitionNum);
|
|
TRACER_ADDSHORTOUTPUT(segmentNum);
|
|
TRACER_ADDINT64OUTPUT(lbid);
|
|
TRACER_ADDOUTPUT(allocdsize);
|
|
TRACER_ADDOUTPUT(startBlockOffset);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (OID <= 0)
|
|
{
|
|
log("ExtentMap::createColumnExtent_DBroot(): OID must be > 0",
|
|
logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument(
|
|
"ExtentMap::createColumnExtent_DBroot(): OID must be > 0");
|
|
}
|
|
|
|
#endif
|
|
|
|
// Convert extent size in rows to extent size in 8192-byte blocks.
|
|
// extentRows should be multiple of blocksize (8192).
|
|
const unsigned EXTENT_SIZE = (getExtentRows() * colWidth) / BLOCK_SIZE;
|
|
|
|
if (useLock)
|
|
{
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
}
|
|
|
|
if (fEMShminfo->currentSize == fEMShminfo->allocdSize)
|
|
growEMShmseg();
|
|
|
|
// size is the number of multiples of 1024 blocks.
|
|
// ex: size=1 --> 1024 blocks
|
|
// size=2 --> 2048 blocks
|
|
// size=3 --> 3072 blocks, etc.
|
|
uint32_t size = EXTENT_SIZE / 1024;
|
|
|
|
lbid = _createColumnExtent_DBroot(size, OID, colWidth,
|
|
dbRoot, colDataType, partitionNum, segmentNum, startBlockOffset);
|
|
|
|
allocdsize = EXTENT_SIZE;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Creates an extent for a column file for the specified DBRoot. This is the
|
|
// internal implementation function.
|
|
// input:
|
|
// size - number of multiples of 1024 blocks allocated to the extent
|
|
// ex: size=1 --> 1024 blocks
|
|
// size=2 --> 2048 blocks
|
|
// size=3 --> 3072 blocks, etc.
|
|
// OID - column OID for which the extent is to be created
|
|
// colWidth - width of column in bytes
|
|
// dbRoot - dbRoot where extent is to be added
|
|
// partitionNum - when creating the first extent for an empty dbRoot,
|
|
// partitionNum must be specified as an input argument.
|
|
// output:
|
|
// partitionNum - when adding an extent to a dbRoot,
|
|
// partitionNum will be the assigned partition number
|
|
// segmentNum - segment number for the new extent
|
|
// startBlockOffset-starting block of the created extent
|
|
// returns starting LBID of the created extent.
|
|
//------------------------------------------------------------------------------
|
|
LBID_t ExtentMap::_createColumnExtent_DBroot(uint32_t size, int OID,
|
|
uint32_t colWidth,
|
|
uint16_t dbRoot,
|
|
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
|
uint32_t& partitionNum,
|
|
uint16_t& segmentNum,
|
|
uint32_t& startBlockOffset)
|
|
{
|
|
int emptyEMEntry = -1;
|
|
int lastExtentIndex = -1;
|
|
uint32_t highestOffset = 0;
|
|
uint32_t highestPartNum = 0;
|
|
uint16_t highestSegNum = 0;
|
|
const unsigned FILES_PER_COL_PART = getFilesPerColumnPartition();
|
|
const unsigned EXTENT_ROWS = getExtentRows();
|
|
const unsigned EXTENTS_PER_SEGFILE = getExtentsPerSegmentFile();
|
|
const unsigned DBROOT_COUNT = getDbRootCount();
|
|
|
|
// Variables that track list of segfiles in target (HWM) DBRoot & partition.
|
|
// Map segment number to the highest fbo extent in each file
|
|
typedef tr1::unordered_map<uint16_t, uint32_t> TargetDbRootSegsMap;
|
|
typedef TargetDbRootSegsMap::iterator TargetDbRootSegsMapIter;
|
|
typedef TargetDbRootSegsMap::const_iterator TargetDbRootSegsMapConstIter;
|
|
TargetDbRootSegsMap targetDbRootSegs;
|
|
|
|
uint32_t highEmptySegNum = 0; // high seg num for user specified partition;
|
|
// only comes into play for empty DBRoot.
|
|
bool bHighEmptySegNumSet = false;
|
|
|
|
//--------------------------------------------------------------------------
|
|
// First Step: Scan ExtentMap
|
|
// 1. find HWM extent in relevant DBRoot
|
|
// 2. if DBRoot is empty, track highest seg num in user specified partition
|
|
// 3. Find first unused extent map entry
|
|
//--------------------------------------------------------------------------
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
LBID_t startLBID = getLBIDsFromFreeList( size );
|
|
|
|
// Find the first empty Entry; and find last extent for this OID and dbRoot
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
if (fExtentMap[i].fileID == OID)
|
|
{
|
|
|
|
// 1. Find HWM extent in relevant DBRoot
|
|
if (fExtentMap[i].dbRoot == dbRoot)
|
|
{
|
|
if ( (fExtentMap[i].partitionNum > highestPartNum) ||
|
|
((fExtentMap[i].partitionNum == highestPartNum) &&
|
|
(fExtentMap[i].blockOffset > highestOffset)) ||
|
|
((fExtentMap[i].partitionNum == highestPartNum) &&
|
|
(fExtentMap[i].blockOffset == highestOffset) &&
|
|
(fExtentMap[i].segmentNum >= highestSegNum)) )
|
|
{
|
|
|
|
lastExtentIndex = i;
|
|
highestPartNum = fExtentMap[i].partitionNum;
|
|
highestSegNum = fExtentMap[i].segmentNum;
|
|
highestOffset = fExtentMap[i].blockOffset;
|
|
}
|
|
}
|
|
|
|
// 2. for empty DBRoot track hi seg# in user specified part#
|
|
if ((lastExtentIndex == -1) &&
|
|
(fExtentMap[i].partitionNum == partitionNum))
|
|
{
|
|
if ((fExtentMap[i].segmentNum > highEmptySegNum) ||
|
|
(!bHighEmptySegNumSet))
|
|
{
|
|
highEmptySegNum = fExtentMap[i].segmentNum;
|
|
bHighEmptySegNumSet = true;
|
|
}
|
|
}
|
|
} // found extentmap entry for specified OID
|
|
} // found valid extentmap entry
|
|
|
|
// 3. Find first available extent map entry that can be reused
|
|
else if (emptyEMEntry < 0)
|
|
emptyEMEntry = i;
|
|
} // Loop through extent map entries
|
|
|
|
if (emptyEMEntry == -1)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::_createColumnExtent_DBroot(): "
|
|
"could not find an empty EMEntry for OID " << OID <<
|
|
"; Extent Map is full",
|
|
log(oss.str(),
|
|
logging::LOG_TYPE_CRITICAL);
|
|
throw logic_error( oss.str() );
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// If DBRoot is not empty, then...
|
|
// Second Step: Scan ExtentMap again after I know the last partition
|
|
// 4. track highest seg num for HWM+1 partition
|
|
// 5. track highest seg num for HWM partition
|
|
// 6. save list of segment numbers and fbos in target DBRoot and partition
|
|
//
|
|
// Scanning the extentmap a second time is not a good thing to be doing.
|
|
// But the alternative isn't good either. There is certain information
|
|
// I need to capture about the last partition and DBRoot, and for the next
|
|
// partition as well (which may contain segment files on other DBRoots),
|
|
// but until I scan the extentmap, I don't know what my last partition is.
|
|
// If I try to do this in a single scan, then I am forced to spend time
|
|
// capturing information about partitions that turn out to be inconse-
|
|
// quential because the "known" last partition will keep changing as I
|
|
// scan the extentmap.
|
|
//--------------------------------------------------------------------------
|
|
bool bSegsOutOfService = false;
|
|
int partHighSeg = -1; // hi seg num for last partition
|
|
int partHighSegNext = -1; // hi seg num for next partition
|
|
|
|
if (lastExtentIndex >= 0)
|
|
{
|
|
uint32_t targetDbRootPart = fExtentMap[lastExtentIndex].partitionNum;
|
|
uint32_t targetDbRootPartNext = targetDbRootPart + 1;
|
|
partHighSeg = fExtentMap[lastExtentIndex].segmentNum;
|
|
targetDbRootSegs.insert( TargetDbRootSegsMap::value_type(
|
|
fExtentMap[lastExtentIndex].segmentNum,
|
|
fExtentMap[lastExtentIndex].blockOffset) );
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
if (fExtentMap[i].fileID == OID)
|
|
{
|
|
|
|
// 4. Track hi seg for hwm+1 partition
|
|
if (fExtentMap[i].partitionNum == targetDbRootPartNext)
|
|
{
|
|
if (fExtentMap[i].segmentNum > partHighSegNext)
|
|
{
|
|
partHighSegNext = fExtentMap[i].segmentNum;
|
|
}
|
|
}
|
|
|
|
// 5. Track hi seg for hwm partition
|
|
else if (fExtentMap[i].partitionNum == targetDbRootPart)
|
|
{
|
|
if (fExtentMap[i].segmentNum > partHighSeg)
|
|
{
|
|
partHighSeg = fExtentMap[i].segmentNum;
|
|
}
|
|
|
|
// 6. Save list of seg files in target DBRoot/Partition,
|
|
// along with the highest fbo for each seg file
|
|
if (fExtentMap[i].dbRoot == dbRoot)
|
|
{
|
|
if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
|
|
bSegsOutOfService = true;
|
|
|
|
TargetDbRootSegsMapIter iter =
|
|
targetDbRootSegs.find(fExtentMap[i].segmentNum);
|
|
|
|
if (iter == targetDbRootSegs.end())
|
|
{
|
|
targetDbRootSegs.insert(
|
|
TargetDbRootSegsMap::value_type(
|
|
fExtentMap[i].segmentNum,
|
|
fExtentMap[i].blockOffset) );
|
|
}
|
|
else
|
|
{
|
|
if (fExtentMap[i].blockOffset > iter->second)
|
|
{
|
|
iter->second = fExtentMap[i].blockOffset;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} // found extentmap entry for specified OID
|
|
} // found valid extentmap entry
|
|
} // loop through extent map entries
|
|
} // (lastExtentIndex >= 0)
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Third Step: Select partition and segment number for new extent
|
|
// 1. Loop through targetDbRootSegs to find segment file for next extent
|
|
// 2. Check for exceptions that warrant going to next physical partition
|
|
// a. See if any extents are marked outOfService
|
|
// b. See if extents are not evenly layered as expected
|
|
// 3. Perform additional new partition/segment logic as applicable
|
|
// a. No action taken if 2a or 2b already detected need for new partition
|
|
// b. If HWM extent is in last file of DBRoot/Partition, see if next
|
|
// extent goes in new partition, or if wrap-around within current
|
|
// partition.
|
|
// c. If extent needs to go in next partition, figure out the next
|
|
// partition and the next available segment in that partition.
|
|
// 4. Set blockOffset of new extent based on where extent is being added
|
|
//--------------------------------------------------------------------------
|
|
uint16_t newDbRoot = dbRoot;
|
|
uint32_t newPartitionNum = partitionNum;
|
|
uint16_t newSegmentNum = 0;
|
|
uint32_t newBlockOffset = 0;
|
|
|
|
// If this is not the first extent for this OID and DBRoot then
|
|
// extrapolate part# and seg# from last extent; wrap around segment and
|
|
// partition number as needed.
|
|
// else
|
|
// use part# that the user specifies
|
|
if (lastExtentIndex >= 0)
|
|
{
|
|
bool startNewPartition = false;
|
|
bool startNewStripeInSegFile = false;
|
|
const unsigned int filesPerDBRootPerPartition =
|
|
FILES_PER_COL_PART / DBROOT_COUNT;
|
|
|
|
int& lastExtIdx = lastExtentIndex;
|
|
|
|
// Find first, last, next seg files in target partition and DBRoot
|
|
uint16_t firstTargetSeg = fExtentMap[lastExtIdx].segmentNum;
|
|
uint16_t lastTargetSeg = fExtentMap[lastExtIdx].segmentNum;
|
|
uint16_t nextTargetSeg = fExtentMap[lastExtIdx].segmentNum;
|
|
|
|
// 1. Loop thru targetDbRootSegs[] to find next segment after
|
|
// lastExtIdx in target list.
|
|
// We save low and high segment to use in wrap-around case.
|
|
if (targetDbRootSegs.size() > 1)
|
|
{
|
|
bool bNextSegSet = false;
|
|
|
|
for (TargetDbRootSegsMapConstIter iter = targetDbRootSegs.begin();
|
|
iter != targetDbRootSegs.end();
|
|
++iter)
|
|
{
|
|
uint16_t targetSeg = iter->first;
|
|
|
|
if (targetSeg < firstTargetSeg)
|
|
firstTargetSeg = targetSeg;
|
|
else if (targetSeg > lastTargetSeg)
|
|
lastTargetSeg = targetSeg;
|
|
|
|
if (targetSeg > fExtentMap[lastExtIdx].segmentNum)
|
|
{
|
|
if ((targetSeg < nextTargetSeg) || (!bNextSegSet))
|
|
{
|
|
nextTargetSeg = targetSeg;
|
|
bNextSegSet = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
newPartitionNum = fExtentMap[lastExtIdx].partitionNum;
|
|
|
|
// 2a. Skip to next physical partition if any extents in HWM partition/
|
|
// DBRoot are marked as outOfService
|
|
if (bSegsOutOfService)
|
|
{
|
|
|
|
// cout << "Skipping to next partition (outOfService segs)" <<
|
|
// ": oid-" << fExtentMap[lastExtentIndex].fileID <<
|
|
// "; root-" << fExtentMap[lastExtentIndex].dbRoot <<
|
|
// "; part-" << fExtentMap[lastExtentIndex].partitionNum << endl;
|
|
|
|
startNewPartition = true;
|
|
}
|
|
|
|
// @bug 4765
|
|
// 2b. Skip to next physical partition if we have a set of
|
|
// segment files that are not "layered" as expected, meaning we
|
|
// have > 1 layer of extents with an incomplete lower layer (could
|
|
// be caused by the dropping of logical partitions).
|
|
else if (targetDbRootSegs.size() < filesPerDBRootPerPartition)
|
|
{
|
|
for (TargetDbRootSegsMapConstIter iter = targetDbRootSegs.begin();
|
|
iter != targetDbRootSegs.end();
|
|
++iter)
|
|
{
|
|
if (iter->second > 0)
|
|
{
|
|
|
|
// cout << "Skipping to next partition (unbalanced)" <<
|
|
// ": oid-" << fExtentMap[lastExtentIndex].fileID <<
|
|
// "; root-" << fExtentMap[lastExtentIndex].dbRoot <<
|
|
// "; part-" << fExtentMap[lastExtentIndex].partitionNum <<
|
|
// "; seg-" << iter->first <<
|
|
// "; hifbo-"<< iter->second << endl;
|
|
|
|
startNewPartition = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// 3a.If we already detected need for new partition, then take no action
|
|
if (startNewPartition)
|
|
{
|
|
// no action taken here; we take additional action later.
|
|
}
|
|
|
|
// 3b.If HWM extent is in last seg file for this partition and DBRoot,
|
|
// find out if we need to add a new partition for next extent.
|
|
else if (targetDbRootSegs.size() >= filesPerDBRootPerPartition)
|
|
{
|
|
if (fExtentMap[lastExtIdx].segmentNum == lastTargetSeg)
|
|
{
|
|
// Use blockOffset of lastExtIdx to see if we need to add
|
|
// the next extent to a new partition.
|
|
if (fExtentMap[lastExtIdx].blockOffset ==
|
|
((EXTENTS_PER_SEGFILE - 1) *
|
|
(EXTENT_ROWS * colWidth / BLOCK_SIZE)) )
|
|
{
|
|
startNewPartition = true;
|
|
}
|
|
else // Wrap-around; add extent to low seg in this partition
|
|
{
|
|
startNewStripeInSegFile = true;
|
|
newSegmentNum = firstTargetSeg;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
newSegmentNum = nextTargetSeg;
|
|
}
|
|
}
|
|
else // Select next segment file in current HWM partition
|
|
{
|
|
newSegmentNum = partHighSeg + 1;
|
|
}
|
|
|
|
// 3c. Find new partition and segment if we can't create
|
|
// an extent for this DBRoot in the current HWM partition.
|
|
if (startNewPartition)
|
|
{
|
|
newPartitionNum++;
|
|
|
|
if (partHighSegNext == -1)
|
|
newSegmentNum = 0;
|
|
else
|
|
newSegmentNum = partHighSegNext + 1;
|
|
}
|
|
|
|
// 4. Set blockOffset (fbo) for new extent relative to it's seg file
|
|
// case1: Init fbo to 0 if first extent in partition/DbRoot
|
|
// case2: Init fbo to 0 if first extent in segment file (other than
|
|
// first segment in this partition/DbRoot, which case1 handled)
|
|
// case3: Init fbo based on previous extent
|
|
|
|
// case1: leave newBlockOffset set to 0
|
|
if (startNewPartition)
|
|
{
|
|
//...no action necessary
|
|
}
|
|
|
|
// case2: leave newBlockOffset set to 0
|
|
else if ((fExtentMap[lastExtIdx].blockOffset == 0) &&
|
|
(newSegmentNum > firstTargetSeg))
|
|
{
|
|
//...no action necessary
|
|
}
|
|
|
|
// case3: Init blockOffset based on previous extent. If we are adding
|
|
// extent to 1st seg file, then need to bump up the offset; else
|
|
// adding extent to same stripe and can repeat the same offset.
|
|
else
|
|
{
|
|
if (startNewStripeInSegFile) // start next stripe
|
|
{
|
|
newBlockOffset = static_cast<uint64_t>
|
|
(fExtentMap[lastExtIdx].range.size) * 1024 +
|
|
fExtentMap[lastExtIdx].blockOffset;
|
|
}
|
|
else // next extent, same stripe
|
|
{
|
|
newBlockOffset = fExtentMap[lastExtIdx].blockOffset;
|
|
}
|
|
}
|
|
} // lastExtentIndex >= 0
|
|
else // Empty DBRoot; use part# that the user specifies
|
|
{
|
|
if (bHighEmptySegNumSet)
|
|
newSegmentNum = highEmptySegNum + 1;
|
|
else
|
|
newSegmentNum = 0;
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Fourth Step: Construct the new extentmap entry
|
|
//--------------------------------------------------------------------------
|
|
|
|
makeUndoRecord(&fExtentMap[emptyEMEntry], sizeof(EMEntry));
|
|
EMEntry* e = &fExtentMap[emptyEMEntry];
|
|
|
|
e->range.start = startLBID;
|
|
e->range.size = size;
|
|
e->fileID = OID;
|
|
|
|
if (isUnsigned(colDataType))
|
|
{
|
|
e->partition.cprange.lo_val = numeric_limits<uint64_t>::max();
|
|
e->partition.cprange.hi_val = 0;
|
|
}
|
|
else
|
|
{
|
|
e->partition.cprange.lo_val = numeric_limits<int64_t>::max();
|
|
e->partition.cprange.hi_val = numeric_limits<int64_t>::min();
|
|
}
|
|
|
|
e->partition.cprange.sequenceNum = 0;
|
|
|
|
e->colWid = colWidth;
|
|
|
|
e->dbRoot = newDbRoot;
|
|
e->partitionNum = newPartitionNum;
|
|
e->segmentNum = newSegmentNum;
|
|
|
|
e->blockOffset = newBlockOffset;
|
|
e->HWM = 0;
|
|
e->status = EXTENTUNAVAILABLE; // mark extent as in process
|
|
|
|
// Partition, segment, and blockOffset 0 represents new table or column.
|
|
// When DDL creates a table, we can mark the first extent as VALID, since
|
|
// the table has no data. Marking as VALID enables cpimport to update
|
|
// the CP min/max for the first import.
|
|
// If DDL is adding a column to an existing table, setting to VALID won't
|
|
// hurt, because DDL resets to INVALID after the extent is created.
|
|
if ((e->partitionNum == 0) &&
|
|
(e->segmentNum == 0) &&
|
|
(e->blockOffset == 0))
|
|
e->partition.cprange.isValid = CP_VALID;
|
|
else
|
|
e->partition.cprange.isValid = CP_INVALID;
|
|
|
|
partitionNum = e->partitionNum;
|
|
segmentNum = e->segmentNum;
|
|
startBlockOffset = e->blockOffset;
|
|
|
|
makeUndoRecord(fEMShminfo, sizeof(MSTEntry));
|
|
fEMShminfo->currentSize += sizeof(struct EMEntry);
|
|
|
|
return startLBID;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Creates an extent for the exact segment column file specified by the
|
|
// requested OID, DBRoot, partition number, and segment number. This is
|
|
// the external API function referenced by the dbrm wrapper class.
|
|
// required input:
|
|
// OID - column OID for which the extent is to be created
|
|
// colWidth - width of column in bytes
|
|
// dbRoot - DBRoot where extent is to be added
|
|
// partitionNum - partitionNum
|
|
// segmentNum - segmentNum
|
|
// output:
|
|
// lbid - starting LBID of the created extent
|
|
// allocdsize - number of LBIDs allocated
|
|
// startBlockOffset-starting block of the created extent
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::createColumnExtentExactFile(int OID,
|
|
uint32_t colWidth,
|
|
uint16_t dbRoot,
|
|
uint32_t partitionNum,
|
|
uint16_t segmentNum,
|
|
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
|
LBID_t& lbid,
|
|
int& allocdsize,
|
|
uint32_t& startBlockOffset)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("createColumnExtentExactFile");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(colWidth);
|
|
TRACER_ADDSHORTINPUT(dbRoot);
|
|
TRACER_ADDOUTPUT(partitionNum);
|
|
TRACER_ADDSHORTOUTPUT(segmentNum);
|
|
TRACER_ADDINT64OUTPUT(lbid);
|
|
TRACER_ADDOUTPUT(allocdsize);
|
|
TRACER_ADDOUTPUT(startBlockOffset);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (OID <= 0)
|
|
{
|
|
log("ExtentMap::createColumnExtentExactFile(): OID must be > 0",
|
|
logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument(
|
|
"ExtentMap::createColumnExtentExactFile(): OID must be > 0");
|
|
}
|
|
|
|
#endif
|
|
|
|
// Convert extent size in rows to extent size in 8192-byte blocks.
|
|
// extentRows should be multiple of blocksize (8192).
|
|
const unsigned EXTENT_SIZE = (getExtentRows() * colWidth) / BLOCK_SIZE;
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
if (fEMShminfo->currentSize == fEMShminfo->allocdSize)
|
|
growEMShmseg();
|
|
|
|
// size is the number of multiples of 1024 blocks.
|
|
// ex: size=1 --> 1024 blocks
|
|
// size=2 --> 2048 blocks
|
|
// size=3 --> 3072 blocks, etc.
|
|
uint32_t size = EXTENT_SIZE / 1024;
|
|
|
|
lbid = _createColumnExtentExactFile(size, OID, colWidth,
|
|
dbRoot, partitionNum, segmentNum, colDataType, startBlockOffset);
|
|
|
|
allocdsize = EXTENT_SIZE;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Creates an extent for the exact segment file specified by the requested
|
|
// OID, DBRoot, partition, and segment. This is the internal implementation
|
|
// function.
|
|
// input:
|
|
// size - number of multiples of 1024 blocks allocated to the extent
|
|
// ex: size=1 --> 1024 blocks
|
|
// size=2 --> 2048 blocks
|
|
// size=3 --> 3072 blocks, etc.
|
|
// OID - column OID for which the extent is to be created
|
|
// colWidth - width of column in bytes
|
|
// dbRoot - dbRoot where extent is to be added
|
|
// partitionNum - partitionNum
|
|
// segmentNum - segmentNum
|
|
// output:
|
|
// startBlockOffset-starting block of the created extent
|
|
// returns starting LBID of the created extent.
|
|
//------------------------------------------------------------------------------
|
|
LBID_t ExtentMap::_createColumnExtentExactFile(uint32_t size, int OID,
|
|
uint32_t colWidth,
|
|
uint16_t dbRoot,
|
|
uint32_t partitionNum,
|
|
uint16_t segmentNum,
|
|
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
|
uint32_t& startBlockOffset)
|
|
{
|
|
int emptyEMEntry = -1;
|
|
int lastExtentIndex = -1;
|
|
uint32_t highestOffset = 0;
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
LBID_t startLBID = getLBIDsFromFreeList( size );
|
|
|
|
// Find the first empty Entry; and find the last extent for this
|
|
// combination of OID, partition, and segment.
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
if (fExtentMap[i].fileID == OID)
|
|
{
|
|
if ((fExtentMap[i].dbRoot == dbRoot) &&
|
|
(fExtentMap[i].partitionNum == partitionNum) &&
|
|
(fExtentMap[i].segmentNum == segmentNum) &&
|
|
(fExtentMap[i].blockOffset >= highestOffset))
|
|
{
|
|
lastExtentIndex = i;
|
|
highestOffset = fExtentMap[i].blockOffset;
|
|
}
|
|
}
|
|
}
|
|
else if (emptyEMEntry < 0)
|
|
emptyEMEntry = i;
|
|
} // Loop through extent map entries
|
|
|
|
if (emptyEMEntry == -1)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::_createColumnExtentExactFile(): "
|
|
"could not find an empty EMEntry for OID " << OID <<
|
|
"; Extent Map is full",
|
|
log(oss.str(),
|
|
logging::LOG_TYPE_CRITICAL);
|
|
throw logic_error( oss.str() );
|
|
}
|
|
|
|
makeUndoRecord(&fExtentMap[emptyEMEntry], sizeof(EMEntry));
|
|
EMEntry* e = &fExtentMap[emptyEMEntry];
|
|
|
|
e->range.start = startLBID;
|
|
e->range.size = size;
|
|
e->fileID = OID;
|
|
|
|
if (isUnsigned(colDataType))
|
|
{
|
|
e->partition.cprange.lo_val = numeric_limits<uint64_t>::max();
|
|
e->partition.cprange.hi_val = 0;
|
|
}
|
|
else
|
|
{
|
|
e->partition.cprange.lo_val = numeric_limits<int64_t>::max();
|
|
e->partition.cprange.hi_val = numeric_limits<int64_t>::min();
|
|
}
|
|
|
|
e->partition.cprange.sequenceNum = 0;
|
|
|
|
e->colWid = colWidth;
|
|
|
|
e->dbRoot = dbRoot;
|
|
e->partitionNum = partitionNum;
|
|
e->segmentNum = segmentNum;
|
|
e->status = EXTENTUNAVAILABLE; // mark extent as in process
|
|
|
|
// If first extent for this OID, partition, dbroot, and segment then
|
|
// blockOffset is set to 0
|
|
// else
|
|
// blockOffset is extrapolated from the last extent
|
|
if (lastExtentIndex == -1)
|
|
{
|
|
e->blockOffset = 0;
|
|
e->HWM = 0;
|
|
}
|
|
else
|
|
{
|
|
e->blockOffset = static_cast<uint64_t>
|
|
(fExtentMap[lastExtentIndex].range.size) * 1024 +
|
|
fExtentMap[lastExtentIndex].blockOffset;
|
|
e->HWM = 0;
|
|
}
|
|
|
|
// Partition, segment, and blockOffset 0 represents new table or column.
|
|
// When DDL creates a table, we can mark the first extent as VALID, since
|
|
// the table has no data. Marking as VALID enables cpimport to update
|
|
// the CP min/max for the first import.
|
|
// If DDL is adding a column to an existing table, setting to VALID won't
|
|
// hurt, because DDL resets to INVALID after the extent is created.
|
|
if ((e->partitionNum == 0) &&
|
|
(e->segmentNum == 0) &&
|
|
(e->blockOffset == 0))
|
|
e->partition.cprange.isValid = CP_VALID;
|
|
else
|
|
e->partition.cprange.isValid = CP_INVALID;
|
|
|
|
startBlockOffset = e->blockOffset;
|
|
|
|
makeUndoRecord(fEMShminfo, sizeof(MSTEntry));
|
|
fEMShminfo->currentSize += sizeof(struct EMEntry);
|
|
|
|
return startLBID;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Creates an extent for a dictionary store file. This is the external API
|
|
// function.
|
|
// input:
|
|
// OID - column OID for which the extent is to be created
|
|
// dbRoot - DBRoot to be assigned to the new extent
|
|
// partitionNum - partition number to be assigned to the new extent
|
|
// segmentNum - segment number to be assigned to the new extent
|
|
// output:
|
|
// lbid - starting LBID of the created extent
|
|
// allocdsize - number LBIDs of allocated
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::createDictStoreExtent(int OID,
|
|
uint16_t dbRoot,
|
|
uint32_t partitionNum,
|
|
uint16_t segmentNum,
|
|
LBID_t& lbid,
|
|
int& allocdsize)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("createDictStoreExtent");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDSHORTINPUT(dbRoot);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDINT64OUTPUT(lbid);
|
|
TRACER_ADDOUTPUT(allocdsize);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (OID <= 0)
|
|
{
|
|
log("ExtentMap::createDictStoreExtent(): OID must be > 0",
|
|
logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument(
|
|
"ExtentMap::createDictStoreExtent(): OID must be > 0");
|
|
}
|
|
|
|
#endif
|
|
|
|
// Convert extent size in rows to extent size in 8192-byte blocks.
|
|
// extentRows should be multiple of blocksize (8192).
|
|
const unsigned EXTENT_SIZE = (getExtentRows() * DICT_COL_WIDTH) / BLOCK_SIZE;
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
if (fEMShminfo->currentSize == fEMShminfo->allocdSize)
|
|
growEMShmseg();
|
|
|
|
// size is the number of multiples of 1024 blocks.
|
|
// ex: size=1 --> 1024 blocks
|
|
// size=2 --> 2048 blocks
|
|
// size=3 --> 3072 blocks, etc.
|
|
uint32_t size = EXTENT_SIZE / 1024;
|
|
|
|
lbid = _createDictStoreExtent(size, OID,
|
|
dbRoot, partitionNum, segmentNum);
|
|
|
|
allocdsize = EXTENT_SIZE;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Creates an extent for a dictionary store file. This is the internal
|
|
// implementation function.
|
|
// input:
|
|
// size - number of multiples of 1024 blocks allocated to the extent
|
|
// ex: size=1 --> 1024 blocks
|
|
// size=2 --> 2048 blocks
|
|
// size=3 --> 3072 blocks, etc.
|
|
// OID - column OID for which the extent is to be created
|
|
// dbRoot - DBRoot to be assigned to the new extent
|
|
// partitionNum - partition number to be assigned to the new extent
|
|
// segmentNum - segment number to be assigned to the new extent
|
|
// returns starting LBID of the created extent.
|
|
//------------------------------------------------------------------------------
|
|
LBID_t ExtentMap::_createDictStoreExtent(uint32_t size, int OID,
|
|
uint16_t dbRoot,
|
|
uint32_t partitionNum,
|
|
uint16_t segmentNum)
|
|
{
|
|
int emptyEMEntry = -1;
|
|
int lastExtentIndex = -1;
|
|
uint32_t highestOffset = 0;
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
LBID_t startLBID = getLBIDsFromFreeList( size );
|
|
|
|
// Find the first empty Entry; and find the last extent for this
|
|
// combination of OID, partition, and segment.
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
if ((fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].partitionNum == partitionNum) &&
|
|
(fExtentMap[i].segmentNum == segmentNum) &&
|
|
(fExtentMap[i].blockOffset >= highestOffset))
|
|
{
|
|
lastExtentIndex = i;
|
|
highestOffset = fExtentMap[i].blockOffset;
|
|
}
|
|
}
|
|
else if (emptyEMEntry < 0)
|
|
emptyEMEntry = i;
|
|
} // Loop through extent map entries
|
|
|
|
if (emptyEMEntry == -1)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::_createDictStoreExtent(): "
|
|
"could not find an empty EMEntry for OID " << OID <<
|
|
"; Extent Map is full",
|
|
log(oss.str(),
|
|
logging::LOG_TYPE_CRITICAL);
|
|
throw logic_error( oss.str() );
|
|
}
|
|
|
|
makeUndoRecord(&fExtentMap[emptyEMEntry], sizeof(EMEntry));
|
|
EMEntry* e = &fExtentMap[emptyEMEntry];
|
|
|
|
e->range.start = startLBID;
|
|
e->range.size = size;
|
|
e->fileID = OID;
|
|
e->status = EXTENTUNAVAILABLE;// @bug 1911 mark extent as in process
|
|
e->partition.cprange.lo_val = numeric_limits<int64_t>::max();
|
|
e->partition.cprange.hi_val = numeric_limits<int64_t>::min();
|
|
e->partition.cprange.sequenceNum = 0;
|
|
e->partition.cprange.isValid = CP_INVALID;
|
|
|
|
// If this is first extent for this OID, partition, segment then
|
|
// everything is set to 0 or taken from user input
|
|
// else
|
|
// everything is extrapolated from the last extent
|
|
if (lastExtentIndex == -1)
|
|
{
|
|
e->blockOffset = 0;
|
|
e->HWM = 0;
|
|
e->segmentNum = segmentNum;
|
|
e->partitionNum = partitionNum;
|
|
e->dbRoot = dbRoot;
|
|
e->colWid = 0; // we don't store col width for dictionaries;
|
|
// this helps to flag this as a dictionary extent
|
|
}
|
|
else
|
|
{
|
|
e->blockOffset = static_cast<uint64_t>
|
|
(fExtentMap[lastExtentIndex].range.size) * 1024 +
|
|
fExtentMap[lastExtentIndex].blockOffset;
|
|
e->HWM = 0;
|
|
e->segmentNum = fExtentMap[lastExtentIndex].segmentNum;
|
|
e->partitionNum = fExtentMap[lastExtentIndex].partitionNum;
|
|
e->dbRoot = fExtentMap[lastExtentIndex].dbRoot;
|
|
e->colWid = fExtentMap[lastExtentIndex].colWid;
|
|
}
|
|
|
|
makeUndoRecord(fEMShminfo, sizeof(MSTEntry));
|
|
fEMShminfo->currentSize += sizeof(struct EMEntry);
|
|
|
|
return startLBID;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Finds and returns the starting LBID for an LBID range taken from the
|
|
// free list.
|
|
// input:
|
|
// size - number of multiples of 1024 blocks needed from the free list
|
|
// ex: size=1 --> 1024 blocks
|
|
// size=2 --> 2048 blocks
|
|
// size=3 --> 3072 blocks, etc.
|
|
// returns selected starting LBID.
|
|
//------------------------------------------------------------------------------
|
|
LBID_t ExtentMap::getLBIDsFromFreeList ( uint32_t size )
|
|
{
|
|
LBID_t ret = -1;
|
|
int i;
|
|
int flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
|
|
for (i = 0; i < flEntries; i++)
|
|
{
|
|
if (size <= fFreeList[i].size)
|
|
{
|
|
makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
|
|
ret = fFreeList[i].start;
|
|
fFreeList[i].start += size * 1024;
|
|
fFreeList[i].size -= size;
|
|
|
|
if (fFreeList[i].size == 0)
|
|
{
|
|
makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
|
|
fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (i == flEntries)
|
|
{
|
|
log("ExtentMap::getLBIDsFromFreeList(): out of LBID space");
|
|
throw runtime_error(
|
|
"ExtentMap::getLBIDsFromFreeList(): out of LBID space");
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
#ifdef BRM_DEBUG
|
|
void ExtentMap::printEM(const EMEntry& em) const
|
|
{
|
|
cout << " Start "
|
|
<< em.range.start << " Size "
|
|
<< (long) em.range.size << " OID "
|
|
<< (long) em.fileID << " offset "
|
|
<< (long) em.blockOffset
|
|
<< " LV " << em.partition.cprange.lo_val
|
|
<< " HV " << em.partition.cprange.hi_val;
|
|
cout << endl;
|
|
}
|
|
|
|
|
|
void ExtentMap::printEM(const OID_t& oid) const
|
|
{
|
|
int emEntries = 0;
|
|
|
|
if (fEMShminfo)
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
cout << "Extent Map (OID=" << oid << ")" << endl;
|
|
|
|
for (int idx = 0; idx < emEntries ; idx++)
|
|
{
|
|
struct EMEntry& em = fExtentMap[idx];
|
|
|
|
if (em.fileID == oid && em.range.size != 0)
|
|
printEM(em);
|
|
}
|
|
|
|
cout << endl;
|
|
}
|
|
|
|
void ExtentMap::printEM() const
|
|
{
|
|
|
|
int emEntries = 0;
|
|
|
|
if (fEMShminfo)
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
cout << "Extent Map (" << emEntries << ")" << endl;
|
|
|
|
for (int idx = 0; idx < emEntries ; idx++)
|
|
{
|
|
struct EMEntry& em = fExtentMap[idx];
|
|
|
|
if (em.range.size != 0)
|
|
printEM(em);
|
|
}
|
|
|
|
cout << endl;
|
|
}
|
|
|
|
void ExtentMap::printFL() const
|
|
{
|
|
|
|
int flEntries = 0;
|
|
|
|
if (fFLShminfo)
|
|
flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
|
|
cout << "Free List" << endl;
|
|
|
|
for (int idx = 0; idx < flEntries; idx++)
|
|
{
|
|
|
|
cout << idx << " "
|
|
<< fFreeList[idx].start << " "
|
|
<< fFreeList[idx].size
|
|
<< endl;
|
|
}
|
|
|
|
cout << endl;
|
|
}
|
|
#endif
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Rollback (delete) the extents that logically follow the specified extent for
|
|
// the given OID and DBRoot. HWM for the last extent is reset to the specified
|
|
// value.
|
|
// input:
|
|
// oid - OID of the last logical extent to be retained
|
|
// bDeleteAll - Flag indicates whether all extents for oid and dbroot are
|
|
// to be deleted; else part#, seg#, and hwm are used.
|
|
// dbRoot - DBRoot of the extents to be considered.
|
|
// partitionNum - partition number of the last logical extent to be retained
|
|
// segmentNum - segment number of the last logical extent to be retained
|
|
// hwm - HWM to be assigned to the last logical extent retained
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::rollbackColumnExtents_DBroot ( int oid,
|
|
bool bDeleteAll,
|
|
uint16_t dbRoot,
|
|
uint32_t partitionNum,
|
|
uint16_t segmentNum,
|
|
HWM_t hwm)
|
|
{
|
|
//bool oidExists = false;
|
|
|
|
#ifdef BRM_INFO
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("rollbackColumnExtents");
|
|
TRACER_ADDINPUT(oid);
|
|
TRACER_ADDBOOLINPUT(bDeleteAll);
|
|
TRACER_ADDSHORTINPUT(dbRoot);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDINPUT(hwm);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (oid < 0)
|
|
{
|
|
log("ExtentMap::rollbackColumnExtents_DBroot(): OID must be >= 0",
|
|
logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument(
|
|
"ExtentMap::rollbackColumnExtents_DBroot(): OID must be >= 0");
|
|
}
|
|
|
|
#endif
|
|
|
|
uint32_t fboLo = 0;
|
|
uint32_t fboHi = 0;
|
|
uint32_t fboLoPreviousStripe = 0;
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == oid) &&
|
|
(fExtentMap[i].dbRoot == dbRoot))
|
|
{
|
|
|
|
//oidExists = true;
|
|
|
|
// Don't rollback extents that are out of service
|
|
if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
|
|
continue;
|
|
|
|
// If bDeleteAll is true, then we delete extent w/o regards to
|
|
// partition number, segment number, or HWM
|
|
if (bDeleteAll)
|
|
{
|
|
deleteExtent( i ); // case 0
|
|
continue;
|
|
}
|
|
|
|
// Calculate fbo range for the stripe containing the given hwm
|
|
if (fboHi == 0)
|
|
{
|
|
uint32_t range = fExtentMap[i].range.size * 1024;
|
|
fboLo = hwm - (hwm % range);
|
|
fboHi = fboLo + range - 1;
|
|
|
|
if (fboLo > 0)
|
|
fboLoPreviousStripe = fboLo - range;
|
|
}
|
|
|
|
// Delete, update, or ignore this extent:
|
|
// Later partition:
|
|
// case 1: extent in later partition than last extent, so delete
|
|
// Same partition:
|
|
// case 2: extent is in later stripe than last extent, so delete
|
|
// case 3: extent is in earlier stripe in the same partition.
|
|
// No action necessary for case3B and case3C.
|
|
// case 3A: extent is in trailing segment in previous stripe.
|
|
// This extent is now the last extent in that segment
|
|
// file, so reset the local HWM if it was altered.
|
|
// case 3B: extent in previous stripe but not a trailing segment
|
|
// case 3C: extent is in stripe that precedes previous stripe
|
|
// case 4: extent is in the same partition and stripe as the
|
|
// last logical extent we are to keep.
|
|
// case 4A: extent is in later segment so can be deleted
|
|
// case 4B: extent is in earlier segment, reset HWM if changed
|
|
// case 4C: this is last logical extent, reset HWM if changed
|
|
// Earlier partition:
|
|
// case 5: extent is in earlier parition, no action necessary
|
|
|
|
if (fExtentMap[i].partitionNum > partitionNum)
|
|
{
|
|
deleteExtent( i ); // case 1
|
|
}
|
|
else if (fExtentMap[i].partitionNum == partitionNum)
|
|
{
|
|
if (fExtentMap[i].blockOffset > fboHi)
|
|
{
|
|
deleteExtent( i ); // case 2
|
|
}
|
|
else if (fExtentMap[i].blockOffset < fboLo)
|
|
{
|
|
if (fExtentMap[i].blockOffset >= fboLoPreviousStripe)
|
|
{
|
|
if (fExtentMap[i].segmentNum > segmentNum)
|
|
{
|
|
if (fExtentMap[i].HWM != (fboLo - 1))
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = fboLo - 1; //case 3A
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// not a trailing segment in prev stripe case 3B
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// extent precedes previous stripe case 3C
|
|
}
|
|
}
|
|
else // extent is in same stripe
|
|
{
|
|
if (fExtentMap[i].segmentNum > segmentNum)
|
|
{
|
|
deleteExtent( i ); // case 4A
|
|
}
|
|
else if (fExtentMap[i].segmentNum < segmentNum)
|
|
{
|
|
if (fExtentMap[i].HWM != fboHi)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = fboHi; // case 4B
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
}
|
|
else // fExtentMap[i].segmentNum == segmentNum
|
|
{
|
|
if (fExtentMap[i].HWM != hwm)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = hwm; // case 4C
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// extent in earlier partition; no action necessary case 5
|
|
}
|
|
} // extent map entry with matching oid
|
|
} // loop through the extent map
|
|
|
|
// If this function is called, we are already in error recovery mode; so
|
|
// don't worry about reporting an error if the OID is not found, because
|
|
// we don't want/need the extents for that OID anyway.
|
|
//if (!oidExists)
|
|
//{
|
|
// ostringstream oss;
|
|
// oss << "ExtentMap::rollbackColumnExtents_DBroot(): "
|
|
// "Rollback failed: no extents exist for: OID-" << oid <<
|
|
// "; dbRoot-" << dbRoot <<
|
|
// "; partition-" << partitionNum <<
|
|
// "; segment-" << segmentNum <<
|
|
// "; hwm-" << hwm;
|
|
// log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
// throw invalid_argument(oss.str());
|
|
//}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Rollback (delete) the extents that follow the extents in partitionNum,
|
|
// for the given dictionary OID & DBRoot. The specified hwms represent the HWMs
|
|
// to be reset for each of segment store file in this partition. An HWM will
|
|
// not be given for "every" segment file if we are rolling back to a point where
|
|
// we had not yet created all the segment files in the partition. In any case,
|
|
// any extents for the "oid" that follow partitionNum, should be deleted.
|
|
// Likewise, any extents in the same partition, whose segment file is not in
|
|
// segNums[], should be deleted as well. If hwms is empty, then this DBRoot
|
|
// must have been empty at the start of the job, so all the extents for the
|
|
// specified oid and dbRoot can be deleted.
|
|
// input:
|
|
// oid - OID of the "last" extents to be retained
|
|
// dbRoot - DBRoot of the extents to be considered.
|
|
// partitionNum - partition number of the last extents to be retained
|
|
// segNums - list of segment files with extents to be restored
|
|
// hwms - HWMs to be assigned to the last retained extent in each of
|
|
// the corresponding segment store files in segNums.
|
|
// hwms[0] applies to segment store file segNums[0];
|
|
// hwms[1] applies to segment store file segNums[1]; etc.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::rollbackDictStoreExtents_DBroot ( int oid,
|
|
uint16_t dbRoot,
|
|
uint32_t partitionNum,
|
|
const vector<uint16_t>& segNums,
|
|
const vector<HWM_t>& hwms)
|
|
{
|
|
//bool oidExists = false;
|
|
|
|
#ifdef BRM_INFO
|
|
if (fDebug)
|
|
{
|
|
ostringstream oss;
|
|
|
|
for (unsigned int k = 0; k < hwms.size(); k++)
|
|
oss << "; hwms[" << k << "]-" << hwms[k];
|
|
|
|
const string& hwmString(oss.str());
|
|
|
|
// put TRACE inside separate scope {} to insure that temporary
|
|
// hwmString still exists when tracer destructor tries to print it.
|
|
{
|
|
TRACER_WRITELATER("rollbackDictStoreExtents_DBroot");
|
|
TRACER_ADDINPUT(oid);
|
|
TRACER_ADDSHORTINPUT(dbRoot);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSTRINPUT(hwmString);
|
|
TRACER_WRITE;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
// Delete all extents for the specified OID and DBRoot,
|
|
// if we are not given any hwms and segment files.
|
|
bool bDeleteAll = false;
|
|
|
|
if (hwms.size() == 0)
|
|
bDeleteAll = true;
|
|
|
|
// segToHwmMap maps segment file number to corresponding pair<hwm,fboLo>
|
|
tr1::unordered_map<uint16_t, pair<uint32_t, uint32_t> > segToHwmMap;
|
|
tr1::unordered_map<uint16_t, pair<uint32_t, uint32_t> >::const_iterator
|
|
segToHwmMapIter;
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == oid) &&
|
|
(fExtentMap[i].dbRoot == dbRoot))
|
|
{
|
|
|
|
//oidExists = true;
|
|
|
|
// Don't rollback extents that are out of service
|
|
if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
|
|
continue;
|
|
|
|
// If bDeleteAll is true, then we delete extent w/o regards to
|
|
// partition number, segment number, or HWM
|
|
if (bDeleteAll)
|
|
{
|
|
deleteExtent( i ); // case 0
|
|
continue;
|
|
}
|
|
|
|
// Calculate fbo's for the list of hwms we are given; and store
|
|
// the fbo and hwm in a map, using the segment file number as a key.
|
|
if (segToHwmMap.size() == 0)
|
|
{
|
|
uint32_t range = fExtentMap[i].range.size * 1024;
|
|
pair<uint32_t, uint32_t> segToHwmMapEntry;
|
|
|
|
for (unsigned int k = 0; k < hwms.size(); k++)
|
|
{
|
|
uint32_t fboLo = hwms[k] - (hwms[k] % range);
|
|
segToHwmMapEntry.first = hwms[k];
|
|
segToHwmMapEntry.second = fboLo;
|
|
segToHwmMap[ segNums[k] ] = segToHwmMapEntry;
|
|
}
|
|
}
|
|
|
|
// Delete, update, or ignore this extent:
|
|
// Later partition:
|
|
// case 1: extent is in later partition, so delete the extent
|
|
// Same partition:
|
|
// case 2: extent is in trailing seg file we don't need; so delete
|
|
// case 3: extent is in partition and segment file of interest
|
|
// case 3A: earlier extent in segment file; no action necessary
|
|
// case 3B: specified HWM falls in this extent, so reset HWM
|
|
// case 3C: later extent in segment file; so delete the extent
|
|
// Earlier partition:
|
|
// case 4: extent is in earlier parition, no action necessary
|
|
|
|
if (fExtentMap[i].partitionNum > partitionNum)
|
|
{
|
|
deleteExtent( i ); // case 1
|
|
}
|
|
else if (fExtentMap[i].partitionNum == partitionNum)
|
|
{
|
|
unsigned segNum = fExtentMap[i].segmentNum;
|
|
segToHwmMapIter = segToHwmMap.find( segNum );
|
|
|
|
if (segToHwmMapIter == segToHwmMap.end())
|
|
{
|
|
deleteExtent( i ); // case 2
|
|
}
|
|
else // segment number in the map of files to keep
|
|
{
|
|
uint32_t fboLo = segToHwmMapIter->second.second;
|
|
|
|
if (fExtentMap[i].blockOffset < fboLo)
|
|
{
|
|
// no action necessary case 3A
|
|
}
|
|
else if (fExtentMap[i].blockOffset == fboLo)
|
|
{
|
|
uint32_t hwm = segToHwmMapIter->second.first;
|
|
|
|
if (fExtentMap[i].HWM != hwm)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = hwm;
|
|
fExtentMap[i].status = EXTENTAVAILABLE; // case 3B
|
|
}
|
|
}
|
|
else
|
|
{
|
|
deleteExtent( i ); // case 3C
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// extent in earlier partition; no action necessary case 4
|
|
}
|
|
} // extent map entry with matching oid
|
|
} // loop through the extent map
|
|
|
|
// If this function is called, we are already in error recovery mode; so
|
|
// don't worry about reporting an error if the OID is not found, because
|
|
// we don't want/need the extents for that OID anyway.
|
|
//if (!oidExists)
|
|
//{
|
|
// ostringstream oss;
|
|
// oss << "ExtentMap::rollbackDictStoreExtents_DBroot(): "
|
|
// "Rollback failed: no extents exist for: OID-" << oid <<
|
|
// "; dbRoot-" << dbRoot <<
|
|
// "; partition-" << partitionNum;
|
|
// log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
// throw invalid_argument(oss.str());
|
|
//}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Delete the extents specified and reset hwm
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::deleteEmptyColExtents(const ExtentsInfoMap_t& extentsInfo)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("deleteEmptyColExtents");
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
uint32_t fboLo = 0;
|
|
uint32_t fboHi = 0;
|
|
uint32_t fboLoPreviousStripe = 0;
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
ExtentsInfoMap_t::const_iterator it;
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
it = extentsInfo.find ( fExtentMap[i].fileID );
|
|
|
|
if ( it != extentsInfo.end() )
|
|
{
|
|
// Don't rollback extents that are out of service
|
|
if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
|
|
continue;
|
|
|
|
// Calculate fbo range for the stripe containing the given hwm
|
|
if (fboHi == 0)
|
|
{
|
|
uint32_t range = fExtentMap[i].range.size * 1024;
|
|
fboLo = it->second.hwm - (it->second.hwm % range);
|
|
fboHi = fboLo + range - 1;
|
|
|
|
if (fboLo > 0)
|
|
fboLoPreviousStripe = fboLo - range;
|
|
}
|
|
|
|
// Delete, update, or ignore this extent:
|
|
// Later partition:
|
|
// case 1: extent in later partition than last extent, so delete
|
|
// Same partition:
|
|
// case 2: extent is in later stripe than last extent, so delete
|
|
// case 3: extent is in earlier stripe in the same partition.
|
|
// No action necessary for case3B and case3C.
|
|
// case 3A: extent is in trailing segment in previous stripe.
|
|
// This extent is now the last extent in that segment
|
|
// file, so reset the local HWM if it was altered.
|
|
// case 3B: extent in previous stripe but not a trailing segment
|
|
// case 3C: extent is in stripe that precedes previous stripe
|
|
// case 4: extent is in the same partition and stripe as the
|
|
// last logical extent we are to keep.
|
|
// case 4A: extent is in later segment so can be deleted
|
|
// case 4B: extent is in earlier segment, reset HWM if changed
|
|
// case 4C: this is last logical extent, reset HWM if changed
|
|
// Earlier partition:
|
|
// case 5: extent is in earlier parition, no action necessary
|
|
|
|
if (fExtentMap[i].partitionNum > it->second.partitionNum)
|
|
{
|
|
deleteExtent( i ); // case 1
|
|
}
|
|
else if (fExtentMap[i].partitionNum == it->second.partitionNum)
|
|
{
|
|
if (fExtentMap[i].blockOffset > fboHi)
|
|
{
|
|
deleteExtent( i ); // case 2
|
|
}
|
|
else if (fExtentMap[i].blockOffset < fboLo)
|
|
{
|
|
if (fExtentMap[i].blockOffset >= fboLoPreviousStripe)
|
|
{
|
|
if (fExtentMap[i].segmentNum > it->second.segmentNum)
|
|
{
|
|
if (fExtentMap[i].HWM != (fboLo - 1))
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = fboLo - 1; //case 3A
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// not a trailing segment in prev stripe case 3B
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// extent precedes previous stripe case 3C
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// extent is in same stripe
|
|
if (fExtentMap[i].segmentNum > it->second.segmentNum)
|
|
{
|
|
deleteExtent( i ); // case 4A
|
|
}
|
|
else if (fExtentMap[i].segmentNum < it->second.segmentNum)
|
|
{
|
|
if (fExtentMap[i].HWM != fboHi)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = fboHi; // case 4B
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// fExtentMap[i].segmentNum == segmentNum
|
|
if (fExtentMap[i].HWM != it->second.hwm)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = it->second.hwm;// case 4C
|
|
fExtentMap[i].status = EXTENTAVAILABLE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// extent in earlier partition; no action necessary case 5
|
|
}
|
|
} // extent map entry with matching oid
|
|
}
|
|
} // loop through the extent map
|
|
}
|
|
|
|
void ExtentMap::deleteEmptyDictStoreExtents(const ExtentsInfoMap_t& extentsInfo)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("deleteEmptyDictStoreExtents");
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
ExtentsInfoMap_t::const_iterator it;
|
|
|
|
uint32_t fboLo = 0;
|
|
uint32_t fboHi = 0;
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
it = extentsInfo.begin();
|
|
|
|
if ( it->second.newFile ) //The extent is the new extent
|
|
{
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
it = extentsInfo.find ( fExtentMap[i].fileID );
|
|
|
|
if ( it != extentsInfo.end() )
|
|
{
|
|
if ((fExtentMap[i].partitionNum == it->second.partitionNum)
|
|
&& (fExtentMap[i].segmentNum == it->second.segmentNum)
|
|
&& (fExtentMap[i].dbRoot == it->second.dbRoot) )
|
|
deleteExtent( i );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else //The extent is the old one
|
|
{
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
it = extentsInfo.find ( fExtentMap[i].fileID );
|
|
|
|
if ( it != extentsInfo.end() )
|
|
{
|
|
// Don't rollback extents that are out of service
|
|
if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
|
|
continue;
|
|
|
|
// Calculate fbo
|
|
if (fboHi == 0)
|
|
{
|
|
uint32_t range = fExtentMap[i].range.size * 1024;
|
|
fboLo = it->second.hwm - (it->second.hwm % range);
|
|
fboHi = fboLo + range - 1;
|
|
}
|
|
|
|
// Delete, update, or ignore this extent:
|
|
// Later partition:
|
|
// case 1: extent is in later partition, so delete the extent
|
|
// Same partition:
|
|
// case 2: extent is in partition and segment file of interest
|
|
// case 2A: earlier extent in segment file; no action necessary
|
|
// case 2B: specified HWM falls in this extent, so reset HWM
|
|
// case 2C: later extent in segment file; so delete the extent
|
|
// Earlier partition:
|
|
// case 3: extent is in earlier parition, no action necessary
|
|
|
|
if (fExtentMap[i].partitionNum > it->second.partitionNum)
|
|
{
|
|
deleteExtent( i ); // case 1
|
|
}
|
|
else if (fExtentMap[i].partitionNum == it->second.partitionNum)
|
|
{
|
|
if ( fExtentMap[i].segmentNum == it->second.segmentNum)
|
|
{
|
|
if (fExtentMap[i].blockOffset < fboLo)
|
|
{
|
|
// no action necessary case 2A
|
|
}
|
|
else if (fExtentMap[i].blockOffset == fboLo)
|
|
{
|
|
if (fExtentMap[i].HWM != it->second.hwm)
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].HWM = it->second.hwm;
|
|
fExtentMap[i].status = EXTENTAVAILABLE;//case 2B
|
|
}
|
|
}
|
|
else
|
|
{
|
|
deleteExtent( i ); // case 3C
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// no action necessary
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// extent in earlier partition; no action necessary case 4
|
|
}
|
|
} // extent map entry with matching oid
|
|
}
|
|
} // loop through the extent map
|
|
}
|
|
}
|
|
//------------------------------------------------------------------------------
|
|
// Delete all the extents for the specified OID
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::deleteOID(int OID)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("deleteOID");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
bool OIDExists = false;
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (OID < 0)
|
|
{
|
|
log("ExtentMap::deleteOID(): OID must be >= 0", logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument("ExtentMap::deleteOID(): OID must be >= 0");
|
|
}
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int emIndex = 0; emIndex < emEntries; emIndex++)
|
|
{
|
|
|
|
if (fExtentMap[emIndex].range.size > 0 &&
|
|
fExtentMap[emIndex].fileID == OID)
|
|
{
|
|
OIDExists = true;
|
|
|
|
deleteExtent( emIndex );
|
|
}
|
|
}
|
|
|
|
if (!OIDExists)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::deleteOID(): There are no extent entries for OID " << OID << endl;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Delete all the extents for the specified OIDs
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::deleteOIDs(const OidsMap_t& OIDs)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("deleteOIDs");
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
OidsMap_t::const_iterator it;
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int emIndex = 0; emIndex < emEntries; emIndex++)
|
|
{
|
|
if (fExtentMap[emIndex].range.size > 0 )
|
|
{
|
|
it = OIDs.find ( fExtentMap[emIndex].fileID );
|
|
|
|
if ( it != OIDs.end() )
|
|
deleteExtent( emIndex );
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Delete the specified extent from the extentmap and return to the free list.
|
|
// emIndex - the index (from the extent map) of the extent to be deleted
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::deleteExtent(int emIndex)
|
|
{
|
|
int flIndex, freeFLIndex, flEntries, preceedingExtent, succeedingExtent;
|
|
LBID_t flBlockEnd, emBlockEnd;
|
|
|
|
flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
|
|
emBlockEnd = fExtentMap[emIndex].range.start +
|
|
(static_cast<LBID_t>(fExtentMap[emIndex].range.size) * 1024);
|
|
|
|
//scan the freelist to see where this entry fits in
|
|
for (flIndex = 0, preceedingExtent = -1, succeedingExtent = -1, freeFLIndex = -1;
|
|
flIndex < flEntries; flIndex++)
|
|
{
|
|
if (fFreeList[flIndex].size == 0)
|
|
freeFLIndex = flIndex;
|
|
else
|
|
{
|
|
flBlockEnd = fFreeList[flIndex].start +
|
|
(static_cast<LBID_t>(fFreeList[flIndex].size) * 1024);
|
|
|
|
if (emBlockEnd == fFreeList[flIndex].start)
|
|
succeedingExtent = flIndex;
|
|
else if (flBlockEnd == fExtentMap[emIndex].range.start)
|
|
preceedingExtent = flIndex;
|
|
}
|
|
}
|
|
|
|
//update the freelist
|
|
|
|
//this space is in between 2 blocks in the FL
|
|
if (preceedingExtent != -1 && succeedingExtent != -1)
|
|
{
|
|
makeUndoRecord(&fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
|
|
|
|
// migrate the entry upward if there's a space
|
|
if (freeFLIndex < preceedingExtent && freeFLIndex != -1)
|
|
{
|
|
makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
|
|
memcpy(&fFreeList[freeFLIndex], &fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
|
|
fFreeList[preceedingExtent].size = 0;
|
|
preceedingExtent = freeFLIndex;
|
|
}
|
|
|
|
fFreeList[preceedingExtent].size += fFreeList[succeedingExtent].size +
|
|
fExtentMap[emIndex].range.size;
|
|
makeUndoRecord(&fFreeList[succeedingExtent], sizeof(InlineLBIDRange));
|
|
fFreeList[succeedingExtent].size = 0;
|
|
makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
|
|
fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
|
|
}
|
|
|
|
//this space has a free block at the end
|
|
else if (succeedingExtent != -1)
|
|
{
|
|
makeUndoRecord(&fFreeList[succeedingExtent], sizeof(InlineLBIDRange));
|
|
|
|
// migrate the entry upward if there's a space
|
|
if (freeFLIndex < succeedingExtent && freeFLIndex != -1)
|
|
{
|
|
makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
|
|
memcpy(&fFreeList[freeFLIndex], &fFreeList[succeedingExtent], sizeof(InlineLBIDRange));
|
|
fFreeList[succeedingExtent].size = 0;
|
|
succeedingExtent = freeFLIndex;
|
|
}
|
|
|
|
fFreeList[succeedingExtent].start = fExtentMap[emIndex].range.start;
|
|
fFreeList[succeedingExtent].size += fExtentMap[emIndex].range.size;
|
|
}
|
|
|
|
//this space has a free block at the beginning
|
|
else if (preceedingExtent != -1)
|
|
{
|
|
makeUndoRecord(&fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
|
|
|
|
// migrate the entry upward if there's a space
|
|
if (freeFLIndex < preceedingExtent && freeFLIndex != -1)
|
|
{
|
|
makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
|
|
memcpy(&fFreeList[freeFLIndex], &fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
|
|
fFreeList[preceedingExtent].size = 0;
|
|
preceedingExtent = freeFLIndex;
|
|
}
|
|
|
|
fFreeList[preceedingExtent].size += fExtentMap[emIndex].range.size;
|
|
}
|
|
|
|
//the freelist has no adjacent blocks, so make a new entry
|
|
else
|
|
{
|
|
if (fFLShminfo->currentSize == fFLShminfo->allocdSize)
|
|
{
|
|
growFLShmseg();
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (freeFLIndex != -1)
|
|
{
|
|
log("ExtentMap::deleteOID(): found a free FL entry in a supposedly full shmseg", logging::LOG_TYPE_DEBUG);
|
|
throw logic_error("ExtentMap::deleteOID(): found a free FL entry in a supposedly full shmseg");
|
|
}
|
|
|
|
#endif
|
|
freeFLIndex = flEntries; // happens to be the right index
|
|
flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
}
|
|
|
|
#ifdef BRM_DEBUG
|
|
|
|
if (freeFLIndex == -1)
|
|
{
|
|
log("ExtentMap::deleteOID(): no available free list entries?", logging::LOG_TYPE_DEBUG);
|
|
throw logic_error("ExtentMap::deleteOID(): no available free list entries?");
|
|
}
|
|
|
|
#endif
|
|
makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
|
|
fFreeList[freeFLIndex].start = fExtentMap[emIndex].range.start;
|
|
fFreeList[freeFLIndex].size = fExtentMap[emIndex].range.size;
|
|
makeUndoRecord(&fFLShminfo, sizeof(MSTEntry));
|
|
fFLShminfo->currentSize += sizeof(InlineLBIDRange);
|
|
}
|
|
|
|
//invalidate the entry in the Extent Map
|
|
makeUndoRecord(&fExtentMap[emIndex], sizeof(EMEntry));
|
|
fExtentMap[emIndex].range.size = 0;
|
|
makeUndoRecord(&fEMShminfo, sizeof(MSTEntry));
|
|
fEMShminfo->currentSize -= sizeof(struct EMEntry);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Returns the last local HWM for the specified OID for the given DBroot.
|
|
// Also returns the DBRoot, and partition, and segment numbers for the relevant
|
|
// segment file. Technically, this function finds the "last" extent for the
|
|
// specified OID, and returns the HWM for that extent. It is assumed that the
|
|
// HWM for the segment file containing this "last" extent, has been stored in
|
|
// that extent's hwm; and that the hwm is not still hanging around in a previous
|
|
// extent for the same segment file.
|
|
// If no available or outOfService extent is found, then bFound is returned
|
|
// as false.
|
|
//------------------------------------------------------------------------------
|
|
HWM_t ExtentMap::getLastHWM_DBroot(int OID, uint16_t dbRoot,
|
|
uint32_t& partitionNum, uint16_t& segmentNum, int& status, bool& bFound)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getLastHWM_DBroot");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDSHORTINPUT(dbRoot);
|
|
TRACER_ADDOUTPUT(partitionNum);
|
|
TRACER_ADDSHORTOUTPUT(segmentNum);
|
|
TRACER_ADDOUTPUT(status);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
uint32_t lastExtent = 0;
|
|
int lastExtentIndex = -1;
|
|
partitionNum = 0;
|
|
segmentNum = 0;
|
|
HWM_t hwm = 0;
|
|
bFound = false;
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getLastHWM_DBroot(): invalid OID requested: " << OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
// Searching the array in reverse order should be faster since the last
|
|
// extent is usually at the bottom. We still have to search the entire
|
|
// array (just in case), but the number of operations per loop iteration
|
|
// will be less.
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = emEntries - 1; i >= 0; i--)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].dbRoot == dbRoot) &&
|
|
((fExtentMap[i].status == EXTENTAVAILABLE) ||
|
|
(fExtentMap[i].status == EXTENTOUTOFSERVICE)))
|
|
{
|
|
if ( (fExtentMap[i].partitionNum > partitionNum) ||
|
|
((fExtentMap[i].partitionNum == partitionNum) &&
|
|
(fExtentMap[i].blockOffset > lastExtent)) ||
|
|
((fExtentMap[i].partitionNum == partitionNum) &&
|
|
(fExtentMap[i].blockOffset == lastExtent) &&
|
|
(fExtentMap[i].segmentNum >= segmentNum)) )
|
|
{
|
|
lastExtent = fExtentMap[i].blockOffset;
|
|
partitionNum = fExtentMap[i].partitionNum;
|
|
segmentNum = fExtentMap[i].segmentNum;
|
|
lastExtentIndex = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
// save additional information before we release the read-lock
|
|
if (lastExtentIndex != -1)
|
|
{
|
|
hwm = fExtentMap[lastExtentIndex].HWM;
|
|
status = fExtentMap[lastExtentIndex].status;
|
|
bFound = true;
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
|
|
return hwm;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// For the specified OID and PM number, this function will return a vector
|
|
// of objects carrying HWM info (for the last segment file) and block count
|
|
// information about each DBRoot assigned to the specified PM.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::getDbRootHWMInfo(int OID, uint16_t pmNumber,
|
|
EmDbRootHWMInfo_v& emDbRootHwmInfos)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getDbRootHWMInfo");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDSHORTINPUT(pmNumber);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getDbRootHWMInfo(): invalid OID requested: " << OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
// Determine List of DBRoots for specified PM, and construct map of
|
|
// EmDbRootHWMInfo objects.
|
|
tr1::unordered_map<uint16_t, EmDbRootHWMInfo> emDbRootMap;
|
|
vector<int> dbRootList;
|
|
getPmDbRoots( pmNumber, dbRootList );
|
|
|
|
if ( dbRootList.size() > 0 )
|
|
{
|
|
for (unsigned int iroot = 0; iroot < dbRootList.size(); iroot++)
|
|
{
|
|
uint16_t rootID = dbRootList[iroot];
|
|
EmDbRootHWMInfo emDbRootInfo(rootID);
|
|
emDbRootMap[rootID] = emDbRootInfo;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getDbRootHWMInfo(): "
|
|
"There are no DBRoots for OID " << OID <<
|
|
" and PM " << pmNumber << endl;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
tr1::unordered_map<uint16_t, EmDbRootHWMInfo>::iterator emIter;
|
|
|
|
// Searching the array in reverse order should be faster since the last
|
|
// extent is usually at the bottom. We still have to search the entire
|
|
// array (just in case), but the number of operations per loop iteration
|
|
// will be less.
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = emEntries - 1; i >= 0; i--)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == OID))
|
|
{
|
|
|
|
// Include this extent in the search, only if the extent's
|
|
// DBRoot falls in the list of DBRoots for this PM.
|
|
emIter = emDbRootMap.find( fExtentMap[i].dbRoot );
|
|
|
|
if (emIter == emDbRootMap.end())
|
|
continue;
|
|
|
|
EmDbRootHWMInfo& emDbRoot = emIter->second;
|
|
|
|
if ((fExtentMap[i].status != EXTENTOUTOFSERVICE) &&
|
|
(fExtentMap[i].HWM != 0))
|
|
emDbRoot.totalBlocks += (fExtentMap[i].HWM + 1);
|
|
|
|
if ( (fExtentMap[i].partitionNum > emDbRoot.partitionNum) ||
|
|
((fExtentMap[i].partitionNum == emDbRoot.partitionNum) &&
|
|
(fExtentMap[i].blockOffset > emDbRoot.fbo)) ||
|
|
((fExtentMap[i].partitionNum == emDbRoot.partitionNum) &&
|
|
(fExtentMap[i].blockOffset == emDbRoot.fbo) &&
|
|
(fExtentMap[i].segmentNum >= emDbRoot.segmentNum)) )
|
|
{
|
|
emDbRoot.fbo = fExtentMap[i].blockOffset;
|
|
emDbRoot.partitionNum = fExtentMap[i].partitionNum;
|
|
emDbRoot.segmentNum = fExtentMap[i].segmentNum;
|
|
emDbRoot.localHWM = fExtentMap[i].HWM;
|
|
emDbRoot.startLbid = fExtentMap[i].range.start;
|
|
emDbRoot.status = fExtentMap[i].status;
|
|
emDbRoot.hwmExtentIndex = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
|
|
for (tr1::unordered_map<uint16_t, EmDbRootHWMInfo>::iterator iter =
|
|
emDbRootMap.begin(); iter != emDbRootMap.end(); ++iter)
|
|
{
|
|
EmDbRootHWMInfo& emDbRoot = iter->second;
|
|
|
|
if (emDbRoot.hwmExtentIndex != -1)
|
|
{
|
|
// @bug 5349: make sure HWM extent for each DBRoot is AVAILABLE
|
|
if (emDbRoot.status == EXTENTUNAVAILABLE)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getDbRootHWMInfo(): " <<
|
|
"OID " << OID <<
|
|
" has HWM extent that is UNAVAILABLE for " <<
|
|
"DBRoot" << emDbRoot.dbRoot <<
|
|
"; part#: " << emDbRoot.partitionNum <<
|
|
", seg#: " << emDbRoot.segmentNum <<
|
|
", fbo: " << emDbRoot.fbo <<
|
|
", localHWM: " << emDbRoot.localHWM <<
|
|
", lbid: " << emDbRoot.startLbid << endl;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw runtime_error(oss.str());
|
|
}
|
|
|
|
// In the loop above we ignored "all" the extents with HWM of 0,
|
|
// which is okay most of the time, because each segment file's HWM
|
|
// is carried in the last extent only. BUT if we have a segment
|
|
// file with HWM=0, having a single extent and a single block at
|
|
// the "end" of the data, we still need to account for this last
|
|
// block. So we increment the block count for this isolated case.
|
|
if ((emDbRoot.localHWM == 0) &&
|
|
(emDbRoot.status == EXTENTAVAILABLE))
|
|
{
|
|
emDbRoot.totalBlocks++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Copy internal map to the output vector argument
|
|
for (tr1::unordered_map<uint16_t, EmDbRootHWMInfo>::iterator iter =
|
|
emDbRootMap.begin(); iter != emDbRootMap.end(); ++iter)
|
|
{
|
|
emDbRootHwmInfos.push_back( iter->second );
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Return the existence (bFound) and state (status) for the segment file
|
|
// containing the extents for the specified OID, partition, and segment.
|
|
// If no extents are found, no exception is thrown. We instead just return
|
|
// bFound=false, so that the application can take the necessary action.
|
|
// The value returned in the "status" variable is based on the first extent
|
|
// found, since all the extents in a segment file should have the same state.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::getExtentState(int OID, uint32_t partitionNum,
|
|
uint16_t segmentNum, bool& bFound, int& status)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getExtentState");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDOUTPUT(status);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
int i, emEntries;
|
|
bFound = false;
|
|
status = EXTENTAVAILABLE;
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getExtentState(): invalid OID requested: " << OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].partitionNum == partitionNum) &&
|
|
(fExtentMap[i].segmentNum == segmentNum))
|
|
{
|
|
bFound = true;
|
|
status = fExtentMap[i].status;
|
|
break;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Returns the HWM for the specified OID, partition, and segment numbers.
|
|
// Used to get the HWM for a specific column or dictionary store segment file.
|
|
//------------------------------------------------------------------------------
|
|
HWM_t ExtentMap::getLocalHWM(int OID, uint32_t partitionNum,
|
|
uint16_t segmentNum, int& status)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getLocalHWM");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDOUTPUT(status);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef EM_AS_A_TABLE_POC__
|
|
|
|
if (OID == 1084)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
|
|
int i, emEntries;
|
|
HWM_t ret = 0;
|
|
bool OIDPartSegExists = false;
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getLocalHWM(): invalid OID requested: " << OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].partitionNum == partitionNum) &&
|
|
(fExtentMap[i].segmentNum == segmentNum))
|
|
{
|
|
OIDPartSegExists = true;
|
|
status = fExtentMap[i].status;
|
|
|
|
if (fExtentMap[i].HWM != 0)
|
|
{
|
|
ret = fExtentMap[i].HWM;
|
|
releaseEMEntryTable(READ);
|
|
return ret;
|
|
}
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
|
|
if (OIDPartSegExists)
|
|
return 0;
|
|
else
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getLocalHWM(): There are no extent entries for OID " <<
|
|
OID << "; partition " << partitionNum << "; segment " <<
|
|
segmentNum << endl;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Sets the HWM for the specified OID, partition, and segment number.
|
|
// In addition, the HWM for the old HWM extent (for this segment file),
|
|
// is set to 0, so that the latest HWM is only carried in the last extent
|
|
// (per segment file).
|
|
// Used for dictionary or column OIDs to set the HWM for specific segment file.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::setLocalHWM(int OID, uint32_t partitionNum,
|
|
uint16_t segmentNum, HWM_t newHWM, bool firstNode, bool uselock)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("setLocalHWM");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_ADDINPUT(partitionNum);
|
|
TRACER_ADDSHORTINPUT(segmentNum);
|
|
TRACER_ADDINPUT(newHWM);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
bool addedAnExtent = false;
|
|
|
|
if (OID < 0)
|
|
{
|
|
log("ExtentMap::setLocalHWM(): OID must be >= 0",
|
|
logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument(
|
|
"ExtentMap::setLocalHWM(): OID must be >= 0");
|
|
}
|
|
|
|
#endif
|
|
|
|
int lastExtentIndex = -1;
|
|
int oldHWMExtentIndex = -1;
|
|
uint32_t highestOffset = 0;
|
|
|
|
if (uselock)
|
|
grabEMEntryTable(WRITE);
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].partitionNum == partitionNum) &&
|
|
(fExtentMap[i].segmentNum == segmentNum))
|
|
{
|
|
|
|
// Find current HWM extent
|
|
if (fExtentMap[i].blockOffset >= highestOffset)
|
|
{
|
|
highestOffset = fExtentMap[i].blockOffset;
|
|
lastExtentIndex = i;
|
|
}
|
|
|
|
// Find previous HWM extent
|
|
if (fExtentMap[i].HWM != 0)
|
|
{
|
|
oldHWMExtentIndex = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (lastExtentIndex == -1)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::setLocalHWM(): Bad OID/partition/segment argument; "
|
|
"no extent entries for OID " << OID << "; partition " <<
|
|
partitionNum << "; segment " << segmentNum << endl;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
if (newHWM >= (fExtentMap[lastExtentIndex].blockOffset +
|
|
fExtentMap[lastExtentIndex].range.size * 1024))
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::setLocalHWM(): "
|
|
"new HWM is past the end of the file for OID " << OID << "; partition " <<
|
|
partitionNum << "; segment " << segmentNum << "; HWM " << newHWM;
|
|
log(oss.str(), logging::LOG_TYPE_DEBUG);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
// Save HWM in last extent for this segment file; and mark as AVAILABLE
|
|
makeUndoRecord(&fExtentMap[lastExtentIndex], sizeof(EMEntry));
|
|
fExtentMap[lastExtentIndex].HWM = newHWM;
|
|
fExtentMap[lastExtentIndex].status = EXTENTAVAILABLE;
|
|
|
|
// Reset HWM in old HWM extent to 0
|
|
if ((oldHWMExtentIndex != -1) && (oldHWMExtentIndex != lastExtentIndex))
|
|
{
|
|
makeUndoRecord(&fExtentMap[oldHWMExtentIndex], sizeof(EMEntry));
|
|
fExtentMap[oldHWMExtentIndex].HWM = 0;
|
|
#ifdef BRM_INFO
|
|
addedAnExtent = true;
|
|
#endif
|
|
}
|
|
|
|
#ifdef BRM_INFO
|
|
|
|
if (firstNode)
|
|
{
|
|
ostringstream os;
|
|
os << "ExtentMap::setLocalHWM(): firstLBID=" << fExtentMap[lastExtentIndex].range.start <<
|
|
" lastLBID=" << fExtentMap[lastExtentIndex].range.start +
|
|
fExtentMap[lastExtentIndex].range.size * 1024 - 1 << " newHWM=" << fExtentMap[lastExtentIndex].HWM
|
|
<< " min=" << fExtentMap[lastExtentIndex].partition.cprange.lo_val << " max=" <<
|
|
fExtentMap[lastExtentIndex].partition.cprange.hi_val << " seq=" <<
|
|
fExtentMap[lastExtentIndex].partition.cprange.sequenceNum << " status=";
|
|
|
|
switch (fExtentMap[lastExtentIndex].partition.cprange.isValid)
|
|
{
|
|
case CP_INVALID:
|
|
os << "invalid.";
|
|
break;
|
|
|
|
case CP_UPDATING:
|
|
os << "updating.";
|
|
break;
|
|
|
|
case CP_VALID:
|
|
os << "valid.";
|
|
break;
|
|
|
|
default:
|
|
os << "unknown(!!).";
|
|
break;
|
|
}
|
|
|
|
if (addedAnExtent)
|
|
os << " Data extended into a new extent.";
|
|
|
|
log(os.str(), logging::LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
#endif
|
|
}
|
|
|
|
void ExtentMap::bulkSetHWM(const vector<BulkSetHWMArg>& v, bool firstNode)
|
|
{
|
|
grabEMEntryTable(WRITE);
|
|
|
|
for (uint32_t i = 0; i < v.size(); i++)
|
|
setLocalHWM(v[i].oid, v[i].partNum, v[i].segNum, v[i].hwm, firstNode, false);
|
|
}
|
|
|
|
class BUHasher
|
|
{
|
|
public:
|
|
inline uint64_t operator()(const BulkUpdateDBRootArg& b) const
|
|
{
|
|
return b.startLBID;
|
|
}
|
|
};
|
|
|
|
class BUEqual
|
|
{
|
|
public:
|
|
inline bool operator()(const BulkUpdateDBRootArg& b1, const BulkUpdateDBRootArg& b2) const
|
|
{
|
|
return b1.startLBID == b2.startLBID;
|
|
}
|
|
};
|
|
|
|
void ExtentMap::bulkUpdateDBRoot(const vector<BulkUpdateDBRootArg>& args)
|
|
{
|
|
tr1::unordered_set<BulkUpdateDBRootArg, BUHasher, BUEqual> sArgs;
|
|
tr1::unordered_set<BulkUpdateDBRootArg, BUHasher, BUEqual>::iterator sit;
|
|
BulkUpdateDBRootArg key;
|
|
int emEntries;
|
|
|
|
for (uint32_t i = 0; i < args.size(); i++)
|
|
sArgs.insert(args[i]);
|
|
|
|
grabEMEntryTable(WRITE);
|
|
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
key.startLBID = fExtentMap[i].range.start;
|
|
sit = sArgs.find(key);
|
|
|
|
if (sit != sArgs.end())
|
|
fExtentMap[i].dbRoot = sit->dbRoot;
|
|
}
|
|
}
|
|
|
|
void ExtentMap::getExtents(int OID, vector<struct EMEntry>& entries,
|
|
bool sorted, bool notFoundErr, bool incOutOfService)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getExtents");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
int i, emEntries;
|
|
|
|
entries.clear();
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getExtents(): invalid OID requested: " << OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
// Pre-expand entries to stop lots of small allocs
|
|
entries.reserve(emEntries);
|
|
|
|
if (incOutOfService)
|
|
{
|
|
for (i = 0 ; i < emEntries; i++)
|
|
if ((fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].range.size != 0))
|
|
entries.push_back(fExtentMap[i]);
|
|
}
|
|
else
|
|
{
|
|
for (i = 0 ; i < emEntries; i++)
|
|
if ((fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].status != EXTENTOUTOFSERVICE))
|
|
entries.push_back(fExtentMap[i]);
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
|
|
if (sorted)
|
|
sort<vector<struct EMEntry>::iterator>(entries.begin(), entries.end());
|
|
}
|
|
|
|
void ExtentMap::getExtents_dbroot(int OID, vector<struct EMEntry>& entries, const uint16_t dbroot)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getExtents");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef EM_AS_A_TABLE_POC__
|
|
|
|
if (OID == 1084)
|
|
{
|
|
EMEntry fakeEntry;
|
|
fakeEntry.range.start = (1LL << 54);
|
|
fakeEntry.range.size = 4;
|
|
fakeEntry.fileID = 1084;
|
|
fakeEntry.blockOffset = 0;
|
|
fakeEntry.HWM = 1;
|
|
fakeEntry.partitionNum = 0;
|
|
fakeEntry.segmentNum = 0;
|
|
fakeEntry.dbRoot = 1;
|
|
fakeEntry.colWid = 4;
|
|
fakeEntry.status = EXTENTAVAILABLE;
|
|
fakeEntry.partition.cprange.hi_val = numeric_limits<int64_t>::min() + 2;
|
|
fakeEntry.partition.cprange.lo_val = numeric_limits<int64_t>::max();
|
|
fakeEntry.partition.cprange.sequenceNum = 0;
|
|
fakeEntry.partition.cprange.isValid = CP_INVALID;
|
|
entries.push_back(fakeEntry);
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
int i, emEntries;
|
|
|
|
entries.clear();
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getExtents(): invalid OID requested: " << OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0 ; i < emEntries; i++)
|
|
if ((fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].range.size != 0) && (fExtentMap[i].dbRoot == dbroot))
|
|
entries.push_back(fExtentMap[i]);
|
|
|
|
releaseEMEntryTable(READ);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Get the number of extents for the specified OID and DBRoot.
|
|
// OutOfService extents are included/excluded depending on the
|
|
// value of the incOutOfService flag.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::getExtentCount_dbroot(int OID, uint16_t dbroot,
|
|
bool incOutOfService, uint64_t& numExtents)
|
|
{
|
|
int i, emEntries;
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getExtentsCount_dbroot(): invalid OID requested: " <<
|
|
OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
numExtents = 0;
|
|
|
|
if (incOutOfService)
|
|
{
|
|
for (i = 0 ; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].dbRoot == dbroot))
|
|
numExtents++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (i = 0 ; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].dbRoot == dbroot) &&
|
|
(fExtentMap[i].status != EXTENTOUTOFSERVICE))
|
|
numExtents++;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Gets the DBRoot for the specified system catalog OID.
|
|
// Function assumes the specified System Catalog OID is fully contained on
|
|
// a single DBRoot, as the function only searches for and returns the first
|
|
// DBRoot entry that is found in the extent map.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::getSysCatDBRoot(OID_t oid, uint16_t& dbRoot)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getSysCatDBRoot");
|
|
TRACER_ADDINPUT(oid);
|
|
TRACER_ADDSHORTOUTPUT(dbRoot);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
bool bFound = false;
|
|
grabEMEntryTable(READ);
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0 ; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].fileID == oid))
|
|
{
|
|
dbRoot = fExtentMap[i].dbRoot;
|
|
bFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
|
|
if (!bFound)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getSysCatDBRoot(): OID not found: " << oid;
|
|
log(oss.str(), logging::LOG_TYPE_WARNING);
|
|
throw logic_error(oss.str());
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Delete all extents for the specified OID(s) and partition number.
|
|
// @bug 5237 - Removed restriction that prevented deletion of segment files in
|
|
// the last partition (for a DBRoot).
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::deletePartition(const set<OID_t>& oids,
|
|
const set<LogicalPartition>& partitionNums, string& emsg)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITENOW("deletePartition");
|
|
ostringstream oss;
|
|
set<LogicalPartition>::const_iterator partIt;
|
|
oss << "partitionNums: ";
|
|
for (partIt=partitionNums.begin(); partIt!=partitionNums.end(); ++partIt)
|
|
oss << (*partIt) << " ";
|
|
|
|
oss << endl;
|
|
oss << "OIDS: ";
|
|
set<OID_t>::const_iterator it;
|
|
|
|
for (it = oids.begin(); it != oids.end(); ++it)
|
|
{
|
|
oss << (*it) << ", ";
|
|
}
|
|
|
|
TRACER_WRITEDIRECT(oss.str());
|
|
}
|
|
|
|
#endif
|
|
|
|
if (oids.size() == 0)
|
|
return;
|
|
|
|
int rc = 0;
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
set<LogicalPartition> foundPartitions;
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
vector<uint32_t> extents;
|
|
|
|
// First: validate against referencing non-existent logical partitions
|
|
std::set<OID_t>::const_iterator it;
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
LogicalPartition lp(fExtentMap[i].dbRoot,
|
|
fExtentMap[i].partitionNum, fExtentMap[i].segmentNum);
|
|
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(partitionNums.find(lp) != partitionNums.end()))
|
|
{
|
|
it = oids.find( fExtentMap[i].fileID );
|
|
|
|
if (it != oids.end())
|
|
{
|
|
foundPartitions.insert(lp);
|
|
extents.push_back(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (foundPartitions.size() != partitionNums.size())
|
|
{
|
|
set<LogicalPartition>::const_iterator partIt;
|
|
Message::Args args;
|
|
ostringstream oss;
|
|
|
|
for (partIt = partitionNums.begin();
|
|
partIt != partitionNums.end(); ++partIt)
|
|
{
|
|
if (foundPartitions.find((*partIt)) == foundPartitions.end())
|
|
{
|
|
if (!oss.str().empty())
|
|
oss << ", ";
|
|
|
|
oss << (*partIt).toString();
|
|
}
|
|
}
|
|
|
|
args.add(oss.str());
|
|
emsg = IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NOT_EXIST, args);
|
|
rc = ERR_PARTITION_NOT_EXIST;
|
|
}
|
|
|
|
// this has to be the last error code to set and can not be over-written
|
|
if (foundPartitions.empty())
|
|
rc = WARN_NO_PARTITION_PERFORMED;
|
|
|
|
// really delete extents
|
|
for (uint32_t i = 0; i < extents.size(); i++)
|
|
{
|
|
deleteExtent(extents[i]);
|
|
}
|
|
|
|
// @bug 4772 throw exception on any error because they are all warnings.
|
|
if (rc)
|
|
throw IDBExcept(emsg, rc);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Mark all extents as out of service, for the specified OID(s) and partition
|
|
// number.
|
|
// @bug 5237 - Removed restriction that prevented deletion of segment files in
|
|
// the last partition (for a DBRoot).
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::markPartitionForDeletion(const set<OID_t>& oids,
|
|
const set<LogicalPartition>& partitionNums, string& emsg)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITENOW("markPartitionForDeletion");
|
|
ostringstream oss;
|
|
set<LogicalPartition>::const_iterator partIt;
|
|
oss << "partitionNums: ";
|
|
for (partIt=partitionNums.begin(); partIt!=partitionNums.end(); ++partIt)
|
|
oss << (*partIt) << " ";
|
|
|
|
oss << endl;
|
|
oss << "OIDS: ";
|
|
set<OID_t>::const_iterator it;
|
|
|
|
for (it = oids.begin(); it != oids.end(); ++it)
|
|
{
|
|
oss << (*it) << ", ";
|
|
}
|
|
|
|
TRACER_WRITEDIRECT(oss.str());
|
|
}
|
|
|
|
#endif
|
|
|
|
if (oids.size() == 0)
|
|
return;
|
|
|
|
int rc = 0;
|
|
|
|
grabEMEntryTable(WRITE);
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
set<LogicalPartition> foundPartitions;
|
|
vector<uint32_t> extents;
|
|
bool partitionAlreadyDisabled = false;
|
|
|
|
// Identify not exists partition first. Then mark disable.
|
|
std::set<OID_t>::const_iterator it;
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
LogicalPartition lp(fExtentMap[i].dbRoot,
|
|
fExtentMap[i].partitionNum, fExtentMap[i].segmentNum);
|
|
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(partitionNums.find(lp) != partitionNums.end()))
|
|
{
|
|
it = oids.find( fExtentMap[i].fileID );
|
|
|
|
if (it != oids.end())
|
|
{
|
|
if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
|
|
{
|
|
partitionAlreadyDisabled = true;
|
|
}
|
|
|
|
foundPartitions.insert(lp);
|
|
extents.push_back(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
// really disable partitions
|
|
for (uint32_t i = 0; i < extents.size(); i++)
|
|
{
|
|
makeUndoRecord(&fExtentMap[extents[i]], sizeof(EMEntry));
|
|
fExtentMap[extents[i]].status = EXTENTOUTOFSERVICE;
|
|
}
|
|
|
|
// validate against referencing non-existent logical partitions
|
|
if (foundPartitions.size() != partitionNums.size())
|
|
{
|
|
set<LogicalPartition>::const_iterator partIt;
|
|
Message::Args args;
|
|
ostringstream oss;
|
|
|
|
for (partIt = partitionNums.begin();
|
|
partIt != partitionNums.end(); ++partIt)
|
|
{
|
|
if (foundPartitions.find((*partIt)) == foundPartitions.end())
|
|
{
|
|
if (!oss.str().empty())
|
|
oss << ", ";
|
|
|
|
oss << (*partIt).toString();
|
|
}
|
|
}
|
|
|
|
args.add(oss.str());
|
|
emsg = emsg + string("\n") + IDBErrorInfo::instance()->errorMsg(
|
|
ERR_PARTITION_NOT_EXIST, args);
|
|
rc = ERR_PARTITION_NOT_EXIST;
|
|
}
|
|
|
|
// check already disabled error now, which could be a non-error
|
|
if (partitionAlreadyDisabled)
|
|
{
|
|
emsg = emsg + string("\n") + IDBErrorInfo::instance()->errorMsg(
|
|
ERR_PARTITION_ALREADY_DISABLED);
|
|
rc = ERR_PARTITION_ALREADY_DISABLED;
|
|
}
|
|
|
|
// this rc has to be the last one set and can not be over-written by others.
|
|
if (foundPartitions.empty())
|
|
{
|
|
rc = WARN_NO_PARTITION_PERFORMED;
|
|
}
|
|
|
|
// @bug 4772 throw exception on any error because they are all warnings.
|
|
if (rc)
|
|
throw IDBExcept(emsg, rc);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Mark all extents as out of service, for the specified OID(s)
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::markAllPartitionForDeletion(const set<OID_t>& oids)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITENOW("markPartitionForDeletion");
|
|
ostringstream oss;
|
|
oss << "OIDS: ";
|
|
set<OID_t>::const_iterator it;
|
|
|
|
for (it = oids.begin(); it != oids.end(); ++it)
|
|
{
|
|
oss << (*it) << ", ";
|
|
}
|
|
|
|
TRACER_WRITEDIRECT(oss.str());
|
|
}
|
|
|
|
#endif
|
|
|
|
if (oids.size() == 0)
|
|
return;
|
|
|
|
set<OID_t>::const_iterator it;
|
|
|
|
grabEMEntryTable(WRITE);
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0 )
|
|
{
|
|
it = oids.find( fExtentMap[i].fileID );
|
|
|
|
if (it != oids.end())
|
|
{
|
|
makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
|
|
fExtentMap[i].status = EXTENTOUTOFSERVICE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Restore all extents for the specified OID(s) and partition number.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::restorePartition(const set<OID_t>& oids,
|
|
const set<LogicalPartition>& partitionNums, string& emsg)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITENOW("restorePartition");
|
|
ostringstream oss;
|
|
set<LogicalPartition>::const_iterator partIt;
|
|
oss << "partitionNums: ";
|
|
for (partIt=partitionNums.begin(); partIt!=partitionNums.end(); ++partIt)
|
|
oss << (*partIt) << " ";
|
|
|
|
oss << endl;
|
|
oss << "OIDS: ";
|
|
set<OID_t>::const_iterator it;
|
|
|
|
for (it = oids.begin(); it != oids.end(); ++it)
|
|
{
|
|
oss << (*it) << ", ";
|
|
}
|
|
|
|
TRACER_WRITEDIRECT(oss.str());
|
|
}
|
|
|
|
#endif
|
|
|
|
if (oids.size() == 0)
|
|
return;
|
|
|
|
set<OID_t>::const_iterator it;
|
|
grabEMEntryTable(WRITE);
|
|
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
vector<uint32_t> extents;
|
|
set<LogicalPartition> foundPartitions;
|
|
bool partitionAlreadyEnabled = false;
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
LogicalPartition lp(fExtentMap[i].dbRoot, fExtentMap[i].partitionNum, fExtentMap[i].segmentNum);
|
|
|
|
if ((fExtentMap[i].range.size != 0 ) && partitionNums.find(lp) != partitionNums.end())
|
|
{
|
|
it = oids.find( fExtentMap[i].fileID );
|
|
|
|
if (it != oids.end())
|
|
{
|
|
if (fExtentMap[i].status == EXTENTAVAILABLE)
|
|
{
|
|
partitionAlreadyEnabled = true;
|
|
}
|
|
|
|
extents.push_back(i);
|
|
foundPartitions.insert(lp);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (foundPartitions.size() != partitionNums.size())
|
|
{
|
|
set<LogicalPartition>::const_iterator partIt;
|
|
Message::Args args;
|
|
ostringstream oss;
|
|
|
|
for (partIt = partitionNums.begin(); partIt != partitionNums.end(); ++partIt)
|
|
{
|
|
if (foundPartitions.empty() || foundPartitions.find((*partIt)) == foundPartitions.end())
|
|
{
|
|
if (!oss.str().empty())
|
|
oss << ", ";
|
|
|
|
oss << (*partIt).toString();
|
|
}
|
|
}
|
|
|
|
args.add(oss.str());
|
|
emsg = IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NOT_EXIST, args);
|
|
throw IDBExcept(emsg, ERR_PARTITION_NOT_EXIST);
|
|
}
|
|
|
|
// really enable partitions
|
|
for (uint32_t i = 0; i < extents.size(); i++)
|
|
{
|
|
makeUndoRecord(&fExtentMap[extents[i]], sizeof(EMEntry));
|
|
fExtentMap[extents[i]].status = EXTENTAVAILABLE;
|
|
}
|
|
|
|
if (partitionAlreadyEnabled)
|
|
{
|
|
emsg = IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_ALREADY_ENABLED);
|
|
throw IDBExcept(emsg, ERR_PARTITION_ALREADY_ENABLED);
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Return all the out-of-service partitions for the specified OID.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::getOutOfServicePartitions(OID_t oid,
|
|
set<LogicalPartition>& partitionNums)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("getExtents");
|
|
TRACER_ADDINPUT(oid);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
partitionNums.clear();
|
|
|
|
if (oid < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::getOutOfServicePartitions(): "
|
|
"invalid OID requested: " << oid;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0 ) &&
|
|
(fExtentMap[i].fileID == oid) &&
|
|
(fExtentMap[i].status == EXTENTOUTOFSERVICE))
|
|
{
|
|
|
|
// need to be logical partition number
|
|
LogicalPartition lp(fExtentMap[i].dbRoot,
|
|
fExtentMap[i].partitionNum,
|
|
fExtentMap[i].segmentNum);
|
|
partitionNums.insert(lp);
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Delete all extents for the specified dbroot
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::deleteDBRoot(uint16_t dbroot)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITENOW("deleteDBRoot");
|
|
ostringstream oss;
|
|
oss << "dbroot: " << dbroot;
|
|
TRACER_WRITEDIRECT(oss.str());
|
|
}
|
|
|
|
#endif
|
|
|
|
grabEMEntryTable(WRITE);
|
|
grabFreeList(WRITE);
|
|
|
|
for (unsigned i = 0; i < fEMShminfo->allocdSize / sizeof(struct EMEntry); i++)
|
|
if (fExtentMap[i].range.size != 0 && fExtentMap[i].dbRoot == dbroot)
|
|
deleteExtent(i);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Does the specified DBRoot have any extents.
|
|
// Throws exception if extentmap shared memory is not loaded.
|
|
//------------------------------------------------------------------------------
|
|
bool ExtentMap::isDBRootEmpty(uint16_t dbroot)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("isDBRootEmpty");
|
|
TRACER_ADDINPUT(dbroot);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
bool bEmpty = true;
|
|
int i, emEntries;
|
|
grabEMEntryTable(READ);
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
if (fEMShminfo->currentSize == 0)
|
|
{
|
|
throw runtime_error(
|
|
"ExtentMap::isDBRootEmpty() shared memory not loaded");
|
|
}
|
|
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].dbRoot == dbroot))
|
|
{
|
|
bEmpty = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
|
|
return bEmpty;
|
|
}
|
|
|
|
void ExtentMap::lookup(OID_t OID, LBIDRange_v& ranges)
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITELATER("lookup");
|
|
TRACER_ADDINPUT(OID);
|
|
TRACER_WRITE;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef EM_AS_A_TABLE_POC__
|
|
|
|
if (OID == 1084)
|
|
{
|
|
EMEntry fakeEntry;
|
|
fakeEntry.range.start = (1LL << 54);
|
|
fakeEntry.range.size = 4;
|
|
#if 0
|
|
fakeEntry.fileID = 1084;
|
|
fakeEntry.blockOffset = 0;
|
|
fakeEntry.HWM = 1;
|
|
fakeEntry.partitionNum = 0;
|
|
fakeEntry.segmentNum = 0;
|
|
fakeEntry.dbRoot = 1;
|
|
fakeEntry.colWid = 4;
|
|
fakeEntry.status = EXTENTAVAILABLE;
|
|
fakeEntry.partition.cprange.hi_val = numeric_limits<int64_t>::min() + 2;
|
|
fakeEntry.partition.cprange.lo_val = numeric_limits<int64_t>::max();
|
|
fakeEntry.partition.cprange.sequenceNum = 0;
|
|
fakeEntry.partition.cprange.isValid = CP_INVALID;
|
|
#endif
|
|
ranges.push_back(fakeEntry.range);
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
int i, emEntries;
|
|
LBIDRange tmp;
|
|
|
|
ranges.clear();
|
|
|
|
if (OID < 0)
|
|
{
|
|
ostringstream oss;
|
|
oss << "ExtentMap::lookup(): invalid OID requested: " << OID;
|
|
log(oss.str(), logging::LOG_TYPE_CRITICAL);
|
|
throw invalid_argument(oss.str());
|
|
}
|
|
|
|
grabEMEntryTable(READ);
|
|
emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (i = 0 ; i < emEntries; i++)
|
|
if ((fExtentMap[i].fileID == OID) &&
|
|
(fExtentMap[i].range.size != 0) &&
|
|
(fExtentMap[i].status != EXTENTOUTOFSERVICE))
|
|
{
|
|
tmp.start = fExtentMap[i].range.start;
|
|
tmp.size = fExtentMap[i].range.size * 1024;
|
|
ranges.push_back(tmp);
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
}
|
|
|
|
|
|
int ExtentMap::checkConsistency()
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug) TRACER_WRITENOW("checkConsistency");
|
|
|
|
#endif
|
|
|
|
/*
|
|
LBID space consistency checks
|
|
1. verify that every LBID is either in the EM xor the freelist
|
|
a. for every segment in the EM, make sure there is no overlapping entry in the FL
|
|
b. scan both lists to verify that the entire space is represented
|
|
2. verify that there are no adjacent entries in the freelist
|
|
OID consistency
|
|
3. make sure there are no gaps in the file offsets
|
|
4. make sure that only the last extent has a non-zero HWM
|
|
Struct integrity
|
|
5. verify that the number of entries in each table is consistent with
|
|
the recorded current size
|
|
*/
|
|
|
|
LBID_t emBegin, emEnd, flBegin, flEnd;
|
|
int i, j, flEntries, emEntries;
|
|
uint32_t usedEntries;
|
|
|
|
grabEMEntryTable(READ);
|
|
|
|
try
|
|
{
|
|
grabFreeList(READ);
|
|
}
|
|
catch (...)
|
|
{
|
|
releaseEMEntryTable(READ);
|
|
throw;
|
|
}
|
|
|
|
flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
emEntries = fEMShminfo->allocdSize / sizeof(EMEntry);
|
|
|
|
// test 1a - make sure every entry in the EM is not overlapped by an entry in the FL
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
emBegin = fExtentMap[i].range.start;
|
|
emEnd = emBegin + (fExtentMap[i].range.size * 1024) - 1;
|
|
|
|
for (j = 0; j < flEntries; j++)
|
|
{
|
|
if (fFreeList[j].size != 0)
|
|
{
|
|
flBegin = fFreeList[j].start;
|
|
flEnd = flBegin + (fFreeList[j].size * 1024) - 1;
|
|
|
|
//em entry overlaps the beginning
|
|
//em entry is contained within
|
|
//em entry overlaps the end
|
|
if ((emBegin <= flBegin && emEnd >= flBegin) ||
|
|
(emBegin >= flBegin && emEnd <= flEnd) ||
|
|
(emBegin <= flEnd && emEnd >= flEnd))
|
|
{
|
|
cerr << "EM::checkConsistency(): Improper LBID allocation detected" << endl;
|
|
throw logic_error("EM checkConsistency test 1a (data structures are read-locked)");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
cout << "test 1a passed\n";
|
|
|
|
//test 1b - verify that the entire LBID space is accounted for
|
|
|
|
int lbid, oldlbid;
|
|
|
|
lbid = 0;
|
|
|
|
while (lbid < 67108864) // 2^26 (2^36/1024)
|
|
{
|
|
oldlbid = lbid;
|
|
|
|
for (i = 0; i < flEntries; i++)
|
|
{
|
|
if (fFreeList[i].start % 1024 != 0)
|
|
{
|
|
cerr << "EM::checkConsistency(): A freelist entry is not 1024-block aligned" << endl;
|
|
throw logic_error("EM checkConsistency test 1b (data structures are read-locked)");
|
|
}
|
|
|
|
if (fFreeList[i].start / 1024 == lbid)
|
|
lbid += fFreeList[i].size;
|
|
}
|
|
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.start % 1024 != 0)
|
|
{
|
|
cerr << "EM::checkConsistency(): An extent map entry is not 1024-block aligned " << i << " " << fExtentMap[i].range.start << endl;
|
|
throw logic_error("EM checkConsistency test 1b (data structures are read-locked)");
|
|
}
|
|
|
|
if (fExtentMap[i].range.start / 1024 == lbid)
|
|
lbid += fExtentMap[i].range.size;
|
|
}
|
|
|
|
if (oldlbid == lbid)
|
|
{
|
|
cerr << "EM::checkConsistency(): There is a gap in the LBID space at block #" <<
|
|
static_cast<uint64_t>(lbid * 1024) << endl;
|
|
throw logic_error("EM checkConsistency test 1b (data structures are read-locked)");
|
|
}
|
|
}
|
|
|
|
cout << "test 1b passed\n";
|
|
|
|
// test 1c - verify that no dbroot is < 1
|
|
bool errorOut = false;
|
|
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
//cout << "EM[" << i << "]: dbRoot=" << fExtentMap[i].dbRoot(listMan) << endl;
|
|
if (fExtentMap[i].dbRoot == 0)
|
|
{
|
|
errorOut = true;
|
|
cerr << "EM::checkConsistency(): index " << i << " has a 0 dbroot\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
if (errorOut)
|
|
throw logic_error("EM checkConsistency test 1c (data structures are read-locked)");
|
|
|
|
cout << "test 1c passed\n";
|
|
|
|
#if 0 // a test ported from the tek2 branch, which requires a RID field to be stored; not relevant here
|
|
// test 1d - verify that each <OID, RID> pair is unique
|
|
cout << "Running test 1d\n";
|
|
|
|
set<OIDRID> uniquer;
|
|
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].size != 0 && !fExtentMap[i].isDict())
|
|
{
|
|
OIDRID element(fExtentMap[i].fileID, fExtentMap[i].rid);
|
|
|
|
if (uniquer.insert(element).second == false)
|
|
throw logic_error("EM consistency test 1d failed (data structures are read-locked)");
|
|
}
|
|
}
|
|
|
|
uniquer.clear();
|
|
cout << "Test 1d passed\n";
|
|
#endif
|
|
|
|
// test 2 - verify that the freelist is consolidated
|
|
for (i = 0; i < flEntries; i++)
|
|
{
|
|
if (fFreeList[i].size != 0)
|
|
{
|
|
flEnd = fFreeList[i].start + (fFreeList[i].size * 1024);
|
|
|
|
for (j = i + 1; j < flEntries; j++)
|
|
if (fFreeList[j].size != 0 && fFreeList[j].start == flEnd)
|
|
throw logic_error("EM checkConsistency test 2 (data structures are read-locked)");
|
|
}
|
|
}
|
|
|
|
cout << "test 2 passed\n";
|
|
|
|
// needs to be updated
|
|
#if 0
|
|
// test 3 - scan the extent map to make sure files have no LBID gaps
|
|
vector<OID_t> oids;
|
|
vector< vector<uint32_t> > fbos;
|
|
|
|
for (i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].size != 0)
|
|
{
|
|
for (j = 0; j < (int)oids.size(); j++)
|
|
if (oids[j] == fExtentMap[i].fileID)
|
|
break;
|
|
|
|
if (j == (int)oids.size())
|
|
{
|
|
oids.push_back(fExtentMap[i].fileID);
|
|
fbos.push_back(vector<uint32_t>());
|
|
}
|
|
|
|
fbos[j].push_back(fExtentMap[i].blockOffset);
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < (int)fbos.size(); i++)
|
|
sort<vector<uint32_t>::iterator>(fbos[i].begin(), fbos[i].end());
|
|
|
|
const unsigned EXTENT_SIZE = getExtentSize();
|
|
|
|
for (i = 0; i < (int)fbos.size(); i++)
|
|
{
|
|
for (j = 0; j < (int)fbos[i].size(); j++)
|
|
{
|
|
if (fbos[i][j] != static_cast<uint32_t>(j * EXTENT_SIZE))
|
|
{
|
|
cerr << "EM: OID " << oids[i] << " has no extent at FBO " <<
|
|
j* EXTENT_SIZE << endl;
|
|
throw logic_error("EM checkConsistency test 3 (data structures are read-locked)");
|
|
}
|
|
}
|
|
}
|
|
|
|
fbos.clear();
|
|
oids.clear();
|
|
#endif
|
|
|
|
|
|
// test 5a - scan freelist to make sure the current size is accurate
|
|
|
|
for (i = 0, usedEntries = 0; i < emEntries; i++)
|
|
if (fExtentMap[i].range.size != 0)
|
|
usedEntries++;
|
|
|
|
if (usedEntries != fEMShminfo->currentSize / sizeof(EMEntry))
|
|
{
|
|
cerr << "checkConsistency: used extent map entries = " << usedEntries
|
|
<< " metadata says " << fEMShminfo->currentSize / sizeof(EMEntry)
|
|
<< endl;
|
|
throw logic_error("EM checkConsistency test 5a (data structures are read-locked)");
|
|
}
|
|
|
|
for (i = 0, usedEntries = 0; i < flEntries; i++)
|
|
if (fFreeList[i].size != 0)
|
|
usedEntries++;
|
|
|
|
if (usedEntries != fFLShminfo->currentSize / sizeof(InlineLBIDRange))
|
|
{
|
|
cerr << "checkConsistency: used freelist entries = " << usedEntries
|
|
<< " metadata says " << fFLShminfo->currentSize / sizeof(InlineLBIDRange)
|
|
<< endl;
|
|
throw logic_error("EM checkConsistency test 5a (data structures are read-locked)");
|
|
}
|
|
|
|
cout << "test 5a passed\n";
|
|
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
return 0;
|
|
}
|
|
|
|
|
|
void ExtentMap::setReadOnly()
|
|
{
|
|
r_only = true;
|
|
}
|
|
|
|
void ExtentMap::undoChanges()
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug) TRACER_WRITENOW("undoChanges");
|
|
|
|
#endif
|
|
Undoable::undoChanges();
|
|
finishChanges();
|
|
}
|
|
|
|
void ExtentMap::confirmChanges()
|
|
{
|
|
#ifdef BRM_INFO
|
|
|
|
if (fDebug) TRACER_WRITENOW("confirmChanges");
|
|
|
|
#endif
|
|
Undoable::confirmChanges();
|
|
finishChanges();
|
|
}
|
|
|
|
void ExtentMap::finishChanges()
|
|
{
|
|
if (flLocked)
|
|
releaseFreeList(WRITE);
|
|
|
|
if (emLocked)
|
|
releaseEMEntryTable(WRITE);
|
|
}
|
|
|
|
const bool* ExtentMap::getEMFLLockStatus()
|
|
{
|
|
return &flLocked;
|
|
}
|
|
|
|
const bool* ExtentMap::getEMLockStatus()
|
|
{
|
|
return &emLocked;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Reload Config cache if config file time stamp has changed
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::checkReloadConfig()
|
|
{
|
|
config::Config* cf = config::Config::makeConfig();
|
|
|
|
// Immediately return if Columnstore.xml timestamp has not changed
|
|
if (cf->getCurrentMTime() == fCacheTime)
|
|
return;
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Initialize outdated attribute still used by primitiveserver.
|
|
// Hardcode to 8K for now, since that's all we support.
|
|
//--------------------------------------------------------------------------
|
|
ExtentSize = 0x2000;
|
|
|
|
// string es = cf->getConfig("ExtentMap", "ExtentSize");
|
|
// if (es.length() == 0) es = "8K";
|
|
// if (es == "8K" || es == "8k")
|
|
// {
|
|
// ExtentSize = 0x2000;
|
|
// }
|
|
// else if (es == "1K" || es == "1k")
|
|
// {
|
|
// ExtentSize = 0x400;
|
|
// }
|
|
// else if (es == "64K" || es == "64k")
|
|
// {
|
|
// ExtentSize = 0x10000;
|
|
// }
|
|
// else
|
|
// {
|
|
// throw logic_error("Invalid ExtentSize found in config file!");
|
|
// }
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Initialize number of rows per extent
|
|
// Hardcode to 8M for now, since that's all we support.
|
|
//--------------------------------------------------------------------------
|
|
ExtentRows = 0x800000;
|
|
|
|
// string er = cf->getConfig("ExtentMap", "ExtentRows");
|
|
// if (er.length() == 0) er = "8M";
|
|
// if (er == "8M" || er == "8m")
|
|
// {
|
|
// ExtentRows = 0x800000;
|
|
// }
|
|
// else if (er == "1M" || er == "1m")
|
|
// {
|
|
// ExtentRows = 0x100000;
|
|
// }
|
|
// else if (er == "64M" || er == "64m")
|
|
// {
|
|
// ExtentRows = 0x4000000;
|
|
// }
|
|
// else
|
|
// {
|
|
// throw logic_error("Invalid ExtentRows found in config file!");
|
|
// }
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Initialize segment files per physical partition
|
|
//--------------------------------------------------------------------------
|
|
string fpc = cf->getConfig("ExtentMap", "FilesPerColumnPartition");
|
|
filesPerColumnPartition = cf->uFromText(fpc);
|
|
|
|
if (filesPerColumnPartition == 0)
|
|
filesPerColumnPartition = 4;
|
|
|
|
// Get latest Columnstore.xml timestamp after first access forced a reload
|
|
fCacheTime = cf ->getLastMTime();
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Initialize extents per segment file
|
|
//--------------------------------------------------------------------------
|
|
string epsf = cf->getConfig("ExtentMap", "ExtentsPerSegmentFile");
|
|
extentsPerSegmentFile = cf->uFromText(epsf);
|
|
|
|
if (extentsPerSegmentFile == 0)
|
|
extentsPerSegmentFile = 2;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Returns the number of extents in a segment file.
|
|
// Mutex lock and call to checkReloadConfig() not currently necessary since,
|
|
// going with hardcoded value. See checkReloadConfig().
|
|
//------------------------------------------------------------------------------
|
|
unsigned ExtentMap::getExtentSize() // dmc-should deprecate
|
|
{
|
|
// boost::mutex::scoped_lock lk(fConfigCacheMutex);
|
|
// checkReloadConfig( );
|
|
|
|
ExtentSize = 0x2000;
|
|
return ExtentSize;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Returns the number or rows per extent. Only supported values are 1m, 8m,
|
|
// and 64m.
|
|
// Mutex lock and call to checkReloadConfig() not currently necessary since,
|
|
// going with hardcoded value. See checkReloadConfig().
|
|
//------------------------------------------------------------------------------
|
|
unsigned ExtentMap::getExtentRows()
|
|
{
|
|
// boost::mutex::scoped_lock lk(fConfigCacheMutex);
|
|
// checkReloadConfig( );
|
|
|
|
ExtentRows = 0x800000;
|
|
return ExtentRows;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Returns the number of column segment files for an OID, that make up a
|
|
// partition.
|
|
//------------------------------------------------------------------------------
|
|
unsigned ExtentMap::getFilesPerColumnPartition()
|
|
{
|
|
boost::mutex::scoped_lock lk(fConfigCacheMutex);
|
|
checkReloadConfig( );
|
|
|
|
return filesPerColumnPartition;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Returns the number of extents in a segment file.
|
|
//------------------------------------------------------------------------------
|
|
unsigned ExtentMap::getExtentsPerSegmentFile()
|
|
{
|
|
boost::mutex::scoped_lock lk(fConfigCacheMutex);
|
|
checkReloadConfig( );
|
|
|
|
return extentsPerSegmentFile;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Returns the number of DBRoots to be used in storing db column files.
|
|
//------------------------------------------------------------------------------
|
|
unsigned ExtentMap::getDbRootCount()
|
|
{
|
|
oam::OamCache* oamcache = oam::OamCache::makeOamCache();
|
|
unsigned int rootCnt = oamcache->getDBRootCount();
|
|
|
|
return rootCnt;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Get list of DBRoots that map to the specified PM. DBRoot list is cached
|
|
// internally in fPmDbRootMap after getting from Columnstore.xml via OAM.
|
|
//------------------------------------------------------------------------------
|
|
void ExtentMap::getPmDbRoots( int pm, vector<int>& dbRootList )
|
|
{
|
|
oam::OamCache* oamcache = oam::OamCache::makeOamCache();
|
|
oam::OamCache::PMDbrootsMap_t pmDbroots = oamcache->getPMToDbrootsMap();
|
|
|
|
dbRootList.clear();
|
|
dbRootList = (*pmDbroots)[pm];
|
|
}
|
|
|
|
vector<InlineLBIDRange> ExtentMap::getFreeListEntries()
|
|
{
|
|
vector<InlineLBIDRange> v;
|
|
grabEMEntryTable(READ);
|
|
grabFreeList(READ);
|
|
|
|
int allocdSize = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
|
|
|
|
for (int i = 0; i < allocdSize; i++)
|
|
v.push_back(fFreeList[i]);
|
|
|
|
releaseFreeList(READ);
|
|
releaseEMEntryTable(READ);
|
|
return v;
|
|
}
|
|
|
|
void ExtentMap::dumpTo(ostream& os)
|
|
{
|
|
grabEMEntryTable(READ);
|
|
unsigned emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
|
|
|
|
for (unsigned i = 0; i < emEntries; i++)
|
|
{
|
|
if (fExtentMap[i].range.size != 0)
|
|
{
|
|
os << fExtentMap[i].range.start << '|'
|
|
<< fExtentMap[i].range.size << '|'
|
|
<< fExtentMap[i].fileID << '|'
|
|
<< fExtentMap[i].blockOffset << '|'
|
|
<< fExtentMap[i].HWM << '|'
|
|
<< fExtentMap[i].partitionNum << '|'
|
|
<< fExtentMap[i].segmentNum << '|'
|
|
<< fExtentMap[i].dbRoot << '|'
|
|
<< fExtentMap[i].colWid << '|'
|
|
<< fExtentMap[i].status << '|'
|
|
<< fExtentMap[i].partition.cprange.hi_val << '|'
|
|
<< fExtentMap[i].partition.cprange.lo_val << '|'
|
|
<< fExtentMap[i].partition.cprange.sequenceNum << '|'
|
|
<< (int)fExtentMap[i].partition.cprange.isValid << '|'
|
|
<< endl;
|
|
}
|
|
}
|
|
|
|
releaseEMEntryTable(READ);
|
|
}
|
|
|
|
/*int ExtentMap::physicalPartitionNum(const set<OID_t>& oids,
|
|
const set<uint32_t>& partitionNums,
|
|
vector<PartitionInfo>& partitionInfos)
|
|
{
|
|
#ifdef BRM_INFO
|
|
if (fDebug)
|
|
{
|
|
TRACER_WRITENOW("physicalPartitionNum");
|
|
ostringstream oss;
|
|
set<uint32_t>::const_iterator partIt;
|
|
oss << "partitionNums: "
|
|
for (partIt=partitionNums.begin(); it!=partitionNums.end(); ++it)
|
|
oss << (*it) << " ";
|
|
oss << endl;
|
|
TRACER_WRITEDIRECT(oss.str());
|
|
}
|
|
#endif
|
|
|
|
set<OID_t>::const_iterator it;
|
|
grabEMEntryTable(READ);
|
|
|
|
int emEntries = fEMShminfo->allocdSize/sizeof(struct EMEntry);
|
|
PartitionInfo partInfo;
|
|
vector<uint32_t> extents;
|
|
set<uint32_t> foundPartitions;
|
|
for (int i = 0; i < emEntries; i++)
|
|
{
|
|
if ((fExtentMap[i].range.size != 0 ) &&
|
|
partitionNums.find(logicalPartitionNum(fExtentMap[i])) != partitionNums.end())
|
|
{
|
|
it = oids.find( fExtentMap[i].fileID );
|
|
if (it != oids.end())
|
|
{
|
|
partInfo.oid = fExtentMap[i].fileID;
|
|
partInfo.lp.dbroot = fExtentMap[i].dbRoot;
|
|
partInfo.lp.pp = fExtentMap[i].partitionNum;
|
|
partInfo.lp.seg = fExtentMap[i].segmentNum;
|
|
partitionInfos.push_back(partInfo);
|
|
}
|
|
}
|
|
}
|
|
releaseEMEntryTable(READ);
|
|
return 0;
|
|
}
|
|
*/
|
|
|
|
} //namespace
|
|
// vim:ts=4 sw=4:
|
|
|