mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-06-06 08:21:01 +03:00
2055 lines
60 KiB
C++
2055 lines
60 KiB
C++
/*
|
|
Copyright (C) 2014 InfiniDB, Inc.
|
|
Copyright (c) 2019 MariaDB Corporation
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA.
|
|
*/
|
|
|
|
//
|
|
// C++ Interface: rowgroup
|
|
//
|
|
// Description:
|
|
//
|
|
// Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
|
|
|
|
#ifndef ROWGROUP_H_
|
|
#define ROWGROUP_H_
|
|
|
|
#include <vector>
|
|
#include <string>
|
|
#include <stdexcept>
|
|
//#define NDEBUG
|
|
#include <cassert>
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <boost/shared_array.hpp>
|
|
#include <boost/thread/mutex.hpp>
|
|
#include <cmath>
|
|
#include <cfloat>
|
|
#ifdef __linux__
|
|
#include <execinfo.h>
|
|
#endif
|
|
|
|
#if defined(_MSC_VER) && !defined(isnan)
|
|
#define isnan _isnan
|
|
#endif
|
|
|
|
#include "hasher.h"
|
|
|
|
#include "joblisttypes.h"
|
|
#include "bytestream.h"
|
|
#include "calpontsystemcatalog.h"
|
|
#include "exceptclasses.h"
|
|
#include "mcsv1_udaf.h"
|
|
|
|
#include "branchpred.h"
|
|
|
|
#include "../winport/winport.h"
|
|
|
|
// Because including my_sys.h in a Columnstore header causes too many conflicts
|
|
struct charset_info_st;
|
|
typedef const struct charset_info_st CHARSET_INFO;
|
|
|
|
|
|
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
|
|
|
|
namespace rowgroup
|
|
{
|
|
|
|
const int16_t rgCommonSize = 8192;
|
|
|
|
/*
|
|
The format of the data RowGroup points to is currently ...
|
|
|
|
32-bit - Row Count
|
|
64-bit - Base Rid
|
|
16-bit - Status
|
|
Row*
|
|
|
|
where
|
|
Row = 16-bit rid # relative to the base rid
|
|
field1
|
|
field2
|
|
...
|
|
|
|
RowGroup.getRow() points a Row instance to the specified row within the
|
|
RowGroup data
|
|
*/
|
|
|
|
// VS'08 carps that struct MemChunk is not default copyable because of the zero-length array.
|
|
// This may be so, and we'll get link errors if someone trys, but so far no one has.
|
|
#ifdef _MSC_VER
|
|
#pragma warning (push)
|
|
#pragma warning (disable : 4200)
|
|
#endif
|
|
|
|
class StringStore
|
|
{
|
|
public:
|
|
StringStore();
|
|
virtual ~StringStore();
|
|
|
|
inline std::string getString(uint64_t offset) const;
|
|
uint64_t storeString(const uint8_t* data, uint32_t length); //returns the offset
|
|
inline const uint8_t* getPointer(uint64_t offset) const;
|
|
inline uint32_t getStringLength(uint64_t offset);
|
|
inline bool isEmpty() const;
|
|
inline uint64_t getSize() const;
|
|
inline bool isNullValue(uint64_t offset) const;
|
|
inline bool equals(const std::string& str, uint64_t offset) const;
|
|
|
|
void clear();
|
|
|
|
void serialize(messageqcpp::ByteStream&) const;
|
|
void deserialize(messageqcpp::ByteStream&);
|
|
|
|
//@bug6065, make StringStore::storeString() thread safe
|
|
void useStoreStringMutex(bool b)
|
|
{
|
|
fUseStoreStringMutex = b;
|
|
}
|
|
bool useStoreStringMutex() const
|
|
{
|
|
return fUseStoreStringMutex;
|
|
}
|
|
|
|
private:
|
|
std::string empty_str;
|
|
|
|
StringStore(const StringStore&);
|
|
StringStore& operator=(const StringStore&);
|
|
static const uint32_t CHUNK_SIZE = 64 * 1024; // allocators like powers of 2
|
|
|
|
// This is an overlay b/c the underlying data needs to be any size,
|
|
// and alloc'd in one chunk. data can't be a separate dynamic chunk.
|
|
struct MemChunk
|
|
{
|
|
uint32_t currentSize;
|
|
uint32_t capacity;
|
|
uint8_t data[];
|
|
};
|
|
|
|
std::vector<boost::shared_array<uint8_t> > mem;
|
|
|
|
// To store strings > 64KB (BLOB/TEXT)
|
|
std::vector<boost::shared_array<uint8_t> > longStrings;
|
|
bool empty;
|
|
bool fUseStoreStringMutex; //@bug6065, make StringStore::storeString() thread safe
|
|
boost::mutex fMutex;
|
|
};
|
|
|
|
// Where we store user data for UDA(n)F
|
|
class UserDataStore
|
|
{
|
|
// length represents the fixed portion length of userData.
|
|
// There may be variable length data in containers or other
|
|
// user created structures.
|
|
struct StoreData
|
|
{
|
|
int32_t length;
|
|
std::string functionName;
|
|
boost::shared_ptr<mcsv1sdk::UserData> userData;
|
|
StoreData() : length(0) { }
|
|
StoreData(const StoreData& rhs)
|
|
{
|
|
length = rhs.length;
|
|
functionName = rhs.functionName;
|
|
userData = rhs.userData;
|
|
}
|
|
};
|
|
|
|
public:
|
|
UserDataStore();
|
|
virtual ~UserDataStore();
|
|
|
|
void serialize(messageqcpp::ByteStream&) const;
|
|
void deserialize(messageqcpp::ByteStream&);
|
|
|
|
//Set to make UserDataStore thread safe
|
|
void useUserDataMutex(bool b)
|
|
{
|
|
fUseUserDataMutex = b;
|
|
}
|
|
bool useUserDataMutex() const
|
|
{
|
|
return fUseUserDataMutex;
|
|
}
|
|
|
|
// Returns the offset
|
|
uint32_t storeUserData(mcsv1sdk::mcsv1Context& context,
|
|
boost::shared_ptr<mcsv1sdk::UserData> data,
|
|
uint32_t length);
|
|
|
|
boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t offset) const;
|
|
|
|
private:
|
|
UserDataStore(const UserDataStore&);
|
|
UserDataStore& operator=(const UserDataStore&);
|
|
|
|
std::vector<StoreData> vStoreData;
|
|
|
|
bool fUseUserDataMutex;
|
|
boost::mutex fMutex;
|
|
};
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning (pop)
|
|
#endif
|
|
|
|
class RowGroup;
|
|
class Row;
|
|
|
|
/* TODO: OO the rowgroup data to the extent there's no measurable performance hit. */
|
|
class RGData
|
|
{
|
|
public:
|
|
RGData(); // useless unless followed by an = or a deserialize operation
|
|
RGData(const RowGroup& rg, uint32_t rowCount); // allocates memory for rowData
|
|
explicit RGData(const RowGroup& rg);
|
|
RGData(const RGData&);
|
|
virtual ~RGData();
|
|
|
|
inline RGData& operator=(const RGData&);
|
|
|
|
// amount should be the # returned by RowGroup::getDataSize()
|
|
void serialize(messageqcpp::ByteStream&, uint32_t amount) const;
|
|
|
|
// the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
|
|
// inline data with a length field. Once that's converted to string table format, that
|
|
// option can go away.
|
|
void deserialize(messageqcpp::ByteStream&, bool hasLengthField = false); // returns the # of bytes read
|
|
|
|
inline uint64_t getStringTableMemUsage();
|
|
void clear();
|
|
void reinit(const RowGroup& rg);
|
|
void reinit(const RowGroup& rg, uint32_t rowCount);
|
|
inline void setStringStore(boost::shared_ptr<StringStore>& ss)
|
|
{
|
|
strings = ss;
|
|
}
|
|
|
|
// this will use the pre-configured Row to figure out where row # num is, then set the Row
|
|
// to point to it. It's a shortcut around using a RowGroup to do the same thing for cases
|
|
// where it's inconvenient to instantiate one.
|
|
inline void getRow(uint32_t num, Row* row);
|
|
|
|
//@bug6065, make StringStore::storeString() thread safe
|
|
void useStoreStringMutex(bool b)
|
|
{
|
|
if (strings) strings->useStoreStringMutex(b);
|
|
}
|
|
bool useStoreStringMutex() const
|
|
{
|
|
return (strings ? (strings->useStoreStringMutex()) : false);
|
|
}
|
|
|
|
UserDataStore* getUserDataStore();
|
|
// make UserDataStore::storeData() thread safe
|
|
void useUserDataMutex(bool b)
|
|
{
|
|
if (userDataStore) userDataStore->useUserDataMutex(b);
|
|
}
|
|
bool useUserDataMutex() const
|
|
{
|
|
return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
|
|
}
|
|
|
|
boost::shared_array<uint8_t> rowData;
|
|
boost::shared_ptr<StringStore> strings;
|
|
boost::shared_ptr<UserDataStore> userDataStore;
|
|
private:
|
|
//boost::shared_array<uint8_t> rowData;
|
|
//boost::shared_ptr<StringStore> strings;
|
|
|
|
// Need sig to support backward compat. RGData can deserialize both forms.
|
|
static const uint32_t RGDATA_SIG = 0xffffffff; //won't happen for 'old' Rowgroup data
|
|
|
|
friend class RowGroup;
|
|
};
|
|
|
|
|
|
class Row
|
|
{
|
|
public:
|
|
struct Pointer
|
|
{
|
|
inline Pointer() : data(NULL), strings(NULL), userDataStore(NULL) { }
|
|
|
|
// Pointer(uint8_t*) implicitly makes old code compatible with the string table impl;
|
|
// make it explicit to identify things that still might need to be changed
|
|
inline Pointer(uint8_t* d) : data(d), strings(NULL), userDataStore(NULL) { }
|
|
inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s), userDataStore(NULL) { }
|
|
inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) :
|
|
data(d), strings(s), userDataStore(u) { }
|
|
uint8_t* data;
|
|
StringStore* strings;
|
|
UserDataStore* userDataStore;
|
|
};
|
|
|
|
Row();
|
|
Row(const Row&);
|
|
~Row();
|
|
|
|
Row& operator=(const Row&);
|
|
bool operator==(const Row&) const;
|
|
|
|
//void setData(uint8_t *rowData, StringStore *ss);
|
|
inline void setData(const Pointer&); // convenience fcn, can go away
|
|
inline uint8_t* getData() const;
|
|
|
|
inline void setPointer(const Pointer&);
|
|
inline Pointer getPointer() const;
|
|
|
|
inline void nextRow();
|
|
inline uint32_t getColumnWidth(uint32_t colIndex) const;
|
|
inline uint32_t getColumnCount() const;
|
|
inline uint32_t getSize() const; // this is only accurate if there is no string table
|
|
// if a string table is being used, getRealSize() takes into account variable-length strings
|
|
inline uint32_t getRealSize() const;
|
|
inline uint32_t getOffset(uint32_t colIndex) const;
|
|
inline uint32_t getScale(uint32_t colIndex) const;
|
|
inline uint32_t getPrecision(uint32_t colIndex) const;
|
|
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
|
|
inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
|
|
inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
|
|
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
|
|
|
|
// this returns true if the type is not CHAR or VARCHAR
|
|
inline bool isCharType(uint32_t colIndex) const;
|
|
inline bool isUnsigned(uint32_t colIndex) const;
|
|
inline bool isShortString(uint32_t colIndex) const;
|
|
inline bool isLongString(uint32_t colIndex) const;
|
|
|
|
template<int len> inline uint64_t getUintField(uint32_t colIndex) const;
|
|
inline uint64_t getUintField(uint32_t colIndex) const;
|
|
template<int len> inline int64_t getIntField(uint32_t colIndex) const;
|
|
inline int64_t getIntField(uint32_t colIndex) const;
|
|
template<int len> inline bool equals(uint64_t val, uint32_t colIndex) const;
|
|
inline bool equals(long double val, uint32_t colIndex) const;
|
|
inline bool equals(const std::string& val, uint32_t colIndex) const;
|
|
|
|
inline double getDoubleField(uint32_t colIndex) const;
|
|
inline float getFloatField(uint32_t colIndex) const;
|
|
inline double getDecimalField(uint32_t colIndex) const
|
|
{
|
|
return 0.0; // TODO: Do something here
|
|
}
|
|
inline long double getLongDoubleField(uint32_t colIndex) const;
|
|
|
|
inline uint64_t getBaseRid() const;
|
|
inline uint64_t getRid() const;
|
|
inline uint16_t getRelRid() const; // returns a rid relative to this logical block
|
|
inline uint64_t getExtentRelativeRid() const; // returns a rid relative to the extent it's in
|
|
inline uint64_t getFileRelativeRid() const; // returns a file-relative rid
|
|
inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
|
|
uint16_t* blockNum, uint16_t* rowNum);
|
|
|
|
template<int len> void setUintField(uint64_t val, uint32_t colIndex);
|
|
|
|
/* Note: these 2 fcns avoid 1 array lookup per call. Using them only
|
|
in projection on the PM resulted in a 2.8% performance gain on
|
|
the queries listed in bug 2223.
|
|
TODO: apply them everywhere else possible, and write equivalents
|
|
for the other types as well as the getters.
|
|
*/
|
|
template<int len> void setUintField_offset(uint64_t val, uint32_t offset);
|
|
inline void nextRow(uint32_t size);
|
|
inline void prevRow(uint32_t size, uint64_t number);
|
|
|
|
inline void setUintField(uint64_t val, uint32_t colIndex);
|
|
template<int len> void setIntField(int64_t, uint32_t colIndex);
|
|
inline void setIntField(int64_t, uint32_t colIndex);
|
|
|
|
inline void setDoubleField(double val, uint32_t colIndex);
|
|
inline void setFloatField(float val, uint32_t colIndex);
|
|
inline void setDecimalField(double val, uint32_t colIndex) { }; // TODO: Do something here
|
|
inline void setLongDoubleField(long double val, uint32_t colIndex);
|
|
|
|
inline void setRid(uint64_t rid);
|
|
|
|
// is string efficient for this?
|
|
inline std::string getStringField(uint32_t colIndex) const;
|
|
inline const uint8_t* getStringPointer(uint32_t colIndex) const;
|
|
inline uint32_t getStringLength(uint32_t colIndex) const;
|
|
void setStringField(const std::string& val, uint32_t colIndex);
|
|
inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
|
|
|
|
// support VARBINARY
|
|
// Add 2-byte length at the beginning of the field. NULL and zero length field are
|
|
// treated the same, could use one of the length bit to distinguish these two cases.
|
|
inline std::string getVarBinaryStringField(uint32_t colIndex) const;
|
|
inline void setVarBinaryField(const std::string& val, uint32_t colIndex);
|
|
// No string construction is necessary for better performance.
|
|
inline uint32_t getVarBinaryLength(uint32_t colIndex) const;
|
|
inline const uint8_t* getVarBinaryField(uint32_t colIndex) const;
|
|
inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
|
|
inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
|
|
|
|
inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
|
|
inline void setUserData(mcsv1sdk::mcsv1Context& context,
|
|
boost::shared_ptr<mcsv1sdk::UserData> userData,
|
|
uint32_t len, uint32_t colIndex);
|
|
|
|
uint64_t getNullValue(uint32_t colIndex) const;
|
|
bool isNullValue(uint32_t colIndex) const;
|
|
|
|
// when NULLs are pulled out via getIntField(), they come out with these values.
|
|
// Ex: the 1-byte int null value is 0x80. When it gets cast to an int64_t
|
|
// it becomes 0xffffffffffffff80, which won't match anything returned by getNullValue().
|
|
int64_t getSignedNullValue(uint32_t colIndex) const;
|
|
|
|
// copy data in srcIndex field to destIndex, all data type
|
|
inline void copyField(uint32_t destIndex, uint32_t srcIndex) const;
|
|
|
|
// copy data in srcIndex field to destAddr, all data type
|
|
//inline void copyField(uint8_t* destAddr, uint32_t srcIndex) const;
|
|
|
|
// an adapter for code that uses the copyField call above;
|
|
// that's not string-table safe, this one is
|
|
inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
|
|
|
|
std::string toString() const;
|
|
std::string toCSV() const;
|
|
|
|
/* These fcns are used only in joins. The RID doesn't matter on the side that
|
|
gets hashed. We steal that field here to "mark" a row. */
|
|
inline void markRow();
|
|
inline void zeroRid();
|
|
inline bool isMarked();
|
|
void initToNull();
|
|
|
|
inline void usesStringTable(bool b)
|
|
{
|
|
useStringTable = b;
|
|
}
|
|
inline bool usesStringTable() const
|
|
{
|
|
return useStringTable;
|
|
}
|
|
inline bool hasLongString() const
|
|
{
|
|
return hasLongStringField;
|
|
}
|
|
|
|
// these are for cases when you already know the type definitions are the same.
|
|
// a fcn to check the type defs seperately doesn't exist yet. No normalization.
|
|
inline uint64_t hash(const std::vector<uint32_t>& keyColumns, uint32_t seed = 0) const;
|
|
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
|
inline uint64_t hash() const; // generates a hash for all cols
|
|
|
|
inline bool equals(const Row&, const std::vector<uint32_t>& keyColumns) const;
|
|
inline bool equals(const Row&, uint32_t lastCol) const;
|
|
inline bool equals(const Row&) const;
|
|
|
|
inline void setUserDataStore(UserDataStore* u)
|
|
{
|
|
userDataStore = u;
|
|
}
|
|
private:
|
|
uint32_t columnCount;
|
|
uint64_t baseRid;
|
|
|
|
// the next 6 point to memory owned by RowGroup
|
|
uint32_t* oldOffsets;
|
|
uint32_t* stOffsets;
|
|
uint32_t* offsets;
|
|
uint32_t* colWidths;
|
|
execplan::CalpontSystemCatalog::ColDataType* types;
|
|
uint32_t* charsetNumbers;
|
|
uint8_t* data;
|
|
uint32_t* scale;
|
|
uint32_t* precision;
|
|
|
|
StringStore* strings;
|
|
bool useStringTable;
|
|
bool hasLongStringField;
|
|
uint32_t sTableThreshold;
|
|
boost::shared_array<bool> forceInline;
|
|
inline bool inStringTable(uint32_t col) const;
|
|
|
|
UserDataStore* userDataStore; // For UDAF
|
|
|
|
friend class RowGroup;
|
|
};
|
|
|
|
inline Row::Pointer Row::getPointer() const
|
|
{
|
|
return Pointer(data, strings, userDataStore);
|
|
}
|
|
inline uint8_t* Row::getData() const
|
|
{
|
|
return data;
|
|
}
|
|
|
|
inline void Row::setPointer(const Pointer& p)
|
|
{
|
|
data = p.data;
|
|
strings = p.strings;
|
|
bool hasStrings = (strings != 0);
|
|
|
|
if (useStringTable != hasStrings)
|
|
{
|
|
useStringTable = hasStrings;
|
|
offsets = (useStringTable ? stOffsets : oldOffsets);
|
|
}
|
|
|
|
userDataStore = p.userDataStore;
|
|
}
|
|
|
|
inline void Row::setData(const Pointer& p)
|
|
{
|
|
setPointer(p);
|
|
}
|
|
|
|
inline void Row::nextRow()
|
|
{
|
|
data += offsets[columnCount];
|
|
}
|
|
|
|
inline uint32_t Row::getColumnCount() const
|
|
{
|
|
return columnCount;
|
|
}
|
|
|
|
inline uint32_t Row::getColumnWidth(uint32_t col) const
|
|
{
|
|
return colWidths[col];
|
|
}
|
|
|
|
inline uint32_t Row::getSize() const
|
|
{
|
|
return offsets[columnCount];
|
|
}
|
|
|
|
inline uint32_t Row::getRealSize() const
|
|
{
|
|
if (!useStringTable)
|
|
return getSize();
|
|
|
|
uint32_t ret = 2;
|
|
|
|
for (uint32_t i = 0; i < columnCount; i++)
|
|
{
|
|
if (!inStringTable(i))
|
|
ret += getColumnWidth(i);
|
|
else
|
|
ret += getStringLength(i);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
inline uint32_t Row::getScale(uint32_t col) const
|
|
{
|
|
return scale[col];
|
|
}
|
|
|
|
inline uint32_t Row::getPrecision(uint32_t col) const
|
|
{
|
|
return precision[col];
|
|
}
|
|
|
|
inline execplan::CalpontSystemCatalog::ColDataType Row::getColType(uint32_t colIndex) const
|
|
{
|
|
return types[colIndex];
|
|
}
|
|
|
|
inline execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes()
|
|
{
|
|
return types;
|
|
}
|
|
|
|
inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() const
|
|
{
|
|
return types;
|
|
}
|
|
|
|
inline uint32_t Row::getCharsetNumber(uint32_t col) const
|
|
{
|
|
return charsetNumbers[col];
|
|
}
|
|
|
|
inline bool Row::isCharType(uint32_t colIndex) const
|
|
{
|
|
return execplan::isCharType(types[colIndex]);
|
|
}
|
|
|
|
inline bool Row::isUnsigned(uint32_t colIndex) const
|
|
{
|
|
return execplan::isUnsigned(types[colIndex]);
|
|
}
|
|
|
|
inline bool Row::isShortString(uint32_t colIndex) const
|
|
{
|
|
return (getColumnWidth(colIndex) <= 8 && isCharType(colIndex));
|
|
}
|
|
|
|
inline bool Row::isLongString(uint32_t colIndex) const
|
|
{
|
|
return (getColumnWidth(colIndex) > 8 && isCharType(colIndex));
|
|
}
|
|
|
|
inline bool Row::inStringTable(uint32_t col) const
|
|
{
|
|
return strings && getColumnWidth(col) >= sTableThreshold && !forceInline[col];
|
|
}
|
|
|
|
template<int len>
|
|
inline bool Row::equals(uint64_t val, uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (len)
|
|
{
|
|
case 1:
|
|
return data[offsets[colIndex]] == val;
|
|
|
|
case 2:
|
|
return *((uint16_t*) &data[offsets[colIndex]]) == val;
|
|
|
|
case 4:
|
|
return *((uint32_t*) &data[offsets[colIndex]]) == val;
|
|
|
|
case 8:
|
|
return *((uint64_t*) &data[offsets[colIndex]]) == val;
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::equals(): bad length.");
|
|
}
|
|
}
|
|
|
|
inline bool Row::equals(long double val, uint32_t colIndex) const
|
|
{
|
|
return *((long double*) &data[offsets[colIndex]]) == val;
|
|
}
|
|
|
|
inline bool Row::equals(const std::string& val, uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
{
|
|
uint64_t offset = *((uint64_t*) &data[offsets[colIndex]]);
|
|
return strings->equals(val, offset);
|
|
}
|
|
else
|
|
return (strncmp(val.c_str(), (char*) &data[offsets[colIndex]], getColumnWidth(colIndex)) == 0);
|
|
}
|
|
|
|
template<int len>
|
|
inline uint64_t Row::getUintField(uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (len)
|
|
{
|
|
case 1:
|
|
return data[offsets[colIndex]];
|
|
|
|
case 2:
|
|
return *((uint16_t*) &data[offsets[colIndex]]);
|
|
|
|
case 4:
|
|
return *((uint32_t*) &data[offsets[colIndex]]);
|
|
|
|
case 8:
|
|
return *((uint64_t*) &data[offsets[colIndex]]);
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::getUintField(): bad length.");
|
|
}
|
|
}
|
|
|
|
inline uint64_t Row::getUintField(uint32_t colIndex) const
|
|
{
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1:
|
|
return data[offsets[colIndex]];
|
|
|
|
case 2:
|
|
return *((uint16_t*) &data[offsets[colIndex]]);
|
|
|
|
case 4:
|
|
return *((uint32_t*) &data[offsets[colIndex]]);
|
|
|
|
case 8:
|
|
return *((uint64_t*) &data[offsets[colIndex]]);
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::getUintField(): bad length.");
|
|
}
|
|
}
|
|
|
|
template<int len>
|
|
inline int64_t Row::getIntField(uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (len)
|
|
{
|
|
case 1:
|
|
return (int8_t) data[offsets[colIndex]];
|
|
|
|
case 2:
|
|
return *((int16_t*) &data[offsets[colIndex]]);
|
|
|
|
case 4:
|
|
return *((int32_t*) &data[offsets[colIndex]]);
|
|
|
|
case 8:
|
|
return *((int64_t*) &data[offsets[colIndex]]);
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::getIntField(): bad length.");
|
|
}
|
|
}
|
|
|
|
inline int64_t Row::getIntField(uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1:
|
|
return (int8_t) data[offsets[colIndex]];
|
|
|
|
case 2:
|
|
return *((int16_t*) &data[offsets[colIndex]]);
|
|
|
|
case 4:
|
|
return *((int32_t*) &data[offsets[colIndex]]);
|
|
|
|
case 8:
|
|
return *((int64_t*) &data[offsets[colIndex]]);
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::getIntField(): bad length.");
|
|
}
|
|
}
|
|
|
|
inline const uint8_t* Row::getStringPointer(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
|
|
|
|
return &data[offsets[colIndex]];
|
|
}
|
|
|
|
inline uint32_t Row::getStringLength(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
|
|
|
|
return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
|
|
}
|
|
|
|
inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
|
|
{
|
|
uint64_t offset;
|
|
|
|
if (length > getColumnWidth(colIndex))
|
|
length = getColumnWidth(colIndex);
|
|
|
|
if (inStringTable(colIndex))
|
|
{
|
|
offset = strings->storeString(strdata, length);
|
|
*((uint64_t*) &data[offsets[colIndex]]) = offset;
|
|
// cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
|
|
// << " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
|
|
// << endl;
|
|
}
|
|
else
|
|
{
|
|
memcpy(&data[offsets[colIndex]], strdata, length);
|
|
memset(&data[offsets[colIndex] + length], 0,
|
|
offsets[colIndex + 1] - (offsets[colIndex] + length));
|
|
}
|
|
}
|
|
|
|
inline std::string Row::getStringField(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return strings->getString(*((uint64_t*) &data[offsets[colIndex]]));
|
|
|
|
// Not all CHAR/VARCHAR are NUL terminated so use length
|
|
return std::string((char*) &data[offsets[colIndex]],
|
|
strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
|
|
}
|
|
|
|
inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return getStringField(colIndex);
|
|
|
|
return std::string((char*) &data[offsets[colIndex] + 2], *((uint16_t*) &data[offsets[colIndex]]));
|
|
}
|
|
|
|
inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));;
|
|
|
|
return *((uint16_t*) &data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
|
|
|
|
return &data[offsets[colIndex] + 2];
|
|
}
|
|
|
|
inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
{
|
|
len = strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
|
|
return getVarBinaryField(colIndex);
|
|
}
|
|
else
|
|
{
|
|
len = *((uint16_t*) &data[offsets[colIndex]]);
|
|
return &data[offsets[colIndex] + 2];
|
|
}
|
|
}
|
|
|
|
inline boost::shared_ptr<mcsv1sdk::UserData> Row::getUserData(uint32_t colIndex) const
|
|
{
|
|
if (!userDataStore)
|
|
{
|
|
return boost::shared_ptr<mcsv1sdk::UserData>();
|
|
}
|
|
|
|
return userDataStore->getUserData(*((uint32_t*) &data[offsets[colIndex]]));
|
|
}
|
|
|
|
inline double Row::getDoubleField(uint32_t colIndex) const
|
|
{
|
|
return *((double*) &data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline float Row::getFloatField(uint32_t colIndex) const
|
|
{
|
|
return *((float*) &data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline long double Row::getLongDoubleField(uint32_t colIndex) const
|
|
{
|
|
return *((long double*) &data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline uint64_t Row::getRid() const
|
|
{
|
|
return baseRid + *((uint16_t*) data);
|
|
}
|
|
|
|
inline uint16_t Row::getRelRid() const
|
|
{
|
|
return *((uint16_t*) data);
|
|
}
|
|
|
|
inline uint64_t Row::getBaseRid() const
|
|
{
|
|
return baseRid;
|
|
}
|
|
|
|
inline void Row::markRow()
|
|
{
|
|
*((uint16_t*) data) = 0xffff;
|
|
}
|
|
|
|
inline void Row::zeroRid()
|
|
{
|
|
*((uint16_t*) data) = 0;
|
|
}
|
|
|
|
inline bool Row::isMarked()
|
|
{
|
|
return *((uint16_t*) data) == 0xffff;
|
|
}
|
|
|
|
/* Begin speculative code! */
|
|
inline uint32_t Row::getOffset(uint32_t colIndex) const
|
|
{
|
|
return offsets[colIndex];
|
|
}
|
|
|
|
template<int len>
|
|
inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
|
|
{
|
|
switch (len)
|
|
{
|
|
case 1:
|
|
data[offset] = val;
|
|
break;
|
|
|
|
case 2:
|
|
*((uint16_t*) &data[offset]) = val;
|
|
break;
|
|
|
|
case 4:
|
|
*((uint32_t*) &data[offset]) = val;
|
|
break;
|
|
|
|
case 8:
|
|
*((uint64_t*) &data[offset]) = val;
|
|
break;
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::setUintField called on a non-uint32_t field");
|
|
}
|
|
}
|
|
|
|
inline void Row::nextRow(uint32_t size)
|
|
{
|
|
data += size;
|
|
}
|
|
|
|
|
|
inline void Row::prevRow(uint32_t size, uint64_t number = 1)
|
|
{
|
|
data -= size * number;
|
|
}
|
|
|
|
template<int len>
|
|
inline void Row::setUintField(uint64_t val, uint32_t colIndex)
|
|
{
|
|
switch (len)
|
|
{
|
|
case 1:
|
|
data[offsets[colIndex]] = val;
|
|
break;
|
|
|
|
case 2:
|
|
*((uint16_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 4:
|
|
*((uint32_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 8:
|
|
*((uint64_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::setUintField called on a non-uint32_t field");
|
|
}
|
|
}
|
|
|
|
inline void Row::setUintField(uint64_t val, uint32_t colIndex)
|
|
{
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1:
|
|
data[offsets[colIndex]] = val;
|
|
break;
|
|
|
|
case 2:
|
|
*((uint16_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 4:
|
|
*((uint32_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 8:
|
|
*((uint64_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::setUintField: bad length");
|
|
}
|
|
}
|
|
|
|
template<int len>
|
|
inline void Row::setIntField(int64_t val, uint32_t colIndex)
|
|
{
|
|
switch (len)
|
|
{
|
|
case 1:
|
|
*((int8_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 2:
|
|
*((int16_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 4:
|
|
*((int32_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 8:
|
|
*((int64_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::setIntField: bad length");
|
|
}
|
|
}
|
|
|
|
inline void Row::setIntField(int64_t val, uint32_t colIndex)
|
|
{
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1:
|
|
*((int8_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 2:
|
|
*((int16_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 4:
|
|
*((int32_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
case 8:
|
|
*((int64_t*) &data[offsets[colIndex]]) = val;
|
|
break;
|
|
|
|
default:
|
|
idbassert(0);
|
|
throw std::logic_error("Row::setIntField: bad length");
|
|
}
|
|
}
|
|
|
|
inline void Row::setDoubleField(double val, uint32_t colIndex)
|
|
{
|
|
*((double*) &data[offsets[colIndex]]) = val;
|
|
}
|
|
|
|
inline void Row::setFloatField(float val, uint32_t colIndex)
|
|
{
|
|
//N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan,
|
|
// but not necessarily the same bits that you put in. This only seems to be for float (double seems
|
|
// to work).
|
|
if (std::isnan(val))
|
|
setUintField<4>(joblist::FLOATNULL, colIndex);
|
|
else
|
|
*((float*) &data[offsets[colIndex]]) = val;
|
|
}
|
|
|
|
inline void Row::setLongDoubleField(long double val, uint32_t colIndex)
|
|
{
|
|
uint8_t* p = &data[offsets[colIndex]];
|
|
*((long double*)p) = val;
|
|
if (sizeof(long double) == 16)
|
|
{
|
|
// zero out the unused portion as there may be garbage there.
|
|
*((uint64_t*)p+1) &= 0x000000000000FFFFULL;
|
|
}
|
|
}
|
|
|
|
inline void Row::setVarBinaryField(const std::string& val, uint32_t colIndex)
|
|
{
|
|
if (inStringTable(colIndex))
|
|
setStringField(val, colIndex);
|
|
else
|
|
{
|
|
*((uint16_t*) &data[offsets[colIndex]]) = static_cast<uint16_t>(val.length());
|
|
memcpy(&data[offsets[colIndex] + 2], val.data(), val.length());
|
|
}
|
|
}
|
|
|
|
inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex)
|
|
{
|
|
if (len > getColumnWidth(colIndex))
|
|
len = getColumnWidth(colIndex);
|
|
|
|
if (inStringTable(colIndex))
|
|
{
|
|
uint64_t offset = strings->storeString(val, len);
|
|
*((uint64_t*) &data[offsets[colIndex]]) = offset;
|
|
}
|
|
else
|
|
{
|
|
*((uint16_t*) &data[offsets[colIndex]]) = len;
|
|
memcpy(&data[offsets[colIndex] + 2], val, len);
|
|
}
|
|
}
|
|
|
|
inline void Row::setUserData(mcsv1sdk::mcsv1Context& context,
|
|
boost::shared_ptr<mcsv1sdk::UserData> userData,
|
|
uint32_t len, uint32_t colIndex)
|
|
{
|
|
if (!userDataStore)
|
|
{
|
|
return;
|
|
}
|
|
|
|
uint32_t offset = userDataStore->storeUserData(context, userData, len);
|
|
*((uint32_t*) &data[offsets[colIndex]]) = offset;
|
|
*((uint32_t*) &data[offsets[colIndex] + 4]) = len;
|
|
}
|
|
|
|
inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
|
|
{
|
|
uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
|
|
memmove(&data[offsets[destIndex]], &data[offsets[srcIndex]], n);
|
|
}
|
|
|
|
inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
|
|
{
|
|
if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
|
|
types[srcIndex] == execplan::CalpontSystemCatalog::BLOB ||
|
|
types[srcIndex] == execplan::CalpontSystemCatalog::TEXT))
|
|
out.setVarBinaryField(getVarBinaryStringField(srcIndex), destIndex);
|
|
else if (UNLIKELY(isLongString(srcIndex)))
|
|
out.setStringField(getStringPointer(srcIndex), getStringLength(srcIndex), destIndex);
|
|
//out.setStringField(getStringField(srcIndex), destIndex);
|
|
else if (UNLIKELY(isShortString(srcIndex)))
|
|
out.setUintField(getUintField(srcIndex), destIndex);
|
|
else if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::LONGDOUBLE))
|
|
out.setLongDoubleField(getLongDoubleField(srcIndex), destIndex);
|
|
else
|
|
out.setIntField(getIntField(srcIndex), destIndex);
|
|
}
|
|
|
|
inline void Row::setRid(uint64_t rid)
|
|
{
|
|
*((uint16_t*) data) = rid & 0xffff;
|
|
}
|
|
|
|
inline uint64_t Row::hash() const
|
|
{
|
|
return hash(columnCount - 1);
|
|
}
|
|
|
|
inline uint64_t Row::hash(const std::vector<uint32_t>& keyCols, uint32_t seed) const
|
|
{
|
|
utils::Hasher_r h;
|
|
uint32_t ret = seed;
|
|
|
|
for (uint32_t i = 0; i < keyCols.size(); i++)
|
|
{
|
|
const uint32_t& col = keyCols[i];
|
|
|
|
if (UNLIKELY(isLongString(col)))
|
|
ret = h((const char*) getStringPointer(col), getStringLength(col), ret);
|
|
else
|
|
{
|
|
ret = h((const char*) &data[offsets[i]], colWidths[i], ret);
|
|
}
|
|
}
|
|
|
|
ret = h.finalize(ret, keyCols.size() << 2);
|
|
return ret;
|
|
}
|
|
|
|
inline uint64_t Row::hash(uint32_t lastCol) const
|
|
{
|
|
utils::Hasher_r h;
|
|
uint32_t ret = 0;
|
|
|
|
// Sometimes we ask this to hash 0 bytes, and it comes through looking like
|
|
// lastCol = -1. Return 0.
|
|
if (lastCol >= columnCount)
|
|
return 0;
|
|
|
|
// Two rows that store identical values but are in different formats will return different hashes
|
|
// if this fast path is used. Also can't use any column offsets in this fcn.
|
|
|
|
//if (!useStringTable) {
|
|
// ret = h((const char *) &data[offsets[0]], offsets[lastCol+1] - offsets[0], 0);
|
|
// return h.finalize(ret, offsets[lastCol+1]);
|
|
//}
|
|
|
|
for (uint32_t i = 0; i <= lastCol; i++)
|
|
if (UNLIKELY(isLongString(i)))
|
|
ret = h((const char*) getStringPointer(i), getStringLength(i), ret);
|
|
else
|
|
ret = h((const char*) &data[offsets[i]], colWidths[i], ret);
|
|
|
|
ret = h.finalize(ret, lastCol << 2); // arbitary choice for the 2nd param
|
|
return ret;
|
|
}
|
|
|
|
inline bool Row::equals(const Row& r2, const std::vector<uint32_t>& keyCols) const
|
|
{
|
|
for (uint32_t i = 0; i < keyCols.size(); i++)
|
|
{
|
|
const uint32_t& col = keyCols[i];
|
|
|
|
if (!isLongString(col))
|
|
{
|
|
if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
|
{
|
|
if (getLongDoubleField(i) != r2.getLongDoubleField(i))
|
|
return false;
|
|
}
|
|
else if (getUintField(col) != r2.getUintField(col))
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
if (getStringLength(col) != r2.getStringLength(col))
|
|
return false;
|
|
|
|
if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col)))
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
inline bool Row::equals(const Row& r2, uint32_t lastCol) const
|
|
{
|
|
// This check fires with empty r2 only.
|
|
if (lastCol >= columnCount)
|
|
return true;
|
|
|
|
if (!useStringTable && !r2.useStringTable)
|
|
return !(memcmp(&data[offsets[0]], &r2.data[offsets[0]], offsets[lastCol + 1] - offsets[0]));
|
|
|
|
for (uint32_t i = 0; i <= lastCol; i++)
|
|
if (!isLongString(i))
|
|
{
|
|
if (getColType(i) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
|
{
|
|
if (getLongDoubleField(i) != r2.getLongDoubleField(i))
|
|
return false;
|
|
}
|
|
else if (getUintField(i) != r2.getUintField(i))
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
uint32_t len = getStringLength(i);
|
|
|
|
if (len != r2.getStringLength(i))
|
|
return false;
|
|
|
|
if (memcmp(getStringPointer(i), r2.getStringPointer(i), len))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
inline bool Row::equals(const Row& r2) const
|
|
{
|
|
return equals(r2, columnCount - 1);
|
|
}
|
|
|
|
|
|
/** @brief RowGroup is a lightweight interface for processing packed row data
|
|
|
|
A RowGroup is an interface for parsing and/or modifying blocks of data formatted as described at the top of
|
|
this file. Its lifecycle can be tied to a producer or consumer's lifecycle.
|
|
Only one instance is required to process any number of blocks with a
|
|
given column configuration. The column configuration is specified in the
|
|
constructor, and the block data to process is specified through the
|
|
setData() function. It will not copy or take ownership of the data it processes;
|
|
the caller should do that.
|
|
|
|
Row and RowGroup share some bits. RowGroup owns the memory they share.
|
|
*/
|
|
class RowGroup : public messageqcpp::Serializeable
|
|
{
|
|
public:
|
|
/** @brief The default ctor. It does nothing. Need to init by assignment or deserialization */
|
|
RowGroup();
|
|
|
|
/** @brief The RowGroup ctor, which specifies the column config to process
|
|
|
|
@param colCount The number of columns
|
|
@param positions An array specifying the offsets within the packed data
|
|
of a row where each column begins. It should have colCount + 1
|
|
entries. The first offset is 2, because a row begins with a 2-byte
|
|
RID. The last entry should be the offset of the last column +
|
|
its length, which is also the size of the entire row including the rid.
|
|
@param coids An array of oids for each column.
|
|
@param tkeys An array of unique id for each column.
|
|
@param colTypes An array of COLTYPEs for each column.
|
|
@param charsetNumbers an Array of the lookup numbers for the charset/collation object.
|
|
@param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
|
|
@param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
|
|
*/
|
|
|
|
RowGroup(uint32_t colCount,
|
|
const std::vector<uint32_t>& positions,
|
|
const std::vector<uint32_t>& cOids,
|
|
const std::vector<uint32_t>& tkeys,
|
|
const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
|
|
const std::vector<uint32_t>& charsetNumbers,
|
|
const std::vector<uint32_t>& scale,
|
|
const std::vector<uint32_t>& precision,
|
|
uint32_t stringTableThreshold,
|
|
bool useStringTable = true,
|
|
const std::vector<bool>& forceInlineData = std::vector<bool>()
|
|
);
|
|
|
|
/** @brief The copiers. It copies metadata, not thetypes row data */
|
|
RowGroup(const RowGroup&);
|
|
|
|
/** @brief Assignment operator. It copies metadata, not the row data */
|
|
RowGroup& operator=(const RowGroup&);
|
|
|
|
~RowGroup();
|
|
|
|
inline void initRow(Row*, bool forceInlineData = false) const;
|
|
inline uint32_t getRowCount() const;
|
|
inline void incRowCount();
|
|
inline void setRowCount(uint32_t num);
|
|
inline void getRow(uint32_t rowNum, Row*) const;
|
|
inline uint32_t getRowSize() const;
|
|
inline uint32_t getRowSizeWithStrings() const;
|
|
inline uint64_t getBaseRid() const;
|
|
void setData(RGData* rgd);
|
|
inline void setData(uint8_t* d);
|
|
inline uint8_t* getData() const;
|
|
inline RGData* getRGData() const;
|
|
|
|
uint32_t getStatus() const;
|
|
void setStatus(uint16_t);
|
|
|
|
uint32_t getDBRoot() const;
|
|
void setDBRoot(uint32_t);
|
|
|
|
uint32_t getDataSize() const;
|
|
uint32_t getDataSize(uint64_t n) const;
|
|
uint32_t getMaxDataSize() const;
|
|
uint32_t getMaxDataSizeWithStrings() const;
|
|
uint32_t getEmptySize() const;
|
|
|
|
// this returns the size of the row data with the string table
|
|
inline uint64_t getSizeWithStrings() const;
|
|
|
|
// sets the row count to 0 and the baseRid to something
|
|
// effectively initializing whatever chunk of memory
|
|
// data points to
|
|
void resetRowGroup(uint64_t baseRid);
|
|
|
|
/* The Serializeable interface */
|
|
void serialize(messageqcpp::ByteStream&) const;
|
|
void deserialize(messageqcpp::ByteStream&);
|
|
|
|
uint32_t getColumnWidth(uint32_t col) const;
|
|
uint32_t getColumnCount() const;
|
|
inline const std::vector<uint32_t>& getOffsets() const;
|
|
inline const std::vector<uint32_t>& getOIDs() const;
|
|
inline const std::vector<uint32_t>& getKeys() const;
|
|
inline const std::vector<uint32_t>& getColWidths() const;
|
|
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
|
|
inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
|
|
inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
|
|
inline const std::vector<uint32_t>& getCharsetNumbers() const;
|
|
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
|
|
inline boost::shared_array<bool>& getForceInline();
|
|
static inline uint32_t getHeaderSize()
|
|
{
|
|
return headerSize;
|
|
}
|
|
|
|
// this returns true if the type is CHAR or VARCHAR
|
|
inline bool isCharType(uint32_t colIndex) const;
|
|
inline bool isUnsigned(uint32_t colIndex) const;
|
|
inline bool isShortString(uint32_t colIndex) const;
|
|
inline bool isLongString(uint32_t colIndex) const;
|
|
|
|
inline const std::vector<uint32_t>& getScale() const;
|
|
inline const std::vector<uint32_t>& getPrecision() const;
|
|
|
|
inline bool usesStringTable() const;
|
|
inline void setUseStringTable(bool);
|
|
|
|
// RGData *convertToInlineData(uint64_t *size = NULL) const; // caller manages the memory returned by this
|
|
// void convertToInlineDataInPlace();
|
|
// RGData *convertToStringTable(uint64_t *size = NULL) const;
|
|
// void convertToStringTableInPlace();
|
|
void serializeRGData(messageqcpp::ByteStream&) const;
|
|
inline uint32_t getStringTableThreshold() const;
|
|
|
|
void append(RGData&);
|
|
void append(RowGroup&);
|
|
void append(RGData&, uint pos); // insert starting at position 'pos'
|
|
void append(RowGroup&, uint pos);
|
|
|
|
RGData duplicate(); // returns a copy of the attached RGData
|
|
|
|
std::string toString() const;
|
|
|
|
/** operator+=
|
|
*
|
|
* append the metadata of another RowGroup to this RowGroup
|
|
*/
|
|
RowGroup& operator+=(const RowGroup& rhs);
|
|
|
|
// returns a RowGroup with only the first cols columns. Useful for generating a
|
|
// RowGroup where the first cols make up a key of some kind, and the rest is irrelevant.
|
|
RowGroup truncate(uint32_t cols);
|
|
|
|
/** operator<
|
|
*
|
|
* Orders RG's based on baseRid
|
|
*/
|
|
inline bool operator<(const RowGroup& rhs) const;
|
|
|
|
void addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& sysDataList);
|
|
|
|
/* Base RIDs are now a combination of partition#, segment#, extent#, and block#. */
|
|
inline void setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
|
|
const uint8_t& extentNum, const uint16_t& blockNum);
|
|
inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
|
|
uint16_t* blockNum);
|
|
|
|
inline void setStringStore(boost::shared_ptr<StringStore>);
|
|
|
|
CHARSET_INFO* getCharset(uint32_t col);
|
|
|
|
private:
|
|
uint32_t columnCount;
|
|
uint8_t* data;
|
|
|
|
std::vector<uint32_t> oldOffsets; //inline data offsets
|
|
std::vector<uint32_t> stOffsets; //string table offsets
|
|
uint32_t* offsets; //offsets either points to oldOffsets or stOffsets
|
|
std::vector<uint32_t> colWidths;
|
|
// oids: the real oid of the column, may have duplicates with alias.
|
|
// This oid is necessary for front-end to decide the real column width.
|
|
std::vector<uint32_t> oids;
|
|
// keys: the unique id for pair(oid, alias). bug 1632.
|
|
// Used to map the projected column and rowgroup index
|
|
std::vector<uint32_t> keys;
|
|
std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
|
|
// For string collation
|
|
std::vector<uint32_t> charsetNumbers;
|
|
std::vector<CHARSET_INFO*> charsets;
|
|
|
|
// DECIMAL support. For non-decimal fields, the valutypeses are 0.
|
|
std::vector<uint32_t> scale;
|
|
std::vector<uint32_t> precision;
|
|
|
|
// string table impl
|
|
RGData* rgData;
|
|
StringStore* strings; // note, strings and data belong to rgData
|
|
bool useStringTable;
|
|
bool hasLongStringField;
|
|
uint32_t sTableThreshold;
|
|
boost::shared_array<bool> forceInline;
|
|
|
|
static const uint32_t headerSize = 18;
|
|
static const uint32_t rowCountOffset = 0;
|
|
static const uint32_t baseRidOffset = 4;
|
|
static const uint32_t statusOffset = 12;
|
|
static const uint32_t dbRootOffset = 14;
|
|
};
|
|
|
|
inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum,
|
|
const uint8_t& extentNum, const uint16_t& blockNum);
|
|
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
|
|
uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);
|
|
|
|
// returns the first rid of the logical block specified by baseRid
|
|
inline uint64_t getExtentRelativeRid(uint64_t baseRid);
|
|
|
|
// returns the first rid of the logical block specified by baseRid
|
|
inline uint64_t getFileRelativeRid(uint64_t baseRid);
|
|
|
|
/** operator+
|
|
*
|
|
* add the metadata of 2 RowGroups together and return a new RowGroup
|
|
*/
|
|
RowGroup operator+(const RowGroup& lhs, const RowGroup& rhs);
|
|
|
|
boost::shared_array<int> makeMapping(const RowGroup& r1, const RowGroup& r2);
|
|
void applyMapping(const boost::shared_array<int>& mapping, const Row& in, Row* out);
|
|
void applyMapping(const std::vector<int>& mapping, const Row& in, Row* out);
|
|
void applyMapping(const int* mapping, const Row& in, Row* out);
|
|
|
|
/* PL 8/10/09: commented the asserts for now b/c for the fcns that are called
|
|
every row, they're a measurable performance penalty */
|
|
inline uint32_t RowGroup::getRowCount() const
|
|
{
|
|
// idbassert(data);
|
|
// if (!data) throw std::logic_error("RowGroup::getRowCount(): data is NULL!");
|
|
return *((uint32_t*) &data[rowCountOffset]);
|
|
}
|
|
|
|
inline void RowGroup::incRowCount()
|
|
{
|
|
// idbassert(data);
|
|
++(*((uint32_t*) &data[rowCountOffset]));
|
|
}
|
|
|
|
inline void RowGroup::setRowCount(uint32_t num)
|
|
{
|
|
// idbassert(data);
|
|
*((uint32_t*) &data[rowCountOffset]) = num;
|
|
}
|
|
|
|
inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
|
|
{
|
|
// idbassert(data);
|
|
if (useStringTable != r->usesStringTable())
|
|
initRow(r);
|
|
|
|
r->baseRid = getBaseRid();
|
|
r->data = &(data[headerSize + (rowNum * offsets[columnCount])]);
|
|
r->strings = strings;
|
|
r->userDataStore = rgData->userDataStore.get();
|
|
}
|
|
|
|
inline void RowGroup::setData(uint8_t* d)
|
|
{
|
|
data = d;
|
|
strings = NULL;
|
|
rgData = NULL;
|
|
setUseStringTable(false);
|
|
}
|
|
|
|
inline void RowGroup::setData(RGData* rgd)
|
|
{
|
|
data = rgd->rowData.get();
|
|
strings = rgd->strings.get();
|
|
rgData = rgd;
|
|
}
|
|
|
|
inline uint8_t* RowGroup::getData() const
|
|
{
|
|
//assert(!useStringTable);
|
|
return data;
|
|
}
|
|
|
|
inline RGData* RowGroup::getRGData() const
|
|
{
|
|
return rgData;
|
|
}
|
|
|
|
inline void RowGroup::setUseStringTable(bool b)
|
|
{
|
|
useStringTable = (b && hasLongStringField);
|
|
//offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
|
|
offsets = 0;
|
|
|
|
if (useStringTable && !stOffsets.empty())
|
|
offsets = &stOffsets[0];
|
|
else if (!useStringTable && !oldOffsets.empty())
|
|
offsets = &oldOffsets[0];
|
|
|
|
if (!useStringTable)
|
|
strings = NULL;
|
|
}
|
|
|
|
inline uint64_t RowGroup::getBaseRid() const
|
|
{
|
|
return *((uint64_t*) &data[baseRidOffset]);
|
|
}
|
|
|
|
inline bool RowGroup::operator<(const RowGroup& rhs) const
|
|
{
|
|
return (getBaseRid() < rhs.getBaseRid());
|
|
}
|
|
|
|
void RowGroup::initRow(Row* r, bool forceInlineData) const
|
|
{
|
|
r->columnCount = columnCount;
|
|
|
|
if (LIKELY(!types.empty()))
|
|
{
|
|
r->colWidths = (uint32_t*) &colWidths[0];
|
|
r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
|
|
r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
|
|
r->scale = (uint32_t*) & (scale[0]);
|
|
r->precision = (uint32_t*) & (precision[0]);
|
|
}
|
|
|
|
if (forceInlineData)
|
|
{
|
|
r->useStringTable = false;
|
|
r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
|
|
r->stOffsets = (uint32_t*) & (stOffsets[0]);
|
|
r->offsets = (uint32_t*) & (oldOffsets[0]);
|
|
}
|
|
else
|
|
{
|
|
r->useStringTable = useStringTable;
|
|
r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
|
|
r->stOffsets = (uint32_t*) & (stOffsets[0]);
|
|
r->offsets = offsets;
|
|
}
|
|
|
|
r->hasLongStringField = hasLongStringField;
|
|
r->sTableThreshold = sTableThreshold;
|
|
r->forceInline = forceInline;
|
|
}
|
|
|
|
inline uint32_t RowGroup::getRowSize() const
|
|
{
|
|
return offsets[columnCount];
|
|
}
|
|
|
|
inline uint32_t RowGroup::getRowSizeWithStrings() const
|
|
{
|
|
return oldOffsets[columnCount];
|
|
}
|
|
|
|
inline uint64_t RowGroup::getSizeWithStrings() const
|
|
{
|
|
if (strings == NULL)
|
|
return getDataSize();
|
|
else
|
|
return getDataSize() + strings->getSize();
|
|
}
|
|
|
|
inline bool RowGroup::isCharType(uint32_t colIndex) const
|
|
{
|
|
return execplan::isCharType(types[colIndex]);
|
|
}
|
|
|
|
inline bool RowGroup::isUnsigned(uint32_t colIndex) const
|
|
{
|
|
return execplan::isUnsigned(types[colIndex]);
|
|
}
|
|
|
|
inline bool RowGroup::isShortString(uint32_t colIndex) const
|
|
{
|
|
return ((getColumnWidth(colIndex) <= 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
|
|
(getColumnWidth(colIndex) <= 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR));
|
|
}
|
|
|
|
inline bool RowGroup::isLongString(uint32_t colIndex) const
|
|
{
|
|
return ((getColumnWidth(colIndex) > 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
|
|
(getColumnWidth(colIndex) > 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR) ||
|
|
types[colIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
|
|
types[colIndex] == execplan::CalpontSystemCatalog::BLOB ||
|
|
types[colIndex] == execplan::CalpontSystemCatalog::TEXT);
|
|
}
|
|
|
|
inline bool RowGroup::usesStringTable() const
|
|
{
|
|
return useStringTable;
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& RowGroup::getOffsets() const
|
|
{
|
|
return oldOffsets;
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& RowGroup::getOIDs() const
|
|
{
|
|
return oids;
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& RowGroup::getKeys() const
|
|
{
|
|
return keys;
|
|
}
|
|
|
|
inline execplan::CalpontSystemCatalog::ColDataType RowGroup::getColType(uint32_t colIndex) const
|
|
{
|
|
return types[colIndex];
|
|
}
|
|
|
|
inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes() const
|
|
{
|
|
return types;
|
|
}
|
|
|
|
inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes()
|
|
{
|
|
return types;
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
|
|
{
|
|
return charsetNumbers;
|
|
}
|
|
|
|
inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
|
|
{
|
|
return charsetNumbers[colIndex];
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& RowGroup::getScale() const
|
|
{
|
|
return scale;
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& RowGroup::getPrecision() const
|
|
{
|
|
return precision;
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& RowGroup::getColWidths() const
|
|
{
|
|
return colWidths;
|
|
}
|
|
|
|
inline boost::shared_array<bool>& RowGroup::getForceInline()
|
|
{
|
|
return forceInline;
|
|
}
|
|
|
|
// These type defs look stupid at first, yes. I want to see compiler errors
|
|
// if/when we change the widths of these parms b/c this fcn will have to be
|
|
// reevaluated.
|
|
inline uint64_t convertToRid(const uint32_t& partitionNum,
|
|
const uint16_t& segmentNum, const uint8_t& exNum, const uint16_t& blNum)
|
|
{
|
|
uint64_t partNum = partitionNum, segNum = segmentNum, extentNum = exNum,
|
|
blockNum = blNum;
|
|
|
|
// extentNum gets trunc'd to 6 bits, blockNums to 10 bits
|
|
extentNum &= 0x3f;
|
|
blockNum &= 0x3ff;
|
|
|
|
return (partNum << 32) | (segNum << 16) | (extentNum << 10) | blockNum;
|
|
}
|
|
|
|
inline void RowGroup::setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
|
|
const uint8_t& extentNum, const uint16_t& blockNum)
|
|
{
|
|
*((uint64_t*) &data[baseRidOffset]) = convertToRid(partNum, segNum,
|
|
extentNum, blockNum);
|
|
}
|
|
|
|
inline uint32_t RowGroup::getStringTableThreshold() const
|
|
{
|
|
return sTableThreshold;
|
|
}
|
|
|
|
inline void RowGroup::setStringStore(boost::shared_ptr<StringStore> ss)
|
|
{
|
|
if (useStringTable)
|
|
{
|
|
rgData->setStringStore(ss);
|
|
strings = rgData->strings.get();
|
|
}
|
|
}
|
|
|
|
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
|
|
uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum)
|
|
{
|
|
if (partNum) *partNum = rid >> 32;
|
|
|
|
if (segNum) *segNum = rid >> 16;
|
|
|
|
if (extentNum) *extentNum = (rid >> 10) & 0x3f;
|
|
|
|
if (blockNum) *blockNum = rid & 0x3ff;
|
|
}
|
|
|
|
inline void RowGroup::getLocation(uint32_t* partNum, uint16_t* segNum,
|
|
uint8_t* extentNum, uint16_t* blockNum)
|
|
{
|
|
getLocationFromRid(getBaseRid(), partNum, segNum, extentNum, blockNum);
|
|
}
|
|
|
|
// returns the first RID of the logical block identified by baseRid
|
|
inline uint64_t getExtentRelativeRid(uint64_t baseRid)
|
|
{
|
|
uint64_t blockNum = baseRid & 0x3ff;
|
|
return (blockNum << 13);
|
|
}
|
|
|
|
inline uint64_t Row::getExtentRelativeRid() const
|
|
{
|
|
return rowgroup::getExtentRelativeRid(baseRid) | (getRelRid() & 0x1fff);
|
|
}
|
|
|
|
// returns the first RID of the logical block identified by baseRid
|
|
inline uint64_t getFileRelativeRid(uint64_t baseRid)
|
|
{
|
|
uint64_t extentNum = (baseRid >> 10) & 0x3f;
|
|
uint64_t blockNum = baseRid & 0x3ff;
|
|
return (extentNum << 23) | (blockNum << 13);
|
|
}
|
|
|
|
inline uint64_t Row::getFileRelativeRid() const
|
|
{
|
|
return rowgroup::getFileRelativeRid(baseRid) | (getRelRid() & 0x1fff);
|
|
}
|
|
|
|
inline void Row::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
|
|
uint16_t* blockNum, uint16_t* rowNum)
|
|
{
|
|
getLocationFromRid(baseRid, partNum, segNum, extentNum, blockNum);
|
|
|
|
if (rowNum) *rowNum = getRelRid();
|
|
}
|
|
|
|
inline void copyRow(const Row& in, Row* out, uint32_t colCount)
|
|
{
|
|
if (&in == out)
|
|
return;
|
|
|
|
out->setRid(in.getRelRid());
|
|
|
|
if (!in.usesStringTable() && !out->usesStringTable())
|
|
{
|
|
memcpy(out->getData(), in.getData(), std::min(in.getOffset(colCount), out->getOffset(colCount)));
|
|
return;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < colCount; i++)
|
|
{
|
|
if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY ||
|
|
in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB ||
|
|
in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT ||
|
|
in.getColTypes()[i] == execplan::CalpontSystemCatalog::CLOB))
|
|
out->setVarBinaryField(in.getVarBinaryStringField(i), i);
|
|
else if (UNLIKELY(in.isLongString(i)))
|
|
//out->setStringField(in.getStringField(i), i);
|
|
out->setStringField(in.getStringPointer(i), in.getStringLength(i), i);
|
|
else if (UNLIKELY(in.isShortString(i)))
|
|
out->setUintField(in.getUintField(i), i);
|
|
else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE))
|
|
out->setLongDoubleField(in.getLongDoubleField(i), i);
|
|
else
|
|
out->setIntField(in.getIntField(i), i);
|
|
}
|
|
}
|
|
|
|
inline void copyRow(const Row& in, Row* out)
|
|
{
|
|
copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
|
|
}
|
|
|
|
inline std::string StringStore::getString(uint64_t off) const
|
|
{
|
|
uint32_t length;
|
|
|
|
if (off == std::numeric_limits<uint64_t>::max())
|
|
return joblist::CPNULLSTRMARK;
|
|
|
|
MemChunk* mc;
|
|
|
|
if (off & 0x8000000000000000)
|
|
{
|
|
//off = off - 0x8000000000000000;
|
|
off &= ~0x8000000000000000;
|
|
|
|
if (longStrings.size() <= off)
|
|
return joblist::CPNULLSTRMARK;
|
|
|
|
mc = (MemChunk*) longStrings[off].get();
|
|
memcpy(&length, mc->data, 4);
|
|
return std::string((char*) mc->data + 4, length);
|
|
}
|
|
|
|
uint64_t chunk = off / CHUNK_SIZE;
|
|
uint64_t offset = off % CHUNK_SIZE;
|
|
|
|
// this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
|
|
// what gets returned, it just can't go out of bounds.
|
|
if (mem.size() <= chunk)
|
|
return joblist::CPNULLSTRMARK;
|
|
|
|
mc = (MemChunk*) mem[chunk].get();
|
|
|
|
memcpy(&length, &mc->data[offset], 4);
|
|
|
|
if ((offset + length) > mc->currentSize)
|
|
return joblist::CPNULLSTRMARK;
|
|
|
|
return std::string((char*) & (mc->data[offset]) + 4, length);
|
|
}
|
|
|
|
inline const uint8_t* StringStore::getPointer(uint64_t off) const
|
|
{
|
|
if (off == std::numeric_limits<uint64_t>::max())
|
|
return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
|
|
|
|
uint64_t chunk = off / CHUNK_SIZE;
|
|
uint64_t offset = off % CHUNK_SIZE;
|
|
MemChunk* mc;
|
|
|
|
if (off & 0x8000000000000000)
|
|
{
|
|
//off = off - 0x8000000000000000;
|
|
off &= ~0x8000000000000000;
|
|
|
|
if (longStrings.size() <= off)
|
|
return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
|
|
|
|
mc = (MemChunk*) longStrings[off].get();
|
|
return mc->data + 4;
|
|
}
|
|
|
|
// this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
|
|
// what gets returned, it just can't go out of bounds.
|
|
if (UNLIKELY(mem.size() <= chunk))
|
|
return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
|
|
|
|
mc = (MemChunk*) mem[chunk].get();
|
|
|
|
if (offset > mc->currentSize)
|
|
return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
|
|
|
|
return &(mc->data[offset]) + 4;
|
|
}
|
|
|
|
inline bool StringStore::isNullValue(uint64_t off) const
|
|
{
|
|
uint32_t length;
|
|
|
|
if (off == std::numeric_limits<uint64_t>::max())
|
|
return true;
|
|
|
|
// Long strings won't be NULL
|
|
if (off & 0x8000000000000000)
|
|
return false;
|
|
|
|
uint32_t chunk = off / CHUNK_SIZE;
|
|
uint32_t offset = off % CHUNK_SIZE;
|
|
MemChunk* mc;
|
|
|
|
if (mem.size() <= chunk)
|
|
return true;
|
|
|
|
mc = (MemChunk*) mem[chunk].get();
|
|
memcpy(&length, &mc->data[offset], 4);
|
|
|
|
if (length == 0)
|
|
return true;
|
|
|
|
if (length < 8)
|
|
return false;
|
|
|
|
if ((offset + length) > mc->currentSize)
|
|
return true;
|
|
|
|
if (mc->data[offset + 4] == 0) // "" = NULL string for some reason...
|
|
return true;
|
|
return (memcmp(&mc->data[offset+4], joblist::CPNULLSTRMARK.c_str(), 8) == 0);
|
|
}
|
|
|
|
inline bool StringStore::equals(const std::string& str, uint64_t off) const
|
|
{
|
|
uint32_t length;
|
|
|
|
if (off == std::numeric_limits<uint64_t>::max())
|
|
return str == joblist::CPNULLSTRMARK;
|
|
|
|
MemChunk* mc;
|
|
|
|
if (off & 0x8000000000000000)
|
|
{
|
|
if (longStrings.size() <= (off & ~0x8000000000000000))
|
|
return false;
|
|
|
|
mc = (MemChunk*) longStrings[off & ~0x8000000000000000].get();
|
|
|
|
memcpy(&length, mc->data, 4);
|
|
|
|
// Not sure if this check it needed, but adds safety
|
|
if (length > mc->currentSize)
|
|
return false;
|
|
|
|
return (strncmp(str.c_str(), (const char*) mc->data + 4, length) == 0);
|
|
}
|
|
|
|
uint32_t chunk = off / CHUNK_SIZE;
|
|
uint32_t offset = off % CHUNK_SIZE;
|
|
|
|
if (mem.size() <= chunk)
|
|
return false;
|
|
|
|
mc = (MemChunk*) mem[chunk].get();
|
|
memcpy(&length, &mc->data[offset], 4);
|
|
|
|
if ((offset + length) > mc->currentSize)
|
|
return false;
|
|
|
|
return (strncmp(str.c_str(), (const char*) &mc->data[offset] + 4, length) == 0);
|
|
}
|
|
inline uint32_t StringStore::getStringLength(uint64_t off)
|
|
{
|
|
uint32_t length;
|
|
MemChunk* mc;
|
|
|
|
if (off == std::numeric_limits<uint64_t>::max())
|
|
return 0;
|
|
|
|
if (off & 0x8000000000000000)
|
|
{
|
|
//off = off - 0x8000000000000000;
|
|
off &= ~0x8000000000000000;
|
|
|
|
if (longStrings.size() <= off)
|
|
return 0;
|
|
|
|
mc = (MemChunk*) longStrings[off].get();
|
|
memcpy(&length, mc->data, 4);
|
|
}
|
|
else
|
|
{
|
|
uint64_t chunk = off / CHUNK_SIZE;
|
|
uint64_t offset = off % CHUNK_SIZE;
|
|
|
|
if (mem.size() <= chunk)
|
|
return 0;
|
|
|
|
mc = (MemChunk*) mem[chunk].get();
|
|
memcpy(&length, &mc->data[offset], 4);
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
inline bool StringStore::isEmpty() const
|
|
{
|
|
return empty;
|
|
}
|
|
|
|
inline uint64_t StringStore::getSize() const
|
|
{
|
|
uint32_t i;
|
|
uint64_t ret = 0;
|
|
MemChunk* mc;
|
|
|
|
for (i = 0; i < mem.size(); i++)
|
|
{
|
|
mc = (MemChunk*) mem[i].get();
|
|
ret += mc->capacity;
|
|
}
|
|
|
|
for (i = 0; i < longStrings.size(); i++)
|
|
{
|
|
mc = (MemChunk*) longStrings[i].get();
|
|
ret += mc->capacity;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
inline RGData& RGData::operator=(const RGData& r)
|
|
{
|
|
rowData = r.rowData;
|
|
strings = r.strings;
|
|
userDataStore = r.userDataStore;
|
|
return *this;
|
|
}
|
|
|
|
inline void RGData::getRow(uint32_t num, Row* row)
|
|
{
|
|
uint32_t size = row->getSize();
|
|
row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * size)], strings.get(), userDataStore.get()));
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
// vim:ts=4 sw=4:
|