Latest commit:
* fix(rowgroup): RGData now uses a uint64_t counter for the fixed-size column data buffer, so the buffer can use more than 4GB of RAM, which is necessary for PM-side joins. The RGData ctor previously used uint32_t when allocating the data buffer, which caused an implicit heap overflow.
* feat(bytestream,serdes): the ByteStream buffer size type is now uint64_t. This is needed to handle 64-bit RGData, which comes as a separate patch; together the pair of patches allows PM joins when the SmallSide size exceeds 4GB.
* feat(bytestream,serdes): distribute the ByteStream buffer size data type change to avoid implicit data type narrowing.
* feat(rowgroup): restore bits lost during a cherry-pick; the lost bits caused the first RGData::serialize to crash the process.
/*
   Copyright (C) 2014 InfiniDB, Inc.
   Copyright (c) 2019 MariaDB Corporation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.
*/

//
// C++ Interface: rowgroup
//
// Description:
//
// Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
//

#pragma once

#include <vector>
#include <string>
#include <stdexcept>
// #define NDEBUG
#include <cassert>
#include <boost/shared_ptr.hpp>

#include <boost/thread/mutex.hpp>
#include <cmath>
#include <cfloat>
#include <execinfo.h>

#include "hasher.h"

#include "joblisttypes.h"
#include "bytestream.h"
#include "calpontsystemcatalog.h"
#include "exceptclasses.h"
#include "mcsv1_udaf.h"

#include "branchpred.h"
#include "datatypes/mcs_int128.h"

#include "collation.h"
#include "common/hashfamily.h"

#include "stdlib.h"
#include "execinfo.h"

// Workaround for my_global.h #define of isnan(X) causing a std::std namespace

namespace rowgroup
{
const int16_t rgCommonSize = 8192;
using RGDataSizeType = uint64_t;

/*
  The RowGroup family of classes encapsulate the data moved through the
  system.

  - RowGroup specifies the format of the data primarily (+ some other metadata),
  - RGData (aka RowGroup Data) encapsulates the data,
  - Row is used to extract fields from the data and iterate.

  JobListFactory instantiates the RowGroups to be used by each stage of processing.
  RGDatas are passed between stages, and their RowGroup instances are used
  to interpret them.

  Historically, row data was just a chunk of contiguous memory, a uint8_t *.
  Every field had a fixed width, which allowed for quick offset
  calculation when assigning or retrieving individual fields. That worked
  well for a few years, but at some point it became common to declare
  all strings as max-length, and to manipulate them in queries.

  Having fixed-width fields, even for strings, required an unreasonable
  amount of memory. RGData & StringStore were introduced to handle strings
  more efficiently, at least with respect to memory. The row data would
  still be a uint8_t *, and columns would be fixed-width, but string fields
  above a certain width would contain a 'Pointer' that referenced a string in
  StringStore. Strings are stored efficiently in StringStore, so there is
  no longer wasted space.

  StringStore comes with a different inefficiency, however. When a value
  is overwritten, the original string cannot be freed independently of the
  others, so it continues to use space. If values are only set once, as is
  the typical case, then StringStore is efficient. When it is necessary
  to overwrite string fields, it is possible to configure these classes
  to use the original data format so that old string fields do not accumulate
  in memory. Of course, be careful, because blobs and text fields in CS are
  declared as 2GB strings!

  A single RGData contains up to one 'logical block' worth of data,
  which is 8192 rows. One RGData is usually treated as one unit of work by
  PrimProc and the JobSteps, but the rows an RGData contains and how many are
  treated as a work unit depend on the operation being done.

  For example, PrimProc works in units of 8192 contiguous rows
  that come from disk. If half of the rows were filtered out, then the
  RGData it passes to the next stage would only contain 4096 rows.

  Others build results incrementally before passing them along, such as
  group-by. If one group contains 11111 values, then group-by will
  return 2 RGDatas for that group, one with 8192 rows, and one with 2919.

  Note: There is no synchronization in any of these classes for obvious
  performance reasons. Likewise, although it's technically safe for many
  readers to access an RGData simultaneously, that would not be an
  efficient thing to do. Try to stick to designs where a single RGData
  is used by a single thread at a time.
*/
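
// The declarations below follow the pattern described above. As a minimal usage sketch (not part
// of the original header; 'rg' is assumed to be a RowGroup already configured by JobListFactory
// or the RowGroup ctor, and 'baseRid' a caller-supplied base RID), one logical block might be
// filled and read roughly like this:
//
//   rowgroup::RGData rgData(rg, rowgroup::rgCommonSize);  // allocate space for one logical block
//   rg.setData(&rgData);                                  // rg now interprets rgData's buffer
//   rg.resetRowGroup(baseRid);                            // row count = 0, base RID set
//
//   rowgroup::Row row;
//   rg.initRow(&row);                                     // copy the column layout into 'row'
//   rg.getRow(0, &row);                                   // point 'row' at the first row
//   for (uint32_t i = 0; i < rg.getRowCount(); ++i, row.nextRow())
//   {
//     int64_t v = row.getIntField(0);                     // fixed-width field
//     utils::ConstString s = row.getConstString(1);       // possibly string-table backed
//   }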

// VS'08 carps that struct MemChunk is not default copyable because of the zero-length array.
// This may be so, and we'll get link errors if someone tries, but so far no one has.

// Helper to get a value from nested vector pointers.
template <typename T>
inline T derefFromTwoVectorPtrs(const std::vector<T>* outer, const std::vector<T>* inner, const T innerIdx)
{
  auto outerIdx = inner->operator[](innerIdx);
  return outer->operator[](outerIdx);
}

class StringStore
{
 public:
  StringStore() = default;
  StringStore(const StringStore&) = delete;
  StringStore(StringStore&&) = delete;
  StringStore& operator=(const StringStore&) = delete;
  StringStore& operator=(StringStore&&) = delete;
  virtual ~StringStore();

  inline utils::NullString getString(uint64_t offset) const;
  // Returns the offset.
  // It may receive nullptr as data; that is the proper way to store NULL values.
  uint64_t storeString(const uint8_t* data, uint32_t length);
  // Please note: getPointer can return nullptr.
  inline const uint8_t* getPointer(uint64_t offset) const;
  inline uint32_t getStringLength(uint64_t offset) const;
  inline utils::ConstString getConstString(uint64_t offset) const
  {
    return utils::ConstString((const char*)getPointer(offset), getStringLength(offset));
  }
  inline bool isEmpty() const;
  inline uint64_t getSize() const;
  inline bool isNullValue(uint64_t offset) const;

  void clear();

  void serialize(messageqcpp::ByteStream&) const;
  void deserialize(messageqcpp::ByteStream&);

  //@bug6065, make StringStore::storeString() thread safe
  void useStoreStringMutex(bool b)
  {
    fUseStoreStringMutex = b;
  }
  bool useStoreStringMutex() const
  {
    return fUseStoreStringMutex;
  }

  // This is an overlay b/c the underlying data needs to be any size,
  // and alloc'd in one chunk. data can't be a separate dynamic chunk.
  // NOTE: A change here requires a change in 'bytestream.h'.
  struct MemChunk
  {
    uint32_t currentSize;
    uint32_t capacity;
    uint8_t data[];
  };

 private:
  std::string empty_str;
  static constexpr const uint32_t CHUNK_SIZE = 64 * 1024;  // allocators like powers of 2

  std::vector<std::shared_ptr<uint8_t[]>> mem;

  // To store strings > 64KB (BLOB/TEXT)
  std::vector<std::shared_ptr<uint8_t[]>> longStrings;
  bool empty = true;
  bool fUseStoreStringMutex = false;  //@bug6065, make StringStore::storeString() thread safe
  boost::mutex fMutex;
};
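
// A minimal sketch of the offset-based contract above (not from the original header;
// 's' is a caller-provided std::string):
//
//   rowgroup::StringStore ss;
//   uint64_t off = ss.storeString(reinterpret_cast<const uint8_t*>(s.data()), s.size());
//   utils::ConstString cs = ss.getConstString(off);  // view into the store, no copy
//   bool wasNull = ss.isNullValue(off);              // true when nullptr was stored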

// Where we store user data for UDA(n)F
class UserDataStore
{
  // length represents the fixed portion length of userData.
  // There may be variable length data in containers or other
  // user created structures.
  struct StoreData
  {
    int32_t length;
    std::string functionName;
    boost::shared_ptr<mcsv1sdk::UserData> userData;
    StoreData() : length(0)
    {
    }
    StoreData(const StoreData& rhs)
    {
      length = rhs.length;
      functionName = rhs.functionName;
      userData = rhs.userData;
    }
  };

 public:
  UserDataStore() = default;
  virtual ~UserDataStore() = default;
  UserDataStore(const UserDataStore&) = delete;
  UserDataStore(UserDataStore&&) = delete;
  UserDataStore& operator=(const UserDataStore&) = delete;
  UserDataStore& operator=(UserDataStore&&) = delete;

  void serialize(messageqcpp::ByteStream&) const;
  void deserialize(messageqcpp::ByteStream&);

  // Set to make UserDataStore thread safe
  void useUserDataMutex(bool b)
  {
    fUseUserDataMutex = b;
  }
  bool useUserDataMutex() const
  {
    return fUseUserDataMutex;
  }

  // Returns the offset
  uint32_t storeUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> data,
                         uint32_t length);

  boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t offset) const;

 private:
  std::vector<StoreData> vStoreData;

  bool fUseUserDataMutex = false;
  boost::mutex fMutex;
};
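
// A minimal sketch of how a UDAF's state object round-trips through the store (not from the
// original header; 'ctx', 'state' and 'stateLen' are assumed to come from the mcsv1sdk caller,
// with 'stateLen' the fixed-portion length of the state):
//
//   rowgroup::UserDataStore uds;
//   uds.useUserDataMutex(true);                                 // if several threads store at once
//   uint32_t off = uds.storeUserData(ctx, state, stateLen);     // returns the offset
//   boost::shared_ptr<mcsv1sdk::UserData> back = uds.getUserData(off);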

class RowGroup;
class Row;

/* TODO: OO the rowgroup data to the extent there's no measurable performance hit. */
class RGData
{
 public:
  RGData() = default;  // useless unless followed by an = or a deserialize operation
  RGData(const RowGroup& rg, uint32_t rowCount);  // allocates memory for rowData
  explicit RGData(const RowGroup& rg);
  RGData& operator=(const RGData&) = default;
  RGData& operator=(RGData&&) = default;
  RGData(const RGData&) = default;
  RGData(RGData&&) = default;
  virtual ~RGData() = default;

  // amount should be the # returned by RowGroup::getDataSize()
  void serialize(messageqcpp::ByteStream&, RGDataSizeType amount) const;

  // the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
  // inline data with a length field. Once that's converted to string table format, that
  // option can go away.
  void deserialize(messageqcpp::ByteStream&, RGDataSizeType amount = 0);  // returns the # of bytes read

  inline RGDataSizeType getStringTableMemUsage();
  void clear();
  void reinit(const RowGroup& rg);
  void reinit(const RowGroup& rg, uint32_t rowCount);
  inline void setStringStore(std::shared_ptr<StringStore>& ss)
  {
    strings = ss;
  }

  // this will use the pre-configured Row to figure out where row # num is, then set the Row
  // to point to it. It's a shortcut around using a RowGroup to do the same thing for cases
  // where it's inconvenient to instantiate one.
  inline void getRow(uint32_t num, Row* row);

  //@bug6065, make StringStore::storeString() thread safe
  void useStoreStringMutex(bool b)
  {
    if (strings)
      strings->useStoreStringMutex(b);
  }
  bool useStoreStringMutex() const
  {
    return (strings ? (strings->useStoreStringMutex()) : false);
  }

  UserDataStore* getUserDataStore();
  // make UserDataStore::storeData() thread safe
  void useUserDataMutex(bool b)
  {
    if (userDataStore)
      userDataStore->useUserDataMutex(b);
  }
  bool useUserDataMutex() const
  {
    return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
  }

  bool hasRowData() const
  {
    return !!rowData;
  }

 private:
  uint32_t rowSize = 0;      // can't be.
  uint32_t columnCount = 0;  // shouldn't be, but...
  std::shared_ptr<uint8_t[]> rowData;
  std::shared_ptr<StringStore> strings;
  std::shared_ptr<UserDataStore> userDataStore;

  // Need sig to support backward compat. RGData can deserialize both forms.
  static const uint32_t RGDATA_SIG = 0xffffffff;  // won't happen for 'old' Rowgroup data

  friend class RowGroup;
  friend class RowGroupStorage;
};
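
// A minimal serialization sketch (not from the original header; 'rgData' is a populated RGData
// and 'rg' is the RowGroup that describes it, as in the comments above):
//
//   messageqcpp::ByteStream bs;
//   rgData.serialize(bs, rg.getDataSize());  // amount must come from RowGroup::getDataSize()
//   rowgroup::RGData copy;
//   copy.deserialize(bs);                    // the amount parameter defaults to 0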
|
|
|
|
class Row
|
|
{
|
|
public:
|
|
struct Pointer
|
|
{
|
|
inline Pointer() = default;
|
|
|
|
explicit inline Pointer(uint8_t* d) : data(d)
|
|
{
|
|
}
|
|
inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s)
|
|
{
|
|
}
|
|
inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) : data(d), strings(s), userDataStore(u)
|
|
{
|
|
}
|
|
uint8_t* data = nullptr;
|
|
StringStore* strings = nullptr;
|
|
UserDataStore* userDataStore = nullptr;
|
|
};
|
|
|
|
Row() = default;
|
|
Row(const Row&);
|
|
~Row() = default;
|
|
|
|
Row& operator=(const Row&);
|
|
bool operator==(const Row&) const;
|
|
|
|
inline void setData(const Pointer&);
|
|
inline uint8_t* getData() const;
|
|
|
|
inline void setPointer(const Pointer&);
|
|
inline Pointer getPointer() const;
|
|
|
|
inline void nextRow();
|
|
inline uint32_t getColumnWidth(uint32_t colIndex) const;
|
|
inline uint32_t getColumnCount() const;
|
|
inline uint32_t getInternalSize() const; // this is only accurate if there is no string table
|
|
inline uint32_t getSize() const; // this is only accurate if there is no string table
|
|
// if a string table is being used, getRealSize() takes into account variable-length strings
|
|
inline uint32_t getRealSize() const;
|
|
inline uint32_t getOffset(uint32_t colIndex) const;
|
|
inline uint32_t getScale(uint32_t colIndex) const;
|
|
inline uint32_t getPrecision(uint32_t colIndex) const;
|
|
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
|
|
inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
|
|
inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
|
|
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
|
|
|
|
// this returns true if the type is CHAR or VARCHAR
|
|
inline bool isCharType(uint32_t colIndex) const;
|
|
inline bool isUnsigned(uint32_t colIndex) const;
|
|
inline bool isShortString(uint32_t colIndex) const;
|
|
inline bool isLongString(uint32_t colIndex) const;
|
|
|
|
bool colHasCollation(uint32_t colIndex) const
|
|
{
|
|
return datatypes::typeHasCollation(getColType(colIndex));
|
|
}
|
|
|
|
template <int len>
|
|
inline uint64_t getUintField(uint32_t colIndex) const;
|
|
inline uint64_t getUintField(uint32_t colIndex) const;
|
|
template <int len>
|
|
inline int64_t getIntField(uint32_t colIndex) const;
|
|
inline int64_t getIntField(uint32_t colIndex) const;
|
|
// Get a signed 64-bit integer column value, convert to the given
|
|
// floating point data type T (e.g. float, double, long double)
|
|
// and divide it according to the scale.
|
|
template <typename T>
|
|
inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
|
|
{
|
|
const T d = getIntField(colIndex);
|
|
if (!scale)
|
|
return d;
|
|
return d / datatypes::scaleDivisor<T>(scale);
|
|
}
|
|
template <typename T>
|
|
inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex) const
|
|
{
|
|
return getScaledSInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
|
|
}
|
|
// Get an unsigned 64-bit integer column value, convert to the given
|
|
// floating point data type T (e.g. float, double, long double)
|
|
// and divide it according to the scale.
|
|
template <typename T>
|
|
inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
|
|
{
|
|
const T d = getUintField(colIndex);
|
|
if (!scale)
|
|
return d;
|
|
return d / datatypes::scaleDivisor<T>(scale);
|
|
}
|
|
template <typename T>
|
|
inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex) const
|
|
{
|
|
return getScaledUInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
|
|
}
|
|
template <typename T>
|
|
inline bool equals(T* value, uint32_t colIndex) const;
|
|
template <int len>
|
|
inline bool equals(uint64_t val, uint32_t colIndex) const;
|
|
inline bool equals(long double val, uint32_t colIndex) const;
|
|
inline bool equals(const int128_t& val, uint32_t colIndex) const;
|
|
|
|
inline double getDoubleField(uint32_t colIndex) const;
|
|
inline float getFloatField(uint32_t colIndex) const;
|
|
inline datatypes::Decimal getDecimalField(uint32_t colIndex) const
|
|
{
|
|
if (LIKELY(getColumnWidth(colIndex) == datatypes::MAXDECIMALWIDTH))
|
|
return datatypes::Decimal(getTSInt128Field(colIndex), (int)getScale(colIndex), getPrecision(colIndex));
|
|
return datatypes::Decimal(datatypes::TSInt64(getIntField(colIndex)), (int)getScale(colIndex),
|
|
getPrecision(colIndex));
|
|
}
|
|
inline long double getLongDoubleField(uint32_t colIndex) const;
|
|
inline void storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const;
|
|
inline void getInt128Field(uint32_t colIndex, int128_t& x) const;
|
|
inline datatypes::TSInt128 getTSInt128Field(uint32_t colIndex) const;
|
|
|
|
inline uint64_t getBaseRid() const;
|
|
inline uint64_t getRid() const;
|
|
inline uint16_t getRelRid() const; // returns a rid relative to this logical block
|
|
inline uint64_t getExtentRelativeRid() const; // returns a rid relative to the extent it's in
|
|
inline uint64_t getFileRelativeRid() const; // returns a file-relative rid
|
|
inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum,
|
|
uint16_t* rowNum);
|
|
|
|
template <int len>
|
|
void setUintField(uint64_t val, uint32_t colIndex);
|
|
|
|
/* Note: these 2 fcns avoid 1 array lookup per call. Using them only
|
|
in projection on the PM resulted in a 2.8% performance gain on
|
|
the queries listed in bug 2223.
|
|
TODO: apply them everywhere else possible, and write equivalents
|
|
for the other types as well as the getters.
|
|
*/
|
|
template <int len>
|
|
void setUintField_offset(uint64_t val, uint32_t offset);
|
|
template <typename T>
|
|
void setIntField_offset(const T val, const uint32_t offset);
|
|
inline void nextRow(uint32_t size);
|
|
inline void prevRow(uint32_t size, uint64_t number);
|
|
|
|
inline void setUintField(uint64_t val, uint32_t colIndex);
|
|
template <int len>
|
|
void setIntField(int64_t, uint32_t colIndex);
|
|
inline void setIntField(int64_t, uint32_t colIndex);
|
|
|
|
inline void setDoubleField(double val, uint32_t colIndex);
|
|
inline void setFloatField(float val, uint32_t colIndex);
|
|
inline void setDecimalField(double val, uint32_t colIndex){}; // TODO: Do something here
|
|
inline void setLongDoubleField(const long double& val, uint32_t colIndex);
|
|
inline void setInt128Field(const int128_t& val, uint32_t colIndex);
|
|
|
|
inline void setRid(uint64_t rid);
|
|
|
|
// TODO: remove this (string is not efficient for this), use getConstString() instead
|
|
inline utils::NullString getStringField(uint32_t colIndex) const
|
|
{
|
|
utils::ConstString x = getConstString(colIndex);
|
|
return utils::NullString(x);
|
|
}
|
|
|
|
inline utils::ConstString getConstString(uint32_t colIndex) const;
|
|
inline utils::ConstString getShortConstString(uint32_t colIndex) const;
|
|
void setStringField(const utils::NullString& val, uint32_t colIndex);
|
|
void setStringField(const uint8_t* val, uint32_t length, uint32_t colIndex);
|
|
inline void setStringField(const utils::ConstString& str, uint32_t colIndex);
|
|
template <typename T>
|
|
inline void setBinaryField(const T* value, uint32_t width, uint32_t colIndex);
|
|
template <typename T>
|
|
inline void setBinaryField(const T* value, uint32_t colIndex);
|
|
template <typename T>
|
|
inline void setBinaryField_offset(const T* value, uint32_t width, uint32_t colIndex);
|
|
// XXX: TODO: I'd deprecate these two functions in favor of get/setStringField.
// get/setStringField properly support binary data of up to 4GB
// and also provide a performant interface through use of ConstString.
// support VARBINARY
// Add a 2-byte length at the beginning of the field. nullptr and zero-length fields are
// treated the same; one of the length bits could be used to distinguish these two cases.
|
|
inline void setVarBinaryField(const utils::NullString& val, uint32_t colIndex);
|
|
// No string construction is necessary for better performance.
|
|
inline uint32_t getVarBinaryLength(uint32_t colIndex) const;
|
|
inline const uint8_t* getVarBinaryField(uint32_t colIndex) const;
|
|
inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
|
|
inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
|
|
inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
|
|
inline void setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> userData,
|
|
uint32_t len, uint32_t colIndex);
|
|
|
|
uint64_t getNullValue(uint32_t colIndex) const;
|
|
bool isNullValue(uint32_t colIndex) const;
|
|
template <cscDataType cscDT, int width>
|
|
inline bool isNullValue_offset(uint32_t offset) const;
|
|
|
|
// when NULLs are pulled out via getIntField(), they come out with these values.
|
|
// Ex: the 1-byte int null value is 0x80. When it gets cast to an int64_t
|
|
// it becomes 0xffffffffffffff80, which won't match anything returned by getNullValue().
|
|
int64_t getSignedNullValue(uint32_t colIndex) const;
|
|
|
|
// copy data in srcIndex field to destIndex, all data type
|
|
inline void copyField(uint32_t destIndex, uint32_t srcIndex) const;
|
|
|
|
// copy data in srcIndex field to destAddr, all data type
|
|
// inline void copyField(uint8_t* destAddr, uint32_t srcIndex) const;
|
|
|
|
// an adapter for code that uses the copyField call above;
|
|
// that's not string-table safe, this one is
|
|
inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
|
|
|
|
inline void copyBinaryField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
|
|
|
|
std::string toString(uint32_t rownum = 0) const;
|
|
std::string toCSV() const;
|
|
|
|
/* These fcns are used only in joins. The RID doesn't matter on the side that
|
|
gets hashed. We steal that field here to "mark" a row. */
|
|
inline void markRow();
|
|
inline void zeroRid();
|
|
inline bool isMarked();
|
|
void setToNull(uint32_t colIndex);
|
|
void initToNull();
|
|
|
|
inline void usesStringTable(bool b)
|
|
{
|
|
useStringTable = b;
|
|
}
|
|
inline bool usesStringTable() const
|
|
{
|
|
return useStringTable;
|
|
}
|
|
inline bool hasLongString() const
|
|
{
|
|
return hasLongStringField;
|
|
}
|
|
|
|
// these are for cases when you already know the type definitions are the same.
// a fcn to check the type defs separately doesn't exist yet. No normalization.
|
|
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
|
|
inline uint64_t hash() const; // generates a hash for all cols
|
|
inline void colUpdateHasher(datatypes::MariaDBHasher& hM, const utils::Hasher_r& h, const uint32_t col,
|
|
uint32_t& intermediateHash) const;
|
|
inline void colUpdateHasherTypeless(datatypes::MariaDBHasher& hasher, uint32_t keyColsIdx,
|
|
const std::vector<uint32_t>& keyCols,
|
|
const std::vector<uint32_t>* smallSideKeyColumnsIds,
|
|
const std::vector<uint32_t>* smallSideColumnsWidths) const;
|
|
inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols,
|
|
const std::vector<uint32_t>* smallSideKeyColumnsIds,
|
|
const std::vector<uint32_t>* smallSideColumnsWidths) const
|
|
{
|
|
datatypes::MariaDBHasher h;
|
|
for (uint32_t i = 0; i < keyCols.size(); i++)
|
|
colUpdateHasherTypeless(h, i, keyCols, smallSideKeyColumnsIds, smallSideColumnsWidths);
|
|
return h.finalize();
|
|
}
|
|
|
|
bool equals(const Row&, uint32_t lastCol) const;
|
|
inline bool equals(const Row&) const;
|
|
|
|
inline void setUserDataStore(UserDataStore* u)
|
|
{
|
|
userDataStore = u;
|
|
}
|
|
|
|
bool getNullMark(uint32_t col) const
|
|
{
|
|
return data[getInternalSize() + col];
|
|
}
|
|
|
|
void setNullMark(uint32_t col, bool isNull) const
|
|
{
|
|
data[getInternalSize() + col] = isNull;
|
|
}
|
|
|
|
const CHARSET_INFO* getCharset(uint32_t col) const;
|
|
|
|
private:
|
|
inline bool inStringTable(uint32_t col) const;
|
|
|
|
private:
|
|
uint32_t columnCount = 0;
|
|
uint64_t baseRid = 0;
|
|
|
|
// Note, the mem behind these pointer fields is owned by RowGroup not Row
|
|
uint32_t* oldOffsets = nullptr;
|
|
uint32_t* stOffsets = nullptr;
|
|
uint32_t* offsets = nullptr;
|
|
uint32_t* colWidths = nullptr;
|
|
execplan::CalpontSystemCatalog::ColDataType* types = nullptr;
|
|
uint32_t* charsetNumbers = nullptr;
|
|
CHARSET_INFO** charsets = nullptr;
|
|
uint8_t* data = nullptr;
|
|
uint32_t* scale = nullptr;
|
|
uint32_t* precision = nullptr;
|
|
|
|
StringStore* strings = nullptr;
|
|
bool useStringTable = true;
|
|
bool hasCollation = false;
|
|
bool hasLongStringField = false;
|
|
uint32_t sTableThreshold = 20;
|
|
std::shared_ptr<bool[]> forceInline;
|
|
UserDataStore* userDataStore = nullptr; // For UDAF
|
|
|
|
friend class RowGroup;
|
|
};
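
// A minimal sketch of the string-table-safe copy described above (not from the original header;
// 'src' and 'dst' are Rows initialized from compatible RowGroups):
//
//   for (uint32_t c = 0; c < src.getColumnCount(); ++c)
//     src.copyField(dst, c, c);  // dispatches on type; handles long strings and VARBINARY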
|
|
|
|
inline Row::Pointer Row::getPointer() const
|
|
{
|
|
return Pointer(data, strings, userDataStore);
|
|
}
|
|
inline uint8_t* Row::getData() const
|
|
{
|
|
return data;
|
|
}
|
|
|
|
inline void Row::setPointer(const Pointer& p)
|
|
{
|
|
data = p.data;
|
|
strings = p.strings;
|
|
bool hasStrings = (strings != 0);
|
|
|
|
if (useStringTable != hasStrings)
|
|
{
|
|
useStringTable = hasStrings;
|
|
offsets = (useStringTable ? stOffsets : oldOffsets);
|
|
}
|
|
|
|
userDataStore = p.userDataStore;
|
|
}
|
|
|
|
inline void Row::setData(const Pointer& p)
|
|
{
|
|
setPointer(p);
|
|
}
|
|
|
|
inline void Row::nextRow()
|
|
{
|
|
data += getSize();
|
|
}
|
|
|
|
inline uint32_t Row::getColumnCount() const
|
|
{
|
|
return columnCount;
|
|
}
|
|
|
|
inline uint32_t Row::getColumnWidth(uint32_t col) const
|
|
{
|
|
return colWidths[col];
|
|
}
|
|
|
|
inline uint32_t Row::getInternalSize() const
|
|
{
|
|
return offsets[columnCount];
|
|
}
|
|
|
|
inline uint32_t Row::getSize() const
|
|
{
|
|
return getInternalSize() + columnCount;
|
|
}
|
|
|
|
inline uint32_t Row::getRealSize() const
|
|
{
|
|
if (!useStringTable)
|
|
return getSize();
|
|
|
|
uint32_t ret = columnCount; // account for NULL flags.
|
|
|
|
for (uint32_t i = 0; i < columnCount; i++)
|
|
{
|
|
if (!inStringTable(i))
|
|
ret += getColumnWidth(i);
|
|
else
|
|
ret += getConstString(i).length();
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
inline uint32_t Row::getScale(uint32_t col) const
|
|
{
|
|
return scale[col];
|
|
}
|
|
|
|
inline uint32_t Row::getPrecision(uint32_t col) const
|
|
{
|
|
return precision[col];
|
|
}
|
|
|
|
inline execplan::CalpontSystemCatalog::ColDataType Row::getColType(uint32_t colIndex) const
|
|
{
|
|
return types[colIndex];
|
|
}
|
|
|
|
inline execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes()
|
|
{
|
|
return types;
|
|
}
|
|
|
|
inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() const
|
|
{
|
|
return types;
|
|
}
|
|
|
|
inline uint32_t Row::getCharsetNumber(uint32_t col) const
|
|
{
|
|
return charsetNumbers[col];
|
|
}
|
|
|
|
inline bool Row::isCharType(uint32_t colIndex) const
|
|
{
|
|
return datatypes::isCharType(types[colIndex]);
|
|
}
|
|
|
|
inline bool Row::isUnsigned(uint32_t colIndex) const
|
|
{
|
|
return datatypes::isUnsigned(types[colIndex]);
|
|
}
|
|
|
|
inline bool Row::isShortString(uint32_t colIndex) const
|
|
{
|
|
return (getColumnWidth(colIndex) <= 8 && isCharType(colIndex));
|
|
}
|
|
|
|
inline bool Row::isLongString(uint32_t colIndex) const
|
|
{
|
|
return (getColumnWidth(colIndex) > 8 && isCharType(colIndex));
|
|
}
|
|
|
|
inline bool Row::inStringTable(uint32_t col) const
|
|
{
|
|
return strings && getColumnWidth(col) >= sTableThreshold && !forceInline[col];
|
|
}
|
|
|
|
template <typename T>
|
|
inline bool Row::equals(T* value, uint32_t colIndex) const
|
|
{
|
|
return *reinterpret_cast<T*>(&data[offsets[colIndex]]) == *value;
|
|
}
|
|
|
|
template <int len>
|
|
inline bool Row::equals(uint64_t val, uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (len)
|
|
{
|
|
case 1: return data[offsets[colIndex]] == val;
|
|
|
|
case 2: return *((uint16_t*)&data[offsets[colIndex]]) == val;
|
|
|
|
case 4: return *((uint32_t*)&data[offsets[colIndex]]) == val;
|
|
|
|
case 8: return *((uint64_t*)&data[offsets[colIndex]]) == val;
|
|
default: idbassert(0); throw std::logic_error("Row::equals(): bad length.");
|
|
}
|
|
}
|
|
|
|
inline bool Row::equals(long double val, uint32_t colIndex) const
|
|
{
|
|
return *((long double*)&data[offsets[colIndex]]) == val;
|
|
}
|
|
|
|
inline bool Row::equals(const int128_t& val, uint32_t colIndex) const
|
|
{
|
|
return *((int128_t*)&data[offsets[colIndex]]) == val;
|
|
}
|
|
|
|
template <int len>
|
|
inline uint64_t Row::getUintField(uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (len)
|
|
{
|
|
case 1: return data[offsets[colIndex]];
|
|
|
|
case 2: return *((uint16_t*)&data[offsets[colIndex]]);
|
|
|
|
case 4: return *((uint32_t*)&data[offsets[colIndex]]);
|
|
|
|
case 8: return *((uint64_t*)&data[offsets[colIndex]]);
|
|
default: idbassert(0); throw std::logic_error("Row::getUintField(): bad length.");
|
|
}
|
|
}
|
|
|
|
inline uint64_t Row::getUintField(uint32_t colIndex) const
|
|
{
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1: return data[offsets[colIndex]];
|
|
|
|
case 2: return *((uint16_t*)&data[offsets[colIndex]]);
|
|
|
|
case 4: return *((uint32_t*)&data[offsets[colIndex]]);
|
|
case 8: return *((uint64_t*)&data[offsets[colIndex]]);
|
|
|
|
default: idbassert(0); throw std::logic_error("Row::getUintField(): bad length.");
|
|
}
|
|
}
|
|
|
|
template <int len>
|
|
inline int64_t Row::getIntField(uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (len)
|
|
{
|
|
case 1: return (int8_t)data[offsets[colIndex]];
|
|
|
|
case 2: return *((int16_t*)&data[offsets[colIndex]]);
|
|
|
|
case 4: return *((int32_t*)&data[offsets[colIndex]]);
|
|
|
|
case 8: return *((int64_t*)&data[offsets[colIndex]]);
|
|
|
|
default:
|
|
std::cout << "Row::getIntField getColumnWidth(colIndex) " << getColumnWidth(colIndex) << std::endl;
|
|
idbassert(0);
|
|
throw std::logic_error("Row::getIntField(): bad length.");
|
|
}
|
|
}
|
|
|
|
inline int64_t Row::getIntField(uint32_t colIndex) const
|
|
{
|
|
/* I think the compiler will optimize away the switch stmt */
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1: return (int8_t)data[offsets[colIndex]];
|
|
|
|
case 2: return *((int16_t*)&data[offsets[colIndex]]);
|
|
|
|
case 4: return *((int32_t*)&data[offsets[colIndex]]);
|
|
|
|
case 8: return *((int64_t*)&data[offsets[colIndex]]);
|
|
|
|
default:
|
|
idbassert(0); throw std::logic_error("Row::getIntField(): bad length.");
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
inline void Row::setBinaryField(const T* value, uint32_t width, uint32_t colIndex)
|
|
{
|
|
memcpy(&data[offsets[colIndex]], value, width);
|
|
}
|
|
|
|
template <typename T>
|
|
inline void Row::setBinaryField(const T* value, uint32_t colIndex)
|
|
{
|
|
*reinterpret_cast<T*>(&data[offsets[colIndex]]) = *value;
|
|
}
|
|
|
|
template <>
|
|
inline void Row::setBinaryField<int128_t>(const int128_t* value, uint32_t colIndex)
|
|
{
|
|
datatypes::TSInt128::assignPtrPtr(&data[offsets[colIndex]], value);
|
|
}
|
|
|
|
// This method !cannot! be applied to uint8_t* buffers.
|
|
template <typename T>
|
|
inline void Row::setBinaryField_offset(const T* value, uint32_t width, uint32_t offset)
|
|
{
|
|
*reinterpret_cast<T*>(&data[offset]) = *value;
|
|
}
|
|
|
|
template <>
|
|
inline void Row::setBinaryField_offset<uint8_t>(const uint8_t* value, uint32_t width, uint32_t offset)
|
|
{
|
|
memcpy(&data[offset], value, width);
|
|
}
|
|
|
|
template <>
|
|
inline void Row::setBinaryField_offset<int128_t>(const int128_t* value, uint32_t width, uint32_t offset)
|
|
{
|
|
datatypes::TSInt128::assignPtrPtr(&data[offset], value);
|
|
}
|
|
|
|
inline utils::ConstString Row::getShortConstString(uint32_t colIndex) const
|
|
{
|
|
uint32_t offset = offsets[colIndex];
|
|
const char* src = (const char*)&data[offset];
|
|
if (!isNullValue(colIndex))
|
|
{
|
|
return utils::ConstString(src, strnlen(src, getColumnWidth(colIndex)));
|
|
}
|
|
else
|
|
{
|
|
return utils::ConstString(nullptr, 0);
|
|
}
|
|
}
|
|
|
|
inline utils::ConstString Row::getConstString(uint32_t colIndex) const
|
|
{
|
|
return inStringTable(colIndex) ? strings->getConstString(*((uint64_t*)&data[offsets[colIndex]]))
|
|
: getShortConstString(colIndex);
|
|
}
|
|
|
|
inline void Row::colUpdateHasher(datatypes::MariaDBHasher& hM, const utils::Hasher_r& h, const uint32_t col,
|
|
uint32_t& intermediateHash) const
|
|
{
|
|
switch (getColType(col))
|
|
{
|
|
case execplan::CalpontSystemCatalog::CHAR:
|
|
case execplan::CalpontSystemCatalog::VARCHAR:
|
|
case execplan::CalpontSystemCatalog::BLOB:
|
|
case execplan::CalpontSystemCatalog::TEXT:
|
|
{
|
|
CHARSET_INFO* cs = getCharset(col);
|
|
hM.add(cs, getConstString(col));
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
intermediateHash = h((const char*)&data[offsets[col]], colWidths[col], intermediateHash);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
inline void Row::colUpdateHasherTypeless(datatypes::MariaDBHasher& h, uint32_t keyColsIdx,
|
|
const std::vector<uint32_t>& keyCols,
|
|
const std::vector<uint32_t>* smallSideKeyColumnsIds,
|
|
const std::vector<uint32_t>* smallSideColumnsWidths) const
|
|
{
|
|
auto rowKeyColIdx = keyCols[keyColsIdx];
|
|
auto largeSideColType = getColType(rowKeyColIdx);
|
|
switch (largeSideColType)
|
|
{
|
|
case datatypes::SystemCatalog::CHAR:
|
|
case datatypes::SystemCatalog::VARCHAR:
|
|
case datatypes::SystemCatalog::BLOB:
|
|
case datatypes::SystemCatalog::TEXT:
|
|
{
|
|
CHARSET_INFO* cs = getCharset(rowKeyColIdx);
|
|
h.add(cs, getConstString(rowKeyColIdx));
|
|
break;
|
|
}
|
|
case datatypes::SystemCatalog::DECIMAL:
|
|
{
|
|
auto width = getColumnWidth(rowKeyColIdx);
|
|
if (datatypes::isWideDecimalType(largeSideColType, width))
|
|
{
|
|
bool joinHasSkewedKeyColumn = (smallSideColumnsWidths);
|
|
datatypes::TSInt128 val = getTSInt128Field(rowKeyColIdx);
|
|
if (joinHasSkewedKeyColumn &&
|
|
width != derefFromTwoVectorPtrs(smallSideColumnsWidths, smallSideKeyColumnsIds, keyColsIdx))
|
|
{
|
|
if (val.getValue() >= std::numeric_limits<int64_t>::min() &&
|
|
val.getValue() <= std::numeric_limits<uint64_t>::max())
|
|
{
|
|
h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXLEGACYWIDTH);
|
|
}
|
|
else
|
|
h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXDECIMALWIDTH);
|
|
}
|
|
else
|
|
h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXDECIMALWIDTH);
|
|
}
|
|
else
|
|
{
|
|
int64_t val = getIntField(rowKeyColIdx);
|
|
h.add(&my_charset_bin, (const char*)&val, datatypes::MAXLEGACYWIDTH);
|
|
}
|
|
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
if (isUnsigned(rowKeyColIdx))
|
|
{
|
|
uint64_t val = getUintField(rowKeyColIdx);
|
|
h.add(&my_charset_bin, (const char*)&val, datatypes::MAXLEGACYWIDTH);
|
|
}
|
|
else
|
|
{
|
|
int64_t val = getIntField(rowKeyColIdx);
|
|
h.add(&my_charset_bin, (const char*)&val, datatypes::MAXLEGACYWIDTH);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
inline void Row::setStringField(const uint8_t* str, uint32_t length, uint32_t colIndex)
|
|
{
|
|
utils::ConstString temp((const char*)str, length);
|
|
setStringField(temp, colIndex);
|
|
}
|
|
inline void Row::setStringField(const utils::NullString& val, uint32_t colIndex)
|
|
{
|
|
utils::ConstString temp(val.str(), val.length());
|
|
setStringField(temp, colIndex);
|
|
}
|
|
inline void Row::setStringField(const utils::ConstString& str, uint32_t colIndex)
|
|
{
|
|
uint64_t offset;
|
|
|
|
// TODO: add multi-byte safe truncation here
|
|
uint32_t length = str.length();
|
|
uint32_t colWidth = getColumnWidth(colIndex);
|
|
|
|
setNullMark(colIndex, !str.str());
|
|
|
|
if (length > colWidth)
|
|
length = colWidth;
|
|
|
|
if (inStringTable(colIndex))
|
|
{
|
|
offset = strings->storeString((const uint8_t*)str.str(), length);
|
|
*((uint64_t*)&data[offsets[colIndex]]) = offset;
|
|
// cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
|
|
// << " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
|
|
// << endl;
|
|
}
|
|
else
|
|
{
|
|
uint8_t* buf = &data[offsets[colIndex]];
|
|
memset(buf + length, 0, offsets[colIndex + 1] - (offsets[colIndex] + length)); // needed for memcmp in equals().
|
|
if (str.str())
|
|
{
|
|
memcpy(buf, str.str(), length);
|
|
}
|
|
else if (colWidth <= 8) // special magic value.
|
|
{
|
|
setToNull(colIndex);
|
|
}
|
|
}
|
|
}
|
|
|
|
inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return strings->getStringLength(*((uint64_t*)&data[offsets[colIndex]]));
|
|
|
|
if (getNullMark(colIndex))
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
return *((uint16_t*)&data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
return strings->getPointer(*((uint64_t*)&data[offsets[colIndex]]));
|
|
|
|
if (getNullMark(colIndex))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
return &data[offsets[colIndex] + 2];
|
|
}
|
|
|
|
inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const
|
|
{
|
|
if (inStringTable(colIndex))
|
|
{
|
|
len = strings->getStringLength(*((uint64_t*)&data[offsets[colIndex]]));
|
|
return getVarBinaryField(colIndex);
|
|
}
|
|
else
|
|
{
|
|
if (getNullMark(colIndex))
|
|
{
|
|
len = 0;
|
|
return nullptr;
|
|
}
|
|
len = *((uint16_t*)&data[offsets[colIndex]]);
|
|
return &data[offsets[colIndex] + 2];
|
|
}
|
|
}
|
|
|
|
inline boost::shared_ptr<mcsv1sdk::UserData> Row::getUserData(uint32_t colIndex) const
|
|
{
|
|
if (!userDataStore)
|
|
{
|
|
return boost::shared_ptr<mcsv1sdk::UserData>();
|
|
}
|
|
|
|
return userDataStore->getUserData(*((uint32_t*)&data[offsets[colIndex]]));
|
|
}
|
|
|
|
inline double Row::getDoubleField(uint32_t colIndex) const
|
|
{
|
|
return *((double*)&data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline float Row::getFloatField(uint32_t colIndex) const
|
|
{
|
|
return *((float*)&data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline long double Row::getLongDoubleField(uint32_t colIndex) const
|
|
{
|
|
return *((long double*)&data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline void Row::storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const
|
|
{
|
|
datatypes::TSInt128::assignPtrPtr(x, &data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline void Row::getInt128Field(uint32_t colIndex, int128_t& x) const
|
|
{
|
|
datatypes::TSInt128::assignPtrPtr(&x, &data[offsets[colIndex]]);
|
|
}
|
|
|
|
inline datatypes::TSInt128 Row::getTSInt128Field(uint32_t colIndex) const
|
|
{
|
|
const int128_t* ptr = reinterpret_cast<int128_t*>(&data[offsets[colIndex]]);
|
|
return datatypes::TSInt128(ptr);
|
|
}
|
|
|
|
inline uint64_t Row::getRid() const
|
|
{
|
|
return baseRid + *((uint16_t*)data);
|
|
}
|
|
|
|
inline uint16_t Row::getRelRid() const
|
|
{
|
|
return *((uint16_t*)data);
|
|
}
|
|
|
|
inline uint64_t Row::getBaseRid() const
|
|
{
|
|
return baseRid;
|
|
}
|
|
|
|
inline void Row::markRow()
|
|
{
|
|
*((uint16_t*)data) = 0xffff;
|
|
}
|
|
|
|
inline void Row::zeroRid()
|
|
{
|
|
*((uint16_t*)data) = 0;
|
|
}
|
|
|
|
inline bool Row::isMarked()
|
|
{
|
|
return *((uint16_t*)data) == 0xffff;
|
|
}
|
|
|
|
/* Begin speculative code! */
|
|
inline uint32_t Row::getOffset(uint32_t colIndex) const
|
|
{
|
|
return offsets[colIndex];
|
|
}
|
|
|
|
template <int len>
|
|
inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
|
|
{
|
|
switch (len)
|
|
{
|
|
case 1: data[offset] = val; break;
|
|
|
|
case 2: *((uint16_t*)&data[offset]) = val; break;
|
|
|
|
case 4: *((uint32_t*)&data[offset]) = val; break;
|
|
|
|
case 8: *((uint64_t*)&data[offset]) = val; break;
|
|
|
|
default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field");
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
inline void Row::setIntField_offset(const T val, const uint32_t offset)
|
|
{
|
|
*((T*)&data[offset]) = val;
|
|
}
|
|
|
|
inline void Row::nextRow(uint32_t size)
|
|
{
|
|
data += size;
|
|
}
|
|
|
|
inline void Row::prevRow(uint32_t size, uint64_t number = 1)
|
|
{
|
|
data -= size * number;
|
|
}
|
|
|
|
template <int len>
|
|
inline void Row::setUintField(uint64_t val, uint32_t colIndex)
|
|
{
|
|
switch (len)
|
|
{
|
|
case 1: data[offsets[colIndex]] = val; break;
|
|
|
|
case 2: *((uint16_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 4: *((uint32_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 8: *((uint64_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field");
|
|
}
|
|
}
|
|
|
|
inline void Row::setUintField(uint64_t val, uint32_t colIndex)
|
|
{
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1: data[offsets[colIndex]] = val; break;
|
|
|
|
case 2: *((uint16_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 4: *((uint32_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 8: *((uint64_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
default: idbassert(0); throw std::logic_error("Row::setUintField: bad length");
|
|
}
|
|
}
|
|
|
|
template <int len>
|
|
inline void Row::setIntField(int64_t val, uint32_t colIndex)
|
|
{
|
|
switch (len)
|
|
{
|
|
case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 2: *((int16_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 4: *((int32_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 8: *((int64_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
default: idbassert(0); throw std::logic_error("Row::setIntField: bad length");
|
|
}
|
|
}
|
|
|
|
inline void Row::setIntField(int64_t val, uint32_t colIndex)
|
|
{
|
|
switch (getColumnWidth(colIndex))
|
|
{
|
|
case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 2: *((int16_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 4: *((int32_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
case 8: *((int64_t*)&data[offsets[colIndex]]) = val; break;
|
|
|
|
default: idbassert(0); throw std::logic_error("Row::setIntField: bad length");
|
|
}
|
|
}
|
|
|
|
inline void Row::setDoubleField(double val, uint32_t colIndex)
|
|
{
|
|
*((double*)&data[offsets[colIndex]]) = val;
|
|
}
|
|
|
|
inline void Row::setFloatField(float val, uint32_t colIndex)
|
|
{
|
|
// N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan,
|
|
// but not necessarily the same bits that you put in. This only seems to be for float (double seems
|
|
// to work).
|
|
if (std::isnan(val))
|
|
setUintField<4>(joblist::FLOATNULL, colIndex);
|
|
else
|
|
*((float*)&data[offsets[colIndex]]) = val;
|
|
}
|
|
|
|
inline void Row::setLongDoubleField(const long double& val, uint32_t colIndex)
|
|
{
|
|
uint8_t* p = &data[offsets[colIndex]];
|
|
*reinterpret_cast<long double*>(p) = val;
|
|
#ifdef MASK_LONGDOUBLE
|
|
memset(p + 10, 0, 6);
|
|
#endif
|
|
}
|
|
|
|
inline void Row::setInt128Field(const int128_t& val, uint32_t colIndex)
|
|
{
|
|
setBinaryField<int128_t>(&val, colIndex);
|
|
}
|
|
|
|
inline void Row::setVarBinaryField(const utils::NullString& val, uint32_t colIndex)
|
|
{
|
|
setVarBinaryField((uint8_t*)val.str(), val.length(), colIndex);
|
|
}
|
|
|
|
inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex)
|
|
{
|
|
setNullMark(colIndex, !val);
|
|
|
|
if (inStringTable(colIndex))
|
|
{
|
|
if (len > getColumnWidth(colIndex))
|
|
len = getColumnWidth(colIndex);
|
|
|
|
uint64_t offset = strings->storeString(val, len);
|
|
*((uint64_t*)&data[offsets[colIndex]]) = offset;
|
|
}
|
|
else
|
|
{
|
|
if (len > getColumnWidth(colIndex))
|
|
len = getColumnWidth(colIndex);
|
|
|
|
idbassert(val != nullptr || !len);
|
|
|
|
*((uint16_t*)&data[offsets[colIndex]]) = len;
|
|
if (val != nullptr)
|
|
memcpy(&data[offsets[colIndex] + 2], val, len);
|
|
}
|
|
}
|
|
|
|
inline void Row::setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> userData,
|
|
uint32_t len, uint32_t colIndex)
|
|
{
|
|
if (!userDataStore)
|
|
{
|
|
return;
|
|
}
|
|
|
|
uint32_t offset = userDataStore->storeUserData(context, userData, len);
|
|
*((uint32_t*)&data[offsets[colIndex]]) = offset;
|
|
*((uint32_t*)&data[offsets[colIndex] + 4]) = len;
|
|
}
|
|
|
|
inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
|
|
{
|
|
uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
|
|
memmove(&data[offsets[destIndex]], &data[offsets[srcIndex]], n);
|
|
setNullMark(destIndex, getNullMark(srcIndex));
|
|
}
|
|
|
|
inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
|
|
{
|
|
if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
|
|
types[srcIndex] == execplan::CalpontSystemCatalog::BLOB ||
|
|
types[srcIndex] == execplan::CalpontSystemCatalog::TEXT))
|
|
{
|
|
out.setVarBinaryField(getVarBinaryField(srcIndex), getVarBinaryLength(srcIndex), destIndex);
|
|
}
|
|
else if (UNLIKELY(isLongString(srcIndex)))
|
|
{
|
|
out.setStringField(getConstString(srcIndex), destIndex);
|
|
}
|
|
else if (UNLIKELY(isShortString(srcIndex)))
|
|
{
|
|
out.setUintField(getUintField(srcIndex), destIndex);
|
|
}
|
|
else if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::LONGDOUBLE))
|
|
{
|
|
out.setLongDoubleField(getLongDoubleField(srcIndex), destIndex);
|
|
}
|
|
else if (UNLIKELY(datatypes::isWideDecimalType(types[srcIndex], colWidths[srcIndex])))
|
|
{
|
|
copyBinaryField(out, destIndex, srcIndex);
|
|
}
|
|
else
|
|
{
|
|
out.setIntField(getIntField(srcIndex), destIndex);
|
|
}
|
|
}
|
|
|
|
inline void Row::copyBinaryField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
|
|
{
|
|
out.setInt128Field(getTSInt128Field(srcIndex).getValue(), destIndex);
|
|
}
|
|
|
|
inline void Row::setRid(uint64_t rid)
|
|
{
|
|
*((uint16_t*)data) = rid & 0xffff;
|
|
}
|
|
|
|
inline uint64_t Row::hash() const
|
|
{
|
|
return hash(columnCount - 1);
|
|
}
|
|
|
|
inline uint64_t Row::hash(uint32_t lastCol) const
|
|
{
|
|
// Use two hash classes. MariaDBHasher for text-based
|
|
// collation-aware data types and Hasher_r for all other data types.
|
|
// We deliver a hash that is a combination of both hashers' results.
|
|
utils::Hasher_r h;
|
|
datatypes::MariaDBHasher hM;
|
|
uint32_t intermediateHash = 0;
|
|
|
|
// Sometimes we ask this to hash 0 bytes, and it comes through looking like
|
|
// lastCol = -1. Return 0.
|
|
if (lastCol >= columnCount)
|
|
return 0;
|
|
|
|
for (uint32_t i = 0; i <= lastCol; i++)
|
|
colUpdateHasher(hM, h, i, intermediateHash);
|
|
|
|
return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize();
|
|
}
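
// A minimal sketch of using the hashes for join/group keys (not from the original header;
// 'r1' and 'r2' are Rows with identical type definitions, and 'keyCount' is the number of key
// columns placed first):
//
//   if (r1.hash(keyCount - 1) == r2.hash(keyCount - 1) && r1.equals(r2, keyCount - 1))
//   {
//     // the first keyCount columns match
//   }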
|
|
|
|
inline bool Row::equals(const Row& r2) const
|
|
{
|
|
return equals(r2, columnCount - 1);
|
|
}
|
|
|
|
/** @brief RowGroup is a lightweight interface for processing packed row data
|
|
|
|
A RowGroup is an interface for parsing and/or modifying row data as described at the top
|
|
of this file. Its lifecycle can be tied to a producer or consumer's lifecycle.
|
|
Only one instance is required to process any number of blocks with a
|
|
given column configuration. The column configuration is specified in the
|
|
constructor, and the block data to process is specified through the
|
|
setData() function. It will not copy or take ownership of the data it processes;
|
|
the caller should do that.
|
|
|
|
Row and RowGroup share some bits. RowGroup owns the memory they share.
|
|
*/
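
// A minimal construction sketch (not from the original header; the positions, OIDs, keys and
// charset number below are illustrative assumptions for a CHAR(8) plus VARCHAR(25) layout):
//
//   std::vector<uint32_t> pos{2, 10, 35};  // 2-byte RID, then 8 + 25 bytes
//   std::vector<uint32_t> oids{3000, 3001};
//   std::vector<uint32_t> keys{1, 2};
//   std::vector<execplan::CalpontSystemCatalog::ColDataType> types{
//       execplan::CalpontSystemCatalog::CHAR, execplan::CalpontSystemCatalog::VARCHAR};
//   std::vector<uint32_t> csNums{8, 8};  // assumed latin1 collation numbers
//   std::vector<uint32_t> scale{0, 0}, precision{0, 0};
//   rowgroup::RowGroup rg(2, pos, oids, keys, types, csNums, scale, precision,
//                         20 /* string table threshold */);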
|
|
class RowGroup : public messageqcpp::Serializeable
|
|
{
|
|
public:
|
|
/** @brief The default ctor. It does nothing. Need to init by assignment or deserialization */
|
|
RowGroup();
|
|
|
|
/** @brief The RowGroup ctor, which specifies the column config to process
|
|
|
|
@param colCount The number of columns
|
|
@param positions An array specifying the offsets within the packed data
|
|
of a row where each column begins. It should have colCount + 1
|
|
entries. The first offset is 2, because a row begins with a 2-byte
|
|
RID. The last entry should be the offset of the last column +
|
|
its length, which is also the size of the entire row including the rid.
|
|
@param coids An array of oids for each column.
|
|
@param tkeys An array of unique id for each column.
|
|
@param colTypes An array of COLTYPEs for each column.
|
|
@param charsetNumbers an Array of the lookup numbers for the charset/collation object.
|
|
@param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
|
|
@param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
|
|
*/
|
|
RowGroup(uint32_t colCount, const std::vector<uint32_t>& positions, const std::vector<uint32_t>& cOids,
|
|
const std::vector<uint32_t>& tkeys,
|
|
const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
|
|
const std::vector<uint32_t>& charsetNumbers, const std::vector<uint32_t>& scale,
|
|
const std::vector<uint32_t>& precision, uint32_t stringTableThreshold, bool useStringTable = true,
|
|
const std::vector<bool>& forceInlineData = std::vector<bool>());
|
|
|
|
/** @brief The copy ctor. It copies metadata, not the row data */
|
|
RowGroup(const RowGroup&);
|
|
|
|
/** @brief Assignment operator. It copies metadata, not the row data */
|
|
RowGroup& operator=(const RowGroup&);
|
|
|
|
explicit RowGroup(messageqcpp::ByteStream& bs);
|
|
|
|
~RowGroup();
|
|
|
|
inline void initRow(Row*, bool forceInlineData = false) const;
|
|
inline uint32_t getRowCount() const;
|
|
inline void incRowCount();
|
|
inline void setRowCount(uint32_t num);
|
|
inline void getRow(uint32_t rowNum, Row*) const;
|
|
inline uint32_t getRowSize() const;
|
|
inline uint32_t getRowSizeWithStrings() const;
|
|
inline uint64_t getBaseRid() const;
|
|
void setData(RGData* rgd);
|
|
inline uint8_t* getData() const;
|
|
inline RGData* getRGData() const;
|
|
|
|
uint32_t getStatus() const;
|
|
void setStatus(uint16_t);
|
|
|
|
uint32_t getDBRoot() const;
|
|
void setDBRoot(uint32_t);
|
|
|
|
RGDataSizeType getDataSize() const;
|
|
RGDataSizeType getDataSize(uint64_t n) const;
|
|
RGDataSizeType getMaxDataSize() const;
|
|
RGDataSizeType getMaxDataSizeWithStrings() const;
|
|
RGDataSizeType getEmptySize() const;
|
|
|
|
// this returns the size of the row data with the string table
|
|
inline RGDataSizeType getSizeWithStrings() const;
|
|
inline RGDataSizeType getSizeWithStrings(uint64_t n) const;
|
|
|
|
// sets the row count to 0 and the baseRid to something
|
|
// effectively initializing whatever chunk of memory
|
|
// data points to
|
|
void resetRowGroup(uint64_t baseRid);
|
|
|
|
/* The Serializeable interface */
|
|
void serialize(messageqcpp::ByteStream&) const;
|
|
void deserialize(messageqcpp::ByteStream&);
|
|
|
|
uint32_t getColumnWidth(uint32_t col) const;
|
|
uint32_t getColumnCount() const;
|
|
inline const std::vector<uint32_t>& getOffsets() const;
|
|
inline const std::vector<uint32_t>& getOIDs() const;
|
|
inline const std::vector<uint32_t>& getKeys() const;
|
|
inline const std::vector<uint32_t>& getColWidths() const;
|
|
inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
|
|
inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
|
|
inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
|
|
inline const std::vector<uint32_t>& getCharsetNumbers() const;
|
|
inline uint32_t getCharsetNumber(uint32_t colIndex) const;
|
|
inline std::shared_ptr<bool[]>& getForceInline();
|
|
static inline uint32_t getHeaderSize()
|
|
{
|
|
return headerSize;
|
|
}
|
|
|
|
// this returns true if the type is CHAR or VARCHAR
|
|
inline bool isCharType(uint32_t colIndex) const;
|
|
inline bool isUnsigned(uint32_t colIndex) const;
|
|
inline bool isShortString(uint32_t colIndex) const;
|
|
inline bool isLongString(uint32_t colIndex) const;
|
|
|
|
bool colHasCollation(uint32_t colIndex) const
|
|
{
|
|
return datatypes::typeHasCollation(getColType(colIndex));
|
|
}
|
|
|
|
inline const std::vector<uint32_t>& getScale() const;
|
|
inline const std::vector<uint32_t>& getPrecision() const;
|
|
|
|
inline bool usesStringTable() const;
|
|
inline void setUseStringTable(bool);
|
|
|
|
bool hasLongString() const
|
|
{
|
|
return hasLongStringField;
|
|
}
|
|
|
|
void serializeRGData(messageqcpp::ByteStream&) const;
|
|
inline uint32_t getStringTableThreshold() const;
|
|
|
|
void append(RGData&);
|
|
void append(RowGroup&);
|
|
void append(RGData&, uint pos); // insert starting at position 'pos'
|
|
void append(RowGroup&, uint pos);
|
|
|
|
RGData duplicate(); // returns a copy of the attached RGData
|
|
|
|
std::string toString(const std::vector<uint64_t>& used = {}) const;
|
|
|
|
/** operator+=
|
|
*
|
|
* append the metadata of another RowGroup to this RowGroup
|
|
*/
|
|
RowGroup& operator+=(const RowGroup& rhs);
|
|
|
|
// returns a RowGroup with only the first cols columns. Useful for generating a
|
|
// RowGroup where the first cols make up a key of some kind, and the rest is irrelevant.
|
|
RowGroup truncate(uint32_t cols);
|
|
|
|
/** operator<
|
|
*
|
|
* Orders RG's based on baseRid
|
|
*/
|
|
inline bool operator<(const RowGroup& rhs) const;
|
|
|
|
void addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& sysDataList);
|
|
|
|
/* Base RIDs are now a combination of partition#, segment#, extent#, and block#. */
|
|
inline void setBaseRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
|
|
const uint16_t& blockNum);
|
|
inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);
|
|
|
|
inline void setStringStore(std::shared_ptr<StringStore>);
|
|
|
|
const CHARSET_INFO* getCharset(uint32_t col);
|
|
|
|
private:
|
|
uint32_t columnCount = 0;
|
|
uint8_t* data = nullptr;
|
|
|
|
std::vector<uint32_t> oldOffsets; // inline data offsets
|
|
std::vector<uint32_t> stOffsets; // string table offsets
|
|
uint32_t* offsets = nullptr; // offsets either points to oldOffsets or stOffsets
|
|
std::vector<uint32_t> colWidths;
|
|
// oids: the real oid of the column, may have duplicates with alias.
|
|
// This oid is necessary for front-end to decide the real column width.
|
|
std::vector<uint32_t> oids;
|
|
// keys: the unique id for pair(oid, alias). bug 1632.
|
|
// Used to map the projected column and rowgroup index
|
|
std::vector<uint32_t> keys;
|
|
std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
|
|
// For string collation
|
|
std::vector<uint32_t> charsetNumbers;
|
|
std::vector<CHARSET_INFO*> charsets;
|
|
|
|
// DECIMAL support. For non-decimal fields, the values are 0.
|
|
std::vector<uint32_t> scale;
|
|
std::vector<uint32_t> precision;
|
|
|
|
// string table impl
|
|
RGData* rgData = nullptr;
|
|
StringStore* strings = nullptr; // note, strings and data belong to rgData
|
|
bool useStringTable = true;
|
|
bool hasCollation = false;
|
|
bool hasLongStringField = false;
|
|
uint32_t sTableThreshold = 20;
|
|
std::shared_ptr<bool[]> forceInline;
|
|
|
|
static const uint64_t headerSize = 18;
|
|
static const uint64_t rowCountOffset = 0;
|
|
static const uint64_t baseRidOffset = 4;
|
|
static const uint64_t statusOffset = 12;
|
|
static const uint64_t dbRootOffset = 14;
|
|
};
|
|
|
|
inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
|
|
const uint16_t& blockNum);
|
|
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
|
|
uint16_t* blockNum);
|
|
|
|
// returns the first rid of the logical block specified by baseRid
|
|
inline uint64_t getExtentRelativeRid(uint64_t baseRid);
|
|
|
|
// returns the first rid of the logical block specified by baseRid
|
|
inline uint64_t getFileRelativeRid(uint64_t baseRid);
|
|
|
|
/** operator+
 *
 * add the metadata of two RowGroups together and return a new RowGroup
 */
RowGroup operator+(const RowGroup& lhs, const RowGroup& rhs);

std::shared_ptr<int[]> makeMapping(const RowGroup& r1, const RowGroup& r2);
void applyMapping(const std::shared_ptr<int[]>& mapping, const Row& in, Row* out);
void applyMapping(const std::vector<int>& mapping, const Row& in, Row* out);
void applyMapping(const int* mapping, const Row& in, Row* out);

/* PL 8/10/09: commented out the asserts for now b/c for the fcns that are called
   every row, they're a measurable performance penalty */
inline uint32_t RowGroup::getRowCount() const
{
  // idbassert(data);
  // if (!data) throw std::logic_error("RowGroup::getRowCount(): data is nullptr!");
  return *((uint32_t*)&data[rowCountOffset]);
}

inline void RowGroup::incRowCount()
{
  // idbassert(data);
  ++(*((uint32_t*)&data[rowCountOffset]));
}

inline void RowGroup::setRowCount(uint32_t num)
{
  // idbassert(data);
  *((uint32_t*)&data[rowCountOffset]) = num;
}

inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
{
  // idbassert(data);
  if (useStringTable != r->usesStringTable())
    initRow(r);

  r->baseRid = getBaseRid();
  r->data = &(data[headerSize + (rowNum * r->getSize())]);
  r->strings = strings;
  r->userDataStore = rgData->userDataStore.get();
}

inline void RowGroup::setData(RGData* rgd)
{
  data = rgd->rowData.get();
  strings = rgd->strings.get();
  rgData = rgd;
}

inline uint8_t* RowGroup::getData() const
{
  // assert(!useStringTable);
  return data;
}

inline RGData* RowGroup::getRGData() const
{
  return rgData;
}

inline void RowGroup::setUseStringTable(bool b)
{
  useStringTable = (b && hasLongStringField);
  // offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
  offsets = nullptr;

  if (useStringTable && !stOffsets.empty())
    offsets = &stOffsets[0];
  else if (!useStringTable && !oldOffsets.empty())
    offsets = &oldOffsets[0];

  if (!useStringTable)
    strings = nullptr;
}

inline uint64_t RowGroup::getBaseRid() const
{
  return *((uint64_t*)&data[baseRidOffset]);
}

inline bool RowGroup::operator<(const RowGroup& rhs) const
{
  return (getBaseRid() < rhs.getBaseRid());
}

inline void RowGroup::initRow(Row* r, bool forceInlineData) const
{
  r->columnCount = columnCount;

  if (LIKELY(!types.empty()))
  {
    r->colWidths = (uint32_t*)&colWidths[0];
    r->types = (execplan::CalpontSystemCatalog::ColDataType*)&(types[0]);
    r->charsetNumbers = (uint32_t*)&(charsetNumbers[0]);
    r->charsets = (CHARSET_INFO**)&(charsets[0]);
    r->scale = (uint32_t*)&(scale[0]);
    r->precision = (uint32_t*)&(precision[0]);
  }

  if (forceInlineData)
  {
    r->useStringTable = false;
    r->oldOffsets = (uint32_t*)&(oldOffsets[0]);
    r->stOffsets = (uint32_t*)&(stOffsets[0]);
    r->offsets = (uint32_t*)&(oldOffsets[0]);
  }
  else
  {
    r->useStringTable = useStringTable;
    r->oldOffsets = (uint32_t*)&(oldOffsets[0]);
    r->stOffsets = (uint32_t*)&(stOffsets[0]);
    r->offsets = offsets;
  }

  r->hasLongStringField = hasLongStringField;
  r->sTableThreshold = sTableThreshold;
  r->forceInline = forceInline;
  r->hasCollation = hasCollation;
}

inline uint32_t RowGroup::getRowSize() const
{
  return offsets[columnCount] + columnCount;
}

inline uint32_t RowGroup::getRowSizeWithStrings() const
{
  return oldOffsets[columnCount] + columnCount;
}

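// Rough memory accounting for n rows: whatever getDataSize() reports for the
// fixed-width row data, plus the attached StringStore when one is present.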
inline RGDataSizeType RowGroup::getSizeWithStrings(uint64_t n) const
{
  if (strings == nullptr)
    return getDataSize(n);
  else
    return getDataSize(n) + strings->getSize();
}

inline uint64_t RowGroup::getSizeWithStrings() const
{
  return getSizeWithStrings(getRowCount());
}

inline bool RowGroup::isCharType(uint32_t colIndex) const
{
  return datatypes::isCharType(types[colIndex]);
}

inline bool RowGroup::isUnsigned(uint32_t colIndex) const
{
  return datatypes::isUnsigned(types[colIndex]);
}

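// "Short" strings are narrow enough to be stored inline in the fixed-width row data;
// "long" strings and the LOB-style types are candidates for the string table. The
// 7- vs 8-byte cutoffs below reflect how VARCHAR and CHAR values are packed into the
// inline field (interpretation inferred from the widths used here, not stated elsewhere).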
inline bool RowGroup::isShortString(uint32_t colIndex) const
{
  return ((getColumnWidth(colIndex) <= 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
          (getColumnWidth(colIndex) <= 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR));
}

inline bool RowGroup::isLongString(uint32_t colIndex) const
{
  return ((getColumnWidth(colIndex) > 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
          (getColumnWidth(colIndex) > 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR) ||
          types[colIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
          types[colIndex] == execplan::CalpontSystemCatalog::BLOB ||
          types[colIndex] == execplan::CalpontSystemCatalog::TEXT);
}

inline bool RowGroup::usesStringTable() const
{
  return useStringTable;
}

inline const std::vector<uint32_t>& RowGroup::getOffsets() const
{
  return oldOffsets;
}

inline const std::vector<uint32_t>& RowGroup::getOIDs() const
{
  return oids;
}

inline const std::vector<uint32_t>& RowGroup::getKeys() const
{
  return keys;
}

inline execplan::CalpontSystemCatalog::ColDataType RowGroup::getColType(uint32_t colIndex) const
{
  return types[colIndex];
}

inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes() const
{
  return types;
}

inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes()
{
  return types;
}

inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
{
  return charsetNumbers;
}

inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
{
  return charsetNumbers[colIndex];
}

inline const std::vector<uint32_t>& RowGroup::getScale() const
{
  return scale;
}

inline const std::vector<uint32_t>& RowGroup::getPrecision() const
{
  return precision;
}

inline const std::vector<uint32_t>& RowGroup::getColWidths() const
{
  return colWidths;
}

inline std::shared_ptr<bool[]>& RowGroup::getForceInline()
{
  return forceInline;
}

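// Illustrative example (arbitrary values): convertToRid(3, 2, 1, 5) evaluates to
// (3ULL << 32) | (2 << 16) | (1 << 10) | 5 == 0x300020405, and getLocationFromRid()
// below recovers partition 3, segment 2, extent 1, block 5 from that value.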
inline uint64_t convertToRid(const uint32_t& partitionNum, const uint16_t& segmentNum, const uint8_t& exNum,
                             const uint16_t& blNum)
{
  uint64_t partNum = partitionNum, segNum = segmentNum, extentNum = exNum, blockNum = blNum;

  // extentNum gets truncated to 6 bits, blockNum to 10 bits
  extentNum &= 0x3f;
  blockNum &= 0x3ff;

  return (partNum << 32) | (segNum << 16) | (extentNum << 10) | blockNum;
}

inline void RowGroup::setBaseRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
                                 const uint16_t& blockNum)
{
  *((uint64_t*)&data[baseRidOffset]) = convertToRid(partNum, segNum, extentNum, blockNum);
}

inline uint32_t RowGroup::getStringTableThreshold() const
{
  return sTableThreshold;
}

inline void RowGroup::setStringStore(std::shared_ptr<StringStore> ss)
{
  if (useStringTable)
  {
    rgData->setStringStore(ss);
    strings = rgData->strings.get();
  }
}

inline void getLocationFromRid(uint64_t rid, uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
                               uint16_t* blockNum)
{
  if (partNum)
    *partNum = rid >> 32;

  if (segNum)
    *segNum = rid >> 16;

  if (extentNum)
    *extentNum = (rid >> 10) & 0x3f;

  if (blockNum)
    *blockNum = rid & 0x3ff;
}

inline void RowGroup::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum)
{
  getLocationFromRid(getBaseRid(), partNum, segNum, extentNum, blockNum);
}

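// Note on the shifts below: a row number occupies 13 bits (see the 0x1fff masks), so
// << 13 multiplies a block number by 8192 rows per block, and << 23 additionally
// accounts for the 1024 blocks per extent implied by the 10-bit block number.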
// returns the extent-relative RID of the first row of the logical block identified by baseRid
inline uint64_t getExtentRelativeRid(uint64_t baseRid)
{
  uint64_t blockNum = baseRid & 0x3ff;
  return (blockNum << 13);
}

inline uint64_t Row::getExtentRelativeRid() const
{
  return rowgroup::getExtentRelativeRid(baseRid) | (getRelRid() & 0x1fff);
}

// returns the file-relative RID of the first row of the logical block identified by baseRid
inline uint64_t getFileRelativeRid(uint64_t baseRid)
{
  uint64_t extentNum = (baseRid >> 10) & 0x3f;
  uint64_t blockNum = baseRid & 0x3ff;
  return (extentNum << 23) | (blockNum << 13);
}

inline uint64_t Row::getFileRelativeRid() const
{
  return rowgroup::getFileRelativeRid(baseRid) | (getRelRid() & 0x1fff);
}

inline void Row::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum,
                             uint16_t* rowNum)
{
  getLocationFromRid(baseRid, partNum, segNum, extentNum, blockNum);

  if (rowNum)
    *rowNum = getRelRid();
}

// This routine can be slow. Consider copyRowInline below, which can be faster in some
// cases, but only use it when both rows really do have the same field structure.
inline void copyRow(const Row& in, Row* out, uint32_t colCount)
{
  if (&in == out)
    return;

  out->setRid(in.getRelRid());

  if (!in.usesStringTable() && !out->usesStringTable())
  {
    memcpy(out->getData(), in.getData(), std::min(in.getSize(), out->getSize()));

    for (uint32_t i = 0; i < colCount; i++)
    {
      out->setNullMark(i, in.getNullMark(i));
    }
    return;
  }

  for (uint32_t i = 0; i < colCount; i++)
  {
    if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY ||
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB ||
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT ||
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::CLOB))
    {
      out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i);
    }
    else if (UNLIKELY(in.isLongString(i)))
    {
      out->setStringField(in.getConstString(i), i);
    }
    else if (UNLIKELY(in.isShortString(i)))
    {
      out->setUintField(in.getUintField(i), i);
    }
    else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::DOUBLE))
    {
      out->setDoubleField(in.getDoubleField(i), i);
    }
    else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE))
    {
      out->setLongDoubleField(in.getLongDoubleField(i), i);
    }
    else if (UNLIKELY(datatypes::isWideDecimalType(in.getColType(i), in.getColumnWidth(i))))
    {
      in.copyBinaryField(*out, i, i);
    }
    else
    {
      out->setIntField(in.getIntField(i), i);
    }
  }
}

inline void copyRow(const Row& in, Row* out)
{
  copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
}
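
// Illustrative usage sketch (not part of the API; it uses only accessors defined in this
// header and assumes srcData and dstData are RGData objects built for the same RowGroup
// layout as rg):
//
//   rowgroup::RowGroup rgIn(rg), rgOut(rg);
//   rgIn.setData(&srcData);
//   rgOut.setData(&dstData);
//   rowgroup::Row in, out;
//   rgIn.initRow(&in);
//   rgOut.initRow(&out);
//   for (uint32_t i = 0; i < rgIn.getRowCount(); i++)
//   {
//     rgIn.getRow(i, &in);
//     rgOut.getRow(i, &out);
//     copyRow(in, &out);
//   }
//   rgOut.setRowCount(rgIn.getRowCount());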

// This routine can be substantially faster than copyRow above, but there are caveats:
// the memcpy fast path must only be taken when both rows have identical structure,
// otherwise NULL information for inline strings can be lost or garbled.
inline void copyRowInline(const Row& in, Row* out, uint32_t colCount)
{
  if (&in == out)
    return;

  // XXX: this code still may copy data incorrectly if the column sizes differ.
  if (!in.usesStringTable() && !out->usesStringTable() && in.getSize() == out->getSize())
  {
    out->setRid(in.getRelRid());

    memcpy(out->getData(), in.getData(), in.getSize());
    return;
  }

  copyRow(in, out, colCount);
}

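// How StringStore offsets are interpreted by the accessors below (a summary of the code,
// not new behavior): std::numeric_limits<uint64_t>::max() marks a NULL value; an offset
// with the top bit set indexes the longStrings list; any other offset selects chunk
// mem[off / CHUNK_SIZE] at byte off % CHUNK_SIZE. In both storage forms a 4-byte length
// prefix precedes the string bytes.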
inline utils::NullString StringStore::getString(uint64_t off) const
{
  uint32_t length;
  utils::NullString nStr;

  if (off == std::numeric_limits<uint64_t>::max())
    return nStr;

  MemChunk* mc;

  if (off & 0x8000000000000000)
  {
    // off = off - 0x8000000000000000;
    off &= ~0x8000000000000000;

    if (longStrings.size() <= off)
      return nStr;

    mc = (MemChunk*)longStrings[off].get();
    memcpy(&length, mc->data, 4);
    nStr.assign(std::string((char*)mc->data + 4, length));
    return nStr;
  }

  uint64_t chunk = off / CHUNK_SIZE;
  uint64_t offset = off % CHUNK_SIZE;

  // this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
  // what gets returned, it just can't go out of bounds.
  if (mem.size() <= chunk)
    return nStr;

  mc = (MemChunk*)mem[chunk].get();

  memcpy(&length, &mc->data[offset], 4);

  if ((offset + length) > mc->currentSize)
    return nStr;

  nStr.assign(std::string((char*)&(mc->data[offset]) + 4, length));
  return nStr;
}

inline const uint8_t* StringStore::getPointer(uint64_t off) const
{
  if (off == std::numeric_limits<uint64_t>::max())
    return nullptr;

  uint64_t chunk = off / CHUNK_SIZE;
  uint64_t offset = off % CHUNK_SIZE;
  MemChunk* mc;

  if (off & 0x8000000000000000)
  {
    // off = off - 0x8000000000000000;
    off &= ~0x8000000000000000;

    if (longStrings.size() <= off)
      return nullptr;

    mc = (MemChunk*)longStrings[off].get();
    return mc->data + 4;
  }

  // this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
  // what gets returned, it just can't go out of bounds.
  if (UNLIKELY(mem.size() <= chunk))
    return nullptr;

  mc = (MemChunk*)mem[chunk].get();

  if (offset > mc->currentSize)
    return nullptr;

  return &(mc->data[offset]) + 4;
}

inline bool StringStore::isNullValue(uint64_t off) const
{
  return off == std::numeric_limits<uint64_t>::max();
}

inline uint32_t StringStore::getStringLength(uint64_t off) const
{
  uint32_t length;
  MemChunk* mc;

  if (off == std::numeric_limits<uint64_t>::max())
    return 0;

  if (off & 0x8000000000000000)
  {
    // off = off - 0x8000000000000000;
    off &= ~0x8000000000000000;

    if (longStrings.size() <= off)
      return 0;

    mc = (MemChunk*)longStrings[off].get();
    memcpy(&length, mc->data, 4);
  }
  else
  {
    uint64_t chunk = off / CHUNK_SIZE;
    uint64_t offset = off % CHUNK_SIZE;

    if (mem.size() <= chunk)
      return 0;

    mc = (MemChunk*)mem[chunk].get();
    memcpy(&length, &mc->data[offset], 4);
  }

  return length;
}

inline bool StringStore::isEmpty() const
{
  return empty;
}

inline uint64_t StringStore::getSize() const
{
  uint32_t i;
  uint64_t ret = 0;
  MemChunk* mc;

  ret += sizeof(MemChunk) * mem.size();
  for (i = 0; i < mem.size(); i++)
  {
    mc = (MemChunk*)mem[i].get();
    ret += mc->capacity;
  }

  ret += sizeof(MemChunk) * longStrings.size();
  for (i = 0; i < longStrings.size(); i++)
  {
    mc = (MemChunk*)longStrings[i].get();
    ret += mc->capacity;
  }

  return ret;
}

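// Counterpart of RowGroup::getRow() that works directly off the RGData; the assertion
// below expects the caller's Row to have been initialized for a matching layout (same
// column count and row size).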
inline void RGData::getRow(uint32_t num, Row* row)
{
  idbassert(columnCount == row->getColumnCount() && rowSize == row->getSize());
  uint32_t size = row->getSize();
  row->setData(
      Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * size)], strings.get(), userDataStore.get()));
}

}  // namespace rowgroup