You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-25 21:17:42 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			1997 lines
		
	
	
		
			57 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1997 lines
		
	
	
		
			57 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|    Copyright (c) 2017, MariaDB
 | |
|    Copyright (C) 2014 InfiniDB, Inc.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA.
 | |
| */
 | |
| 
 | |
| //
 | |
| // C++ Interface: rowgroup
 | |
| //
 | |
| // Description:
 | |
| //
 | |
| // Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
 | |
| 
 | |
| #ifndef ROWGROUP_H_
 | |
| #define ROWGROUP_H_
 | |
| 
 | |
| #include <vector>
 | |
| #include <string>
 | |
| #include <stdexcept>
 | |
| //#define NDEBUG
 | |
| #include <cassert>
 | |
| #include <boost/shared_ptr.hpp>
 | |
| #include <boost/shared_array.hpp>
 | |
| #include <boost/thread/mutex.hpp>
 | |
| #include <cmath>
 | |
| #include <cfloat>
 | |
| #ifdef __linux__
 | |
| #include <execinfo.h>
 | |
| #endif
 | |
| 
 | |
| #if defined(_MSC_VER) && !defined(isnan)
 | |
| #define isnan _isnan
 | |
| #endif
 | |
| 
 | |
| #include "hasher.h"
 | |
| 
 | |
| #include "joblisttypes.h"
 | |
| #include "bytestream.h"
 | |
| #include "calpontsystemcatalog.h"
 | |
| #include "exceptclasses.h"
 | |
| #include "mcsv1_udaf.h"
 | |
| 
 | |
| #include "branchpred.h"
 | |
| 
 | |
| #include "../winport/winport.h"
 | |
| 
 | |
| // Workaround for my_global.h #define of isnan(X) causing a std::std namespace
 | |
| using namespace std;
 | |
| 
 | |
| namespace rowgroup
 | |
| {
 | |
| 
 | |
| /*
 | |
| 	The format of the data RowGroup points to is currently ...
 | |
| 
 | |
| 	32-bit - Row Count
 | |
| 	64-bit - Base Rid
 | |
| 	16-bit - Status
 | |
| 	Row*
 | |
| 
 | |
| 	where
 | |
| 	Row =	16-bit rid # relative to the base rid
 | |
| 			field1
 | |
| 			field2
 | |
| 			...
 | |
| 
 | |
| 	RowGroup.getRow() points a Row instance to the specified row within the
 | |
| 	RowGroup data
 | |
| */
 | |
| 
 | |
| // VS'08 carps that struct MemChunk is not default copyable because of the zero-length array.
 | |
| // This may be so, and we'll get link errors if someone trys, but so far no one has.
 | |
| #ifdef _MSC_VER
 | |
| #pragma warning (push)
 | |
| #pragma warning (disable : 4200)
 | |
| #endif
 | |
| 
 | |
| class StringStore
 | |
| {
 | |
| public:
 | |
|     StringStore();
 | |
|     virtual ~StringStore();
 | |
| 
 | |
|     inline std::string getString(uint64_t offset) const;
 | |
|     uint64_t storeString(const uint8_t* data, uint32_t length);  //returns the offset
 | |
|     inline const uint8_t* getPointer(uint64_t offset) const;
 | |
|     inline uint32_t getStringLength(uint64_t offset);
 | |
|     inline bool isEmpty() const;
 | |
|     inline uint64_t getSize() const;
 | |
|     inline bool isNullValue(uint64_t offset) const;
 | |
|     inline bool equals(const std::string& str, uint64_t offset) const;
 | |
| 
 | |
|     void clear();
 | |
| 
 | |
|     void serialize(messageqcpp::ByteStream&) const;
 | |
|     void deserialize(messageqcpp::ByteStream&);
 | |
| 
 | |
|     //@bug6065, make StringStore::storeString() thread safe
 | |
|     void useStoreStringMutex(bool b)
 | |
|     {
 | |
|         fUseStoreStringMutex = b;
 | |
|     }
 | |
|     bool useStoreStringMutex() const
 | |
|     {
 | |
|         return fUseStoreStringMutex;
 | |
|     }
 | |
| 
 | |
| private:
 | |
|     std::string empty_str;
 | |
| 
 | |
|     StringStore(const StringStore&);
 | |
|     StringStore& operator=(const StringStore&);
 | |
|     static const uint32_t CHUNK_SIZE = 64 * 1024;  // allocators like powers of 2
 | |
| 
 | |
|     // This is an overlay b/c the underlying data needs to be any size,
 | |
|     // and alloc'd in one chunk.  data can't be a separate dynamic chunk.
 | |
|     struct MemChunk
 | |
|     {
 | |
|         uint32_t currentSize;
 | |
|         uint32_t capacity;
 | |
|         uint8_t data[];
 | |
|     };
 | |
| 
 | |
|     std::vector<boost::shared_array<uint8_t> > mem;
 | |
| 
 | |
|     // To store strings > 64KB (BLOB/TEXT)
 | |
|     std::vector<boost::shared_array<uint8_t> > longStrings;
 | |
|     bool empty;
 | |
|     bool fUseStoreStringMutex; //@bug6065, make StringStore::storeString() thread safe
 | |
|     boost::mutex fMutex;
 | |
| };
 | |
| 
 | |
| // Where we store user data for UDA(n)F
 | |
| class UserDataStore
 | |
| {
 | |
|     // length represents the fixed portion length of userData.
 | |
|     // There may be variable length data in containers or other
 | |
|     // user created structures.
 | |
|     struct StoreData
 | |
|     {
 | |
|         int32_t length;
 | |
|         std::string functionName;
 | |
|         boost::shared_ptr<mcsv1sdk::UserData> userData;
 | |
|         StoreData() : length(0) { }
 | |
|         StoreData(const StoreData& rhs)
 | |
|         {
 | |
|             length = rhs.length;
 | |
|             functionName = rhs.functionName;
 | |
|             userData = rhs.userData;
 | |
|         }
 | |
|     };
 | |
| 
 | |
| public:
 | |
|     UserDataStore();
 | |
|     virtual ~UserDataStore();
 | |
| 
 | |
|     void serialize(messageqcpp::ByteStream&) const;
 | |
|     void deserialize(messageqcpp::ByteStream&);
 | |
| 
 | |
|     //Set to make UserDataStore thread safe
 | |
|     void useUserDataMutex(bool b)
 | |
|     {
 | |
|         fUseUserDataMutex = b;
 | |
|     }
 | |
|     bool useUserDataMutex() const
 | |
|     {
 | |
|         return fUseUserDataMutex;
 | |
|     }
 | |
| 
 | |
|     // Returns the offset
 | |
|     uint32_t storeUserData(mcsv1sdk::mcsv1Context& context,
 | |
|                            boost::shared_ptr<mcsv1sdk::UserData> data,
 | |
|                            uint32_t length);
 | |
| 
 | |
|     boost::shared_ptr<mcsv1sdk::UserData>  getUserData(uint32_t offset) const;
 | |
| 
 | |
| private:
 | |
|     UserDataStore(const UserDataStore&);
 | |
|     UserDataStore& operator=(const UserDataStore&);
 | |
| 
 | |
|     std::vector<StoreData> vStoreData;
 | |
| 
 | |
|     bool fUseUserDataMutex;
 | |
|     boost::mutex fMutex;
 | |
| };
 | |
| 
 | |
| #ifdef _MSC_VER
 | |
| #pragma warning (pop)
 | |
| #endif
 | |
| 
 | |
| class RowGroup;
 | |
| class Row;
 | |
| 
 | |
| /* TODO: OO the rowgroup data to the extent there's no measurable performance hit. */
 | |
| class RGData
 | |
| {
 | |
| public:
 | |
|     RGData();   // useless unless followed by an = or a deserialize operation
 | |
|     RGData(const RowGroup& rg, uint32_t rowCount);   // allocates memory for rowData
 | |
|     explicit RGData(const RowGroup& rg);
 | |
|     RGData(const RGData&);
 | |
|     virtual ~RGData();
 | |
| 
 | |
|     inline RGData& operator=(const RGData&);
 | |
| 
 | |
|     // amount should be the # returned by RowGroup::getDataSize()
 | |
|     void serialize(messageqcpp::ByteStream&, uint32_t amount) const;
 | |
| 
 | |
|     // the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
 | |
|     // inline data with a length field.  Once that's converted to string table format, that
 | |
|     // option can go away.
 | |
|     void deserialize(messageqcpp::ByteStream&, bool hasLengthField = false); // returns the # of bytes read
 | |
| 
 | |
|     inline uint64_t getStringTableMemUsage();
 | |
|     void clear();
 | |
|     void reinit(const RowGroup& rg);
 | |
|     void reinit(const RowGroup& rg, uint32_t rowCount);
 | |
|     inline void setStringStore(boost::shared_ptr<StringStore>& ss)
 | |
|     {
 | |
|         strings = ss;
 | |
|     }
 | |
| 
 | |
|     // this will use the pre-configured Row to figure out where row # num is, then set the Row
 | |
|     // to point to it.  It's a shortcut around using a RowGroup to do the same thing for cases
 | |
|     // where it's inconvenient to instantiate one.
 | |
|     inline void getRow(uint32_t num, Row* row);
 | |
| 
 | |
|     //@bug6065, make StringStore::storeString() thread safe
 | |
|     void useStoreStringMutex(bool b)
 | |
|     {
 | |
|         if (strings) strings->useStoreStringMutex(b);
 | |
|     }
 | |
|     bool useStoreStringMutex() const
 | |
|     {
 | |
|         return (strings ? (strings->useStoreStringMutex()) : false);
 | |
|     }
 | |
| 
 | |
|     UserDataStore* getUserDataStore();
 | |
|     // make UserDataStore::storeData() thread safe
 | |
|     void useUserDataMutex(bool b)
 | |
|     {
 | |
|         if (userDataStore) userDataStore->useUserDataMutex(b);
 | |
|     }
 | |
|     bool useUserDataMutex() const
 | |
|     {
 | |
|         return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
 | |
|     }
 | |
| 
 | |
|     boost::shared_array<uint8_t> rowData;
 | |
|     boost::shared_ptr<StringStore> strings;
 | |
|     boost::shared_ptr<UserDataStore> userDataStore;
 | |
| private:
 | |
|     //boost::shared_array<uint8_t> rowData;
 | |
|     //boost::shared_ptr<StringStore> strings;
 | |
| 
 | |
|     // Need sig to support backward compat.  RGData can deserialize both forms.
 | |
|     static const uint32_t RGDATA_SIG = 0xffffffff;  //won't happen for 'old' Rowgroup data
 | |
| 
 | |
|     friend class RowGroup;
 | |
| };
 | |
| 
 | |
| 
 | |
| class Row
 | |
| {
 | |
| public:
 | |
|     struct Pointer
 | |
|     {
 | |
|         inline Pointer() : data(NULL), strings(NULL), userDataStore(NULL) { }
 | |
| 
 | |
|         // Pointer(uint8_t*) implicitly makes old code compatible with the string table impl;
 | |
|         // make it explicit to identify things that still might need to be changed
 | |
|         inline Pointer(uint8_t* d) : data(d), strings(NULL), userDataStore(NULL) { }
 | |
|         inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s), userDataStore(NULL) { }
 | |
|         inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) :
 | |
|             data(d), strings(s), userDataStore(u) { }
 | |
|         uint8_t* data;
 | |
|         StringStore* strings;
 | |
|         UserDataStore* userDataStore;
 | |
|     };
 | |
| 
 | |
|     Row();
 | |
|     Row(const Row&);
 | |
|     ~Row();
 | |
| 
 | |
|     Row& operator=(const Row&);
 | |
|     bool operator==(const Row&) const;
 | |
| 
 | |
|     //void setData(uint8_t *rowData, StringStore *ss);
 | |
|     inline void setData(const Pointer&);    // convenience fcn, can go away
 | |
|     inline uint8_t* getData() const;
 | |
| 
 | |
|     inline void setPointer(const Pointer&);
 | |
|     inline Pointer getPointer() const;
 | |
| 
 | |
|     inline void nextRow();
 | |
|     inline uint32_t getColumnWidth(uint32_t colIndex) const;
 | |
|     inline uint32_t getColumnCount() const;
 | |
|     inline uint32_t getSize() const;		// this is only accurate if there is no string table
 | |
|     // if a string table is being used, getRealSize() takes into account variable-length strings
 | |
|     inline uint32_t getRealSize() const;
 | |
|     inline uint32_t getOffset(uint32_t colIndex) const;
 | |
|     inline uint32_t getScale(uint32_t colIndex) const;
 | |
|     inline uint32_t getPrecision(uint32_t colIndex) const;
 | |
|     inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
 | |
|     inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
 | |
|     inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
 | |
| 
 | |
|     // this returns true if the type is not CHAR or VARCHAR
 | |
|     inline bool isCharType(uint32_t colIndex) const;
 | |
|     inline bool isUnsigned(uint32_t colIndex) const;
 | |
|     inline bool isShortString(uint32_t colIndex) const;
 | |
|     inline bool isLongString(uint32_t colIndex) const;
 | |
| 
 | |
|     template<int len> inline uint64_t getUintField(uint32_t colIndex) const;
 | |
|     inline uint64_t getUintField(uint32_t colIndex) const;
 | |
|     template<int len> inline int64_t getIntField(uint32_t colIndex) const;
 | |
|     inline int64_t getIntField(uint32_t colIndex) const;
 | |
|     template<int len> inline bool equals(uint64_t val, uint32_t colIndex) const;
 | |
|     inline bool equals(const std::string& val, uint32_t colIndex) const;
 | |
| 
 | |
|     inline double getDoubleField(uint32_t colIndex) const;
 | |
|     inline float getFloatField(uint32_t colIndex) const;
 | |
|     inline double getDecimalField(uint32_t colIndex) const
 | |
|     {
 | |
|         return 0.0;   // TODO: Do something here
 | |
|     }
 | |
|     inline long double getLongDoubleField(uint32_t colIndex) const;
 | |
| 
 | |
|     inline uint64_t getBaseRid() const;
 | |
|     inline uint64_t getRid() const;
 | |
|     inline uint16_t getRelRid() const;   // returns a rid relative to this logical block
 | |
|     inline uint64_t getExtentRelativeRid() const;   // returns a rid relative to the extent it's in
 | |
|     inline uint64_t getFileRelativeRid() const; // returns a file-relative rid
 | |
|     inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
 | |
|                             uint16_t* blockNum, uint16_t* rowNum);
 | |
| 
 | |
|     template<int len> void setUintField(uint64_t val, uint32_t colIndex);
 | |
| 
 | |
|     /* Note: these 2 fcns avoid 1 array lookup per call.  Using them only
 | |
|     in projection on the PM resulted in a 2.8% performance gain on
 | |
|     the queries listed in bug 2223.
 | |
|     TODO: apply them everywhere else possible, and write equivalents
 | |
|     for the other types as well as the getters.
 | |
|     */
 | |
|     template<int len> void setUintField_offset(uint64_t val, uint32_t offset);
 | |
|     inline void nextRow(uint32_t size);
 | |
|     inline void prevRow(uint32_t size, uint64_t number);
 | |
|     
 | |
|     inline void setUintField(uint64_t val, uint32_t colIndex);
 | |
|     template<int len> void setIntField(int64_t, uint32_t colIndex);
 | |
|     inline void setIntField(int64_t, uint32_t colIndex);
 | |
| 
 | |
|     inline void setDoubleField(double val, uint32_t colIndex);
 | |
|     inline void setFloatField(float val, uint32_t colIndex);
 | |
|     inline void setDecimalField(double val, uint32_t colIndex) { };  // TODO: Do something here
 | |
|     inline void setLongDoubleField(long double val, uint32_t colIndex);
 | |
| 
 | |
|     inline void setRid(uint64_t rid);
 | |
| 
 | |
|     // is string efficient for this?
 | |
|     inline std::string getStringField(uint32_t colIndex) const;
 | |
|     inline const uint8_t* getStringPointer(uint32_t colIndex) const;
 | |
|     inline uint32_t getStringLength(uint32_t colIndex) const;
 | |
|     void setStringField(const std::string& val, uint32_t colIndex);
 | |
|     inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
 | |
| 
 | |
|     // support VARBINARY
 | |
|     // Add 2-byte length at the beginning of the field.  NULL and zero length field are
 | |
|     // treated the same, could use one of the length bit to distinguish these two cases.
 | |
|     inline std::string getVarBinaryStringField(uint32_t colIndex) const;
 | |
|     inline void setVarBinaryField(const std::string& val, uint32_t colIndex);
 | |
|     // No string construction is necessary for better performance.
 | |
|     inline uint32_t getVarBinaryLength(uint32_t colIndex) const;
 | |
|     inline const uint8_t* getVarBinaryField(uint32_t colIndex) const;
 | |
|     inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
 | |
|     inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
 | |
| 
 | |
|     inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
 | |
|     inline void setUserData(mcsv1sdk::mcsv1Context& context,
 | |
|                             boost::shared_ptr<mcsv1sdk::UserData> userData,
 | |
|                             uint32_t len, uint32_t colIndex);
 | |
| 
 | |
|     uint64_t getNullValue(uint32_t colIndex) const;
 | |
|     bool isNullValue(uint32_t colIndex) const;
 | |
| 
 | |
|     // when NULLs are pulled out via getIntField(), they come out with these values.
 | |
|     // Ex: the 1-byte int null value is 0x80.  When it gets cast to an int64_t
 | |
|     // it becomes 0xffffffffffffff80, which won't match anything returned by getNullValue().
 | |
|     int64_t getSignedNullValue(uint32_t colIndex) const;
 | |
| 
 | |
|     // copy data in srcIndex field to destIndex, all data type
 | |
|     inline void copyField(uint32_t destIndex, uint32_t srcIndex) const;
 | |
| 
 | |
|     // copy data in srcIndex field to destAddr, all data type
 | |
|     //inline void copyField(uint8_t* destAddr, uint32_t srcIndex) const;
 | |
| 
 | |
|     // an adapter for code that uses the copyField call above;
 | |
|     // that's not string-table safe, this one is
 | |
|     inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
 | |
| 
 | |
|     std::string toString() const;
 | |
|     std::string toCSV() const;
 | |
| 
 | |
|     /* These fcns are used only in joins.  The RID doesn't matter on the side that
 | |
|     gets hashed.  We steal that field here to "mark" a row. */
 | |
|     inline void markRow();
 | |
|     inline void zeroRid();
 | |
|     inline bool isMarked();
 | |
|     void initToNull();
 | |
| 
 | |
|     inline void usesStringTable(bool b)
 | |
|     {
 | |
|         useStringTable = b;
 | |
|     }
 | |
|     inline bool usesStringTable() const
 | |
|     {
 | |
|         return useStringTable;
 | |
|     }
 | |
|     inline bool hasLongString() const
 | |
|     {
 | |
|         return hasLongStringField;
 | |
|     }
 | |
| 
 | |
|     // these are for cases when you already know the type definitions are the same.
 | |
|     // a fcn to check the type defs seperately doesn't exist yet.  No normalization.
 | |
|     inline uint64_t hash(const std::vector<uint32_t>& keyColumns, uint32_t seed = 0) const;
 | |
|     inline uint64_t hash(uint32_t lastCol) const;  // generates a hash for cols [0-lastCol]
 | |
|     inline uint64_t hash() const;  // generates a hash for all cols
 | |
| 
 | |
|     inline bool equals(const Row&, const std::vector<uint32_t>& keyColumns) const;
 | |
|     inline bool equals(const Row&, uint32_t lastCol) const;
 | |
|     inline bool equals(const Row&) const;
 | |
| 
 | |
|     inline void setUserDataStore(UserDataStore* u)
 | |
|     {
 | |
|         userDataStore = u;
 | |
|     }
 | |
| private:
 | |
|     uint32_t columnCount;
 | |
|     uint64_t baseRid;
 | |
| 
 | |
|     // the next 6 point to memory owned by RowGroup
 | |
|     uint32_t* oldOffsets;
 | |
|     uint32_t* stOffsets;
 | |
|     uint32_t* offsets;
 | |
|     uint32_t* colWidths;
 | |
|     execplan::CalpontSystemCatalog::ColDataType* types;
 | |
|     uint8_t* data;
 | |
|     uint32_t* scale;
 | |
|     uint32_t* precision;
 | |
| 
 | |
|     StringStore* strings;
 | |
|     bool useStringTable;
 | |
|     bool hasLongStringField;
 | |
|     uint32_t sTableThreshold;
 | |
|     boost::shared_array<bool> forceInline;
 | |
|     inline bool inStringTable(uint32_t col) const;
 | |
| 
 | |
|     UserDataStore* userDataStore; // For UDAF
 | |
| 
 | |
|     friend class RowGroup;
 | |
| };
 | |
| 
 | |
| inline Row::Pointer Row::getPointer() const
 | |
| {
 | |
|     return Pointer(data, strings, userDataStore);
 | |
| }
 | |
| inline uint8_t* Row::getData() const
 | |
| {
 | |
|     return data;
 | |
| }
 | |
| 
 | |
| inline void Row::setPointer(const Pointer& p)
 | |
| {
 | |
|     data = p.data;
 | |
|     strings = p.strings;
 | |
|     bool hasStrings = (strings != 0);
 | |
| 
 | |
|     if (useStringTable != hasStrings)
 | |
|     {
 | |
|         useStringTable = hasStrings;
 | |
|         offsets = (useStringTable ? stOffsets : oldOffsets);
 | |
|     }
 | |
| 
 | |
|     userDataStore = p.userDataStore;
 | |
| }
 | |
| 
 | |
| inline void Row::setData(const Pointer& p)
 | |
| {
 | |
|     setPointer(p);
 | |
| }
 | |
| 
 | |
| inline void Row::nextRow()
 | |
| {
 | |
|     data += offsets[columnCount];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getColumnCount() const
 | |
| {
 | |
|     return columnCount;
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getColumnWidth(uint32_t col) const
 | |
| {
 | |
|     return colWidths[col];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getSize() const
 | |
| {
 | |
|     return offsets[columnCount];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getRealSize() const
 | |
| {
 | |
|     if (!useStringTable)
 | |
|         return getSize();
 | |
| 
 | |
|     uint32_t ret = 2;
 | |
| 
 | |
|     for (uint32_t i = 0; i < columnCount; i++)
 | |
|     {
 | |
|         if (!inStringTable(i))
 | |
|             ret += getColumnWidth(i);
 | |
|         else
 | |
|             ret += getStringLength(i);
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getScale(uint32_t col) const
 | |
| {
 | |
|     return scale[col];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getPrecision(uint32_t col) const
 | |
| {
 | |
|     return precision[col];
 | |
| }
 | |
| 
 | |
| inline execplan::CalpontSystemCatalog::ColDataType Row::getColType(uint32_t colIndex) const
 | |
| {
 | |
|     return types[colIndex];
 | |
| }
 | |
| 
 | |
| inline execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes()
 | |
| {
 | |
|     return types;
 | |
| }
 | |
| 
 | |
| inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() const
 | |
| {
 | |
|     return types;
 | |
| }
 | |
| 
 | |
| inline bool Row::isCharType(uint32_t colIndex) const
 | |
| {
 | |
|     return execplan::isCharType(types[colIndex]);
 | |
| }
 | |
| 
 | |
| inline bool Row::isUnsigned(uint32_t colIndex) const
 | |
| {
 | |
|     return execplan::isUnsigned(types[colIndex]);
 | |
| }
 | |
| 
 | |
| inline bool Row::isShortString(uint32_t colIndex) const
 | |
| {
 | |
|     return (getColumnWidth(colIndex) <= 8 && isCharType(colIndex));
 | |
| }
 | |
| 
 | |
| inline bool Row::isLongString(uint32_t colIndex) const
 | |
| {
 | |
|     return (getColumnWidth(colIndex) > 8 && isCharType(colIndex));
 | |
| }
 | |
| 
 | |
| inline bool Row::inStringTable(uint32_t col) const
 | |
| {
 | |
|     return strings && getColumnWidth(col) >= sTableThreshold && !forceInline[col];
 | |
| }
 | |
| 
 | |
| template<int len>
 | |
| inline bool Row::equals(uint64_t val, uint32_t colIndex) const
 | |
| {
 | |
|     /* I think the compiler will optimize away the switch stmt */
 | |
|     switch (len)
 | |
|     {
 | |
|         case 1:
 | |
|             return data[offsets[colIndex]] == val;
 | |
| 
 | |
|         case 2:
 | |
|             return *((uint16_t*) &data[offsets[colIndex]]) == val;
 | |
| 
 | |
|         case 4:
 | |
|             return *((uint32_t*) &data[offsets[colIndex]]) == val;
 | |
| 
 | |
|         case 8:
 | |
|             return *((uint64_t*) &data[offsets[colIndex]]) == val;
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::equals(): bad length.");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline bool Row::equals(const std::string& val, uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|     {
 | |
|         uint64_t offset = *((uint64_t*) &data[offsets[colIndex]]);
 | |
|         return strings->equals(val, offset);
 | |
|     }
 | |
|     else
 | |
|         return (strncmp(val.c_str(), (char*) &data[offsets[colIndex]], getColumnWidth(colIndex)) == 0);
 | |
| }
 | |
| 
 | |
| template<int len>
 | |
| inline uint64_t Row::getUintField(uint32_t colIndex) const
 | |
| {
 | |
|     /* I think the compiler will optimize away the switch stmt */
 | |
|     switch (len)
 | |
|     {
 | |
|         case 1:
 | |
|             return data[offsets[colIndex]];
 | |
| 
 | |
|         case 2:
 | |
|             return *((uint16_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 4:
 | |
|             return *((uint32_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 8:
 | |
|             return *((uint64_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::getUintField(): bad length.");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getUintField(uint32_t colIndex) const
 | |
| {
 | |
|     switch (getColumnWidth(colIndex))
 | |
|     {
 | |
|         case 1:
 | |
|             return data[offsets[colIndex]];
 | |
| 
 | |
|         case 2:
 | |
|             return *((uint16_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 4:
 | |
|             return *((uint32_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 8:
 | |
|             return *((uint64_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::getUintField(): bad length.");
 | |
|     }
 | |
| }
 | |
| 
 | |
| template<int len>
 | |
| inline int64_t Row::getIntField(uint32_t colIndex) const
 | |
| {
 | |
|     /* I think the compiler will optimize away the switch stmt */
 | |
|     switch (len)
 | |
|     {
 | |
|         case 1:
 | |
|             return (int8_t) data[offsets[colIndex]];
 | |
| 
 | |
|         case 2:
 | |
|             return *((int16_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 4:
 | |
|             return *((int32_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 8:
 | |
|             return *((int64_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::getIntField(): bad length.");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline int64_t Row::getIntField(uint32_t colIndex) const
 | |
| {
 | |
|     /* I think the compiler will optimize away the switch stmt */
 | |
|     switch (getColumnWidth(colIndex))
 | |
|     {
 | |
|         case 1:
 | |
|             return (int8_t) data[offsets[colIndex]];
 | |
| 
 | |
|         case 2:
 | |
|             return *((int16_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 4:
 | |
|             return *((int32_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         case 8:
 | |
|             return *((int64_t*) &data[offsets[colIndex]]);
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::getIntField(): bad length.");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline const uint8_t* Row::getStringPointer(uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|         return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
 | |
| 
 | |
|     return &data[offsets[colIndex]];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getStringLength(uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|         return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
 | |
| 
 | |
|     return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
 | |
| }
 | |
| 
 | |
| inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
 | |
| {
 | |
|     uint64_t offset;
 | |
| 
 | |
|     if (length > getColumnWidth(colIndex))
 | |
|         length = getColumnWidth(colIndex);
 | |
| 
 | |
|     if (inStringTable(colIndex))
 | |
|     {
 | |
|         offset = strings->storeString(strdata, length);
 | |
|         *((uint64_t*) &data[offsets[colIndex]]) = offset;
 | |
| //		cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
 | |
| //				<< " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
 | |
| //				<< endl;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         memcpy(&data[offsets[colIndex]], strdata, length);
 | |
|         memset(&data[offsets[colIndex] + length], 0,
 | |
|                offsets[colIndex + 1] - (offsets[colIndex] + length));
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline std::string Row::getStringField(uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|         return strings->getString(*((uint64_t*) &data[offsets[colIndex]]));
 | |
| 
 | |
|     // Not all CHAR/VARCHAR are NUL terminated so use length
 | |
|     return std::string((char*) &data[offsets[colIndex]],
 | |
|                        strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
 | |
| }
 | |
| 
 | |
| inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|         return getStringField(colIndex);
 | |
| 
 | |
|     return std::string((char*) &data[offsets[colIndex] + 2], *((uint16_t*) &data[offsets[colIndex]]));
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|         return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));;
 | |
| 
 | |
|     return *((uint16_t*) &data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|         return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
 | |
| 
 | |
|     return &data[offsets[colIndex] + 2];
 | |
| }
 | |
| 
 | |
| inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|     {
 | |
|         len = strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
 | |
|         return getVarBinaryField(colIndex);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         len = *((uint16_t*) &data[offsets[colIndex]]);
 | |
|         return &data[offsets[colIndex] + 2];
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline boost::shared_ptr<mcsv1sdk::UserData> Row::getUserData(uint32_t colIndex) const
 | |
| {
 | |
|     if (!userDataStore)
 | |
|     {
 | |
|         return boost::shared_ptr<mcsv1sdk::UserData>();
 | |
|     }
 | |
| 
 | |
|     return userDataStore->getUserData(*((uint32_t*) &data[offsets[colIndex]]));
 | |
| }
 | |
| 
 | |
| inline double Row::getDoubleField(uint32_t colIndex) const
 | |
| {
 | |
|     return *((double*) &data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline float Row::getFloatField(uint32_t colIndex) const
 | |
| {
 | |
|     return *((float*) &data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline long double Row::getLongDoubleField(uint32_t colIndex) const
 | |
| {
 | |
|     return *((long double*) &data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getRid() const
 | |
| {
 | |
|     return baseRid + *((uint16_t*) data);
 | |
| }
 | |
| 
 | |
| inline uint16_t Row::getRelRid() const
 | |
| {
 | |
|     return *((uint16_t*) data);
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getBaseRid() const
 | |
| {
 | |
|     return baseRid;
 | |
| }
 | |
| 
 | |
| inline void Row::markRow()
 | |
| {
 | |
|     *((uint16_t*) data) = 0xffff;
 | |
| }
 | |
| 
 | |
| inline void Row::zeroRid()
 | |
| {
 | |
|     *((uint16_t*) data) = 0;
 | |
| }
 | |
| 
 | |
| inline bool Row::isMarked()
 | |
| {
 | |
|     return *((uint16_t*) data) == 0xffff;
 | |
| }
 | |
| 
 | |
| /* Begin speculative code! */
 | |
| inline uint32_t Row::getOffset(uint32_t colIndex) const
 | |
| {
 | |
|     return offsets[colIndex];
 | |
| }
 | |
| 
 | |
| template<int len>
 | |
| inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
 | |
| {
 | |
|     switch (len)
 | |
|     {
 | |
|         case 1:
 | |
|             data[offset] = val;
 | |
|             break;
 | |
| 
 | |
|         case 2:
 | |
|             *((uint16_t*) &data[offset]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 4:
 | |
|             *((uint32_t*) &data[offset]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 8:
 | |
|             *((uint64_t*) &data[offset]) = val;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::setUintField called on a non-uint32_t field");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void Row::nextRow(uint32_t size)
 | |
| {
 | |
|     data += size;
 | |
| }
 | |
| 
 | |
| 
 | |
| inline void Row::prevRow(uint32_t size, uint64_t number = 1)
 | |
| {
 | |
|     data -= size * number;
 | |
| }
 | |
| 
 | |
| template<int len>
 | |
| inline void Row::setUintField(uint64_t val, uint32_t colIndex)
 | |
| {
 | |
|     switch (len)
 | |
|     {
 | |
|         case 1:
 | |
|             data[offsets[colIndex]] = val;
 | |
|             break;
 | |
| 
 | |
|         case 2:
 | |
|             *((uint16_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 4:
 | |
|             *((uint32_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 8:
 | |
|             *((uint64_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::setUintField called on a non-uint32_t field");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void Row::setUintField(uint64_t val, uint32_t colIndex)
 | |
| {
 | |
|     switch (getColumnWidth(colIndex))
 | |
|     {
 | |
|         case 1:
 | |
|             data[offsets[colIndex]] = val;
 | |
|             break;
 | |
| 
 | |
|         case 2:
 | |
|             *((uint16_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 4:
 | |
|             *((uint32_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 8:
 | |
|             *((uint64_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::setUintField: bad length");
 | |
|     }
 | |
| }
 | |
| 
 | |
| template<int len>
 | |
| inline void Row::setIntField(int64_t val, uint32_t colIndex)
 | |
| {
 | |
|     switch (len)
 | |
|     {
 | |
|         case 1:
 | |
|             *((int8_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 2:
 | |
|             *((int16_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 4:
 | |
|             *((int32_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 8:
 | |
|             *((int64_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::setIntField: bad length");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void Row::setIntField(int64_t val, uint32_t colIndex)
 | |
| {
 | |
|     switch (getColumnWidth(colIndex))
 | |
|     {
 | |
|         case 1:
 | |
|             *((int8_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 2:
 | |
|             *((int16_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 4:
 | |
|             *((int32_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         case 8:
 | |
|             *((int64_t*) &data[offsets[colIndex]]) = val;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             idbassert(0);
 | |
|             throw std::logic_error("Row::setIntField: bad length");
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void Row::setDoubleField(double val, uint32_t colIndex)
 | |
| {
 | |
|     *((double*) &data[offsets[colIndex]]) = val;
 | |
| }
 | |
| 
 | |
| inline void Row::setFloatField(float val, uint32_t colIndex)
 | |
| {
 | |
|     //N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan,
 | |
|     //  but not necessarily the same bits that you put in. This only seems to be for float (double seems
 | |
|     //  to work).
 | |
|     if (isnan(val))
 | |
|         setUintField<4>(joblist::FLOATNULL, colIndex);
 | |
|     else
 | |
|         *((float*) &data[offsets[colIndex]]) = val;
 | |
| }
 | |
| 
 | |
| inline void Row::setLongDoubleField(long double val, uint32_t colIndex)
 | |
| {
 | |
|     *((long double*) &data[offsets[colIndex]]) = val;
 | |
| }
 | |
| 
 | |
| inline void Row::setVarBinaryField(const std::string& val, uint32_t colIndex)
 | |
| {
 | |
|     if (inStringTable(colIndex))
 | |
|         setStringField(val, colIndex);
 | |
|     else
 | |
|     {
 | |
|         *((uint16_t*) &data[offsets[colIndex]]) = static_cast<uint16_t>(val.length());
 | |
|         memcpy(&data[offsets[colIndex] + 2], val.data(), val.length());
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex)
 | |
| {
 | |
|     if (len > getColumnWidth(colIndex))
 | |
|         len = getColumnWidth(colIndex);
 | |
| 
 | |
|     if (inStringTable(colIndex))
 | |
|     {
 | |
|         uint64_t offset = strings->storeString(val, len);
 | |
|         *((uint64_t*) &data[offsets[colIndex]]) = offset;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         *((uint16_t*) &data[offsets[colIndex]]) = len;
 | |
|         memcpy(&data[offsets[colIndex] + 2], val, len);
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void Row::setUserData(mcsv1sdk::mcsv1Context& context,
 | |
|                              boost::shared_ptr<mcsv1sdk::UserData> userData,
 | |
|                              uint32_t len, uint32_t colIndex)
 | |
| {
 | |
|     if (!userDataStore)
 | |
|     {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     uint32_t offset = userDataStore->storeUserData(context, userData, len);
 | |
|     *((uint32_t*) &data[offsets[colIndex]]) = offset;
 | |
|     *((uint32_t*) &data[offsets[colIndex] + 4]) = len;
 | |
| }
 | |
| 
 | |
| inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
 | |
| {
 | |
|     uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
 | |
|     memmove(&data[offsets[destIndex]], &data[offsets[srcIndex]], n);
 | |
| }
 | |
| 
 | |
| inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
 | |
| {
 | |
|     if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
 | |
|                  types[srcIndex] == execplan::CalpontSystemCatalog::BLOB ||
 | |
|                  types[srcIndex] == execplan::CalpontSystemCatalog::TEXT))
 | |
|         out.setVarBinaryField(getVarBinaryStringField(srcIndex), destIndex);
 | |
|     else if (UNLIKELY(isLongString(srcIndex)))
 | |
|         out.setStringField(getStringPointer(srcIndex), getStringLength(srcIndex), destIndex);
 | |
|     //out.setStringField(getStringField(srcIndex), destIndex);
 | |
|     else if (UNLIKELY(isShortString(srcIndex)))
 | |
|         out.setUintField(getUintField(srcIndex), destIndex);
 | |
|     else if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::LONGDOUBLE))
 | |
|         out.setLongDoubleField(getLongDoubleField(srcIndex), destIndex);
 | |
|     else
 | |
|         out.setIntField(getIntField(srcIndex), destIndex);
 | |
| }
 | |
| 
 | |
| inline void Row::setRid(uint64_t rid)
 | |
| {
 | |
|     *((uint16_t*) data) = rid & 0xffff;
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::hash() const
 | |
| {
 | |
|     return hash(columnCount - 1);
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::hash(const std::vector<uint32_t>& keyCols, uint32_t seed) const
 | |
| {
 | |
|     utils::Hasher_r h;
 | |
|     uint32_t ret = seed;
 | |
| 
 | |
|     for (uint32_t i = 0; i < keyCols.size(); i++)
 | |
|     {
 | |
|         const uint32_t& col = keyCols[i];
 | |
| 
 | |
|         if (UNLIKELY(isLongString(col)))
 | |
|             ret = h((const char*) getStringPointer(col), getStringLength(col), ret);
 | |
|         else
 | |
|         {
 | |
|             ret = h((const char*) &data[offsets[i]], colWidths[i], ret);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     ret = h.finalize(ret, keyCols.size() << 2);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::hash(uint32_t lastCol) const
 | |
| {
 | |
|     utils::Hasher_r h;
 | |
|     uint32_t ret = 0;
 | |
| 
 | |
|     // Sometimes we ask this to hash 0 bytes, and it comes through looking like
 | |
|     // lastCol = -1.  Return 0.
 | |
|     if (lastCol >= columnCount)
 | |
|         return 0;
 | |
| 
 | |
|     // Two rows that store identical values but are in different formats will return different hashes
 | |
|     // if this fast path is used.  Also can't use any column offsets in this fcn.
 | |
| 
 | |
|     //if (!useStringTable) {
 | |
|     //	ret = h((const char *) &data[offsets[0]], offsets[lastCol+1] - offsets[0], 0);
 | |
|     //	return h.finalize(ret, offsets[lastCol+1]);
 | |
|     //}
 | |
| 
 | |
|     for (uint32_t i = 0; i <= lastCol; i++)
 | |
|         if (UNLIKELY(isLongString(i)))
 | |
|             ret = h((const char*) getStringPointer(i), getStringLength(i), ret);
 | |
|         else
 | |
|             ret = h((const char*) &data[offsets[i]], colWidths[i], ret);
 | |
| 
 | |
|     ret = h.finalize(ret, lastCol << 2);   // arbitary choice for the 2nd param
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| inline bool Row::equals(const Row& r2, const std::vector<uint32_t>& keyCols) const
 | |
| {
 | |
|     for (uint32_t i = 0; i < keyCols.size(); i++)
 | |
|     {
 | |
|         const uint32_t& col = keyCols[i];
 | |
| 
 | |
|         if (!isLongString(col))
 | |
|         {
 | |
|             if (getUintField(col) != r2.getUintField(col))
 | |
|                 return false;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             if (getStringLength(col) != r2.getStringLength(col))
 | |
|                 return false;
 | |
| 
 | |
|             if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col)))
 | |
|                 return false;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| inline bool Row::equals(const Row& r2, uint32_t lastCol) const
 | |
| {
 | |
|     if (lastCol >= columnCount)
 | |
|         return true;
 | |
| 
 | |
|     if (!useStringTable && !r2.useStringTable)
 | |
|         return !(memcmp(&data[offsets[0]], &r2.data[offsets[0]], offsets[lastCol + 1] - offsets[0]));
 | |
| 
 | |
|     for (uint32_t i = 0; i <= lastCol; i++)
 | |
|         if (!isLongString(i))
 | |
|         {
 | |
|             if (getUintField(i) != r2.getUintField(i))
 | |
|                 return false;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             uint32_t len = getStringLength(i);
 | |
| 
 | |
|             if (len != r2.getStringLength(i))
 | |
|                 return false;
 | |
| 
 | |
|             if (memcmp(getStringPointer(i), r2.getStringPointer(i), len))
 | |
|                 return false;
 | |
|         }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| inline bool Row::equals(const Row& r2) const
 | |
| {
 | |
|     return equals(r2, columnCount - 1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /** @brief RowGroup is a lightweight interface for processing packed row data
 | |
| 
 | |
| 	A RowGroup is an interface for parsing and/or modifying blocks of data formatted as described at the top of
 | |
| 	this file.  Its lifecycle can be tied to a producer or consumer's lifecycle.
 | |
| 	Only one instance is required to process any number of blocks with a
 | |
| 	given column configuration.  The column configuration is specified in the
 | |
| 	constructor, and the block data to process is specified through the
 | |
| 	setData() function.	 It will not copy or take ownership of the data it processes;
 | |
| 	the caller should do that.
 | |
| 
 | |
| 	Row and RowGroup share some bits.  RowGroup owns the memory they share.
 | |
| */
 | |
| class RowGroup : public messageqcpp::Serializeable
 | |
| {
 | |
| public:
 | |
|     /** @brief The default ctor.  It does nothing.  Need to init by assignment or deserialization */
 | |
|     RowGroup();
 | |
| 
 | |
|     /** @brief The RowGroup ctor, which specifies the column config to process
 | |
| 
 | |
|     @param colCount The number of columns
 | |
|     @param positions An array specifying the offsets within the packed data
 | |
|     		of a row where each column begins.  It should have colCount + 1
 | |
|     		entries.  The first offset is 2, because a row begins with a 2-byte
 | |
|     		RID.  The last entry should be the offset of the last column +
 | |
|     		its length, which is also the size of the entire row including the rid.
 | |
|     @param coids An array of oids for each column.
 | |
|     @param tkeys An array of unique id for each column.
 | |
|     @param colTypes An array of COLTYPEs for each column.
 | |
|     @param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
 | |
|     @param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
 | |
|     */
 | |
| 
 | |
|     RowGroup(uint32_t colCount,
 | |
|              const std::vector<uint32_t>& positions,
 | |
|              const std::vector<uint32_t>& cOids,
 | |
|              const std::vector<uint32_t>& tkeys,
 | |
|              const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
 | |
|              const std::vector<uint32_t>& scale,
 | |
|              const std::vector<uint32_t>& precision,
 | |
|              uint32_t stringTableThreshold,
 | |
|              bool useStringTable = true,
 | |
|              const std::vector<bool>& forceInlineData = std::vector<bool>()
 | |
|             );
 | |
| 
 | |
|     /** @brief The copiers.  It copies metadata, not the row data */
 | |
|     RowGroup(const RowGroup&);
 | |
| 
 | |
|     /** @brief Assignment operator.  It copies metadata, not the row data */
 | |
|     RowGroup& operator=(const RowGroup&);
 | |
| 
 | |
|     ~RowGroup();
 | |
| 
 | |
|     inline void initRow(Row*, bool forceInlineData = false) const;
 | |
|     inline uint32_t getRowCount() const;
 | |
|     inline void incRowCount();
 | |
|     inline void setRowCount(uint32_t num);
 | |
|     inline void getRow(uint32_t rowNum, Row*) const;
 | |
|     inline uint32_t getRowSize() const;
 | |
|     inline uint32_t getRowSizeWithStrings() const;
 | |
|     inline uint64_t getBaseRid() const;
 | |
|     void setData(RGData* rgd);
 | |
|     inline void setData(uint8_t* d);
 | |
|     inline uint8_t* getData() const;
 | |
|     inline RGData* getRGData() const;
 | |
| 
 | |
|     uint32_t getStatus() const;
 | |
|     void setStatus(uint16_t);
 | |
| 
 | |
|     uint32_t getDBRoot() const;
 | |
|     void setDBRoot(uint32_t);
 | |
| 
 | |
|     uint32_t getDataSize() const;
 | |
|     uint32_t getDataSize(uint64_t n) const;
 | |
|     uint32_t getMaxDataSize() const;
 | |
|     uint32_t getMaxDataSizeWithStrings() const;
 | |
|     uint32_t getEmptySize() const;
 | |
| 
 | |
|     // this returns the size of the row data with the string table
 | |
|     inline uint64_t getSizeWithStrings() const;
 | |
| 
 | |
|     // sets the row count to 0 and the baseRid to something
 | |
|     // effectively initializing whatever chunk of memory
 | |
|     // data points to
 | |
|     void resetRowGroup(uint64_t baseRid);
 | |
| 
 | |
|     /* The Serializeable interface */
 | |
|     void serialize(messageqcpp::ByteStream&) const;
 | |
|     void deserialize(messageqcpp::ByteStream&);
 | |
| 
 | |
|     uint32_t getColumnWidth(uint32_t col) const;
 | |
|     uint32_t getColumnCount() const;
 | |
|     inline const std::vector<uint32_t>& getOffsets() const;
 | |
|     inline const std::vector<uint32_t>& getOIDs() const;
 | |
|     inline const std::vector<uint32_t>& getKeys() const;
 | |
|     inline const std::vector<uint32_t>& getColWidths() const;
 | |
|     inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
 | |
|     inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
 | |
|     inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
 | |
|     inline boost::shared_array<bool>& getForceInline();
 | |
|     static inline uint32_t getHeaderSize()
 | |
|     {
 | |
|         return headerSize;
 | |
|     }
 | |
| 
 | |
|     // this returns true if the type is CHAR or VARCHAR
 | |
|     inline bool isCharType(uint32_t colIndex) const;
 | |
|     inline bool isUnsigned(uint32_t colIndex) const;
 | |
|     inline bool isShortString(uint32_t colIndex) const;
 | |
|     inline bool isLongString(uint32_t colIndex) const;
 | |
| 
 | |
|     inline const std::vector<uint32_t>& getScale() const;
 | |
|     inline const std::vector<uint32_t>& getPrecision() const;
 | |
| 
 | |
|     inline bool usesStringTable() const;
 | |
|     inline void setUseStringTable(bool);
 | |
| 
 | |
| //	RGData *convertToInlineData(uint64_t *size = NULL) const;  // caller manages the memory returned by this
 | |
| //	void convertToInlineDataInPlace();
 | |
| //	RGData *convertToStringTable(uint64_t *size = NULL) const;
 | |
| //	void convertToStringTableInPlace();
 | |
|     void serializeRGData(messageqcpp::ByteStream&) const;
 | |
|     inline uint32_t getStringTableThreshold() const;
 | |
| 
 | |
|     void append(RGData&);
 | |
|     void append(RowGroup&);
 | |
|     void append(RGData&, uint pos);    // insert starting at position 'pos'
 | |
|     void append(RowGroup&, uint pos);
 | |
| 
 | |
|     RGData duplicate();   // returns a copy of the attached RGData
 | |
| 
 | |
|     std::string toString() const;
 | |
| 
 | |
|     /** operator+=
 | |
|     *
 | |
|     * append the metadata of another RowGroup to this RowGroup
 | |
|     */
 | |
|     RowGroup& operator+=(const RowGroup& rhs);
 | |
| 
 | |
|     // returns a RowGroup with only the first cols columns.  Useful for generating a
 | |
|     // RowGroup where the first cols make up a key of some kind, and the rest is irrelevant.
 | |
|     RowGroup truncate(uint32_t cols);
 | |
| 
 | |
|     /** operator<
 | |
|      *
 | |
|      * Orders RG's based on baseRid
 | |
|      */
 | |
|     inline bool operator<(const RowGroup& rhs) const;
 | |
| 
 | |
|     void addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& sysDataList);
 | |
| 
 | |
|     /* Base RIDs are now a combination of partition#, segment#, extent#, and block#. */
 | |
|     inline void setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
 | |
|                            const uint8_t& extentNum, const uint16_t& blockNum);
 | |
|     inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
 | |
|                             uint16_t* blockNum);
 | |
| 
 | |
|     inline void setStringStore(boost::shared_ptr<StringStore>);
 | |
| 
 | |
| private:
 | |
|     uint32_t columnCount;
 | |
|     uint8_t* data;
 | |
| 
 | |
|     std::vector<uint32_t> oldOffsets; //inline data offsets
 | |
|     std::vector<uint32_t> stOffsets;  //string table offsets
 | |
|     uint32_t* offsets;   //offsets either points to oldOffsets or stOffsets
 | |
|     std::vector<uint32_t> colWidths;
 | |
|     // oids: the real oid of the column, may have duplicates with alias.
 | |
|     // This oid is necessary for front-end to decide the real column width.
 | |
|     std::vector<uint32_t> oids;
 | |
|     // keys: the unique id for pair(oid, alias). bug 1632.
 | |
|     // Used to map the projected column and rowgroup index
 | |
|     std::vector<uint32_t> keys;
 | |
|     std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
 | |
| 
 | |
|     // DECIMAL support.  For non-decimal fields, the values are 0.
 | |
|     std::vector<uint32_t> scale;
 | |
|     std::vector<uint32_t> precision;
 | |
| 
 | |
|     // string table impl
 | |
|     RGData* rgData;
 | |
|     StringStore* strings;   // note, strings and data belong to rgData
 | |
|     bool useStringTable;
 | |
|     bool hasLongStringField;
 | |
|     uint32_t sTableThreshold;
 | |
|     boost::shared_array<bool> forceInline;
 | |
| 
 | |
|     static const uint32_t headerSize = 18;
 | |
|     static const uint32_t rowCountOffset = 0;
 | |
|     static const uint32_t baseRidOffset = 4;
 | |
|     static const uint32_t statusOffset = 12;
 | |
|     static const uint32_t dbRootOffset = 14;
 | |
| };
 | |
| 
 | |
| inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum,
 | |
|                              const uint8_t& extentNum, const uint16_t& blockNum);
 | |
| inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
 | |
|                                uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);
 | |
| 
 | |
| // returns the first rid of the logical block specified by baseRid
 | |
| inline uint64_t getExtentRelativeRid(uint64_t baseRid);
 | |
| 
 | |
| // returns the first rid of the logical block specified by baseRid
 | |
| inline uint64_t getFileRelativeRid(uint64_t baseRid);
 | |
| 
 | |
| /** operator+
 | |
| *
 | |
| * add the metadata of 2 RowGroups together and return a new RowGroup
 | |
| */
 | |
| RowGroup operator+(const RowGroup& lhs, const RowGroup& rhs);
 | |
| 
 | |
| boost::shared_array<int> makeMapping(const RowGroup& r1, const RowGroup& r2);
 | |
| void applyMapping(const boost::shared_array<int>& mapping, const Row& in, Row* out);
 | |
| void applyMapping(const std::vector<int>& mapping, const Row& in, Row* out);
 | |
| void applyMapping(const int* mapping, const Row& in, Row* out);
 | |
| 
 | |
| /* PL 8/10/09: commented the asserts for now b/c for the fcns that are called
 | |
| every row, they're a measurable performance penalty */
 | |
| inline uint32_t RowGroup::getRowCount() const
 | |
| {
 | |
| // 	idbassert(data);
 | |
| // 	if (!data) throw std::logic_error("RowGroup::getRowCount(): data is NULL!");
 | |
|     return *((uint32_t*) &data[rowCountOffset]);
 | |
| }
 | |
| 
 | |
| inline void RowGroup::incRowCount()
 | |
| {
 | |
| // 	idbassert(data);
 | |
|     ++(*((uint32_t*) &data[rowCountOffset]));
 | |
| }
 | |
| 
 | |
| inline void RowGroup::setRowCount(uint32_t num)
 | |
| {
 | |
| // 	idbassert(data);
 | |
|     *((uint32_t*) &data[rowCountOffset]) = num;
 | |
| }
 | |
| 
 | |
| inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
 | |
| {
 | |
| // 	idbassert(data);
 | |
|     if (useStringTable != r->usesStringTable())
 | |
|         initRow(r);
 | |
| 
 | |
|     r->baseRid = getBaseRid();
 | |
|     r->data = &(data[headerSize + (rowNum * offsets[columnCount])]);
 | |
|     r->strings = strings;
 | |
|     r->userDataStore = rgData->userDataStore.get();
 | |
| }
 | |
| 
 | |
| inline void RowGroup::setData(uint8_t* d)
 | |
| {
 | |
|     data = d;
 | |
|     strings = NULL;
 | |
|     rgData = NULL;
 | |
|     setUseStringTable(false);
 | |
| }
 | |
| 
 | |
| inline void RowGroup::setData(RGData* rgd)
 | |
| {
 | |
|     data = rgd->rowData.get();
 | |
|     strings = rgd->strings.get();
 | |
|     rgData = rgd;
 | |
| }
 | |
| 
 | |
| inline uint8_t* RowGroup::getData() const
 | |
| {
 | |
|     //assert(!useStringTable);
 | |
|     return data;
 | |
| }
 | |
| 
 | |
| inline RGData* RowGroup::getRGData() const
 | |
| {
 | |
|     return rgData;
 | |
| }
 | |
| 
 | |
| inline void RowGroup::setUseStringTable(bool b)
 | |
| {
 | |
|     useStringTable = (b && hasLongStringField);
 | |
|     //offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
 | |
|     offsets = 0;
 | |
| 
 | |
|     if (useStringTable && !stOffsets.empty())
 | |
|         offsets = &stOffsets[0];
 | |
|     else if (!useStringTable && !oldOffsets.empty())
 | |
|         offsets = &oldOffsets[0];
 | |
| 
 | |
|     if (!useStringTable)
 | |
|         strings = NULL;
 | |
| }
 | |
| 
 | |
| inline uint64_t RowGroup::getBaseRid() const
 | |
| {
 | |
|     return *((uint64_t*) &data[baseRidOffset]);
 | |
| }
 | |
| 
 | |
| inline bool RowGroup::operator<(const RowGroup& rhs) const
 | |
| {
 | |
|     return (getBaseRid() < rhs.getBaseRid());
 | |
| }
 | |
| 
 | |
| void RowGroup::initRow(Row* r, bool forceInlineData) const
 | |
| {
 | |
|     r->columnCount = columnCount;
 | |
| 
 | |
|     if (LIKELY(!types.empty()))
 | |
|     {
 | |
|         r->colWidths = (uint32_t*) &colWidths[0];
 | |
|         r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
 | |
|         r->scale = (uint32_t*) & (scale[0]);
 | |
|         r->precision = (uint32_t*) & (precision[0]);
 | |
|     }
 | |
| 
 | |
|     if (forceInlineData)
 | |
|     {
 | |
|         r->useStringTable = false;
 | |
|         r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
 | |
|         r->stOffsets = (uint32_t*) & (stOffsets[0]);
 | |
|         r->offsets = (uint32_t*) & (oldOffsets[0]);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         r->useStringTable = useStringTable;
 | |
|         r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
 | |
|         r->stOffsets = (uint32_t*) & (stOffsets[0]);
 | |
|         r->offsets = offsets;
 | |
|     }
 | |
| 
 | |
|     r->hasLongStringField = hasLongStringField;
 | |
|     r->sTableThreshold = sTableThreshold;
 | |
|     r->forceInline = forceInline;
 | |
| }
 | |
| 
 | |
| inline uint32_t RowGroup::getRowSize() const
 | |
| {
 | |
|     return offsets[columnCount];
 | |
| }
 | |
| 
 | |
| inline uint32_t RowGroup::getRowSizeWithStrings() const
 | |
| {
 | |
|     return oldOffsets[columnCount];
 | |
| }
 | |
| 
 | |
| inline uint64_t RowGroup::getSizeWithStrings() const
 | |
| {
 | |
|     if (strings == NULL)
 | |
|         return getDataSize();
 | |
|     else
 | |
|         return getDataSize() + strings->getSize();
 | |
| }
 | |
| 
 | |
| inline bool RowGroup::isCharType(uint32_t colIndex) const
 | |
| {
 | |
|     return execplan::isCharType(types[colIndex]);
 | |
| }
 | |
| 
 | |
| inline bool RowGroup::isUnsigned(uint32_t colIndex) const
 | |
| {
 | |
|     return execplan::isUnsigned(types[colIndex]);
 | |
| }
 | |
| 
 | |
| inline bool RowGroup::isShortString(uint32_t colIndex) const
 | |
| {
 | |
|     return ((getColumnWidth(colIndex) <= 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
 | |
|             (getColumnWidth(colIndex) <= 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR));
 | |
| }
 | |
| 
 | |
| inline bool RowGroup::isLongString(uint32_t colIndex) const
 | |
| {
 | |
|     return ((getColumnWidth(colIndex) > 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
 | |
|             (getColumnWidth(colIndex) > 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR) ||
 | |
|             types[colIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
 | |
|             types[colIndex] == execplan::CalpontSystemCatalog::BLOB ||
 | |
|             types[colIndex] == execplan::CalpontSystemCatalog::TEXT);
 | |
| }
 | |
| 
 | |
| inline bool RowGroup::usesStringTable() const
 | |
| {
 | |
|     return useStringTable;
 | |
| }
 | |
| 
 | |
| inline const std::vector<uint32_t>& RowGroup::getOffsets() const
 | |
| {
 | |
|     return oldOffsets;
 | |
| }
 | |
| 
 | |
| inline const std::vector<uint32_t>& RowGroup::getOIDs() const
 | |
| {
 | |
|     return oids;
 | |
| }
 | |
| 
 | |
| inline const std::vector<uint32_t>& RowGroup::getKeys() const
 | |
| {
 | |
|     return keys;
 | |
| }
 | |
| 
 | |
| inline execplan::CalpontSystemCatalog::ColDataType RowGroup::getColType(uint32_t colIndex) const
 | |
| {
 | |
|     return types[colIndex];
 | |
| }
 | |
| 
 | |
| inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes() const
 | |
| {
 | |
|     return types;
 | |
| }
 | |
| 
 | |
| inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes()
 | |
| {
 | |
|     return types;
 | |
| }
 | |
| 
 | |
| inline const std::vector<uint32_t>& RowGroup::getScale() const
 | |
| {
 | |
|     return scale;
 | |
| }
 | |
| 
 | |
| inline const std::vector<uint32_t>& RowGroup::getPrecision() const
 | |
| {
 | |
|     return precision;
 | |
| }
 | |
| 
 | |
| inline const std::vector<uint32_t>& RowGroup::getColWidths() const
 | |
| {
 | |
|     return colWidths;
 | |
| }
 | |
| 
 | |
| inline boost::shared_array<bool>& RowGroup::getForceInline()
 | |
| {
 | |
|     return forceInline;
 | |
| }
 | |
| 
 | |
| // These type defs look stupid at first, yes.  I want to see compiler errors
 | |
| // if/when we change the widths of these parms b/c this fcn will have to be
 | |
| // reevaluated.
 | |
| inline uint64_t convertToRid(const uint32_t& partitionNum,
 | |
|                              const uint16_t& segmentNum, const uint8_t& exNum, const uint16_t& blNum)
 | |
| {
 | |
|     uint64_t partNum = partitionNum, segNum = segmentNum, extentNum = exNum,
 | |
|              blockNum = blNum;
 | |
| 
 | |
|     // extentNum gets trunc'd to 6 bits, blockNums to 10 bits
 | |
|     extentNum &= 0x3f;
 | |
|     blockNum &= 0x3ff;
 | |
| 
 | |
|     return (partNum << 32) | (segNum << 16) | (extentNum << 10) | blockNum;
 | |
| }
 | |
| 
 | |
| inline void RowGroup::setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
 | |
|                                  const uint8_t& extentNum, const uint16_t& blockNum)
 | |
| {
 | |
|     *((uint64_t*) &data[baseRidOffset]) = convertToRid(partNum, segNum,
 | |
|                                           extentNum, blockNum);
 | |
| }
 | |
| 
 | |
| inline uint32_t RowGroup::getStringTableThreshold() const
 | |
| {
 | |
|     return sTableThreshold;
 | |
| }
 | |
| 
 | |
| inline void RowGroup::setStringStore(boost::shared_ptr<StringStore> ss)
 | |
| {
 | |
|     if (useStringTable)
 | |
|     {
 | |
|         rgData->setStringStore(ss);
 | |
|         strings = rgData->strings.get();
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
 | |
|                                uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum)
 | |
| {
 | |
|     if (partNum) *partNum = rid >> 32;
 | |
| 
 | |
|     if (segNum) *segNum = rid >> 16;
 | |
| 
 | |
|     if (extentNum) *extentNum = (rid >> 10) & 0x3f;
 | |
| 
 | |
|     if (blockNum) *blockNum = rid & 0x3ff;
 | |
| }
 | |
| 
 | |
| inline void RowGroup::getLocation(uint32_t* partNum, uint16_t* segNum,
 | |
|                                   uint8_t* extentNum, uint16_t* blockNum)
 | |
| {
 | |
|     getLocationFromRid(getBaseRid(), partNum, segNum, extentNum, blockNum);
 | |
| }
 | |
| 
 | |
| // returns the first RID of the logical block identified by baseRid
 | |
| inline uint64_t getExtentRelativeRid(uint64_t baseRid)
 | |
| {
 | |
|     uint64_t blockNum = baseRid & 0x3ff;
 | |
|     return (blockNum << 13);
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getExtentRelativeRid() const
 | |
| {
 | |
|     return rowgroup::getExtentRelativeRid(baseRid) | (getRelRid() & 0x1fff);
 | |
| }
 | |
| 
 | |
| // returns the first RID of the logical block identified by baseRid
 | |
| inline uint64_t getFileRelativeRid(uint64_t baseRid)
 | |
| {
 | |
|     uint64_t extentNum = (baseRid >> 10) & 0x3f;
 | |
|     uint64_t blockNum = baseRid & 0x3ff;
 | |
|     return (extentNum << 23) | (blockNum << 13);
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getFileRelativeRid() const
 | |
| {
 | |
|     return rowgroup::getFileRelativeRid(baseRid) | (getRelRid() & 0x1fff);
 | |
| }
 | |
| 
 | |
| inline void Row::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
 | |
|                              uint16_t* blockNum, uint16_t* rowNum)
 | |
| {
 | |
|     getLocationFromRid(baseRid, partNum, segNum, extentNum, blockNum);
 | |
| 
 | |
|     if (rowNum) *rowNum = getRelRid();
 | |
| }
 | |
| 
 | |
| inline void copyRow(const Row& in, Row* out, uint32_t colCount)
 | |
| {
 | |
|     if (&in == out)
 | |
|         return;
 | |
| 
 | |
|     out->setRid(in.getRelRid());
 | |
| 
 | |
|     if (!in.usesStringTable() && !out->usesStringTable())
 | |
|     {
 | |
|         memcpy(out->getData(), in.getData(), std::min(in.getOffset(colCount), out->getOffset(colCount)));
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     for (uint32_t i = 0; i < colCount; i++)
 | |
|     {
 | |
|         if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY ||
 | |
|                      in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB ||
 | |
|                      in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT))
 | |
|             out->setVarBinaryField(in.getVarBinaryStringField(i), i);
 | |
|         else if (UNLIKELY(in.isLongString(i)))
 | |
|             //out->setStringField(in.getStringField(i), i);
 | |
|             out->setStringField(in.getStringPointer(i), in.getStringLength(i), i);
 | |
|         else if (UNLIKELY(in.isShortString(i)))
 | |
|             out->setUintField(in.getUintField(i), i);
 | |
|         else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE))
 | |
|             out->setLongDoubleField(in.getLongDoubleField(i), i);
 | |
|         else
 | |
|             out->setIntField(in.getIntField(i), i);
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void copyRow(const Row& in, Row* out)
 | |
| {
 | |
|     copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
 | |
| }
 | |
| 
 | |
| inline std::string StringStore::getString(uint64_t off) const
 | |
| {
 | |
|     uint32_t length;
 | |
| 
 | |
|     if (off == std::numeric_limits<uint64_t>::max())
 | |
|         return joblist::CPNULLSTRMARK;
 | |
| 
 | |
|     MemChunk* mc;
 | |
| 
 | |
|     if (off & 0x8000000000000000)
 | |
|     {
 | |
|         off = off - 0x8000000000000000;
 | |
| 
 | |
|         if (longStrings.size() <= off)
 | |
|             return joblist::CPNULLSTRMARK;
 | |
| 
 | |
|         mc = (MemChunk*) longStrings[off].get();
 | |
|         memcpy(&length, mc->data, 4);
 | |
|         return std::string((char*) mc->data + 4, length);
 | |
|     }
 | |
| 
 | |
|     uint64_t chunk = off / CHUNK_SIZE;
 | |
|     uint64_t offset = off % CHUNK_SIZE;
 | |
| 
 | |
|     // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
 | |
|     // what gets returned, it just can't go out of bounds.
 | |
|     if (mem.size() <= chunk)
 | |
|         return joblist::CPNULLSTRMARK;
 | |
| 
 | |
|     mc = (MemChunk*) mem[chunk].get();
 | |
| 
 | |
|     memcpy(&length, &mc->data[offset], 4);
 | |
| 
 | |
|     if ((offset + length) > mc->currentSize)
 | |
|         return joblist::CPNULLSTRMARK;
 | |
| 
 | |
|     return std::string((char*) & (mc->data[offset]) + 4, length);
 | |
| }
 | |
| 
 | |
| inline const uint8_t* StringStore::getPointer(uint64_t off) const
 | |
| {
 | |
|     if (off == std::numeric_limits<uint64_t>::max())
 | |
|         return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
 | |
| 
 | |
|     uint64_t chunk = off / CHUNK_SIZE;
 | |
|     uint64_t offset = off % CHUNK_SIZE;
 | |
|     MemChunk* mc;
 | |
| 
 | |
|     if (off & 0x8000000000000000)
 | |
|     {
 | |
|         off = off - 0x8000000000000000;
 | |
| 
 | |
|         if (longStrings.size() <= off)
 | |
|             return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
 | |
| 
 | |
|         mc = (MemChunk*) longStrings[off].get();
 | |
|         return mc->data + 4;
 | |
|     }
 | |
| 
 | |
|     // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
 | |
|     // what gets returned, it just can't go out of bounds.
 | |
|     if (UNLIKELY(mem.size() <= chunk))
 | |
|         return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
 | |
| 
 | |
|     mc = (MemChunk*) mem[chunk].get();
 | |
| 
 | |
|     if (offset > mc->currentSize)
 | |
|         return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
 | |
| 
 | |
|     return &(mc->data[offset]) + 4;
 | |
| }
 | |
| 
 | |
| inline bool StringStore::isNullValue(uint64_t off) const
 | |
| {
 | |
|     uint32_t length;
 | |
| 
 | |
|     if (off == std::numeric_limits<uint64_t>::max())
 | |
|         return true;
 | |
| 
 | |
|     // Long strings won't be NULL
 | |
|     if (off & 0x8000000000000000)
 | |
|         return false;
 | |
| 
 | |
|     uint32_t chunk = off / CHUNK_SIZE;
 | |
|     uint32_t offset = off % CHUNK_SIZE;
 | |
|     MemChunk* mc;
 | |
| 
 | |
|     if (mem.size() <= chunk)
 | |
|         return true;
 | |
| 
 | |
|     mc = (MemChunk*) mem[chunk].get();
 | |
|     memcpy(&length, &mc->data[offset], 4);
 | |
| 
 | |
|     if (length == 0)
 | |
|         return true;
 | |
| 
 | |
|     if (length < 8)
 | |
|         return false;
 | |
| 
 | |
|     if ((offset + length) > mc->currentSize)
 | |
|         return true;
 | |
| 
 | |
|     if (mc->data[offset + 4] == 0)  // "" = NULL string for some reason...
 | |
|         return true;
 | |
| 
 | |
|     return (*((uint64_t*) &mc->data[offset] + 4) == *((uint64_t*) joblist::CPNULLSTRMARK.c_str()));
 | |
| }
 | |
| 
 | |
| inline bool StringStore::equals(const std::string& str, uint64_t off) const
 | |
| {
 | |
|     uint32_t length;
 | |
| 
 | |
|     if (off == std::numeric_limits<uint64_t>::max())
 | |
|         return str == joblist::CPNULLSTRMARK;
 | |
| 
 | |
|     MemChunk* mc;
 | |
| 
 | |
|     if (off & 0x8000000000000000)
 | |
|     {
 | |
|         if (longStrings.size() <= (off - 0x8000000000000000))
 | |
|             return false;
 | |
| 
 | |
|         mc = (MemChunk*) longStrings[off - 0x8000000000000000].get();
 | |
| 
 | |
|         memcpy(&length, mc->data, 4);
 | |
| 
 | |
|         // Not sure if this check it needed, but adds safety
 | |
|         if (length > mc->currentSize)
 | |
|             return false;
 | |
| 
 | |
|         return (strncmp(str.c_str(), (const char*) mc->data + 4, length) == 0);
 | |
|     }
 | |
| 
 | |
|     uint32_t chunk = off / CHUNK_SIZE;
 | |
|     uint32_t offset = off % CHUNK_SIZE;
 | |
| 
 | |
|     if (mem.size() <=  chunk)
 | |
|         return false;
 | |
| 
 | |
|     mc = (MemChunk*) mem[chunk].get();
 | |
|     memcpy(&length, &mc->data[offset], 4);
 | |
| 
 | |
|     if ((offset + length) > mc->currentSize)
 | |
|         return false;
 | |
| 
 | |
|     return (strncmp(str.c_str(), (const char*) &mc->data[offset] + 4, length) == 0);
 | |
| }
 | |
| inline uint32_t StringStore::getStringLength(uint64_t off)
 | |
| {
 | |
|     uint32_t length;
 | |
|     MemChunk* mc;
 | |
| 
 | |
|     if (off == std::numeric_limits<uint64_t>::max())
 | |
|         return 0;
 | |
| 
 | |
|     if (off & 0x8000000000000000)
 | |
|     {
 | |
|         off = off - 0x8000000000000000;
 | |
| 
 | |
|         if (longStrings.size() <= off)
 | |
|             return 0;
 | |
| 
 | |
|         mc = (MemChunk*) longStrings[off].get();
 | |
|         memcpy(&length, mc->data, 4);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         uint64_t chunk = off / CHUNK_SIZE;
 | |
|         uint64_t offset = off % CHUNK_SIZE;
 | |
| 
 | |
|         if (mem.size() <= chunk)
 | |
|             return 0;
 | |
| 
 | |
|         mc = (MemChunk*) mem[chunk].get();
 | |
|         memcpy(&length, &mc->data[offset], 4);
 | |
|     }
 | |
| 
 | |
|     return length;
 | |
| }
 | |
| 
 | |
| inline bool StringStore::isEmpty() const
 | |
| {
 | |
|     return empty;
 | |
| }
 | |
| 
 | |
| inline uint64_t StringStore::getSize() const
 | |
| {
 | |
|     uint32_t i;
 | |
|     uint64_t ret = 0;
 | |
|     MemChunk* mc;
 | |
| 
 | |
|     for (i = 0; i < mem.size(); i++)
 | |
|     {
 | |
|         mc = (MemChunk*) mem[i].get();
 | |
|         ret += mc->capacity;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < longStrings.size(); i++)
 | |
|     {
 | |
|         mc = (MemChunk*) longStrings[i].get();
 | |
|         ret += mc->capacity;
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| inline RGData& RGData::operator=(const RGData& r)
 | |
| {
 | |
|     rowData = r.rowData;
 | |
|     strings = r.strings;
 | |
|     userDataStore = r.userDataStore;
 | |
|     return *this;
 | |
| }
 | |
| 
 | |
| inline void RGData::getRow(uint32_t num, Row* row)
 | |
| {
 | |
|     uint32_t size = row->getSize();
 | |
|     row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * size)], strings.get(), userDataStore.get()));
 | |
| }
 | |
| 
 | |
| }
 | |
| 
 | |
| #endif
 | |
| // vim:ts=4 sw=4:
 | |
| 
 |