| /*
 | |
|    Copyright (C) 2014 InfiniDB, Inc.
 | |
|    Copyright (c) 2016-2024 MariaDB Corporation
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA.
 | |
| */
 | |
| 
 | |
| //
 | |
| // C++ Interface: rowgroup
 | |
| //
 | |
| // Description:
 | |
| //
 | |
| // Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include <vector>
 | |
| #include <string>
 | |
| #include <stdexcept>
 | |
| // #define NDEBUG
 | |
| #include <cassert>
 | |
| 
 | |
| #include <boost/core/span.hpp>
 | |
| #include <boost/shared_ptr.hpp>
 | |
| 
 | |
| #include <boost/thread/mutex.hpp>
 | |
| #include <cmath>
 | |
| #include <cfloat>
 | |
| #include <execinfo.h>
 | |
| 
 | |
| #include "countingallocator.h"
 | |
| #include "hasher.h"
 | |
| 
 | |
| #include "joblisttypes.h"
 | |
| #include "bytestream.h"
 | |
| #include "calpontsystemcatalog.h"
 | |
| #include "exceptclasses.h"
 | |
| #include "mcsv1_udaf.h"
 | |
| 
 | |
| #include "branchpred.h"
 | |
| #include "datatypes/mcs_int128.h"
 | |
| 
 | |
| #include "collation.h"
 | |
| #include "common/hashfamily.h"
 | |
| #include "buffertypes.h"
 | |
| 
 | |
| #include <cstdlib>
 | |
| #include "execinfo.h"
 | |
| 
 | |
| // Workaround for my_global.h #define of isnan(X) causing a std::std namespace
 | |
| namespace joblist
 | |
| {
 | |
| class GroupConcatAg;
 | |
| }
 | |
| 
 | |
| namespace rowgroup
 | |
| {
 | |
| constexpr int16_t rgCommonSize = 8192;
 | |
| using RGDataSizeType = uint64_t;
 | |
| 
 | |
| /*
 | |
|     The RowGroup family of classes encapsulate the data moved through the
 | |
|     system.
 | |
| 
 | |
|      - RowGroup specifies the format of the data primarily (+ some other metadata),
 | |
|      - RGData (aka RowGroup Data) encapsulates the data,
 | |
|      - Row is used to extract fields from the data and iterate.
 | |
| 
 | |
|     JobListFactory instantiates the RowGroups to be used by each stage of processing.
 | |
|     RGDatas are passed between stages, and their RowGroup instances are used
 | |
|     to interpret them.
 | |
| 
 | |
|     Historically, row data was just a chunk of contiguous memory, a uint8_t *.
 | |
|     Every field had a fixed width, which allowed for quick offset
 | |
|     calculation when assigning or retrieving individual fields.  That worked
 | |
|     well for a few years, but at some point it became common to declare
 | |
|     all strings as max-length, and to manipulate them in queries.
 | |
| 
 | |
|     Having fixed-width fields, even for strings, required an unreasonable
 | |
|     amount of memory.  RGData & StringStore were introduced to handle strings
 | |
|     more efficiently, at least with respect to memory.  The row data would
 | |
|     still be a uint8_t *, and columns would be fixed-width, but string fields
 | |
|     above a certain width would contain a 'Pointer' that referenced a string in
 | |
|     StringStore.  Strings are stored efficiently in StringStore, so there is
 | |
|     no longer wasted space.
 | |
| 
 | |
|     StringStore comes with a different inefficiency however.  When a value
 | |
|     is overwritten, the original string cannot be freed independently of the
 | |
|     others, so it continues to use space.  If values are only set once, as is
 | |
|     the typical case, then StringStore is efficient.  When it is necessary
 | |
|     to overwrite string fields, it is possible to configure these classes
 | |
|     to use the original data format so that old string fields do not accumulate
 | |
|     in memory.  Of course, be careful, because blobs and text fields in CS are
 | |
|     declared as 2GB strings!
 | |
| 
 | |
|     A single RGData contains up to one 'logical block' worth of data,
 | |
|     which is 8192 rows.  One RGData is usually treated as one unit of work by
 | |
|     PrimProc and the JobSteps, but the rows an RGData contains and how many are
 | |
|     treated as a work unit depend on the operation being done.
 | |
| 
 | |
|     For example, PrimProc works in units of 8192 contiguous rows
 | |
|     that come from disk.  If half of the rows were filtered out, then the
 | |
|     RGData it passes to the next stage would only contain 4096 rows.
 | |
| 
 | |
|     Others build results incrementally before passing them along, such as
 | |
|     group-by.  If one group contains 11111 values, then group-by will
 | |
|     return 2 RGDatas for that group, one with 8192 rows, and one with 2919.
 | |
| 
 | |
|     Note: There is no synchronization in any of these classes for obvious
 | |
|     performance reasons.  Likewise, although it's technically safe for many
 | |
|     readers to access an RGData simultaneously, that would not be an
 | |
|     efficient thing to do.  Try to stick to designs where a single RGData
 | |
|     is used by a single thread at a time.
 | |
| */
 | |
| 
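/*
    Illustrative sketch (not part of this header): how the three classes described above are
    typically combined.  It assumes the usual RowGroup helpers declared further down in this
    file (setData(), initRow(), getRowCount(), getRow()); treat the exact calls as an
    approximation rather than a definitive recipe.

    void sumFirstColumn(rowgroup::RowGroup& rg, rowgroup::RGData& rgData)
    {
      rg.setData(&rgData);                 // RGData owns the actual row bytes; RowGroup only describes them
      rowgroup::Row row;
      rg.initRow(&row);                    // sizes/offsets for 'row' come from the RowGroup
      rg.getRow(0, &row);                  // point 'row' at the first row in the RGData
      int64_t sum = 0;
      for (uint32_t i = 0; i < rg.getRowCount(); ++i, row.nextRow())
        sum += row.getIntField(0);         // fixed-width access via precomputed offsets
      (void)sum;
    }
*/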
 | |
// VS'08 carps that struct MemChunk is not default-copyable because of the zero-length array.
// This may be so, and we'll get link errors if someone tries, but so far no one has.
| 
 | |
| // Helper to get a value from nested vector pointers.
 | |
| template <typename T>
 | |
| inline T derefFromTwoVectorPtrs(const std::vector<T>* outer, const std::vector<T>* inner, const T innerIdx)
 | |
| {
 | |
|   auto outerIdx = inner->operator[](innerIdx);
 | |
|   return outer->operator[](outerIdx);
 | |
| }
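/*
    Example of the helper above (illustrative only): 'inner' holds indexes into 'outer', so
    derefFromTwoVectorPtrs(&outer, &inner, i) is equivalent to outer[inner[i]].

    std::vector<uint32_t> widths = {4, 8, 16};   // e.g. small-side column widths
    std::vector<uint32_t> keyIds = {2, 0};       // e.g. small-side key column ids
    uint32_t w = derefFromTwoVectorPtrs(&widths, &keyIds, 0u);  // widths[keyIds[0]] == 16
*/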
 | |
| 
 | |
| class StringStore
 | |
| {
 | |
|  public:
 | |
|   StringStore() = default;
 | |
|   StringStore(allocators::CountingAllocator<StringStoreBufType> alloc);
 | |
|   StringStore(const StringStore&) = delete;
 | |
|   StringStore(StringStore&&) = delete;
 | |
|   StringStore& operator=(const StringStore&) = delete;
 | |
|   StringStore& operator=(StringStore&&) = delete;
 | |
|   virtual ~StringStore();
 | |
| 
 | |
|   inline utils::NullString getString(uint64_t offset) const;
 | |
  // Returns the offset of the stored string.
  // storeString() may receive nullptr as data; that is the proper way to store NULL values.
  uint64_t storeString(const uint8_t* data, uint32_t length);
  // Note that getPointer() can return nullptr.
|   inline const uint8_t* getPointer(uint64_t offset) const;
 | |
|   inline uint32_t getStringLength(uint64_t offset) const;
 | |
|   inline utils::ConstString getConstString(uint64_t offset) const
 | |
|   {
 | |
|     return {(const char*)getPointer(offset), getStringLength(offset)};
 | |
|   }
 | |
|   inline bool isEmpty() const;
 | |
|   inline uint64_t getSize() const;
 | |
|   inline bool isNullValue(uint64_t offset) const;
 | |
| 
 | |
|   void clear();
 | |
| 
 | |
|   void serialize(messageqcpp::ByteStream&) const;
 | |
|   void deserialize(messageqcpp::ByteStream&);
 | |
| 
 | |
|   //@bug6065, make StringStore::storeString() thread safe
 | |
|   void useStoreStringMutex(bool b)
 | |
|   {
 | |
|     fUseStoreStringMutex = b;
 | |
|   }
 | |
|   bool useStoreStringMutex() const
 | |
|   {
 | |
|     return fUseStoreStringMutex;
 | |
|   }
 | |
|   void useOnlyLongStrings(bool b)
 | |
|   {
 | |
|     fUseOnlyLongStrings = b;
 | |
|   }
 | |
|   bool useOnlyLongStrings() const
 | |
|   {
 | |
|     return fUseOnlyLongStrings;
 | |
|   }
 | |
| 
 | |
  // This is an overlay b/c the underlying data needs to be any size,
  // and alloc'd in one chunk.  'data' can't be a separate dynamic chunk.
  // NOTE: a change here requires a corresponding change in 'bytestream.h'.
|   struct MemChunk
 | |
|   {
 | |
|     uint32_t currentSize;
 | |
|     uint32_t capacity;
 | |
|     uint8_t data[];
 | |
|   };
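  /*
    Illustrative sketch (an assumption about intended use, not the actual StringStore code):
    MemChunk is overlaid onto the head of a single raw allocation, so the header and the
    string bytes live in one chunk.

      uint8_t* raw = new uint8_t[CHUNK_SIZE];
      MemChunk* mc = reinterpret_cast<MemChunk*>(raw);
      mc->currentSize = 0;
      mc->capacity = CHUNK_SIZE - sizeof(MemChunk);  // bytes available in mc->data
      // strings are then appended into mc->data and currentSize is advanced
  */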
 | |
| 
 | |
|  private:
 | |
|   std::string empty_str;
 | |
|   static constexpr const uint32_t CHUNK_SIZE = 64 * 1024;  // allocators like powers of 2
 | |
| 
 | |
|   std::vector<boost::shared_ptr<uint8_t[]>> mem;
 | |
| 
 | |
|   // To store strings > 64KB (BLOB/TEXT)
 | |
|   std::vector<boost::shared_ptr<uint8_t[]>> longStrings;
 | |
|   bool empty = true;
 | |
|   bool fUseStoreStringMutex = false;  //@bug6065, make StringStore::storeString() thread safe
 | |
|   bool fUseOnlyLongStrings = false;
 | |
|   boost::mutex fMutex;
 | |
|   std::optional<allocators::CountingAllocator<StringStoreBufType>> alloc{};
 | |
| };
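/*
    Illustrative sketch (not part of the API docs): round-tripping a value through StringStore.
    Offsets returned by storeString() are what Row stores inline when a column lives in the
    string table; whether isNullValue() flags the nullptr case is assumed here.

    rowgroup::StringStore ss;
    const char* s = "hello";
    uint64_t off = ss.storeString(reinterpret_cast<const uint8_t*>(s), 5);
    utils::ConstString back = ss.getConstString(off);   // views the stored bytes
    uint64_t nullOff = ss.storeString(nullptr, 0);       // the proper way to store a NULL
    bool isNull = ss.isNullValue(nullOff);
    (void)back; (void)isNull;
*/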
 | |
| 
 | |
| // Where we store user data for UDA(n)F
 | |
| class UserDataStore
 | |
| {
 | |
|   // length represents the fixed portion length of userData.
 | |
|   // There may be variable length data in containers or other
 | |
|   // user created structures.
 | |
|   struct StoreData
 | |
|   {
 | |
|     int32_t length;
 | |
|     std::string functionName;
 | |
|     boost::shared_ptr<mcsv1sdk::UserData> userData;
 | |
|     StoreData() : length(0)
 | |
|     {
 | |
|     }
 | |
|     StoreData(const StoreData& rhs)
 | |
|     {
 | |
|       length = rhs.length;
 | |
|       functionName = rhs.functionName;
 | |
|       userData = rhs.userData;
 | |
|     }
 | |
|   };
 | |
| 
 | |
|  public:
 | |
|   UserDataStore() = default;
 | |
|   virtual ~UserDataStore() = default;
 | |
|   UserDataStore(const UserDataStore&) = delete;
 | |
|   UserDataStore(UserDataStore&&) = delete;
 | |
|   UserDataStore& operator=(const UserDataStore&) = delete;
 | |
|   UserDataStore& operator=(UserDataStore&&) = delete;
 | |
| 
 | |
|   void serialize(messageqcpp::ByteStream&) const;
 | |
|   void deserialize(messageqcpp::ByteStream&);
 | |
| 
 | |
|   // Set to make UserDataStore thread safe
 | |
|   void useUserDataMutex(bool b)
 | |
|   {
 | |
|     fUseUserDataMutex = b;
 | |
|   }
 | |
|   bool useUserDataMutex() const
 | |
|   {
 | |
|     return fUseUserDataMutex;
 | |
|   }
 | |
| 
 | |
|   // Returns the offset
 | |
|   uint32_t storeUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> data,
 | |
|                          uint32_t length);
 | |
| 
 | |
|   boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t offset) const;
 | |
| 
 | |
|  private:
 | |
|   std::vector<StoreData> vStoreData;
 | |
| 
 | |
|   bool fUseUserDataMutex = false;
 | |
|   boost::mutex fMutex;
 | |
| };
 | |
| 
 | |
| struct GroupConcat;
 | |
| 
 | |
| class AggregateDataStore
 | |
| {
 | |
|  public:
 | |
|   AggregateDataStore() = default;
 | |
|   explicit AggregateDataStore(const std::vector<boost::shared_ptr<GroupConcat>>& groupConcat)
 | |
|    : fGroupConcat(groupConcat)
 | |
|   {
 | |
|   }
 | |
|   ~AggregateDataStore() = default;
 | |
|   AggregateDataStore(const AggregateDataStore&) = delete;
 | |
|   AggregateDataStore(AggregateDataStore&&) = delete;
 | |
|   AggregateDataStore& operator=(const AggregateDataStore&) = delete;
 | |
|   AggregateDataStore& operator=(AggregateDataStore&&) = delete;
 | |
| 
 | |
|   void serialize(messageqcpp::ByteStream&) const;
 | |
|   void deserialize(messageqcpp::ByteStream&);
 | |
| 
 | |
|   uint32_t storeAggregateData(boost::shared_ptr<joblist::GroupConcatAg>& data);
 | |
|   boost::shared_ptr<joblist::GroupConcatAg> getAggregateData(uint32_t pos) const;
 | |
| 
 | |
|   RGDataSizeType getDataSize() const;
 | |
| 
 | |
|  private:
 | |
|   friend class RGData;
 | |
|   std::vector<boost::shared_ptr<GroupConcat>> fGroupConcat;
 | |
|   std::vector<boost::shared_ptr<joblist::GroupConcatAg>> fData;
 | |
| };
 | |
| 
 | |
| class RowGroup;
 | |
| class Row;
 | |
| 
 | |
| /* TODO: OO the rowgroup data to the extent there's no measurable performance hit. */
 | |
| class RGData
 | |
| {
 | |
|  public:
 | |
|   RGData() = default;  // useless unless followed by an = or a deserialize operation
 | |
|   RGData(allocators::CountingAllocator<RGDataBufType>&);
 | |
|   RGData(const RowGroup& rg, uint32_t rowCount);  // allocates memory for rowData
 | |
|   explicit RGData(const RowGroup& rg);
 | |
|   explicit RGData(const RowGroup& rg, allocators::CountingAllocator<RGDataBufType>& alloc);
 | |
|   RGData& operator=(const RGData& rhs) = default;
 | |
|   RGData& operator=(RGData&&) = default;
 | |
|   RGData(const RGData&) = default;
 | |
|   RGData(RGData&&) = default;
 | |
|   virtual ~RGData() = default;
 | |
| 
 | |
|   // amount should be the # returned by RowGroup::getDataSize()
 | |
|   void serialize(messageqcpp::ByteStream&, RGDataSizeType amount) const;
 | |
| 
 | |
|   // the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
 | |
|   // inline data with a length field.  Once that's converted to string table format, that
 | |
|   // option can go away.
 | |
  void deserialize(messageqcpp::ByteStream&, RGDataSizeType amount = 0);
| 
 | |
|   inline RGDataSizeType getStringTableMemUsage();
 | |
|   void clear();
 | |
|   void reinit(const RowGroup& rg);
 | |
|   void reinit(const RowGroup& rg, uint32_t rowCount);
 | |
|   inline void setStringStore(boost::shared_ptr<StringStore>& ss)
 | |
|   {
 | |
|     strings = ss;
 | |
|   }
 | |
| 
 | |
|   // this will use the pre-configured Row to figure out where row # num is, then set the Row
 | |
|   // to point to it.  It's a shortcut around using a RowGroup to do the same thing for cases
 | |
|   // where it's inconvenient to instantiate one.
 | |
|   inline void getRow(uint32_t num, Row* row);
 | |
| 
 | |
|   //@bug6065, make StringStore::storeString() thread safe
 | |
|   void useStoreStringMutex(bool b)
 | |
|   {
 | |
|     if (strings)
 | |
|       strings->useStoreStringMutex(b);
 | |
|   }
 | |
|   bool useStoreStringMutex() const
 | |
|   {
 | |
|     return (strings ? (strings->useStoreStringMutex()) : false);
 | |
|   }
 | |
| 
 | |
|   UserDataStore* getUserDataStore();
 | |
|   // make UserDataStore::storeData() thread safe
 | |
|   void useUserDataMutex(bool b)
 | |
|   {
 | |
|     if (userDataStore)
 | |
|       userDataStore->useUserDataMutex(b);
 | |
|   }
 | |
|   bool useUserDataMutex() const
 | |
|   {
 | |
|     return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
 | |
|   }
 | |
| 
 | |
|   bool hasRowData() const
 | |
|   {
 | |
|     return !!rowData;
 | |
|   }
 | |
| 
 | |
|  private:
 | |
|   uint32_t rowSize = 0;      // can't be.
 | |
|   uint32_t columnCount = 0;  // shouldn't be, but...
 | |
|   boost::shared_ptr<RGDataBufType> rowData;
 | |
|   boost::shared_ptr<StringStore> strings;
 | |
|   std::shared_ptr<UserDataStore> userDataStore;
 | |
|   std::shared_ptr<AggregateDataStore> aggregateDataStore;
 | |
|   std::optional<allocators::CountingAllocator<RGDataBufType>> alloc = {};
 | |
| 
 | |
|   // Need sig to support backward compat.  RGData can deserialize both forms.
 | |
|   static const uint32_t RGDATA_SIG = 0xffffffff;  // won't happen for 'old' Rowgroup data
 | |
| 
 | |
|   friend class RowGroup;
 | |
|   friend class RowGroupStorage;
 | |
| };
 | |
| 
 | |
| class Row
 | |
| {
 | |
|  public:
 | |
|   struct Pointer
 | |
|   {
 | |
|     inline Pointer() = default;
 | |
| 
 | |
|     explicit inline Pointer(uint8_t* d) : data(d)
 | |
|     {
 | |
|     }
 | |
|     inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s)
 | |
|     {
 | |
|     }
 | |
|     inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) : data(d), strings(s), userDataStore(u)
 | |
|     {
 | |
|     }
 | |
|     inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u, AggregateDataStore* a)
 | |
|      : data(d), strings(s), userDataStore(u), aggregateDataStore(a)
 | |
|     {
 | |
|     }
 | |
|     uint8_t* data = nullptr;
 | |
|     StringStore* strings = nullptr;
 | |
|     UserDataStore* userDataStore = nullptr;
 | |
|     AggregateDataStore* aggregateDataStore = nullptr;
 | |
|   };
 | |
| 
 | |
|   Row() = default;
 | |
|   Row(const Row&);
 | |
|   ~Row() = default;
 | |
| 
 | |
|   Row& operator=(const Row&);
 | |
|   bool operator==(const Row&) const;
 | |
| 
 | |
|   inline void setData(const Pointer&);
 | |
|   inline uint8_t* getData() const;
 | |
| 
 | |
|   inline void setPointer(const Pointer&);
 | |
|   inline Pointer getPointer() const;
 | |
| 
 | |
|   inline void nextRow();
 | |
|   inline uint32_t getColumnWidth(uint32_t colIndex) const;
 | |
|   inline uint32_t getColumnCount() const;
 | |
|   inline uint32_t getInternalSize() const;  // this is only accurate if there is no string table
 | |
|   inline uint32_t getSize() const;          // this is only accurate if there is no string table
 | |
|   // if a string table is being used, getRealSize() takes into account variable-length strings
 | |
|   inline uint32_t getRealSize() const;
 | |
|   inline uint32_t getOffset(uint32_t colIndex) const;
 | |
|   inline uint32_t getScale(uint32_t colIndex) const;
 | |
|   inline uint32_t getPrecision(uint32_t colIndex) const;
 | |
|   inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
 | |
|   inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
 | |
|   inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
 | |
|   inline uint32_t getCharsetNumber(uint32_t colIndex) const;
 | |
| 
 | |
  // this returns true if the column's type is a character type (e.g. CHAR or VARCHAR)
|   inline bool isCharType(uint32_t colIndex) const;
 | |
|   inline bool isUnsigned(uint32_t colIndex) const;
 | |
|   inline bool isShortString(uint32_t colIndex) const;
 | |
|   inline bool isLongString(uint32_t colIndex) const;
 | |
| 
 | |
|   bool colHasCollation(uint32_t colIndex) const
 | |
|   {
 | |
|     return datatypes::typeHasCollation(getColType(colIndex));
 | |
|   }
 | |
| 
 | |
|   template <int len>
 | |
|   inline uint64_t getUintField(uint32_t colIndex) const;
 | |
|   inline uint64_t getUintField(uint32_t colIndex) const;
 | |
|   template <int len>
 | |
|   inline int64_t getIntField(uint32_t colIndex) const;
 | |
|   inline int64_t getIntField(uint32_t colIndex) const;
 | |
|   // Get a signed 64-bit integer column value, convert to the given
 | |
|   // floating point data type T (e.g. float, double, long double)
 | |
|   // and divide it according to the scale.
 | |
|   template <typename T>
 | |
|   inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
 | |
|   {
 | |
|     const T d = getIntField(colIndex);
 | |
|     if (!scale)
 | |
|       return d;
 | |
|     return d / datatypes::scaleDivisor<T>(scale);
 | |
|   }
 | |
|   template <typename T>
 | |
|   inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex) const
 | |
|   {
 | |
|     return getScaledSInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
 | |
|   }
 | |
|   // Get an unsigned 64-bit integer column value, convert to the given
 | |
|   // floating point data type T (e.g. float, double, long double)
 | |
|   // and divide it according to the scale.
 | |
|   template <typename T>
 | |
|   inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
 | |
|   {
 | |
|     const T d = getUintField(colIndex);
 | |
|     if (!scale)
 | |
|       return d;
 | |
|     return d / datatypes::scaleDivisor<T>(scale);
 | |
|   }
 | |
|   template <typename T>
 | |
|   inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex) const
 | |
|   {
 | |
|     return getScaledUInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
 | |
|   }
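  /*
    Worked example (illustrative, assuming datatypes::scaleDivisor<T>(s) returns 10^s):
    a DECIMAL(10,2) value 123.45 is stored as the integer 12345 with scale 2, so
    getIntField(col) returns 12345 and

      double d = row.getScaledSInt64FieldAsXFloat<double>(col);  // 12345 / 100 == 123.45
  */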
 | |
|   template <typename T>
 | |
|   inline bool equals(T* value, uint32_t colIndex) const;
 | |
|   template <int len>
 | |
|   inline bool equals(uint64_t val, uint32_t colIndex) const;
 | |
|   inline bool equals(long double val, uint32_t colIndex) const;
 | |
|   inline bool equals(const int128_t& val, uint32_t colIndex) const;
 | |
| 
 | |
|   inline double getDoubleField(uint32_t colIndex) const;
 | |
|   inline float getFloatField(uint32_t colIndex) const;
 | |
|   inline datatypes::Decimal getDecimalField(uint32_t colIndex) const
 | |
|   {
 | |
|     if (LIKELY(getColumnWidth(colIndex) == datatypes::MAXDECIMALWIDTH))
 | |
|       return datatypes::Decimal(getTSInt128Field(colIndex), (int)getScale(colIndex), getPrecision(colIndex));
 | |
|     return datatypes::Decimal(datatypes::TSInt64(getIntField(colIndex)), (int)getScale(colIndex),
 | |
|                               getPrecision(colIndex));
 | |
|   }
 | |
|   inline long double getLongDoubleField(uint32_t colIndex) const;
 | |
|   inline void storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const;
 | |
|   inline void getInt128Field(uint32_t colIndex, int128_t& x) const;
 | |
|   inline datatypes::TSInt128 getTSInt128Field(uint32_t colIndex) const;
 | |
| 
 | |
|   inline uint64_t getBaseRid() const;
 | |
|   inline uint64_t getRid() const;
 | |
|   inline uint16_t getRelRid() const;             // returns a rid relative to this logical block
 | |
|   inline uint64_t getExtentRelativeRid() const;  // returns a rid relative to the extent it's in
 | |
|   inline uint64_t getFileRelativeRid() const;    // returns a file-relative rid
 | |
|   inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum,
 | |
|                           uint16_t* rowNum);
 | |
| 
 | |
|   template <int len>
 | |
|   void setUintField(uint64_t val, uint32_t colIndex);
 | |
| 
 | |
|   /* Note: these 2 fcns avoid 1 array lookup per call.  Using them only
 | |
|   in projection on the PM resulted in a 2.8% performance gain on
 | |
|   the queries listed in bug 2223.
 | |
|   TODO: apply them everywhere else possible, and write equivalents
 | |
|   for the other types as well as the getters.
 | |
|   */
 | |
|   template <int len>
 | |
|   void setUintField_offset(uint64_t val, uint32_t offset);
 | |
|   template <typename T>
 | |
|   void setIntField_offset(const T val, const uint32_t offset);
 | |
|   inline void nextRow(uint32_t size);
 | |
|   inline void prevRow(uint32_t size, uint64_t number);
 | |
| 
 | |
|   inline void setUintField(uint64_t val, uint32_t colIndex);
 | |
|   template <int len>
 | |
|   void setIntField(int64_t, uint32_t colIndex);
 | |
|   inline void setIntField(int64_t, uint32_t colIndex);
 | |
| 
 | |
|   inline void setDoubleField(double val, uint32_t colIndex);
 | |
|   inline void setFloatField(float val, uint32_t colIndex);
 | |
|   inline void setDecimalField(double /*val*/, uint32_t /*colIndex*/)
 | |
|   {
 | |
|   }  // TODO: Do something here
 | |
|   inline void setLongDoubleField(const long double& val, uint32_t colIndex);
 | |
|   inline void setInt128Field(const int128_t& val, uint32_t colIndex);
 | |
| 
 | |
|   inline void setRid(uint64_t rid);
 | |
| 
 | |
|   // TODO: remove this (string is not efficient for this), use getConstString() instead
 | |
|   inline utils::NullString getStringField(uint32_t colIndex) const
 | |
|   {
 | |
|     utils::ConstString x = getConstString(colIndex);
 | |
|     return utils::NullString(x);
 | |
|   }
 | |
| 
 | |
|   inline utils::ConstString getConstString(uint32_t colIndex) const;
 | |
|   inline utils::ConstString getShortConstString(uint32_t colIndex) const;
 | |
|   void setStringField(const utils::NullString& val, uint32_t colIndex);
 | |
|   void setStringField(const uint8_t* val, uint32_t length, uint32_t colIndex);
 | |
|   inline void setStringField(const utils::ConstString& str, uint32_t colIndex);
 | |
|   template <typename T>
 | |
|   inline void setBinaryField(const T* value, uint32_t width, uint32_t colIndex);
 | |
|   template <typename T>
 | |
|   inline void setBinaryField(const T* value, uint32_t colIndex);
 | |
|   template <typename T>
 | |
|   inline void setBinaryField_offset(const T* value, uint32_t width, uint32_t colIndex);
 | |
  // XXX: TODO: I'd deprecate these two functions in favor of get/setStringField.
  // get/setStringField properly support binary data of up to 4GB
  // and also provide a performant interface through the use of ConstString.
  // support VARBINARY
  // Adds a 2-byte length at the beginning of the field.  nullptr and a zero-length field are
  // treated the same; one of the length bits could be used to distinguish these two cases.
|   inline void setVarBinaryField(const utils::NullString& val, uint32_t colIndex);
 | |
|   // No string construction is necessary for better performance.
 | |
|   inline uint32_t getVarBinaryLength(uint32_t colIndex) const;
 | |
|   inline const uint8_t* getVarBinaryField(uint32_t colIndex) const;
 | |
|   inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
 | |
|   inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
 | |
|   inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
 | |
|   inline void setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> userData,
 | |
|                           uint32_t len, uint32_t colIndex);
 | |
|   inline void setAggregateData(boost::shared_ptr<joblist::GroupConcatAg> data, uint32_t colIndex);
 | |
|   inline joblist::GroupConcatAg* getAggregateData(uint32_t colIndex) const;
 | |
| 
 | |
|   uint64_t getNullValue(uint32_t colIndex) const;
 | |
|   bool isNullValue(uint32_t colIndex) const;
 | |
|   template <cscDataType cscDT, int width>
 | |
|   inline bool isNullValue_offset(uint32_t offset) const;
 | |
| 
 | |
|   // when NULLs are pulled out via getIntField(), they come out with these values.
 | |
|   // Ex: the 1-byte int null value is 0x80.  When it gets cast to an int64_t
 | |
|   // it becomes 0xffffffffffffff80, which won't match anything returned by getNullValue().
 | |
|   int64_t getSignedNullValue(uint32_t colIndex) const;
 | |
| 
 | |
|   // copy data in srcIndex field to destIndex, all data type
 | |
|   inline void copyField(uint32_t destIndex, uint32_t srcIndex) const;
 | |
| 
 | |
|   // copy data in srcIndex field to destAddr, all data type
 | |
|   // inline void copyField(uint8_t* destAddr, uint32_t srcIndex) const;
 | |
| 
 | |
  // an adapter for code that uses the copyField call above;
  // that one is not string-table safe, this one is
|   inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
 | |
| 
 | |
|   inline void copyBinaryField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
 | |
| 
 | |
|   std::string toString(uint32_t rownum = 0) const;
 | |
|   std::string toCSV() const;
 | |
| 
 | |
|   /* These fcns are used only in joins.  The RID doesn't matter on the side that
 | |
|   gets hashed.  We steal that field here to "mark" a row. */
 | |
|   inline void markRow();
 | |
|   inline void zeroRid();
 | |
|   inline bool isMarked();
 | |
|   void setToNull(uint32_t colIndex);
 | |
|   void initToNull();
 | |
| 
 | |
|   inline void usesStringTable(bool b)
 | |
|   {
 | |
|     useStringTable = b;
 | |
|   }
 | |
|   inline bool usesStringTable() const
 | |
|   {
 | |
|     return useStringTable;
 | |
|   }
 | |
|   inline bool hasLongString() const
 | |
|   {
 | |
|     return hasLongStringField;
 | |
|   }
 | |
| 
 | |
  // these are for cases when you already know the type definitions are the same.
  // a fcn to check the type defs separately doesn't exist yet.  No normalization.
|   inline uint64_t hash(uint32_t lastCol) const;  // generates a hash for cols [0-lastCol]
 | |
|   inline uint64_t hash() const;                  // generates a hash for all cols
 | |
|   inline void colUpdateHasher(datatypes::MariaDBHasher& hM, const utils::Hasher_r& h, const uint32_t col,
 | |
|                               uint32_t& intermediateHash) const;
 | |
|   inline void colUpdateHasherTypeless(datatypes::MariaDBHasher& hasher, uint32_t keyColsIdx,
 | |
|                                       const std::vector<uint32_t>& keyCols,
 | |
|                                       const std::vector<uint32_t>* smallSideKeyColumnsIds,
 | |
|                                       const std::vector<uint32_t>* smallSideColumnsWidths) const;
 | |
|   inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols,
 | |
|                                const std::vector<uint32_t>* smallSideKeyColumnsIds,
 | |
|                                const std::vector<uint32_t>* smallSideColumnsWidths) const
 | |
|   {
 | |
|     datatypes::MariaDBHasher h;
 | |
|     for (uint32_t i = 0; i < keyCols.size(); i++)
 | |
|       colUpdateHasherTypeless(h, i, keyCols, smallSideKeyColumnsIds, smallSideColumnsWidths);
 | |
|     return h.finalize();
 | |
|   }
 | |
| 
 | |
|   bool equals(const Row&, uint32_t lastCol) const;
 | |
|   inline bool equals(const Row&) const;
 | |
| 
 | |
|   inline void setUserDataStore(UserDataStore* u)
 | |
|   {
 | |
|     userDataStore = u;
 | |
|   }
 | |
| 
 | |
|   bool getNullMark(uint32_t col) const
 | |
|   {
 | |
|     return data[getInternalSize() + col];
 | |
|   }
 | |
| 
 | |
|   void setNullMark(uint32_t col, bool isNull) const
 | |
|   {
 | |
|     data[getInternalSize() + col] = isNull;
 | |
|   }
 | |
| 
 | |
|   const CHARSET_INFO* getCharset(uint32_t col) const;
 | |
| 
 | |
|  private:
 | |
|   inline bool inStringTable(uint32_t col) const;
 | |
| 
 | |
|  private:
 | |
|   uint32_t columnCount = 0;
 | |
|   uint64_t baseRid = 0;
 | |
| 
 | |
  // Note: the memory behind these pointer fields is owned by RowGroup, not Row
|   uint32_t* oldOffsets = nullptr;
 | |
|   uint32_t* stOffsets = nullptr;
 | |
|   uint32_t* offsets = nullptr;
 | |
|   uint32_t* colWidths = nullptr;
 | |
|   execplan::CalpontSystemCatalog::ColDataType* types = nullptr;
 | |
|   uint32_t* charsetNumbers = nullptr;
 | |
|   CHARSET_INFO** charsets = nullptr;
 | |
|   uint8_t* data = nullptr;
 | |
|   uint32_t* scale = nullptr;
 | |
|   uint32_t* precision = nullptr;
 | |
| 
 | |
|   StringStore* strings = nullptr;
 | |
|   bool useStringTable = true;
 | |
|   bool hasCollation = false;
 | |
|   bool hasLongStringField = false;
 | |
|   uint32_t sTableThreshold = 20;
 | |
|   std::shared_ptr<bool[]> forceInline;
 | |
|   UserDataStore* userDataStore = nullptr;            // For UDAF
 | |
|   AggregateDataStore* aggregateDataStore = nullptr;  // group_concat & json_arrayagg
 | |
| 
 | |
|   friend class RowGroup;
 | |
| };
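/*
    Illustrative sketch (not part of this header): reading and writing fields through a Row
    that has already been positioned by an RGData/RowGroup (see RGData::getRow()).  Column
    indexes and widths are made up for the example; utils::ConstString's (ptr, length)
    constructor is assumed, as used elsewhere in this file.

    void fillRow(rowgroup::Row& row)
    {
      row.setIntField<4>(42, 0);                             // 4-byte integer column 0
      row.setStringField(utils::ConstString("abc", 3), 1);   // string column 1
      int64_t v = row.getIntField(0);
      utils::ConstString s = row.getConstString(1);          // safe for string-table columns
      (void)v; (void)s;
    }
*/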
 | |
| 
 | |
| inline Row::Pointer Row::getPointer() const
 | |
| {
 | |
|   return Pointer(data, strings, userDataStore, aggregateDataStore);
 | |
| }
 | |
| inline uint8_t* Row::getData() const
 | |
| {
 | |
|   return data;
 | |
| }
 | |
| 
 | |
| inline void Row::setPointer(const Pointer& p)
 | |
| {
 | |
|   data = p.data;
 | |
|   strings = p.strings;
 | |
|   bool hasStrings = (strings != nullptr);
 | |
| 
 | |
|   if (useStringTable != hasStrings)
 | |
|   {
 | |
|     useStringTable = hasStrings;
 | |
|     offsets = (useStringTable ? stOffsets : oldOffsets);
 | |
|   }
 | |
| 
 | |
|   userDataStore = p.userDataStore;
 | |
|   aggregateDataStore = p.aggregateDataStore;
 | |
| }
 | |
| 
 | |
| inline void Row::setData(const Pointer& p)
 | |
| {
 | |
|   setPointer(p);
 | |
| }
 | |
| 
 | |
| inline void Row::nextRow()
 | |
| {
 | |
|   data += getSize();
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getColumnCount() const
 | |
| {
 | |
|   return columnCount;
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getColumnWidth(uint32_t col) const
 | |
| {
 | |
|   return colWidths[col];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getInternalSize() const
 | |
| {
 | |
|   return offsets[columnCount];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getSize() const
 | |
| {
 | |
|   return getInternalSize() + columnCount;
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getRealSize() const
 | |
| {
 | |
|   if (!useStringTable)
 | |
|     return getSize();
 | |
| 
 | |
|   uint32_t ret = columnCount;  // account for NULL flags.
 | |
| 
 | |
|   for (uint32_t i = 0; i < columnCount; i++)
 | |
|   {
 | |
|     if (!inStringTable(i))
 | |
|       ret += getColumnWidth(i);
 | |
|     else
 | |
|       ret += getConstString(i).length();
 | |
|   }
 | |
| 
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getScale(uint32_t col) const
 | |
| {
 | |
|   return scale[col];
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getPrecision(uint32_t col) const
 | |
| {
 | |
|   return precision[col];
 | |
| }
 | |
| 
 | |
| inline execplan::CalpontSystemCatalog::ColDataType Row::getColType(uint32_t colIndex) const
 | |
| {
 | |
|   return types[colIndex];
 | |
| }
 | |
| 
 | |
| inline execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes()
 | |
| {
 | |
|   return types;
 | |
| }
 | |
| 
 | |
| inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() const
 | |
| {
 | |
|   return types;
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getCharsetNumber(uint32_t col) const
 | |
| {
 | |
|   return charsetNumbers[col];
 | |
| }
 | |
| 
 | |
| inline bool Row::isCharType(uint32_t colIndex) const
 | |
| {
 | |
|   return datatypes::isCharType(types[colIndex]);
 | |
| }
 | |
| 
 | |
| inline bool Row::isUnsigned(uint32_t colIndex) const
 | |
| {
 | |
|   return datatypes::isUnsigned(types[colIndex]);
 | |
| }
 | |
| 
 | |
| inline bool Row::isShortString(uint32_t colIndex) const
 | |
| {
 | |
|   return (getColumnWidth(colIndex) <= 8 && isCharType(colIndex));
 | |
| }
 | |
| 
 | |
| inline bool Row::isLongString(uint32_t colIndex) const
 | |
| {
 | |
|   return (getColumnWidth(colIndex) > 8 && isCharType(colIndex));
 | |
| }
 | |
| 
 | |
| inline bool Row::inStringTable(uint32_t col) const
 | |
| {
 | |
|   return strings && getColumnWidth(col) >= sTableThreshold && !forceInline[col];
 | |
| }
 | |
| 
 | |
| template <typename T>
 | |
| inline bool Row::equals(T* value, uint32_t colIndex) const
 | |
| {
 | |
|   return *reinterpret_cast<T*>(&data[offsets[colIndex]]) == *value;
 | |
| }
 | |
| 
 | |
| template <int len>
 | |
| inline bool Row::equals(uint64_t val, uint32_t colIndex) const
 | |
| {
 | |
|   /* I think the compiler will optimize away the switch stmt */
 | |
|   switch (len)
 | |
|   {
 | |
|     case 1: return data[offsets[colIndex]] == val;
 | |
| 
 | |
|     case 2: return *((uint16_t*)&data[offsets[colIndex]]) == val;
 | |
| 
 | |
|     case 4: return *((uint32_t*)&data[offsets[colIndex]]) == val;
 | |
| 
 | |
|     case 8: return *((uint64_t*)&data[offsets[colIndex]]) == val;
 | |
|     default: idbassert(0); throw std::logic_error("Row::equals(): bad length.");
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline bool Row::equals(long double val, uint32_t colIndex) const
 | |
| {
 | |
|   return *((long double*)&data[offsets[colIndex]]) == val;
 | |
| }
 | |
| 
 | |
| inline bool Row::equals(const int128_t& val, uint32_t colIndex) const
 | |
| {
 | |
|   return *((int128_t*)&data[offsets[colIndex]]) == val;
 | |
| }
 | |
| 
 | |
| template <int len>
 | |
| inline uint64_t Row::getUintField(uint32_t colIndex) const
 | |
| {
 | |
|   /* I think the compiler will optimize away the switch stmt */
 | |
|   switch (len)
 | |
|   {
 | |
|     case 1: return data[offsets[colIndex]];
 | |
| 
 | |
|     case 2: return *((uint16_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 4: return *((uint32_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 8: return *((uint64_t*)&data[offsets[colIndex]]);
 | |
|     default: idbassert(0); throw std::logic_error("Row::getUintField(): bad length.");
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getUintField(uint32_t colIndex) const
 | |
| {
 | |
|   switch (getColumnWidth(colIndex))
 | |
|   {
 | |
|     case 1: return data[offsets[colIndex]];
 | |
| 
 | |
|     case 2: return *((uint16_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 4: return *((uint32_t*)&data[offsets[colIndex]]);
 | |
|     case 8: return *((uint64_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     default: idbassert(0); throw std::logic_error("Row::getUintField(): bad length.");
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <int len>
 | |
| inline int64_t Row::getIntField(uint32_t colIndex) const
 | |
| {
 | |
|   /* I think the compiler will optimize away the switch stmt */
 | |
|   switch (len)
 | |
|   {
 | |
|     case 1: return (int8_t)data[offsets[colIndex]];
 | |
| 
 | |
|     case 2: return *((int16_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 4: return *((int32_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 8: return *((int64_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     default:
 | |
|       std::cout << "Row::getIntField getColumnWidth(colIndex) " << getColumnWidth(colIndex) << std::endl;
 | |
|       idbassert(0);
 | |
|       throw std::logic_error("Row::getIntField(): bad length.");
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline int64_t Row::getIntField(uint32_t colIndex) const
 | |
| {
 | |
|   /* I think the compiler will optimize away the switch stmt */
 | |
|   switch (getColumnWidth(colIndex))
 | |
|   {
 | |
|     case 1: return (int8_t)data[offsets[colIndex]];
 | |
| 
 | |
|     case 2: return *((int16_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 4: return *((int32_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 8: return *((int64_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     case 16: return *((int128_t*)&data[offsets[colIndex]]);
 | |
| 
 | |
|     default: idbassert(0); throw std::logic_error("Row::getIntField(): bad length.");
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <typename T>
 | |
| inline void Row::setBinaryField(const T* value, uint32_t width, uint32_t colIndex)
 | |
| {
 | |
|   memcpy(&data[offsets[colIndex]], value, width);
 | |
| }
 | |
| 
 | |
| template <typename T>
 | |
| inline void Row::setBinaryField(const T* value, uint32_t colIndex)
 | |
| {
 | |
|   *reinterpret_cast<T*>(&data[offsets[colIndex]]) = *value;
 | |
| }
 | |
| 
 | |
| template <>
 | |
| inline void Row::setBinaryField<int128_t>(const int128_t* value, uint32_t colIndex)
 | |
| {
 | |
|   datatypes::TSInt128::assignPtrPtr(&data[offsets[colIndex]], value);
 | |
| }
 | |
| 
 | |
| template <>
 | |
| inline void Row::setBinaryField<uint128_t>(const uint128_t* value, uint32_t colIndex)
 | |
| {
 | |
|   datatypes::TSInt128::assignPtrPtr(&data[offsets[colIndex]], value);
 | |
| }
 | |
| 
 | |
| // This method !cannot! be applied to uint8_t* buffers.
 | |
| template <typename T>
 | |
| inline void Row::setBinaryField_offset(const T* value, uint32_t /*width*/, uint32_t offset)
 | |
| {
 | |
|   *reinterpret_cast<T*>(&data[offset]) = *value;
 | |
| }
 | |
| 
 | |
| template <>
 | |
| inline void Row::setBinaryField_offset<uint8_t>(const uint8_t* value, uint32_t width, uint32_t offset)
 | |
| {
 | |
|   memcpy(&data[offset], value, width);
 | |
| }
 | |
| 
 | |
| template <>
 | |
| inline void Row::setBinaryField_offset<int128_t>(const int128_t* value, uint32_t /*width*/, uint32_t offset)
 | |
| {
 | |
|   datatypes::TSInt128::assignPtrPtr(&data[offset], value);
 | |
| }
 | |
| 
 | |
| template <>
 | |
| inline void Row::setBinaryField_offset<uint128_t>(const uint128_t* value, uint32_t /*width*/, uint32_t offset)
 | |
| {
 | |
|   datatypes::TSInt128::assignPtrPtr(&data[offset], value);
 | |
| }
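/*
    Illustrative sketch: wide (16-byte) DECIMAL values are written and read through the
    int128_t paths above.  TSInt128 wraps the raw 128-bit value read from the row buffer.

    void copyWideDecimal(const rowgroup::Row& src, rowgroup::Row& dst, uint32_t col)
    {
      int128_t v = src.getTSInt128Field(col).getValue();
      dst.setInt128Field(v, col);      // forwards to setBinaryField<int128_t>()
    }
*/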
 | |
| 
 | |
| inline utils::ConstString Row::getShortConstString(uint32_t colIndex) const
 | |
| {
 | |
|   uint32_t offset = offsets[colIndex];
 | |
|   const char* src = (const char*)&data[offset];
 | |
|   if (!isNullValue(colIndex))
 | |
|   {
 | |
|     return utils::ConstString(src, strnlen(src, getColumnWidth(colIndex)));
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     return utils::ConstString(nullptr, 0);
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline utils::ConstString Row::getConstString(uint32_t colIndex) const
 | |
| {
 | |
|   return inStringTable(colIndex) ? strings->getConstString(*((uint64_t*)&data[offsets[colIndex]]))
 | |
|                                  : getShortConstString(colIndex);
 | |
| }
 | |
| 
 | |
| inline void Row::colUpdateHasher(datatypes::MariaDBHasher& hM, const utils::Hasher_r& h, const uint32_t col,
 | |
|                                  uint32_t& intermediateHash) const
 | |
| {
 | |
|   switch (getColType(col))
 | |
|   {
 | |
|     case execplan::CalpontSystemCatalog::CHAR:
 | |
|     case execplan::CalpontSystemCatalog::VARCHAR:
 | |
|     case execplan::CalpontSystemCatalog::BLOB:
 | |
|     case execplan::CalpontSystemCatalog::TEXT:
 | |
|     case execplan::CalpontSystemCatalog::CLOB:
 | |
|     {
 | |
|       CHARSET_INFO* cs = getCharset(col);
 | |
|       hM.add(cs, getConstString(col));
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
|     {
 | |
|       intermediateHash = h((const char*)&data[offsets[col]], colWidths[col], intermediateHash);
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline void Row::colUpdateHasherTypeless(datatypes::MariaDBHasher& h, uint32_t keyColsIdx,
 | |
|                                          const std::vector<uint32_t>& keyCols,
 | |
|                                          const std::vector<uint32_t>* smallSideKeyColumnsIds,
 | |
|                                          const std::vector<uint32_t>* smallSideColumnsWidths) const
 | |
| {
 | |
|   auto rowKeyColIdx = keyCols[keyColsIdx];
 | |
|   auto largeSideColType = getColType(rowKeyColIdx);
 | |
|   switch (largeSideColType)
 | |
|   {
 | |
|     case datatypes::SystemCatalog::CHAR:
 | |
|     case datatypes::SystemCatalog::VARCHAR:
 | |
|     case datatypes::SystemCatalog::BLOB:
 | |
|     case datatypes::SystemCatalog::TEXT:
 | |
|     case datatypes::SystemCatalog::CLOB:
 | |
|     {
 | |
|       CHARSET_INFO* cs = getCharset(rowKeyColIdx);
 | |
|       h.add(cs, getConstString(rowKeyColIdx));
 | |
|       break;
 | |
|     }
 | |
|     case datatypes::SystemCatalog::DECIMAL:
 | |
|     {
 | |
|       auto width = getColumnWidth(rowKeyColIdx);
 | |
|       if (datatypes::isWideDecimalType(largeSideColType, width))
 | |
|       {
 | |
|         bool joinHasSkewedKeyColumn = (smallSideColumnsWidths);
 | |
|         datatypes::TSInt128 val = getTSInt128Field(rowKeyColIdx);
 | |
|         if (joinHasSkewedKeyColumn &&
 | |
|             width != derefFromTwoVectorPtrs(smallSideColumnsWidths, smallSideKeyColumnsIds, keyColsIdx))
 | |
|         {
 | |
|           if (val.getValue() >= std::numeric_limits<int64_t>::min() &&
 | |
|               val.getValue() <= std::numeric_limits<uint64_t>::max())
 | |
|           {
 | |
|             h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXLEGACYWIDTH);
 | |
|           }
 | |
|           else
 | |
|             h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXDECIMALWIDTH);
 | |
|         }
 | |
|         else
 | |
|           h.add(&my_charset_bin, (const char*)&val.getValue(), datatypes::MAXDECIMALWIDTH);
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         int64_t val = getIntField(rowKeyColIdx);
 | |
|         h.add(&my_charset_bin, (const char*)&val, datatypes::MAXLEGACYWIDTH);
 | |
|       }
 | |
| 
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
|     {
 | |
|       if (isUnsigned(rowKeyColIdx))
 | |
|       {
 | |
|         uint64_t val = getUintField(rowKeyColIdx);
 | |
|         h.add(&my_charset_bin, (const char*)&val, datatypes::MAXLEGACYWIDTH);
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         int64_t val = getIntField(rowKeyColIdx);
 | |
|         h.add(&my_charset_bin, (const char*)&val, datatypes::MAXLEGACYWIDTH);
 | |
|       }
 | |
| 
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline void Row::setStringField(const uint8_t* str, uint32_t length, uint32_t colIndex)
 | |
| {
 | |
|   utils::ConstString temp((const char*)str, length);
 | |
|   setStringField(temp, colIndex);
 | |
| }
 | |
| inline void Row::setStringField(const utils::NullString& val, uint32_t colIndex)
 | |
| {
 | |
|   utils::ConstString temp(val.str(), val.length());
 | |
|   setStringField(temp, colIndex);
 | |
| }
 | |
| inline void Row::setStringField(const utils::ConstString& str, uint32_t colIndex)
 | |
| {
 | |
|   uint64_t offset;
 | |
| 
 | |
|   // TODO: add multi-byte safe truncation here
 | |
|   uint32_t length = str.length();
 | |
|   uint32_t colWidth = getColumnWidth(colIndex);
 | |
| 
 | |
|   setNullMark(colIndex, !str.str());
 | |
| 
 | |
|   if (length > colWidth)
 | |
|     length = colWidth;
 | |
| 
 | |
|   if (inStringTable(colIndex))
 | |
|   {
 | |
|     offset = strings->storeString((const uint8_t*)str.str(), length);
 | |
|     *((uint64_t*)&data[offsets[colIndex]]) = offset;
 | |
|     //		cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
 | |
|     //				<< " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
 | |
|     //				<< endl;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     // std::cout << "setStringField memcpy " << std::endl;
 | |
|     uint8_t* buf = &data[offsets[colIndex]];
 | |
|     memset(buf + length, 0,
 | |
|            offsets[colIndex + 1] - (offsets[colIndex] + length));  // needed for memcmp in equals().
 | |
|     if (str.str())
 | |
|     {
 | |
|       memcpy(buf, str.str(), length);
 | |
|     }
 | |
|     else if (colWidth <= 8)  // special magic value.
 | |
|     {
 | |
|       setToNull(colIndex);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
 | |
| {
 | |
|   if (inStringTable(colIndex))
 | |
|     return strings->getStringLength(*((uint64_t*)&data[offsets[colIndex]]));
 | |
| 
 | |
|   if (getNullMark(colIndex))
 | |
|   {
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   return *((uint16_t*)&data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
 | |
| {
 | |
|   if (inStringTable(colIndex))
 | |
|     return strings->getPointer(*((uint64_t*)&data[offsets[colIndex]]));
 | |
| 
 | |
|   if (getNullMark(colIndex))
 | |
|   {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   return &data[offsets[colIndex] + 2];
 | |
| }
 | |
| 
 | |
| inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const
 | |
| {
 | |
|   if (inStringTable(colIndex))
 | |
|   {
 | |
|     len = strings->getStringLength(*((uint64_t*)&data[offsets[colIndex]]));
 | |
|     return getVarBinaryField(colIndex);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     if (getNullMark(colIndex))
 | |
|     {
 | |
|       len = 0;
 | |
|       return nullptr;
 | |
|     }
 | |
|     len = *((uint16_t*)&data[offsets[colIndex]]);
 | |
|     return &data[offsets[colIndex] + 2];
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline boost::shared_ptr<mcsv1sdk::UserData> Row::getUserData(uint32_t colIndex) const
 | |
| {
 | |
|   if (!userDataStore)
 | |
|   {
 | |
|     return boost::shared_ptr<mcsv1sdk::UserData>();
 | |
|   }
 | |
| 
 | |
|   return userDataStore->getUserData(*((uint32_t*)&data[offsets[colIndex]]));
 | |
| }
 | |
| 
 | |
| inline double Row::getDoubleField(uint32_t colIndex) const
 | |
| {
 | |
|   return *((double*)&data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline float Row::getFloatField(uint32_t colIndex) const
 | |
| {
 | |
|   return *((float*)&data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline long double Row::getLongDoubleField(uint32_t colIndex) const
 | |
| {
 | |
|   return *((long double*)&data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline void Row::storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const
 | |
| {
 | |
|   datatypes::TSInt128::assignPtrPtr(x, &data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline void Row::getInt128Field(uint32_t colIndex, int128_t& x) const
 | |
| {
 | |
|   datatypes::TSInt128::assignPtrPtr(&x, &data[offsets[colIndex]]);
 | |
| }
 | |
| 
 | |
| inline datatypes::TSInt128 Row::getTSInt128Field(uint32_t colIndex) const
 | |
| {
 | |
|   const int128_t* ptr = reinterpret_cast<int128_t*>(&data[offsets[colIndex]]);
 | |
|   return datatypes::TSInt128(ptr);
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getRid() const
 | |
| {
 | |
|   return baseRid + *((uint16_t*)data);
 | |
| }
 | |
| 
 | |
| inline uint16_t Row::getRelRid() const
 | |
| {
 | |
|   return *((uint16_t*)data);
 | |
| }
 | |
| 
 | |
| inline uint64_t Row::getBaseRid() const
 | |
| {
 | |
|   return baseRid;
 | |
| }
 | |
| 
 | |
| inline void Row::markRow()
 | |
| {
 | |
|   *((uint16_t*)data) = 0xffff;
 | |
| }
 | |
| 
 | |
| inline void Row::zeroRid()
 | |
| {
 | |
|   *((uint16_t*)data) = 0;
 | |
| }
 | |
| 
 | |
| inline bool Row::isMarked()
 | |
| {
 | |
|   return *((uint16_t*)data) == 0xffff;
 | |
| }
 | |
| 
 | |
| /* Begin speculative code! */
 | |
| inline uint32_t Row::getOffset(uint32_t colIndex) const
 | |
| {
 | |
|   return offsets[colIndex];
 | |
| }
 | |
| 
 | |
| template <int len>
 | |
| inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
 | |
| {
 | |
|   switch (len)
 | |
|   {
 | |
|     case 1: data[offset] = val; break;
 | |
| 
 | |
|     case 2: *((uint16_t*)&data[offset]) = val; break;
 | |
| 
 | |
|     case 4: *((uint32_t*)&data[offset]) = val; break;
 | |
| 
 | |
|     case 8: *((uint64_t*)&data[offset]) = val; break;
 | |
| 
 | |
|     default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field");
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <typename T>
 | |
| inline void Row::setIntField_offset(const T val, const uint32_t offset)
 | |
| {
 | |
|   *((T*)&data[offset]) = val;
 | |
| }
 | |
| 
 | |
| inline void Row::nextRow(uint32_t size)
 | |
| {
 | |
|   data += size;
 | |
| }
 | |
| 
 | |
| inline void Row::prevRow(uint32_t size, uint64_t number = 1)
 | |
| {
 | |
|   data -= size * number;
 | |
| }
 | |
| 
 | |
| template <int len>
 | |
| inline void Row::setUintField(uint64_t val, uint32_t colIndex)
 | |
| {
 | |
|   switch (len)
 | |
|   {
 | |
|     case 1: data[offsets[colIndex]] = val; break;
 | |
| 
 | |
|     case 2: *((uint16_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 4: *((uint32_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 8: *((uint64_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field");
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline void Row::setUintField(uint64_t val, uint32_t colIndex)
 | |
| {
 | |
|   switch (getColumnWidth(colIndex))
 | |
|   {
 | |
|     case 1: data[offsets[colIndex]] = val; break;
 | |
| 
 | |
|     case 2: *((uint16_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 4: *((uint32_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 8: *((uint64_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     default: idbassert(0); throw std::logic_error("Row::setUintField: bad length");
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <int len>
 | |
| inline void Row::setIntField(int64_t val, uint32_t colIndex)
 | |
| {
 | |
|   //	idbassert(getColumnWidth(colIndex) == len);
 | |
|   switch (len)
 | |
|   {
 | |
|     case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 2: *((int16_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 4: *((int32_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 8: *((int64_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     default: idbassert(0); throw std::logic_error("Row::setIntField: bad length");
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline void Row::setIntField(int64_t val, uint32_t colIndex)
 | |
| {
 | |
|   switch (getColumnWidth(colIndex))
 | |
|   {
 | |
|     case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 2: *((int16_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 4: *((int32_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     case 8: *((int64_t*)&data[offsets[colIndex]]) = val; break;
 | |
| 
 | |
|     default: idbassert(0); throw std::logic_error("Row::setIntField: bad length");
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline void Row::setDoubleField(double val, uint32_t colIndex)
 | |
| {
 | |
|   *((double*)&data[offsets[colIndex]]) = val;
 | |
| }
 | |
| 
 | |
| inline void Row::setFloatField(float val, uint32_t colIndex)
 | |
| {
 | |
|   // N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan,
 | |
|   //  but not necessarily the same bits that you put in. This only seems to be for float (double seems
 | |
|   //  to work).
 | |
|   if (std::isnan(val))
 | |
|     setUintField<4>(joblist::FLOATNULL, colIndex);
 | |
|   else
 | |
|     *((float*)&data[offsets[colIndex]]) = val;
 | |
| }
 | |
| 
 | |
| inline void Row::setLongDoubleField(const long double& val, uint32_t colIndex)
 | |
| {
 | |
|   uint8_t* p = &data[offsets[colIndex]];
 | |
|   *reinterpret_cast<long double*>(p) = val;
 | |
| #ifdef MASK_LONGDOUBLE
 | |
|   memset(p + 10, 0, 6);
 | |
| #endif
 | |
| }
 | |
| 
 | |
| inline void Row::setInt128Field(const int128_t& val, uint32_t colIndex)
 | |
| {
 | |
|   setBinaryField<int128_t>(&val, colIndex);
 | |
| }
 | |
| 
 | |
| inline void Row::setVarBinaryField(const utils::NullString& val, uint32_t colIndex)
 | |
| {
 | |
|   setVarBinaryField((uint8_t*)val.str(), val.length(), colIndex);
 | |
| }
 | |
| 
 | |
| inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex)
 | |
| {
 | |
|   setNullMark(colIndex, !val);
 | |
| 
 | |
|   if (inStringTable(colIndex))
 | |
|   {
 | |
|     if (len > getColumnWidth(colIndex))
 | |
|       len = getColumnWidth(colIndex);
 | |
| 
 | |
|     uint64_t offset = strings->storeString(val, len);
 | |
|     *((uint64_t*)&data[offsets[colIndex]]) = offset;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     if (len > getColumnWidth(colIndex))
 | |
|       len = getColumnWidth(colIndex);
 | |
| 
 | |
|     idbassert(val != nullptr || !len);
 | |
| 
 | |
|     *((uint16_t*)&data[offsets[colIndex]]) = len;
 | |
|     if (val != nullptr)
 | |
|       memcpy(&data[offsets[colIndex] + 2], val, len);
 | |
|   }
 | |
| }
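/*
    Illustrative sketch: for a short (inline) VARBINARY column the layout written above is a
    2-byte length followed by the bytes; for string-table columns only an 8-byte StringStore
    offset is stored inline.  Reading it back:

    void readBlob(const rowgroup::Row& row, uint32_t col)
    {
      uint32_t len = 0;
      const uint8_t* p = row.getVarBinaryField(len, col);  // p == nullptr and len == 0 for NULL
      (void)p; (void)len;
    }
*/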
 | |
| 
 | |
| inline void Row::setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> userData,
 | |
|                              uint32_t len, uint32_t colIndex)
 | |
| {
 | |
|   if (!userDataStore)
 | |
|   {
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   uint32_t offset = userDataStore->storeUserData(context, userData, len);
 | |
|   *((uint32_t*)&data[offsets[colIndex]]) = offset;
 | |
|   *((uint32_t*)&data[offsets[colIndex] + 4]) = len;
 | |
| }
 | |
| 
 | |
| inline void Row::setAggregateData(boost::shared_ptr<joblist::GroupConcatAg> agData, uint32_t colIndex)
 | |
| {
 | |
|   if (!aggregateDataStore)
 | |
|   {
 | |
    throw std::logic_error("Row::setAggregateData: no aggregateDataStore");
|   }
 | |
| 
 | |
|   uint32_t pos = aggregateDataStore->storeAggregateData(agData);
 | |
|   *((uint32_t*)&data[offsets[colIndex]]) = pos;
 | |
| }
 | |

inline joblist::GroupConcatAg* Row::getAggregateData(uint32_t colIndex) const
{
  if (!aggregateDataStore)
  {
    throw std::logic_error("Row::getAggregateData: no aggregateDataStore");
  }

  uint32_t pos = *((uint32_t*)&data[offsets[colIndex]]);
  return aggregateDataStore->getAggregateData(pos).get();
}

inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
{
  uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
  memmove(&data[offsets[destIndex]], &data[offsets[srcIndex]], n);
  setNullMark(destIndex, getNullMark(srcIndex));
}

inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
{
  if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
               types[srcIndex] == execplan::CalpontSystemCatalog::BLOB ||
               types[srcIndex] == execplan::CalpontSystemCatalog::TEXT ||
               types[srcIndex] == execplan::CalpontSystemCatalog::CLOB))
  {
    out.setVarBinaryField(getVarBinaryField(srcIndex), getVarBinaryLength(srcIndex), destIndex);
  }
  else if (UNLIKELY(isLongString(srcIndex)))
  {
    out.setStringField(getConstString(srcIndex), destIndex);
  }
  else if (UNLIKELY(isShortString(srcIndex)))
  {
    out.setUintField(getUintField(srcIndex), destIndex);
  }
  else if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::LONGDOUBLE))
  {
    out.setLongDoubleField(getLongDoubleField(srcIndex), destIndex);
  }
  else if (UNLIKELY(datatypes::isWideDecimalType(types[srcIndex], colWidths[srcIndex])))
  {
    copyBinaryField(out, destIndex, srcIndex);
  }
  else
  {
    out.setIntField(getIntField(srcIndex), destIndex);
  }
}

inline void Row::copyBinaryField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
{
  out.setInt128Field(getTSInt128Field(srcIndex).getValue(), destIndex);
}

inline void Row::setRid(uint64_t rid)
{
  *((uint16_t*)data) = rid & 0xffff;
}

inline uint64_t Row::hash() const
{
  return hash(columnCount - 1);
}

inline uint64_t Row::hash(uint32_t lastCol) const
{
  // Use two hash classes. MariaDBHasher for text-based
  // collation-aware data types and Hasher_r for all other data types.
  // We deliver a hash that is a combination of both hashers' results.
  utils::Hasher_r h;
  datatypes::MariaDBHasher hM;
  uint32_t intermediateHash = 0;

  // Sometimes we ask this to hash 0 bytes, and it comes through looking like
  // lastCol = -1.  Return 0.
  if (lastCol >= columnCount)
    return 0;

  for (uint32_t i = 0; i <= lastCol; i++)
    colUpdateHasher(hM, h, i, intermediateHash);

  return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize();
}
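
// Illustrative usage sketch (not part of the interface): hash only a key prefix,
// e.g. the first keyColCount columns of a row (assuming keyColCount >= 1):
//
//   uint64_t keyHash = row.hash(keyColCount - 1);
//
// Calling row.hash() with no argument hashes every column.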

inline bool Row::equals(const Row& r2) const
{
  return equals(r2, columnCount - 1);
}

/** @brief RowGroup is a lightweight interface for processing packed row data

        A RowGroup is an interface for parsing and/or modifying row data as described at the top
        of this file.  Its lifecycle can be tied to a producer's or consumer's lifecycle.
        Only one instance is required to process any number of blocks with a
        given column configuration.  The column configuration is specified in the
        constructor, and the block data to process is specified through the
        setData() function.  It will not copy or take ownership of the data it processes;
        the caller retains ownership.

        Row and RowGroup share some bits.  RowGroup owns the memory they share.
*/
class RowGroup : public messageqcpp::Serializeable
{
 public:
  /** @brief The default ctor.  It does nothing.  Need to init by assignment or deserialization */
  RowGroup();

  /** @brief The RowGroup ctor, which specifies the column config to process

  @param colCount The number of columns
  @param positions An array specifying the offsets within the packed data
              of a row where each column begins.  It should have colCount + 1
              entries.  The first offset is 2, because a row begins with a 2-byte
              RID.  The last entry should be the offset of the last column +
              its length, which is also the size of the entire row including the rid.
  @param cOids An array of oids for each column.
  @param tkeys An array of unique ids for each column.
  @param colTypes An array of COLTYPEs for each column.
  @param charsetNumbers An array of lookup numbers for the charset/collation objects.
  @param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
  @param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
  */
  RowGroup(uint32_t colCount, const std::vector<uint32_t>& positions, const std::vector<uint32_t>& cOids,
           const std::vector<uint32_t>& tkeys,
           const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
           const std::vector<uint32_t>& charsetNumbers, const std::vector<uint32_t>& scale,
           const std::vector<uint32_t>& precision, uint32_t stringTableThreshold, bool useStringTable = true,
           const std::vector<bool>& forceInlineData = std::vector<bool>());
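
  // Illustrative sketch (hypothetical values, not part of the interface): the
  // 'positions' vector for a row of one 8-byte BIGINT and one 4-byte INT.
  // Offsets start at 2 for the RID, and the final entry is the full row size:
  //
  //   std::vector<uint32_t> positions{2, 10, 14};  // rid(2) | bigint(8) | int(4)
  //
  // Each of the other per-column vectors (cOids, tkeys, colTypes, charsetNumbers,
  // scale, precision) then has one entry per column, i.e. two entries here.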

  /** @brief The copy constructor.  It copies metadata, not the row data */
  RowGroup(const RowGroup&);

  /** @brief Assignment operator.  It copies metadata, not the row data */
  RowGroup& operator=(const RowGroup&);

  explicit RowGroup(messageqcpp::ByteStream& bs);

  ~RowGroup() override;

  inline void initRow(Row*, bool forceInlineData = false) const;
  inline uint32_t getRowCount() const;
  inline void incRowCount();
  inline void setRowCount(uint32_t num);
  inline void getRow(uint32_t rowNum, Row*) const;
  inline uint32_t getRowSize() const;
  inline uint32_t getRowSizeWithStrings() const;
  inline uint64_t getBaseRid() const;
  void setData(RGData* rgd);
  inline uint8_t* getData() const;
  inline RGData* getRGData() const;

  uint32_t getStatus() const;
  void setStatus(uint16_t);

  uint32_t getDBRoot() const;
  void setDBRoot(uint32_t);

  RGDataSizeType getDataSize() const;
  RGDataSizeType getDataSize(uint64_t n) const;
  RGDataSizeType getMaxDataSize() const;
  RGDataSizeType getMaxDataSizeWithStrings() const;
  RGDataSizeType getEmptySize() const;

  // this returns the size of the row data with the string table
  inline RGDataSizeType getSizeWithStrings() const;
  inline RGDataSizeType getSizeWithStrings(uint64_t n) const;

  // sets the row count to 0 and the baseRid to the given value, effectively
  // initializing whatever chunk of memory data points to
  void resetRowGroup(uint64_t baseRid);

  /* The Serializeable interface */
  void serialize(messageqcpp::ByteStream&) const override;
  void deserialize(messageqcpp::ByteStream&) override;

  uint32_t getColumnWidth(uint32_t col) const;
  uint32_t getColumnCount() const;
  inline const std::vector<uint32_t>& getOffsets() const;
  inline const std::vector<uint32_t>& getOIDs() const;
  inline const std::vector<uint32_t>& getKeys() const;
  inline const std::vector<uint32_t>& getColWidths() const;
  inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
  inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
  inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
  inline const std::vector<uint32_t>& getCharsetNumbers() const;
  inline uint32_t getCharsetNumber(uint32_t colIndex) const;
  inline std::shared_ptr<bool[]>& getForceInline();
  static inline uint32_t getHeaderSize()
  {
    return headerSize;
  }

  // this returns true if the type is CHAR or VARCHAR
  inline bool isCharType(uint32_t colIndex) const;
  inline bool isUnsigned(uint32_t colIndex) const;
  inline bool isShortString(uint32_t colIndex) const;
  inline bool isLongString(uint32_t colIndex) const;

  bool colHasCollation(uint32_t colIndex) const
  {
    return datatypes::typeHasCollation(getColType(colIndex));
  }

  inline const std::vector<uint32_t>& getScale() const;
  inline const std::vector<uint32_t>& getPrecision() const;

  inline bool usesStringTable() const;
  inline void setUseStringTable(bool);
  void setUseOnlyLongString(bool b)
  {
    useOnlyLongStrings = b;
  }
  bool usesOnlyLongString() const
  {
    return useOnlyLongStrings;
  }
  void setUseAggregateDataStore(bool b, boost::span<boost::shared_ptr<GroupConcat>> group_concats = {});
  bool usesAggregateDataStore() const
  {
    return useAggregateDataStore;
  }

  bool hasLongString() const
  {
    return hasLongStringField;
  }

  void serializeRGData(messageqcpp::ByteStream&) const;
  inline uint32_t getStringTableThreshold() const;

  void append(RGData&);
  void append(RowGroup&);
  void append(RGData&, uint pos);  // insert starting at position 'pos'
  void append(RowGroup&, uint pos);

  RGData duplicate();  // returns a copy of the attached RGData

  std::string toString(const std::vector<uint64_t>& used = {}) const;

  /** operator+=
   *
   * append the metadata of another RowGroup to this RowGroup
   */
  RowGroup& operator+=(const RowGroup& rhs);

  // returns a RowGroup with only the first cols columns.  Useful for generating a
  // RowGroup where the first cols make up a key of some kind, and the rest is irrelevant.
  RowGroup truncate(uint32_t cols);

  /** operator<
   *
   * Orders RGs based on baseRid
   */
  inline bool operator<(const RowGroup& rhs) const;

  void addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& sysDataList);

  /* Base RIDs are now a combination of partition#, segment#, extent#, and block#. */
  inline void setBaseRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
                         const uint16_t& blockNum);
  inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);

  inline void setStringStore(boost::shared_ptr<StringStore>);

  const CHARSET_INFO* getCharset(uint32_t col);

  const auto& getGroupConcats() const
  {
    return fGroupConcats;
  }

 private:
  uint32_t columnCount = 0;
  uint8_t* data = nullptr;

  std::vector<uint32_t> oldOffsets;  // inline data offsets
  std::vector<uint32_t> stOffsets;   // string table offsets
  uint32_t* offsets = nullptr;       // offsets either points to oldOffsets or stOffsets
  std::vector<uint32_t> colWidths;
  // oids: the real oid of the column; may have duplicates when a column is aliased.
  // This oid is necessary for the front end to decide the real column width.
  std::vector<uint32_t> oids;
  // keys: the unique id for pair(oid, alias). bug 1632.
  // Used to map between the projected column and the rowgroup index
  std::vector<uint32_t> keys;
  std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
  // For string collation
  std::vector<uint32_t> charsetNumbers;
  std::vector<CHARSET_INFO*> charsets;

  // DECIMAL support.  For non-decimal fields, the values are 0.
  std::vector<uint32_t> scale;
  std::vector<uint32_t> precision;

  // string table impl
  RGData* rgData = nullptr;
  StringStore* strings = nullptr;  // note, strings and data belong to rgData
  AggregateDataStore* aggregateDataStore = nullptr;
  bool useStringTable = true;
  bool useOnlyLongStrings = false;
  bool useAggregateDataStore = false;
  bool hasCollation = false;
  bool hasLongStringField = false;
  uint32_t sTableThreshold = 20;
  std::shared_ptr<bool[]> forceInline;

  std::vector<boost::shared_ptr<GroupConcat>> fGroupConcats;

  static constexpr uint64_t headerSize = 18;
  static constexpr uint64_t rowCountOffset = 0;
  static constexpr uint64_t baseRidOffset = 4;
  static constexpr uint64_t statusOffset = 12;
  static constexpr uint64_t dbRootOffset = 14;
};
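
// Illustrative usage sketch (assumptions: 'rg' is a RowGroup configured as above and
// 'rgData' is an RGData that was populated elsewhere; the names are hypothetical):
//
//   rowgroup::Row row;
//   rg.setData(&rgData);        // attach the data; RowGroup does not take ownership
//   rg.initRow(&row);           // size/offsets for this column configuration
//   for (uint32_t i = 0; i < rg.getRowCount(); i++)
//   {
//     rg.getRow(i, &row);       // point 'row' at the i-th packed row
//     // ... read fields via row.getIntField(col), row.getConstString(col), etc.
//   }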

inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
                             const uint16_t& blockNum);
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
                               uint16_t* blockNum);

// returns the extent-relative rid of the first row of the logical block specified by baseRid
inline uint64_t getExtentRelativeRid(uint64_t baseRid);

// returns the file-relative rid of the first row of the logical block specified by baseRid
inline uint64_t getFileRelativeRid(uint64_t baseRid);

/** operator+
 *
 * add the metadata of 2 RowGroups together and return a new RowGroup
 */
RowGroup operator+(const RowGroup& lhs, const RowGroup& rhs);

std::shared_ptr<int[]> makeMapping(const RowGroup& r1, const RowGroup& r2);
void applyMapping(const std::shared_ptr<int[]>& mapping, const Row& in, Row* out);
void applyMapping(const std::vector<int>& mapping, const Row& in, Row* out);
void applyMapping(const int* mapping, const Row& in, Row* out);

/* PL 8/10/09: commented out the asserts for now because, for the functions that are called
every row, they're a measurable performance penalty */
inline uint32_t RowGroup::getRowCount() const
{
  //  idbassert(data);
  //  if (!data) throw std::logic_error("RowGroup::getRowCount(): data is nullptr!");
  return *((uint32_t*)&data[rowCountOffset]);
}

inline void RowGroup::incRowCount()
{
  //  idbassert(data);
  ++(*((uint32_t*)&data[rowCountOffset]));
}

inline void RowGroup::setRowCount(uint32_t num)
{
  //  idbassert(data);
  *((uint32_t*)&data[rowCountOffset]) = num;
}

inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
{
  //  idbassert(data);
  if (useStringTable != r->usesStringTable())
    initRow(r);

  r->baseRid = getBaseRid();
  r->data = &(data[headerSize + (rowNum * r->getSize())]);
  r->strings = strings;
  r->userDataStore = rgData->userDataStore.get();
  r->aggregateDataStore = rgData->aggregateDataStore.get();
}

inline void RowGroup::setData(RGData* rgd)
{
  data = rgd->rowData.get();
  strings = rgd->strings.get();
  aggregateDataStore = rgd->aggregateDataStore.get();
  rgData = rgd;
}

inline uint8_t* RowGroup::getData() const
{
  // assert(!useStringTable);
  return data;
}

inline RGData* RowGroup::getRGData() const
{
  return rgData;
}

inline void RowGroup::setUseStringTable(bool b)
{
  useStringTable = (b && hasLongStringField);
  // offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
  offsets = nullptr;

  if (useStringTable && !stOffsets.empty())
    offsets = &stOffsets[0];
  else if (!useStringTable && !oldOffsets.empty())
    offsets = &oldOffsets[0];

  if (!useStringTable)
    strings = nullptr;
}

inline uint64_t RowGroup::getBaseRid() const
{
  return *((uint64_t*)&data[baseRidOffset]);
}

inline bool RowGroup::operator<(const RowGroup& rhs) const
{
  return (getBaseRid() < rhs.getBaseRid());
}

void RowGroup::initRow(Row* r, bool forceInlineData) const
{
  r->columnCount = columnCount;

  if (LIKELY(!types.empty()))
  {
    r->colWidths = (uint32_t*)&colWidths[0];
    r->types = (execplan::CalpontSystemCatalog::ColDataType*)&(types[0]);
    r->charsetNumbers = (uint32_t*)&(charsetNumbers[0]);
    r->charsets = (CHARSET_INFO**)&(charsets[0]);
    r->scale = (uint32_t*)&(scale[0]);
    r->precision = (uint32_t*)&(precision[0]);
  }

  if (forceInlineData)
  {
    r->useStringTable = false;
    r->oldOffsets = (uint32_t*)&(oldOffsets[0]);
    r->stOffsets = (uint32_t*)&(stOffsets[0]);
    r->offsets = (uint32_t*)&(oldOffsets[0]);
  }
  else
  {
    r->useStringTable = useStringTable;
    r->oldOffsets = (uint32_t*)&(oldOffsets[0]);
    r->stOffsets = (uint32_t*)&(stOffsets[0]);
    r->offsets = offsets;
  }

  r->hasLongStringField = hasLongStringField;
  r->sTableThreshold = sTableThreshold;
  r->forceInline = forceInline;
  r->hasCollation = hasCollation;
}

inline uint32_t RowGroup::getRowSize() const
{
  return offsets[columnCount] + columnCount;
}

inline uint32_t RowGroup::getRowSizeWithStrings() const
{
  return oldOffsets[columnCount] + columnCount;
}

inline RGDataSizeType RowGroup::getSizeWithStrings(uint64_t n) const
{
  RGDataSizeType ret = getDataSize(n);
  if (strings)
  {
    ret += strings->getSize();
  }
  if (aggregateDataStore)
  {
    ret += aggregateDataStore->getDataSize();
  }
  return ret;
}

inline uint64_t RowGroup::getSizeWithStrings() const
{
  return getSizeWithStrings(getRowCount());
}

inline bool RowGroup::isCharType(uint32_t colIndex) const
{
  return datatypes::isCharType(types[colIndex]);
}

inline bool RowGroup::isUnsigned(uint32_t colIndex) const
{
  return datatypes::isUnsigned(types[colIndex]);
}

inline bool RowGroup::isShortString(uint32_t colIndex) const
{
  return ((getColumnWidth(colIndex) <= 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
          (getColumnWidth(colIndex) <= 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR));
}

inline bool RowGroup::isLongString(uint32_t colIndex) const
{
  return ((getColumnWidth(colIndex) > 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
          (getColumnWidth(colIndex) > 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR) ||
          types[colIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
          types[colIndex] == execplan::CalpontSystemCatalog::BLOB ||
          types[colIndex] == execplan::CalpontSystemCatalog::TEXT ||
          types[colIndex] == execplan::CalpontSystemCatalog::CLOB);
}

inline bool RowGroup::usesStringTable() const
{
  return useStringTable;
}

inline const std::vector<uint32_t>& RowGroup::getOffsets() const
{
  return oldOffsets;
}

inline const std::vector<uint32_t>& RowGroup::getOIDs() const
{
  return oids;
}

inline const std::vector<uint32_t>& RowGroup::getKeys() const
{
  return keys;
}

inline execplan::CalpontSystemCatalog::ColDataType RowGroup::getColType(uint32_t colIndex) const
{
  return types[colIndex];
}

inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes() const
{
  return types;
}

inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes()
{
  return types;
}

inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
{
  return charsetNumbers;
}

inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
{
  return charsetNumbers[colIndex];
}

inline const std::vector<uint32_t>& RowGroup::getScale() const
{
  return scale;
}

inline const std::vector<uint32_t>& RowGroup::getPrecision() const
{
  return precision;
}

inline const std::vector<uint32_t>& RowGroup::getColWidths() const
{
  return colWidths;
}

inline std::shared_ptr<bool[]>& RowGroup::getForceInline()
{
  return forceInline;
}

inline uint64_t convertToRid(const uint32_t& partitionNum, const uint16_t& segmentNum, const uint8_t& exNum,
                             const uint16_t& blNum)
{
  uint64_t partNum = partitionNum, segNum = segmentNum, extentNum = exNum, blockNum = blNum;

  // extentNum is truncated to 6 bits, blockNum to 10 bits
  extentNum &= 0x3f;
  blockNum &= 0x3ff;

  return (partNum << 32) | (segNum << 16) | (extentNum << 10) | blockNum;
}
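
// Worked example (illustrative, hypothetical values): convertToRid(1, 2, 3, 4)
// packs the fields as (1 << 32) | (2 << 16) | (3 << 10) | 4 = 0x100020C04.
// getLocationFromRid() below reverses the packing, recovering partNum = 1,
// segNum = 2, extentNum = 3 and blockNum = 4.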

inline void RowGroup::setBaseRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
                                 const uint16_t& blockNum)
{
  *((uint64_t*)&data[baseRidOffset]) = convertToRid(partNum, segNum, extentNum, blockNum);
}

inline uint32_t RowGroup::getStringTableThreshold() const
{
  return sTableThreshold;
}

// TODO: This is unused; remove it in the dev branch.
inline void RowGroup::setStringStore(boost::shared_ptr<StringStore> ss)
{
  if (useStringTable)
  {
    rgData->setStringStore(ss);
    strings = rgData->strings.get();
  }
}

inline void getLocationFromRid(uint64_t rid, uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
                               uint16_t* blockNum)
{
  if (partNum)
    *partNum = rid >> 32;

  if (segNum)
    *segNum = rid >> 16;

  if (extentNum)
    *extentNum = (rid >> 10) & 0x3f;

  if (blockNum)
    *blockNum = rid & 0x3ff;
}

inline void RowGroup::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum)
{
  getLocationFromRid(getBaseRid(), partNum, segNum, extentNum, blockNum);
}

// returns the extent-relative RID of the first row of the logical block identified by baseRid
inline uint64_t getExtentRelativeRid(uint64_t baseRid)
{
  uint64_t blockNum = baseRid & 0x3ff;
  return (blockNum << 13);
}

inline uint64_t Row::getExtentRelativeRid() const
{
  return rowgroup::getExtentRelativeRid(baseRid) | (getRelRid() & 0x1fff);
}

// returns the file-relative RID of the first row of the logical block identified by baseRid
inline uint64_t getFileRelativeRid(uint64_t baseRid)
{
  uint64_t extentNum = (baseRid >> 10) & 0x3f;
  uint64_t blockNum = baseRid & 0x3ff;
  return (extentNum << 23) | (blockNum << 13);
}
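
// Note on the shift widths above (descriptive): blockNum << 13 means each logical
// block spans 8192 rids, and extentNum << 23 means each extent spans 1024 such
// blocks (1024 * 8192 rids).  The low 13 bits hold the row's offset within its
// block, which is what getRelRid() supplies in the Row methods.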

inline uint64_t Row::getFileRelativeRid() const
{
  return rowgroup::getFileRelativeRid(baseRid) | (getRelRid() & 0x1fff);
}

inline void Row::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum,
                             uint16_t* rowNum)
{
  getLocationFromRid(baseRid, partNum, segNum, extentNum, blockNum);

  if (rowNum)
    *rowNum = getRelRid();
}

// This routine can be slow.  In some cases copyRowInline below can be faster,
// but only use it when the source and destination rows have identical field layouts.
inline void copyRow(const Row& in, Row* out, uint32_t colCount)
{
  if (&in == out)
    return;

  out->setRid(in.getRelRid());

  if (!in.usesStringTable() && !out->usesStringTable())
  {
    memcpy(out->getData(), in.getData(), std::min(in.getSize(), out->getSize()));

    for (uint32_t i = 0; i < colCount; i++)
    {
      out->setNullMark(i, in.getNullMark(i));
    }
    return;
  }

  for (uint32_t i = 0; i < colCount; i++)
  {
    if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY ||
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB ||
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT ||
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::CLOB))
    {
      out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i);
    }
    else if (UNLIKELY(in.isLongString(i)))
    {
      out->setStringField(in.getConstString(i), i);
    }
    else if (UNLIKELY(in.isShortString(i)))
    {
      out->setUintField(in.getUintField(i), i);
    }
    else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::DOUBLE))
    {
      out->setDoubleField(in.getDoubleField(i), i);
    }
    else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE))
    {
      out->setLongDoubleField(in.getLongDoubleField(i), i);
    }
    else if (UNLIKELY(datatypes::isWideDecimalType(in.getColType(i), in.getColumnWidth(i))))
    {
      in.copyBinaryField(*out, i, i);
    }
    else
    {
      out->setIntField(in.getIntField(i), i);
    }
  }
}

inline void copyRow(const Row& in, Row* out)
{
  copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
}

// This routine can be substantially faster than copyRow above, but there are caveats:
// the memcpy fast path must only be taken when the two rows have identical layouts,
// otherwise NULL information for inline strings can be lost or garbled.
inline void copyRowInline(const Row& in, Row* out, uint32_t colCount)
{
  if (&in == out)
    return;

  // XXX: this code may still copy data incorrectly if individual column widths differ,
  // even when the total row sizes match.
  if (!in.usesStringTable() && !out->usesStringTable() && in.getSize() == out->getSize())
  {
    out->setRid(in.getRelRid());

    memcpy(out->getData(), in.getData(), in.getSize());
    return;
  }

  copyRow(in, out, colCount);
}
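
// Illustrative sketch (hypothetical names): copying rows between two RGDatas whose
// RowGroups have the same column configuration, which is the case copyRowInline is
// meant for:
//
//   rowgroup::Row src, dst;
//   inRG.initRow(&src);
//   outRG.initRow(&dst);
//   inRG.getRow(n, &src);
//   outRG.getRow(n, &dst);
//   copyRowInline(src, &dst, inRG.getColumnCount());
//
// When the layouts may differ (e.g. after a makeMapping/applyMapping style projection),
// the safer copyRow() should be used instead.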
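// Offset encoding used by the StringStore accessors below (descriptive, based on the
// implementation): an offset of UINT64_MAX denotes a NULL value; if the high bit is
// set, the remaining bits index the longStrings chunks; otherwise the offset is a
// byte position within the CHUNK_SIZE-sized 'mem' chunks.  Each stored string is
// prefixed by its 4-byte length.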
inline utils::NullString StringStore::getString(uint64_t off) const
{
  uint32_t length;
  utils::NullString nStr;

  if (off == std::numeric_limits<uint64_t>::max())
    return nStr;

  MemChunk* mc;

  if (off & 0x8000000000000000)
  {
    // off = off - 0x8000000000000000;
    off &= ~0x8000000000000000;

    if (longStrings.size() <= off)
      return nStr;

    mc = (MemChunk*)longStrings[off].get();
    memcpy(&length, mc->data, 4);
    nStr.assign(std::string((char*)mc->data + 4, length));
    return nStr;
  }

  uint64_t chunk = off / CHUNK_SIZE;
  uint64_t offset = off % CHUNK_SIZE;

  // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
  // what gets returned, it just can't go out of bounds.
  if (mem.size() <= chunk)
    return nStr;

  mc = (MemChunk*)mem[chunk].get();

  memcpy(&length, &mc->data[offset], 4);

  if ((offset + length) > mc->currentSize)
    return nStr;

  nStr.assign(std::string((char*)&(mc->data[offset]) + 4, length));
  return nStr;
}

inline const uint8_t* StringStore::getPointer(uint64_t off) const
{
  if (off == std::numeric_limits<uint64_t>::max())
    return nullptr;

  uint64_t chunk = off / CHUNK_SIZE;
  uint64_t offset = off % CHUNK_SIZE;
  MemChunk* mc;

  if (off & 0x8000000000000000)
  {
    // off = off - 0x8000000000000000;
    off &= ~0x8000000000000000;

    if (longStrings.size() <= off)
      return nullptr;

    mc = (MemChunk*)longStrings[off].get();
    return mc->data + 4;
  }

  // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
  // what gets returned, it just can't go out of bounds.
  if (UNLIKELY(mem.size() <= chunk))
    return nullptr;

  mc = (MemChunk*)mem[chunk].get();

  if (offset > mc->currentSize)
    return nullptr;

  return &(mc->data[offset]) + 4;
}

inline bool StringStore::isNullValue(uint64_t off) const
{
  if (off == std::numeric_limits<uint64_t>::max())
    return true;
  return false;
}

inline uint32_t StringStore::getStringLength(uint64_t off) const
{
  uint32_t length;
  MemChunk* mc;

  if (off == std::numeric_limits<uint64_t>::max())
    return 0;

  if (off & 0x8000000000000000)
  {
    // off = off - 0x8000000000000000;
    off &= ~0x8000000000000000;

    if (longStrings.size() <= off)
      return 0;

    mc = (MemChunk*)longStrings[off].get();
    memcpy(&length, mc->data, 4);
  }
  else
  {
    uint64_t chunk = off / CHUNK_SIZE;
    uint64_t offset = off % CHUNK_SIZE;

    if (mem.size() <= chunk)
      return 0;

    mc = (MemChunk*)mem[chunk].get();
    memcpy(&length, &mc->data[offset], 4);
  }

  return length;
}

inline bool StringStore::isEmpty() const
{
  return empty;
}

inline uint64_t StringStore::getSize() const
{
  uint32_t i;
  uint64_t ret = 0;
  MemChunk* mc;

  ret += sizeof(MemChunk) * mem.size();
  for (i = 0; i < mem.size(); i++)
  {
    mc = (MemChunk*)mem[i].get();
    ret += mc->capacity;
  }

  ret += sizeof(MemChunk) * longStrings.size();
  for (i = 0; i < longStrings.size(); i++)
  {
    mc = (MemChunk*)longStrings[i].get();
    ret += mc->capacity;
  }

  return ret;
}

inline void RGData::getRow(uint32_t num, Row* row)
{
  uint32_t incomingRowSize = row->getSize();
  idbassert(columnCount == row->getColumnCount() && rowSize == incomingRowSize);

  row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * incomingRowSize)], strings.get(),
                            userDataStore.get(), aggregateDataStore.get()));
}

inline uint64_t rowGidRidToIdx(uint64_t gid, uint32_t rid, uint32_t maxRows)
{
  return gid * maxRows + rid;
}

inline std::pair<uint64_t, uint64_t> rowIdxToGidRid(uint64_t idx, uint32_t maxRows)
{
  return {idx / maxRows, idx % maxRows};
}
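
// Worked example (illustrative): with maxRows = 8192, rowGidRidToIdx(2, 5, 8192)
// yields 2 * 8192 + 5 = 16389, and rowIdxToGidRid(16389, 8192) recovers {2, 5}.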

}  // namespace rowgroup