You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-11-03 17:13:17 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			2353 lines
		
	
	
		
			71 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			2353 lines
		
	
	
		
			71 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
   Copyright (C) 2014 InfiniDB, Inc.
 | 
						|
   Copyright (c) 2016-2024 MariaDB Corporation
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or
 | 
						|
   modify it under the terms of the GNU General Public License
 | 
						|
   as published by the Free Software Foundation; version 2 of
 | 
						|
   the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
						|
   MA 02110-1301, USA.
 | 
						|
*/
 | 
						|
 | 
						|
//
 | 
						|
// C++ Interface: rowgroup
 | 
						|
//
 | 
						|
// Description:
 | 
						|
//
 | 
						|
// Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
 | 
						|
 | 
						|
#pragma once
 | 
						|
 | 
						|
#include <vector>
 | 
						|
#include <string>
 | 
						|
#include <stdexcept>
 | 
						|
// #define NDEBUG
 | 
						|
#include <cassert>
 | 
						|
 | 
						|
#include <boost/core/span.hpp>
 | 
						|
#include <boost/shared_ptr.hpp>
 | 
						|
 | 
						|
#include <boost/thread/mutex.hpp>
 | 
						|
#include <cmath>
 | 
						|
#include <cfloat>
 | 
						|
#include <execinfo.h>
 | 
						|
 | 
						|
#include "countingallocator.h"
 | 
						|
#include "hasher.h"
 | 
						|
 | 
						|
#include "joblisttypes.h"
 | 
						|
#include "bytestream.h"
 | 
						|
#include "calpontsystemcatalog.h"
 | 
						|
#include "exceptclasses.h"
 | 
						|
#include "mcsv1_udaf.h"
 | 
						|
 | 
						|
#include "branchpred.h"
 | 
						|
#include "datatypes/mcs_int128.h"
 | 
						|
 | 
						|
#include "collation.h"
 | 
						|
#include "common/hashfamily.h"
 | 
						|
#include "buffertypes.h"
 | 
						|
 | 
						|
#include <cstdlib>
 | 
						|
#include "execinfo.h"
 | 
						|
 | 
						|
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
 | 
						|
// Forward declaration only: this header refers to joblist::GroupConcatAg
// through boost::shared_ptr and raw pointers, so the full definition is not
// required here.
namespace joblist
{
class GroupConcatAg;
}
 | 
						|
 | 
						|
namespace rowgroup
 | 
						|
{
 | 
						|
// NOTE(review): presumably the row count of one 'logical block' (the 8192
// figure used in the RowGroup overview comment below) - confirm at call sites.
// 'inline' gives the header-defined constant a single definition across
// translation units.
inline constexpr int16_t rgCommonSize = 8192;
// Unsigned 64-bit type used by this header for RGData byte counts
// (e.g. RGData::serialize(), AggregateDataStore::getDataSize()).
using RGDataSizeType = uint64_t;
 | 
						|
 | 
						|
/*
 | 
						|
    The RowGroup family of classes encapsulate the data moved through the
 | 
						|
    system.
 | 
						|
 | 
						|
     - RowGroup specifies the format of the data primarily (+ some other metadata),
 | 
						|
     - RGData (aka RowGroup Data) encapsulates the data,
 | 
						|
     - Row is used to extract fields from the data and iterate.
 | 
						|
 | 
						|
    JobListFactory instantiates the RowGroups to be used by each stage of processing.
 | 
						|
    RGDatas are passed between stages, and their RowGroup instances are used
 | 
						|
    to interpret them.
 | 
						|
 | 
						|
    Historically, row data was just a chunk of contiguous memory, a uint8_t *.
 | 
						|
    Every field had a fixed width, which allowed for quick offset
 | 
						|
    calculation when assigning or retrieving individual fields.  That worked
 | 
						|
    well for a few years, but at some point it became common to declare
 | 
						|
    all strings as max-length, and to manipulate them in queries.
 | 
						|
 | 
						|
    Having fixed-width fields, even for strings, required an unreasonable
 | 
						|
    amount of memory.  RGData & StringStore were introduced to handle strings
 | 
						|
    more efficiently, at least with respect to memory.  The row data would
 | 
						|
    still be a uint8_t *, and columns would be fixed-width, but string fields
 | 
						|
    above a certain width would contain a 'Pointer' that referenced a string in
 | 
						|
    StringStore.  Strings are stored efficiently in StringStore, so there is
 | 
						|
    no longer wasted space.
 | 
						|
 | 
						|
    StringStore comes with a different inefficiency however.  When a value
 | 
						|
    is overwritten, the original string cannot be freed independently of the
 | 
						|
    others, so it continues to use space.  If values are only set once, as is
 | 
						|
    the typical case, then StringStore is efficient.  When it is necessary
 | 
						|
    to overwrite string fields, it is possible to configure these classes
 | 
						|
    to use the original data format so that old string fields do not accumulate
 | 
						|
    in memory.  Of course, be careful, because blobs and text fields in CS are
 | 
						|
    declared as 2GB strings!
 | 
						|
 | 
						|
    A single RGData contains up to one 'logical block' worth of data,
 | 
						|
    which is 8192 rows.  One RGData is usually treated as one unit of work by
 | 
						|
    PrimProc and the JobSteps, but the rows an RGData contains and how many are
 | 
						|
    treated as a work unit depend on the operation being done.
 | 
						|
 | 
						|
    For example, PrimProc works in units of 8192 contiguous rows
 | 
						|
    that come from disk.  If half of the rows were filtered out, then the
 | 
						|
    RGData it passes to the next stage would only contain 4096 rows.
 | 
						|
 | 
						|
    Others build results incrementally before passing them along, such as
 | 
						|
    group-by.  If one group contains 11111 values, then group-by will
 | 
						|
    return 2 RGDatas for that group, one with 8192 rows, and one with 2919.
 | 
						|
 | 
						|
    Note: There is no synchronization in any of these classes for obvious
 | 
						|
    performance reasons.  Likewise, although it's technically safe for many
 | 
						|
    readers to access an RGData simultaneously, that would not be an
 | 
						|
    efficient thing to do.  Try to stick to designs where a single RGData
 | 
						|
    is used by a single thread at a time.
 | 
						|
*/
 | 
						|
 | 
						|
// VS'08 carps that struct MemChunk is not default copyable because of the zero-length array.
 | 
						|
// This may be so, and we'll get link errors if someone tries, but so far no one has.
 | 
						|
 | 
						|
// Helper to get a value from nested vector pointers.
 | 
						|
// Two-level lookup: reads (*inner)[innerIdx] and uses that value as the index
// into *outer, returning (*outer)[(*inner)[innerIdx]] by value.
// Both pointers must be non-null and both indexes must be in range; nothing
// is checked here (std::vector::operator[] performs no bounds checking).
template <typename T>
inline T derefFromTwoVectorPtrs(const std::vector<T>* outer, const std::vector<T>* inner, const T innerIdx)
{
  const auto outerIdx = (*inner)[innerIdx];
  return (*outer)[outerIdx];
}
 | 
						|
 | 
						|
class StringStore
 | 
						|
{
 | 
						|
 public:
 | 
						|
  StringStore() = default;
 | 
						|
  StringStore(allocators::CountingAllocator<StringStoreBufType> alloc);
 | 
						|
  StringStore(const StringStore&) = delete;
 | 
						|
  StringStore(StringStore&&) = delete;
 | 
						|
  StringStore& operator=(const StringStore&) = delete;
 | 
						|
  StringStore& operator=(StringStore&&) = delete;
 | 
						|
  virtual ~StringStore();
 | 
						|
 | 
						|
  inline utils::NullString getString(uint64_t offset) const;
 | 
						|
  // returns the offset.
 | 
						|
  // it may receive nullptr as data and it is proper way to store NULL values.
 | 
						|
  uint64_t storeString(const uint8_t* data, uint32_t length);
 | 
						|
  // please note getPointer can return nullptr.
 | 
						|
  inline const uint8_t* getPointer(uint64_t offset) const;
 | 
						|
  inline uint32_t getStringLength(uint64_t offset) const;
 | 
						|
  inline utils::ConstString getConstString(uint64_t offset) const
 | 
						|
  {
 | 
						|
    return {(const char*)getPointer(offset), getStringLength(offset)};
 | 
						|
  }
 | 
						|
  inline bool isEmpty() const;
 | 
						|
  inline uint64_t getSize() const;
 | 
						|
  inline bool isNullValue(uint64_t offset) const;
 | 
						|
 | 
						|
  void clear();
 | 
						|
 | 
						|
  void serialize(messageqcpp::ByteStream&) const;
 | 
						|
  void deserialize(messageqcpp::ByteStream&);
 | 
						|
 | 
						|
  //@bug6065, make StringStore::storeString() thread safe
 | 
						|
  void useStoreStringMutex(bool b)
 | 
						|
  {
 | 
						|
    fUseStoreStringMutex = b;
 | 
						|
  }
 | 
						|
  bool useStoreStringMutex() const
 | 
						|
  {
 | 
						|
    return fUseStoreStringMutex;
 | 
						|
  }
 | 
						|
  void useOnlyLongStrings(bool b)
 | 
						|
  {
 | 
						|
    fUseOnlyLongStrings = b;
 | 
						|
  }
 | 
						|
  bool useOnlyLongStrings() const
 | 
						|
  {
 | 
						|
    return fUseOnlyLongStrings;
 | 
						|
  }
 | 
						|
 | 
						|
  // This is an overlay b/c the underlying data needs to be any size,
 | 
						|
  // and alloc'd in one chunk.  data can't be a separate dynamic chunk.
 | 
						|
  // NOTE: Change here, requires a change in 'bytestream.h'.
 | 
						|
  struct MemChunk
 | 
						|
  {
 | 
						|
    uint32_t currentSize;
 | 
						|
    uint32_t capacity;
 | 
						|
    uint8_t data[];
 | 
						|
  };
 | 
						|
 | 
						|
 private:
 | 
						|
  std::string empty_str;
 | 
						|
  static constexpr const uint32_t CHUNK_SIZE = 64 * 1024;  // allocators like powers of 2
 | 
						|
 | 
						|
  std::vector<boost::shared_ptr<uint8_t[]>> mem;
 | 
						|
 | 
						|
  // To store strings > 64KB (BLOB/TEXT)
 | 
						|
  std::vector<boost::shared_ptr<uint8_t[]>> longStrings;
 | 
						|
  bool empty = true;
 | 
						|
  bool fUseStoreStringMutex = false;  //@bug6065, make StringStore::storeString() thread safe
 | 
						|
  bool fUseOnlyLongStrings = false;
 | 
						|
  boost::mutex fMutex;
 | 
						|
  std::optional<allocators::CountingAllocator<StringStoreBufType>> alloc{};
 | 
						|
};
 | 
						|
 | 
						|
// Where we store user data for UDA(n)F
 | 
						|
class UserDataStore
 | 
						|
{
 | 
						|
  // length represents the fixed portion length of userData.
 | 
						|
  // There may be variable length data in containers or other
 | 
						|
  // user created structures.
 | 
						|
  struct StoreData
 | 
						|
  {
 | 
						|
    int32_t length;
 | 
						|
    std::string functionName;
 | 
						|
    boost::shared_ptr<mcsv1sdk::UserData> userData;
 | 
						|
    StoreData() : length(0)
 | 
						|
    {
 | 
						|
    }
 | 
						|
    StoreData(const StoreData& rhs)
 | 
						|
    {
 | 
						|
      length = rhs.length;
 | 
						|
      functionName = rhs.functionName;
 | 
						|
      userData = rhs.userData;
 | 
						|
    }
 | 
						|
  };
 | 
						|
 | 
						|
 public:
 | 
						|
  UserDataStore() = default;
 | 
						|
  virtual ~UserDataStore() = default;
 | 
						|
  UserDataStore(const UserDataStore&) = delete;
 | 
						|
  UserDataStore(UserDataStore&&) = delete;
 | 
						|
  UserDataStore& operator=(const UserDataStore&) = delete;
 | 
						|
  UserDataStore& operator=(UserDataStore&&) = delete;
 | 
						|
 | 
						|
  void serialize(messageqcpp::ByteStream&) const;
 | 
						|
  void deserialize(messageqcpp::ByteStream&);
 | 
						|
 | 
						|
  // Set to make UserDataStore thread safe
 | 
						|
  void useUserDataMutex(bool b)
 | 
						|
  {
 | 
						|
    fUseUserDataMutex = b;
 | 
						|
  }
 | 
						|
  bool useUserDataMutex() const
 | 
						|
  {
 | 
						|
    return fUseUserDataMutex;
 | 
						|
  }
 | 
						|
 | 
						|
  // Returns the offset
 | 
						|
  uint32_t storeUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> data,
 | 
						|
                         uint32_t length);
 | 
						|
 | 
						|
  boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t offset) const;
 | 
						|
 | 
						|
 private:
 | 
						|
  std::vector<StoreData> vStoreData;
 | 
						|
 | 
						|
  bool fUseUserDataMutex = false;
 | 
						|
  boost::mutex fMutex;
 | 
						|
};
 | 
						|
 | 
						|
// Forward declaration; AggregateDataStore below only holds it via boost::shared_ptr.
struct GroupConcat;
 | 
						|
 | 
						|
class AggregateDataStore
 | 
						|
{
 | 
						|
 public:
 | 
						|
  AggregateDataStore() = default;
 | 
						|
  explicit AggregateDataStore(const std::vector<boost::shared_ptr<GroupConcat>>& groupConcat)
 | 
						|
   : fGroupConcat(groupConcat)
 | 
						|
  {
 | 
						|
  }
 | 
						|
  ~AggregateDataStore() = default;
 | 
						|
  AggregateDataStore(const AggregateDataStore&) = delete;
 | 
						|
  AggregateDataStore(AggregateDataStore&&) = delete;
 | 
						|
  AggregateDataStore& operator=(const AggregateDataStore&) = delete;
 | 
						|
  AggregateDataStore& operator=(AggregateDataStore&&) = delete;
 | 
						|
 | 
						|
  void serialize(messageqcpp::ByteStream&) const;
 | 
						|
  void deserialize(messageqcpp::ByteStream&);
 | 
						|
 | 
						|
  uint32_t storeAggregateData(boost::shared_ptr<joblist::GroupConcatAg>& data);
 | 
						|
  boost::shared_ptr<joblist::GroupConcatAg> getAggregateData(uint32_t pos) const;
 | 
						|
 | 
						|
  RGDataSizeType getDataSize() const;
 | 
						|
 | 
						|
 private:
 | 
						|
  friend class RGData;
 | 
						|
  std::vector<boost::shared_ptr<GroupConcat>> fGroupConcat;
 | 
						|
  std::vector<boost::shared_ptr<joblist::GroupConcatAg>> fData;
 | 
						|
};
 | 
						|
 | 
						|
// Forward declarations: RGData's interface only needs these names (references,
// pointers and friendship), so the full definitions are not required yet.
class RowGroup;
class Row;
 | 
						|
 | 
						|
/* TODO: OO the rowgroup data to the extent there's no measurable performance hit. */
 | 
						|
class RGData
 | 
						|
{
 | 
						|
 public:
 | 
						|
  RGData() = default;  // useless unless followed by an = or a deserialize operation
 | 
						|
  RGData(allocators::CountingAllocator<RGDataBufType>&);
 | 
						|
  RGData(const RowGroup& rg, uint32_t rowCount);  // allocates memory for rowData
 | 
						|
  explicit RGData(const RowGroup& rg);
 | 
						|
  explicit RGData(const RowGroup& rg, allocators::CountingAllocator<RGDataBufType>& alloc);
 | 
						|
  RGData& operator=(const RGData& rhs) = default;
 | 
						|
  RGData& operator=(RGData&&) = default;
 | 
						|
  RGData(const RGData&) = default;
 | 
						|
  RGData(RGData&&) = default;
 | 
						|
  virtual ~RGData() = default;
 | 
						|
 | 
						|
  // amount should be the # returned by RowGroup::getDataSize()
 | 
						|
  void serialize(messageqcpp::ByteStream&, RGDataSizeType amount) const;
 | 
						|
 | 
						|
  // the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
 | 
						|
  // inline data with a length field.  Once that's converted to string table format, that
 | 
						|
  // option can go away.
 | 
						|
  void deserialize(messageqcpp::ByteStream&, RGDataSizeType amount = 0);  // returns the # of bytes read
 | 
						|
 | 
						|
  inline RGDataSizeType getStringTableMemUsage();
 | 
						|
  void clear();
 | 
						|
  void reinit(const RowGroup& rg);
 | 
						|
  void reinit(const RowGroup& rg, uint32_t rowCount);
 | 
						|
  inline void setStringStore(boost::shared_ptr<StringStore>& ss)
 | 
						|
  {
 | 
						|
    strings = ss;
 | 
						|
  }
 | 
						|
 | 
						|
  // this will use the pre-configured Row to figure out where row # num is, then set the Row
 | 
						|
  // to point to it.  It's a shortcut around using a RowGroup to do the same thing for cases
 | 
						|
  // where it's inconvenient to instantiate one.
 | 
						|
  inline void getRow(uint32_t num, Row* row);
 | 
						|
 | 
						|
  //@bug6065, make StringStore::storeString() thread safe
 | 
						|
  void useStoreStringMutex(bool b)
 | 
						|
  {
 | 
						|
    if (strings)
 | 
						|
      strings->useStoreStringMutex(b);
 | 
						|
  }
 | 
						|
  bool useStoreStringMutex() const
 | 
						|
  {
 | 
						|
    return (strings ? (strings->useStoreStringMutex()) : false);
 | 
						|
  }
 | 
						|
 | 
						|
  UserDataStore* getUserDataStore();
 | 
						|
  // make UserDataStore::storeData() thread safe
 | 
						|
  void useUserDataMutex(bool b)
 | 
						|
  {
 | 
						|
    if (userDataStore)
 | 
						|
      userDataStore->useUserDataMutex(b);
 | 
						|
  }
 | 
						|
  bool useUserDataMutex() const
 | 
						|
  {
 | 
						|
    return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
 | 
						|
  }
 | 
						|
 | 
						|
  bool hasRowData() const
 | 
						|
  {
 | 
						|
    return !!rowData;
 | 
						|
  }
 | 
						|
 | 
						|
 private:
 | 
						|
  uint32_t rowSize = 0;      // can't be.
 | 
						|
  uint32_t columnCount = 0;  // shouldn't be, but...
 | 
						|
  boost::shared_ptr<RGDataBufType> rowData;
 | 
						|
  boost::shared_ptr<StringStore> strings;
 | 
						|
  std::shared_ptr<UserDataStore> userDataStore;
 | 
						|
  std::shared_ptr<AggregateDataStore> aggregateDataStore;
 | 
						|
  std::optional<allocators::CountingAllocator<RGDataBufType>> alloc = {};
 | 
						|
 | 
						|
  // Need sig to support backward compat.  RGData can deserialize both forms.
 | 
						|
  static const uint32_t RGDATA_SIG = 0xffffffff;  // won't happen for 'old' Rowgroup data
 | 
						|
 | 
						|
  friend class RowGroup;
 | 
						|
  friend class RowGroupStorage;
 | 
						|
};
 | 
						|
 | 
						|
class Row
 | 
						|
{
 | 
						|
 public:
 | 
						|
  struct Pointer
 | 
						|
  {
 | 
						|
    inline Pointer() = default;
 | 
						|
 | 
						|
    explicit inline Pointer(uint8_t* d) : data(d)
 | 
						|
    {
 | 
						|
    }
 | 
						|
    inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s)
 | 
						|
    {
 | 
						|
    }
 | 
						|
    inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) : data(d), strings(s), userDataStore(u)
 | 
						|
    {
 | 
						|
    }
 | 
						|
    inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u, AggregateDataStore* a)
 | 
						|
     : data(d), strings(s), userDataStore(u), aggregateDataStore(a)
 | 
						|
    {
 | 
						|
    }
 | 
						|
    uint8_t* data = nullptr;
 | 
						|
    StringStore* strings = nullptr;
 | 
						|
    UserDataStore* userDataStore = nullptr;
 | 
						|
    AggregateDataStore* aggregateDataStore = nullptr;
 | 
						|
  };
 | 
						|
 | 
						|
  Row() = default;
 | 
						|
  Row(const Row&);
 | 
						|
  ~Row() = default;
 | 
						|
 | 
						|
  Row& operator=(const Row&);
 | 
						|
  bool operator==(const Row&) const;
 | 
						|
 | 
						|
  inline void setData(const Pointer&);
 | 
						|
  inline uint8_t* getData() const;
 | 
						|
 | 
						|
  inline void setPointer(const Pointer&);
 | 
						|
  inline Pointer getPointer() const;
 | 
						|
 | 
						|
  inline void nextRow();
 | 
						|
  inline uint32_t getColumnWidth(uint32_t colIndex) const;
 | 
						|
  inline uint32_t getColumnCount() const;
 | 
						|
  inline uint32_t getInternalSize() const;  // this is only accurate if there is no string table
 | 
						|
  inline uint32_t getSize() const;          // this is only accurate if there is no string table
 | 
						|
  // if a string table is being used, getRealSize() takes into account variable-length strings
 | 
						|
  inline uint32_t getRealSize() const;
 | 
						|
  inline uint32_t getOffset(uint32_t colIndex) const;
 | 
						|
  inline uint32_t getScale(uint32_t colIndex) const;
 | 
						|
  inline uint32_t getPrecision(uint32_t colIndex) const;
 | 
						|
  inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
 | 
						|
  inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
 | 
						|
  inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
 | 
						|
  inline uint32_t getCharsetNumber(uint32_t colIndex) const;
 | 
						|
 | 
						|
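  // NOTE(review): the original comment read "this returns true if the type is not CHAR or VARCHAR",
  // which contradicts the name; presumably it returns true when the type IS CHAR or VARCHAR - confirm.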
 | 
						|
  inline bool isCharType(uint32_t colIndex) const;
 | 
						|
  inline bool isUnsigned(uint32_t colIndex) const;
 | 
						|
  inline bool isShortString(uint32_t colIndex) const;
 | 
						|
  inline bool isLongString(uint32_t colIndex) const;
 | 
						|
 | 
						|
  bool colHasCollation(uint32_t colIndex) const
 | 
						|
  {
 | 
						|
    return datatypes::typeHasCollation(getColType(colIndex));
 | 
						|
  }
 | 
						|
 | 
						|
  template <int len>
 | 
						|
  inline uint64_t getUintField(uint32_t colIndex) const;
 | 
						|
  inline uint64_t getUintField(uint32_t colIndex) const;
 | 
						|
  template <int len>
 | 
						|
  inline int64_t getIntField(uint32_t colIndex) const;
 | 
						|
  inline int64_t getIntField(uint32_t colIndex) const;
 | 
						|
  // Get a signed 64-bit integer column value, convert to the given
 | 
						|
  // floating point data type T (e.g. float, double, long double)
 | 
						|
  // and divide it according to the scale.
 | 
						|
  template <typename T>
 | 
						|
  inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
 | 
						|
  {
 | 
						|
    const T d = getIntField(colIndex);
 | 
						|
    if (!scale)
 | 
						|
      return d;
 | 
						|
    return d / datatypes::scaleDivisor<T>(scale);
 | 
						|
  }
 | 
						|
  template <typename T>
 | 
						|
  inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex) const
 | 
						|
  {
 | 
						|
    return getScaledSInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
 | 
						|
  }
 | 
						|
  // Get an unsigned 64-bit integer column value, convert to the given
 | 
						|
  // floating point data type T (e.g. float, double, long double)
 | 
						|
  // and divide it according to the scale.
 | 
						|
  template <typename T>
 | 
						|
  inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
 | 
						|
  {
 | 
						|
    const T d = getUintField(colIndex);
 | 
						|
    if (!scale)
 | 
						|
      return d;
 | 
						|
    return d / datatypes::scaleDivisor<T>(scale);
 | 
						|
  }
 | 
						|
  template <typename T>
 | 
						|
  inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex) const
 | 
						|
  {
 | 
						|
    return getScaledUInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
 | 
						|
  }
 | 
						|
  template <typename T>
 | 
						|
  inline bool equals(T* value, uint32_t colIndex) const;
 | 
						|
  template <int len>
 | 
						|
  inline bool equals(uint64_t val, uint32_t colIndex) const;
 | 
						|
  inline bool equals(long double val, uint32_t colIndex) const;
 | 
						|
  inline bool equals(const int128_t& val, uint32_t colIndex) const;
 | 
						|
 | 
						|
  inline double getDoubleField(uint32_t colIndex) const;
 | 
						|
  inline float getFloatField(uint32_t colIndex) const;
 | 
						|
  inline datatypes::Decimal getDecimalField(uint32_t colIndex) const
 | 
						|
  {
 | 
						|
    if (LIKELY(getColumnWidth(colIndex) == datatypes::MAXDECIMALWIDTH))
 | 
						|
      return datatypes::Decimal(getTSInt128Field(colIndex), (int)getScale(colIndex), getPrecision(colIndex));
 | 
						|
    return datatypes::Decimal(datatypes::TSInt64(getIntField(colIndex)), (int)getScale(colIndex),
 | 
						|
                              getPrecision(colIndex));
 | 
						|
  }
 | 
						|
  inline long double getLongDoubleField(uint32_t colIndex) const;
 | 
						|
  inline void storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const;
 | 
						|
  inline void getInt128Field(uint32_t colIndex, int128_t& x) const;
 | 
						|
  inline datatypes::TSInt128 getTSInt128Field(uint32_t colIndex) const;
 | 
						|
 | 
						|
  inline uint64_t getBaseRid() const;
 | 
						|
  inline uint64_t getRid() const;
 | 
						|
  inline uint16_t getRelRid() const;             // returns a rid relative to this logical block
 | 
						|
  inline uint64_t getExtentRelativeRid() const;  // returns a rid relative to the extent it's in
 | 
						|
  inline uint64_t getFileRelativeRid() const;    // returns a file-relative rid
 | 
						|
  inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum,
 | 
						|
                          uint16_t* rowNum);
 | 
						|
 | 
						|
  template <int len>
 | 
						|
  void setUintField(uint64_t val, uint32_t colIndex);
 | 
						|
 | 
						|
  /* Note: these 2 fcns avoid 1 array lookup per call.  Using them only
 | 
						|
  in projection on the PM resulted in a 2.8% performance gain on
 | 
						|
  the queries listed in bug 2223.
 | 
						|
  TODO: apply them everywhere else possible, and write equivalents
 | 
						|
  for the other types as well as the getters.
 | 
						|
  */
 | 
						|
  template <int len>
 | 
						|
  void setUintField_offset(uint64_t val, uint32_t offset);
 | 
						|
  template <typename T>
 | 
						|
  void setIntField_offset(const T val, const uint32_t offset);
 | 
						|
  inline void nextRow(uint32_t size);
 | 
						|
  inline void prevRow(uint32_t size, uint64_t number);
 | 
						|
 | 
						|
  inline void setUintField(uint64_t val, uint32_t colIndex);
 | 
						|
  template <int len>
 | 
						|
  void setIntField(int64_t, uint32_t colIndex);
 | 
						|
  inline void setIntField(int64_t, uint32_t colIndex);
 | 
						|
 | 
						|
  inline void setDoubleField(double val, uint32_t colIndex);
 | 
						|
  inline void setFloatField(float val, uint32_t colIndex);
 | 
						|
  inline void setDecimalField(double /*val*/, uint32_t /*colIndex*/)
 | 
						|
  {
 | 
						|
  }  // TODO: Do something here
 | 
						|
  inline void setLongDoubleField(const long double& val, uint32_t colIndex);
 | 
						|
  inline void setInt128Field(const int128_t& val, uint32_t colIndex);
 | 
						|
 | 
						|
  inline void setRid(uint64_t rid);
 | 
						|
 | 
						|
  // TODO: remove this (string is not efficient for this), use getConstString() instead
 | 
						|
  inline utils::NullString getStringField(uint32_t colIndex) const
 | 
						|
  {
 | 
						|
    utils::ConstString x = getConstString(colIndex);
 | 
						|
    return utils::NullString(x);
 | 
						|
  }
 | 
						|
 | 
						|
  inline utils::ConstString getConstString(uint32_t colIndex) const;
 | 
						|
  inline utils::ConstString getShortConstString(uint32_t colIndex) const;
 | 
						|
  void setStringField(const utils::NullString& val, uint32_t colIndex);
 | 
						|
  void setStringField(const uint8_t* val, uint32_t length, uint32_t colIndex);
 | 
						|
  inline void setStringField(const utils::ConstString& str, uint32_t colIndex);
 | 
						|
  template <typename T>
 | 
						|
  inline void setBinaryField(const T* value, uint32_t width, uint32_t colIndex);
 | 
						|
  template <typename T>
 | 
						|
  inline void setBinaryField(const T* value, uint32_t colIndex);
 | 
						|
  template <typename T>
 | 
						|
  inline void setBinaryField_offset(const T* value, uint32_t width, uint32_t colIndex);
 | 
						|
  // XXX: TODO: I'd deprecate these two functions in favor of get/setStringField.
 | 
						|
  // get/setStringField properly support binary data of up to 4G bytes
 | 
						|
  // and also provide a performant interface through use of ConstString.
 | 
						|
  // support VARBINARY
 | 
						|
  // Add 2-byte length at the beginning of the field.  nullptr and zero length field are
 | 
						|
  // treated the same, could use one of the length bit to distinguish these two cases.
 | 
						|
  inline void setVarBinaryField(const utils::NullString& val, uint32_t colIndex);
 | 
						|
  // No string construction is necessary for better performance.
 | 
						|
  inline uint32_t getVarBinaryLength(uint32_t colIndex) const;
 | 
						|
  inline const uint8_t* getVarBinaryField(uint32_t colIndex) const;
 | 
						|
  inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
 | 
						|
  inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
 | 
						|
  inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
 | 
						|
  inline void setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> userData,
 | 
						|
                          uint32_t len, uint32_t colIndex);
 | 
						|
  inline void setAggregateData(boost::shared_ptr<joblist::GroupConcatAg> data, uint32_t colIndex);
 | 
						|
  inline joblist::GroupConcatAg* getAggregateData(uint32_t colIndex) const;
 | 
						|
 | 
						|
  uint64_t getNullValue(uint32_t colIndex) const;
 | 
						|
  bool isNullValue(uint32_t colIndex) const;
 | 
						|
  template <cscDataType cscDT, int width>
 | 
						|
  inline bool isNullValue_offset(uint32_t offset) const;
 | 
						|
 | 
						|
  // when NULLs are pulled out via getIntField(), they come out with these values.
 | 
						|
  // Ex: the 1-byte int null value is 0x80.  When it gets cast to an int64_t
 | 
						|
  // it becomes 0xffffffffffffff80, which won't match anything returned by getNullValue().
 | 
						|
  int64_t getSignedNullValue(uint32_t colIndex) const;
 | 
						|
 | 
						|
  // copy data in srcIndex field to destIndex, all data type
 | 
						|
  inline void copyField(uint32_t destIndex, uint32_t srcIndex) const;
 | 
						|
 | 
						|
  // copy data in srcIndex field to destAddr, all data type
 | 
						|
  // inline void copyField(uint8_t* destAddr, uint32_t srcIndex) const;
 | 
						|
 | 
						|
  // an adapter for code that uses the copyField call above;
 | 
						|
  // that's not string-table safe, this one is
 | 
						|
  inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
 | 
						|
 | 
						|
  inline void copyBinaryField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
 | 
						|
 | 
						|
  std::string toString(uint32_t rownum = 0) const;
 | 
						|
  std::string toCSV() const;
 | 
						|
 | 
						|
  /* These fcns are used only in joins.  The RID doesn't matter on the side that
 | 
						|
  gets hashed.  We steal that field here to "mark" a row. */
 | 
						|
  inline void markRow();
 | 
						|
  inline void zeroRid();
 | 
						|
  inline bool isMarked();
 | 
						|
  void setToNull(uint32_t colIndex);
 | 
						|
  void initToNull();
 | 
						|
 | 
						|
  inline void usesStringTable(bool b)
 | 
						|
  {
 | 
						|
    useStringTable = b;
 | 
						|
  }
 | 
						|
  inline bool usesStringTable() const
 | 
						|
  {
 | 
						|
    return useStringTable;
 | 
						|
  }
 | 
						|
  inline bool hasLongString() const
 | 
						|
  {
 | 
						|
    return hasLongStringField;
 | 
						|
  }
 | 
						|
 | 
						|
  // these are for cases when you already know the type definitions are the same.
 | 
						|
  // a fcn to check the type defs separately doesn't exist yet.  No normalization.
 | 
						|
  inline uint64_t hash(uint32_t lastCol) const;  // generates a hash for cols [0-lastCol]
 | 
						|
  inline uint64_t hash() const;                  // generates a hash for all cols
 | 
						|
  inline void colUpdateHasher(datatypes::MariaDBHasher& hM, const utils::Hasher_r& h, const uint32_t col,
 | 
						|
                              uint32_t& intermediateHash) const;
 | 
						|
  inline void colUpdateHasherTypeless(datatypes::MariaDBHasher& hasher, uint32_t keyColsIdx,
 | 
						|
                                      const std::vector<uint32_t>& keyCols,
 | 
						|
                                      const std::vector<uint32_t>* smallSideKeyColumnsIds,
 | 
						|
                                      const std::vector<uint32_t>* smallSideColumnsWidths) const;
 | 
						|
  inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols,
 | 
						|
                               const std::vector<uint32_t>* smallSideKeyColumnsIds,
 | 
						|
                               const std::vector<uint32_t>* smallSideColumnsWidths) const
 | 
						|
  {
 | 
						|
    datatypes::MariaDBHasher h;
 | 
						|
    for (uint32_t i = 0; i < keyCols.size(); i++)
 | 
						|
      colUpdateHasherTypeless(h, i, keyCols, smallSideKeyColumnsIds, smallSideColumnsWidths);
 | 
						|
    return h.finalize();
 | 
						|
  }
 | 
						|
 | 
						|
  bool equals(const Row&, uint32_t lastCol) const;
 | 
						|
  inline bool equals(const Row&) const;
 | 
						|
 | 
						|
  /** @brief Stores the given UserDataStore pointer in this Row (used by getUserData()/setUserData()). */
  inline void setUserDataStore(UserDataStore* u)
  {
    this->userDataStore = u;
  }
 | 
						|
 | 
						|
  bool getNullMark(uint32_t col) const
 | 
						|
  {
 | 
						|
    return data[getInternalSize() + col];
 | 
						|
  }
 | 
						|
 | 
						|
  void setNullMark(uint32_t col, bool isNull) const
 | 
						|
  {
 | 
						|
    data[getInternalSize() + col] = isNull;
 | 
						|
  }
 | 
						|
 | 
						|
  const CHARSET_INFO* getCharset(uint32_t col) const;
 | 
						|
 | 
						|
 private:
 | 
						|
  inline bool inStringTable(uint32_t col) const;
 | 
						|
 | 
						|
 private:
 | 
						|
  uint32_t columnCount = 0;
 | 
						|
  uint64_t baseRid = 0;
 | 
						|
 | 
						|
  // Note, the mem behind these pointer fields is owned by RowGroup not Row
 | 
						|
  uint32_t* oldOffsets = nullptr;
 | 
						|
  uint32_t* stOffsets = nullptr;
 | 
						|
  uint32_t* offsets = nullptr;
 | 
						|
  uint32_t* colWidths = nullptr;
 | 
						|
  execplan::CalpontSystemCatalog::ColDataType* types = nullptr;
 | 
						|
  uint32_t* charsetNumbers = nullptr;
 | 
						|
  CHARSET_INFO** charsets = nullptr;
 | 
						|
  uint8_t* data = nullptr;
 | 
						|
  uint32_t* scale = nullptr;
 | 
						|
  uint32_t* precision = nullptr;
 | 
						|
 | 
						|
  StringStore* strings = nullptr;
 | 
						|
  bool useStringTable = true;
 | 
						|
  bool hasCollation = false;
 | 
						|
  bool hasLongStringField = false;
 | 
						|
  uint32_t sTableThreshold = 20;
 | 
						|
  std::shared_ptr<bool[]> forceInline;
 | 
						|
  UserDataStore* userDataStore = nullptr;            // For UDAF
 | 
						|
  AggregateDataStore* aggregateDataStore = nullptr;  // group_concat & json_arrayagg
 | 
						|
 | 
						|
  friend class RowGroup;
 | 
						|
};
 | 
						|
 | 
						|
/** @brief Bundles data, strings, userDataStore and aggregateDataStore into a Pointer. */
inline Row::Pointer Row::getPointer() const
{
  Pointer snapshot(data, strings, userDataStore, aggregateDataStore);
  return snapshot;
}
 | 
						|
inline uint8_t* Row::getData() const
 | 
						|
{
 | 
						|
  return data;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setPointer(const Pointer& p)
 | 
						|
{
 | 
						|
  data = p.data;
 | 
						|
  strings = p.strings;
 | 
						|
  bool hasStrings = (strings != nullptr);
 | 
						|
 | 
						|
  if (useStringTable != hasStrings)
 | 
						|
  {
 | 
						|
    useStringTable = hasStrings;
 | 
						|
    offsets = (useStringTable ? stOffsets : oldOffsets);
 | 
						|
  }
 | 
						|
 | 
						|
  userDataStore = p.userDataStore;
 | 
						|
  aggregateDataStore = p.aggregateDataStore;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setData(const Pointer& p)
 | 
						|
{
 | 
						|
  setPointer(p);
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::nextRow()
 | 
						|
{
 | 
						|
  data += getSize();
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t Row::getColumnCount() const
 | 
						|
{
 | 
						|
  return columnCount;
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t Row::getColumnWidth(uint32_t col) const
 | 
						|
{
 | 
						|
  return colWidths[col];
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t Row::getInternalSize() const
 | 
						|
{
 | 
						|
  return offsets[columnCount];
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t Row::getSize() const
 | 
						|
{
 | 
						|
  return getInternalSize() + columnCount;
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t Row::getRealSize() const
 | 
						|
{
 | 
						|
  if (!useStringTable)
 | 
						|
    return getSize();
 | 
						|
 | 
						|
  uint32_t ret = columnCount;  // account for NULL flags.
 | 
						|
 | 
						|
  for (uint32_t i = 0; i < columnCount; i++)
 | 
						|
  {
 | 
						|
    if (!inStringTable(i))
 | 
						|
      ret += getColumnWidth(i);
 | 
						|
    else
 | 
						|
      ret += getConstString(i).length();
 | 
						|
  }
 | 
						|
 | 
						|
  return ret;
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t Row::getScale(uint32_t col) const
 | 
						|
{
 | 
						|
  return scale[col];
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t Row::getPrecision(uint32_t col) const
 | 
						|
{
 | 
						|
  return precision[col];
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Returns types[colIndex], the data type of the given column.  No bounds check. */
inline execplan::CalpontSystemCatalog::ColDataType Row::getColType(uint32_t colIndex) const
{
  const execplan::CalpontSystemCatalog::ColDataType colType = types[colIndex];
  return colType;
}
 | 
						|
 | 
						|
/** @brief Returns the mutable pointer to the column type array. */
inline execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes()
{
  return this->types;
}
 | 
						|
 | 
						|
/** @brief Returns a read-only pointer to the column type array. */
inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() const
{
  return this->types;
}
 | 
						|
 | 
						|
inline uint32_t Row::getCharsetNumber(uint32_t col) const
 | 
						|
{
 | 
						|
  return charsetNumbers[col];
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::isCharType(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return datatypes::isCharType(types[colIndex]);
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::isUnsigned(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return datatypes::isUnsigned(types[colIndex]);
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::isShortString(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return (getColumnWidth(colIndex) <= 8 && isCharType(colIndex));
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::isLongString(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return (getColumnWidth(colIndex) > 8 && isCharType(colIndex));
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::inStringTable(uint32_t col) const
 | 
						|
{
 | 
						|
  return strings && getColumnWidth(col) >= sTableThreshold && !forceInline[col];
 | 
						|
}
 | 
						|
 | 
						|
template <typename T>
 | 
						|
inline bool Row::equals(T* value, uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return *reinterpret_cast<T*>(&data[offsets[colIndex]]) == *value;
 | 
						|
}
 | 
						|
 | 
						|
template <int len>
 | 
						|
inline bool Row::equals(uint64_t val, uint32_t colIndex) const
 | 
						|
{
 | 
						|
  /* I think the compiler will optimize away the switch stmt */
 | 
						|
  switch (len)
 | 
						|
  {
 | 
						|
    case 1: return data[offsets[colIndex]] == val;
 | 
						|
 | 
						|
    case 2: return *((uint16_t*)&data[offsets[colIndex]]) == val;
 | 
						|
 | 
						|
    case 4: return *((uint32_t*)&data[offsets[colIndex]]) == val;
 | 
						|
 | 
						|
    case 8: return *((uint64_t*)&data[offsets[colIndex]]) == val;
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::equals(): bad length.");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::equals(long double val, uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return *((long double*)&data[offsets[colIndex]]) == val;
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::equals(const int128_t& val, uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return *((int128_t*)&data[offsets[colIndex]]) == val;
 | 
						|
}
 | 
						|
 | 
						|
template <int len>
 | 
						|
inline uint64_t Row::getUintField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  /* I think the compiler will optimize away the switch stmt */
 | 
						|
  switch (len)
 | 
						|
  {
 | 
						|
    case 1: return data[offsets[colIndex]];
 | 
						|
 | 
						|
    case 2: return *((uint16_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 4: return *((uint32_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 8: return *((uint64_t*)&data[offsets[colIndex]]);
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::getUintField(): bad length.");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
inline uint64_t Row::getUintField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  switch (getColumnWidth(colIndex))
 | 
						|
  {
 | 
						|
    case 1: return data[offsets[colIndex]];
 | 
						|
 | 
						|
    case 2: return *((uint16_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 4: return *((uint32_t*)&data[offsets[colIndex]]);
 | 
						|
    case 8: return *((uint64_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::getUintField(): bad length.");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
template <int len>
 | 
						|
inline int64_t Row::getIntField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  /* I think the compiler will optimize away the switch stmt */
 | 
						|
  switch (len)
 | 
						|
  {
 | 
						|
    case 1: return (int8_t)data[offsets[colIndex]];
 | 
						|
 | 
						|
    case 2: return *((int16_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 4: return *((int32_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 8: return *((int64_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    default:
 | 
						|
      std::cout << "Row::getIntField getColumnWidth(colIndex) " << getColumnWidth(colIndex) << std::endl;
 | 
						|
      idbassert(0);
 | 
						|
      throw std::logic_error("Row::getIntField(): bad length.");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
inline int64_t Row::getIntField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  /* I think the compiler will optimize away the switch stmt */
 | 
						|
  switch (getColumnWidth(colIndex))
 | 
						|
  {
 | 
						|
    case 1: return (int8_t)data[offsets[colIndex]];
 | 
						|
 | 
						|
    case 2: return *((int16_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 4: return *((int32_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 8: return *((int64_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    case 16: return *((int128_t*)&data[offsets[colIndex]]);
 | 
						|
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::getIntField(): bad length.");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
template <typename T>
 | 
						|
inline void Row::setBinaryField(const T* value, uint32_t width, uint32_t colIndex)
 | 
						|
{
 | 
						|
  memcpy(&data[offsets[colIndex]], value, width);
 | 
						|
}
 | 
						|
 | 
						|
template <typename T>
 | 
						|
inline void Row::setBinaryField(const T* value, uint32_t colIndex)
 | 
						|
{
 | 
						|
  *reinterpret_cast<T*>(&data[offsets[colIndex]]) = *value;
 | 
						|
}
 | 
						|
 | 
						|
template <>
 | 
						|
inline void Row::setBinaryField<int128_t>(const int128_t* value, uint32_t colIndex)
 | 
						|
{
 | 
						|
  datatypes::TSInt128::assignPtrPtr(&data[offsets[colIndex]], value);
 | 
						|
}
 | 
						|
 | 
						|
template <>
 | 
						|
inline void Row::setBinaryField<uint128_t>(const uint128_t* value, uint32_t colIndex)
 | 
						|
{
 | 
						|
  datatypes::TSInt128::assignPtrPtr(&data[offsets[colIndex]], value);
 | 
						|
}
 | 
						|
 | 
						|
// This method !cannot! be applied to uint8_t* buffers.
 | 
						|
template <typename T>
 | 
						|
inline void Row::setBinaryField_offset(const T* value, uint32_t /*width*/, uint32_t offset)
 | 
						|
{
 | 
						|
  *reinterpret_cast<T*>(&data[offset]) = *value;
 | 
						|
}
 | 
						|
 | 
						|
template <>
 | 
						|
inline void Row::setBinaryField_offset<uint8_t>(const uint8_t* value, uint32_t width, uint32_t offset)
 | 
						|
{
 | 
						|
  memcpy(&data[offset], value, width);
 | 
						|
}
 | 
						|
 | 
						|
template <>
 | 
						|
inline void Row::setBinaryField_offset<int128_t>(const int128_t* value, uint32_t /*width*/, uint32_t offset)
 | 
						|
{
 | 
						|
  datatypes::TSInt128::assignPtrPtr(&data[offset], value);
 | 
						|
}
 | 
						|
 | 
						|
template <>
 | 
						|
inline void Row::setBinaryField_offset<uint128_t>(const uint128_t* value, uint32_t /*width*/, uint32_t offset)
 | 
						|
{
 | 
						|
  datatypes::TSInt128::assignPtrPtr(&data[offset], value);
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Returns a view of the string stored inline in column colIndex.
 *
 * A NULL column (per isNullValue()) yields ConstString(nullptr, 0).  Otherwise the
 * view starts at the field and its length is strnlen() bounded by the column width,
 * so a field that fills the whole width needs no terminating zero.
 */
inline utils::ConstString Row::getShortConstString(uint32_t colIndex) const
{
  if (isNullValue(colIndex))
    return utils::ConstString(nullptr, 0);

  const char* text = reinterpret_cast<const char*>(&data[offsets[colIndex]]);
  return utils::ConstString(text, strnlen(text, getColumnWidth(colIndex)));
}
 | 
						|
 | 
						|
/** @brief Returns a view of the string value of column colIndex.
 *
 * For a string-table column the field holds a 64-bit token that is resolved
 * through the string store; otherwise the inline bytes are returned via
 * getShortConstString().
 */
inline utils::ConstString Row::getConstString(uint32_t colIndex) const
{
  if (!inStringTable(colIndex))
    return getShortConstString(colIndex);

  const uint64_t token = *reinterpret_cast<uint64_t*>(&data[offsets[colIndex]]);
  return strings->getConstString(token);
}
 | 
						|
 | 
						|
/** @brief Feeds one column into the pair of hashers used by hash().
 *
 * CHAR, VARCHAR, BLOB, TEXT and CLOB columns are added to hM together with the
 * column's charset.  Every other type is hashed as colWidths[col] raw bytes with h,
 * chaining through intermediateHash.
 *
 * @param hM hasher that receives the text columns
 * @param h hasher applied to the raw bytes of all other columns
 * @param col index of the column to hash
 * @param intermediateHash running value for h; updated only for non-text columns
 */
inline void Row::colUpdateHasher(datatypes::MariaDBHasher& hM, const utils::Hasher_r& h, const uint32_t col,
                                 uint32_t& intermediateHash) const
{
  using execplan::CalpontSystemCatalog;

  const auto colType = getColType(col);
  const bool isTextual = colType == CalpontSystemCatalog::CHAR || colType == CalpontSystemCatalog::VARCHAR ||
                         colType == CalpontSystemCatalog::BLOB || colType == CalpontSystemCatalog::TEXT ||
                         colType == CalpontSystemCatalog::CLOB;

  if (isTextual)
  {
    CHARSET_INFO* cs = getCharset(col);
    hM.add(cs, getConstString(col));
    return;
  }

  const char* rawBytes = reinterpret_cast<const char*>(&data[offsets[col]]);
  intermediateHash = h(rawBytes, colWidths[col], intermediateHash);
}
 | 
						|
 | 
						|
/** @brief Feeds one key column into a MariaDBHasher for typeless hashing.
 *
 * - CHAR, VARCHAR, BLOB, TEXT, CLOB: the string is added with the column's charset.
 * - DECIMAL, wide: the 128-bit value is added as MAXDECIMALWIDTH bytes, except that
 *   when smallSideColumnsWidths is given, the width found there for this key differs
 *   from this column's width, and the value lies within
 *   [int64_t min, uint64_t max], only MAXLEGACYWIDTH bytes are added.
 * - DECIMAL, narrow: the value is read with getIntField() and added as MAXLEGACYWIDTH bytes.
 * - Anything else: read with getUintField() or getIntField(), depending on
 *   isUnsigned(), and added as MAXLEGACYWIDTH bytes.
 *
 * All numeric values are added with my_charset_bin.
 *
 * @param h hasher to update
 * @param keyColsIdx position within keyCols of the key being hashed
 * @param keyCols indexes of the key columns in this row
 * @param smallSideKeyColumnsIds used together with smallSideColumnsWidths to look up the other side's width
 * @param smallSideColumnsWidths may be null; when null the width comparison is skipped
 */
inline void Row::colUpdateHasherTypeless(datatypes::MariaDBHasher& h, uint32_t keyColsIdx,
                                         const std::vector<uint32_t>& keyCols,
                                         const std::vector<uint32_t>* smallSideKeyColumnsIds,
                                         const std::vector<uint32_t>* smallSideColumnsWidths) const
{
  const auto keyColumn = keyCols[keyColsIdx];
  const auto keyColType = getColType(keyColumn);

  switch (keyColType)
  {
    case datatypes::SystemCatalog::CHAR:
    case datatypes::SystemCatalog::VARCHAR:
    case datatypes::SystemCatalog::BLOB:
    case datatypes::SystemCatalog::TEXT:
    case datatypes::SystemCatalog::CLOB:
    {
      CHARSET_INFO* cs = getCharset(keyColumn);
      h.add(cs, getConstString(keyColumn));
      return;
    }

    case datatypes::SystemCatalog::DECIMAL:
    {
      const auto width = getColumnWidth(keyColumn);

      if (!datatypes::isWideDecimalType(keyColType, width))
      {
        int64_t narrowVal = getIntField(keyColumn);
        h.add(&my_charset_bin, (const char*)&narrowVal, datatypes::MAXLEGACYWIDTH);
        return;
      }

      datatypes::TSInt128 wideVal = getTSInt128Field(keyColumn);

      // Shrink to the legacy width only when the other side's key has a different
      // width and the value lies within [int64_t min, uint64_t max].
      const bool hashAsLegacy =
          smallSideColumnsWidths &&
          width != derefFromTwoVectorPtrs(smallSideColumnsWidths, smallSideKeyColumnsIds, keyColsIdx) &&
          wideVal.getValue() >= std::numeric_limits<int64_t>::min() &&
          wideVal.getValue() <= std::numeric_limits<uint64_t>::max();

      if (hashAsLegacy)
        h.add(&my_charset_bin, (const char*)&wideVal.getValue(), datatypes::MAXLEGACYWIDTH);
      else
        h.add(&my_charset_bin, (const char*)&wideVal.getValue(), datatypes::MAXDECIMALWIDTH);

      return;
    }

    default:
    {
      if (!isUnsigned(keyColumn))
      {
        int64_t signedVal = getIntField(keyColumn);
        h.add(&my_charset_bin, (const char*)&signedVal, datatypes::MAXLEGACYWIDTH);
        return;
      }

      uint64_t unsignedVal = getUintField(keyColumn);
      h.add(&my_charset_bin, (const char*)&unsignedVal, datatypes::MAXLEGACYWIDTH);
      return;
    }
  }
}
 | 
						|
 | 
						|
inline void Row::setStringField(const uint8_t* str, uint32_t length, uint32_t colIndex)
 | 
						|
{
 | 
						|
  utils::ConstString temp((const char*)str, length);
 | 
						|
  setStringField(temp, colIndex);
 | 
						|
}
 | 
						|
/** @brief Convenience overload: wraps val.str()/val.length() in a ConstString and forwards to the ConstString overload. */
inline void Row::setStringField(const utils::NullString& val, uint32_t colIndex)
{
  const utils::ConstString view(val.str(), val.length());
  setStringField(view, colIndex);
}
 | 
						|
/** @brief Stores a string value into column colIndex.
 *
 * The NULL flag is set when str.str() is a null pointer.  The string is truncated
 * (byte-wise) to the column width.  For a string-table column the bytes go to the
 * string store and the returned 64-bit token is written into the field.  Otherwise
 * the bytes are copied inline and the rest of the field is zero-filled; a null
 * string in a column of width <= 8 is additionally passed to setToNull().
 */
inline void Row::setStringField(const utils::ConstString& str, uint32_t colIndex)
{
  const uint32_t colWidth = getColumnWidth(colIndex);

  // TODO: add multi-byte safe truncation here
  uint32_t length = str.length();

  setNullMark(colIndex, !str.str());

  if (length > colWidth)
    length = colWidth;

  if (inStringTable(colIndex))
  {
    const uint64_t token = strings->storeString((const uint8_t*)str.str(), length);
    *reinterpret_cast<uint64_t*>(&data[offsets[colIndex]]) = token;
    return;
  }

  uint8_t* const field = &data[offsets[colIndex]];

  // Zero everything behind the string up to the next column; equals() relies on
  // this because it compares fields with memcmp.
  memset(field + length, 0, offsets[colIndex + 1] - (offsets[colIndex] + length));

  if (str.str())
  {
    memcpy(field, str.str(), length);
    return;
  }

  // Null string stored inline: short columns carry a special magic value.
  if (colWidth <= 8)
    setToNull(colIndex);
}
 | 
						|
 | 
						|
inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  if (inStringTable(colIndex))
 | 
						|
    return strings->getStringLength(*((uint64_t*)&data[offsets[colIndex]]));
 | 
						|
 | 
						|
  if (getNullMark(colIndex))
 | 
						|
  {
 | 
						|
    return 0;
 | 
						|
  }
 | 
						|
 | 
						|
  return *((uint16_t*)&data[offsets[colIndex]]);
 | 
						|
}
 | 
						|
 | 
						|
inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  if (inStringTable(colIndex))
 | 
						|
    return strings->getPointer(*((uint64_t*)&data[offsets[colIndex]]));
 | 
						|
 | 
						|
  if (getNullMark(colIndex))
 | 
						|
  {
 | 
						|
    return nullptr;
 | 
						|
  }
 | 
						|
 | 
						|
  return &data[offsets[colIndex] + 2];
 | 
						|
}
 | 
						|
 | 
						|
inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const
 | 
						|
{
 | 
						|
  if (inStringTable(colIndex))
 | 
						|
  {
 | 
						|
    len = strings->getStringLength(*((uint64_t*)&data[offsets[colIndex]]));
 | 
						|
    return getVarBinaryField(colIndex);
 | 
						|
  }
 | 
						|
  else
 | 
						|
  {
 | 
						|
    if (getNullMark(colIndex))
 | 
						|
    {
 | 
						|
      len = 0;
 | 
						|
      return nullptr;
 | 
						|
    }
 | 
						|
    len = *((uint16_t*)&data[offsets[colIndex]]);
 | 
						|
    return &data[offsets[colIndex] + 2];
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Fetches the UserData referenced by column colIndex.
 *
 * Returns an empty shared_ptr when no UserDataStore is attached.  Otherwise the
 * field's first 32 bits are passed to the store's getUserData().
 */
inline boost::shared_ptr<mcsv1sdk::UserData> Row::getUserData(uint32_t colIndex) const
{
  if (userDataStore == nullptr)
    return boost::shared_ptr<mcsv1sdk::UserData>();

  const uint32_t handle = *reinterpret_cast<uint32_t*>(&data[offsets[colIndex]]);
  return userDataStore->getUserData(handle);
}
 | 
						|
 | 
						|
inline double Row::getDoubleField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return *((double*)&data[offsets[colIndex]]);
 | 
						|
}
 | 
						|
 | 
						|
inline float Row::getFloatField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return *((float*)&data[offsets[colIndex]]);
 | 
						|
}
 | 
						|
 | 
						|
inline long double Row::getLongDoubleField(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return *((long double*)&data[offsets[colIndex]]);
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::storeInt128FieldIntoPtr(uint32_t colIndex, uint8_t* x) const
 | 
						|
{
 | 
						|
  datatypes::TSInt128::assignPtrPtr(x, &data[offsets[colIndex]]);
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::getInt128Field(uint32_t colIndex, int128_t& x) const
 | 
						|
{
 | 
						|
  datatypes::TSInt128::assignPtrPtr(&x, &data[offsets[colIndex]]);
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Builds a TSInt128 from a pointer to the field of column colIndex. */
inline datatypes::TSInt128 Row::getTSInt128Field(uint32_t colIndex) const
{
  uint8_t* const field = &data[offsets[colIndex]];
  const int128_t* asInt128 = reinterpret_cast<int128_t*>(field);
  return datatypes::TSInt128(asInt128);
}
 | 
						|
 | 
						|
inline uint64_t Row::getRid() const
 | 
						|
{
 | 
						|
  return baseRid + *((uint16_t*)data);
 | 
						|
}
 | 
						|
 | 
						|
inline uint16_t Row::getRelRid() const
 | 
						|
{
 | 
						|
  return *((uint16_t*)data);
 | 
						|
}
 | 
						|
 | 
						|
inline uint64_t Row::getBaseRid() const
 | 
						|
{
 | 
						|
  return baseRid;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::markRow()
 | 
						|
{
 | 
						|
  *((uint16_t*)data) = 0xffff;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::zeroRid()
 | 
						|
{
 | 
						|
  *((uint16_t*)data) = 0;
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::isMarked()
 | 
						|
{
 | 
						|
  return *((uint16_t*)data) == 0xffff;
 | 
						|
}
 | 
						|
 | 
						|
/* Begin speculative code! */
 | 
						|
inline uint32_t Row::getOffset(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return offsets[colIndex];
 | 
						|
}
 | 
						|
 | 
						|
template <int len>
 | 
						|
inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
 | 
						|
{
 | 
						|
  switch (len)
 | 
						|
  {
 | 
						|
    case 1: data[offset] = val; break;
 | 
						|
 | 
						|
    case 2: *((uint16_t*)&data[offset]) = val; break;
 | 
						|
 | 
						|
    case 4: *((uint32_t*)&data[offset]) = val; break;
 | 
						|
 | 
						|
    case 8: *((uint64_t*)&data[offset]) = val; break;
 | 
						|
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
template <typename T>
 | 
						|
inline void Row::setIntField_offset(const T val, const uint32_t offset)
 | 
						|
{
 | 
						|
  *((T*)&data[offset]) = val;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::nextRow(uint32_t size)
 | 
						|
{
 | 
						|
  data += size;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::prevRow(uint32_t size, uint64_t number = 1)
 | 
						|
{
 | 
						|
  data -= size * number;
 | 
						|
}
 | 
						|
 | 
						|
template <int len>
 | 
						|
inline void Row::setUintField(uint64_t val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  switch (len)
 | 
						|
  {
 | 
						|
    case 1: data[offsets[colIndex]] = val; break;
 | 
						|
 | 
						|
    case 2: *((uint16_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 4: *((uint32_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 8: *((uint64_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::setUintField called on a non-uint32_t field");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setUintField(uint64_t val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  switch (getColumnWidth(colIndex))
 | 
						|
  {
 | 
						|
    case 1: data[offsets[colIndex]] = val; break;
 | 
						|
 | 
						|
    case 2: *((uint16_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 4: *((uint32_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 8: *((uint64_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::setUintField: bad length");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
template <int len>
 | 
						|
inline void Row::setIntField(int64_t val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  //	idbassert(getColumnWidth(colIndex) == len);
 | 
						|
  switch (len)
 | 
						|
  {
 | 
						|
    case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 2: *((int16_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 4: *((int32_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 8: *((int64_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::setIntField: bad length");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setIntField(int64_t val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  switch (getColumnWidth(colIndex))
 | 
						|
  {
 | 
						|
    case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 2: *((int16_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 4: *((int32_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    case 8: *((int64_t*)&data[offsets[colIndex]]) = val; break;
 | 
						|
 | 
						|
    default: idbassert(0); throw std::logic_error("Row::setIntField: bad length");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setDoubleField(double val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  *((double*)&data[offsets[colIndex]]) = val;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setFloatField(float val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  // N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan,
 | 
						|
  //  but not necessarily the same bits that you put in. This only seems to be for float (double seems
 | 
						|
  //  to work).
 | 
						|
  if (std::isnan(val))
 | 
						|
    setUintField<4>(joblist::FLOATNULL, colIndex);
 | 
						|
  else
 | 
						|
    *((float*)&data[offsets[colIndex]]) = val;
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setLongDoubleField(const long double& val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  uint8_t* p = &data[offsets[colIndex]];
 | 
						|
  *reinterpret_cast<long double*>(p) = val;
 | 
						|
#ifdef MASK_LONGDOUBLE
 | 
						|
  memset(p + 10, 0, 6);
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::setInt128Field(const int128_t& val, uint32_t colIndex)
 | 
						|
{
 | 
						|
  setBinaryField<int128_t>(&val, colIndex);
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Convenience overload: forwards val.str() and val.length() to the (pointer, length) overload. */
inline void Row::setVarBinaryField(const utils::NullString& val, uint32_t colIndex)
{
  uint8_t* const bytes = (uint8_t*)val.str();
  setVarBinaryField(bytes, val.length(), colIndex);
}
 | 
						|
 | 
						|
inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex)
 | 
						|
{
 | 
						|
  setNullMark(colIndex, !val);
 | 
						|
 | 
						|
  if (inStringTable(colIndex))
 | 
						|
  {
 | 
						|
    if (len > getColumnWidth(colIndex))
 | 
						|
      len = getColumnWidth(colIndex);
 | 
						|
 | 
						|
    uint64_t offset = strings->storeString(val, len);
 | 
						|
    *((uint64_t*)&data[offsets[colIndex]]) = offset;
 | 
						|
  }
 | 
						|
  else
 | 
						|
  {
 | 
						|
    if (len > getColumnWidth(colIndex))
 | 
						|
      len = getColumnWidth(colIndex);
 | 
						|
 | 
						|
    idbassert(val != nullptr || !len);
 | 
						|
 | 
						|
    *((uint16_t*)&data[offsets[colIndex]]) = len;
 | 
						|
    if (val != nullptr)
 | 
						|
      memcpy(&data[offsets[colIndex] + 2], val, len);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Hands userData to the UserDataStore and records the result in column colIndex.
 *
 * Does nothing when no UserDataStore is attached.  Otherwise the value returned by
 * storeUserData() is written to the field's first 4 bytes and len to the next 4.
 */
inline void Row::setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> userData,
                             uint32_t len, uint32_t colIndex)
{
  if (userDataStore == nullptr)
    return;

  const uint32_t handle = userDataStore->storeUserData(context, userData, len);

  uint8_t* const field = &data[offsets[colIndex]];
  *reinterpret_cast<uint32_t*>(field) = handle;
  *reinterpret_cast<uint32_t*>(field + 4) = len;
}
 | 
						|
 | 
						|
/** @brief Hands agData to the AggregateDataStore and records its position in column colIndex.
 *
 * The value returned by storeAggregateData() is written to the field's first 4 bytes.
 * @throws std::logic_error when no AggregateDataStore is attached.
 */
inline void Row::setAggregateData(boost::shared_ptr<joblist::GroupConcatAg> agData, uint32_t colIndex)
{
  if (!aggregateDataStore)
  {
    // Name this function in the message; the getter has its own message.
    throw std::logic_error("Row::setAggregateData: no aggregateDataStore");
  }

  uint32_t pos = aggregateDataStore->storeAggregateData(agData);
  *((uint32_t*)&data[offsets[colIndex]]) = pos;
}
 | 
						|
 | 
						|
/** @brief Looks up the aggregate object referenced by column colIndex.
 *
 * The field's first 4 bytes are passed to the store's getAggregateData() and the
 * raw pointer held by the returned smart pointer is handed back.
 * @throws std::logic_error when no AggregateDataStore is attached.
 */
inline joblist::GroupConcatAg* Row::getAggregateData(uint32_t colIndex) const
{
  if (aggregateDataStore == nullptr)
    throw std::logic_error("Row::getAggregateData: no aggregateDataStore");

  const uint32_t pos = *reinterpret_cast<uint32_t*>(&data[offsets[colIndex]]);
  return aggregateDataStore->getAggregateData(pos).get();
}
 | 
						|
 | 
						|
inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
 | 
						|
{
 | 
						|
  uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
 | 
						|
  memmove(&data[offsets[destIndex]], &data[offsets[srcIndex]], n);
 | 
						|
  setNullMark(destIndex, getNullMark(srcIndex));
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Copies column srcIndex of this row into column destIndex of another row.
 *
 * The source column's type decides how the value is transferred; the first match wins:
 *  1. VARBINARY / BLOB / TEXT / CLOB -> setVarBinaryField()
 *  2. long string                    -> setStringField()
 *  3. short string                   -> setUintField()
 *  4. LONGDOUBLE                     -> setLongDoubleField()
 *  5. wide decimal                   -> copyBinaryField()
 *  6. everything else                -> setIntField()
 */
inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
{
  using execplan::CalpontSystemCatalog;

  if (UNLIKELY(types[srcIndex] == CalpontSystemCatalog::VARBINARY ||
               types[srcIndex] == CalpontSystemCatalog::BLOB ||
               types[srcIndex] == CalpontSystemCatalog::TEXT ||
               types[srcIndex] == CalpontSystemCatalog::CLOB))
  {
    out.setVarBinaryField(getVarBinaryField(srcIndex), getVarBinaryLength(srcIndex), destIndex);
    return;
  }

  if (UNLIKELY(isLongString(srcIndex)))
  {
    out.setStringField(getConstString(srcIndex), destIndex);
    return;
  }

  if (UNLIKELY(isShortString(srcIndex)))
  {
    out.setUintField(getUintField(srcIndex), destIndex);
    return;
  }

  if (UNLIKELY(types[srcIndex] == CalpontSystemCatalog::LONGDOUBLE))
  {
    out.setLongDoubleField(getLongDoubleField(srcIndex), destIndex);
    return;
  }

  if (UNLIKELY(datatypes::isWideDecimalType(types[srcIndex], colWidths[srcIndex])))
  {
    copyBinaryField(out, destIndex, srcIndex);
    return;
  }

  out.setIntField(getIntField(srcIndex), destIndex);
}
 | 
						|
 | 
						|
/** @brief Copies the 128-bit value of column srcIndex into column destIndex of out.
 *
 * The value is read with getTSInt128Field() and written with setInt128Field().
 */
inline void Row::copyBinaryField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
{
  const datatypes::TSInt128 wideValue = getTSInt128Field(srcIndex);
  out.setInt128Field(wideValue.getValue(), destIndex);
}
 | 
						|
 | 
						|
inline void Row::setRid(uint64_t rid)
 | 
						|
{
 | 
						|
  *((uint16_t*)data) = rid & 0xffff;
 | 
						|
}
 | 
						|
 | 
						|
inline uint64_t Row::hash() const
 | 
						|
{
 | 
						|
  return hash(columnCount - 1);
 | 
						|
}
 | 
						|
 | 
						|
inline uint64_t Row::hash(uint32_t lastCol) const
 | 
						|
{
 | 
						|
  // Use two hash classes. MariaDBHasher for text-based
 | 
						|
  // collation-aware data types and Hasher_r for all other data types.
 | 
						|
  // We deliver a hash that is a combination of both hashers' results.
 | 
						|
  utils::Hasher_r h;
 | 
						|
  datatypes::MariaDBHasher hM;
 | 
						|
  uint32_t intermediateHash = 0;
 | 
						|
 | 
						|
  // Sometimes we ask this to hash 0 bytes, and it comes through looking like
 | 
						|
  // lastCol = -1.  Return 0.
 | 
						|
  if (lastCol >= columnCount)
 | 
						|
    return 0;
 | 
						|
 | 
						|
  for (uint32_t i = 0; i <= lastCol; i++)
 | 
						|
    colUpdateHasher(hM, h, i, intermediateHash);
 | 
						|
 | 
						|
  return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize();
 | 
						|
}
 | 
						|
 | 
						|
inline bool Row::equals(const Row& r2) const
 | 
						|
{
 | 
						|
  return equals(r2, columnCount - 1);
 | 
						|
}
 | 
						|
 | 
						|
/** @brief RowGroup is a lightweight interface for processing packed row data
 | 
						|
 | 
						|
        A RowGroup is an interface for parsing and/or modifying row data as described at the top
 | 
						|
        of this file.  Its lifecycle can be tied to a producer or consumer's lifecycle.
 | 
						|
        Only one instance is required to process any number of blocks with a
 | 
						|
        given column configuration.  The column configuration is specified in the
 | 
						|
        constructor, and the block data to process is specified through the
 | 
						|
        setData() function.	 It will not copy or take ownership of the data it processes;
 | 
						|
        the caller should do that.
 | 
						|
 | 
						|
        Row and RowGroup share some bits.  RowGroup owns the memory they share.
 | 
						|
*/
 | 
						|
class RowGroup : public messageqcpp::Serializeable
 | 
						|
{
 | 
						|
 public:
 | 
						|
  /** @brief The default ctor.  It does nothing.  Need to init by assignment or deserialization */
 | 
						|
  RowGroup();
 | 
						|
 | 
						|
  /** @brief The RowGroup ctor, which specifies the column config to process
 | 
						|
 | 
						|
  @param colCount The number of columns
 | 
						|
  @param positions An array specifying the offsets within the packed data
 | 
						|
              of a row where each column begins.  It should have colCount + 1
 | 
						|
              entries.  The first offset is 2, because a row begins with a 2-byte
 | 
						|
              RID.  The last entry should be the offset of the last column +
 | 
						|
              its length, which is also the size of the entire row including the rid.
 | 
						|
  @param coids An array of oids for each column.
 | 
						|
  @param tkeys An array of unique id for each column.
 | 
						|
  @param colTypes An array of COLTYPEs for each column.
 | 
						|
  @param charsetNumbers an Array of the lookup numbers for the charset/collation object.
 | 
						|
  @param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
 | 
						|
  @param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
 | 
						|
  */
 | 
						|
  RowGroup(uint32_t colCount, const std::vector<uint32_t>& positions, const std::vector<uint32_t>& cOids,
 | 
						|
           const std::vector<uint32_t>& tkeys,
 | 
						|
           const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
 | 
						|
           const std::vector<uint32_t>& charsetNumbers, const std::vector<uint32_t>& scale,
 | 
						|
           const std::vector<uint32_t>& precision, uint32_t stringTableThreshold, bool useStringTable = true,
 | 
						|
           const std::vector<bool>& forceInlineData = std::vector<bool>());
 | 
						|
 | 
						|
  /** @brief The copiers.  It copies metadata, not the row data */
 | 
						|
  RowGroup(const RowGroup&);
 | 
						|
 | 
						|
  /** @brief Assignment operator.  It copies metadata, not the row data */
 | 
						|
  RowGroup& operator=(const RowGroup&);
 | 
						|
 | 
						|
  explicit RowGroup(messageqcpp::ByteStream& bs);
 | 
						|
 | 
						|
  ~RowGroup() override;
 | 
						|
 | 
						|
  inline void initRow(Row*, bool forceInlineData = false) const;
 | 
						|
  inline uint32_t getRowCount() const;
 | 
						|
  inline void incRowCount();
 | 
						|
  inline void setRowCount(uint32_t num);
 | 
						|
  inline void getRow(uint32_t rowNum, Row*) const;
 | 
						|
  inline uint32_t getRowSize() const;
 | 
						|
  inline uint32_t getRowSizeWithStrings() const;
 | 
						|
  inline uint64_t getBaseRid() const;
 | 
						|
  void setData(RGData* rgd);
 | 
						|
  inline uint8_t* getData() const;
 | 
						|
  inline RGData* getRGData() const;
 | 
						|
 | 
						|
  uint32_t getStatus() const;
 | 
						|
  void setStatus(uint16_t);
 | 
						|
 | 
						|
  uint32_t getDBRoot() const;
 | 
						|
  void setDBRoot(uint32_t);
 | 
						|
 | 
						|
  RGDataSizeType getDataSize() const;
 | 
						|
  RGDataSizeType getDataSize(uint64_t n) const;
 | 
						|
  RGDataSizeType getMaxDataSize() const;
 | 
						|
  RGDataSizeType getMaxDataSizeWithStrings() const;
 | 
						|
  RGDataSizeType getEmptySize() const;
 | 
						|
 | 
						|
  // this returns the size of the row data with the string table
 | 
						|
  inline RGDataSizeType getSizeWithStrings() const;
 | 
						|
  inline RGDataSizeType getSizeWithStrings(uint64_t n) const;
 | 
						|
 | 
						|
  // sets the row count to 0 and the baseRid to something
 | 
						|
  // effectively initializing whatever chunk of memory
 | 
						|
  // data points to
 | 
						|
  void resetRowGroup(uint64_t baseRid);
 | 
						|
 | 
						|
  /* The Serializeable interface */
 | 
						|
  void serialize(messageqcpp::ByteStream&) const override;
 | 
						|
  void deserialize(messageqcpp::ByteStream&) override;
 | 
						|
 | 
						|
  uint32_t getColumnWidth(uint32_t col) const;
 | 
						|
  uint32_t getColumnCount() const;
 | 
						|
  inline const std::vector<uint32_t>& getOffsets() const;
 | 
						|
  inline const std::vector<uint32_t>& getOIDs() const;
 | 
						|
  inline const std::vector<uint32_t>& getKeys() const;
 | 
						|
  inline const std::vector<uint32_t>& getColWidths() const;
 | 
						|
  inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
 | 
						|
  inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
 | 
						|
  inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
 | 
						|
  inline const std::vector<uint32_t>& getCharsetNumbers() const;
 | 
						|
  inline uint32_t getCharsetNumber(uint32_t colIndex) const;
 | 
						|
  inline std::shared_ptr<bool[]>& getForceInline();
 | 
						|
  static inline uint32_t getHeaderSize()
 | 
						|
  {
 | 
						|
    return headerSize;
 | 
						|
  }
 | 
						|
 | 
						|
  // this returns true if the type is CHAR or VARCHAR
 | 
						|
  inline bool isCharType(uint32_t colIndex) const;
 | 
						|
  inline bool isUnsigned(uint32_t colIndex) const;
 | 
						|
  inline bool isShortString(uint32_t colIndex) const;
 | 
						|
  inline bool isLongString(uint32_t colIndex) const;
 | 
						|
 | 
						|
  bool colHasCollation(uint32_t colIndex) const
 | 
						|
  {
 | 
						|
    return datatypes::typeHasCollation(getColType(colIndex));
 | 
						|
  }
 | 
						|
 | 
						|
  inline const std::vector<uint32_t>& getScale() const;
 | 
						|
  inline const std::vector<uint32_t>& getPrecision() const;
 | 
						|
 | 
						|
  inline bool usesStringTable() const;
 | 
						|
  inline void setUseStringTable(bool);
 | 
						|
  void setUseOnlyLongString(bool b)
 | 
						|
  {
 | 
						|
    useOnlyLongStrings = b;
 | 
						|
  }
 | 
						|
  bool usesOnlyLongString() const
 | 
						|
  {
 | 
						|
    return useOnlyLongStrings;
 | 
						|
  }
 | 
						|
  void setUseAggregateDataStore(bool b, boost::span<boost::shared_ptr<GroupConcat>> group_concats = {});
 | 
						|
  bool usesAggregateDataStore() const
 | 
						|
  {
 | 
						|
    return useAggregateDataStore;
 | 
						|
  }
 | 
						|
 | 
						|
  bool hasLongString() const
 | 
						|
  {
 | 
						|
    return hasLongStringField;
 | 
						|
  }
 | 
						|
 | 
						|
  void serializeRGData(messageqcpp::ByteStream&) const;
 | 
						|
  inline uint32_t getStringTableThreshold() const;
 | 
						|
 | 
						|
  void append(RGData&);
 | 
						|
  void append(RowGroup&);
 | 
						|
  void append(RGData&, uint pos);  // insert starting at position 'pos'
 | 
						|
  void append(RowGroup&, uint pos);
 | 
						|
 | 
						|
  RGData duplicate();  // returns a copy of the attached RGData
 | 
						|
 | 
						|
  std::string toString(const std::vector<uint64_t>& used = {}) const;
 | 
						|
 | 
						|
  /** operator+=
 | 
						|
   *
 | 
						|
   * append the metadata of another RowGroup to this RowGroup
 | 
						|
   */
 | 
						|
  RowGroup& operator+=(const RowGroup& rhs);
 | 
						|
 | 
						|
  // returns a RowGroup with only the first cols columns.  Useful for generating a
 | 
						|
  // RowGroup where the first cols make up a key of some kind, and the rest is irrelevant.
 | 
						|
  RowGroup truncate(uint32_t cols);
 | 
						|
 | 
						|
  /** operator<
 | 
						|
   *
 | 
						|
   * Orders RG's based on baseRid
 | 
						|
   */
 | 
						|
  inline bool operator<(const RowGroup& rhs) const;
 | 
						|
 | 
						|
  void addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& sysDataList);
 | 
						|
 | 
						|
  /* Base RIDs are now a combination of partition#, segment#, extent#, and block#. */
 | 
						|
  inline void setBaseRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
 | 
						|
                         const uint16_t& blockNum);
 | 
						|
  inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);
 | 
						|
 | 
						|
  inline void setStringStore(boost::shared_ptr<StringStore>);
 | 
						|
 | 
						|
  const CHARSET_INFO* getCharset(uint32_t col);
 | 
						|
 | 
						|
  const auto& getGroupConcats() const
 | 
						|
  {
 | 
						|
    return fGroupConcats;
 | 
						|
  }
 | 
						|
 | 
						|
 private:
 | 
						|
  uint32_t columnCount = 0;
 | 
						|
  uint8_t* data = nullptr;
 | 
						|
 | 
						|
  std::vector<uint32_t> oldOffsets;  // inline data offsets
 | 
						|
  std::vector<uint32_t> stOffsets;   // string table offsets
 | 
						|
  uint32_t* offsets = nullptr;       // offsets either points to oldOffsets or stOffsets
 | 
						|
  std::vector<uint32_t> colWidths;
 | 
						|
  // oids: the real oid of the column, may have duplicates with alias.
 | 
						|
  // This oid is necessary for front-end to decide the real column width.
 | 
						|
  std::vector<uint32_t> oids;
 | 
						|
  // keys: the unique id for pair(oid, alias). bug 1632.
 | 
						|
  // Used to map the projected column and rowgroup index
 | 
						|
  std::vector<uint32_t> keys;
 | 
						|
  std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
 | 
						|
  // For string collation
 | 
						|
  std::vector<uint32_t> charsetNumbers;
 | 
						|
  std::vector<CHARSET_INFO*> charsets;
 | 
						|
 | 
						|
  // DECIMAL support.  For non-decimal fields, the values are 0.
 | 
						|
  std::vector<uint32_t> scale;
 | 
						|
  std::vector<uint32_t> precision;
 | 
						|
 | 
						|
  // string table impl
 | 
						|
  RGData* rgData = nullptr;
 | 
						|
  StringStore* strings = nullptr;  // note, strings and data belong to rgData
 | 
						|
  AggregateDataStore* aggregateDataStore = nullptr;
 | 
						|
  bool useStringTable = true;
 | 
						|
  bool useOnlyLongStrings = false;
 | 
						|
  bool useAggregateDataStore = false;
 | 
						|
  bool hasCollation = false;
 | 
						|
  bool hasLongStringField = false;
 | 
						|
  uint32_t sTableThreshold = 20;
 | 
						|
  std::shared_ptr<bool[]> forceInline;
 | 
						|
 | 
						|
  std::vector<boost::shared_ptr<GroupConcat>> fGroupConcats;
 | 
						|
 | 
						|
  static constexpr uint64_t headerSize = 18;
 | 
						|
  static constexpr uint64_t rowCountOffset = 0;
 | 
						|
  static constexpr uint64_t baseRidOffset = 4;
 | 
						|
  static constexpr uint64_t statusOffset = 12;
 | 
						|
  static constexpr uint64_t dbRootOffset = 14;
 | 
						|
};
 | 
						|
 | 
						|
inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
 | 
						|
                             const uint16_t& blockNum);
 | 
						|
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
 | 
						|
                               uint16_t* blockNum);
 | 
						|
 | 
						|
// returns the first rid of the logical block specified by baseRid
 | 
						|
inline uint64_t getExtentRelativeRid(uint64_t baseRid);
 | 
						|
 | 
						|
// returns the first rid of the logical block specified by baseRid
 | 
						|
inline uint64_t getFileRelativeRid(uint64_t baseRid);
 | 
						|
 | 
						|
/** operator+
 | 
						|
 *
 | 
						|
 * add the metadata of 2 RowGroups together and return a new RowGroup
 | 
						|
 */
 | 
						|
RowGroup operator+(const RowGroup& lhs, const RowGroup& rhs);
 | 
						|
 | 
						|
std::shared_ptr<int[]> makeMapping(const RowGroup& r1, const RowGroup& r2);
 | 
						|
void applyMapping(const std::shared_ptr<int[]>& mapping, const Row& in, Row* out);
 | 
						|
void applyMapping(const std::vector<int>& mapping, const Row& in, Row* out);
 | 
						|
void applyMapping(const int* mapping, const Row& in, Row* out);
 | 
						|
 | 
						|
/* PL 8/10/09: commented the asserts for now b/c for the fcns that are called
 | 
						|
every row, they're a measurable performance penalty */
 | 
						|
inline uint32_t RowGroup::getRowCount() const
 | 
						|
{
 | 
						|
  // 	idbassert(data);
 | 
						|
  // 	if (!data) throw std::logic_error("RowGroup::getRowCount(): data is nullptr!");
 | 
						|
  return *((uint32_t*)&data[rowCountOffset]);
 | 
						|
}
 | 
						|
 | 
						|
inline void RowGroup::incRowCount()
 | 
						|
{
 | 
						|
  // 	idbassert(data);
 | 
						|
  ++(*((uint32_t*)&data[rowCountOffset]));
 | 
						|
}
 | 
						|
 | 
						|
inline void RowGroup::setRowCount(uint32_t num)
 | 
						|
{
 | 
						|
  // 	idbassert(data);
 | 
						|
  *((uint32_t*)&data[rowCountOffset]) = num;
 | 
						|
}
 | 
						|
 | 
						|
inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
 | 
						|
{
 | 
						|
  // 	idbassert(data);
 | 
						|
  if (useStringTable != r->usesStringTable())
 | 
						|
    initRow(r);
 | 
						|
 | 
						|
  r->baseRid = getBaseRid();
 | 
						|
  r->data = &(data[headerSize + (rowNum * r->getSize())]);
 | 
						|
  r->strings = strings;
 | 
						|
  r->userDataStore = rgData->userDataStore.get();
 | 
						|
  r->aggregateDataStore = rgData->aggregateDataStore.get();
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Attach an RGData block to this RowGroup.
 *
 * Caches raw pointers to the block's row data, string store and aggregate
 * data store.  Nothing is copied and no ownership is taken, so rgd must
 * outlive its use through this RowGroup.
 *
 * @param rgd The block to attach; must not be null.
 */
inline void RowGroup::setData(RGData* rgd)
{
  rgData = rgd;
  data = rgd->rowData.get();
  strings = rgd->strings.get();
  aggregateDataStore = rgd->aggregateDataStore.get();
}
 | 
						|
 | 
						|
inline uint8_t* RowGroup::getData() const
 | 
						|
{
 | 
						|
  // assert(!useStringTable);
 | 
						|
  return data;
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Return the RGData most recently attached with setData() (may be nullptr). */
inline RGData* RowGroup::getRGData() const
{
  return rgData;
}
 | 
						|
 | 
						|
inline void RowGroup::setUseStringTable(bool b)
 | 
						|
{
 | 
						|
  useStringTable = (b && hasLongStringField);
 | 
						|
  // offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
 | 
						|
  offsets = nullptr;
 | 
						|
 | 
						|
  if (useStringTable && !stOffsets.empty())
 | 
						|
    offsets = &stOffsets[0];
 | 
						|
  else if (!useStringTable && !oldOffsets.empty())
 | 
						|
    offsets = &oldOffsets[0];
 | 
						|
 | 
						|
  if (!useStringTable)
 | 
						|
    strings = nullptr;
 | 
						|
}
 | 
						|
 | 
						|
inline uint64_t RowGroup::getBaseRid() const
 | 
						|
{
 | 
						|
  return *((uint64_t*)&data[baseRidOffset]);
 | 
						|
}
 | 
						|
 | 
						|
inline bool RowGroup::operator<(const RowGroup& rhs) const
 | 
						|
{
 | 
						|
  return (getBaseRid() < rhs.getBaseRid());
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Bind a Row to this RowGroup's column metadata.
 *
 * The Row receives raw pointers into this RowGroup's metadata vectors, so it
 * must not outlive the RowGroup, and the vectors must not be reallocated
 * while the Row is in use.
 *
 * @param r               The Row to initialize; must not be null.
 * @param forceInlineData When true the Row is set up for inline (non string
 *                        table) access regardless of this RowGroup's mode.
 */
void RowGroup::initRow(Row* r, bool forceInlineData) const
{
  r->columnCount = columnCount;

  // std::vector::data() is used instead of &v[0]: it is well-defined on an
  // empty vector, whereas operator[] is not.  const is cast away because Row
  // stores non-const pointers even though this method is const.
  if (LIKELY(!types.empty()))
  {
    r->colWidths = const_cast<uint32_t*>(colWidths.data());
    r->types = const_cast<execplan::CalpontSystemCatalog::ColDataType*>(types.data());
    r->charsetNumbers = const_cast<uint32_t*>(charsetNumbers.data());
    r->charsets = const_cast<CHARSET_INFO**>(charsets.data());
    r->scale = const_cast<uint32_t*>(scale.data());
    r->precision = const_cast<uint32_t*>(precision.data());
  }

  uint32_t* const inlineOffsets = const_cast<uint32_t*>(oldOffsets.data());
  uint32_t* const stringTableOffsets = const_cast<uint32_t*>(stOffsets.data());

  // Both offset tables are always handed over; only the active one differs.
  r->oldOffsets = inlineOffsets;
  r->stOffsets = stringTableOffsets;

  if (forceInlineData)
  {
    r->useStringTable = false;
    r->offsets = inlineOffsets;
  }
  else
  {
    r->useStringTable = useStringTable;
    r->offsets = offsets;
  }

  r->hasLongStringField = hasLongStringField;
  r->sTableThreshold = sTableThreshold;
  r->forceInline = forceInline;
  r->hasCollation = hasCollation;
}
 | 
						|
 | 
						|
inline uint32_t RowGroup::getRowSize() const
 | 
						|
{
 | 
						|
  return offsets[columnCount] + columnCount;
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t RowGroup::getRowSizeWithStrings() const
 | 
						|
{
 | 
						|
  return oldOffsets[columnCount] + columnCount;
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Size of n rows plus whatever auxiliary stores are attached.
 *
 * Starts from getDataSize(n) and adds the size of the string store and of
 * the aggregate data store, for each one that is present.
 *
 * @param n Number of rows to account for.
 */
inline RGDataSizeType RowGroup::getSizeWithStrings(uint64_t n) const
{
  RGDataSizeType total = getDataSize(n);

  if (strings)
    total += strings->getSize();

  if (aggregateDataStore)
    total += aggregateDataStore->getDataSize();

  return total;
}
 | 
						|
 | 
						|
inline uint64_t RowGroup::getSizeWithStrings() const
 | 
						|
{
 | 
						|
  return getSizeWithStrings(getRowCount());
 | 
						|
}
 | 
						|
 | 
						|
inline bool RowGroup::isCharType(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return datatypes::isCharType(types[colIndex]);
 | 
						|
}
 | 
						|
 | 
						|
inline bool RowGroup::isUnsigned(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return datatypes::isUnsigned(types[colIndex]);
 | 
						|
}
 | 
						|
 | 
						|
inline bool RowGroup::isShortString(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return ((getColumnWidth(colIndex) <= 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
 | 
						|
          (getColumnWidth(colIndex) <= 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR));
 | 
						|
}
 | 
						|
 | 
						|
inline bool RowGroup::isLongString(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return ((getColumnWidth(colIndex) > 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
 | 
						|
          (getColumnWidth(colIndex) > 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR) ||
 | 
						|
          types[colIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
 | 
						|
          types[colIndex] == execplan::CalpontSystemCatalog::BLOB ||
 | 
						|
          types[colIndex] == execplan::CalpontSystemCatalog::TEXT ||
 | 
						|
          types[colIndex] == execplan::CalpontSystemCatalog::CLOB);
 | 
						|
}
 | 
						|
 | 
						|
inline bool RowGroup::usesStringTable() const
 | 
						|
{
 | 
						|
  return useStringTable;
 | 
						|
}
 | 
						|
 | 
						|
inline const std::vector<uint32_t>& RowGroup::getOffsets() const
 | 
						|
{
 | 
						|
  return oldOffsets;
 | 
						|
}
 | 
						|
 | 
						|
inline const std::vector<uint32_t>& RowGroup::getOIDs() const
 | 
						|
{
 | 
						|
  return oids;
 | 
						|
}
 | 
						|
 | 
						|
inline const std::vector<uint32_t>& RowGroup::getKeys() const
 | 
						|
{
 | 
						|
  return keys;
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Return the data type of one column.
 *
 * @param colIndex Column index; not range-checked.
 */
inline execplan::CalpontSystemCatalog::ColDataType RowGroup::getColType(uint32_t colIndex) const
{
  return types[colIndex];
}
 | 
						|
 | 
						|
inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes() const
 | 
						|
{
 | 
						|
  return types;
 | 
						|
}
 | 
						|
 | 
						|
inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes()
 | 
						|
{
 | 
						|
  return types;
 | 
						|
}
 | 
						|
 | 
						|
inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
 | 
						|
{
 | 
						|
  return charsetNumbers;
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
 | 
						|
{
 | 
						|
  return charsetNumbers[colIndex];
 | 
						|
}
 | 
						|
 | 
						|
inline const std::vector<uint32_t>& RowGroup::getScale() const
 | 
						|
{
 | 
						|
  return scale;
 | 
						|
}
 | 
						|
 | 
						|
inline const std::vector<uint32_t>& RowGroup::getPrecision() const
 | 
						|
{
 | 
						|
  return precision;
 | 
						|
}
 | 
						|
 | 
						|
inline const std::vector<uint32_t>& RowGroup::getColWidths() const
 | 
						|
{
 | 
						|
  return colWidths;
 | 
						|
}
 | 
						|
 | 
						|
inline std::shared_ptr<bool[]>& RowGroup::getForceInline()
 | 
						|
{
 | 
						|
  return forceInline;
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Pack a partition/segment/extent/block location into a 64-bit base RID.
 *
 * Bit layout of the result: partition in bits 32 and up, segment in bits
 * 16-31, extent in bits 10-15, block in bits 0-9.
 *
 * @param partitionNum Partition number.
 * @param segmentNum   Segment number.
 * @param exNum        Extent number; only the low 6 bits are kept.
 * @param blNum        Block number; only the low 10 bits are kept.
 */
inline uint64_t convertToRid(const uint32_t& partitionNum, const uint16_t& segmentNum, const uint8_t& exNum,
                             const uint16_t& blNum)
{
  const uint64_t partitionBits = static_cast<uint64_t>(partitionNum) << 32;
  const uint64_t segmentBits = static_cast<uint64_t>(segmentNum) << 16;
  // extentNum gets trunc'd to 6 bits, blockNums to 10 bits
  const uint64_t extentBits = (static_cast<uint64_t>(exNum) & 0x3f) << 10;
  const uint64_t blockBits = static_cast<uint64_t>(blNum) & 0x3ff;

  return partitionBits | segmentBits | extentBits | blockBits;
}
 | 
						|
 | 
						|
inline void RowGroup::setBaseRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
 | 
						|
                                 const uint16_t& blockNum)
 | 
						|
{
 | 
						|
  *((uint64_t*)&data[baseRidOffset]) = convertToRid(partNum, segNum, extentNum, blockNum);
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t RowGroup::getStringTableThreshold() const
 | 
						|
{
 | 
						|
  return sTableThreshold;
 | 
						|
}
 | 
						|
 | 
						|
// TODO This is unused, so rm this in the dev branch.
 | 
						|
inline void RowGroup::setStringStore(boost::shared_ptr<StringStore> ss)
 | 
						|
{
 | 
						|
  if (useStringTable)
 | 
						|
  {
 | 
						|
    rgData->setStringStore(ss);
 | 
						|
    strings = rgData->strings.get();
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/** @brief Split a base RID into partition, segment, extent and block numbers.
 *
 * Each output pointer may be null, in which case that component is skipped.
 *
 * @param rid       The base RID to decode.
 * @param partNum   Receives bits 32 and up.
 * @param segNum    Receives bits 16-31.
 * @param extentNum Receives bits 10-15.
 * @param blockNum  Receives bits 0-9.
 */
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
                               uint16_t* blockNum)
{
  if (partNum != nullptr)
  {
    *partNum = static_cast<uint32_t>(rid >> 32);
  }

  if (segNum != nullptr)
  {
    *segNum = static_cast<uint16_t>(rid >> 16);
  }

  if (extentNum != nullptr)
  {
    *extentNum = static_cast<uint8_t>((rid >> 10) & 0x3f);
  }

  if (blockNum != nullptr)
  {
    *blockNum = static_cast<uint16_t>(rid & 0x3ff);
  }
}
 | 
						|
 | 
						|
inline void RowGroup::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum)
 | 
						|
{
 | 
						|
  getLocationFromRid(getBaseRid(), partNum, segNum, extentNum, blockNum);
 | 
						|
}
 | 
						|
 | 
						|
// Returns the first extent-relative RID of the logical block identified by
// baseRid: the 10-bit block number shifted left by 13 bits.
inline uint64_t getExtentRelativeRid(uint64_t baseRid)
{
  const uint64_t blockBits = baseRid & 0x3ff;
  return blockBits << 13;
}
 | 
						|
 | 
						|
inline uint64_t Row::getExtentRelativeRid() const
 | 
						|
{
 | 
						|
  return rowgroup::getExtentRelativeRid(baseRid) | (getRelRid() & 0x1fff);
 | 
						|
}
 | 
						|
 | 
						|
// Returns the first file-relative RID of the logical block identified by
// baseRid: the 6-bit extent number goes to bit 23 and up, the 10-bit block
// number to bits 13-22.
inline uint64_t getFileRelativeRid(uint64_t baseRid)
{
  const uint64_t extentBits = (baseRid >> 10) & 0x3f;
  const uint64_t blockBits = baseRid & 0x3ff;
  return (blockBits << 13) | (extentBits << 23);
}
 | 
						|
 | 
						|
inline uint64_t Row::getFileRelativeRid() const
 | 
						|
{
 | 
						|
  return rowgroup::getFileRelativeRid(baseRid) | (getRelRid() & 0x1fff);
 | 
						|
}
 | 
						|
 | 
						|
inline void Row::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum,
 | 
						|
                             uint16_t* rowNum)
 | 
						|
{
 | 
						|
  getLocationFromRid(baseRid, partNum, segNum, extentNum, blockNum);
 | 
						|
 | 
						|
  if (rowNum)
 | 
						|
    *rowNum = getRelRid();
 | 
						|
}
 | 
						|
 | 
						|
// This routine can be slow for your purposes. Please inspect copyRowInline below,
 | 
						|
// in some cases it can be faster.
 | 
						|
// Please be sure that copyRowInline does indeed copy rows of the same structure of
 | 
						|
// fields.
 | 
						|
inline void copyRow(const Row& in, Row* out, uint32_t colCount)
 | 
						|
{
 | 
						|
  if (&in == out)
 | 
						|
    return;
 | 
						|
 | 
						|
  out->setRid(in.getRelRid());
 | 
						|
 | 
						|
  if (!in.usesStringTable() && !out->usesStringTable())
 | 
						|
  {
 | 
						|
    memcpy(out->getData(), in.getData(), std::min(in.getSize(), out->getSize()));
 | 
						|
 | 
						|
    for (uint32_t i = 0; i < colCount; i++)
 | 
						|
    {
 | 
						|
      out->setNullMark(i, in.getNullMark(i));
 | 
						|
    }
 | 
						|
    return;
 | 
						|
  }
 | 
						|
 | 
						|
  for (uint32_t i = 0; i < colCount; i++)
 | 
						|
  {
 | 
						|
    if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY ||
 | 
						|
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB ||
 | 
						|
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT ||
 | 
						|
                 in.getColTypes()[i] == execplan::CalpontSystemCatalog::CLOB))
 | 
						|
    {
 | 
						|
      out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i);
 | 
						|
    }
 | 
						|
    else if (UNLIKELY(in.isLongString(i)))
 | 
						|
    {
 | 
						|
      out->setStringField(in.getConstString(i), i);
 | 
						|
    }
 | 
						|
    else if (UNLIKELY(in.isShortString(i)))
 | 
						|
    {
 | 
						|
      out->setUintField(in.getUintField(i), i);
 | 
						|
    }
 | 
						|
    else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::DOUBLE))
 | 
						|
    {
 | 
						|
      out->setDoubleField(in.getDoubleField(i), i);
 | 
						|
    }
 | 
						|
    else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE))
 | 
						|
    {
 | 
						|
      out->setLongDoubleField(in.getLongDoubleField(i), i);
 | 
						|
    }
 | 
						|
    else if (UNLIKELY(datatypes::isWideDecimalType(in.getColType(i), in.getColumnWidth(i))))
 | 
						|
    {
 | 
						|
      in.copyBinaryField(*out, i, i);
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
      out->setIntField(in.getIntField(i), i);
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
inline void copyRow(const Row& in, Row* out)
 | 
						|
{
 | 
						|
  copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
 | 
						|
}
 | 
						|
 | 
						|
// This routine can be substantially faster than copyRow above, but there are caveats.
 | 
						|
// The speedy part with memcpy should only be invoked when structures of the rows are the same.
 | 
						|
// Otherwise information about NULLs for inline strings can be lost or garbled.
 | 
						|
inline void copyRowInline(const Row& in, Row* out, uint32_t colCount)
 | 
						|
{
 | 
						|
  if (&in == out)
 | 
						|
    return;
 | 
						|
 | 
						|
  // XXX: this code still may copy data incorrectly if sizes of columns differ.
 | 
						|
  if (!in.usesStringTable() && !out->usesStringTable() && in.getSize() == out->getSize())
 | 
						|
  {
 | 
						|
    out->setRid(in.getRelRid());
 | 
						|
 | 
						|
    memcpy(out->getData(), in.getData(), in.getSize());
 | 
						|
    return;
 | 
						|
  }
 | 
						|
 | 
						|
  copyRow(in, out, colCount);
 | 
						|
}
 | 
						|
 | 
						|
// Returns a copy of the string stored at `off`.
//
// Offset encoding, as used by this function:
//   * std::numeric_limits<uint64_t>::max() -> NULL; a default NullString is returned.
//   * high bit set  -> the remaining bits index `longStrings`.
//   * otherwise     -> off / CHUNK_SIZE selects the chunk in `mem`, and
//                      off % CHUNK_SIZE is the byte position inside it.
// Each stored string is a 4-byte length immediately followed by the bytes.
//
// An offset that does not resolve to a complete entry (index past the end of
// the container, or header/payload extending past the chunk's currentSize)
// yields a default NullString instead of reading out of range.
inline utils::NullString StringStore::getString(uint64_t off) const
{
  utils::NullString nStr;

  if (off == std::numeric_limits<uint64_t>::max())
    return nStr;

  uint32_t length = 0;

  if (off & 0x8000000000000000)
  {
    // Strip the long-string flag; what is left is an index into longStrings.
    off &= ~0x8000000000000000;

    if (longStrings.size() <= off)
      return nStr;

    const MemChunk* longChunk = reinterpret_cast<const MemChunk*>(longStrings[off].get());
    memcpy(&length, longChunk->data, 4);
    nStr.assign(std::string(reinterpret_cast<const char*>(longChunk->data) + 4, length));
    return nStr;
  }

  const uint64_t chunk = off / CHUNK_SIZE;
  const uint64_t offset = off % CHUNK_SIZE;

  // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
  // what gets returned, it just can't go out of bounds.
  if (mem.size() <= chunk)
    return nStr;

  const MemChunk* mc = reinterpret_cast<const MemChunk*>(mem[chunk].get());

  // The 4-byte length header itself must lie inside the used part of the
  // chunk before it is read.
  if ((offset + 4) > mc->currentSize)
    return nStr;

  memcpy(&length, &mc->data[offset], 4);

  // The payload starts 4 bytes after `offset`, so the header is part of the
  // extent that has to fit.
  // NOTE(review): assumes the writer advances currentSize by header + payload
  // for every stored string -- confirm against the store routine.
  if ((offset + 4 + length) > mc->currentSize)
    return nStr;

  nStr.assign(std::string(reinterpret_cast<const char*>(&mc->data[offset]) + 4, length));
  return nStr;
}
 | 
						|
 | 
						|
inline const uint8_t* StringStore::getPointer(uint64_t off) const
 | 
						|
{
 | 
						|
  if (off == std::numeric_limits<uint64_t>::max())
 | 
						|
    return nullptr;
 | 
						|
 | 
						|
  uint64_t chunk = off / CHUNK_SIZE;
 | 
						|
  uint64_t offset = off % CHUNK_SIZE;
 | 
						|
  MemChunk* mc;
 | 
						|
 | 
						|
  if (off & 0x8000000000000000)
 | 
						|
  {
 | 
						|
    // off = off - 0x8000000000000000;
 | 
						|
    off &= ~0x8000000000000000;
 | 
						|
 | 
						|
    if (longStrings.size() <= off)
 | 
						|
      return nullptr;
 | 
						|
 | 
						|
    mc = (MemChunk*)longStrings[off].get();
 | 
						|
    return mc->data + 4;
 | 
						|
  }
 | 
						|
 | 
						|
  // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
 | 
						|
  // what gets returned, it just can't go out of bounds.
 | 
						|
  if (UNLIKELY(mem.size() <= chunk))
 | 
						|
    return nullptr;
 | 
						|
 | 
						|
  mc = (MemChunk*)mem[chunk].get();
 | 
						|
 | 
						|
  if (offset > mc->currentSize)
 | 
						|
    return nullptr;
 | 
						|
 | 
						|
  return &(mc->data[offset]) + 4;
 | 
						|
}
 | 
						|
 | 
						|
inline bool StringStore::isNullValue(uint64_t off) const
 | 
						|
{
 | 
						|
  if (off == std::numeric_limits<uint64_t>::max())
 | 
						|
    return true;
 | 
						|
  return false;
 | 
						|
}
 | 
						|
 | 
						|
inline uint32_t StringStore::getStringLength(uint64_t off) const
 | 
						|
{
 | 
						|
  uint32_t length;
 | 
						|
  MemChunk* mc;
 | 
						|
 | 
						|
  if (off == std::numeric_limits<uint64_t>::max())
 | 
						|
    return 0;
 | 
						|
 | 
						|
  if (off & 0x8000000000000000)
 | 
						|
  {
 | 
						|
    // off = off - 0x8000000000000000;
 | 
						|
    off &= ~0x8000000000000000;
 | 
						|
 | 
						|
    if (longStrings.size() <= off)
 | 
						|
      return 0;
 | 
						|
 | 
						|
    mc = (MemChunk*)longStrings[off].get();
 | 
						|
    memcpy(&length, mc->data, 4);
 | 
						|
  }
 | 
						|
  else
 | 
						|
  {
 | 
						|
    uint64_t chunk = off / CHUNK_SIZE;
 | 
						|
    uint64_t offset = off % CHUNK_SIZE;
 | 
						|
 | 
						|
    if (mem.size() <= chunk)
 | 
						|
      return 0;
 | 
						|
 | 
						|
    mc = (MemChunk*)mem[chunk].get();
 | 
						|
    memcpy(&length, &mc->data[offset], 4);
 | 
						|
  }
 | 
						|
 | 
						|
  return length;
 | 
						|
}
 | 
						|
 | 
						|
inline bool StringStore::isEmpty() const
 | 
						|
{
 | 
						|
  return empty;
 | 
						|
}
 | 
						|
 | 
						|
inline uint64_t StringStore::getSize() const
 | 
						|
{
 | 
						|
  uint32_t i;
 | 
						|
  uint64_t ret = 0;
 | 
						|
  MemChunk* mc;
 | 
						|
 | 
						|
  ret += sizeof(MemChunk) * mem.size();
 | 
						|
  for (i = 0; i < mem.size(); i++)
 | 
						|
  {
 | 
						|
    mc = (MemChunk*)mem[i].get();
 | 
						|
    ret += mc->capacity;
 | 
						|
  }
 | 
						|
 | 
						|
  ret += sizeof(MemChunk) * longStrings.size();
 | 
						|
  for (i = 0; i < longStrings.size(); i++)
 | 
						|
  {
 | 
						|
    mc = (MemChunk*)longStrings[i].get();
 | 
						|
    ret += mc->capacity;
 | 
						|
  }
 | 
						|
 | 
						|
  return ret;
 | 
						|
}
 | 
						|
 | 
						|
inline void RGData::getRow(uint32_t num, Row* row)
 | 
						|
{
 | 
						|
  uint32_t incomingRowSize = row->getSize();
 | 
						|
  idbassert(columnCount == row->getColumnCount() && rowSize == incomingRowSize);
 | 
						|
 | 
						|
  row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * incomingRowSize)], strings.get(),
 | 
						|
                            userDataStore.get(), aggregateDataStore.get()));
 | 
						|
}
 | 
						|
 | 
						|
// Flattens a (group id, row id within the group) pair into a single index,
// given that every group holds `maxRows` rows: gid * maxRows + rid.
inline uint64_t rowGidRidToIdx(uint64_t gid, uint32_t rid, uint32_t maxRows)
{
  const uint64_t groupBase = gid * maxRows;
  return groupBase + rid;
}
 | 
						|
 | 
						|
// Inverse of rowGidRidToIdx: splits a flat index into {group id, row id}
// for groups of `maxRows` rows. `maxRows` must be non-zero.
inline std::pair<uint64_t, uint64_t> rowIdxToGidRid(uint64_t idx, uint32_t maxRows)
{
  const uint64_t gid = idx / maxRows;
  const uint64_t rid = idx % maxRows;
  return std::make_pair(gid, rid);
}
 | 
						|
 | 
						|
}  // namespace rowgroup
 |