1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

Reformat all code to coding standard

This commit is contained in:
Andrew Hutchings
2017-10-26 17:18:17 +01:00
parent 4985f3456e
commit 01446d1e22
1296 changed files with 403852 additions and 353747 deletions

View File

@ -43,229 +43,333 @@ namespace joiner
/* Reverses the byte order of a 64-bit value: the lowest byte becomes the
 * highest, the second-lowest becomes the second-highest, and so on.
 * Applying it twice yields the original value.
 */
inline uint64_t order_swap(uint64_t x)
{
    // Each term moves one source byte into its mirrored position; the masks
    // strip the neighbouring bytes that the shift drags along.  The two outer
    // terms need no mask because the shift itself discards everything else.
    return (x >> 56) |
           ((x << 40) & 0x00FF000000000000ULL) |
           ((x << 24) & 0x0000FF0000000000ULL) |
           ((x << 8)  & 0x000000FF00000000ULL) |
           ((x >> 8)  & 0x00000000FF000000ULL) |
           ((x >> 24) & 0x0000000000FF0000ULL) |
           ((x >> 40) & 0x000000000000FF00ULL) |
           (x << 56);
}
/* An untyped key: `len` raw bytes starting at `data`.  It is the key type of
 * the hash table used for string & compound joins.
 *
 * A length of 0 is a special value: operator== never reports such a key as
 * equal to anything, so it cannot match.
 */
class TypelessData
{
public:
    uint8_t* data;   // first byte of the key; NULL after default construction
    uint32_t len;    // number of key bytes; 0 forces mismatches in operator==

    TypelessData() : data(NULL), len(0) { }

    // Byte-wise comparison; defined inline after the class.
    inline bool operator==(const TypelessData&) const;

    void serialize(messageqcpp::ByteStream&) const;

    // NOTE(review): presumably the key bytes are taken from the allocator
    // passed in, so `data` would not outlive it -- confirm in the .cpp.
    void deserialize(messageqcpp::ByteStream&, utils::FixedAllocator&);
    void deserialize(messageqcpp::ByteStream&, utils::PoolAllocator&);

    std::string toString() const;
};
inline bool TypelessData::operator==(const TypelessData &t) const
inline bool TypelessData::operator==(const TypelessData& t) const
{
if (len != t.len)
return false;
if (len == 0) // special value to force mismatches
return false;
return (memcmp(data, t.data, len) == 0);
if (len != t.len)
return false;
if (len == 0) // special value to force mismatches
return false;
return (memcmp(data, t.data, len) == 0);
}
/* This function makes the keys for string & compound joins.  The length of the
 * key is limited by keylen.  Keys that are longer are assigned a length of 0 on
 * return, signifying that they shouldn't match anything.
 */
extern TypelessData makeTypelessKey(const rowgroup::Row&,
                                    const std::vector<uint32_t>&, uint32_t keylen, utils::FixedAllocator* fa);

/* Overload that takes a PoolAllocator and has no keylen parameter. */
extern TypelessData makeTypelessKey(const rowgroup::Row&,
                                    const std::vector<uint32_t>&, utils::PoolAllocator* fa);

/* NOTE(review): judging by the name and parameters, this hashes the key built
 * from the given columns of the row; seed defaults to 0.  Confirm in the .cpp.
 */
extern uint64_t getHashOfTypelessKey(const rowgroup::Row&, const std::vector<uint32_t>&,
                                     uint32_t seed = 0);
class TupleJoiner
{
public:
struct hasher {
inline size_t operator()(int64_t val) const
{ return fHasher((char *) &val, 8); }
inline size_t operator()(const TypelessData &e) const
{ return fHasher((char *) e.data, e.len); }
struct hasher
{
inline size_t operator()(int64_t val) const
{
return fHasher((char*) &val, 8);
}
inline size_t operator()(const TypelessData& e) const
{
return fHasher((char*) e.data, e.len);
}
private:
utils::Hasher fHasher;
};
private:
utils::Hasher fHasher;
};
/* ctor to use for numeric join */
TupleJoiner(
const rowgroup::RowGroup &smallInput,
const rowgroup::RowGroup &largeInput,
uint32_t smallJoinColumn,
uint32_t largeJoinColumn,
joblist::JoinType jt);
/* ctor to use for numeric join */
TupleJoiner(
const rowgroup::RowGroup& smallInput,
const rowgroup::RowGroup& largeInput,
uint32_t smallJoinColumn,
uint32_t largeJoinColumn,
joblist::JoinType jt);
/* ctor to use for string & compound join */
TupleJoiner(
const rowgroup::RowGroup &smallInput,
const rowgroup::RowGroup &largeInput,
const std::vector<uint32_t> &smallJoinColumns,
const std::vector<uint32_t> &largeJoinColumns,
joblist::JoinType jt);
/* ctor to use for string & compound join */
TupleJoiner(
const rowgroup::RowGroup& smallInput,
const rowgroup::RowGroup& largeInput,
const std::vector<uint32_t>& smallJoinColumns,
const std::vector<uint32_t>& largeJoinColumns,
joblist::JoinType jt);
~TupleJoiner();
~TupleJoiner();
size_t size() const;
void insert(rowgroup::Row &r, bool zeroTheRid = true);
void doneInserting();
size_t size() const;
void insert(rowgroup::Row& r, bool zeroTheRid = true);
void doneInserting();
/* match() returns the small-side rows that match the large-side row.
On a UM join, it uses largeSideRow,
on a PM join, it uses index and threadID.
*/
void match(rowgroup::Row &largeSideRow, uint32_t index, uint32_t threadID,
std::vector<rowgroup::Row::Pointer> *matches);
/* match() returns the small-side rows that match the large-side row.
On a UM join, it uses largeSideRow,
on a PM join, it uses index and threadID.
*/
void match(rowgroup::Row& largeSideRow, uint32_t index, uint32_t threadID,
std::vector<rowgroup::Row::Pointer>* matches);
/* On a PM left outer join + aggregation, the result is already complete.
No need to match, just mark.
*/
void markMatches(uint32_t threadID, uint32_t rowCount);
/* On a PM left outer join + aggregation, the result is already complete.
No need to match, just mark.
*/
void markMatches(uint32_t threadID, uint32_t rowCount);
/* For small outer joins, this is how matches are marked now. */
void markMatches(uint32_t threadID, const std::vector<rowgroup::Row::Pointer> &matches);
/* For small outer joins, this is how matches are marked now. */
void markMatches(uint32_t threadID, const std::vector<rowgroup::Row::Pointer>& matches);
/* Some accessors */
inline bool inPM() const { return joinAlg == PM; }
inline bool inUM() const { return joinAlg == UM; }
void setInPM();
void setInUM();
void setThreadCount(uint32_t cnt);
void setPMJoinResults(boost::shared_array<std::vector<uint32_t> >,
uint32_t threadID);
boost::shared_array<std::vector<uint32_t> > getPMJoinArrays(uint32_t threadID);
std::vector<rowgroup::Row::Pointer> *getSmallSide() { return &rows; }
inline bool smallOuterJoin() { return ((joinType & joblist::SMALLOUTER) != 0); }
inline bool largeOuterJoin() { return ((joinType & joblist::LARGEOUTER) != 0); }
inline bool innerJoin() { return joinType == joblist::INNER; }
inline bool fullOuterJoin() { return (smallOuterJoin() && largeOuterJoin()); }
inline joblist::JoinType getJoinType() { return joinType; }
inline const rowgroup::RowGroup &getSmallRG() { return smallRG; }
inline const rowgroup::RowGroup &getLargeRG() { return largeRG; }
inline uint32_t getSmallKeyColumn() { return smallKeyColumns[0]; }
inline uint32_t getLargeKeyColumn() { return largeKeyColumns[0]; }
bool hasNullJoinColumn(const rowgroup::Row &largeRow) const;
void getUnmarkedRows(std::vector<rowgroup::Row::Pointer> *out);
std::string getTableName() const;
void setTableName(const std::string &tname);
/* Some accessors */
inline bool inPM() const
{
return joinAlg == PM;
}
inline bool inUM() const
{
return joinAlg == UM;
}
void setInPM();
void setInUM();
void setThreadCount(uint32_t cnt);
void setPMJoinResults(boost::shared_array<std::vector<uint32_t> >,
uint32_t threadID);
boost::shared_array<std::vector<uint32_t> > getPMJoinArrays(uint32_t threadID);
std::vector<rowgroup::Row::Pointer>* getSmallSide()
{
return &rows;
}
inline bool smallOuterJoin()
{
return ((joinType & joblist::SMALLOUTER) != 0);
}
inline bool largeOuterJoin()
{
return ((joinType & joblist::LARGEOUTER) != 0);
}
inline bool innerJoin()
{
return joinType == joblist::INNER;
}
inline bool fullOuterJoin()
{
return (smallOuterJoin() && largeOuterJoin());
}
inline joblist::JoinType getJoinType()
{
return joinType;
}
inline const rowgroup::RowGroup& getSmallRG()
{
return smallRG;
}
inline const rowgroup::RowGroup& getLargeRG()
{
return largeRG;
}
inline uint32_t getSmallKeyColumn()
{
return smallKeyColumns[0];
}
inline uint32_t getLargeKeyColumn()
{
return largeKeyColumns[0];
}
bool hasNullJoinColumn(const rowgroup::Row& largeRow) const;
void getUnmarkedRows(std::vector<rowgroup::Row::Pointer>* out);
std::string getTableName() const;
void setTableName(const std::string& tname);
/* To allow sorting */
bool operator<(const TupleJoiner &) const;
/* To allow sorting */
bool operator<(const TupleJoiner&) const;
uint64_t getMemUsage() const;
uint64_t getMemUsage() const;
/* Typeless join interface */
inline bool isTypelessJoin() { return typelessJoin; }
inline bool isSignedUnsignedJoin() { return bSignedUnsignedJoin; }
inline const std::vector<uint32_t> & getSmallKeyColumns() { return smallKeyColumns; }
inline const std::vector<uint32_t> & getLargeKeyColumns() { return largeKeyColumns; }
inline uint32_t getKeyLength() { return keyLength; }
/* Typeless join interface */
inline bool isTypelessJoin()
{
return typelessJoin;
}
inline bool isSignedUnsignedJoin()
{
return bSignedUnsignedJoin;
}
inline const std::vector<uint32_t>& getSmallKeyColumns()
{
return smallKeyColumns;
}
inline const std::vector<uint32_t>& getLargeKeyColumns()
{
return largeKeyColumns;
}
inline uint32_t getKeyLength()
{
return keyLength;
}
/* Runtime casual partitioning support */
inline const boost::scoped_array<bool> &discreteCPValues() { return discreteValues; }
inline const boost::scoped_array<std::vector<int64_t> > &getCPData() { return cpValues; }
inline void setUniqueLimit(uint32_t limit) { uniqueLimit = limit; }
/* Runtime casual partitioning support */
inline const boost::scoped_array<bool>& discreteCPValues()
{
return discreteValues;
}
inline const boost::scoped_array<std::vector<int64_t> >& getCPData()
{
return cpValues;
}
inline void setUniqueLimit(uint32_t limit)
{
uniqueLimit = limit;
}
/* Semi-join interface */
inline bool semiJoin() { return ((joinType & joblist::SEMI) != 0); }
inline bool antiJoin() { return ((joinType & joblist::ANTI) != 0); }
inline bool scalar() { return ((joinType & joblist::SCALAR) != 0); }
inline bool matchnulls() { return ((joinType & joblist::MATCHNULLS) != 0); }
inline bool hasFEFilter() { return fe.get(); }
inline boost::shared_ptr<funcexp::FuncExpWrapper> getFcnExpFilter() { return fe; }
void setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> fe);
inline bool evaluateFilter(rowgroup::Row &r, uint32_t index) { return fes[index].evaluate(&r); }
inline uint64_t getJoinNullValue() { return joblist::BIGINTNULL; } // a normalized NULL value
inline uint64_t smallNullValue() { return nullValueForJoinColumn; }
/* Semi-join interface */
inline bool semiJoin()
{
return ((joinType & joblist::SEMI) != 0);
}
inline bool antiJoin()
{
return ((joinType & joblist::ANTI) != 0);
}
inline bool scalar()
{
return ((joinType & joblist::SCALAR) != 0);
}
inline bool matchnulls()
{
return ((joinType & joblist::MATCHNULLS) != 0);
}
inline bool hasFEFilter()
{
return fe.get();
}
inline boost::shared_ptr<funcexp::FuncExpWrapper> getFcnExpFilter()
{
return fe;
}
void setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> fe);
inline bool evaluateFilter(rowgroup::Row& r, uint32_t index)
{
return fes[index].evaluate(&r);
}
inline uint64_t getJoinNullValue()
{
return joblist::BIGINTNULL; // a normalized NULL value
}
inline uint64_t smallNullValue()
{
return nullValueForJoinColumn;
}
// Disk-based join support
void clearData();
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
bool isFinished() { return finished; }
// Disk-based join support
void clearData();
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
bool isFinished()
{
return finished;
}
private:
typedef std::tr1::unordered_multimap<int64_t, uint8_t *, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, uint8_t *> > > hash_t;
typedef std::tr1::unordered_multimap<int64_t, rowgroup::Row::Pointer, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, rowgroup::Row::Pointer> > > sthash_t;
typedef std::tr1::unordered_multimap<TypelessData, rowgroup::Row::Pointer, hasher, std::equal_to<TypelessData>,
utils::STLPoolAllocator<std::pair<const TypelessData, rowgroup::Row::Pointer> > > typelesshash_t;
typedef std::tr1::unordered_multimap<int64_t, uint8_t*, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, uint8_t*> > > hash_t;
typedef std::tr1::unordered_multimap<int64_t, rowgroup::Row::Pointer, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, rowgroup::Row::Pointer> > > sthash_t;
typedef std::tr1::unordered_multimap<TypelessData, rowgroup::Row::Pointer, hasher, std::equal_to<TypelessData>,
utils::STLPoolAllocator<std::pair<const TypelessData, rowgroup::Row::Pointer> > > typelesshash_t;
typedef hash_t::iterator iterator;
typedef typelesshash_t::iterator thIterator;
typedef hash_t::iterator iterator;
typedef typelesshash_t::iterator thIterator;
TupleJoiner();
TupleJoiner(const TupleJoiner &);
TupleJoiner & operator=(const TupleJoiner &);
TupleJoiner();
TupleJoiner(const TupleJoiner&);
TupleJoiner& operator=(const TupleJoiner&);
iterator begin() { return h->begin(); }
iterator end() { return h->end(); }
iterator begin()
{
return h->begin();
}
iterator end()
{
return h->end();
}
rowgroup::RGData smallNullMemory;
rowgroup::RGData smallNullMemory;
boost::scoped_ptr<hash_t> h; // used for UM joins on ints
boost::scoped_ptr<sthash_t> sth; // used for UM join on ints where the backing table uses a string table
std::vector<rowgroup::Row::Pointer> rows; // used for PM join
boost::scoped_ptr<hash_t> h; // used for UM joins on ints
boost::scoped_ptr<sthash_t> sth; // used for UM join on ints where the backing table uses a string table
std::vector<rowgroup::Row::Pointer> rows; // used for PM join
/* This struct is rough. The BPP-JL stores the parsed results for
the logical block being processed. There are X threads at once, so
up to X logical blocks being processed. For each of those there's a vector
of matches. Each match is an index into 'rows'. */
boost::shared_array<boost::shared_array<std::vector<uint32_t> > > pmJoinResults;
rowgroup::RowGroup smallRG, largeRG;
boost::scoped_array<rowgroup::Row> smallRow;
//boost::shared_array<uint8_t> smallNullMemory;
rowgroup::Row smallNullRow;
/* This struct is rough. The BPP-JL stores the parsed results for
the logical block being processed. There are X threads at once, so
up to X logical blocks being processed. For each of those there's a vector
of matches. Each match is an index into 'rows'. */
boost::shared_array<boost::shared_array<std::vector<uint32_t> > > pmJoinResults;
rowgroup::RowGroup smallRG, largeRG;
boost::scoped_array<rowgroup::Row> smallRow;
//boost::shared_array<uint8_t> smallNullMemory;
rowgroup::Row smallNullRow;
enum JoinAlg {
INSERTING,
PM,
UM,
LARGE
};
JoinAlg joinAlg;
joblist::JoinType joinType;
boost::shared_ptr<utils::PoolAllocator> _pool; // pool for the table and nodes
uint32_t threadCount;
std::string tableName;
enum JoinAlg
{
INSERTING,
PM,
UM,
LARGE
};
JoinAlg joinAlg;
joblist::JoinType joinType;
boost::shared_ptr<utils::PoolAllocator> _pool; // pool for the table and nodes
uint32_t threadCount;
std::string tableName;
/* vars, & fcns for typeless join */
bool typelessJoin;
std::vector<uint32_t> smallKeyColumns, largeKeyColumns;
boost::scoped_ptr<typelesshash_t> ht; // used for UM join on strings
uint32_t keyLength;
utils::FixedAllocator storedKeyAlloc;
/* vars, & fcns for typeless join */
bool typelessJoin;
std::vector<uint32_t> smallKeyColumns, largeKeyColumns;
boost::scoped_ptr<typelesshash_t> ht; // used for UM join on strings
uint32_t keyLength;
utils::FixedAllocator storedKeyAlloc;
boost::scoped_array<utils::FixedAllocator> tmpKeyAlloc;
bool bSignedUnsignedJoin; // Set if we have a signed vs unsigned compare in a join. When not set, we can save checking for the signed bit.
/* semi-join vars & fcns */
boost::shared_ptr<funcexp::FuncExpWrapper> fe;
boost::scoped_array<funcexp::FuncExpWrapper> fes; // holds X copies of fe, one per thread
// this var is only used to normalize the NULL values for single-column joins,
// will have to change when/if we need to support that for compound or string joins
int64_t nullValueForJoinColumn;
/* semi-join vars & fcns */
boost::shared_ptr<funcexp::FuncExpWrapper> fe;
boost::scoped_array<funcexp::FuncExpWrapper> fes; // holds X copies of fe, one per thread
// this var is only used to normalize the NULL values for single-column joins,
// will have to change when/if we need to support that for compound or string joins
int64_t nullValueForJoinColumn;
/* Runtime casual partitioning support */
void updateCPData(const rowgroup::Row &r);
boost::scoped_array<bool> discreteValues;
boost::scoped_array<std::vector<int64_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
uint32_t uniqueLimit;
bool finished;
/* Runtime casual partitioning support */
void updateCPData(const rowgroup::Row& r);
boost::scoped_array<bool> discreteValues;
boost::scoped_array<std::vector<int64_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
uint32_t uniqueLimit;
bool finished;
};
}