1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2026-01-06 08:21:10 +03:00

boost::unordered_flat_map for join

This commit is contained in:
Leonid Fedorov
2025-10-23 11:09:37 +00:00
parent 18a4f01242
commit 6082446215
8 changed files with 252 additions and 3649 deletions

View File

@@ -33,6 +33,7 @@
#include <vector>
#include <list>
#include <unordered.h>
#include <boost/unordered/unordered_flat_map.hpp>
#include <boost/thread.hpp>
#include <boost/scoped_array.hpp>
@@ -107,13 +108,12 @@ class HashJoin
// than a list of ElementType to reduce memory utilization and to increase the performance of loading the
// map. typedef std::tr1::unordered_map<typename element_t::second_type, std::list<element_t>,
// HjHasher<element_t> > hash_t;
typedef
typename std::tr1::unordered_multimap<typename element_t::second_type, typename element_t::first_type>
hash_t;
typedef typename std::tr1::unordered_multimap<typename element_t::second_type,
typename element_t::first_type>::iterator hashIter_t;
typedef typename std::tr1::unordered_multimap<typename element_t::second_type,
typename element_t::first_type>::value_type hashPair_t;
// Now using boost::unordered_flat_map with vector for better cache locality
typedef typename boost::unordered_flat_map<typename element_t::second_type,
std::vector<typename element_t::first_type>>
hash_t;
typedef typename hash_t::iterator hashIter_t;
typedef typename hash_t::value_type hashPair_t;
// allow each thread to have its own pointers
struct control_struct
@@ -503,9 +503,18 @@ void HashJoin<element_t>::createHash(BucketDL<element_t>* srcBucketDL, hash_t* d
try
{
// std::list<element_t> tmp(1,e);
destHashTbl->insert(
std::pair<typename element_t::second_type, typename element_t::first_type>(e.second, e.first));
// For boost::unordered_flat_map, we store vectors of values
auto it = destHashTbl->find(e.second);
if (it != destHashTbl->end())
{
it->second.push_back(e.first);
}
else
{
std::vector<typename element_t::first_type> vec;
vec.push_back(e.first);
destHashTbl->emplace(e.second, std::move(vec));
}
}
catch (exception& exc)
{

File diff suppressed because it is too large Load Diff

View File

@@ -1,445 +0,0 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iosfwd>
#include <utility>
#include "btree.h"
namespace btree
{
// Shared base of btree_set, btree_map, btree_multiset and btree_multimap.
// Holds the Tree instance and forwards iteration, bound lookups, size queries
// and tree statistics to it; find/insert/erase are supplied by the derived
// unique/multi container classes.
template <typename Tree>
class btree_container
{
  using self_type = btree_container<Tree>;

 public:
  // Types re-exported from the underlying tree.
  using params_type = typename Tree::params_type;
  using key_type = typename Tree::key_type;
  using value_type = typename Tree::value_type;
  using key_compare = typename Tree::key_compare;
  using allocator_type = typename Tree::allocator_type;
  using pointer = typename Tree::pointer;
  using const_pointer = typename Tree::const_pointer;
  using reference = typename Tree::reference;
  using const_reference = typename Tree::const_reference;
  using size_type = typename Tree::size_type;
  using difference_type = typename Tree::difference_type;
  using iterator = typename Tree::iterator;
  using const_iterator = typename Tree::const_iterator;
  using reverse_iterator = typename Tree::reverse_iterator;
  using const_reverse_iterator = typename Tree::const_reverse_iterator;

 public:
  // Constructs the wrapped tree from the given comparator and allocator.
  btree_container(const key_compare& comp, const allocator_type& alloc) : tree_(comp, alloc) {}

  // Copy-constructs the wrapped tree from x's tree.
  btree_container(const self_type& x) : tree_(x.tree_) {}

  // ---- Iteration: forwarded to the tree ----
  iterator begin() { return tree_.begin(); }
  const_iterator begin() const { return tree_.begin(); }
  iterator end() { return tree_.end(); }
  const_iterator end() const { return tree_.end(); }
  reverse_iterator rbegin() { return tree_.rbegin(); }
  const_reverse_iterator rbegin() const { return tree_.rbegin(); }
  reverse_iterator rend() { return tree_.rend(); }
  const_reverse_iterator rend() const { return tree_.rend(); }

  // ---- Lookup: forwarded to the tree ----
  iterator lower_bound(const key_type& key) { return tree_.lower_bound(key); }
  const_iterator lower_bound(const key_type& key) const { return tree_.lower_bound(key); }
  iterator upper_bound(const key_type& key) { return tree_.upper_bound(key); }
  const_iterator upper_bound(const key_type& key) const { return tree_.upper_bound(key); }
  std::pair<iterator, iterator> equal_range(const key_type& key) { return tree_.equal_range(key); }
  std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const
  {
    return tree_.equal_range(key);
  }

  // ---- Utilities: forwarded to the tree ----
  void clear() { tree_.clear(); }
  void swap(self_type& x) { tree_.swap(x.tree_); }
  void dump(std::ostream& os) const { tree_.dump(os); }
  void verify() const { tree_.verify(); }

  // ---- Size and tree statistics: forwarded to the tree ----
  size_type size() const { return tree_.size(); }
  size_type max_size() const { return tree_.max_size(); }
  bool empty() const { return tree_.empty(); }
  size_type height() const { return tree_.height(); }
  size_type internal_nodes() const { return tree_.internal_nodes(); }
  size_type leaf_nodes() const { return tree_.leaf_nodes(); }
  size_type nodes() const { return tree_.nodes(); }
  size_type bytes_used() const { return tree_.bytes_used(); }
  static double average_bytes_per_value() { return Tree::average_bytes_per_value(); }
  double fullness() const { return tree_.fullness(); }
  double overhead() const { return tree_.overhead(); }

  // Two containers are equal when they have the same size and their elements
  // compare equal pairwise in iteration order.
  bool operator==(const self_type& x) const
  {
    if (size() != x.size())
      return false;

    // Sizes match, so walking our own range is enough to bound both walks.
    const_iterator mine = begin();
    const_iterator theirs = x.begin();
    while (mine != end())
    {
      if (*mine != *theirs)
        return false;
      ++mine;
      ++theirs;
    }
    return true;
  }

  // Negation of operator==.
  bool operator!=(const self_type& other) const { return !(*this == other); }

 protected:
  // The wrapped tree; derived classes reach it to implement find/insert/erase.
  Tree tree_;
};
// Stream insertion for any btree_container: writes the container through its
// dump() member and returns the same stream.
template <typename Tree>
inline std::ostream& operator<<(std::ostream& os, const btree_container<Tree>& b)
{
  b.dump(os);
  return os;
}
// Shared base of btree_set and safe_btree_set. Adds find/count/insert/erase
// on top of btree_container by forwarding to the tree's *_unique operations.
template <typename Tree>
class btree_unique_container : public btree_container<Tree>
{
  using self_type = btree_unique_container<Tree>;
  using super_type = btree_container<Tree>;

 public:
  using key_type = typename Tree::key_type;
  using value_type = typename Tree::value_type;
  using size_type = typename Tree::size_type;
  using key_compare = typename Tree::key_compare;
  using allocator_type = typename Tree::allocator_type;
  using iterator = typename Tree::iterator;
  using const_iterator = typename Tree::const_iterator;

 public:
  // Constructs an empty container with the given comparator and allocator.
  btree_unique_container(const key_compare& comp = key_compare(),
                         const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
  }

  // Copy-constructs from another container of the same type.
  btree_unique_container(const self_type& x) : super_type(x) {}

  // Constructs an empty container, then inserts every element of [b, e).
  template <class InputIterator>
  btree_unique_container(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
                         const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
    this->insert(b, e);
  }

  // ---- Lookup ----
  iterator find(const key_type& key) { return this->tree_.find_unique(key); }
  const_iterator find(const key_type& key) const { return this->tree_.find_unique(key); }
  size_type count(const key_type& key) const { return this->tree_.count_unique(key); }

  // ---- Insertion ----
  std::pair<iterator, bool> insert(const value_type& x) { return this->tree_.insert_unique(x); }
  iterator insert(iterator position, const value_type& x)
  {
    return this->tree_.insert_unique(position, x);
  }
  template <typename InputIterator>
  void insert(InputIterator b, InputIterator e)
  {
    this->tree_.insert_unique(b, e);
  }

  // ---- Deletion ----
  // Erases by key; returns whatever the tree's erase_unique reports.
  int erase(const key_type& key) { return this->tree_.erase_unique(key); }

  // Erases the element at `iter`, which must be valid (not end()). Returns an
  // iterator to the element after the erased one, or end() if there is none.
  iterator erase(const iterator& iter) { return this->tree_.erase(iter); }

  // Erases the range [first, last).
  void erase(const iterator& first, const iterator& last) { this->tree_.erase(first, last); }
};
// Shared base of btree_map and safe_btree_map. Extends btree_unique_container
// with operator[].
template <typename Tree>
class btree_map_container : public btree_unique_container<Tree>
{
  using self_type = btree_map_container<Tree>;
  using super_type = btree_unique_container<Tree>;

 public:
  using key_type = typename Tree::key_type;
  using data_type = typename Tree::data_type;
  using value_type = typename Tree::value_type;
  using mapped_type = typename Tree::mapped_type;
  using key_compare = typename Tree::key_compare;
  using allocator_type = typename Tree::allocator_type;

 private:
  // Pointer-like helper whose value is only built when it is dereferenced.
  // operator[] hands it to the tree so that no empty data_type has to be
  // constructed when the key is already present in the map.
  struct generate_value
  {
    generate_value(const key_type& k) : key(k) {}

    // Produces the (key, default-constructed data) pair on demand.
    value_type operator*() const { return std::make_pair(key, data_type()); }

    const key_type& key;
  };

 public:
  // Constructs an empty map with the given comparator and allocator.
  btree_map_container(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
  }

  // Copy-constructs from another map container of the same type.
  btree_map_container(const self_type& x) : super_type(x) {}

  // Constructs a map from the elements of [b, e).
  template <class InputIterator>
  btree_map_container(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
                      const allocator_type& alloc = allocator_type())
   : super_type(b, e, comp, alloc)
  {
  }

  // Returns a reference to the data stored under `key`, going through the
  // tree's insert_unique with a lazily generated default value.
  data_type& operator[](const key_type& key)
  {
    auto slot = this->tree_.insert_unique(key, generate_value(key));
    return slot.first->second;
  }
};
// Shared base of btree_multiset and btree_multimap. Adds find/count/insert/
// erase on top of btree_container by forwarding to the tree's *_multi
// operations.
template <typename Tree>
class btree_multi_container : public btree_container<Tree>
{
  using self_type = btree_multi_container<Tree>;
  using super_type = btree_container<Tree>;

 public:
  using key_type = typename Tree::key_type;
  using value_type = typename Tree::value_type;
  using size_type = typename Tree::size_type;
  using key_compare = typename Tree::key_compare;
  using allocator_type = typename Tree::allocator_type;
  using iterator = typename Tree::iterator;
  using const_iterator = typename Tree::const_iterator;

 public:
  // Constructs an empty container with the given comparator and allocator.
  btree_multi_container(const key_compare& comp = key_compare(),
                        const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
  }

  // Copy-constructs from another container of the same type.
  btree_multi_container(const self_type& x) : super_type(x) {}

  // Constructs an empty container, then inserts every element of [b, e).
  template <class InputIterator>
  btree_multi_container(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
                        const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
    this->insert(b, e);
  }

  // ---- Lookup ----
  iterator find(const key_type& key) { return this->tree_.find_multi(key); }
  const_iterator find(const key_type& key) const { return this->tree_.find_multi(key); }
  size_type count(const key_type& key) const { return this->tree_.count_multi(key); }

  // ---- Insertion ----
  iterator insert(const value_type& x) { return this->tree_.insert_multi(x); }
  iterator insert(iterator position, const value_type& x)
  {
    return this->tree_.insert_multi(position, x);
  }
  template <typename InputIterator>
  void insert(InputIterator b, InputIterator e)
  {
    this->tree_.insert_multi(b, e);
  }

  // ---- Deletion ----
  // Erases by key; returns whatever the tree's erase_multi reports.
  int erase(const key_type& key) { return this->tree_.erase_multi(key); }

  // Erases the element at `iter`, which must be valid (not end()). Returns an
  // iterator to the element after the erased one, or end() if there is none.
  iterator erase(const iterator& iter) { return this->tree_.erase(iter); }

  // Erases the range [first, last).
  void erase(const iterator& first, const iterator& last) { this->tree_.erase(first, last); }
};
} // namespace btree

View File

@@ -1,121 +0,0 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// A btree_map<> implements the STL unique sorted associative container
// interface and the pair associative container interface (a.k.a map<>) using a
// btree. A btree_multimap<> implements the STL multiple sorted associative
// container interface and the pair associative container interface (a.k.a
// multimap<>) using a btree. See btree.h for details of the btree
// implementation and caveats.
#pragma once
#include <algorithm>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include "btree.h"
#include "btree_container.h"
namespace btree
{
// btree_map exists mainly to provide constructors; its map operations are
// inherited from btree_map_container over a btree configured with
// btree_map_params.
template <typename Key, typename Value, typename Compare = std::less<Key>,
          typename Alloc = std::allocator<std::pair<const Key, Value> >, int TargetNodeSize = 256>
class btree_map
 : public btree_map_container<btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
{
  using self_type = btree_map<Key, Value, Compare, Alloc, TargetNodeSize>;
  using params_type = btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize>;
  using btree_type = btree<params_type>;
  using super_type = btree_map_container<btree_type>;

 public:
  using key_compare = typename btree_type::key_compare;
  using allocator_type = typename btree_type::allocator_type;

 public:
  // Constructs an empty map with the given comparator and allocator.
  btree_map(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
  }

  // Copy-constructs from another btree_map of the same type.
  btree_map(const self_type& x) : super_type(x) {}

  // Constructs a map from the elements of [b, e).
  template <class InputIterator>
  btree_map(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
            const allocator_type& alloc = allocator_type())
   : super_type(b, e, comp, alloc)
  {
  }
};
// Non-member swap for btree_map: exchanges the contents of x and y by calling
// x.swap(y).
template <typename Key, typename Value, typename Compare, typename Alloc, int NodeSize>
inline void swap(btree_map<Key, Value, Compare, Alloc, NodeSize>& x,
                 btree_map<Key, Value, Compare, Alloc, NodeSize>& y)
{
  x.swap(y);
}
// btree_multimap exists mainly to provide constructors; its operations are
// inherited from btree_multi_container over a btree configured with
// btree_map_params.
template <typename Key, typename Value, typename Compare = std::less<Key>,
          typename Alloc = std::allocator<std::pair<const Key, Value> >, int TargetNodeSize = 256>
class btree_multimap
 : public btree_multi_container<btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
{
  using self_type = btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize>;
  using params_type = btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize>;
  using btree_type = btree<params_type>;
  using super_type = btree_multi_container<btree_type>;

 public:
  using key_compare = typename btree_type::key_compare;
  using allocator_type = typename btree_type::allocator_type;
  using data_type = typename btree_type::data_type;
  using mapped_type = typename btree_type::mapped_type;

 public:
  // Constructs an empty multimap with the given comparator and allocator.
  btree_multimap(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
  }

  // Copy-constructs from another btree_multimap of the same type.
  btree_multimap(const self_type& x) : super_type(x) {}

  // Constructs a multimap from the elements of [b, e).
  template <class InputIterator>
  btree_multimap(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
                 const allocator_type& alloc = allocator_type())
   : super_type(b, e, comp, alloc)
  {
  }
};
// Non-member swap for btree_multimap: exchanges the contents of x and y by
// calling x.swap(y).
template <typename Key, typename Value, typename Compare, typename Alloc, int NodeSize>
inline void swap(btree_multimap<Key, Value, Compare, Alloc, NodeSize>& x,
                 btree_multimap<Key, Value, Compare, Alloc, NodeSize>& y)
{
  x.swap(y);
}
} // namespace btree

View File

@@ -1,115 +0,0 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// A btree_set<> implements the STL unique sorted associative container
// interface (a.k.a set<>) using a btree. A btree_multiset<> implements the STL
// multiple sorted associative container interface (a.k.a multiset<>) using a
// btree. See btree.h for details of the btree implementation and caveats.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include "btree.h"
#include "btree_container.h"
namespace btree
{
// btree_set exists mainly to provide constructors; its operations are
// inherited from btree_unique_container over a btree configured with
// btree_set_params.
template <typename Key, typename Compare = std::less<Key>, typename Alloc = std::allocator<Key>,
          int TargetNodeSize = 256>
class btree_set
 : public btree_unique_container<btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
{
  using self_type = btree_set<Key, Compare, Alloc, TargetNodeSize>;
  using params_type = btree_set_params<Key, Compare, Alloc, TargetNodeSize>;
  using btree_type = btree<params_type>;
  using super_type = btree_unique_container<btree_type>;

 public:
  using key_compare = typename btree_type::key_compare;
  using allocator_type = typename btree_type::allocator_type;

 public:
  // Constructs an empty set with the given comparator and allocator.
  btree_set(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
  }

  // Copy-constructs from another btree_set of the same type.
  btree_set(const self_type& x) : super_type(x) {}

  // Constructs a set from the elements of [b, e).
  template <class InputIterator>
  btree_set(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
            const allocator_type& alloc = allocator_type())
   : super_type(b, e, comp, alloc)
  {
  }
};
// Non-member swap for btree_set: exchanges the contents of x and y by calling
// x.swap(y).
template <typename Key, typename Compare, typename Alloc, int NodeSize>
inline void swap(btree_set<Key, Compare, Alloc, NodeSize>& x, btree_set<Key, Compare, Alloc, NodeSize>& y)
{
  x.swap(y);
}
// btree_multiset exists mainly to provide constructors; its operations are
// inherited from btree_multi_container over a btree configured with
// btree_set_params.
template <typename Key, typename Compare = std::less<Key>, typename Alloc = std::allocator<Key>,
          int TargetNodeSize = 256>
class btree_multiset
 : public btree_multi_container<btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
{
  using self_type = btree_multiset<Key, Compare, Alloc, TargetNodeSize>;
  using params_type = btree_set_params<Key, Compare, Alloc, TargetNodeSize>;
  using btree_type = btree<params_type>;
  using super_type = btree_multi_container<btree_type>;

 public:
  using key_compare = typename btree_type::key_compare;
  using allocator_type = typename btree_type::allocator_type;

 public:
  // Constructs an empty multiset with the given comparator and allocator.
  btree_multiset(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
   : super_type(comp, alloc)
  {
  }

  // Copy-constructs from another btree_multiset of the same type.
  btree_multiset(const self_type& x) : super_type(x) {}

  // Constructs a multiset from the elements of [b, e).
  template <class InputIterator>
  btree_multiset(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
                 const allocator_type& alloc = allocator_type())
   : super_type(b, e, comp, alloc)
  {
  }
};
// Non-member swap for btree_multiset: exchanges the contents of x and y by
// calling x.swap(y).
template <typename Key, typename Compare, typename Alloc, int NodeSize>
inline void swap(btree_multiset<Key, Compare, Alloc, NodeSize>& x,
                 btree_multiset<Key, Compare, Alloc, NodeSize>& y)
{
  x.swap(y);
}
} // namespace btree

View File

@@ -20,6 +20,7 @@
#include <iostream>
#include <vector>
#include <boost/shared_ptr.hpp>
#include <boost/unordered/unordered_flat_map.hpp>
#include <unordered.h>
@@ -50,9 +51,8 @@ class Joiner
{
public:
// typedef std::tr1::unordered_multimap<uint64_t, uint64_t> hash_t;
typedef std::tr1::unordered_multimap<uint64_t, uint64_t, std::tr1::hash<uint64_t>, std::equal_to<uint64_t>,
utils::SimpleAllocator<std::pair<uint64_t const, uint64_t> > >
hash_t;
// Using boost::unordered_flat_map with vector for multimap behavior
typedef boost::unordered_flat_map<uint64_t, std::vector<uint64_t>> hash_t;
typedef hash_t::iterator iterator;
@@ -74,7 +74,17 @@ class Joiner
}
inline void insert(const joblist::ElementType& e)
{
h->insert(std::pair<uint64_t, uint64_t>(e.second, e.first));
auto it = h->find(e.second);
if (it != h->end())
{
it->second.push_back(e.first);
}
else
{
std::vector<uint64_t> vec;
vec.push_back(e.first);
h->emplace(e.second, std::move(vec));
}
}
void doneInserting();
boost::shared_ptr<std::vector<joblist::ElementType> > getSmallSide();
@@ -83,19 +93,17 @@ class Joiner
/* Used by the UM */
inline bool match(const joblist::ElementType& large)
{
std::pair<iterator, iterator> range;
iterator it = h->find(large.second);
if (it == h->end())
return _includeAll;
else if (it->second & MSB)
else if (!it->second.empty() && (it->second[0] & MSB))
return true;
else
{
range = h->equal_range(large.second);
for (; range.first != range.second; ++range.first)
range.first->second |= MSB;
// Mark all values in the vector
for (auto& val : it->second)
val |= MSB;
return true;
}
@@ -103,31 +111,32 @@ class Joiner
inline void mark(const joblist::ElementType& large)
{
std::pair<iterator, iterator> range;
iterator it = h->find(large.second);
range = h->equal_range(large.second);
for (; range.first != range.second; ++range.first)
range.first->second |= MSB;
if (it != h->end())
{
for (auto& val : it->second)
val |= MSB;
}
}
/* Used by the PM */
inline bool getNewMatches(const uint64_t value, std::vector<joblist::ElementType>* newMatches)
{
std::pair<iterator, iterator> range;
iterator it = h->find(value);
if (it == h->end())
return _includeAll;
else if (it->second & MSB)
else if (!it->second.empty() && (it->second[0] & MSB))
return true;
else
{
newMatches->push_back(joblist::ElementType(it->second | MSB, value));
range = h->equal_range(value);
for (; range.first != range.second; ++range.first)
range.first->second |= MSB;
// Add all values to newMatches and mark them
for (auto& val : it->second)
{
newMatches->push_back(joblist::ElementType(val | MSB, value));
val |= MSB;
}
return true;
}

View File

@@ -41,11 +41,10 @@ namespace joiner
constexpr const size_t DEFAULT_BUCKET_COUNT = 10;
template <typename HashTable>
std::unique_ptr<HashTable> makeHashMap(size_t bucketCount, ResourceManager* resourceManager)
std::unique_ptr<HashTable> makeHashMap(size_t /*bucketCount*/, ResourceManager* /*resourceManager*/)
{
return std::unique_ptr<HashTable>(new HashTable(bucketCount, TupleJoiner::hasher(),
typename HashTable::key_equal(),
utils::STLPoolAllocator<typename HashTable::value_type>(resourceManager)));
// boost::unordered_flat_map doesn't need bucket_count or allocator in constructor
return std::unique_ptr<HashTable>(new HashTable());
}
void TupleJoiner::initRowsVector()
@@ -290,7 +289,19 @@ void TupleJoiner::bucketsToTables(buckets_t* buckets, hash_table_t& tables)
done = false;
continue;
}
tables[i]->insert(buckets[i].begin(), buckets[i].end());
// Insert each pair into the vector-based flat_map
for (auto& p : buckets[i])
{
auto it = tables[i]->find(p.first);
if (it != tables[i]->end())
it->second.push_back(p.second);
else
{
std::vector<typename std::decay<decltype(p.second)>::type> vec;
vec.push_back(p.second);
tables[i]->emplace(p.first, std::move(vec));
}
}
}
wasProductive = true;
@@ -440,7 +451,15 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
if (td.len > 0)
{
uint bucket = bucketPicker((char*)td.data, td.len, bpSeed) & bucketMask;
ht[bucket]->insert(pair<TypelessData, Row::Pointer>(td, r.getPointer()));
auto it = ht[bucket]->find(td);
if (it != ht[bucket]->end())
it->second.push_back(r.getPointer());
else
{
std::vector<Row::Pointer> vec;
vec.push_back(r.getPointer());
ht[bucket]->emplace(td, std::move(vec));
}
}
}
else if (r.getColType(smallKeyColumns[0]) == execplan::CalpontSystemCatalog::LONGDOUBLE)
@@ -448,10 +467,16 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
long double smallKey = r.getLongDoubleField(smallKeyColumns[0]);
uint bucket = bucketPicker((char*)&smallKey, 10, bpSeed) &
bucketMask; // change if we decide to support windows again
if (UNLIKELY(smallKey == joblist::LONGDOUBLENULL))
ld[bucket]->insert(pair<long double, Row::Pointer>(joblist::LONGDOUBLENULL, r.getPointer()));
long double key = UNLIKELY(smallKey == joblist::LONGDOUBLENULL) ? joblist::LONGDOUBLENULL : smallKey;
auto it = ld[bucket]->find(key);
if (it != ld[bucket]->end())
it->second.push_back(r.getPointer());
else
ld[bucket]->insert(pair<long double, Row::Pointer>(smallKey, r.getPointer()));
{
std::vector<Row::Pointer> vec;
vec.push_back(r.getPointer());
ld[bucket]->emplace(key, std::move(vec));
}
}
else if (!smallRG.usesStringTable())
{
@@ -462,10 +487,16 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
else
smallKey = (int64_t)r.getUintField(smallKeyColumns[0]);
uint bucket = bucketPicker((char*)&smallKey, sizeof(smallKey), bpSeed) & bucketMask;
if (UNLIKELY(smallKey == nullValueForJoinColumn))
h[bucket]->insert(pair<int64_t, uint8_t*>(getJoinNullValue(), r.getData()));
int64_t key = UNLIKELY(smallKey == nullValueForJoinColumn) ? getJoinNullValue() : smallKey;
auto it = h[bucket]->find(key);
if (it != h[bucket]->end())
it->second.push_back(r.getData());
else
h[bucket]->insert(pair<int64_t, uint8_t*>(smallKey, r.getData())); // Normal path for integers
{
std::vector<uint8_t*> vec;
vec.push_back(r.getData());
h[bucket]->emplace(key, std::move(vec));
}
}
else
{
@@ -476,10 +507,16 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
else
smallKey = (int64_t)r.getUintField(smallKeyColumns[0]);
uint bucket = bucketPicker((char*)&smallKey, sizeof(smallKey), bpSeed) & bucketMask;
if (UNLIKELY(smallKey == nullValueForJoinColumn))
sth[bucket]->insert(pair<int64_t, Row::Pointer>(getJoinNullValue(), r.getPointer()));
int64_t key = UNLIKELY(smallKey == nullValueForJoinColumn) ? getJoinNullValue() : smallKey;
auto it = sth[bucket]->find(key);
if (it != sth[bucket]->end())
it->second.push_back(r.getPointer());
else
sth[bucket]->insert(pair<int64_t, Row::Pointer>(smallKey, r.getPointer()));
{
std::vector<Row::Pointer> vec;
vec.push_back(r.getPointer());
sth[bucket]->emplace(key, std::move(vec));
}
}
}
else
@@ -510,8 +547,6 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
if (UNLIKELY(typelessJoin))
{
TypelessData largeKey;
thIterator it;
pair<thIterator, thIterator> range;
largeKey = makeTypelessKey(largeSideRow, largeKeyColumns, keyLength, &tmpKeyAlloc[threadID], smallRG,
smallKeyColumns);
@@ -519,31 +554,30 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
return;
uint bucket = bucketPicker((char*)largeKey.data, largeKey.len, bpSeed) & bucketMask;
range = ht[bucket]->equal_range(largeKey);
auto it = ht[bucket]->find(largeKey);
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
if (it == ht[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
return;
for (; range.first != range.second; ++range.first)
matches->push_back(range.first->second);
if (it != ht[bucket]->end())
for (auto& val : it->second)
matches->push_back(val);
}
else if (largeSideRow.getColType(largeKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE && !ld.empty())
{
// This is a compare of two long double
long double largeKey;
ldIterator it;
pair<ldIterator, ldIterator> range;
Row r;
largeKey = largeSideRow.getLongDoubleField(largeKeyColumns[0]);
uint bucket = bucketPicker((char*)&largeKey, 10, bpSeed) & bucketMask;
range = ld[bucket]->equal_range(largeKey);
auto it = ld[bucket]->find(largeKey);
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
if (it == ld[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
return;
for (; range.first != range.second; ++range.first)
if (it != ld[bucket]->end())
{
matches->push_back(range.first->second);
for (auto& val : it->second)
matches->push_back(val);
}
}
else if (!smallRG.usesStringTable())
@@ -568,37 +602,40 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
// Compare against long double
long double ldKey = largeKey;
uint bucket = bucketPicker((char*)&ldKey, 10, bpSeed) & bucketMask;
auto range = ld[bucket]->equal_range(ldKey);
auto it = ld[bucket]->find(ldKey);
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
if (it == ld[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
return;
for (; range.first != range.second; ++range.first)
matches->push_back(range.first->second);
if (it != ld[bucket]->end())
for (auto& val : it->second)
matches->push_back(val);
}
else
{
uint bucket = bucketPicker((char*)&largeKey, sizeof(largeKey), bpSeed) & bucketMask;
auto range = h[bucket]->equal_range(largeKey);
auto it = h[bucket]->find(largeKey);
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
if (it == h[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
return;
for (; range.first != range.second; ++range.first)
matches->emplace_back(rowgroup::Row::Pointer(range.first->second));
if (it != h[bucket]->end())
for (auto& val : it->second)
matches->emplace_back(rowgroup::Row::Pointer(val));
}
}
else
{
int64_t largeKey = largeSideRow.getIntField(largeKeyColumns[0]);
uint bucket = bucketPicker((char*)&largeKey, sizeof(largeKey), bpSeed) & bucketMask;
auto range = sth[bucket]->equal_range(largeKey);
auto it = sth[bucket]->find(largeKey);
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
if (it == sth[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
return;
for (; range.first != range.second; ++range.first)
matches->push_back(range.first->second);
if (it != sth[bucket]->end())
for (auto& val : it->second)
matches->push_back(val);
}
}
@@ -614,28 +651,31 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
{
uint bucket = bucketPicker((char*)&(joblist::LONGDOUBLENULL), sizeof(joblist::LONGDOUBLENULL), bpSeed) &
bucketMask;
pair<ldIterator, ldIterator> range = ld[bucket]->equal_range(joblist::LONGDOUBLENULL);
auto it = ld[bucket]->find(joblist::LONGDOUBLENULL);
for (; range.first != range.second; ++range.first)
matches->push_back(range.first->second);
if (it != ld[bucket]->end())
for (auto& val : it->second)
matches->push_back(val);
}
else if (!smallRG.usesStringTable())
{
auto nullVal = getJoinNullValue();
uint bucket = bucketPicker((char*)&nullVal, sizeof(nullVal), bpSeed) & bucketMask;
pair<iterator, iterator> range = h[bucket]->equal_range(nullVal);
auto it = h[bucket]->find(nullVal);
for (; range.first != range.second; ++range.first)
matches->emplace_back(rowgroup::Row::Pointer(range.first->second));
if (it != h[bucket]->end())
for (auto& val : it->second)
matches->emplace_back(rowgroup::Row::Pointer(val));
}
else
{
auto nullVal = getJoinNullValue();
uint bucket = bucketPicker((char*)&nullVal, sizeof(nullVal), bpSeed) & bucketMask;
pair<sthash_t::iterator, sthash_t::iterator> range = sth[bucket]->equal_range(nullVal);
auto it = sth[bucket]->find(nullVal);
for (; range.first != range.second; ++range.first)
matches->push_back(range.first->second);
if (it != sth[bucket]->end())
for (auto& val : it->second)
matches->push_back(val);
}
}
@@ -651,7 +691,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
for (uint i = 0; i < bucketCount; i++)
for (it = ld[i]->begin(); it != ld[i]->end(); ++it)
matches->push_back(it->second);
for (auto& val : it->second)
matches->push_back(val);
}
else if (!smallRG.usesStringTable())
{
@@ -659,7 +700,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
for (uint i = 0; i < bucketCount; i++)
for (it = h[i]->begin(); it != h[i]->end(); ++it)
matches->emplace_back(rowgroup::Row::Pointer(it->second));
for (auto& val : it->second)
matches->emplace_back(rowgroup::Row::Pointer(val));
}
else
{
@@ -667,7 +709,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
for (uint i = 0; i < bucketCount; i++)
for (it = sth[i]->begin(); it != sth[i]->end(); ++it)
matches->push_back(it->second);
for (auto& val : it->second)
matches->push_back(val);
}
}
else
@@ -676,7 +719,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
for (uint i = 0; i < bucketCount; i++)
for (it = ht[i]->begin(); it != ht[i]->end(); ++it)
matches->push_back(it->second);
for (auto& val : it->second)
matches->push_back(val);
}
}
}
@@ -726,6 +770,7 @@ void TupleJoiner::doneInserting()
rowCount = size();
uint bucket = 0;
size_t vecIdx = 0;
if (joinAlg == PM)
pmpos = 0;
else if (typelessJoin)
@@ -743,31 +788,71 @@ void TupleJoiner::doneInserting()
smallRow.setPointer((*rows)[pmpos++]);
else if (typelessJoin)
{
while (thit == ht[bucket]->end())
thit = ht[++bucket]->begin();
smallRow.setPointer(thit->second);
++thit;
while (thit == ht[bucket]->end() || vecIdx >= thit->second.size())
{
if (thit != ht[bucket]->end() && vecIdx >= thit->second.size())
{
++thit;
vecIdx = 0;
}
if (thit == ht[bucket]->end())
{
thit = ht[++bucket]->begin();
vecIdx = 0;
}
}
smallRow.setPointer(thit->second[vecIdx++]);
}
else if (isLongDouble(smallSideColType))
{
while (ldit == ld[bucket]->end())
ldit = ld[++bucket]->begin();
smallRow.setPointer(ldit->second);
++ldit;
while (ldit == ld[bucket]->end() || vecIdx >= ldit->second.size())
{
if (ldit != ld[bucket]->end() && vecIdx >= ldit->second.size())
{
++ldit;
vecIdx = 0;
}
if (ldit == ld[bucket]->end())
{
ldit = ld[++bucket]->begin();
vecIdx = 0;
}
}
smallRow.setPointer(ldit->second[vecIdx++]);
}
else if (!smallRG.usesStringTable())
{
while (hit == h[bucket]->end())
hit = h[++bucket]->begin();
smallRow.setPointer(rowgroup::Row::Pointer(hit->second));
++hit;
while (hit == h[bucket]->end() || vecIdx >= hit->second.size())
{
if (hit != h[bucket]->end() && vecIdx >= hit->second.size())
{
++hit;
vecIdx = 0;
}
if (hit == h[bucket]->end())
{
hit = h[++bucket]->begin();
vecIdx = 0;
}
}
smallRow.setPointer(rowgroup::Row::Pointer(hit->second[vecIdx++]));
}
else
{
while (sthit == sth[bucket]->end())
sthit = sth[++bucket]->begin();
smallRow.setPointer(sthit->second);
++sthit;
while (sthit == sth[bucket]->end() || vecIdx >= sthit->second.size())
{
if (sthit != sth[bucket]->end() && vecIdx >= sthit->second.size())
{
++sthit;
vecIdx = 0;
}
if (sthit == sth[bucket]->end())
{
sthit = sth[++bucket]->begin();
vecIdx = 0;
}
}
smallRow.setPointer(sthit->second[vecIdx++]);
}
if (isLongDouble(smallSideColType))
@@ -1023,10 +1108,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
for (uint i = 0; i < bucketCount; i++)
for (it = ht[i]->begin(); it != ht[i]->end(); ++it)
{
smallR.setPointer(it->second);
for (auto& val : it->second)
{
smallR.setPointer(val);
if (!smallR.isMarked())
out->push_back(it->second);
if (!smallR.isMarked())
out->push_back(val);
}
}
}
else if (smallRG.getColType(smallKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE)
@@ -1036,10 +1124,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
for (uint i = 0; i < bucketCount; i++)
for (it = ld[i]->begin(); it != ld[i]->end(); ++it)
{
smallR.setPointer(it->second);
for (auto& val : it->second)
{
smallR.setPointer(val);
if (!smallR.isMarked())
out->push_back(it->second);
if (!smallR.isMarked())
out->push_back(val);
}
}
}
else if (!smallRG.usesStringTable())
@@ -1049,10 +1140,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
for (uint i = 0; i < bucketCount; i++)
for (it = h[i]->begin(); it != h[i]->end(); ++it)
{
smallR.setPointer(rowgroup::Row::Pointer(it->second));
for (auto& val : it->second)
{
smallR.setPointer(rowgroup::Row::Pointer(val));
if (!smallR.isMarked())
out->emplace_back(rowgroup::Row::Pointer(it->second));
if (!smallR.isMarked())
out->emplace_back(rowgroup::Row::Pointer(val));
}
}
}
else
@@ -1062,10 +1156,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
for (uint i = 0; i < bucketCount; i++)
for (it = sth[i]->begin(); it != sth[i]->end(); ++it)
{
smallR.setPointer(it->second);
for (auto& val : it->second)
{
smallR.setPointer(val);
if (!smallR.isMarked())
out->push_back(it->second);
if (!smallR.isMarked())
out->push_back(val);
}
}
}
}
@@ -1198,14 +1295,28 @@ size_t TupleJoiner::size() const
{
size_t ret = 0;
for (uint i = 0; i < bucketCount; i++)
{
if (UNLIKELY(typelessJoin))
ret += ht[i]->size();
{
for (auto& kv : *ht[i])
ret += kv.second.size();
}
else if (smallRG.getColType(smallKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE)
ret += ld[i]->size();
{
for (auto& kv : *ld[i])
ret += kv.second.size();
}
else if (!smallRG.usesStringTable())
ret += h[i]->size();
{
for (auto& kv : *h[i])
ret += kv.second.size();
}
else
ret += sth[i]->size();
{
for (auto& kv : *sth[i])
ret += kv.second.size();
}
}
return ret;
}

View File

@@ -39,6 +39,7 @@
#include "columnwidth.h"
#include "mcs_string.h"
namespace joiner
{
uint32_t calculateKeyLength(const std::vector<uint32_t>& aKeyColumnsIds, const rowgroup::RowGroup& aRowGroup,
@@ -475,13 +476,11 @@ class TupleJoiner
void clearHashMaps();
private:
template <typename K, typename V>
using HashMapTemplate =
std::unordered_multimap<K, V, hasher, std::equal_to<K>, utils::STLPoolAllocator<std::pair<const K, V>>>;
using hash_t = HashMapTemplate<int64_t, uint8_t*>;
using sthash_t = HashMapTemplate<int64_t, rowgroup::Row::Pointer>;
using typelesshash_t = HashMapTemplate<TypelessData, rowgroup::Row::Pointer>;
using ldhash_t = HashMapTemplate<long double, rowgroup::Row::Pointer>;
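// Each key maps to a std::vector of the values stored under that key.
// The int64_t and long double maps use boost::unordered_flat_map's default hasher (boost::hash);
// only the TypelessData map passes the custom `hasher`.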
using hash_t = boost::unordered_flat_map<int64_t, std::vector<uint8_t*>>;
using sthash_t = boost::unordered_flat_map<int64_t, std::vector<rowgroup::Row::Pointer>>;
using typelesshash_t = boost::unordered_flat_map<TypelessData, std::vector<rowgroup::Row::Pointer>, hasher, std::equal_to<TypelessData>>;
using ldhash_t = boost::unordered_flat_map<long double, std::vector<rowgroup::Row::Pointer>>;
typedef hash_t::iterator iterator;
typedef typelesshash_t::iterator thIterator;