You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2026-01-06 08:21:10 +03:00
boost::unordered_flat_map for join
This commit is contained in:
@@ -33,6 +33,7 @@
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <unordered.h>
|
||||
#include <boost/unordered/unordered_flat_map.hpp>
|
||||
|
||||
#include <boost/thread.hpp>
|
||||
#include <boost/scoped_array.hpp>
|
||||
@@ -107,13 +108,12 @@ class HashJoin
|
||||
// than a list of ElementType to reduce memory utilization and to increase the performance of loading the
|
||||
// map. typedef std::tr1::unordered_map<typename element_t::second_type, std::list<element_t>,
|
||||
// HjHasher<element_t> > hash_t;
|
||||
typedef
|
||||
typename std::tr1::unordered_multimap<typename element_t::second_type, typename element_t::first_type>
|
||||
hash_t;
|
||||
typedef typename std::tr1::unordered_multimap<typename element_t::second_type,
|
||||
typename element_t::first_type>::iterator hashIter_t;
|
||||
typedef typename std::tr1::unordered_multimap<typename element_t::second_type,
|
||||
typename element_t::first_type>::value_type hashPair_t;
|
||||
// Now using boost::unordered_flat_map with vector for better cache locality
|
||||
typedef typename boost::unordered_flat_map<typename element_t::second_type,
|
||||
std::vector<typename element_t::first_type>>
|
||||
hash_t;
|
||||
typedef typename hash_t::iterator hashIter_t;
|
||||
typedef typename hash_t::value_type hashPair_t;
|
||||
|
||||
// allow each thread to have its own pointers
|
||||
struct control_struct
|
||||
@@ -503,9 +503,18 @@ void HashJoin<element_t>::createHash(BucketDL<element_t>* srcBucketDL, hash_t* d
|
||||
|
||||
try
|
||||
{
|
||||
// std::list<element_t> tmp(1,e);
|
||||
destHashTbl->insert(
|
||||
std::pair<typename element_t::second_type, typename element_t::first_type>(e.second, e.first));
|
||||
// For boost::unordered_flat_map, we store vectors of values
|
||||
auto it = destHashTbl->find(e.second);
|
||||
if (it != destHashTbl->end())
|
||||
{
|
||||
it->second.push_back(e.first);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<typename element_t::first_type> vec;
|
||||
vec.push_back(e.first);
|
||||
destHashTbl->emplace(e.second, std::move(vec));
|
||||
}
|
||||
}
|
||||
catch (exception& exc)
|
||||
{
|
||||
|
||||
2844
utils/joiner/btree.h
2844
utils/joiner/btree.h
File diff suppressed because it is too large
Load Diff
@@ -1,445 +0,0 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iosfwd>
|
||||
#include <utility>
|
||||
|
||||
#include "btree.h"
|
||||
|
||||
namespace btree
|
||||
{
|
||||
// A common base class for btree_set, btree_map, btree_multiset and
// btree_multimap.
//
// Owns the underlying Tree and forwards the operations that do not depend on
// unique-vs-multi semantics (iteration, bound lookups, size and statistics
// queries, comparison) directly to it. Insertion, lookup by key and erasure
// are supplied by the derived containers.
template <typename Tree>
class btree_container
{
  typedef btree_container<Tree> self_type;

 public:
  typedef typename Tree::params_type params_type;
  typedef typename Tree::key_type key_type;
  typedef typename Tree::value_type value_type;
  typedef typename Tree::key_compare key_compare;
  typedef typename Tree::allocator_type allocator_type;
  typedef typename Tree::pointer pointer;
  typedef typename Tree::const_pointer const_pointer;
  typedef typename Tree::reference reference;
  typedef typename Tree::const_reference const_reference;
  typedef typename Tree::size_type size_type;
  typedef typename Tree::difference_type difference_type;
  typedef typename Tree::iterator iterator;
  typedef typename Tree::const_iterator const_iterator;
  typedef typename Tree::reverse_iterator reverse_iterator;
  typedef typename Tree::const_reverse_iterator const_reverse_iterator;

 public:
  // Constructs the underlying tree from the given comparator and allocator.
  btree_container(const key_compare& comp, const allocator_type& alloc) : tree_(comp, alloc)
  {
  }

  // Copy constructor: copy-constructs the underlying tree from x's tree.
  btree_container(const self_type& x) : tree_(x.tree_)
  {
  }

  // Iterator routines (all forwarded to the tree).
  iterator begin()
  {
    return tree_.begin();
  }
  const_iterator begin() const
  {
    return tree_.begin();
  }
  iterator end()
  {
    return tree_.end();
  }
  const_iterator end() const
  {
    return tree_.end();
  }
  reverse_iterator rbegin()
  {
    return tree_.rbegin();
  }
  const_reverse_iterator rbegin() const
  {
    return tree_.rbegin();
  }
  reverse_iterator rend()
  {
    return tree_.rend();
  }
  const_reverse_iterator rend() const
  {
    return tree_.rend();
  }

  // Lookup routines (all forwarded to the tree).
  iterator lower_bound(const key_type& key)
  {
    return tree_.lower_bound(key);
  }
  const_iterator lower_bound(const key_type& key) const
  {
    return tree_.lower_bound(key);
  }
  iterator upper_bound(const key_type& key)
  {
    return tree_.upper_bound(key);
  }
  const_iterator upper_bound(const key_type& key) const
  {
    return tree_.upper_bound(key);
  }
  std::pair<iterator, iterator> equal_range(const key_type& key)
  {
    return tree_.equal_range(key);
  }
  std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const
  {
    return tree_.equal_range(key);
  }

  // Utility routines.
  void clear()
  {
    tree_.clear();
  }
  // Exchanges the underlying trees of *this and x.
  void swap(self_type& x)
  {
    tree_.swap(x.tree_);
  }
  // Forwards to the tree's dump(), passing os through.
  void dump(std::ostream& os) const
  {
    tree_.dump(os);
  }
  void verify() const
  {
    tree_.verify();
  }

  // Size and statistics routines (all forwarded to the tree).
  size_type size() const
  {
    return tree_.size();
  }
  size_type max_size() const
  {
    return tree_.max_size();
  }
  bool empty() const
  {
    return tree_.empty();
  }
  size_type height() const
  {
    return tree_.height();
  }
  size_type internal_nodes() const
  {
    return tree_.internal_nodes();
  }
  size_type leaf_nodes() const
  {
    return tree_.leaf_nodes();
  }
  size_type nodes() const
  {
    return tree_.nodes();
  }
  size_type bytes_used() const
  {
    return tree_.bytes_used();
  }
  static double average_bytes_per_value()
  {
    return Tree::average_bytes_per_value();
  }
  double fullness() const
  {
    return tree_.fullness();
  }
  double overhead() const
  {
    return tree_.overhead();
  }

  // Two containers are equal when they hold the same number of elements and
  // those elements compare equal pairwise in iteration order.
  bool operator==(const self_type& x) const
  {
    if (size() != x.size())
    {
      return false;
    }

    // Sizes match, so walking *this to end() also stays within x.
    for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi)
    {
      if (*i != *xi)
      {
        return false;
      }
    }

    return true;
  }

  bool operator!=(const self_type& other) const
  {
    return !operator==(other);
  }

 protected:
  Tree tree_;
};
|
||||
|
||||
template <typename T>
|
||||
inline std::ostream& operator<<(std::ostream& os, const btree_container<T>& b)
|
||||
{
|
||||
b.dump(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
// A common base class for btree_set and safe_btree_set.
|
||||
template <typename Tree>
|
||||
class btree_unique_container : public btree_container<Tree>
|
||||
{
|
||||
typedef btree_unique_container<Tree> self_type;
|
||||
typedef btree_container<Tree> super_type;
|
||||
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_unique_container(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_unique_container(const self_type& x) : super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_unique_container(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
insert(b, e);
|
||||
}
|
||||
|
||||
// Lookup routines.
|
||||
iterator find(const key_type& key)
|
||||
{
|
||||
return this->tree_.find_unique(key);
|
||||
}
|
||||
const_iterator find(const key_type& key) const
|
||||
{
|
||||
return this->tree_.find_unique(key);
|
||||
}
|
||||
size_type count(const key_type& key) const
|
||||
{
|
||||
return this->tree_.count_unique(key);
|
||||
}
|
||||
|
||||
// Insertion routines.
|
||||
std::pair<iterator, bool> insert(const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_unique(x);
|
||||
}
|
||||
iterator insert(iterator position, const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_unique(position, x);
|
||||
}
|
||||
template <typename InputIterator>
|
||||
void insert(InputIterator b, InputIterator e)
|
||||
{
|
||||
this->tree_.insert_unique(b, e);
|
||||
}
|
||||
|
||||
// Deletion routines.
|
||||
int erase(const key_type& key)
|
||||
{
|
||||
return this->tree_.erase_unique(key);
|
||||
}
|
||||
// Erase the specified iterator from the btree. The iterator must be valid
|
||||
// (i.e. not equal to end()). Return an iterator pointing to the node after
|
||||
// the one that was erased (or end() if none exists).
|
||||
iterator erase(const iterator& iter)
|
||||
{
|
||||
return this->tree_.erase(iter);
|
||||
}
|
||||
void erase(const iterator& first, const iterator& last)
|
||||
{
|
||||
this->tree_.erase(first, last);
|
||||
}
|
||||
};
|
||||
|
||||
// A common base class for btree_map and safe_btree_map.
|
||||
template <typename Tree>
|
||||
class btree_map_container : public btree_unique_container<Tree>
|
||||
{
|
||||
typedef btree_map_container<Tree> self_type;
|
||||
typedef btree_unique_container<Tree> super_type;
|
||||
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::data_type data_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::mapped_type mapped_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
|
||||
private:
|
||||
// A pointer-like object which only generates its value when
|
||||
// dereferenced. Used by operator[] to avoid constructing an empty data_type
|
||||
// if the key already exists in the map.
|
||||
struct generate_value
|
||||
{
|
||||
generate_value(const key_type& k) : key(k)
|
||||
{
|
||||
}
|
||||
value_type operator*() const
|
||||
{
|
||||
return std::make_pair(key, data_type());
|
||||
}
|
||||
const key_type& key;
|
||||
};
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_map_container(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_map_container(const self_type& x) : super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_map_container(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Insertion routines.
|
||||
data_type& operator[](const key_type& key)
|
||||
{
|
||||
return this->tree_.insert_unique(key, generate_value(key)).first->second;
|
||||
}
|
||||
};
|
||||
|
||||
// A common base class for btree_multiset and btree_multimap.
|
||||
template <typename Tree>
|
||||
class btree_multi_container : public btree_container<Tree>
|
||||
{
|
||||
typedef btree_multi_container<Tree> self_type;
|
||||
typedef btree_container<Tree> super_type;
|
||||
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multi_container(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_multi_container(const self_type& x) : super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multi_container(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
insert(b, e);
|
||||
}
|
||||
|
||||
// Lookup routines.
|
||||
iterator find(const key_type& key)
|
||||
{
|
||||
return this->tree_.find_multi(key);
|
||||
}
|
||||
const_iterator find(const key_type& key) const
|
||||
{
|
||||
return this->tree_.find_multi(key);
|
||||
}
|
||||
size_type count(const key_type& key) const
|
||||
{
|
||||
return this->tree_.count_multi(key);
|
||||
}
|
||||
|
||||
// Insertion routines.
|
||||
iterator insert(const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_multi(x);
|
||||
}
|
||||
iterator insert(iterator position, const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_multi(position, x);
|
||||
}
|
||||
template <typename InputIterator>
|
||||
void insert(InputIterator b, InputIterator e)
|
||||
{
|
||||
this->tree_.insert_multi(b, e);
|
||||
}
|
||||
|
||||
// Deletion routines.
|
||||
int erase(const key_type& key)
|
||||
{
|
||||
return this->tree_.erase_multi(key);
|
||||
}
|
||||
// Erase the specified iterator from the btree. The iterator must be valid
|
||||
// (i.e. not equal to end()). Return an iterator pointing to the node after
|
||||
// the one that was erased (or end() if none exists).
|
||||
iterator erase(const iterator& iter)
|
||||
{
|
||||
return this->tree_.erase(iter);
|
||||
}
|
||||
void erase(const iterator& first, const iterator& last)
|
||||
{
|
||||
this->tree_.erase(first, last);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace btree
|
||||
@@ -1,121 +0,0 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// A btree_map<> implements the STL unique sorted associative container
|
||||
// interface and the pair associative container interface (a.k.a map<>) using a
|
||||
// btree. A btree_multimap<> implements the STL multiple sorted associative
|
||||
// container interface and the pair associative container interface (a.k.a
|
||||
// multimap<>) using a btree. See btree.h for details of the btree
|
||||
// implementation and caveats.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "btree.h"
|
||||
#include "btree_container.h"
|
||||
|
||||
namespace btree
|
||||
{
|
||||
// The btree_map class is needed mainly for its constructors.
|
||||
template <typename Key, typename Value, typename Compare = std::less<Key>,
|
||||
typename Alloc = std::allocator<std::pair<const Key, Value> >, int TargetNodeSize = 256>
|
||||
class btree_map
|
||||
: public btree_map_container<btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_map_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_map(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_map(const self_type& x) : super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_map(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename V, typename C, typename A, int N>
|
||||
inline void swap(btree_map<K, V, C, A, N>& x, btree_map<K, V, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
// The btree_multimap class is needed mainly for its constructors.
|
||||
template <typename Key, typename Value, typename Compare = std::less<Key>,
|
||||
typename Alloc = std::allocator<std::pair<const Key, Value> >, int TargetNodeSize = 256>
|
||||
class btree_multimap
|
||||
: public btree_multi_container<btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_multi_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
typedef typename btree_type::data_type data_type;
|
||||
typedef typename btree_type::mapped_type mapped_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multimap(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_multimap(const self_type& x) : super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multimap(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename V, typename C, typename A, int N>
|
||||
inline void swap(btree_multimap<K, V, C, A, N>& x, btree_multimap<K, V, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
} // namespace btree
|
||||
@@ -1,115 +0,0 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// A btree_set<> implements the STL unique sorted associative container
|
||||
// interface (a.k.a set<>) using a btree. A btree_multiset<> implements the STL
|
||||
// multiple sorted associative container interface (a.k.a multiset<>) using a
|
||||
// btree. See btree.h for details of the btree implementation and caveats.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "btree.h"
|
||||
#include "btree_container.h"
|
||||
|
||||
namespace btree
|
||||
{
|
||||
// The btree_set class is needed mainly for its constructors.
|
||||
template <typename Key, typename Compare = std::less<Key>, typename Alloc = std::allocator<Key>,
|
||||
int TargetNodeSize = 256>
|
||||
class btree_set
|
||||
: public btree_unique_container<btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
typedef btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_unique_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_set(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_set(const self_type& x) : super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_set(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename C, typename A, int N>
|
||||
inline void swap(btree_set<K, C, A, N>& x, btree_set<K, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
// The btree_multiset class is needed mainly for its constructors.
|
||||
template <typename Key, typename Compare = std::less<Key>, typename Alloc = std::allocator<Key>,
|
||||
int TargetNodeSize = 256>
|
||||
class btree_multiset
|
||||
: public btree_multi_container<btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
typedef btree_multiset<Key, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_multi_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multiset(const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_multiset(const self_type& x) : super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multiset(InputIterator b, InputIterator e, const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename C, typename A, int N>
|
||||
inline void swap(btree_multiset<K, C, A, N>& x, btree_multiset<K, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
} // namespace btree
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/unordered/unordered_flat_map.hpp>
|
||||
|
||||
#include <unordered.h>
|
||||
|
||||
@@ -50,9 +51,8 @@ class Joiner
|
||||
{
|
||||
public:
|
||||
// typedef std::tr1::unordered_multimap<uint64_t, uint64_t> hash_t;
|
||||
typedef std::tr1::unordered_multimap<uint64_t, uint64_t, std::tr1::hash<uint64_t>, std::equal_to<uint64_t>,
|
||||
utils::SimpleAllocator<std::pair<uint64_t const, uint64_t> > >
|
||||
hash_t;
|
||||
// Using boost::unordered_flat_map with vector for multimap behavior
|
||||
typedef boost::unordered_flat_map<uint64_t, std::vector<uint64_t>> hash_t;
|
||||
|
||||
typedef hash_t::iterator iterator;
|
||||
|
||||
@@ -74,7 +74,17 @@ class Joiner
|
||||
}
|
||||
/* Adds one element to the hash table: e.second is the lookup key and e.first is
 * appended to the list of values stored under that key. */
inline void insert(const joblist::ElementType& e)
{
  // operator[] creates an empty vector the first time a key is seen, so one hash
  // lookup serves both the new-key and the existing-key case.
  (*h)[e.second].push_back(e.first);
}
|
||||
void doneInserting();
|
||||
boost::shared_ptr<std::vector<joblist::ElementType> > getSmallSide();
|
||||
@@ -83,19 +93,17 @@ class Joiner
|
||||
/* Used by the UM */
|
||||
inline bool match(const joblist::ElementType& large)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
iterator it = h->find(large.second);
|
||||
|
||||
if (it == h->end())
|
||||
return _includeAll;
|
||||
else if (it->second & MSB)
|
||||
else if (!it->second.empty() && (it->second[0] & MSB))
|
||||
return true;
|
||||
else
|
||||
{
|
||||
range = h->equal_range(large.second);
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
// Mark all values in the vector
|
||||
for (auto& val : it->second)
|
||||
val |= MSB;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -103,31 +111,32 @@ class Joiner
|
||||
|
||||
/* ORs MSB into every value stored under the key large.second.
 * Does nothing when that key is not present in the table. */
inline void mark(const joblist::ElementType& large)
{
  iterator it = h->find(large.second);

  if (it == h->end())
    return;

  for (auto& val : it->second)
    val |= MSB;
}
|
||||
|
||||
/* Used by the PM */
|
||||
inline bool getNewMatches(const uint64_t value, std::vector<joblist::ElementType>* newMatches)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
iterator it = h->find(value);
|
||||
|
||||
if (it == h->end())
|
||||
return _includeAll;
|
||||
else if (it->second & MSB)
|
||||
else if (!it->second.empty() && (it->second[0] & MSB))
|
||||
return true;
|
||||
else
|
||||
{
|
||||
newMatches->push_back(joblist::ElementType(it->second | MSB, value));
|
||||
range = h->equal_range(value);
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
// Add all values to newMatches and mark them
|
||||
for (auto& val : it->second)
|
||||
{
|
||||
newMatches->push_back(joblist::ElementType(val | MSB, value));
|
||||
val |= MSB;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -41,11 +41,10 @@ namespace joiner
|
||||
constexpr const size_t DEFAULT_BUCKET_COUNT = 10;
|
||||
|
||||
template <typename HashTable>
|
||||
std::unique_ptr<HashTable> makeHashMap(size_t bucketCount, ResourceManager* resourceManager)
|
||||
std::unique_ptr<HashTable> makeHashMap(size_t /*bucketCount*/, ResourceManager* /*resourceManager*/)
|
||||
{
|
||||
return std::unique_ptr<HashTable>(new HashTable(bucketCount, TupleJoiner::hasher(),
|
||||
typename HashTable::key_equal(),
|
||||
utils::STLPoolAllocator<typename HashTable::value_type>(resourceManager)));
|
||||
// boost::unordered_flat_map doesn't need bucket_count or allocator in constructor
|
||||
return std::unique_ptr<HashTable>(new HashTable());
|
||||
}
|
||||
|
||||
void TupleJoiner::initRowsVector()
|
||||
@@ -290,7 +289,19 @@ void TupleJoiner::bucketsToTables(buckets_t* buckets, hash_table_t& tables)
|
||||
done = false;
|
||||
continue;
|
||||
}
|
||||
tables[i]->insert(buckets[i].begin(), buckets[i].end());
|
||||
// Insert each pair into the vector-based flat_map
|
||||
for (auto& p : buckets[i])
|
||||
{
|
||||
auto it = tables[i]->find(p.first);
|
||||
if (it != tables[i]->end())
|
||||
it->second.push_back(p.second);
|
||||
else
|
||||
{
|
||||
std::vector<typename std::decay<decltype(p.second)>::type> vec;
|
||||
vec.push_back(p.second);
|
||||
tables[i]->emplace(p.first, std::move(vec));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wasProductive = true;
|
||||
@@ -440,7 +451,15 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
|
||||
if (td.len > 0)
|
||||
{
|
||||
uint bucket = bucketPicker((char*)td.data, td.len, bpSeed) & bucketMask;
|
||||
ht[bucket]->insert(pair<TypelessData, Row::Pointer>(td, r.getPointer()));
|
||||
auto it = ht[bucket]->find(td);
|
||||
if (it != ht[bucket]->end())
|
||||
it->second.push_back(r.getPointer());
|
||||
else
|
||||
{
|
||||
std::vector<Row::Pointer> vec;
|
||||
vec.push_back(r.getPointer());
|
||||
ht[bucket]->emplace(td, std::move(vec));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (r.getColType(smallKeyColumns[0]) == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
@@ -448,10 +467,16 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
|
||||
long double smallKey = r.getLongDoubleField(smallKeyColumns[0]);
|
||||
uint bucket = bucketPicker((char*)&smallKey, 10, bpSeed) &
|
||||
bucketMask; // change if we decide to support windows again
|
||||
if (UNLIKELY(smallKey == joblist::LONGDOUBLENULL))
|
||||
ld[bucket]->insert(pair<long double, Row::Pointer>(joblist::LONGDOUBLENULL, r.getPointer()));
|
||||
long double key = UNLIKELY(smallKey == joblist::LONGDOUBLENULL) ? joblist::LONGDOUBLENULL : smallKey;
|
||||
auto it = ld[bucket]->find(key);
|
||||
if (it != ld[bucket]->end())
|
||||
it->second.push_back(r.getPointer());
|
||||
else
|
||||
ld[bucket]->insert(pair<long double, Row::Pointer>(smallKey, r.getPointer()));
|
||||
{
|
||||
std::vector<Row::Pointer> vec;
|
||||
vec.push_back(r.getPointer());
|
||||
ld[bucket]->emplace(key, std::move(vec));
|
||||
}
|
||||
}
|
||||
else if (!smallRG.usesStringTable())
|
||||
{
|
||||
@@ -462,10 +487,16 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
|
||||
else
|
||||
smallKey = (int64_t)r.getUintField(smallKeyColumns[0]);
|
||||
uint bucket = bucketPicker((char*)&smallKey, sizeof(smallKey), bpSeed) & bucketMask;
|
||||
if (UNLIKELY(smallKey == nullValueForJoinColumn))
|
||||
h[bucket]->insert(pair<int64_t, uint8_t*>(getJoinNullValue(), r.getData()));
|
||||
int64_t key = UNLIKELY(smallKey == nullValueForJoinColumn) ? getJoinNullValue() : smallKey;
|
||||
auto it = h[bucket]->find(key);
|
||||
if (it != h[bucket]->end())
|
||||
it->second.push_back(r.getData());
|
||||
else
|
||||
h[bucket]->insert(pair<int64_t, uint8_t*>(smallKey, r.getData())); // Normal path for integers
|
||||
{
|
||||
std::vector<uint8_t*> vec;
|
||||
vec.push_back(r.getData());
|
||||
h[bucket]->emplace(key, std::move(vec));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -476,10 +507,16 @@ void TupleJoiner::insert(Row& r, bool zeroTheRid)
|
||||
else
|
||||
smallKey = (int64_t)r.getUintField(smallKeyColumns[0]);
|
||||
uint bucket = bucketPicker((char*)&smallKey, sizeof(smallKey), bpSeed) & bucketMask;
|
||||
if (UNLIKELY(smallKey == nullValueForJoinColumn))
|
||||
sth[bucket]->insert(pair<int64_t, Row::Pointer>(getJoinNullValue(), r.getPointer()));
|
||||
int64_t key = UNLIKELY(smallKey == nullValueForJoinColumn) ? getJoinNullValue() : smallKey;
|
||||
auto it = sth[bucket]->find(key);
|
||||
if (it != sth[bucket]->end())
|
||||
it->second.push_back(r.getPointer());
|
||||
else
|
||||
sth[bucket]->insert(pair<int64_t, Row::Pointer>(smallKey, r.getPointer()));
|
||||
{
|
||||
std::vector<Row::Pointer> vec;
|
||||
vec.push_back(r.getPointer());
|
||||
sth[bucket]->emplace(key, std::move(vec));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -510,8 +547,6 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
if (UNLIKELY(typelessJoin))
|
||||
{
|
||||
TypelessData largeKey;
|
||||
thIterator it;
|
||||
pair<thIterator, thIterator> range;
|
||||
|
||||
largeKey = makeTypelessKey(largeSideRow, largeKeyColumns, keyLength, &tmpKeyAlloc[threadID], smallRG,
|
||||
smallKeyColumns);
|
||||
@@ -519,31 +554,30 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
return;
|
||||
|
||||
uint bucket = bucketPicker((char*)largeKey.data, largeKey.len, bpSeed) & bucketMask;
|
||||
range = ht[bucket]->equal_range(largeKey);
|
||||
auto it = ht[bucket]->find(largeKey);
|
||||
|
||||
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
if (it == ht[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
return;
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->push_back(range.first->second);
|
||||
if (it != ht[bucket]->end())
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
else if (largeSideRow.getColType(largeKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE && !ld.empty())
|
||||
{
|
||||
// This is a compare of two long double
|
||||
long double largeKey;
|
||||
ldIterator it;
|
||||
pair<ldIterator, ldIterator> range;
|
||||
Row r;
|
||||
|
||||
largeKey = largeSideRow.getLongDoubleField(largeKeyColumns[0]);
|
||||
uint bucket = bucketPicker((char*)&largeKey, 10, bpSeed) & bucketMask;
|
||||
range = ld[bucket]->equal_range(largeKey);
|
||||
auto it = ld[bucket]->find(largeKey);
|
||||
|
||||
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
if (it == ld[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
return;
|
||||
for (; range.first != range.second; ++range.first)
|
||||
if (it != ld[bucket]->end())
|
||||
{
|
||||
matches->push_back(range.first->second);
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
}
|
||||
else if (!smallRG.usesStringTable())
|
||||
@@ -568,37 +602,40 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
// Compare against long double
|
||||
long double ldKey = largeKey;
|
||||
uint bucket = bucketPicker((char*)&ldKey, 10, bpSeed) & bucketMask;
|
||||
auto range = ld[bucket]->equal_range(ldKey);
|
||||
auto it = ld[bucket]->find(ldKey);
|
||||
|
||||
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
if (it == ld[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
return;
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->push_back(range.first->second);
|
||||
if (it != ld[bucket]->end())
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint bucket = bucketPicker((char*)&largeKey, sizeof(largeKey), bpSeed) & bucketMask;
|
||||
auto range = h[bucket]->equal_range(largeKey);
|
||||
auto it = h[bucket]->find(largeKey);
|
||||
|
||||
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
if (it == h[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
return;
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->emplace_back(rowgroup::Row::Pointer(range.first->second));
|
||||
if (it != h[bucket]->end())
|
||||
for (auto& val : it->second)
|
||||
matches->emplace_back(rowgroup::Row::Pointer(val));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int64_t largeKey = largeSideRow.getIntField(largeKeyColumns[0]);
|
||||
uint bucket = bucketPicker((char*)&largeKey, sizeof(largeKey), bpSeed) & bucketMask;
|
||||
auto range = sth[bucket]->equal_range(largeKey);
|
||||
auto it = sth[bucket]->find(largeKey);
|
||||
|
||||
if (range.first == range.second && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
if (it == sth[bucket]->end() && !(joinType & (LARGEOUTER | MATCHNULLS)))
|
||||
return;
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->push_back(range.first->second);
|
||||
if (it != sth[bucket]->end())
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -614,28 +651,31 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
{
|
||||
uint bucket = bucketPicker((char*)&(joblist::LONGDOUBLENULL), sizeof(joblist::LONGDOUBLENULL), bpSeed) &
|
||||
bucketMask;
|
||||
pair<ldIterator, ldIterator> range = ld[bucket]->equal_range(joblist::LONGDOUBLENULL);
|
||||
auto it = ld[bucket]->find(joblist::LONGDOUBLENULL);
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->push_back(range.first->second);
|
||||
if (it != ld[bucket]->end())
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
else if (!smallRG.usesStringTable())
|
||||
{
|
||||
auto nullVal = getJoinNullValue();
|
||||
uint bucket = bucketPicker((char*)&nullVal, sizeof(nullVal), bpSeed) & bucketMask;
|
||||
pair<iterator, iterator> range = h[bucket]->equal_range(nullVal);
|
||||
auto it = h[bucket]->find(nullVal);
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->emplace_back(rowgroup::Row::Pointer(range.first->second));
|
||||
if (it != h[bucket]->end())
|
||||
for (auto& val : it->second)
|
||||
matches->emplace_back(rowgroup::Row::Pointer(val));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto nullVal = getJoinNullValue();
|
||||
uint bucket = bucketPicker((char*)&nullVal, sizeof(nullVal), bpSeed) & bucketMask;
|
||||
pair<sthash_t::iterator, sthash_t::iterator> range = sth[bucket]->equal_range(nullVal);
|
||||
auto it = sth[bucket]->find(nullVal);
|
||||
|
||||
for (; range.first != range.second; ++range.first)
|
||||
matches->push_back(range.first->second);
|
||||
if (it != sth[bucket]->end())
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -651,7 +691,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = ld[i]->begin(); it != ld[i]->end(); ++it)
|
||||
matches->push_back(it->second);
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
else if (!smallRG.usesStringTable())
|
||||
{
|
||||
@@ -659,7 +700,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = h[i]->begin(); it != h[i]->end(); ++it)
|
||||
matches->emplace_back(rowgroup::Row::Pointer(it->second));
|
||||
for (auto& val : it->second)
|
||||
matches->emplace_back(rowgroup::Row::Pointer(val));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -667,7 +709,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = sth[i]->begin(); it != sth[i]->end(); ++it)
|
||||
matches->push_back(it->second);
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -676,7 +719,8 @@ void TupleJoiner::match(rowgroup::Row& largeSideRow, uint32_t largeRowIndex, uin
|
||||
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = ht[i]->begin(); it != ht[i]->end(); ++it)
|
||||
matches->push_back(it->second);
|
||||
for (auto& val : it->second)
|
||||
matches->push_back(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -726,6 +770,7 @@ void TupleJoiner::doneInserting()
|
||||
rowCount = size();
|
||||
|
||||
uint bucket = 0;
|
||||
size_t vecIdx = 0;
|
||||
if (joinAlg == PM)
|
||||
pmpos = 0;
|
||||
else if (typelessJoin)
|
||||
@@ -743,31 +788,71 @@ void TupleJoiner::doneInserting()
|
||||
smallRow.setPointer((*rows)[pmpos++]);
|
||||
else if (typelessJoin)
|
||||
{
|
||||
while (thit == ht[bucket]->end())
|
||||
thit = ht[++bucket]->begin();
|
||||
smallRow.setPointer(thit->second);
|
||||
++thit;
|
||||
while (thit == ht[bucket]->end() || vecIdx >= thit->second.size())
|
||||
{
|
||||
if (thit != ht[bucket]->end() && vecIdx >= thit->second.size())
|
||||
{
|
||||
++thit;
|
||||
vecIdx = 0;
|
||||
}
|
||||
if (thit == ht[bucket]->end())
|
||||
{
|
||||
thit = ht[++bucket]->begin();
|
||||
vecIdx = 0;
|
||||
}
|
||||
}
|
||||
smallRow.setPointer(thit->second[vecIdx++]);
|
||||
}
|
||||
else if (isLongDouble(smallSideColType))
|
||||
{
|
||||
while (ldit == ld[bucket]->end())
|
||||
ldit = ld[++bucket]->begin();
|
||||
smallRow.setPointer(ldit->second);
|
||||
++ldit;
|
||||
while (ldit == ld[bucket]->end() || vecIdx >= ldit->second.size())
|
||||
{
|
||||
if (ldit != ld[bucket]->end() && vecIdx >= ldit->second.size())
|
||||
{
|
||||
++ldit;
|
||||
vecIdx = 0;
|
||||
}
|
||||
if (ldit == ld[bucket]->end())
|
||||
{
|
||||
ldit = ld[++bucket]->begin();
|
||||
vecIdx = 0;
|
||||
}
|
||||
}
|
||||
smallRow.setPointer(ldit->second[vecIdx++]);
|
||||
}
|
||||
else if (!smallRG.usesStringTable())
|
||||
{
|
||||
while (hit == h[bucket]->end())
|
||||
hit = h[++bucket]->begin();
|
||||
smallRow.setPointer(rowgroup::Row::Pointer(hit->second));
|
||||
++hit;
|
||||
while (hit == h[bucket]->end() || vecIdx >= hit->second.size())
|
||||
{
|
||||
if (hit != h[bucket]->end() && vecIdx >= hit->second.size())
|
||||
{
|
||||
++hit;
|
||||
vecIdx = 0;
|
||||
}
|
||||
if (hit == h[bucket]->end())
|
||||
{
|
||||
hit = h[++bucket]->begin();
|
||||
vecIdx = 0;
|
||||
}
|
||||
}
|
||||
smallRow.setPointer(rowgroup::Row::Pointer(hit->second[vecIdx++]));
|
||||
}
|
||||
else
|
||||
{
|
||||
while (sthit == sth[bucket]->end())
|
||||
sthit = sth[++bucket]->begin();
|
||||
smallRow.setPointer(sthit->second);
|
||||
++sthit;
|
||||
while (sthit == sth[bucket]->end() || vecIdx >= sthit->second.size())
|
||||
{
|
||||
if (sthit != sth[bucket]->end() && vecIdx >= sthit->second.size())
|
||||
{
|
||||
++sthit;
|
||||
vecIdx = 0;
|
||||
}
|
||||
if (sthit == sth[bucket]->end())
|
||||
{
|
||||
sthit = sth[++bucket]->begin();
|
||||
vecIdx = 0;
|
||||
}
|
||||
}
|
||||
smallRow.setPointer(sthit->second[vecIdx++]);
|
||||
}
|
||||
|
||||
if (isLongDouble(smallSideColType))
|
||||
@@ -1023,10 +1108,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = ht[i]->begin(); it != ht[i]->end(); ++it)
|
||||
{
|
||||
smallR.setPointer(it->second);
|
||||
for (auto& val : it->second)
|
||||
{
|
||||
smallR.setPointer(val);
|
||||
|
||||
if (!smallR.isMarked())
|
||||
out->push_back(it->second);
|
||||
if (!smallR.isMarked())
|
||||
out->push_back(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (smallRG.getColType(smallKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE)
|
||||
@@ -1036,10 +1124,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = ld[i]->begin(); it != ld[i]->end(); ++it)
|
||||
{
|
||||
smallR.setPointer(it->second);
|
||||
for (auto& val : it->second)
|
||||
{
|
||||
smallR.setPointer(val);
|
||||
|
||||
if (!smallR.isMarked())
|
||||
out->push_back(it->second);
|
||||
if (!smallR.isMarked())
|
||||
out->push_back(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!smallRG.usesStringTable())
|
||||
@@ -1049,10 +1140,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = h[i]->begin(); it != h[i]->end(); ++it)
|
||||
{
|
||||
smallR.setPointer(rowgroup::Row::Pointer(it->second));
|
||||
for (auto& val : it->second)
|
||||
{
|
||||
smallR.setPointer(rowgroup::Row::Pointer(val));
|
||||
|
||||
if (!smallR.isMarked())
|
||||
out->emplace_back(rowgroup::Row::Pointer(it->second));
|
||||
if (!smallR.isMarked())
|
||||
out->emplace_back(rowgroup::Row::Pointer(val));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -1062,10 +1156,13 @@ void TupleJoiner::getUnmarkedRows(vector<Row::Pointer>* out)
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
for (it = sth[i]->begin(); it != sth[i]->end(); ++it)
|
||||
{
|
||||
smallR.setPointer(it->second);
|
||||
for (auto& val : it->second)
|
||||
{
|
||||
smallR.setPointer(val);
|
||||
|
||||
if (!smallR.isMarked())
|
||||
out->push_back(it->second);
|
||||
if (!smallR.isMarked())
|
||||
out->push_back(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1198,14 +1295,28 @@ size_t TupleJoiner::size() const
|
||||
{
|
||||
size_t ret = 0;
|
||||
for (uint i = 0; i < bucketCount; i++)
|
||||
{
|
||||
if (UNLIKELY(typelessJoin))
|
||||
ret += ht[i]->size();
|
||||
{
|
||||
for (auto& kv : *ht[i])
|
||||
ret += kv.second.size();
|
||||
}
|
||||
else if (smallRG.getColType(smallKeyColumns[0]) == CalpontSystemCatalog::LONGDOUBLE)
|
||||
ret += ld[i]->size();
|
||||
{
|
||||
for (auto& kv : *ld[i])
|
||||
ret += kv.second.size();
|
||||
}
|
||||
else if (!smallRG.usesStringTable())
|
||||
ret += h[i]->size();
|
||||
{
|
||||
for (auto& kv : *h[i])
|
||||
ret += kv.second.size();
|
||||
}
|
||||
else
|
||||
ret += sth[i]->size();
|
||||
{
|
||||
for (auto& kv : *sth[i])
|
||||
ret += kv.second.size();
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "columnwidth.h"
|
||||
#include "mcs_string.h"
|
||||
|
||||
|
||||
namespace joiner
|
||||
{
|
||||
uint32_t calculateKeyLength(const std::vector<uint32_t>& aKeyColumnsIds, const rowgroup::RowGroup& aRowGroup,
|
||||
@@ -475,13 +476,11 @@ class TupleJoiner
|
||||
void clearHashMaps();
|
||||
|
||||
private:
|
||||
template <typename K, typename V>
|
||||
using HashMapTemplate =
|
||||
std::unordered_multimap<K, V, hasher, std::equal_to<K>, utils::STLPoolAllocator<std::pair<const K, V>>>;
|
||||
using hash_t = HashMapTemplate<int64_t, uint8_t*>;
|
||||
using sthash_t = HashMapTemplate<int64_t, rowgroup::Row::Pointer>;
|
||||
using typelesshash_t = HashMapTemplate<TypelessData, rowgroup::Row::Pointer>;
|
||||
using ldhash_t = HashMapTemplate<long double, rowgroup::Row::Pointer>;
|
||||
// Use boost::hash for standard types, custom hasher for TypelessData
|
||||
using hash_t = boost::unordered_flat_map<int64_t, std::vector<uint8_t*>>;
|
||||
using sthash_t = boost::unordered_flat_map<int64_t, std::vector<rowgroup::Row::Pointer>>;
|
||||
using typelesshash_t = boost::unordered_flat_map<TypelessData, std::vector<rowgroup::Row::Pointer>, hasher, std::equal_to<TypelessData>>;
|
||||
using ldhash_t = boost::unordered_flat_map<long double, std::vector<rowgroup::Row::Pointer>>;
|
||||
|
||||
typedef hash_t::iterator iterator;
|
||||
typedef typelesshash_t::iterator thIterator;
|
||||
|
||||
Reference in New Issue
Block a user