1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

Reformat all code to coding standard

This commit is contained in:
Andrew Hutchings
2017-10-26 17:18:17 +01:00
parent 4985f3456e
commit 01446d1e22
1296 changed files with 403852 additions and 353747 deletions

File diff suppressed because it is too large Load Diff

View File

@ -20,328 +20,439 @@
#include "btree.h"
namespace btree {
namespace btree
{
// A common base class for btree_set, btree_map, btree_multiset and
// btree_multimap.
template <typename Tree>
class btree_container {
typedef btree_container<Tree> self_type;
class btree_container
{
typedef btree_container<Tree> self_type;
public:
typedef typename Tree::params_type params_type;
typedef typename Tree::key_type key_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
typedef typename Tree::pointer pointer;
typedef typename Tree::const_pointer const_pointer;
typedef typename Tree::reference reference;
typedef typename Tree::const_reference const_reference;
typedef typename Tree::size_type size_type;
typedef typename Tree::difference_type difference_type;
typedef typename Tree::iterator iterator;
typedef typename Tree::const_iterator const_iterator;
typedef typename Tree::reverse_iterator reverse_iterator;
typedef typename Tree::const_reverse_iterator const_reverse_iterator;
public:
typedef typename Tree::params_type params_type;
typedef typename Tree::key_type key_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
typedef typename Tree::pointer pointer;
typedef typename Tree::const_pointer const_pointer;
typedef typename Tree::reference reference;
typedef typename Tree::const_reference const_reference;
typedef typename Tree::size_type size_type;
typedef typename Tree::difference_type difference_type;
typedef typename Tree::iterator iterator;
typedef typename Tree::const_iterator const_iterator;
typedef typename Tree::reverse_iterator reverse_iterator;
typedef typename Tree::const_reverse_iterator const_reverse_iterator;
public:
// Default constructor.
btree_container(const key_compare &comp, const allocator_type &alloc)
: tree_(comp, alloc) {
}
// Copy constructor.
btree_container(const self_type &x)
: tree_(x.tree_) {
}
// Iterator routines.
iterator begin() { return tree_.begin(); }
const_iterator begin() const { return tree_.begin(); }
iterator end() { return tree_.end(); }
const_iterator end() const { return tree_.end(); }
reverse_iterator rbegin() { return tree_.rbegin(); }
const_reverse_iterator rbegin() const { return tree_.rbegin(); }
reverse_iterator rend() { return tree_.rend(); }
const_reverse_iterator rend() const { return tree_.rend(); }
// Lookup routines.
iterator lower_bound(const key_type &key) {
return tree_.lower_bound(key);
}
const_iterator lower_bound(const key_type &key) const {
return tree_.lower_bound(key);
}
iterator upper_bound(const key_type &key) {
return tree_.upper_bound(key);
}
const_iterator upper_bound(const key_type &key) const {
return tree_.upper_bound(key);
}
std::pair<iterator,iterator> equal_range(const key_type &key) {
return tree_.equal_range(key);
}
std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
return tree_.equal_range(key);
}
// Utility routines.
void clear() {
tree_.clear();
}
void swap(self_type &x) {
tree_.swap(x.tree_);
}
void dump(std::ostream &os) const {
tree_.dump(os);
}
void verify() const {
tree_.verify();
}
// Size routines.
size_type size() const { return tree_.size(); }
size_type max_size() const { return tree_.max_size(); }
bool empty() const { return tree_.empty(); }
size_type height() const { return tree_.height(); }
size_type internal_nodes() const { return tree_.internal_nodes(); }
size_type leaf_nodes() const { return tree_.leaf_nodes(); }
size_type nodes() const { return tree_.nodes(); }
size_type bytes_used() const { return tree_.bytes_used(); }
static double average_bytes_per_value() {
return Tree::average_bytes_per_value();
}
double fullness() const { return tree_.fullness(); }
double overhead() const { return tree_.overhead(); }
bool operator==(const self_type& x) const {
if (size() != x.size()) {
return false;
public:
// Default constructor.
btree_container(const key_compare& comp, const allocator_type& alloc)
: tree_(comp, alloc)
{
}
for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi) {
if (*i != *xi) {
return false;
}
// Copy constructor.
btree_container(const self_type& x)
: tree_(x.tree_)
{
}
return true;
}
bool operator!=(const self_type& other) const {
return !operator==(other);
}
// Iterator routines.
iterator begin()
{
return tree_.begin();
}
const_iterator begin() const
{
return tree_.begin();
}
iterator end()
{
return tree_.end();
}
const_iterator end() const
{
return tree_.end();
}
reverse_iterator rbegin()
{
return tree_.rbegin();
}
const_reverse_iterator rbegin() const
{
return tree_.rbegin();
}
reverse_iterator rend()
{
return tree_.rend();
}
const_reverse_iterator rend() const
{
return tree_.rend();
}
// Lookup routines.
iterator lower_bound(const key_type& key)
{
return tree_.lower_bound(key);
}
const_iterator lower_bound(const key_type& key) const
{
return tree_.lower_bound(key);
}
iterator upper_bound(const key_type& key)
{
return tree_.upper_bound(key);
}
const_iterator upper_bound(const key_type& key) const
{
return tree_.upper_bound(key);
}
std::pair<iterator, iterator> equal_range(const key_type& key)
{
return tree_.equal_range(key);
}
std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const
{
return tree_.equal_range(key);
}
// Utility routines.
void clear()
{
tree_.clear();
}
void swap(self_type& x)
{
tree_.swap(x.tree_);
}
void dump(std::ostream& os) const
{
tree_.dump(os);
}
void verify() const
{
tree_.verify();
}
// Size routines.
size_type size() const
{
return tree_.size();
}
size_type max_size() const
{
return tree_.max_size();
}
bool empty() const
{
return tree_.empty();
}
size_type height() const
{
return tree_.height();
}
size_type internal_nodes() const
{
return tree_.internal_nodes();
}
size_type leaf_nodes() const
{
return tree_.leaf_nodes();
}
size_type nodes() const
{
return tree_.nodes();
}
size_type bytes_used() const
{
return tree_.bytes_used();
}
static double average_bytes_per_value()
{
return Tree::average_bytes_per_value();
}
double fullness() const
{
return tree_.fullness();
}
double overhead() const
{
return tree_.overhead();
}
bool operator==(const self_type& x) const
{
if (size() != x.size())
{
return false;
}
for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi)
{
if (*i != *xi)
{
return false;
}
}
return true;
}
bool operator!=(const self_type& other) const
{
return !operator==(other);
}
protected:
Tree tree_;
protected:
Tree tree_;
};
template <typename T>
inline std::ostream& operator<<(std::ostream &os, const btree_container<T> &b) {
b.dump(os);
return os;
inline std::ostream& operator<<(std::ostream& os, const btree_container<T>& b)
{
b.dump(os);
return os;
}
// A common base class for btree_set and safe_btree_set.
template <typename Tree>
class btree_unique_container : public btree_container<Tree> {
typedef btree_unique_container<Tree> self_type;
typedef btree_container<Tree> super_type;
class btree_unique_container : public btree_container<Tree>
{
typedef btree_unique_container<Tree> self_type;
typedef btree_container<Tree> super_type;
public:
typedef typename Tree::key_type key_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::size_type size_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
typedef typename Tree::iterator iterator;
typedef typename Tree::const_iterator const_iterator;
public:
typedef typename Tree::key_type key_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::size_type size_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
typedef typename Tree::iterator iterator;
typedef typename Tree::const_iterator const_iterator;
public:
// Default constructor.
btree_unique_container(const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
}
public:
// Default constructor.
btree_unique_container(const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
}
// Copy constructor.
btree_unique_container(const self_type &x)
: super_type(x) {
}
// Copy constructor.
btree_unique_container(const self_type& x)
: super_type(x)
{
}
// Range constructor.
template <class InputIterator>
btree_unique_container(InputIterator b, InputIterator e,
const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
insert(b, e);
}
// Range constructor.
template <class InputIterator>
btree_unique_container(InputIterator b, InputIterator e,
const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
insert(b, e);
}
// Lookup routines.
iterator find(const key_type &key) {
return this->tree_.find_unique(key);
}
const_iterator find(const key_type &key) const {
return this->tree_.find_unique(key);
}
size_type count(const key_type &key) const {
return this->tree_.count_unique(key);
}
// Lookup routines.
iterator find(const key_type& key)
{
return this->tree_.find_unique(key);
}
const_iterator find(const key_type& key) const
{
return this->tree_.find_unique(key);
}
size_type count(const key_type& key) const
{
return this->tree_.count_unique(key);
}
// Insertion routines.
std::pair<iterator,bool> insert(const value_type &x) {
return this->tree_.insert_unique(x);
}
iterator insert(iterator position, const value_type &x) {
return this->tree_.insert_unique(position, x);
}
template <typename InputIterator>
void insert(InputIterator b, InputIterator e) {
this->tree_.insert_unique(b, e);
}
// Insertion routines.
std::pair<iterator, bool> insert(const value_type& x)
{
return this->tree_.insert_unique(x);
}
iterator insert(iterator position, const value_type& x)
{
return this->tree_.insert_unique(position, x);
}
template <typename InputIterator>
void insert(InputIterator b, InputIterator e)
{
this->tree_.insert_unique(b, e);
}
// Deletion routines.
int erase(const key_type &key) {
return this->tree_.erase_unique(key);
}
// Erase the specified iterator from the btree. The iterator must be valid
// (i.e. not equal to end()). Return an iterator pointing to the node after
// the one that was erased (or end() if none exists).
iterator erase(const iterator &iter) {
return this->tree_.erase(iter);
}
void erase(const iterator &first, const iterator &last) {
this->tree_.erase(first, last);
}
// Deletion routines.
int erase(const key_type& key)
{
return this->tree_.erase_unique(key);
}
// Erase the specified iterator from the btree. The iterator must be valid
// (i.e. not equal to end()). Return an iterator pointing to the node after
// the one that was erased (or end() if none exists).
iterator erase(const iterator& iter)
{
return this->tree_.erase(iter);
}
void erase(const iterator& first, const iterator& last)
{
this->tree_.erase(first, last);
}
};
// A common base class for btree_map and safe_btree_map.
template <typename Tree>
class btree_map_container : public btree_unique_container<Tree> {
typedef btree_map_container<Tree> self_type;
typedef btree_unique_container<Tree> super_type;
class btree_map_container : public btree_unique_container<Tree>
{
typedef btree_map_container<Tree> self_type;
typedef btree_unique_container<Tree> super_type;
public:
typedef typename Tree::key_type key_type;
typedef typename Tree::data_type data_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::mapped_type mapped_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
public:
typedef typename Tree::key_type key_type;
typedef typename Tree::data_type data_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::mapped_type mapped_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
private:
// A pointer-like object which only generates its value when
// dereferenced. Used by operator[] to avoid constructing an empty data_type
// if the key already exists in the map.
struct generate_value {
generate_value(const key_type &k)
: key(k) {
private:
// A pointer-like object which only generates its value when
// dereferenced. Used by operator[] to avoid constructing an empty data_type
// if the key already exists in the map.
struct generate_value
{
generate_value(const key_type& k)
: key(k)
{
}
value_type operator*() const
{
return std::make_pair(key, data_type());
}
const key_type& key;
};
public:
// Default constructor.
btree_map_container(const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
}
value_type operator*() const {
return std::make_pair(key, data_type());
// Copy constructor.
btree_map_container(const self_type& x)
: super_type(x)
{
}
const key_type &key;
};
public:
// Default constructor.
btree_map_container(const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
}
// Range constructor.
template <class InputIterator>
btree_map_container(InputIterator b, InputIterator e,
const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(b, e, comp, alloc)
{
}
// Copy constructor.
btree_map_container(const self_type &x)
: super_type(x) {
}
// Range constructor.
template <class InputIterator>
btree_map_container(InputIterator b, InputIterator e,
const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(b, e, comp, alloc) {
}
// Insertion routines.
data_type& operator[](const key_type &key) {
return this->tree_.insert_unique(key, generate_value(key)).first->second;
}
// Insertion routines.
data_type& operator[](const key_type& key)
{
return this->tree_.insert_unique(key, generate_value(key)).first->second;
}
};
// A common base class for btree_multiset and btree_multimap.
template <typename Tree>
class btree_multi_container : public btree_container<Tree> {
typedef btree_multi_container<Tree> self_type;
typedef btree_container<Tree> super_type;
class btree_multi_container : public btree_container<Tree>
{
typedef btree_multi_container<Tree> self_type;
typedef btree_container<Tree> super_type;
public:
typedef typename Tree::key_type key_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::size_type size_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
typedef typename Tree::iterator iterator;
typedef typename Tree::const_iterator const_iterator;
public:
typedef typename Tree::key_type key_type;
typedef typename Tree::value_type value_type;
typedef typename Tree::size_type size_type;
typedef typename Tree::key_compare key_compare;
typedef typename Tree::allocator_type allocator_type;
typedef typename Tree::iterator iterator;
typedef typename Tree::const_iterator const_iterator;
public:
// Default constructor.
btree_multi_container(const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
}
public:
// Default constructor.
btree_multi_container(const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
}
// Copy constructor.
btree_multi_container(const self_type &x)
: super_type(x) {
}
// Copy constructor.
btree_multi_container(const self_type& x)
: super_type(x)
{
}
// Range constructor.
template <class InputIterator>
btree_multi_container(InputIterator b, InputIterator e,
const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
insert(b, e);
}
// Range constructor.
template <class InputIterator>
btree_multi_container(InputIterator b, InputIterator e,
const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
insert(b, e);
}
// Lookup routines.
iterator find(const key_type &key) {
return this->tree_.find_multi(key);
}
const_iterator find(const key_type &key) const {
return this->tree_.find_multi(key);
}
size_type count(const key_type &key) const {
return this->tree_.count_multi(key);
}
// Lookup routines.
iterator find(const key_type& key)
{
return this->tree_.find_multi(key);
}
const_iterator find(const key_type& key) const
{
return this->tree_.find_multi(key);
}
size_type count(const key_type& key) const
{
return this->tree_.count_multi(key);
}
// Insertion routines.
iterator insert(const value_type &x) {
return this->tree_.insert_multi(x);
}
iterator insert(iterator position, const value_type &x) {
return this->tree_.insert_multi(position, x);
}
template <typename InputIterator>
void insert(InputIterator b, InputIterator e) {
this->tree_.insert_multi(b, e);
}
// Insertion routines.
iterator insert(const value_type& x)
{
return this->tree_.insert_multi(x);
}
iterator insert(iterator position, const value_type& x)
{
return this->tree_.insert_multi(position, x);
}
template <typename InputIterator>
void insert(InputIterator b, InputIterator e)
{
this->tree_.insert_multi(b, e);
}
// Deletion routines.
int erase(const key_type &key) {
return this->tree_.erase_multi(key);
}
// Erase the specified iterator from the btree. The iterator must be valid
// (i.e. not equal to end()). Return an iterator pointing to the node after
// the one that was erased (or end() if none exists).
iterator erase(const iterator &iter) {
return this->tree_.erase(iter);
}
void erase(const iterator &first, const iterator &last) {
this->tree_.erase(first, last);
}
// Deletion routines.
int erase(const key_type& key)
{
return this->tree_.erase_multi(key);
}
// Erase the specified iterator from the btree. The iterator must be valid
// (i.e. not equal to end()). Return an iterator pointing to the node after
// the one that was erased (or end() if none exists).
iterator erase(const iterator& iter)
{
return this->tree_.erase(iter);
}
void erase(const iterator& first, const iterator& last)
{
this->tree_.erase(first, last);
}
};
} // namespace btree

View File

@ -31,51 +31,57 @@
#include "btree.h"
#include "btree_container.h"
namespace btree {
namespace btree
{
// The btree_map class is needed mainly for its constructors.
template <typename Key, typename Value,
typename Compare = std::less<Key>,
typename Alloc = std::allocator<std::pair<const Key, Value> >,
int TargetNodeSize = 256>
class btree_map : public btree_map_container<
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
class btree_map : public btree_map_container <
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
{
typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_map_params<
Key, Value, Compare, Alloc, TargetNodeSize> params_type;
typedef btree<params_type> btree_type;
typedef btree_map_container<btree_type> super_type;
typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_map_params <
Key, Value, Compare, Alloc, TargetNodeSize > params_type;
typedef btree<params_type> btree_type;
typedef btree_map_container<btree_type> super_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
public:
// Default constructor.
btree_map(const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
}
public:
// Default constructor.
btree_map(const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
}
// Copy constructor.
btree_map(const self_type &x)
: super_type(x) {
}
// Copy constructor.
btree_map(const self_type& x)
: super_type(x)
{
}
// Range constructor.
template <class InputIterator>
btree_map(InputIterator b, InputIterator e,
const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(b, e, comp, alloc) {
}
// Range constructor.
template <class InputIterator>
btree_map(InputIterator b, InputIterator e,
const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(b, e, comp, alloc)
{
}
};
template <typename K, typename V, typename C, typename A, int N>
inline void swap(btree_map<K, V, C, A, N> &x,
btree_map<K, V, C, A, N> &y) {
x.swap(y);
inline void swap(btree_map<K, V, C, A, N>& x,
btree_map<K, V, C, A, N>& y)
{
x.swap(y);
}
// The btree_multimap class is needed mainly for its constructors.
@ -83,46 +89,51 @@ template <typename Key, typename Value,
typename Compare = std::less<Key>,
typename Alloc = std::allocator<std::pair<const Key, Value> >,
int TargetNodeSize = 256>
class btree_multimap : public btree_multi_container<
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
class btree_multimap : public btree_multi_container <
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
{
typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_map_params<
Key, Value, Compare, Alloc, TargetNodeSize> params_type;
typedef btree<params_type> btree_type;
typedef btree_multi_container<btree_type> super_type;
typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_map_params <
Key, Value, Compare, Alloc, TargetNodeSize > params_type;
typedef btree<params_type> btree_type;
typedef btree_multi_container<btree_type> super_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
typedef typename btree_type::data_type data_type;
typedef typename btree_type::mapped_type mapped_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
typedef typename btree_type::data_type data_type;
typedef typename btree_type::mapped_type mapped_type;
public:
// Default constructor.
btree_multimap(const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
}
public:
// Default constructor.
btree_multimap(const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
}
// Copy constructor.
btree_multimap(const self_type &x)
: super_type(x) {
}
// Copy constructor.
btree_multimap(const self_type& x)
: super_type(x)
{
}
// Range constructor.
template <class InputIterator>
btree_multimap(InputIterator b, InputIterator e,
const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(b, e, comp, alloc) {
}
// Range constructor.
template <class InputIterator>
btree_multimap(InputIterator b, InputIterator e,
const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(b, e, comp, alloc)
{
}
};
template <typename K, typename V, typename C, typename A, int N>
inline void swap(btree_multimap<K, V, C, A, N> &x,
btree_multimap<K, V, C, A, N> &y) {
x.swap(y);
inline void swap(btree_multimap<K, V, C, A, N>& x,
btree_multimap<K, V, C, A, N>& y)
{
x.swap(y);
}
} // namespace btree

View File

@ -27,49 +27,55 @@
#include "btree.h"
#include "btree_container.h"
namespace btree {
namespace btree
{
// The btree_set class is needed mainly for its constructors.
template <typename Key,
typename Compare = std::less<Key>,
typename Alloc = std::allocator<Key>,
int TargetNodeSize = 256>
class btree_set : public btree_unique_container<
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
class btree_set : public btree_unique_container <
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
{
typedef btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
typedef btree<params_type> btree_type;
typedef btree_unique_container<btree_type> super_type;
typedef btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
typedef btree<params_type> btree_type;
typedef btree_unique_container<btree_type> super_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
public:
// Default constructor.
btree_set(const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
}
public:
// Default constructor.
btree_set(const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
}
// Copy constructor.
btree_set(const self_type &x)
: super_type(x) {
}
// Copy constructor.
btree_set(const self_type& x)
: super_type(x)
{
}
// Range constructor.
template <class InputIterator>
btree_set(InputIterator b, InputIterator e,
const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(b, e, comp, alloc) {
}
// Range constructor.
template <class InputIterator>
btree_set(InputIterator b, InputIterator e,
const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(b, e, comp, alloc)
{
}
};
template <typename K, typename C, typename A, int N>
inline void swap(btree_set<K, C, A, N> &x, btree_set<K, C, A, N> &y) {
x.swap(y);
inline void swap(btree_set<K, C, A, N>& x, btree_set<K, C, A, N>& y)
{
x.swap(y);
}
// The btree_multiset class is needed mainly for its constructors.
@ -77,43 +83,48 @@ template <typename Key,
typename Compare = std::less<Key>,
typename Alloc = std::allocator<Key>,
int TargetNodeSize = 256>
class btree_multiset : public btree_multi_container<
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
class btree_multiset : public btree_multi_container <
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
{
typedef btree_multiset<Key, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
typedef btree<params_type> btree_type;
typedef btree_multi_container<btree_type> super_type;
typedef btree_multiset<Key, Compare, Alloc, TargetNodeSize> self_type;
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
typedef btree<params_type> btree_type;
typedef btree_multi_container<btree_type> super_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
public:
typedef typename btree_type::key_compare key_compare;
typedef typename btree_type::allocator_type allocator_type;
public:
// Default constructor.
btree_multiset(const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(comp, alloc) {
}
public:
// Default constructor.
btree_multiset(const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(comp, alloc)
{
}
// Copy constructor.
btree_multiset(const self_type &x)
: super_type(x) {
}
// Copy constructor.
btree_multiset(const self_type& x)
: super_type(x)
{
}
// Range constructor.
template <class InputIterator>
btree_multiset(InputIterator b, InputIterator e,
const key_compare &comp = key_compare(),
const allocator_type &alloc = allocator_type())
: super_type(b, e, comp, alloc) {
}
// Range constructor.
template <class InputIterator>
btree_multiset(InputIterator b, InputIterator e,
const key_compare& comp = key_compare(),
const allocator_type& alloc = allocator_type())
: super_type(b, e, comp, alloc)
{
}
};
template <typename K, typename C, typename A, int N>
inline void swap(btree_multiset<K, C, A, N> &x,
btree_multiset<K, C, A, N> &y) {
x.swap(y);
inline void swap(btree_multiset<K, C, A, N>& x,
btree_multiset<K, C, A, N>& y)
{
x.swap(y);
}
} // namespace btree

View File

@ -22,62 +22,67 @@ using namespace std;
using namespace joblist;
using namespace utils;
namespace joiner {
namespace joiner
{
Joiner::Joiner(bool ia) : _includeAll(ia), _inPM(false), _pool(new SimplePool)
{
SimpleAllocator<pair<uint64_t const, uint64_t> > alloc(_pool);
h.reset(new hash_t(10, hash_t::hasher(), hash_t::key_equal(), alloc));
SimpleAllocator<pair<uint64_t const, uint64_t> > alloc(_pool);
h.reset(new hash_t(10, hash_t::hasher(), hash_t::key_equal(), alloc));
// cout << "Joiner()\n";
}
Joiner::Joiner()
{ }
Joiner::Joiner(const Joiner &j)
Joiner::Joiner(const Joiner& j)
{ }
Joiner & Joiner::operator=(const Joiner &j)
Joiner& Joiner::operator=(const Joiner& j)
{
return *this;
return *this;
}
Joiner::~Joiner()
Joiner::~Joiner()
{
// cout << "~Joiner()\n";
// get rid of the hash table first
h.reset();
// get rid of the hash table first
h.reset();
// delete _pool;
// _pool = NULL;
}
boost::shared_ptr<vector<ElementType> > Joiner::getSortedMatches()
{
boost::shared_ptr<vector<ElementType> > ret;
iterator it;
boost::shared_ptr<vector<ElementType> > ret;
iterator it;
ret.reset(new vector<ElementType>());
for (it = begin(); it != end(); ++it)
if (it->second & MSB)
ret->push_back(ElementType(it->second & ~MSB, it->first));
sort<vector<ElementType>::iterator>(ret->begin(), ret->end());
return ret;
ret.reset(new vector<ElementType>());
for (it = begin(); it != end(); ++it)
if (it->second & MSB)
ret->push_back(ElementType(it->second & ~MSB, it->first));
sort<vector<ElementType>::iterator>(ret->begin(), ret->end());
return ret;
}
boost::shared_ptr<std::vector<joblist::ElementType> > Joiner::getSmallSide()
{
boost::shared_ptr<vector<ElementType> > ret;
iterator it;
boost::shared_ptr<vector<ElementType> > ret;
iterator it;
ret.reset(new vector<ElementType>());
for (it = begin(); it != end(); ++it)
ret->push_back(ElementType(it->second & ~MSB, it->first));
return ret;
ret.reset(new vector<ElementType>());
for (it = begin(); it != end(); ++it)
ret->push_back(ElementType(it->second & ~MSB, it->first));
return ret;
}
void Joiner::doneInserting()
{
//sort here if the data structure is a vector
//sort here if the data structure is a vector
}
}

View File

@ -39,14 +39,16 @@ namespace std
{
namespace tr1
{
template<>
struct hash<long long unsigned int>
template<>
struct hash<long long unsigned int>
: public std::unary_function<long long unsigned int, std::size_t>
{
std::size_t
operator()(long long unsigned int val) const
{
std::size_t
operator()(long long unsigned int val) const
{ return static_cast<std::size_t>(val); }
};
return static_cast<std::size_t>(val);
}
};
}
}
#endif
@ -62,108 +64,145 @@ namespace joiner
{
/* There has to be a better name for this. Not used ATM. */
struct MatchedET {
MatchedET() { }
MatchedET(const joblist::ElementType &et) : e(et) { }
joblist::ElementType e;
struct MatchedET
{
MatchedET() { }
MatchedET(const joblist::ElementType& et) : e(et) { }
joblist::ElementType e;
// bool matched; // Might need this, might not
inline bool operator<(const MatchedET &c) const { return e.second < c.e.second; }
inline bool operator<(const MatchedET& c) const
{
return e.second < c.e.second;
}
};
class Joiner {
public:
class Joiner
{
public:
// typedef std::tr1::unordered_multimap<uint64_t, uint64_t> hash_t;
typedef std::tr1::unordered_multimap<uint64_t, uint64_t,
std::tr1::hash<uint64_t>, std::equal_to<uint64_t>,
utils::SimpleAllocator<std::pair<uint64_t const, uint64_t> > > hash_t;
typedef std::tr1::unordered_multimap<uint64_t, uint64_t,
std::tr1::hash<uint64_t>, std::equal_to<uint64_t>,
utils::SimpleAllocator<std::pair<uint64_t const, uint64_t> > > hash_t;
typedef hash_t::iterator iterator;
typedef hash_t::iterator iterator;
Joiner(bool bIncludeAll);
virtual ~Joiner();
Joiner(bool bIncludeAll);
virtual ~Joiner();
// elements are stored as <value, rid>
inline iterator begin() { return h->begin(); }
inline iterator end() { return h->end(); }
inline size_t size() { return h->size(); }
inline void insert(const joblist::ElementType &e)
{
h->insert(std::pair<uint64_t, uint64_t>(e.second, e.first));
}
void doneInserting();
boost::shared_ptr<std::vector<joblist::ElementType> > getSmallSide();
boost::shared_ptr<std::vector<joblist::ElementType> > getSortedMatches();
// elements are stored as <value, rid>
inline iterator begin()
{
return h->begin();
}
inline iterator end()
{
return h->end();
}
inline size_t size()
{
return h->size();
}
inline void insert(const joblist::ElementType& e)
{
h->insert(std::pair<uint64_t, uint64_t>(e.second, e.first));
}
void doneInserting();
boost::shared_ptr<std::vector<joblist::ElementType> > getSmallSide();
boost::shared_ptr<std::vector<joblist::ElementType> > getSortedMatches();
/* Used by the UM */
inline bool match(const joblist::ElementType &large)
{
std::pair<iterator, iterator> range;
iterator it = h->find(large.second);
/* Used by the UM */
inline bool match(const joblist::ElementType& large)
{
std::pair<iterator, iterator> range;
iterator it = h->find(large.second);
if (it == h->end())
return _includeAll;
else
if (it->second & MSB)
return true;
else {
range = h->equal_range(large.second);
for( ; range.first != range.second; ++range.first)
range.first->second |= MSB;
return true;
}
}
if (it == h->end())
return _includeAll;
else if (it->second & MSB)
return true;
else
{
range = h->equal_range(large.second);
inline void mark(const joblist::ElementType &large)
{
std::pair<iterator, iterator> range;
range = h->equal_range(large.second);
for( ; range.first != range.second; ++range.first)
range.first->second |= MSB;
}
for ( ; range.first != range.second; ++range.first)
range.first->second |= MSB;
/* Used by the PM */
inline bool getNewMatches(const uint64_t value,
std::vector<joblist::ElementType> *newMatches)
{
std::pair<iterator, iterator> range;
iterator it = h->find(value);
return true;
}
}
if (it == h->end())
return _includeAll;
else
if (it->second & MSB)
return true;
else {
newMatches->push_back(
joblist::ElementType(it->second | MSB, value));
range = h->equal_range(value);
for( ; range.first != range.second; ++range.first)
range.first->second |= MSB;
return true;
}
}
inline void mark(const joblist::ElementType& large)
{
std::pair<iterator, iterator> range;
inline bool inPM() { return _inPM; }
void inPM(bool b) { _inPM = b; }
inline bool inUM() { return !_inPM; }
void inUM(bool b) { _inPM = !b; }
bool includeAll() { return _includeAll; }
range = h->equal_range(large.second);
uint64_t getMemUsage() { return (_pool ? _pool->getMemUsage() : 0); }
for ( ; range.first != range.second; ++range.first)
range.first->second |= MSB;
}
static const uint64_t MSB = 0x8000000000000000ULL;
protected:
Joiner();
Joiner(const Joiner &);
Joiner & operator=(const Joiner &);
private:
boost::shared_ptr<hash_t> h;
bool _includeAll;
bool _inPM; // true -> should execute on the PM, false -> UM
boost::shared_ptr<utils::SimplePool> _pool; // pool for the table and nodes
/* Used by the PM */
inline bool getNewMatches(const uint64_t value,
std::vector<joblist::ElementType>* newMatches)
{
std::pair<iterator, iterator> range;
iterator it = h->find(value);
if (it == h->end())
return _includeAll;
else if (it->second & MSB)
return true;
else
{
newMatches->push_back(
joblist::ElementType(it->second | MSB, value));
range = h->equal_range(value);
for ( ; range.first != range.second; ++range.first)
range.first->second |= MSB;
return true;
}
}
inline bool inPM()
{
return _inPM;
}
void inPM(bool b)
{
_inPM = b;
}
inline bool inUM()
{
return !_inPM;
}
void inUM(bool b)
{
_inPM = !b;
}
bool includeAll()
{
return _includeAll;
}
uint64_t getMemUsage()
{
return (_pool ? _pool->getMemUsage() : 0);
}
static const uint64_t MSB = 0x8000000000000000ULL;
protected:
Joiner();
Joiner(const Joiner&);
Joiner& operator=(const Joiner&);
private:
boost::shared_ptr<hash_t> h;
bool _includeAll;
bool _inPM; // true -> should execute on the PM, false -> UM
boost::shared_ptr<utils::SimplePool> _pool; // pool for the table and nodes
};
}

File diff suppressed because it is too large Load Diff

View File

@ -26,138 +26,154 @@
#include <fstream>
#include <boost/thread.hpp>
namespace joiner {
namespace joiner
{
class JoinPartition
{
public:
JoinPartition();
JoinPartition(const rowgroup::RowGroup &largeRG,
const rowgroup::RowGroup &smallRG,
const std::vector<uint32_t> &smallkeyCols,
const std::vector<uint32_t> &largeKeyCols,
bool typeless,
bool isAntiWithMatchNulls,
bool hasFEFilter,
uint64_t totalUMMemory,
uint64_t partitionSize);
JoinPartition(const JoinPartition &, bool splitMode);
public:
JoinPartition();
JoinPartition(const rowgroup::RowGroup& largeRG,
const rowgroup::RowGroup& smallRG,
const std::vector<uint32_t>& smallkeyCols,
const std::vector<uint32_t>& largeKeyCols,
bool typeless,
bool isAntiWithMatchNulls,
bool hasFEFilter,
uint64_t totalUMMemory,
uint64_t partitionSize);
JoinPartition(const JoinPartition&, bool splitMode);
virtual ~JoinPartition();
virtual ~JoinPartition();
// For now, the root node will use the RGData interface, the branches & leaves use
// only the Row interface.
int64_t insertSmallSideRow(const rowgroup::Row &row);
int64_t insertSmallSideRGData(rowgroup::RGData &);
// note, the vector version of this fcn frees the input RGDatas as it goes
int64_t insertSmallSideRGData(std::vector<rowgroup::RGData> &);
int64_t doneInsertingSmallData();
int64_t insertLargeSideRGData(rowgroup::RGData &);
int64_t insertLargeSideRow(const rowgroup::Row &row);
int64_t doneInsertingLargeData();
// For now, the root node will use the RGData interface, the branches & leaves use
// only the Row interface.
int64_t insertSmallSideRow(const rowgroup::Row& row);
int64_t insertSmallSideRGData(rowgroup::RGData&);
// note, the vector version of this fcn frees the input RGDatas as it goes
int64_t insertSmallSideRGData(std::vector<rowgroup::RGData>&);
int64_t doneInsertingSmallData();
int64_t insertLargeSideRGData(rowgroup::RGData&);
int64_t insertLargeSideRow(const rowgroup::Row& row);
int64_t doneInsertingLargeData();
/* Returns true if there are more partitions to fetch, false otherwise */
bool getNextPartition(std::vector<rowgroup::RGData> *smallData, uint64_t *partitionID,
JoinPartition **jp);
/* Returns true if there are more partitions to fetch, false otherwise */
bool getNextPartition(std::vector<rowgroup::RGData>* smallData, uint64_t* partitionID,
JoinPartition** jp);
boost::shared_ptr<rowgroup::RGData> getNextLargeRGData();
boost::shared_ptr<rowgroup::RGData> getNextLargeRGData();
/* It's important to follow the sequence of operations to maintain the correct
internal state. Right now it doesn't check that you the programmer are doing things
right, it'll likely fail queries or crash if you do things wrong.
This should be made simpler at some point.
/* It's important to follow the sequence of operations to maintain the correct
internal state. Right now it doesn't check that you the programmer are doing things
right, it'll likely fail queries or crash if you do things wrong.
This should be made simpler at some point.
On construction, the JP is config'd for small-side reading.
After that's done, call doneInsertingSmallData() and initForLargeSideFeed().
Then, insert the large-side data. When done, call doneInsertingLargeData()
and initForProcessing().
In the processing phase, use getNextPartition() and getNextLargeRGData()
to get the data back out. After processing all partitions, if it's necessary
to process more iterations of the large side, call initForProcessing() again, and
continue as before.
*/
On construction, the JP is config'd for small-side reading.
After that's done, call doneInsertingSmallData() and initForLargeSideFeed().
Then, insert the large-side data. When done, call doneInsertingLargeData()
and initForProcessing().
In the processing phase, use getNextPartition() and getNextLargeRGData()
to get the data back out. After processing all partitions, if it's necessary
to process more iterations of the large side, call initForProcessing() again, and
continue as before.
*/
/* Call this before reading into the large side */
void initForLargeSideFeed();
/* Call this between large-side insertion & join processing */
void initForProcessing();
/* Small outer joins need to retain some state after each large-side iteration */
void saveSmallSidePartition(std::vector<rowgroup::RGData> &rgdata);
/* Call this before reading into the large side */
void initForLargeSideFeed();
/* Call this between large-side insertion & join processing */
void initForProcessing();
/* Small outer joins need to retain some state after each large-side iteration */
void saveSmallSidePartition(std::vector<rowgroup::RGData>& rgdata);
/* each JP instance stores the sizes of every JP instance below it, so root node has the total. */
int64_t getCurrentDiskUsage() { return smallSizeOnDisk + largeSizeOnDisk; }
int64_t getSmallSideDiskUsage() { return smallSizeOnDisk; }
int64_t getLargeSideDiskUsage() { return largeSizeOnDisk; }
/* each JP instance stores the sizes of every JP instance below it, so root node has the total. */
int64_t getCurrentDiskUsage()
{
return smallSizeOnDisk + largeSizeOnDisk;
}
int64_t getSmallSideDiskUsage()
{
return smallSizeOnDisk;
}
int64_t getLargeSideDiskUsage()
{
return largeSizeOnDisk;
}
uint64_t getBytesRead();
uint64_t getBytesWritten();
uint64_t getMaxLargeSize() { return maxLargeSize; }
uint64_t getMaxSmallSize() { return maxSmallSize; }
uint64_t getBytesRead();
uint64_t getBytesWritten();
uint64_t getMaxLargeSize()
{
return maxLargeSize;
}
uint64_t getMaxSmallSize()
{
return maxSmallSize;
}
protected:
private:
void initBuffers();
int64_t convertToSplitMode();
int64_t processSmallBuffer();
int64_t processLargeBuffer();
protected:
private:
void initBuffers();
int64_t convertToSplitMode();
int64_t processSmallBuffer();
int64_t processLargeBuffer();
int64_t processSmallBuffer(rowgroup::RGData &);
int64_t processLargeBuffer(rowgroup::RGData &);
int64_t processSmallBuffer(rowgroup::RGData&);
int64_t processLargeBuffer(rowgroup::RGData&);
rowgroup::RowGroup smallRG;
rowgroup::RowGroup largeRG;
std::vector<uint32_t> smallKeyCols;
std::vector<uint32_t> largeKeyCols;
bool typelessJoin;
uint32_t hashSeed;
std::vector<boost::shared_ptr<JoinPartition> > buckets;
uint32_t bucketCount; // = TotalUMMem / htTargetSize
rowgroup::RowGroup smallRG;
rowgroup::RowGroup largeRG;
std::vector<uint32_t> smallKeyCols;
std::vector<uint32_t> largeKeyCols;
bool typelessJoin;
uint32_t hashSeed;
std::vector<boost::shared_ptr<JoinPartition> > buckets;
uint32_t bucketCount; // = TotalUMMem / htTargetSize
bool fileMode;
std::fstream smallFile;
std::fstream largeFile;
std::string filenamePrefix;
std::string smallFilename;
std::string largeFilename;
rowgroup::RGData buffer;
rowgroup::Row smallRow;
rowgroup::Row largeRow;
uint32_t nextPartitionToReturn;
uint64_t htSizeEstimate;
uint64_t htTargetSize;
uint64_t uniqueID;
uint64_t smallSizeOnDisk;
uint64_t largeSizeOnDisk;
utils::Hasher_r hasher;
bool rootNode;
bool fileMode;
std::fstream smallFile;
std::fstream largeFile;
std::string filenamePrefix;
std::string smallFilename;
std::string largeFilename;
rowgroup::RGData buffer;
rowgroup::Row smallRow;
rowgroup::Row largeRow;
uint32_t nextPartitionToReturn;
uint64_t htSizeEstimate;
uint64_t htTargetSize;
uint64_t uniqueID;
uint64_t smallSizeOnDisk;
uint64_t largeSizeOnDisk;
utils::Hasher_r hasher;
bool rootNode;
/* Not-in antijoin hack. A small-side row with a null join column has to go into every partition or
into one always resident partition (TBD).
/* Not-in antijoin hack. A small-side row with a null join column has to go into every partition or
into one always resident partition (TBD).
If an F&E filter exists, it needs all null rows, if not, it only needs one. */
bool antiWithMatchNulls;
bool needsAllNullRows;
bool gotNullRow;
bool hasNullJoinColumn(rowgroup::Row &);
If an F&E filter exists, it needs all null rows, if not, it only needs one. */
bool antiWithMatchNulls;
bool needsAllNullRows;
bool gotNullRow;
bool hasNullJoinColumn(rowgroup::Row&);
// which = 0 -> smallFile, which = 1 -> largeFile
void readByteStream(int which, messageqcpp::ByteStream *bs);
uint64_t writeByteStream(int which, messageqcpp::ByteStream &bs);
// which = 0 -> smallFile, which = 1 -> largeFile
void readByteStream(int which, messageqcpp::ByteStream* bs);
uint64_t writeByteStream(int which, messageqcpp::ByteStream& bs);
/* Compression support */
bool useCompression;
compress::IDBCompressInterface compressor;
/* TBD: do the reading/writing in one thread, compression/decompression in another */
/* Compression support */
bool useCompression;
compress::IDBCompressInterface compressor;
/* TBD: do the reading/writing in one thread, compression/decompression in another */
/* Some stats for reporting */
uint64_t totalBytesRead, totalBytesWritten;
uint64_t maxLargeSize, maxSmallSize;
/* Some stats for reporting */
uint64_t totalBytesRead, totalBytesWritten;
uint64_t maxLargeSize, maxSmallSize;
/* file descriptor reduction */
size_t nextSmallOffset;
size_t nextLargeOffset;
/* file descriptor reduction */
size_t nextSmallOffset;
size_t nextLargeOffset;
};

File diff suppressed because it is too large Load Diff

View File

@ -43,229 +43,333 @@ namespace joiner
/** Reverses the byte order of a 64-bit value (byte 0 <-> byte 7,
 *  byte 1 <-> byte 6, and so on).
 *
 * @param x the value to swap
 * @return x with its eight bytes in reverse order
 */
inline uint64_t order_swap(uint64_t x)
{
    return (x >> 56) |
           ((x << 40) & 0x00FF000000000000ULL) |
           ((x << 24) & 0x0000FF0000000000ULL) |
           ((x << 8)  & 0x000000FF00000000ULL) |
           ((x >> 8)  & 0x00000000FF000000ULL) |
           ((x >> 24) & 0x0000000000FF0000ULL) |
           ((x >> 40) & 0x000000000000FF00ULL) |
           (x << 56);
}
/** A raw byte-string key: a pointer to `len` bytes of key data.
 *
 * The default constructor yields a NULL pointer with length 0.  The class
 * declares no destructor, so it does not release `data` itself.
 */
class TypelessData
{
public:
    uint8_t* data;
    uint32_t len;

    TypelessData() : data(NULL), len(0) { }
    inline bool operator==(const TypelessData&) const;
    void serialize(messageqcpp::ByteStream&) const;
    void deserialize(messageqcpp::ByteStream&, utils::FixedAllocator&);
    void deserialize(messageqcpp::ByteStream&, utils::PoolAllocator&);
    std::string toString() const;
};
inline bool TypelessData::operator==(const TypelessData &t) const
inline bool TypelessData::operator==(const TypelessData& t) const
{
if (len != t.len)
return false;
if (len == 0) // special value to force mismatches
return false;
return (memcmp(data, t.data, len) == 0);
if (len != t.len)
return false;
if (len == 0) // special value to force mismatches
return false;
return (memcmp(data, t.data, len) == 0);
}
/* This function makes the keys for string & compound joins.  The length of the
 * key is limited by keylen.  Keys that are longer are assigned a length of 0 on return,
 * signifying that they shouldn't match anything.
 */
extern TypelessData makeTypelessKey(const rowgroup::Row&,
                                    const std::vector<uint32_t>&, uint32_t keylen, utils::FixedAllocator* fa);
extern TypelessData makeTypelessKey(const rowgroup::Row&,
                                    const std::vector<uint32_t>&, utils::PoolAllocator* fa);
/* The default for seed is given on this single declaration only. */
extern uint64_t getHashOfTypelessKey(const rowgroup::Row&, const std::vector<uint32_t>&,
                                     uint32_t seed = 0);
class TupleJoiner
{
public:
struct hasher {
inline size_t operator()(int64_t val) const
{ return fHasher((char *) &val, 8); }
inline size_t operator()(const TypelessData &e) const
{ return fHasher((char *) e.data, e.len); }
struct hasher
{
inline size_t operator()(int64_t val) const
{
return fHasher((char*) &val, 8);
}
inline size_t operator()(const TypelessData& e) const
{
return fHasher((char*) e.data, e.len);
}
private:
utils::Hasher fHasher;
};
private:
utils::Hasher fHasher;
};
/* ctor to use for numeric join */
TupleJoiner(
const rowgroup::RowGroup &smallInput,
const rowgroup::RowGroup &largeInput,
uint32_t smallJoinColumn,
uint32_t largeJoinColumn,
joblist::JoinType jt);
/* ctor to use for numeric join */
TupleJoiner(
const rowgroup::RowGroup& smallInput,
const rowgroup::RowGroup& largeInput,
uint32_t smallJoinColumn,
uint32_t largeJoinColumn,
joblist::JoinType jt);
/* ctor to use for string & compound join */
TupleJoiner(
const rowgroup::RowGroup &smallInput,
const rowgroup::RowGroup &largeInput,
const std::vector<uint32_t> &smallJoinColumns,
const std::vector<uint32_t> &largeJoinColumns,
joblist::JoinType jt);
/* ctor to use for string & compound join */
TupleJoiner(
const rowgroup::RowGroup& smallInput,
const rowgroup::RowGroup& largeInput,
const std::vector<uint32_t>& smallJoinColumns,
const std::vector<uint32_t>& largeJoinColumns,
joblist::JoinType jt);
~TupleJoiner();
~TupleJoiner();
size_t size() const;
void insert(rowgroup::Row &r, bool zeroTheRid = true);
void doneInserting();
size_t size() const;
void insert(rowgroup::Row& r, bool zeroTheRid = true);
void doneInserting();
/* match() returns the small-side rows that match the large-side row.
On a UM join, it uses largeSideRow,
on a PM join, it uses index and threadID.
*/
void match(rowgroup::Row &largeSideRow, uint32_t index, uint32_t threadID,
std::vector<rowgroup::Row::Pointer> *matches);
/* match() returns the small-side rows that match the large-side row.
On a UM join, it uses largeSideRow,
on a PM join, it uses index and threadID.
*/
void match(rowgroup::Row& largeSideRow, uint32_t index, uint32_t threadID,
std::vector<rowgroup::Row::Pointer>* matches);
/* On a PM left outer join + aggregation, the result is already complete.
No need to match, just mark.
*/
void markMatches(uint32_t threadID, uint32_t rowCount);
/* On a PM left outer join + aggregation, the result is already complete.
No need to match, just mark.
*/
void markMatches(uint32_t threadID, uint32_t rowCount);
/* For small outer joins, this is how matches are marked now. */
void markMatches(uint32_t threadID, const std::vector<rowgroup::Row::Pointer> &matches);
/* For small outer joins, this is how matches are marked now. */
void markMatches(uint32_t threadID, const std::vector<rowgroup::Row::Pointer>& matches);
/* Some accessors */
inline bool inPM() const { return joinAlg == PM; }
inline bool inUM() const { return joinAlg == UM; }
void setInPM();
void setInUM();
void setThreadCount(uint32_t cnt);
void setPMJoinResults(boost::shared_array<std::vector<uint32_t> >,
uint32_t threadID);
boost::shared_array<std::vector<uint32_t> > getPMJoinArrays(uint32_t threadID);
std::vector<rowgroup::Row::Pointer> *getSmallSide() { return &rows; }
inline bool smallOuterJoin() { return ((joinType & joblist::SMALLOUTER) != 0); }
inline bool largeOuterJoin() { return ((joinType & joblist::LARGEOUTER) != 0); }
inline bool innerJoin() { return joinType == joblist::INNER; }
inline bool fullOuterJoin() { return (smallOuterJoin() && largeOuterJoin()); }
inline joblist::JoinType getJoinType() { return joinType; }
inline const rowgroup::RowGroup &getSmallRG() { return smallRG; }
inline const rowgroup::RowGroup &getLargeRG() { return largeRG; }
inline uint32_t getSmallKeyColumn() { return smallKeyColumns[0]; }
inline uint32_t getLargeKeyColumn() { return largeKeyColumns[0]; }
bool hasNullJoinColumn(const rowgroup::Row &largeRow) const;
void getUnmarkedRows(std::vector<rowgroup::Row::Pointer> *out);
std::string getTableName() const;
void setTableName(const std::string &tname);
/* Some accessors */
inline bool inPM() const
{
return joinAlg == PM;
}
inline bool inUM() const
{
return joinAlg == UM;
}
void setInPM();
void setInUM();
void setThreadCount(uint32_t cnt);
void setPMJoinResults(boost::shared_array<std::vector<uint32_t> >,
uint32_t threadID);
boost::shared_array<std::vector<uint32_t> > getPMJoinArrays(uint32_t threadID);
std::vector<rowgroup::Row::Pointer>* getSmallSide()
{
return &rows;
}
inline bool smallOuterJoin()
{
return ((joinType & joblist::SMALLOUTER) != 0);
}
inline bool largeOuterJoin()
{
return ((joinType & joblist::LARGEOUTER) != 0);
}
inline bool innerJoin()
{
return joinType == joblist::INNER;
}
inline bool fullOuterJoin()
{
return (smallOuterJoin() && largeOuterJoin());
}
inline joblist::JoinType getJoinType()
{
return joinType;
}
inline const rowgroup::RowGroup& getSmallRG()
{
return smallRG;
}
inline const rowgroup::RowGroup& getLargeRG()
{
return largeRG;
}
inline uint32_t getSmallKeyColumn()
{
return smallKeyColumns[0];
}
inline uint32_t getLargeKeyColumn()
{
return largeKeyColumns[0];
}
bool hasNullJoinColumn(const rowgroup::Row& largeRow) const;
void getUnmarkedRows(std::vector<rowgroup::Row::Pointer>* out);
std::string getTableName() const;
void setTableName(const std::string& tname);
/* To allow sorting */
bool operator<(const TupleJoiner &) const;
/* To allow sorting */
bool operator<(const TupleJoiner&) const;
uint64_t getMemUsage() const;
uint64_t getMemUsage() const;
/* Typeless join interface */
inline bool isTypelessJoin() { return typelessJoin; }
inline bool isSignedUnsignedJoin() { return bSignedUnsignedJoin; }
inline const std::vector<uint32_t> & getSmallKeyColumns() { return smallKeyColumns; }
inline const std::vector<uint32_t> & getLargeKeyColumns() { return largeKeyColumns; }
inline uint32_t getKeyLength() { return keyLength; }
/* Typeless join interface */
inline bool isTypelessJoin()
{
return typelessJoin;
}
inline bool isSignedUnsignedJoin()
{
return bSignedUnsignedJoin;
}
inline const std::vector<uint32_t>& getSmallKeyColumns()
{
return smallKeyColumns;
}
inline const std::vector<uint32_t>& getLargeKeyColumns()
{
return largeKeyColumns;
}
inline uint32_t getKeyLength()
{
return keyLength;
}
/* Runtime casual partitioning support */
inline const boost::scoped_array<bool> &discreteCPValues() { return discreteValues; }
inline const boost::scoped_array<std::vector<int64_t> > &getCPData() { return cpValues; }
inline void setUniqueLimit(uint32_t limit) { uniqueLimit = limit; }
/* Runtime casual partitioning support */
inline const boost::scoped_array<bool>& discreteCPValues()
{
return discreteValues;
}
inline const boost::scoped_array<std::vector<int64_t> >& getCPData()
{
return cpValues;
}
inline void setUniqueLimit(uint32_t limit)
{
uniqueLimit = limit;
}
/* Semi-join interface */
inline bool semiJoin() { return ((joinType & joblist::SEMI) != 0); }
inline bool antiJoin() { return ((joinType & joblist::ANTI) != 0); }
inline bool scalar() { return ((joinType & joblist::SCALAR) != 0); }
inline bool matchnulls() { return ((joinType & joblist::MATCHNULLS) != 0); }
inline bool hasFEFilter() { return fe.get(); }
inline boost::shared_ptr<funcexp::FuncExpWrapper> getFcnExpFilter() { return fe; }
void setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> fe);
inline bool evaluateFilter(rowgroup::Row &r, uint32_t index) { return fes[index].evaluate(&r); }
inline uint64_t getJoinNullValue() { return joblist::BIGINTNULL; } // a normalized NULL value
inline uint64_t smallNullValue() { return nullValueForJoinColumn; }
/* Semi-join interface */
inline bool semiJoin()
{
return ((joinType & joblist::SEMI) != 0);
}
inline bool antiJoin()
{
return ((joinType & joblist::ANTI) != 0);
}
inline bool scalar()
{
return ((joinType & joblist::SCALAR) != 0);
}
inline bool matchnulls()
{
return ((joinType & joblist::MATCHNULLS) != 0);
}
inline bool hasFEFilter()
{
return fe.get();
}
inline boost::shared_ptr<funcexp::FuncExpWrapper> getFcnExpFilter()
{
return fe;
}
void setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> fe);
inline bool evaluateFilter(rowgroup::Row& r, uint32_t index)
{
return fes[index].evaluate(&r);
}
inline uint64_t getJoinNullValue()
{
return joblist::BIGINTNULL; // a normalized NULL value
}
inline uint64_t smallNullValue()
{
return nullValueForJoinColumn;
}
// Disk-based join support
void clearData();
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
bool isFinished() { return finished; }
// Disk-based join support
void clearData();
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
bool isFinished()
{
return finished;
}
private:
typedef std::tr1::unordered_multimap<int64_t, uint8_t *, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, uint8_t *> > > hash_t;
typedef std::tr1::unordered_multimap<int64_t, rowgroup::Row::Pointer, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, rowgroup::Row::Pointer> > > sthash_t;
typedef std::tr1::unordered_multimap<TypelessData, rowgroup::Row::Pointer, hasher, std::equal_to<TypelessData>,
utils::STLPoolAllocator<std::pair<const TypelessData, rowgroup::Row::Pointer> > > typelesshash_t;
typedef std::tr1::unordered_multimap<int64_t, uint8_t*, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, uint8_t*> > > hash_t;
typedef std::tr1::unordered_multimap<int64_t, rowgroup::Row::Pointer, hasher, std::equal_to<int64_t>,
utils::STLPoolAllocator<std::pair<const int64_t, rowgroup::Row::Pointer> > > sthash_t;
typedef std::tr1::unordered_multimap<TypelessData, rowgroup::Row::Pointer, hasher, std::equal_to<TypelessData>,
utils::STLPoolAllocator<std::pair<const TypelessData, rowgroup::Row::Pointer> > > typelesshash_t;
typedef hash_t::iterator iterator;
typedef typelesshash_t::iterator thIterator;
typedef hash_t::iterator iterator;
typedef typelesshash_t::iterator thIterator;
TupleJoiner();
TupleJoiner(const TupleJoiner &);
TupleJoiner & operator=(const TupleJoiner &);
TupleJoiner();
TupleJoiner(const TupleJoiner&);
TupleJoiner& operator=(const TupleJoiner&);
iterator begin() { return h->begin(); }
iterator end() { return h->end(); }
iterator begin()
{
return h->begin();
}
iterator end()
{
return h->end();
}
rowgroup::RGData smallNullMemory;
rowgroup::RGData smallNullMemory;
boost::scoped_ptr<hash_t> h; // used for UM joins on ints
boost::scoped_ptr<sthash_t> sth; // used for UM join on ints where the backing table uses a string table
std::vector<rowgroup::Row::Pointer> rows; // used for PM join
boost::scoped_ptr<hash_t> h; // used for UM joins on ints
boost::scoped_ptr<sthash_t> sth; // used for UM join on ints where the backing table uses a string table
std::vector<rowgroup::Row::Pointer> rows; // used for PM join
/* This struct is rough. The BPP-JL stores the parsed results for
the logical block being processed. There are X threads at once, so
up to X logical blocks being processed. For each of those there's a vector
of matches. Each match is an index into 'rows'. */
boost::shared_array<boost::shared_array<std::vector<uint32_t> > > pmJoinResults;
rowgroup::RowGroup smallRG, largeRG;
boost::scoped_array<rowgroup::Row> smallRow;
//boost::shared_array<uint8_t> smallNullMemory;
rowgroup::Row smallNullRow;
/* This struct is rough. The BPP-JL stores the parsed results for
the logical block being processed. There are X threads at once, so
up to X logical blocks being processed. For each of those there's a vector
of matches. Each match is an index into 'rows'. */
boost::shared_array<boost::shared_array<std::vector<uint32_t> > > pmJoinResults;
rowgroup::RowGroup smallRG, largeRG;
boost::scoped_array<rowgroup::Row> smallRow;
//boost::shared_array<uint8_t> smallNullMemory;
rowgroup::Row smallNullRow;
enum JoinAlg {
INSERTING,
PM,
UM,
LARGE
};
JoinAlg joinAlg;
joblist::JoinType joinType;
boost::shared_ptr<utils::PoolAllocator> _pool; // pool for the table and nodes
uint32_t threadCount;
std::string tableName;
enum JoinAlg
{
INSERTING,
PM,
UM,
LARGE
};
JoinAlg joinAlg;
joblist::JoinType joinType;
boost::shared_ptr<utils::PoolAllocator> _pool; // pool for the table and nodes
uint32_t threadCount;
std::string tableName;
/* vars, & fcns for typeless join */
bool typelessJoin;
std::vector<uint32_t> smallKeyColumns, largeKeyColumns;
boost::scoped_ptr<typelesshash_t> ht; // used for UM join on strings
uint32_t keyLength;
utils::FixedAllocator storedKeyAlloc;
/* vars, & fcns for typeless join */
bool typelessJoin;
std::vector<uint32_t> smallKeyColumns, largeKeyColumns;
boost::scoped_ptr<typelesshash_t> ht; // used for UM join on strings
uint32_t keyLength;
utils::FixedAllocator storedKeyAlloc;
boost::scoped_array<utils::FixedAllocator> tmpKeyAlloc;
bool bSignedUnsignedJoin; // Set if we have a signed vs unsigned compare in a join. When not set, we can save checking for the signed bit.
/* semi-join vars & fcns */
boost::shared_ptr<funcexp::FuncExpWrapper> fe;
boost::scoped_array<funcexp::FuncExpWrapper> fes; // holds X copies of fe, one per thread
// this var is only used to normalize the NULL values for single-column joins,
// will have to change when/if we need to support that for compound or string joins
int64_t nullValueForJoinColumn;
/* semi-join vars & fcns */
boost::shared_ptr<funcexp::FuncExpWrapper> fe;
boost::scoped_array<funcexp::FuncExpWrapper> fes; // holds X copies of fe, one per thread
// this var is only used to normalize the NULL values for single-column joins,
// will have to change when/if we need to support that for compound or string joins
int64_t nullValueForJoinColumn;
/* Runtime casual partitioning support */
void updateCPData(const rowgroup::Row &r);
boost::scoped_array<bool> discreteValues;
boost::scoped_array<std::vector<int64_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
uint32_t uniqueLimit;
bool finished;
/* Runtime casual partitioning support */
void updateCPData(const rowgroup::Row& r);
boost::scoped_array<bool> discreteValues;
boost::scoped_array<std::vector<int64_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
uint32_t uniqueLimit;
bool finished;
};
}