You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
Reformat all code to coding standard
This commit is contained in:
4344
utils/joiner/btree.h
4344
utils/joiner/btree.h
File diff suppressed because it is too large
Load Diff
@ -20,328 +20,439 @@
|
||||
|
||||
#include "btree.h"
|
||||
|
||||
namespace btree {
|
||||
namespace btree
|
||||
{
|
||||
|
||||
// A common base class for btree_set, btree_map, btree_multiset and
|
||||
// btree_multimap.
|
||||
template <typename Tree>
|
||||
class btree_container {
|
||||
typedef btree_container<Tree> self_type;
|
||||
class btree_container
|
||||
{
|
||||
typedef btree_container<Tree> self_type;
|
||||
|
||||
public:
|
||||
typedef typename Tree::params_type params_type;
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::pointer pointer;
|
||||
typedef typename Tree::const_pointer const_pointer;
|
||||
typedef typename Tree::reference reference;
|
||||
typedef typename Tree::const_reference const_reference;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::difference_type difference_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
typedef typename Tree::reverse_iterator reverse_iterator;
|
||||
typedef typename Tree::const_reverse_iterator const_reverse_iterator;
|
||||
public:
|
||||
typedef typename Tree::params_type params_type;
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::pointer pointer;
|
||||
typedef typename Tree::const_pointer const_pointer;
|
||||
typedef typename Tree::reference reference;
|
||||
typedef typename Tree::const_reference const_reference;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::difference_type difference_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
typedef typename Tree::reverse_iterator reverse_iterator;
|
||||
typedef typename Tree::const_reverse_iterator const_reverse_iterator;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_container(const key_compare &comp, const allocator_type &alloc)
|
||||
: tree_(comp, alloc) {
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_container(const self_type &x)
|
||||
: tree_(x.tree_) {
|
||||
}
|
||||
|
||||
// Iterator routines.
|
||||
iterator begin() { return tree_.begin(); }
|
||||
const_iterator begin() const { return tree_.begin(); }
|
||||
iterator end() { return tree_.end(); }
|
||||
const_iterator end() const { return tree_.end(); }
|
||||
reverse_iterator rbegin() { return tree_.rbegin(); }
|
||||
const_reverse_iterator rbegin() const { return tree_.rbegin(); }
|
||||
reverse_iterator rend() { return tree_.rend(); }
|
||||
const_reverse_iterator rend() const { return tree_.rend(); }
|
||||
|
||||
// Lookup routines.
|
||||
iterator lower_bound(const key_type &key) {
|
||||
return tree_.lower_bound(key);
|
||||
}
|
||||
const_iterator lower_bound(const key_type &key) const {
|
||||
return tree_.lower_bound(key);
|
||||
}
|
||||
iterator upper_bound(const key_type &key) {
|
||||
return tree_.upper_bound(key);
|
||||
}
|
||||
const_iterator upper_bound(const key_type &key) const {
|
||||
return tree_.upper_bound(key);
|
||||
}
|
||||
std::pair<iterator,iterator> equal_range(const key_type &key) {
|
||||
return tree_.equal_range(key);
|
||||
}
|
||||
std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
|
||||
return tree_.equal_range(key);
|
||||
}
|
||||
|
||||
// Utility routines.
|
||||
void clear() {
|
||||
tree_.clear();
|
||||
}
|
||||
void swap(self_type &x) {
|
||||
tree_.swap(x.tree_);
|
||||
}
|
||||
void dump(std::ostream &os) const {
|
||||
tree_.dump(os);
|
||||
}
|
||||
void verify() const {
|
||||
tree_.verify();
|
||||
}
|
||||
|
||||
// Size routines.
|
||||
size_type size() const { return tree_.size(); }
|
||||
size_type max_size() const { return tree_.max_size(); }
|
||||
bool empty() const { return tree_.empty(); }
|
||||
size_type height() const { return tree_.height(); }
|
||||
size_type internal_nodes() const { return tree_.internal_nodes(); }
|
||||
size_type leaf_nodes() const { return tree_.leaf_nodes(); }
|
||||
size_type nodes() const { return tree_.nodes(); }
|
||||
size_type bytes_used() const { return tree_.bytes_used(); }
|
||||
static double average_bytes_per_value() {
|
||||
return Tree::average_bytes_per_value();
|
||||
}
|
||||
double fullness() const { return tree_.fullness(); }
|
||||
double overhead() const { return tree_.overhead(); }
|
||||
|
||||
bool operator==(const self_type& x) const {
|
||||
if (size() != x.size()) {
|
||||
return false;
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_container(const key_compare& comp, const allocator_type& alloc)
|
||||
: tree_(comp, alloc)
|
||||
{
|
||||
}
|
||||
for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi) {
|
||||
if (*i != *xi) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_container(const self_type& x)
|
||||
: tree_(x.tree_)
|
||||
{
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool operator!=(const self_type& other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
// Iterator routines.
|
||||
iterator begin()
|
||||
{
|
||||
return tree_.begin();
|
||||
}
|
||||
const_iterator begin() const
|
||||
{
|
||||
return tree_.begin();
|
||||
}
|
||||
iterator end()
|
||||
{
|
||||
return tree_.end();
|
||||
}
|
||||
const_iterator end() const
|
||||
{
|
||||
return tree_.end();
|
||||
}
|
||||
reverse_iterator rbegin()
|
||||
{
|
||||
return tree_.rbegin();
|
||||
}
|
||||
const_reverse_iterator rbegin() const
|
||||
{
|
||||
return tree_.rbegin();
|
||||
}
|
||||
reverse_iterator rend()
|
||||
{
|
||||
return tree_.rend();
|
||||
}
|
||||
const_reverse_iterator rend() const
|
||||
{
|
||||
return tree_.rend();
|
||||
}
|
||||
|
||||
// Lookup routines.
|
||||
iterator lower_bound(const key_type& key)
|
||||
{
|
||||
return tree_.lower_bound(key);
|
||||
}
|
||||
const_iterator lower_bound(const key_type& key) const
|
||||
{
|
||||
return tree_.lower_bound(key);
|
||||
}
|
||||
iterator upper_bound(const key_type& key)
|
||||
{
|
||||
return tree_.upper_bound(key);
|
||||
}
|
||||
const_iterator upper_bound(const key_type& key) const
|
||||
{
|
||||
return tree_.upper_bound(key);
|
||||
}
|
||||
std::pair<iterator, iterator> equal_range(const key_type& key)
|
||||
{
|
||||
return tree_.equal_range(key);
|
||||
}
|
||||
std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const
|
||||
{
|
||||
return tree_.equal_range(key);
|
||||
}
|
||||
|
||||
// Utility routines.
|
||||
void clear()
|
||||
{
|
||||
tree_.clear();
|
||||
}
|
||||
void swap(self_type& x)
|
||||
{
|
||||
tree_.swap(x.tree_);
|
||||
}
|
||||
void dump(std::ostream& os) const
|
||||
{
|
||||
tree_.dump(os);
|
||||
}
|
||||
void verify() const
|
||||
{
|
||||
tree_.verify();
|
||||
}
|
||||
|
||||
// Size routines.
|
||||
size_type size() const
|
||||
{
|
||||
return tree_.size();
|
||||
}
|
||||
size_type max_size() const
|
||||
{
|
||||
return tree_.max_size();
|
||||
}
|
||||
bool empty() const
|
||||
{
|
||||
return tree_.empty();
|
||||
}
|
||||
size_type height() const
|
||||
{
|
||||
return tree_.height();
|
||||
}
|
||||
size_type internal_nodes() const
|
||||
{
|
||||
return tree_.internal_nodes();
|
||||
}
|
||||
size_type leaf_nodes() const
|
||||
{
|
||||
return tree_.leaf_nodes();
|
||||
}
|
||||
size_type nodes() const
|
||||
{
|
||||
return tree_.nodes();
|
||||
}
|
||||
size_type bytes_used() const
|
||||
{
|
||||
return tree_.bytes_used();
|
||||
}
|
||||
static double average_bytes_per_value()
|
||||
{
|
||||
return Tree::average_bytes_per_value();
|
||||
}
|
||||
double fullness() const
|
||||
{
|
||||
return tree_.fullness();
|
||||
}
|
||||
double overhead() const
|
||||
{
|
||||
return tree_.overhead();
|
||||
}
|
||||
|
||||
bool operator==(const self_type& x) const
|
||||
{
|
||||
if (size() != x.size())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi)
|
||||
{
|
||||
if (*i != *xi)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool operator!=(const self_type& other) const
|
||||
{
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
Tree tree_;
|
||||
protected:
|
||||
Tree tree_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline std::ostream& operator<<(std::ostream &os, const btree_container<T> &b) {
|
||||
b.dump(os);
|
||||
return os;
|
||||
inline std::ostream& operator<<(std::ostream& os, const btree_container<T>& b)
|
||||
{
|
||||
b.dump(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
// A common base class for btree_set and safe_btree_set.
|
||||
template <typename Tree>
|
||||
class btree_unique_container : public btree_container<Tree> {
|
||||
typedef btree_unique_container<Tree> self_type;
|
||||
typedef btree_container<Tree> super_type;
|
||||
class btree_unique_container : public btree_container<Tree>
|
||||
{
|
||||
typedef btree_unique_container<Tree> self_type;
|
||||
typedef btree_container<Tree> super_type;
|
||||
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_unique_container(const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
}
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_unique_container(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_unique_container(const self_type &x)
|
||||
: super_type(x) {
|
||||
}
|
||||
// Copy constructor.
|
||||
btree_unique_container(const self_type& x)
|
||||
: super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_unique_container(InputIterator b, InputIterator e,
|
||||
const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
insert(b, e);
|
||||
}
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_unique_container(InputIterator b, InputIterator e,
|
||||
const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
insert(b, e);
|
||||
}
|
||||
|
||||
// Lookup routines.
|
||||
iterator find(const key_type &key) {
|
||||
return this->tree_.find_unique(key);
|
||||
}
|
||||
const_iterator find(const key_type &key) const {
|
||||
return this->tree_.find_unique(key);
|
||||
}
|
||||
size_type count(const key_type &key) const {
|
||||
return this->tree_.count_unique(key);
|
||||
}
|
||||
// Lookup routines.
|
||||
iterator find(const key_type& key)
|
||||
{
|
||||
return this->tree_.find_unique(key);
|
||||
}
|
||||
const_iterator find(const key_type& key) const
|
||||
{
|
||||
return this->tree_.find_unique(key);
|
||||
}
|
||||
size_type count(const key_type& key) const
|
||||
{
|
||||
return this->tree_.count_unique(key);
|
||||
}
|
||||
|
||||
// Insertion routines.
|
||||
std::pair<iterator,bool> insert(const value_type &x) {
|
||||
return this->tree_.insert_unique(x);
|
||||
}
|
||||
iterator insert(iterator position, const value_type &x) {
|
||||
return this->tree_.insert_unique(position, x);
|
||||
}
|
||||
template <typename InputIterator>
|
||||
void insert(InputIterator b, InputIterator e) {
|
||||
this->tree_.insert_unique(b, e);
|
||||
}
|
||||
// Insertion routines.
|
||||
std::pair<iterator, bool> insert(const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_unique(x);
|
||||
}
|
||||
iterator insert(iterator position, const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_unique(position, x);
|
||||
}
|
||||
template <typename InputIterator>
|
||||
void insert(InputIterator b, InputIterator e)
|
||||
{
|
||||
this->tree_.insert_unique(b, e);
|
||||
}
|
||||
|
||||
// Deletion routines.
|
||||
int erase(const key_type &key) {
|
||||
return this->tree_.erase_unique(key);
|
||||
}
|
||||
// Erase the specified iterator from the btree. The iterator must be valid
|
||||
// (i.e. not equal to end()). Return an iterator pointing to the node after
|
||||
// the one that was erased (or end() if none exists).
|
||||
iterator erase(const iterator &iter) {
|
||||
return this->tree_.erase(iter);
|
||||
}
|
||||
void erase(const iterator &first, const iterator &last) {
|
||||
this->tree_.erase(first, last);
|
||||
}
|
||||
// Deletion routines.
|
||||
int erase(const key_type& key)
|
||||
{
|
||||
return this->tree_.erase_unique(key);
|
||||
}
|
||||
// Erase the specified iterator from the btree. The iterator must be valid
|
||||
// (i.e. not equal to end()). Return an iterator pointing to the node after
|
||||
// the one that was erased (or end() if none exists).
|
||||
iterator erase(const iterator& iter)
|
||||
{
|
||||
return this->tree_.erase(iter);
|
||||
}
|
||||
void erase(const iterator& first, const iterator& last)
|
||||
{
|
||||
this->tree_.erase(first, last);
|
||||
}
|
||||
};
|
||||
|
||||
// A common base class for btree_map and safe_btree_map.
|
||||
template <typename Tree>
|
||||
class btree_map_container : public btree_unique_container<Tree> {
|
||||
typedef btree_map_container<Tree> self_type;
|
||||
typedef btree_unique_container<Tree> super_type;
|
||||
class btree_map_container : public btree_unique_container<Tree>
|
||||
{
|
||||
typedef btree_map_container<Tree> self_type;
|
||||
typedef btree_unique_container<Tree> super_type;
|
||||
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::data_type data_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::mapped_type mapped_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::data_type data_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::mapped_type mapped_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
|
||||
private:
|
||||
// A pointer-like object which only generates its value when
|
||||
// dereferenced. Used by operator[] to avoid constructing an empty data_type
|
||||
// if the key already exists in the map.
|
||||
struct generate_value {
|
||||
generate_value(const key_type &k)
|
||||
: key(k) {
|
||||
private:
|
||||
// A pointer-like object which only generates its value when
|
||||
// dereferenced. Used by operator[] to avoid constructing an empty data_type
|
||||
// if the key already exists in the map.
|
||||
struct generate_value
|
||||
{
|
||||
generate_value(const key_type& k)
|
||||
: key(k)
|
||||
{
|
||||
}
|
||||
value_type operator*() const
|
||||
{
|
||||
return std::make_pair(key, data_type());
|
||||
}
|
||||
const key_type& key;
|
||||
};
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_map_container(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
value_type operator*() const {
|
||||
return std::make_pair(key, data_type());
|
||||
|
||||
// Copy constructor.
|
||||
btree_map_container(const self_type& x)
|
||||
: super_type(x)
|
||||
{
|
||||
}
|
||||
const key_type &key;
|
||||
};
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_map_container(const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
}
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_map_container(InputIterator b, InputIterator e,
|
||||
const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_map_container(const self_type &x)
|
||||
: super_type(x) {
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_map_container(InputIterator b, InputIterator e,
|
||||
const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc) {
|
||||
}
|
||||
|
||||
// Insertion routines.
|
||||
data_type& operator[](const key_type &key) {
|
||||
return this->tree_.insert_unique(key, generate_value(key)).first->second;
|
||||
}
|
||||
// Insertion routines.
|
||||
data_type& operator[](const key_type& key)
|
||||
{
|
||||
return this->tree_.insert_unique(key, generate_value(key)).first->second;
|
||||
}
|
||||
};
|
||||
|
||||
// A common base class for btree_multiset and btree_multimap.
|
||||
template <typename Tree>
|
||||
class btree_multi_container : public btree_container<Tree> {
|
||||
typedef btree_multi_container<Tree> self_type;
|
||||
typedef btree_container<Tree> super_type;
|
||||
class btree_multi_container : public btree_container<Tree>
|
||||
{
|
||||
typedef btree_multi_container<Tree> self_type;
|
||||
typedef btree_container<Tree> super_type;
|
||||
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
public:
|
||||
typedef typename Tree::key_type key_type;
|
||||
typedef typename Tree::value_type value_type;
|
||||
typedef typename Tree::size_type size_type;
|
||||
typedef typename Tree::key_compare key_compare;
|
||||
typedef typename Tree::allocator_type allocator_type;
|
||||
typedef typename Tree::iterator iterator;
|
||||
typedef typename Tree::const_iterator const_iterator;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multi_container(const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
}
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multi_container(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_multi_container(const self_type &x)
|
||||
: super_type(x) {
|
||||
}
|
||||
// Copy constructor.
|
||||
btree_multi_container(const self_type& x)
|
||||
: super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multi_container(InputIterator b, InputIterator e,
|
||||
const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
insert(b, e);
|
||||
}
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multi_container(InputIterator b, InputIterator e,
|
||||
const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
insert(b, e);
|
||||
}
|
||||
|
||||
// Lookup routines.
|
||||
iterator find(const key_type &key) {
|
||||
return this->tree_.find_multi(key);
|
||||
}
|
||||
const_iterator find(const key_type &key) const {
|
||||
return this->tree_.find_multi(key);
|
||||
}
|
||||
size_type count(const key_type &key) const {
|
||||
return this->tree_.count_multi(key);
|
||||
}
|
||||
// Lookup routines.
|
||||
iterator find(const key_type& key)
|
||||
{
|
||||
return this->tree_.find_multi(key);
|
||||
}
|
||||
const_iterator find(const key_type& key) const
|
||||
{
|
||||
return this->tree_.find_multi(key);
|
||||
}
|
||||
size_type count(const key_type& key) const
|
||||
{
|
||||
return this->tree_.count_multi(key);
|
||||
}
|
||||
|
||||
// Insertion routines.
|
||||
iterator insert(const value_type &x) {
|
||||
return this->tree_.insert_multi(x);
|
||||
}
|
||||
iterator insert(iterator position, const value_type &x) {
|
||||
return this->tree_.insert_multi(position, x);
|
||||
}
|
||||
template <typename InputIterator>
|
||||
void insert(InputIterator b, InputIterator e) {
|
||||
this->tree_.insert_multi(b, e);
|
||||
}
|
||||
// Insertion routines.
|
||||
iterator insert(const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_multi(x);
|
||||
}
|
||||
iterator insert(iterator position, const value_type& x)
|
||||
{
|
||||
return this->tree_.insert_multi(position, x);
|
||||
}
|
||||
template <typename InputIterator>
|
||||
void insert(InputIterator b, InputIterator e)
|
||||
{
|
||||
this->tree_.insert_multi(b, e);
|
||||
}
|
||||
|
||||
// Deletion routines.
|
||||
int erase(const key_type &key) {
|
||||
return this->tree_.erase_multi(key);
|
||||
}
|
||||
// Erase the specified iterator from the btree. The iterator must be valid
|
||||
// (i.e. not equal to end()). Return an iterator pointing to the node after
|
||||
// the one that was erased (or end() if none exists).
|
||||
iterator erase(const iterator &iter) {
|
||||
return this->tree_.erase(iter);
|
||||
}
|
||||
void erase(const iterator &first, const iterator &last) {
|
||||
this->tree_.erase(first, last);
|
||||
}
|
||||
// Deletion routines.
|
||||
int erase(const key_type& key)
|
||||
{
|
||||
return this->tree_.erase_multi(key);
|
||||
}
|
||||
// Erase the specified iterator from the btree. The iterator must be valid
|
||||
// (i.e. not equal to end()). Return an iterator pointing to the node after
|
||||
// the one that was erased (or end() if none exists).
|
||||
iterator erase(const iterator& iter)
|
||||
{
|
||||
return this->tree_.erase(iter);
|
||||
}
|
||||
void erase(const iterator& first, const iterator& last)
|
||||
{
|
||||
this->tree_.erase(first, last);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace btree
|
||||
|
@ -31,51 +31,57 @@
|
||||
#include "btree.h"
|
||||
#include "btree_container.h"
|
||||
|
||||
namespace btree {
|
||||
namespace btree
|
||||
{
|
||||
|
||||
// The btree_map class is needed mainly for its constructors.
|
||||
template <typename Key, typename Value,
|
||||
typename Compare = std::less<Key>,
|
||||
typename Alloc = std::allocator<std::pair<const Key, Value> >,
|
||||
int TargetNodeSize = 256>
|
||||
class btree_map : public btree_map_container<
|
||||
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
|
||||
class btree_map : public btree_map_container <
|
||||
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
|
||||
typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_map_params<
|
||||
Key, Value, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_map_container<btree_type> super_type;
|
||||
typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_map_params <
|
||||
Key, Value, Compare, Alloc, TargetNodeSize > params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_map_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_map(const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
}
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_map(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_map(const self_type &x)
|
||||
: super_type(x) {
|
||||
}
|
||||
// Copy constructor.
|
||||
btree_map(const self_type& x)
|
||||
: super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_map(InputIterator b, InputIterator e,
|
||||
const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc) {
|
||||
}
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_map(InputIterator b, InputIterator e,
|
||||
const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename V, typename C, typename A, int N>
|
||||
inline void swap(btree_map<K, V, C, A, N> &x,
|
||||
btree_map<K, V, C, A, N> &y) {
|
||||
x.swap(y);
|
||||
inline void swap(btree_map<K, V, C, A, N>& x,
|
||||
btree_map<K, V, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
// The btree_multimap class is needed mainly for its constructors.
|
||||
@ -83,46 +89,51 @@ template <typename Key, typename Value,
|
||||
typename Compare = std::less<Key>,
|
||||
typename Alloc = std::allocator<std::pair<const Key, Value> >,
|
||||
int TargetNodeSize = 256>
|
||||
class btree_multimap : public btree_multi_container<
|
||||
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
|
||||
class btree_multimap : public btree_multi_container <
|
||||
btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
|
||||
typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_map_params<
|
||||
Key, Value, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_multi_container<btree_type> super_type;
|
||||
typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_map_params <
|
||||
Key, Value, Compare, Alloc, TargetNodeSize > params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_multi_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
typedef typename btree_type::data_type data_type;
|
||||
typedef typename btree_type::mapped_type mapped_type;
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
typedef typename btree_type::data_type data_type;
|
||||
typedef typename btree_type::mapped_type mapped_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multimap(const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
}
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multimap(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_multimap(const self_type &x)
|
||||
: super_type(x) {
|
||||
}
|
||||
// Copy constructor.
|
||||
btree_multimap(const self_type& x)
|
||||
: super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multimap(InputIterator b, InputIterator e,
|
||||
const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc) {
|
||||
}
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multimap(InputIterator b, InputIterator e,
|
||||
const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename V, typename C, typename A, int N>
|
||||
inline void swap(btree_multimap<K, V, C, A, N> &x,
|
||||
btree_multimap<K, V, C, A, N> &y) {
|
||||
x.swap(y);
|
||||
inline void swap(btree_multimap<K, V, C, A, N>& x,
|
||||
btree_multimap<K, V, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
} // namespace btree
|
||||
|
@ -27,49 +27,55 @@
|
||||
#include "btree.h"
|
||||
#include "btree_container.h"
|
||||
|
||||
namespace btree {
|
||||
namespace btree
|
||||
{
|
||||
|
||||
// The btree_set class is needed mainly for its constructors.
|
||||
template <typename Key,
|
||||
typename Compare = std::less<Key>,
|
||||
typename Alloc = std::allocator<Key>,
|
||||
int TargetNodeSize = 256>
|
||||
class btree_set : public btree_unique_container<
|
||||
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
|
||||
class btree_set : public btree_unique_container <
|
||||
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
|
||||
typedef btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_unique_container<btree_type> super_type;
|
||||
typedef btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_unique_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_set(const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
}
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_set(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_set(const self_type &x)
|
||||
: super_type(x) {
|
||||
}
|
||||
// Copy constructor.
|
||||
btree_set(const self_type& x)
|
||||
: super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_set(InputIterator b, InputIterator e,
|
||||
const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc) {
|
||||
}
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_set(InputIterator b, InputIterator e,
|
||||
const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename C, typename A, int N>
|
||||
inline void swap(btree_set<K, C, A, N> &x, btree_set<K, C, A, N> &y) {
|
||||
x.swap(y);
|
||||
inline void swap(btree_set<K, C, A, N>& x, btree_set<K, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
// The btree_multiset class is needed mainly for its constructors.
|
||||
@ -77,43 +83,48 @@ template <typename Key,
|
||||
typename Compare = std::less<Key>,
|
||||
typename Alloc = std::allocator<Key>,
|
||||
int TargetNodeSize = 256>
|
||||
class btree_multiset : public btree_multi_container<
|
||||
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
|
||||
class btree_multiset : public btree_multi_container <
|
||||
btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > >
|
||||
{
|
||||
|
||||
typedef btree_multiset<Key, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_multi_container<btree_type> super_type;
|
||||
typedef btree_multiset<Key, Compare, Alloc, TargetNodeSize> self_type;
|
||||
typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
|
||||
typedef btree<params_type> btree_type;
|
||||
typedef btree_multi_container<btree_type> super_type;
|
||||
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
public:
|
||||
typedef typename btree_type::key_compare key_compare;
|
||||
typedef typename btree_type::allocator_type allocator_type;
|
||||
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multiset(const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(comp, alloc) {
|
||||
}
|
||||
public:
|
||||
// Default constructor.
|
||||
btree_multiset(const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(comp, alloc)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
btree_multiset(const self_type &x)
|
||||
: super_type(x) {
|
||||
}
|
||||
// Copy constructor.
|
||||
btree_multiset(const self_type& x)
|
||||
: super_type(x)
|
||||
{
|
||||
}
|
||||
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multiset(InputIterator b, InputIterator e,
|
||||
const key_compare &comp = key_compare(),
|
||||
const allocator_type &alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc) {
|
||||
}
|
||||
// Range constructor.
|
||||
template <class InputIterator>
|
||||
btree_multiset(InputIterator b, InputIterator e,
|
||||
const key_compare& comp = key_compare(),
|
||||
const allocator_type& alloc = allocator_type())
|
||||
: super_type(b, e, comp, alloc)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename K, typename C, typename A, int N>
|
||||
inline void swap(btree_multiset<K, C, A, N> &x,
|
||||
btree_multiset<K, C, A, N> &y) {
|
||||
x.swap(y);
|
||||
inline void swap(btree_multiset<K, C, A, N>& x,
|
||||
btree_multiset<K, C, A, N>& y)
|
||||
{
|
||||
x.swap(y);
|
||||
}
|
||||
|
||||
} // namespace btree
|
||||
|
@ -22,62 +22,67 @@ using namespace std;
|
||||
using namespace joblist;
|
||||
using namespace utils;
|
||||
|
||||
namespace joiner {
|
||||
namespace joiner
|
||||
{
|
||||
|
||||
// Constructs a usable joiner.
//   ia - stored in _includeAll; match() and getNewMatches() return this value
//        when the probed key is not present in the hash table.
// The joiner starts out flagged as not running on the PM (_inPM == false) and
// owns a freshly created SimplePool.
Joiner::Joiner(bool ia) : _includeAll(ia), _inPM(false), _pool(new SimplePool)
{
    // The allocator is constructed from _pool and handed to the hash table.
    SimpleAllocator<pair<uint64_t const, uint64_t> > alloc(_pool);
    // 10 is the initial bucket-count hint for the unordered_multimap.
    h.reset(new hash_t(10, hash_t::hasher(), hash_t::key_equal(), alloc));
//	cout << "Joiner()\n";
}
|
||||
|
||||
// Default constructor (declared protected in the class). It builds no hash
// table and no pool, so h and _pool stay null. The flags are explicitly
// initialized so that inPM()/includeAll() never read indeterminate values.
Joiner::Joiner() : _includeAll(false), _inPM(false)
{ }
|
||||
|
||||
// Copy constructor (declared protected in the class). It deliberately copies
// nothing from the source object: h and _pool stay null. The flags are
// explicitly initialized so they never hold indeterminate values.
Joiner::Joiner(const Joiner&) : _includeAll(false), _inPM(false)
{ }
|
||||
|
||||
// Assignment operator (declared protected in the class). It is a no-op:
// nothing is copied from the right-hand side and *this is returned unchanged.
Joiner& Joiner::operator=(const Joiner&)
{
    return *this;
}
|
||||
|
||||
// Destructor. Releases the hash table explicitly before the remaining members
// are destroyed.
Joiner::~Joiner()
{
//	cout << "~Joiner()\n";
    // get rid of the hash table first
    // NOTE(review): presumably so the table's nodes are released while the
    // pool they were allocated from is still alive -- confirm against
    // SimpleAllocator/SimplePool.
    h.reset();
//	delete _pool;
//	_pool = NULL;
}
|
||||
|
||||
// Returns a newly allocated, sorted vector holding one ElementType for every
// hash-table entry whose stored second field has the MSB flag set (the flag
// is set by match(), mark() and getNewMatches()).
// Table entries are stored as <value, rid> (see insert()), so each result is
// built as ElementType(rid with the MSB flag cleared, value).
boost::shared_ptr<vector<ElementType> > Joiner::getSortedMatches()
{
    boost::shared_ptr<vector<ElementType> > ret(new vector<ElementType>());

    for (iterator it = begin(); it != end(); ++it)
    {
        // Only entries that have been marked as matched are returned.
        if (it->second & MSB)
            ret->push_back(ElementType(it->second & ~MSB, it->first));
    }

    sort<vector<ElementType>::iterator>(ret->begin(), ret->end());
    return ret;
}
|
||||
|
||||
// Returns a newly allocated vector holding one ElementType for every entry in
// the hash table, in table iteration order (unsorted).
// Table entries are stored as <value, rid> (see insert()), so each result is
// built as ElementType(rid with the MSB flag cleared, value).
boost::shared_ptr<std::vector<joblist::ElementType> > Joiner::getSmallSide()
{
    boost::shared_ptr<vector<ElementType> > ret(new vector<ElementType>());

    // Exactly one element is produced per table entry, so size the vector
    // once instead of letting push_back reallocate repeatedly.
    ret->reserve(size());

    for (iterator it = begin(); it != end(); ++it)
        ret->push_back(ElementType(it->second & ~MSB, it->first));

    return ret;
}
|
||||
|
||||
// Hook called once all small-side elements have been inserted. Currently a
// no-op because the backing store is a hash table.
void Joiner::doneInserting()
{
    //sort here if the data structure is a vector
}
|
||||
|
||||
}
|
||||
|
@ -39,14 +39,16 @@ namespace std
|
||||
{
|
||||
namespace tr1
|
||||
{
|
||||
template<>
|
||||
struct hash<long long unsigned int>
|
||||
template<>
|
||||
struct hash<long long unsigned int>
|
||||
: public std::unary_function<long long unsigned int, std::size_t>
|
||||
{
|
||||
std::size_t
|
||||
operator()(long long unsigned int val) const
|
||||
{
|
||||
std::size_t
|
||||
operator()(long long unsigned int val) const
|
||||
{ return static_cast<std::size_t>(val); }
|
||||
};
|
||||
return static_cast<std::size_t>(val);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -62,108 +64,145 @@ namespace joiner
|
||||
{
|
||||
|
||||
/* There has to be a better name for this. Not used ATM. */
|
||||
struct MatchedET {
|
||||
MatchedET() { }
|
||||
MatchedET(const joblist::ElementType &et) : e(et) { }
|
||||
joblist::ElementType e;
|
||||
struct MatchedET
|
||||
{
|
||||
MatchedET() { }
|
||||
MatchedET(const joblist::ElementType& et) : e(et) { }
|
||||
joblist::ElementType e;
|
||||
// bool matched; // Might need this, might not
|
||||
|
||||
inline bool operator<(const MatchedET &c) const { return e.second < c.e.second; }
|
||||
inline bool operator<(const MatchedET& c) const
|
||||
{
|
||||
return e.second < c.e.second;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class Joiner {
|
||||
public:
|
||||
class Joiner
|
||||
{
|
||||
public:
|
||||
// typedef std::tr1::unordered_multimap<uint64_t, uint64_t> hash_t;
|
||||
typedef std::tr1::unordered_multimap<uint64_t, uint64_t,
|
||||
std::tr1::hash<uint64_t>, std::equal_to<uint64_t>,
|
||||
utils::SimpleAllocator<std::pair<uint64_t const, uint64_t> > > hash_t;
|
||||
typedef std::tr1::unordered_multimap<uint64_t, uint64_t,
|
||||
std::tr1::hash<uint64_t>, std::equal_to<uint64_t>,
|
||||
utils::SimpleAllocator<std::pair<uint64_t const, uint64_t> > > hash_t;
|
||||
|
||||
typedef hash_t::iterator iterator;
|
||||
typedef hash_t::iterator iterator;
|
||||
|
||||
Joiner(bool bIncludeAll);
|
||||
virtual ~Joiner();
|
||||
Joiner(bool bIncludeAll);
|
||||
virtual ~Joiner();
|
||||
|
||||
// elements are stored as <value, rid>
|
||||
inline iterator begin() { return h->begin(); }
|
||||
inline iterator end() { return h->end(); }
|
||||
inline size_t size() { return h->size(); }
|
||||
inline void insert(const joblist::ElementType &e)
|
||||
{
|
||||
h->insert(std::pair<uint64_t, uint64_t>(e.second, e.first));
|
||||
}
|
||||
void doneInserting();
|
||||
boost::shared_ptr<std::vector<joblist::ElementType> > getSmallSide();
|
||||
boost::shared_ptr<std::vector<joblist::ElementType> > getSortedMatches();
|
||||
// elements are stored as <value, rid>
|
||||
inline iterator begin()
|
||||
{
|
||||
return h->begin();
|
||||
}
|
||||
inline iterator end()
|
||||
{
|
||||
return h->end();
|
||||
}
|
||||
inline size_t size()
|
||||
{
|
||||
return h->size();
|
||||
}
|
||||
inline void insert(const joblist::ElementType& e)
|
||||
{
|
||||
h->insert(std::pair<uint64_t, uint64_t>(e.second, e.first));
|
||||
}
|
||||
void doneInserting();
|
||||
boost::shared_ptr<std::vector<joblist::ElementType> > getSmallSide();
|
||||
boost::shared_ptr<std::vector<joblist::ElementType> > getSortedMatches();
|
||||
|
||||
/* Used by the UM */
|
||||
inline bool match(const joblist::ElementType &large)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
iterator it = h->find(large.second);
|
||||
/* Used by the UM */
|
||||
inline bool match(const joblist::ElementType& large)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
iterator it = h->find(large.second);
|
||||
|
||||
if (it == h->end())
|
||||
return _includeAll;
|
||||
else
|
||||
if (it->second & MSB)
|
||||
return true;
|
||||
else {
|
||||
range = h->equal_range(large.second);
|
||||
for( ; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (it == h->end())
|
||||
return _includeAll;
|
||||
else if (it->second & MSB)
|
||||
return true;
|
||||
else
|
||||
{
|
||||
range = h->equal_range(large.second);
|
||||
|
||||
inline void mark(const joblist::ElementType &large)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
|
||||
range = h->equal_range(large.second);
|
||||
for( ; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
}
|
||||
for ( ; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
|
||||
/* Used by the PM */
|
||||
inline bool getNewMatches(const uint64_t value,
|
||||
std::vector<joblist::ElementType> *newMatches)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
iterator it = h->find(value);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (it == h->end())
|
||||
return _includeAll;
|
||||
else
|
||||
if (it->second & MSB)
|
||||
return true;
|
||||
else {
|
||||
newMatches->push_back(
|
||||
joblist::ElementType(it->second | MSB, value));
|
||||
range = h->equal_range(value);
|
||||
for( ; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
inline void mark(const joblist::ElementType& large)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
|
||||
inline bool inPM() { return _inPM; }
|
||||
void inPM(bool b) { _inPM = b; }
|
||||
inline bool inUM() { return !_inPM; }
|
||||
void inUM(bool b) { _inPM = !b; }
|
||||
bool includeAll() { return _includeAll; }
|
||||
range = h->equal_range(large.second);
|
||||
|
||||
uint64_t getMemUsage() { return (_pool ? _pool->getMemUsage() : 0); }
|
||||
for ( ; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
}
|
||||
|
||||
static const uint64_t MSB = 0x8000000000000000ULL;
|
||||
protected:
|
||||
Joiner();
|
||||
Joiner(const Joiner &);
|
||||
Joiner & operator=(const Joiner &);
|
||||
private:
|
||||
boost::shared_ptr<hash_t> h;
|
||||
bool _includeAll;
|
||||
bool _inPM; // true -> should execute on the PM, false -> UM
|
||||
boost::shared_ptr<utils::SimplePool> _pool; // pool for the table and nodes
|
||||
/* Used by the PM */
|
||||
inline bool getNewMatches(const uint64_t value,
|
||||
std::vector<joblist::ElementType>* newMatches)
|
||||
{
|
||||
std::pair<iterator, iterator> range;
|
||||
iterator it = h->find(value);
|
||||
|
||||
if (it == h->end())
|
||||
return _includeAll;
|
||||
else if (it->second & MSB)
|
||||
return true;
|
||||
else
|
||||
{
|
||||
newMatches->push_back(
|
||||
joblist::ElementType(it->second | MSB, value));
|
||||
range = h->equal_range(value);
|
||||
|
||||
for ( ; range.first != range.second; ++range.first)
|
||||
range.first->second |= MSB;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool inPM()
|
||||
{
|
||||
return _inPM;
|
||||
}
|
||||
void inPM(bool b)
|
||||
{
|
||||
_inPM = b;
|
||||
}
|
||||
inline bool inUM()
|
||||
{
|
||||
return !_inPM;
|
||||
}
|
||||
void inUM(bool b)
|
||||
{
|
||||
_inPM = !b;
|
||||
}
|
||||
bool includeAll()
|
||||
{
|
||||
return _includeAll;
|
||||
}
|
||||
|
||||
uint64_t getMemUsage()
|
||||
{
|
||||
return (_pool ? _pool->getMemUsage() : 0);
|
||||
}
|
||||
|
||||
static const uint64_t MSB = 0x8000000000000000ULL;
|
||||
protected:
|
||||
Joiner();
|
||||
Joiner(const Joiner&);
|
||||
Joiner& operator=(const Joiner&);
|
||||
private:
|
||||
boost::shared_ptr<hash_t> h;
|
||||
bool _includeAll;
|
||||
bool _inPM; // true -> should execute on the PM, false -> UM
|
||||
boost::shared_ptr<utils::SimplePool> _pool; // pool for the table and nodes
|
||||
};
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -26,138 +26,154 @@
|
||||
#include <fstream>
|
||||
#include <boost/thread.hpp>
|
||||
|
||||
namespace joiner {
|
||||
namespace joiner
|
||||
{
|
||||
|
||||
class JoinPartition
|
||||
{
|
||||
public:
|
||||
JoinPartition();
|
||||
JoinPartition(const rowgroup::RowGroup &largeRG,
|
||||
const rowgroup::RowGroup &smallRG,
|
||||
const std::vector<uint32_t> &smallkeyCols,
|
||||
const std::vector<uint32_t> &largeKeyCols,
|
||||
bool typeless,
|
||||
bool isAntiWithMatchNulls,
|
||||
bool hasFEFilter,
|
||||
uint64_t totalUMMemory,
|
||||
uint64_t partitionSize);
|
||||
JoinPartition(const JoinPartition &, bool splitMode);
|
||||
public:
|
||||
JoinPartition();
|
||||
JoinPartition(const rowgroup::RowGroup& largeRG,
|
||||
const rowgroup::RowGroup& smallRG,
|
||||
const std::vector<uint32_t>& smallkeyCols,
|
||||
const std::vector<uint32_t>& largeKeyCols,
|
||||
bool typeless,
|
||||
bool isAntiWithMatchNulls,
|
||||
bool hasFEFilter,
|
||||
uint64_t totalUMMemory,
|
||||
uint64_t partitionSize);
|
||||
JoinPartition(const JoinPartition&, bool splitMode);
|
||||
|
||||
virtual ~JoinPartition();
|
||||
virtual ~JoinPartition();
|
||||
|
||||
// For now, the root node will use the RGData interface, the branches & leaves use
|
||||
// only the Row interface.
|
||||
int64_t insertSmallSideRow(const rowgroup::Row &row);
|
||||
int64_t insertSmallSideRGData(rowgroup::RGData &);
|
||||
// note, the vector version of this fcn frees the input RGDatas as it goes
|
||||
int64_t insertSmallSideRGData(std::vector<rowgroup::RGData> &);
|
||||
int64_t doneInsertingSmallData();
|
||||
int64_t insertLargeSideRGData(rowgroup::RGData &);
|
||||
int64_t insertLargeSideRow(const rowgroup::Row &row);
|
||||
int64_t doneInsertingLargeData();
|
||||
// For now, the root node will use the RGData interface, the branches & leaves use
|
||||
// only the Row interface.
|
||||
int64_t insertSmallSideRow(const rowgroup::Row& row);
|
||||
int64_t insertSmallSideRGData(rowgroup::RGData&);
|
||||
// note, the vector version of this fcn frees the input RGDatas as it goes
|
||||
int64_t insertSmallSideRGData(std::vector<rowgroup::RGData>&);
|
||||
int64_t doneInsertingSmallData();
|
||||
int64_t insertLargeSideRGData(rowgroup::RGData&);
|
||||
int64_t insertLargeSideRow(const rowgroup::Row& row);
|
||||
int64_t doneInsertingLargeData();
|
||||
|
||||
/* Returns true if there are more partitions to fetch, false otherwise */
|
||||
bool getNextPartition(std::vector<rowgroup::RGData> *smallData, uint64_t *partitionID,
|
||||
JoinPartition **jp);
|
||||
/* Returns true if there are more partitions to fetch, false otherwise */
|
||||
bool getNextPartition(std::vector<rowgroup::RGData>* smallData, uint64_t* partitionID,
|
||||
JoinPartition** jp);
|
||||
|
||||
boost::shared_ptr<rowgroup::RGData> getNextLargeRGData();
|
||||
boost::shared_ptr<rowgroup::RGData> getNextLargeRGData();
|
||||
|
||||
/* It's important to follow the sequence of operations to maintain the correct
|
||||
internal state. Right now it doesn't check that you the programmer are doing things
|
||||
right, it'll likely fail queries or crash if you do things wrong.
|
||||
This should be made simpler at some point.
|
||||
/* It's important to follow the sequence of operations to maintain the correct
|
||||
internal state. Right now it doesn't check that you the programmer are doing things
|
||||
right, it'll likely fail queries or crash if you do things wrong.
|
||||
This should be made simpler at some point.
|
||||
|
||||
On construction, the JP is config'd for small-side reading.
|
||||
After that's done, call doneInsertingSmallData() and initForLargeSideFeed().
|
||||
Then, insert the large-side data. When done, call doneInsertingLargeData()
|
||||
and initForProcessing().
|
||||
In the processing phase, use getNextPartition() and getNextLargeRGData()
|
||||
to get the data back out. After processing all partitions, if it's necessary
|
||||
to process more iterations of the large side, call initForProcessing() again, and
|
||||
continue as before.
|
||||
*/
|
||||
On construction, the JP is config'd for small-side reading.
|
||||
After that's done, call doneInsertingSmallData() and initForLargeSideFeed().
|
||||
Then, insert the large-side data. When done, call doneInsertingLargeData()
|
||||
and initForProcessing().
|
||||
In the processing phase, use getNextPartition() and getNextLargeRGData()
|
||||
to get the data back out. After processing all partitions, if it's necessary
|
||||
to process more iterations of the large side, call initForProcessing() again, and
|
||||
continue as before.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Call this before reading into the large side */
|
||||
void initForLargeSideFeed();
|
||||
/* Call this between large-side insertion & join processing */
|
||||
void initForProcessing();
|
||||
/* Small outer joins need to retain some state after each large-side iteration */
|
||||
void saveSmallSidePartition(std::vector<rowgroup::RGData> &rgdata);
|
||||
/* Call this before reading into the large side */
|
||||
void initForLargeSideFeed();
|
||||
/* Call this between large-side insertion & join processing */
|
||||
void initForProcessing();
|
||||
/* Small outer joins need to retain some state after each large-side iteration */
|
||||
void saveSmallSidePartition(std::vector<rowgroup::RGData>& rgdata);
|
||||
|
||||
/* each JP instance stores the sizes of every JP instance below it, so root node has the total. */
|
||||
int64_t getCurrentDiskUsage() { return smallSizeOnDisk + largeSizeOnDisk; }
|
||||
int64_t getSmallSideDiskUsage() { return smallSizeOnDisk; }
|
||||
int64_t getLargeSideDiskUsage() { return largeSizeOnDisk; }
|
||||
/* each JP instance stores the sizes of every JP instance below it, so root node has the total. */
|
||||
int64_t getCurrentDiskUsage()
|
||||
{
|
||||
return smallSizeOnDisk + largeSizeOnDisk;
|
||||
}
|
||||
int64_t getSmallSideDiskUsage()
|
||||
{
|
||||
return smallSizeOnDisk;
|
||||
}
|
||||
int64_t getLargeSideDiskUsage()
|
||||
{
|
||||
return largeSizeOnDisk;
|
||||
}
|
||||
|
||||
uint64_t getBytesRead();
|
||||
uint64_t getBytesWritten();
|
||||
uint64_t getMaxLargeSize() { return maxLargeSize; }
|
||||
uint64_t getMaxSmallSize() { return maxSmallSize; }
|
||||
uint64_t getBytesRead();
|
||||
uint64_t getBytesWritten();
|
||||
uint64_t getMaxLargeSize()
|
||||
{
|
||||
return maxLargeSize;
|
||||
}
|
||||
uint64_t getMaxSmallSize()
|
||||
{
|
||||
return maxSmallSize;
|
||||
}
|
||||
|
||||
protected:
|
||||
private:
|
||||
void initBuffers();
|
||||
int64_t convertToSplitMode();
|
||||
int64_t processSmallBuffer();
|
||||
int64_t processLargeBuffer();
|
||||
protected:
|
||||
private:
|
||||
void initBuffers();
|
||||
int64_t convertToSplitMode();
|
||||
int64_t processSmallBuffer();
|
||||
int64_t processLargeBuffer();
|
||||
|
||||
int64_t processSmallBuffer(rowgroup::RGData &);
|
||||
int64_t processLargeBuffer(rowgroup::RGData &);
|
||||
int64_t processSmallBuffer(rowgroup::RGData&);
|
||||
int64_t processLargeBuffer(rowgroup::RGData&);
|
||||
|
||||
rowgroup::RowGroup smallRG;
|
||||
rowgroup::RowGroup largeRG;
|
||||
std::vector<uint32_t> smallKeyCols;
|
||||
std::vector<uint32_t> largeKeyCols;
|
||||
bool typelessJoin;
|
||||
uint32_t hashSeed;
|
||||
std::vector<boost::shared_ptr<JoinPartition> > buckets;
|
||||
uint32_t bucketCount; // = TotalUMMem / htTargetSize
|
||||
rowgroup::RowGroup smallRG;
|
||||
rowgroup::RowGroup largeRG;
|
||||
std::vector<uint32_t> smallKeyCols;
|
||||
std::vector<uint32_t> largeKeyCols;
|
||||
bool typelessJoin;
|
||||
uint32_t hashSeed;
|
||||
std::vector<boost::shared_ptr<JoinPartition> > buckets;
|
||||
uint32_t bucketCount; // = TotalUMMem / htTargetSize
|
||||
|
||||
bool fileMode;
|
||||
std::fstream smallFile;
|
||||
std::fstream largeFile;
|
||||
std::string filenamePrefix;
|
||||
std::string smallFilename;
|
||||
std::string largeFilename;
|
||||
rowgroup::RGData buffer;
|
||||
rowgroup::Row smallRow;
|
||||
rowgroup::Row largeRow;
|
||||
uint32_t nextPartitionToReturn;
|
||||
uint64_t htSizeEstimate;
|
||||
uint64_t htTargetSize;
|
||||
uint64_t uniqueID;
|
||||
uint64_t smallSizeOnDisk;
|
||||
uint64_t largeSizeOnDisk;
|
||||
utils::Hasher_r hasher;
|
||||
bool rootNode;
|
||||
bool fileMode;
|
||||
std::fstream smallFile;
|
||||
std::fstream largeFile;
|
||||
std::string filenamePrefix;
|
||||
std::string smallFilename;
|
||||
std::string largeFilename;
|
||||
rowgroup::RGData buffer;
|
||||
rowgroup::Row smallRow;
|
||||
rowgroup::Row largeRow;
|
||||
uint32_t nextPartitionToReturn;
|
||||
uint64_t htSizeEstimate;
|
||||
uint64_t htTargetSize;
|
||||
uint64_t uniqueID;
|
||||
uint64_t smallSizeOnDisk;
|
||||
uint64_t largeSizeOnDisk;
|
||||
utils::Hasher_r hasher;
|
||||
bool rootNode;
|
||||
|
||||
/* Not-in antijoin hack. A small-side row with a null join column has to go into every partition or
|
||||
into one always resident partition (TBD).
|
||||
/* Not-in antijoin hack. A small-side row with a null join column has to go into every partition or
|
||||
into one always resident partition (TBD).
|
||||
|
||||
If an F&E filter exists, it needs all null rows, if not, it only needs one. */
|
||||
bool antiWithMatchNulls;
|
||||
bool needsAllNullRows;
|
||||
bool gotNullRow;
|
||||
bool hasNullJoinColumn(rowgroup::Row &);
|
||||
If an F&E filter exists, it needs all null rows, if not, it only needs one. */
|
||||
bool antiWithMatchNulls;
|
||||
bool needsAllNullRows;
|
||||
bool gotNullRow;
|
||||
bool hasNullJoinColumn(rowgroup::Row&);
|
||||
|
||||
// which = 0 -> smallFile, which = 1 -> largeFile
|
||||
void readByteStream(int which, messageqcpp::ByteStream *bs);
|
||||
uint64_t writeByteStream(int which, messageqcpp::ByteStream &bs);
|
||||
// which = 0 -> smallFile, which = 1 -> largeFile
|
||||
void readByteStream(int which, messageqcpp::ByteStream* bs);
|
||||
uint64_t writeByteStream(int which, messageqcpp::ByteStream& bs);
|
||||
|
||||
/* Compression support */
|
||||
bool useCompression;
|
||||
compress::IDBCompressInterface compressor;
|
||||
/* TBD: do the reading/writing in one thread, compression/decompression in another */
|
||||
/* Compression support */
|
||||
bool useCompression;
|
||||
compress::IDBCompressInterface compressor;
|
||||
/* TBD: do the reading/writing in one thread, compression/decompression in another */
|
||||
|
||||
/* Some stats for reporting */
|
||||
uint64_t totalBytesRead, totalBytesWritten;
|
||||
uint64_t maxLargeSize, maxSmallSize;
|
||||
/* Some stats for reporting */
|
||||
uint64_t totalBytesRead, totalBytesWritten;
|
||||
uint64_t maxLargeSize, maxSmallSize;
|
||||
|
||||
/* file descriptor reduction */
|
||||
size_t nextSmallOffset;
|
||||
size_t nextLargeOffset;
|
||||
/* file descriptor reduction */
|
||||
size_t nextSmallOffset;
|
||||
size_t nextLargeOffset;
|
||||
};
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -43,229 +43,333 @@ namespace joiner
|
||||
|
||||
// Reverses the byte order of a 64-bit value (byte 0 <-> byte 7, byte 1 <->
// byte 6, and so on). Each term moves one byte to its mirrored position; the
// masks isolate that byte after the shift. The outermost two terms need no
// mask because the shift itself discards every other byte.
inline uint64_t order_swap(uint64_t x)
{
    return (x >> 56) |
           ((x << 40) & 0x00FF000000000000ULL) |
           ((x << 24) & 0x0000FF0000000000ULL) |
           ((x << 8)  & 0x000000FF00000000ULL) |
           ((x >> 8)  & 0x00000000FF000000ULL) |
           ((x >> 24) & 0x0000000000FF0000ULL) |
           ((x >> 40) & 0x000000000000FF00ULL) |
           (x << 56);
}
|
||||
|
||||
class TypelessData
|
||||
{
|
||||
public:
|
||||
uint8_t *data;
|
||||
uint32_t len;
|
||||
uint8_t* data;
|
||||
uint32_t len;
|
||||
|
||||
TypelessData() : data(NULL), len(0) { }
|
||||
inline bool operator==(const TypelessData &) const;
|
||||
void serialize(messageqcpp::ByteStream &) const;
|
||||
void deserialize(messageqcpp::ByteStream &, utils::FixedAllocator &);
|
||||
void deserialize(messageqcpp::ByteStream &, utils::PoolAllocator &);
|
||||
std::string toString() const;
|
||||
TypelessData() : data(NULL), len(0) { }
|
||||
inline bool operator==(const TypelessData&) const;
|
||||
void serialize(messageqcpp::ByteStream&) const;
|
||||
void deserialize(messageqcpp::ByteStream&, utils::FixedAllocator&);
|
||||
void deserialize(messageqcpp::ByteStream&, utils::PoolAllocator&);
|
||||
std::string toString() const;
|
||||
};
|
||||
|
||||
inline bool TypelessData::operator==(const TypelessData &t) const
|
||||
inline bool TypelessData::operator==(const TypelessData& t) const
|
||||
{
|
||||
if (len != t.len)
|
||||
return false;
|
||||
if (len == 0) // special value to force mismatches
|
||||
return false;
|
||||
return (memcmp(data, t.data, len) == 0);
|
||||
if (len != t.len)
|
||||
return false;
|
||||
|
||||
if (len == 0) // special value to force mismatches
|
||||
return false;
|
||||
|
||||
return (memcmp(data, t.data, len) == 0);
|
||||
}
|
||||
|
||||
/* This function makes the keys for string & compound joins. The length of the
|
||||
* key is limited by keylen. Keys that are longer are assigned a length of 0 on return,
|
||||
* signifying that it shouldn't match anything.
|
||||
*/
|
||||
extern TypelessData makeTypelessKey(const rowgroup::Row &,
|
||||
const std::vector<uint32_t> &, uint32_t keylen, utils::FixedAllocator *fa);
|
||||
extern TypelessData makeTypelessKey(const rowgroup::Row &,
|
||||
const std::vector<uint32_t> &, utils::PoolAllocator *fa);
|
||||
extern uint64_t getHashOfTypelessKey(const rowgroup::Row &, const std::vector<uint32_t> &,
|
||||
uint32_t seed=0);
|
||||
extern TypelessData makeTypelessKey(const rowgroup::Row&,
|
||||
const std::vector<uint32_t>&, uint32_t keylen, utils::FixedAllocator* fa);
|
||||
extern TypelessData makeTypelessKey(const rowgroup::Row&,
|
||||
const std::vector<uint32_t>&, utils::PoolAllocator* fa);
|
||||
extern uint64_t getHashOfTypelessKey(const rowgroup::Row&, const std::vector<uint32_t>&,
|
||||
uint32_t seed = 0);
|
||||
|
||||
|
||||
class TupleJoiner
|
||||
{
|
||||
public:
|
||||
struct hasher {
|
||||
inline size_t operator()(int64_t val) const
|
||||
{ return fHasher((char *) &val, 8); }
|
||||
inline size_t operator()(const TypelessData &e) const
|
||||
{ return fHasher((char *) e.data, e.len); }
|
||||
struct hasher
|
||||
{
|
||||
inline size_t operator()(int64_t val) const
|
||||
{
|
||||
return fHasher((char*) &val, 8);
|
||||
}
|
||||
inline size_t operator()(const TypelessData& e) const
|
||||
{
|
||||
return fHasher((char*) e.data, e.len);
|
||||
}
|
||||
|
||||
private:
|
||||
utils::Hasher fHasher;
|
||||
};
|
||||
private:
|
||||
utils::Hasher fHasher;
|
||||
};
|
||||
|
||||
/* ctor to use for numeric join */
|
||||
TupleJoiner(
|
||||
const rowgroup::RowGroup &smallInput,
|
||||
const rowgroup::RowGroup &largeInput,
|
||||
uint32_t smallJoinColumn,
|
||||
uint32_t largeJoinColumn,
|
||||
joblist::JoinType jt);
|
||||
/* ctor to use for numeric join */
|
||||
TupleJoiner(
|
||||
const rowgroup::RowGroup& smallInput,
|
||||
const rowgroup::RowGroup& largeInput,
|
||||
uint32_t smallJoinColumn,
|
||||
uint32_t largeJoinColumn,
|
||||
joblist::JoinType jt);
|
||||
|
||||
/* ctor to use for string & compound join */
|
||||
TupleJoiner(
|
||||
const rowgroup::RowGroup &smallInput,
|
||||
const rowgroup::RowGroup &largeInput,
|
||||
const std::vector<uint32_t> &smallJoinColumns,
|
||||
const std::vector<uint32_t> &largeJoinColumns,
|
||||
joblist::JoinType jt);
|
||||
/* ctor to use for string & compound join */
|
||||
TupleJoiner(
|
||||
const rowgroup::RowGroup& smallInput,
|
||||
const rowgroup::RowGroup& largeInput,
|
||||
const std::vector<uint32_t>& smallJoinColumns,
|
||||
const std::vector<uint32_t>& largeJoinColumns,
|
||||
joblist::JoinType jt);
|
||||
|
||||
~TupleJoiner();
|
||||
~TupleJoiner();
|
||||
|
||||
size_t size() const;
|
||||
void insert(rowgroup::Row &r, bool zeroTheRid = true);
|
||||
void doneInserting();
|
||||
size_t size() const;
|
||||
void insert(rowgroup::Row& r, bool zeroTheRid = true);
|
||||
void doneInserting();
|
||||
|
||||
/* match() returns the small-side rows that match the large-side row.
|
||||
On a UM join, it uses largeSideRow,
|
||||
on a PM join, it uses index and threadID.
|
||||
*/
|
||||
void match(rowgroup::Row &largeSideRow, uint32_t index, uint32_t threadID,
|
||||
std::vector<rowgroup::Row::Pointer> *matches);
|
||||
/* match() returns the small-side rows that match the large-side row.
|
||||
On a UM join, it uses largeSideRow,
|
||||
on a PM join, it uses index and threadID.
|
||||
*/
|
||||
void match(rowgroup::Row& largeSideRow, uint32_t index, uint32_t threadID,
|
||||
std::vector<rowgroup::Row::Pointer>* matches);
|
||||
|
||||
/* On a PM left outer join + aggregation, the result is already complete.
|
||||
No need to match, just mark.
|
||||
*/
|
||||
void markMatches(uint32_t threadID, uint32_t rowCount);
|
||||
/* On a PM left outer join + aggregation, the result is already complete.
|
||||
No need to match, just mark.
|
||||
*/
|
||||
void markMatches(uint32_t threadID, uint32_t rowCount);
|
||||
|
||||
/* For small outer joins, this is how matches are marked now. */
|
||||
void markMatches(uint32_t threadID, const std::vector<rowgroup::Row::Pointer> &matches);
|
||||
/* For small outer joins, this is how matches are marked now. */
|
||||
void markMatches(uint32_t threadID, const std::vector<rowgroup::Row::Pointer>& matches);
|
||||
|
||||
/* Some accessors */
|
||||
inline bool inPM() const { return joinAlg == PM; }
|
||||
inline bool inUM() const { return joinAlg == UM; }
|
||||
void setInPM();
|
||||
void setInUM();
|
||||
void setThreadCount(uint32_t cnt);
|
||||
void setPMJoinResults(boost::shared_array<std::vector<uint32_t> >,
|
||||
uint32_t threadID);
|
||||
boost::shared_array<std::vector<uint32_t> > getPMJoinArrays(uint32_t threadID);
|
||||
std::vector<rowgroup::Row::Pointer> *getSmallSide() { return &rows; }
|
||||
inline bool smallOuterJoin() { return ((joinType & joblist::SMALLOUTER) != 0); }
|
||||
inline bool largeOuterJoin() { return ((joinType & joblist::LARGEOUTER) != 0); }
|
||||
inline bool innerJoin() { return joinType == joblist::INNER; }
|
||||
inline bool fullOuterJoin() { return (smallOuterJoin() && largeOuterJoin()); }
|
||||
inline joblist::JoinType getJoinType() { return joinType; }
|
||||
inline const rowgroup::RowGroup &getSmallRG() { return smallRG; }
|
||||
inline const rowgroup::RowGroup &getLargeRG() { return largeRG; }
|
||||
inline uint32_t getSmallKeyColumn() { return smallKeyColumns[0]; }
|
||||
inline uint32_t getLargeKeyColumn() { return largeKeyColumns[0]; }
|
||||
bool hasNullJoinColumn(const rowgroup::Row &largeRow) const;
|
||||
void getUnmarkedRows(std::vector<rowgroup::Row::Pointer> *out);
|
||||
std::string getTableName() const;
|
||||
void setTableName(const std::string &tname);
|
||||
/* Some accessors */
|
||||
inline bool inPM() const
|
||||
{
|
||||
return joinAlg == PM;
|
||||
}
|
||||
inline bool inUM() const
|
||||
{
|
||||
return joinAlg == UM;
|
||||
}
|
||||
void setInPM();
|
||||
void setInUM();
|
||||
void setThreadCount(uint32_t cnt);
|
||||
void setPMJoinResults(boost::shared_array<std::vector<uint32_t> >,
|
||||
uint32_t threadID);
|
||||
boost::shared_array<std::vector<uint32_t> > getPMJoinArrays(uint32_t threadID);
|
||||
std::vector<rowgroup::Row::Pointer>* getSmallSide()
|
||||
{
|
||||
return &rows;
|
||||
}
|
||||
inline bool smallOuterJoin()
|
||||
{
|
||||
return ((joinType & joblist::SMALLOUTER) != 0);
|
||||
}
|
||||
inline bool largeOuterJoin()
|
||||
{
|
||||
return ((joinType & joblist::LARGEOUTER) != 0);
|
||||
}
|
||||
inline bool innerJoin()
|
||||
{
|
||||
return joinType == joblist::INNER;
|
||||
}
|
||||
inline bool fullOuterJoin()
|
||||
{
|
||||
return (smallOuterJoin() && largeOuterJoin());
|
||||
}
|
||||
inline joblist::JoinType getJoinType()
|
||||
{
|
||||
return joinType;
|
||||
}
|
||||
inline const rowgroup::RowGroup& getSmallRG()
|
||||
{
|
||||
return smallRG;
|
||||
}
|
||||
inline const rowgroup::RowGroup& getLargeRG()
|
||||
{
|
||||
return largeRG;
|
||||
}
|
||||
inline uint32_t getSmallKeyColumn()
|
||||
{
|
||||
return smallKeyColumns[0];
|
||||
}
|
||||
inline uint32_t getLargeKeyColumn()
|
||||
{
|
||||
return largeKeyColumns[0];
|
||||
}
|
||||
bool hasNullJoinColumn(const rowgroup::Row& largeRow) const;
|
||||
void getUnmarkedRows(std::vector<rowgroup::Row::Pointer>* out);
|
||||
std::string getTableName() const;
|
||||
void setTableName(const std::string& tname);
|
||||
|
||||
/* To allow sorting */
|
||||
bool operator<(const TupleJoiner &) const;
|
||||
/* To allow sorting */
|
||||
bool operator<(const TupleJoiner&) const;
|
||||
|
||||
uint64_t getMemUsage() const;
|
||||
uint64_t getMemUsage() const;
|
||||
|
||||
/* Typeless join interface */
|
||||
inline bool isTypelessJoin() { return typelessJoin; }
|
||||
inline bool isSignedUnsignedJoin() { return bSignedUnsignedJoin; }
|
||||
inline const std::vector<uint32_t> & getSmallKeyColumns() { return smallKeyColumns; }
|
||||
inline const std::vector<uint32_t> & getLargeKeyColumns() { return largeKeyColumns; }
|
||||
inline uint32_t getKeyLength() { return keyLength; }
|
||||
/* Typeless join interface */
|
||||
inline bool isTypelessJoin()
|
||||
{
|
||||
return typelessJoin;
|
||||
}
|
||||
inline bool isSignedUnsignedJoin()
|
||||
{
|
||||
return bSignedUnsignedJoin;
|
||||
}
|
||||
inline const std::vector<uint32_t>& getSmallKeyColumns()
|
||||
{
|
||||
return smallKeyColumns;
|
||||
}
|
||||
inline const std::vector<uint32_t>& getLargeKeyColumns()
|
||||
{
|
||||
return largeKeyColumns;
|
||||
}
|
||||
inline uint32_t getKeyLength()
|
||||
{
|
||||
return keyLength;
|
||||
}
|
||||
|
||||
/* Runtime casual partitioning support */
|
||||
inline const boost::scoped_array<bool> &discreteCPValues() { return discreteValues; }
|
||||
inline const boost::scoped_array<std::vector<int64_t> > &getCPData() { return cpValues; }
|
||||
inline void setUniqueLimit(uint32_t limit) { uniqueLimit = limit; }
|
||||
/* Runtime casual partitioning support */
|
||||
inline const boost::scoped_array<bool>& discreteCPValues()
|
||||
{
|
||||
return discreteValues;
|
||||
}
|
||||
inline const boost::scoped_array<std::vector<int64_t> >& getCPData()
|
||||
{
|
||||
return cpValues;
|
||||
}
|
||||
inline void setUniqueLimit(uint32_t limit)
|
||||
{
|
||||
uniqueLimit = limit;
|
||||
}
|
||||
|
||||
/* Semi-join interface */
|
||||
inline bool semiJoin() { return ((joinType & joblist::SEMI) != 0); }
|
||||
inline bool antiJoin() { return ((joinType & joblist::ANTI) != 0); }
|
||||
inline bool scalar() { return ((joinType & joblist::SCALAR) != 0); }
|
||||
inline bool matchnulls() { return ((joinType & joblist::MATCHNULLS) != 0); }
|
||||
inline bool hasFEFilter() { return fe.get(); }
|
||||
inline boost::shared_ptr<funcexp::FuncExpWrapper> getFcnExpFilter() { return fe; }
|
||||
void setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> fe);
|
||||
inline bool evaluateFilter(rowgroup::Row &r, uint32_t index) { return fes[index].evaluate(&r); }
|
||||
inline uint64_t getJoinNullValue() { return joblist::BIGINTNULL; } // a normalized NULL value
|
||||
inline uint64_t smallNullValue() { return nullValueForJoinColumn; }
|
||||
/* Semi-join interface */
|
||||
inline bool semiJoin()
|
||||
{
|
||||
return ((joinType & joblist::SEMI) != 0);
|
||||
}
|
||||
inline bool antiJoin()
|
||||
{
|
||||
return ((joinType & joblist::ANTI) != 0);
|
||||
}
|
||||
inline bool scalar()
|
||||
{
|
||||
return ((joinType & joblist::SCALAR) != 0);
|
||||
}
|
||||
inline bool matchnulls()
|
||||
{
|
||||
return ((joinType & joblist::MATCHNULLS) != 0);
|
||||
}
|
||||
inline bool hasFEFilter()
|
||||
{
|
||||
return fe.get();
|
||||
}
|
||||
inline boost::shared_ptr<funcexp::FuncExpWrapper> getFcnExpFilter()
|
||||
{
|
||||
return fe;
|
||||
}
|
||||
void setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> fe);
|
||||
inline bool evaluateFilter(rowgroup::Row& r, uint32_t index)
|
||||
{
|
||||
return fes[index].evaluate(&r);
|
||||
}
|
||||
inline uint64_t getJoinNullValue()
|
||||
{
|
||||
return joblist::BIGINTNULL; // a normalized NULL value
|
||||
}
|
||||
inline uint64_t smallNullValue()
|
||||
{
|
||||
return nullValueForJoinColumn;
|
||||
}
|
||||
|
||||
// Disk-based join support
|
||||
void clearData();
|
||||
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
|
||||
bool isFinished() { return finished; }
|
||||
// Disk-based join support
|
||||
void clearData();
|
||||
boost::shared_ptr<TupleJoiner> copyForDiskJoin();
|
||||
bool isFinished()
|
||||
{
|
||||
return finished;
|
||||
}
|
||||
|
||||
private:
|
||||
typedef std::tr1::unordered_multimap<int64_t, uint8_t *, hasher, std::equal_to<int64_t>,
|
||||
utils::STLPoolAllocator<std::pair<const int64_t, uint8_t *> > > hash_t;
|
||||
typedef std::tr1::unordered_multimap<int64_t, rowgroup::Row::Pointer, hasher, std::equal_to<int64_t>,
|
||||
utils::STLPoolAllocator<std::pair<const int64_t, rowgroup::Row::Pointer> > > sthash_t;
|
||||
typedef std::tr1::unordered_multimap<TypelessData, rowgroup::Row::Pointer, hasher, std::equal_to<TypelessData>,
|
||||
utils::STLPoolAllocator<std::pair<const TypelessData, rowgroup::Row::Pointer> > > typelesshash_t;
|
||||
typedef std::tr1::unordered_multimap<int64_t, uint8_t*, hasher, std::equal_to<int64_t>,
|
||||
utils::STLPoolAllocator<std::pair<const int64_t, uint8_t*> > > hash_t;
|
||||
typedef std::tr1::unordered_multimap<int64_t, rowgroup::Row::Pointer, hasher, std::equal_to<int64_t>,
|
||||
utils::STLPoolAllocator<std::pair<const int64_t, rowgroup::Row::Pointer> > > sthash_t;
|
||||
typedef std::tr1::unordered_multimap<TypelessData, rowgroup::Row::Pointer, hasher, std::equal_to<TypelessData>,
|
||||
utils::STLPoolAllocator<std::pair<const TypelessData, rowgroup::Row::Pointer> > > typelesshash_t;
|
||||
|
||||
typedef hash_t::iterator iterator;
|
||||
typedef typelesshash_t::iterator thIterator;
|
||||
typedef hash_t::iterator iterator;
|
||||
typedef typelesshash_t::iterator thIterator;
|
||||
|
||||
TupleJoiner();
|
||||
TupleJoiner(const TupleJoiner &);
|
||||
TupleJoiner & operator=(const TupleJoiner &);
|
||||
TupleJoiner();
|
||||
TupleJoiner(const TupleJoiner&);
|
||||
TupleJoiner& operator=(const TupleJoiner&);
|
||||
|
||||
iterator begin() { return h->begin(); }
|
||||
iterator end() { return h->end(); }
|
||||
iterator begin()
|
||||
{
|
||||
return h->begin();
|
||||
}
|
||||
iterator end()
|
||||
{
|
||||
return h->end();
|
||||
}
|
||||
|
||||
|
||||
rowgroup::RGData smallNullMemory;
|
||||
rowgroup::RGData smallNullMemory;
|
||||
|
||||
|
||||
boost::scoped_ptr<hash_t> h; // used for UM joins on ints
|
||||
boost::scoped_ptr<sthash_t> sth; // used for UM join on ints where the backing table uses a string table
|
||||
std::vector<rowgroup::Row::Pointer> rows; // used for PM join
|
||||
boost::scoped_ptr<hash_t> h; // used for UM joins on ints
|
||||
boost::scoped_ptr<sthash_t> sth; // used for UM join on ints where the backing table uses a string table
|
||||
std::vector<rowgroup::Row::Pointer> rows; // used for PM join
|
||||
|
||||
/* This struct is rough. The BPP-JL stores the parsed results for
|
||||
the logical block being processed. There are X threads at once, so
|
||||
up to X logical blocks being processed. For each of those there's a vector
|
||||
of matches. Each match is an index into 'rows'. */
|
||||
boost::shared_array<boost::shared_array<std::vector<uint32_t> > > pmJoinResults;
|
||||
rowgroup::RowGroup smallRG, largeRG;
|
||||
boost::scoped_array<rowgroup::Row> smallRow;
|
||||
//boost::shared_array<uint8_t> smallNullMemory;
|
||||
rowgroup::Row smallNullRow;
|
||||
/* This struct is rough. The BPP-JL stores the parsed results for
|
||||
the logical block being processed. There are X threads at once, so
|
||||
up to X logical blocks being processed. For each of those there's a vector
|
||||
of matches. Each match is an index into 'rows'. */
|
||||
boost::shared_array<boost::shared_array<std::vector<uint32_t> > > pmJoinResults;
|
||||
rowgroup::RowGroup smallRG, largeRG;
|
||||
boost::scoped_array<rowgroup::Row> smallRow;
|
||||
//boost::shared_array<uint8_t> smallNullMemory;
|
||||
rowgroup::Row smallNullRow;
|
||||
|
||||
enum JoinAlg {
|
||||
INSERTING,
|
||||
PM,
|
||||
UM,
|
||||
LARGE
|
||||
};
|
||||
JoinAlg joinAlg;
|
||||
joblist::JoinType joinType;
|
||||
boost::shared_ptr<utils::PoolAllocator> _pool; // pool for the table and nodes
|
||||
uint32_t threadCount;
|
||||
std::string tableName;
|
||||
enum JoinAlg
|
||||
{
|
||||
INSERTING,
|
||||
PM,
|
||||
UM,
|
||||
LARGE
|
||||
};
|
||||
JoinAlg joinAlg;
|
||||
joblist::JoinType joinType;
|
||||
boost::shared_ptr<utils::PoolAllocator> _pool; // pool for the table and nodes
|
||||
uint32_t threadCount;
|
||||
std::string tableName;
|
||||
|
||||
/* vars, & fcns for typeless join */
|
||||
bool typelessJoin;
|
||||
std::vector<uint32_t> smallKeyColumns, largeKeyColumns;
|
||||
boost::scoped_ptr<typelesshash_t> ht; // used for UM join on strings
|
||||
uint32_t keyLength;
|
||||
utils::FixedAllocator storedKeyAlloc;
|
||||
/* vars, & fcns for typeless join */
|
||||
bool typelessJoin;
|
||||
std::vector<uint32_t> smallKeyColumns, largeKeyColumns;
|
||||
boost::scoped_ptr<typelesshash_t> ht; // used for UM join on strings
|
||||
uint32_t keyLength;
|
||||
utils::FixedAllocator storedKeyAlloc;
|
||||
boost::scoped_array<utils::FixedAllocator> tmpKeyAlloc;
|
||||
bool bSignedUnsignedJoin; // Set if we have a signed vs unsigned compare in a join. When not set, we can skip checking the sign bit.
|
||||
|
||||
/* semi-join vars & fcns */
|
||||
boost::shared_ptr<funcexp::FuncExpWrapper> fe;
|
||||
boost::scoped_array<funcexp::FuncExpWrapper> fes; // holds X copies of fe, one per thread
|
||||
// this var is only used to normalize the NULL values for single-column joins,
|
||||
// will have to change when/if we need to support that for compound or string joins
|
||||
int64_t nullValueForJoinColumn;
|
||||
/* semi-join vars & fcns */
|
||||
boost::shared_ptr<funcexp::FuncExpWrapper> fe;
|
||||
boost::scoped_array<funcexp::FuncExpWrapper> fes; // holds X copies of fe, one per thread
|
||||
// this var is only used to normalize the NULL values for single-column joins,
|
||||
// will have to change when/if we need to support that for compound or string joins
|
||||
int64_t nullValueForJoinColumn;
|
||||
|
||||
/* Runtime casual partitioning support */
|
||||
void updateCPData(const rowgroup::Row &r);
|
||||
boost::scoped_array<bool> discreteValues;
|
||||
boost::scoped_array<std::vector<int64_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
|
||||
uint32_t uniqueLimit;
|
||||
bool finished;
|
||||
/* Runtime casual partitioning support */
|
||||
void updateCPData(const rowgroup::Row& r);
|
||||
boost::scoped_array<bool> discreteValues;
|
||||
boost::scoped_array<std::vector<int64_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
|
||||
uint32_t uniqueLimit;
|
||||
bool finished;
|
||||
};
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user