mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
Implement percent_rank window function
This commit is contained in:
@ -56,6 +56,8 @@ Item_window_func::fix_fields(THD *thd, Item **ref)
|
||||
if (window_func->fix_fields(thd, ref))
|
||||
return true;
|
||||
|
||||
fix_length_and_dec();
|
||||
|
||||
max_length= window_func->max_length;
|
||||
|
||||
fixed= 1;
|
||||
@ -180,3 +182,27 @@ void Item_window_func::advance_window()
|
||||
}
|
||||
window_func->add();
|
||||
}
|
||||
|
||||
bool Item_sum_percent_rank::add()
|
||||
{
|
||||
row_number++;
|
||||
if (test_if_group_changed(orderby_fields) > -1)
|
||||
{
|
||||
/* Row value changed. */
|
||||
cur_rank= row_number;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
  Prepare the function for evaluation over the given window specification.

  One Cached_item is created for the first item of every element in the
  window's order list and appended to orderby_fields; the counters are then
  reset through clear().
*/
void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec)
{
  /* TODO: move this into Item_window_func? */
  ORDER *order_elem= window_spec->order_list.first;
  while (order_elem)
  {
    orderby_fields.push_back(new_Cached_item(thd, order_elem->item[0], TRUE));
    order_elem= order_elem->next;
  }
  clear();
}
|
||||
|
||||
|
||||
|
@ -70,6 +70,7 @@ public:
|
||||
|
||||
class Item_sum_rank: public Item_sum_int
|
||||
{
|
||||
protected:
|
||||
longlong row_number; // just ROW_NUMBER()
|
||||
longlong cur_rank; // current value
|
||||
|
||||
@ -168,6 +169,103 @@ class Item_sum_dense_rank: public Item_sum_int
|
||||
|
||||
};
|
||||
|
||||
/* TODO-cvicentiu
|
||||
* Perhaps this is overengineering, but I would like to decouple the 2-pass
|
||||
* algorithm from the specific action that must be performed during the
|
||||
* first pass. The second pass can make use of the "add" function from the
|
||||
* Item_sum_<window_function>.
|
||||
*/
|
||||
|
||||
/*
|
||||
This class represents a generic interface for window functions that need
|
||||
to store additional information. Such window functions include percent_rank
|
||||
and cume_dist.
|
||||
*/
|
||||
class Window_context
|
||||
{
|
||||
public:
|
||||
virtual void add_field_to_context(Field* field) = 0;
|
||||
virtual void reset() = 0;
|
||||
virtual ~Window_context() {};
|
||||
};
|
||||
|
||||
/*
|
||||
A generic interface that specifies the datatype that the context represents.
|
||||
*/
|
||||
template <typename T>
|
||||
class Window_context_getter
|
||||
{
|
||||
protected:
|
||||
virtual T get_field_context(const Field* field) = 0;
|
||||
virtual ~Window_context_getter() {};
|
||||
};
|
||||
|
||||
/*
|
||||
A window function context representing the number of rows that are present
|
||||
within a partition. Because the number of rows does not depend on the
|
||||
specific value within the current field, we ignore the parameter
|
||||
in this case.
|
||||
*/
|
||||
class Window_context_row_count :
|
||||
public Window_context, Window_context_getter<ulonglong>
|
||||
{
|
||||
public:
|
||||
Window_context_row_count() : num_rows_(0) {};
|
||||
|
||||
void add_field_to_context(Field* field __attribute__((unused)))
|
||||
{
|
||||
num_rows_++;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
num_rows_= 0;
|
||||
}
|
||||
|
||||
ulonglong get_field_context(const Field* field __attribute__((unused)))
|
||||
{
|
||||
return num_rows_;
|
||||
}
|
||||
private:
|
||||
ulonglong num_rows_;
|
||||
};
|
||||
|
||||
class Window_context_row_and_group_count :
|
||||
public Window_context, Window_context_getter<std::pair<ulonglong, ulonglong> >
|
||||
{
|
||||
public:
|
||||
Window_context_row_and_group_count(void * group_list) {}
|
||||
};
|
||||
|
||||
/*
|
||||
An abstract class representing an item that holds a context.
|
||||
*/
|
||||
class Item_context
|
||||
{
|
||||
public:
|
||||
Item_context() : context_(NULL) {}
|
||||
Window_context* get_window_context() { return context_; }
|
||||
|
||||
virtual bool create_window_context() = 0;
|
||||
virtual void delete_window_context() = 0;
|
||||
|
||||
protected:
|
||||
Window_context* context_;
|
||||
};
|
||||
|
||||
/*
|
||||
A base window function (aggregate) that also holds a context.
|
||||
|
||||
NOTE: All two pass window functions need to implement
|
||||
this interface.
|
||||
*/
|
||||
class Item_sum_window_with_context : public Item_sum_num,
|
||||
public Item_context
|
||||
{
|
||||
public:
|
||||
Item_sum_window_with_context(THD *thd)
|
||||
: Item_sum_num(thd), Item_context() {}
|
||||
};
|
||||
|
||||
/*
|
||||
@detail
|
||||
@ -177,23 +275,43 @@ class Item_sum_dense_rank: public Item_sum_int
|
||||
|
||||
Computation of this function requires two passes:
|
||||
- First pass to find #rows in the partition
|
||||
This is held within the row_count context.
|
||||
- Second pass to compute rank of current row and the value of the function
|
||||
*/
|
||||
|
||||
class Item_sum_percent_rank: public Item_sum_num
|
||||
class Item_sum_percent_rank: public Item_sum_window_with_context,
|
||||
public Window_context_row_count
|
||||
{
|
||||
longlong rank;
|
||||
longlong partition_rows;
|
||||
|
||||
void clear() {}
|
||||
bool add() { return false; }
|
||||
void update_field() {}
|
||||
|
||||
public:
|
||||
Item_sum_percent_rank(THD *thd)
|
||||
: Item_sum_num(thd), rank(0), partition_rows(0) {}
|
||||
: Item_sum_window_with_context(thd), cur_rank(1) {}
|
||||
|
||||
double val_real() { return 0; }
|
||||
longlong val_int()
{
  /*
    The value of percent rank is a real number, so nobody is expected to
    ask for it as an integer: truncation would leave only 0 or 1.
    Debug builds assert; otherwise 0 is returned.
  */
  DBUG_ASSERT(0);
  return 0;
}
|
||||
|
||||
double val_real()
|
||||
{
|
||||
/*
|
||||
We can not get the real value without knowing the number of rows
|
||||
in the partition. Don't divide by 0.
|
||||
*/
|
||||
if (!get_context_())
|
||||
{
|
||||
// Calling this kind of function with a context makes no sense.
|
||||
DBUG_ASSERT(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
longlong partition_rows = get_context_()->get_field_context(result_field);
|
||||
return partition_rows > 1 ?
|
||||
static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
|
||||
}
|
||||
|
||||
enum Sumfunctype sum_func () const
|
||||
{
|
||||
@ -205,11 +323,60 @@ class Item_sum_percent_rank: public Item_sum_num
|
||||
return "percent_rank";
|
||||
}
|
||||
|
||||
bool create_window_context()
|
||||
{
|
||||
// TODO-cvicentiu: Currently this means we must make sure to delete
|
||||
// the window context. We can potentially allocate this on the THD memroot.
|
||||
// At the same time, this is only necessary for a small portion of the
|
||||
// query execution and it does not make sense to keep it for all of it.
|
||||
context_ = new Window_context_row_count();
|
||||
if (context_ == NULL)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void delete_window_context()
|
||||
{
|
||||
if (context_)
|
||||
delete get_context_();
|
||||
context_ = NULL;
|
||||
}
|
||||
|
||||
void update_field() {}
|
||||
|
||||
void clear()
|
||||
{
|
||||
cur_rank= 1;
|
||||
row_number= 0;
|
||||
}
|
||||
bool add();
|
||||
enum Item_result result_type () const { return REAL_RESULT; }
|
||||
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
|
||||
|
||||
void fix_length_and_dec()
|
||||
{
|
||||
decimals = 10; // TODO-cvicentiu find out how many decimals the standard
|
||||
// requires.
|
||||
}
|
||||
|
||||
void setup_window_func(THD *thd, Window_spec *window_spec);
|
||||
|
||||
private:
|
||||
longlong cur_rank; // Current rank of the current row.
|
||||
longlong row_number; // Value if this were ROW_NUMBER() function.
|
||||
|
||||
List<Cached_item> orderby_fields;
|
||||
|
||||
/* Helper function so that we don't cast the context every time. */
|
||||
Window_context_row_count* get_context_()
|
||||
{
|
||||
return static_cast<Window_context_row_count *>(context_);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
@detail
|
||||
"The relative rank of a row R is defined as NP/NR, where
|
||||
@ -221,18 +388,11 @@ class Item_sum_percent_rank: public Item_sum_num
|
||||
two passes.
|
||||
*/
|
||||
|
||||
class Item_sum_cume_dist: public Item_sum_num
|
||||
class Item_sum_cume_dist: public Item_sum_percent_rank
|
||||
{
|
||||
longlong count;
|
||||
longlong partition_rows;
|
||||
|
||||
void clear() {}
|
||||
bool add() { return false; }
|
||||
void update_field() {}
|
||||
|
||||
public:
|
||||
Item_sum_cume_dist(THD *thd)
|
||||
: Item_sum_num(thd), count(0), partition_rows(0) {}
|
||||
: Item_sum_percent_rank(thd) {}
|
||||
|
||||
double val_real() { return 0; }
|
||||
|
||||
@ -245,9 +405,6 @@ class Item_sum_cume_dist: public Item_sum_num
|
||||
{
|
||||
return "cume_dist";
|
||||
}
|
||||
|
||||
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -361,7 +518,7 @@ public:
|
||||
List<Item> &fields, uint flags);
|
||||
void fix_length_and_dec()
|
||||
{
|
||||
window_func->fix_length_and_dec();
|
||||
decimals = window_func->decimals;
|
||||
}
|
||||
|
||||
const char* func_name() const { return "WF"; }
|
||||
@ -369,7 +526,6 @@ public:
|
||||
bool fix_fields(THD *thd, Item **ref);
|
||||
|
||||
bool resolve_window_name(THD *thd);
|
||||
|
||||
};
|
||||
|
||||
#endif /* ITEM_WINDOWFUNC_INCLUDED */
|
||||
|
@ -705,6 +705,110 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
|
||||
}
|
||||
|
||||
|
||||
/*
  @brief
    Compute a window function that needs two passes over every partition.

  @param item_win  The window function item. Its window_func must be an
                   Item_sum_window_with_context.
  @param table     The table holding the rows; the computed values are
                   written back into it.
  @param info      Reader used for the first pass.

  @detail
    The two pass algorithm is as follows:
    We have a sorted table according to the partition and order by clauses.
    1. Scan through the table till we reach a partition boundary.
    2. For each row that we scan, add it to the context.
    3. Once the partition boundary is met, do a second scan through the
       current partition and use the context information to compute the
       value for the window function for that partition.
    4. Reset the context.
    5. Repeat from 1 till end of table.

  @return
    false  OK
    true   Error (allocation failure, read error during the second pass,
           row update error, or failure to return to the saved position)
*/
bool compute_two_pass_window_functions(Item_window_func *item_win,
                                       TABLE *table, READ_RECORD *info)
{
  // TODO-cvicentiu why not initialize the record for when we need, _in_
  // this function.
  READ_RECORD *info2= new READ_RECORD();
  int err;
  bool is_error= false;
  bool first_row= true;
  clone_read_record(info, info2);
  Item_sum_window_with_context *window_func=
    static_cast<Item_sum_window_with_context *>(item_win->window_func);

  uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0));
  /* Without the buffer the saved position cannot be stored. */
  if (rowid_buf == NULL)
  {
    delete info2;
    return true;
  }

  /* Unable to allocate a new context: release what was obtained so far. */
  if (window_func->create_window_context())
  {
    my_free(rowid_buf);
    delete info2;
    return true;
  }

  Window_context *context= window_func->get_window_context();

  bool done= false;
  longlong rows_in_current_partition= 0;
  // TODO handle end of table updating.
  while (!done)
  {
    /*
      NOTE(review): any non-zero result ends the scan; a read error is not
      told apart from reaching the end of the table here - confirm whether
      that is intended.
    */
    if ((err= info->read_record(info)))
      done= true;

    bool partition_changed= done || item_win->check_partition_bound() > -1;

    // The first time we always have a partition changed. Ignore it.
    if (first_row)
    {
      partition_changed= false;
      first_row= false;
    }

    if (partition_changed)
    {
      /*
        We are now looking at the first row for the next partition, or at the
        end of the table. Either way, we must remember this position for when
        we finish doing the second pass.
      */
      table->file->position(table->record[0]);
      memcpy(rowid_buf, table->file->ref, table->file->ref_length);

      /* Second pass: revisit every row of the partition just counted. */
      for (longlong row_number= 0; row_number < rows_in_current_partition;
           row_number++)
      {
        if ((err= info2->read_record(info2)))
        {
          is_error= true;
          break;
        }
        window_func->add();
        // Save the window function into the table.
        item_win->save_in_field(item_win->result_field, true);
        err= table->file->ha_update_row(table->record[1], table->record[0]);
        if (err && err != HA_ERR_RECORD_IS_THE_SAME)
        {
          is_error= true;
          break;
        }
      }

      if (is_error)
        break;

      rows_in_current_partition= 0;
      window_func->clear();
      context->reset();

      // Return to the beginning of the new partition.
      err= table->file->ha_rnd_pos(table->record[0], rowid_buf);
      /*
        A failure only matters when there are rows left to process: the
        first pass would otherwise continue from an unknown position.
      */
      if (err && !done)
      {
        is_error= true;
        break;
      }
    }
    rows_in_current_partition++;
    context->add_field_to_context(item_win->result_field);
  }

  window_func->delete_window_context();
  delete info2;
  my_free(rowid_buf);
  return is_error;
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
This function is called by JOIN::exec to compute window function values
|
||||
@ -899,6 +1003,13 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
|
||||
is_error= true;
|
||||
break;
|
||||
}
|
||||
case Item_sum::PERCENT_RANK_FUNC:
|
||||
case Item_sum::CUME_DIST_FUNC:
|
||||
{
|
||||
if (compute_two_pass_window_functions(item_win, tbl, &info))
|
||||
is_error= true;
|
||||
break;
|
||||
}
|
||||
case Item_sum::COUNT_FUNC:
|
||||
{
|
||||
/*
|
||||
|
Reference in New Issue
Block a user