mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
Implement percent_rank window function
This commit is contained in:
@@ -56,6 +56,8 @@ Item_window_func::fix_fields(THD *thd, Item **ref)
|
|||||||
if (window_func->fix_fields(thd, ref))
|
if (window_func->fix_fields(thd, ref))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
fix_length_and_dec();
|
||||||
|
|
||||||
max_length= window_func->max_length;
|
max_length= window_func->max_length;
|
||||||
|
|
||||||
fixed= 1;
|
fixed= 1;
|
||||||
@@ -180,3 +182,27 @@ void Item_window_func::advance_window()
|
|||||||
}
|
}
|
||||||
window_func->add();
|
window_func->add();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Item_sum_percent_rank::add()
|
||||||
|
{
|
||||||
|
row_number++;
|
||||||
|
if (test_if_group_changed(orderby_fields) > -1)
|
||||||
|
{
|
||||||
|
/* Row value changed. */
|
||||||
|
cur_rank= row_number;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
  Prepare the function for a window specification.

  One Cached_item is created for the first item of every element of the
  window's ORDER BY list and appended to orderby_fields; add() later hands
  that list to test_if_group_changed(). The rank state is then reset through
  clear().

  @param thd          passed through to new_Cached_item()
  @param window_spec  window specification whose order_list is walked
*/
void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec)
{
  /* TODO: move this into Item_window_func? */
  ORDER *order_elem= window_spec->order_list.first;
  while (order_elem)
  {
    orderby_fields.push_back(new_Cached_item(thd, order_elem->item[0], TRUE));
    order_elem= order_elem->next;
  }

  clear();
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -70,6 +70,7 @@ public:
|
|||||||
|
|
||||||
class Item_sum_rank: public Item_sum_int
|
class Item_sum_rank: public Item_sum_int
|
||||||
{
|
{
|
||||||
|
protected:
|
||||||
longlong row_number; // just ROW_NUMBER()
|
longlong row_number; // just ROW_NUMBER()
|
||||||
longlong cur_rank; // current value
|
longlong cur_rank; // current value
|
||||||
|
|
||||||
@@ -168,6 +169,103 @@ class Item_sum_dense_rank: public Item_sum_int
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* TODO-cvicentiu
|
||||||
|
* Perhaps this is overengineering, but I would like to decouple the 2-pass
|
||||||
|
* algorithm from the specific action that must be performed during the
|
||||||
|
* first pass. The second pass can make use of the "add" function from the
|
||||||
|
* Item_sum_<window_function>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
This class represents a generic interface for window functions that need
|
||||||
|
to store additional information. Such window functions include percent_rank
|
||||||
|
and cume_dist.
|
||||||
|
*/
|
||||||
|
class Window_context
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual void add_field_to_context(Field* field) = 0;
|
||||||
|
virtual void reset() = 0;
|
||||||
|
virtual ~Window_context() {};
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
A generic interface that specifies the datatype that the context represents.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
class Window_context_getter
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
virtual T get_field_context(const Field* field) = 0;
|
||||||
|
virtual ~Window_context_getter() {};
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
A window function context representing the number of rows that are present
|
||||||
|
within a partition. Because the number of rows does not depend on the
|
||||||
|
specific value within the current field, we ignore the parameter
|
||||||
|
in this case.
|
||||||
|
*/
|
||||||
|
class Window_context_row_count :
|
||||||
|
public Window_context, Window_context_getter<ulonglong>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Window_context_row_count() : num_rows_(0) {};
|
||||||
|
|
||||||
|
void add_field_to_context(Field* field __attribute__((unused)))
|
||||||
|
{
|
||||||
|
num_rows_++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset()
|
||||||
|
{
|
||||||
|
num_rows_= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ulonglong get_field_context(const Field* field __attribute__((unused)))
|
||||||
|
{
|
||||||
|
return num_rows_;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
ulonglong num_rows_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Window_context_row_and_group_count :
|
||||||
|
public Window_context, Window_context_getter<std::pair<ulonglong, ulonglong> >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Window_context_row_and_group_count(void * group_list) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
An abstract class representing an item that holds a context.
|
||||||
|
*/
|
||||||
|
class Item_context
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Item_context() : context_(NULL) {}
|
||||||
|
Window_context* get_window_context() { return context_; }
|
||||||
|
|
||||||
|
virtual bool create_window_context() = 0;
|
||||||
|
virtual void delete_window_context() = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Window_context* context_;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
A base window function (aggregate) that also holds a context.
|
||||||
|
|
||||||
|
NOTE: All two pass window functions need to implement
|
||||||
|
this interface.
|
||||||
|
*/
|
||||||
|
class Item_sum_window_with_context : public Item_sum_num,
|
||||||
|
public Item_context
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Item_sum_window_with_context(THD *thd)
|
||||||
|
: Item_sum_num(thd), Item_context() {}
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@detail
|
@detail
|
||||||
@@ -177,23 +275,43 @@ class Item_sum_dense_rank: public Item_sum_int
|
|||||||
|
|
||||||
Computation of this function requires two passes:
|
Computation of this function requires two passes:
|
||||||
- First pass to find #rows in the partition
|
- First pass to find #rows in the partition
|
||||||
|
This is held within the row_count context.
|
||||||
- Second pass to compute rank of current row and the value of the function
|
- Second pass to compute rank of current row and the value of the function
|
||||||
*/
|
*/
|
||||||
|
class Item_sum_percent_rank: public Item_sum_window_with_context,
|
||||||
class Item_sum_percent_rank: public Item_sum_num
|
public Window_context_row_count
|
||||||
{
|
{
|
||||||
longlong rank;
|
|
||||||
longlong partition_rows;
|
|
||||||
|
|
||||||
void clear() {}
|
|
||||||
bool add() { return false; }
|
|
||||||
void update_field() {}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Item_sum_percent_rank(THD *thd)
|
Item_sum_percent_rank(THD *thd)
|
||||||
: Item_sum_num(thd), rank(0), partition_rows(0) {}
|
: Item_sum_window_with_context(thd), cur_rank(1) {}
|
||||||
|
|
||||||
double val_real() { return 0; }
|
longlong val_int()
{
  /*
    PERCENT_RANK produces a real value; read as an integer it would be
    truncated to either 0 or 1, so an integer read is never expected.
    Flag such a call through DBUG_ASSERT and fall back to 0.
  */
  DBUG_ASSERT(0);
  return 0;
}
|
||||||
|
|
||||||
|
double val_real()
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
We can not get the real value without knowing the number of rows
|
||||||
|
in the partition. Don't divide by 0.
|
||||||
|
*/
|
||||||
|
if (!get_context_())
|
||||||
|
{
|
||||||
|
// Calling this kind of function with a context makes no sense.
|
||||||
|
DBUG_ASSERT(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
longlong partition_rows = get_context_()->get_field_context(result_field);
|
||||||
|
return partition_rows > 1 ?
|
||||||
|
static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
enum Sumfunctype sum_func () const
|
enum Sumfunctype sum_func () const
|
||||||
{
|
{
|
||||||
@@ -205,11 +323,60 @@ class Item_sum_percent_rank: public Item_sum_num
|
|||||||
return "percent_rank";
|
return "percent_rank";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool create_window_context()
|
||||||
|
{
|
||||||
|
// TODO-cvicentiu: Currently this means we must make sure to delete
|
||||||
|
// the window context. We can potentially allocate this on the THD memroot.
|
||||||
|
// At the same time, this is only necessary for a small portion of the
|
||||||
|
// query execution and it does not make sense to keep it for all of it.
|
||||||
|
context_ = new Window_context_row_count();
|
||||||
|
if (context_ == NULL)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void delete_window_context()
|
||||||
|
{
|
||||||
|
if (context_)
|
||||||
|
delete get_context_();
|
||||||
|
context_ = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void update_field() {}
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
cur_rank= 1;
|
||||||
|
row_number= 0;
|
||||||
|
}
|
||||||
|
bool add();
|
||||||
|
enum Item_result result_type () const { return REAL_RESULT; }
|
||||||
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
|
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
|
||||||
|
|
||||||
|
void fix_length_and_dec()
|
||||||
|
{
|
||||||
|
decimals = 10; // TODO-cvicentiu find out how many decimals the standard
|
||||||
|
// requires.
|
||||||
|
}
|
||||||
|
|
||||||
|
void setup_window_func(THD *thd, Window_spec *window_spec);
|
||||||
|
|
||||||
|
private:
|
||||||
|
longlong cur_rank; // Current rank of the current row.
|
||||||
|
longlong row_number; // Value if this were ROW_NUMBER() function.
|
||||||
|
|
||||||
|
List<Cached_item> orderby_fields;
|
||||||
|
|
||||||
|
/* Helper function so that we don't cast the context every time. */
|
||||||
|
Window_context_row_count* get_context_()
|
||||||
|
{
|
||||||
|
return static_cast<Window_context_row_count *>(context_);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@detail
|
@detail
|
||||||
"The relative rank of a row R is defined as NP/NR, where
|
"The relative rank of a row R is defined as NP/NR, where
|
||||||
@@ -221,18 +388,11 @@ class Item_sum_percent_rank: public Item_sum_num
|
|||||||
two passes.
|
two passes.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class Item_sum_cume_dist: public Item_sum_num
|
class Item_sum_cume_dist: public Item_sum_percent_rank
|
||||||
{
|
{
|
||||||
longlong count;
|
|
||||||
longlong partition_rows;
|
|
||||||
|
|
||||||
void clear() {}
|
|
||||||
bool add() { return false; }
|
|
||||||
void update_field() {}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Item_sum_cume_dist(THD *thd)
|
Item_sum_cume_dist(THD *thd)
|
||||||
: Item_sum_num(thd), count(0), partition_rows(0) {}
|
: Item_sum_percent_rank(thd) {}
|
||||||
|
|
||||||
double val_real() { return 0; }
|
double val_real() { return 0; }
|
||||||
|
|
||||||
@@ -245,9 +405,6 @@ class Item_sum_cume_dist: public Item_sum_num
|
|||||||
{
|
{
|
||||||
return "cume_dist";
|
return "cume_dist";
|
||||||
}
|
}
|
||||||
|
|
||||||
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -361,7 +518,7 @@ public:
|
|||||||
List<Item> &fields, uint flags);
|
List<Item> &fields, uint flags);
|
||||||
void fix_length_and_dec()
|
void fix_length_and_dec()
|
||||||
{
|
{
|
||||||
window_func->fix_length_and_dec();
|
decimals = window_func->decimals;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* func_name() const { return "WF"; }
|
const char* func_name() const { return "WF"; }
|
||||||
@@ -369,7 +526,6 @@ public:
|
|||||||
bool fix_fields(THD *thd, Item **ref);
|
bool fix_fields(THD *thd, Item **ref);
|
||||||
|
|
||||||
bool resolve_window_name(THD *thd);
|
bool resolve_window_name(THD *thd);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* ITEM_WINDOWFUNC_INCLUDED */
|
#endif /* ITEM_WINDOWFUNC_INCLUDED */
|
||||||
|
@@ -705,6 +705,110 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Compute a window function that needs two passes over each partition.

  The table is expected to be sorted according to the partition and order by
  clauses. The algorithm is:
    1. Scan through the table (using info) till a partition boundary is
       reached.
    2. For each row scanned, add it to the window function's context.
    3. Once the partition boundary is met, do a second scan through the
       current partition (using a clone of info) and use the context
       information to compute and store the value of the window function
       for every row of that partition.
    4. Reset the context.
    5. Repeat from 1 till end of table.

  @param item_win  window function item; its window_func is expected to be
                   an Item_sum_window_with_context
  @param table     table holding the rows and the result field
  @param info      reader positioned at the start of the rows to process

  @return false on success, true on error (allocation failure, read failure
          during the second pass or row update failure).
*/
bool compute_two_pass_window_functions(Item_window_func *item_win,
                                       TABLE *table, READ_RECORD *info)
{
  /* Perform first pass. */

  // TODO-cvicentiu why not initialize the record for when we need, _in_
  // this function.
  READ_RECORD *info2= new READ_RECORD();
  int err;
  bool is_error= false;
  bool first_row= true;
  clone_read_record(info, info2);
  Item_sum_window_with_context *window_func=
    static_cast<Item_sum_window_with_context *>(item_win->window_func);

  uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0));
  if (rowid_buf == NULL)
  {
    /* No buffer to remember row positions in; release what we own. */
    delete info2;
    return true;
  }

  if (window_func->create_window_context())
  {
    /*
      Unable to allocate a new context. Release the reader clone and the
      rowid buffer, which would otherwise leak on this early exit.
    */
    my_free(rowid_buf);
    delete info2;
    return true;
  }

  Window_context *context= window_func->get_window_context();

  bool done= false;
  longlong rows_in_current_partition= 0;
  // TODO handle end of table updating.
  while (!done)
  {
    /* Any non-zero result ends the first pass. */
    if ((err= info->read_record(info)))
      done= true;

    /* The partition bound is deliberately not checked once we are done. */
    bool partition_changed= done || item_win->check_partition_bound() > -1;

    // The first time we always have a partition changed. Ignore it.
    if (first_row)
    {
      partition_changed= false;
      first_row= false;
    }

    if (partition_changed)
    {
      /*
        We are now looking at the first row for the next partition, or at the
        end of the table. Either way, we must remember this position for when
        we finish doing the second pass.
      */
      table->file->position(table->record[0]);
      memcpy(rowid_buf, table->file->ref, table->file->ref_length);

      /* Second pass: revisit every row of the partition just finished. */
      for (longlong row_number= 0; row_number < rows_in_current_partition;
           row_number++)
      {
        if ((err= info2->read_record(info2)))
        {
          is_error= true;
          break;
        }
        window_func->add();
        // Save the window function into the table.
        item_win->save_in_field(item_win->result_field, true);
        err= table->file->ha_update_row(table->record[1], table->record[0]);
        if (err && err != HA_ERR_RECORD_IS_THE_SAME)
        {
          is_error= true;
          break;
        }
      }

      if (is_error)
        break;

      rows_in_current_partition= 0;
      window_func->clear();
      context->reset();

      // Return to the beginning of the new partition.
      table->file->ha_rnd_pos(table->record[0], rowid_buf);
    }

    /* The current row belongs to the partition being accumulated. */
    rows_in_current_partition++;
    context->add_field_to_context(item_win->result_field);
  }

  window_func->delete_window_context();
  delete info2;
  my_free(rowid_buf);
  return is_error;
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@brief
|
@brief
|
||||||
This function is called by JOIN::exec to compute window function values
|
This function is called by JOIN::exec to compute window function values
|
||||||
@@ -899,6 +1003,13 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
|
|||||||
is_error= true;
|
is_error= true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case Item_sum::PERCENT_RANK_FUNC:
|
||||||
|
case Item_sum::CUME_DIST_FUNC:
|
||||||
|
{
|
||||||
|
if (compute_two_pass_window_functions(item_win, tbl, &info))
|
||||||
|
is_error= true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Item_sum::COUNT_FUNC:
|
case Item_sum::COUNT_FUNC:
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user