mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Initial implementation of RANK() window function
This commit is contained in:
@ -1,4 +1,17 @@
|
||||
drop table if exists t1,t2;
|
||||
#
|
||||
# Check what happens when one attempts to use window function without OVER clause
|
||||
#
|
||||
create table t1 (a int, b int);
|
||||
insert into t1 values (1,1),(2,2);
|
||||
select row_number() from t1;
|
||||
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1
|
||||
select rank() from t1;
|
||||
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1
|
||||
drop table t1;
|
||||
#
|
||||
# Check if basic window functions work
|
||||
#
|
||||
create table t1(a int, b int, x char(32));
|
||||
insert into t1 values (2, 10, 'xx');
|
||||
insert into t1 values (2, 10, 'zz');
|
||||
@ -39,3 +52,34 @@ pk a b row_number() over (order by a) row_number() over (order by b)
|
||||
4 13 19 4 2
|
||||
5 14 18 5 1
|
||||
drop table t1;
|
||||
#
|
||||
# Try RANK() function
|
||||
#
|
||||
create table t2 (
|
||||
pk int primary key,
|
||||
a int
|
||||
);
|
||||
insert into t2 values
|
||||
( 1 , 0),
|
||||
( 2 , 0),
|
||||
( 3 , 1),
|
||||
( 4 , 1),
|
||||
( 8 , 2),
|
||||
( 5 , 2),
|
||||
( 6 , 2),
|
||||
( 7 , 2),
|
||||
( 9 , 4),
|
||||
(10 , 4);
|
||||
select pk, a, rank() over (order by a) from t2;
|
||||
pk a rank() over (order by a)
|
||||
1 0 1
|
||||
2 0 1
|
||||
3 1 3
|
||||
4 1 3
|
||||
8 2 5
|
||||
5 2 5
|
||||
6 2 5
|
||||
7 2 5
|
||||
9 4 9
|
||||
10 4 9
|
||||
drop table t2;
|
||||
|
@ -6,6 +6,22 @@
|
||||
drop table if exists t1,t2;
|
||||
--enable_warnings
|
||||
|
||||
--echo #
|
||||
--echo # Check what happens when one attempts to use window function without OVER clause
|
||||
--echo #
|
||||
create table t1 (a int, b int);
|
||||
insert into t1 values (1,1),(2,2);
|
||||
|
||||
--error ER_PARSE_ERROR
|
||||
select row_number() from t1;
|
||||
--error ER_PARSE_ERROR
|
||||
select rank() from t1;
|
||||
|
||||
drop table t1;
|
||||
|
||||
--echo #
|
||||
--echo # Check if basic window functions work
|
||||
--echo #
|
||||
create table t1(a int, b int, x char(32));
|
||||
insert into t1 values (2, 10, 'xx');
|
||||
insert into t1 values (2, 10, 'zz');
|
||||
@ -35,3 +51,27 @@ from t1;
|
||||
|
||||
drop table t1;
|
||||
|
||||
--echo #
|
||||
--echo # Try RANK() function
|
||||
--echo #
|
||||
create table t2 (
|
||||
pk int primary key,
|
||||
a int
|
||||
);
|
||||
|
||||
insert into t2 values
|
||||
( 1 , 0),
|
||||
( 2 , 0),
|
||||
( 3 , 1),
|
||||
( 4 , 1),
|
||||
( 8 , 2),
|
||||
( 5 , 2),
|
||||
( 6 , 2),
|
||||
( 7 , 2),
|
||||
( 9 , 4),
|
||||
(10 , 4);
|
||||
|
||||
select pk, a, rank() over (order by a) from t2;
|
||||
|
||||
drop table t2;
|
||||
|
||||
|
@ -109,6 +109,7 @@ public:
|
||||
|
||||
|
||||
class st_select_lex;
|
||||
class Window_spec;
|
||||
|
||||
/**
|
||||
Class Item_sum is the base class used for special expressions that SQL calls
|
||||
@ -550,6 +551,9 @@ public:
|
||||
{
|
||||
return trace_unsupported_by_check_vcol_func_processor(func_name());
|
||||
}
|
||||
|
||||
//psergey-todo: this is ugly:
|
||||
virtual void setup_window_func(THD *thd, Window_spec *window_spec) {}
|
||||
};
|
||||
|
||||
|
||||
|
@ -40,6 +40,30 @@ void Item_window_func::setup_partition_border_check(THD *thd)
|
||||
Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
|
||||
partition_fields.push_back(tmp);
|
||||
}
|
||||
window_func->setup_window_func(thd, window_spec);
|
||||
}
|
||||
|
||||
|
||||
void Item_sum_rank::setup_window_func(THD *thd, Window_spec *window_spec)
|
||||
{
|
||||
/* TODO: move this into Item_window_func? */
|
||||
for (ORDER *curr = window_spec->order_list.first; curr; curr=curr->next) {
|
||||
Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
|
||||
orderby_fields.push_back(tmp);
|
||||
}
|
||||
clear();
|
||||
}
|
||||
|
||||
|
||||
bool Item_sum_rank::add()
|
||||
{
|
||||
row_number++;
|
||||
if (test_if_group_changed(orderby_fields) > -1)
|
||||
{
|
||||
/* Row value changed */
|
||||
cur_rank= row_number;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -48,6 +72,7 @@ void Item_window_func::advance_window() {
|
||||
int changed = test_if_group_changed(partition_fields);
|
||||
|
||||
if (changed > -1) {
|
||||
/* Next partition */
|
||||
window_func->clear();
|
||||
}
|
||||
window_func->add();
|
||||
|
@ -9,6 +9,7 @@ class Window_spec;
|
||||
/*
|
||||
ROW_NUMBER() OVER (...)
|
||||
|
||||
@detail
|
||||
- This is a Window function (not just an aggregate)
|
||||
- It can be computed by doing one pass over select output, provided
|
||||
the output is sorted according to the window definition.
|
||||
@ -26,7 +27,7 @@ class Item_sum_row_number: public Item_sum_int
|
||||
Item_sum_row_number(THD *thd)
|
||||
: Item_sum_int(thd), count(0) {}
|
||||
|
||||
enum Sumfunctype sum_func () const
|
||||
enum Sumfunctype sum_func() const
|
||||
{
|
||||
return ROW_NUMBER_FUNC;
|
||||
}
|
||||
@ -42,29 +43,71 @@ class Item_sum_row_number: public Item_sum_int
|
||||
/*
|
||||
RANK() OVER (...) Windowing function
|
||||
|
||||
@detail
|
||||
- This is a Window function (not just an aggregate)
|
||||
- It can be computed by doing one pass over select output, provided
|
||||
the output is sorted according to the window definition.
|
||||
|
||||
The function is defined as:
|
||||
|
||||
"The rank of row R is defined as 1 (one) plus the number of rows that
|
||||
precede R and are not peers of R"
|
||||
|
||||
"This implies that if two or more rows are not distinct with respect to
|
||||
the window ordering, then there will be one or more"
|
||||
|
||||
@todo: failure to overload val_int() causes infinite mutual recursion like
|
||||
this:
|
||||
|
||||
#7505 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364
|
||||
#7506 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707
|
||||
#7507 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721
|
||||
#7508 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364
|
||||
#7509 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707
|
||||
#7510 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721
|
||||
#7511 0x0000555555e6b411 in Item_window_func::val_real (this=0x7fff5000d870) at sql/item_windowfunc.h:291
|
||||
#7512 0x0000555555ce1f40 in Item::save_in_field (this=0x7fff5000d870, field=0x7fff50012be0, no_conversions=true) at sql/item.cc:5843
|
||||
#7513 0x00005555559c2c54 in Item_result_field::save_in_result_field (this=0x7fff5000d870, no_conversions=true) at sql/item.h:2280
|
||||
#7514 0x0000555555aeb6bf in copy_funcs (func_ptr=0x7fff500126c8, thd=0x55555ab77458) at sql/sql_select.cc:23077
|
||||
#7515 0x0000555555ae2d01 in end_write (join=0x7fff5000f230, join_tab=0x7fff50010728, end_of_records=false) at sql/sql_select.cc:19520
|
||||
#7516 0x0000555555adffc1 in evaluate_join_record (join=0x7fff5000f230, join_tab=0x7fff500103e0, error=0) at sql/sql_select.cc:18388
|
||||
#7517 0x0000555555adf8b6 in sub_select (join=0x7fff5000f230, join_tab=0x7fff500103e0, end_of_records=false) at sql/sql_select.cc:18163
|
||||
|
||||
is this normal? Can it happen with other val_XXX functions?
|
||||
Should we use another way to prevent this by forcing
|
||||
Item_window_func::val_real() to return NULL at phase #1?
|
||||
*/
|
||||
|
||||
class Item_sum_rank: public Item_sum_int
|
||||
{
|
||||
longlong rank;
|
||||
longlong row_number; // just ROW_NUMBER()
|
||||
longlong cur_rank; // current value
|
||||
|
||||
/*TODO: implementation is currently missing */
|
||||
List<Cached_item> orderby_fields;
|
||||
public:
|
||||
void clear()
|
||||
{
|
||||
// This is called on next partition
|
||||
/* This is called on partition start */
|
||||
cur_rank= 1;
|
||||
row_number= 0;
|
||||
}
|
||||
bool add()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
void update_field() {}
|
||||
|
||||
public:
|
||||
bool add();
|
||||
|
||||
longlong val_int()
|
||||
{
|
||||
return cur_rank;
|
||||
}
|
||||
|
||||
void update_field() {}
|
||||
/*
|
||||
void reset_field();
|
||||
TODO: ^^ what does this do ? It is not called ever?
|
||||
*/
|
||||
|
||||
public:
|
||||
Item_sum_rank(THD *thd)
|
||||
: Item_sum_int(thd), rank(0) {}
|
||||
: Item_sum_int(thd) {}
|
||||
|
||||
enum Sumfunctype sum_func () const
|
||||
{
|
||||
@ -76,17 +119,29 @@ class Item_sum_rank: public Item_sum_int
|
||||
return "rank";
|
||||
}
|
||||
|
||||
void setup_window_func(THD *thd, Window_spec *window_spec);
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
RANK() OVER (...) Windowing function
|
||||
|
||||
@detail
|
||||
- This is a Window function (not just an aggregate)
|
||||
- It can be computed by doing one pass over select output, provided
|
||||
the output is sorted according to the window definition.
|
||||
|
||||
The function is defined as:
|
||||
|
||||
"If DENSE_RANK is specified, then the rank of row R is defined as the
|
||||
number of rows preceding and including R that are distinct with respect
|
||||
to the window ordering"
|
||||
|
||||
"This implies that there are no gaps in the sequential rank numbering of
|
||||
rows in each window partition."
|
||||
*/
|
||||
|
||||
|
||||
class Item_sum_dense_rank: public Item_sum_int
|
||||
{
|
||||
longlong dense_rank;
|
||||
@ -111,6 +166,18 @@ class Item_sum_dense_rank: public Item_sum_int
|
||||
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
@detail
|
||||
"The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is
|
||||
defined to be the RANK of R and NR is defined to be the number of rows in
|
||||
the window partition of R."
|
||||
|
||||
Computation of this function requires two passes:
|
||||
- First pass to find #rows in the partition
|
||||
- Second pass to compute rank of current row and the value of the function
|
||||
*/
|
||||
|
||||
class Item_sum_percent_rank: public Item_sum_num
|
||||
{
|
||||
longlong rank;
|
||||
@ -140,6 +207,18 @@ class Item_sum_percent_rank: public Item_sum_num
|
||||
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
@detail
|
||||
"The relative rank of a row R is defined as NP/NR, where
|
||||
- NP is defined to be the number of rows preceding or peer with R in the
|
||||
window ordering of the window partition of R
|
||||
- NR is defined to be the number of rows in the window partition of R.
|
||||
|
||||
Just like with Item_sum_percent_rank, compuation of this function requires
|
||||
two passes.
|
||||
*/
|
||||
|
||||
class Item_sum_cume_dist: public Item_sum_num
|
||||
{
|
||||
longlong count;
|
||||
@ -172,7 +251,6 @@ class Item_sum_cume_dist: public Item_sum_num
|
||||
|
||||
class Item_window_func : public Item_result_field
|
||||
{
|
||||
private:
|
||||
Item_sum *window_func;
|
||||
LEX_STRING *window_name;
|
||||
Window_spec *window_spec;
|
||||
|
@ -22341,6 +22341,11 @@ int test_if_item_cache_changed(List<Cached_item> &list)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@return
|
||||
-1 - Group not changed
|
||||
value>=0 - Number of the component where the group changed
|
||||
*/
|
||||
|
||||
static int
|
||||
test_if_group_changed(List<Cached_item> &list)
|
||||
|
@ -286,6 +286,8 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
|
||||
return true;
|
||||
|
||||
item_win->setup_partition_border_check(thd);
|
||||
// TODO: somehow, setup_sortkey_check here (either directly here
|
||||
// or in the item.
|
||||
|
||||
int err;
|
||||
TABLE *tbl= *table;
|
||||
|
Reference in New Issue
Block a user