From 64ab10ffff28fcbedaba763fde9fbc7d1f2b5cbf Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Sat, 6 Feb 2016 01:53:17 +0300 Subject: [PATCH] Initial implementation of RANK() window function --- mysql-test/r/win.result | 44 +++++++++++++++++ mysql-test/t/win.test | 40 ++++++++++++++++ sql/item_sum.h | 4 ++ sql/item_windowfunc.cc | 25 ++++++++++ sql/item_windowfunc.h | 102 +++++++++++++++++++++++++++++++++++----- sql/sql_select.cc | 5 ++ sql/sql_window.cc | 8 ++-- 7 files changed, 213 insertions(+), 15 deletions(-) diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result index 7aa8a514061..098159c4d36 100644 --- a/mysql-test/r/win.result +++ b/mysql-test/r/win.result @@ -1,4 +1,17 @@ drop table if exists t1,t2; +# +# Check what happens when one attempts to use window function without OVER clause +# +create table t1 (a int, b int); +insert into t1 values (1,1),(2,2); +select row_number() from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1 +select rank() from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1 +drop table t1; +# +# Check if basic window functions work +# create table t1(a int, b int, x char(32)); insert into t1 values (2, 10, 'xx'); insert into t1 values (2, 10, 'zz'); @@ -39,3 +52,34 @@ pk a b row_number() over (order by a) row_number() over (order by b) 4 13 19 4 2 5 14 18 5 1 drop table t1; +# +# Try RANK() function +# +create table t2 ( +pk int primary key, +a int +); +insert into t2 values +( 1 , 0), +( 2 , 0), +( 3 , 1), +( 4 , 1), +( 8 , 2), +( 5 , 2), +( 6 , 2), +( 7 , 2), +( 9 , 4), +(10 , 4); +select pk, a, rank() over (order by a) from t2; +pk a rank() over (order by a) +1 0 1 +2 0 1 +3 1 3 +4 1 3 +8 2 5 +5 2 5 +6 2 5 +7 2 5 +9 4 9 +10 4 9 +drop table t2; diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test index 5755412e908..a70b6128d81 100644 --- a/mysql-test/t/win.test +++ b/mysql-test/t/win.test @@ -6,6 +6,22 @@ drop table if exists t1,t2; --enable_warnings +--echo # +--echo # Check what happens when one attempts to use window function without OVER clause +--echo # +create table t1 (a int, b int); +insert into t1 values (1,1),(2,2); + +--error ER_PARSE_ERROR +select row_number() from t1; +--error ER_PARSE_ERROR +select rank() from t1; + +drop table t1; + +--echo # +--echo # Check if basic window functions work +--echo # create table t1(a int, b int, x char(32)); insert into t1 values (2, 10, 'xx'); insert into t1 values (2, 10, 'zz'); @@ -35,3 +51,27 @@ from t1; drop table t1; +--echo # +--echo # Try RANK() function +--echo # +create table t2 ( + pk int primary key, + a int +); + +insert into t2 values +( 1 , 0), +( 2 , 0), +( 3 , 1), +( 4 , 1), +( 8 , 2), +( 5 , 2), +( 6 , 2), +( 7 , 2), +( 9 , 4), +(10 , 4); + +select pk, a, rank() over (order by a) from t2; + +drop table t2; + diff --git a/sql/item_sum.h b/sql/item_sum.h index bf4a1560d42..e0e74efef28 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -109,6 +109,7 @@ public: class st_select_lex; +class Window_spec; /** Class Item_sum is the base class used for special expressions that SQL calls @@ -550,6 +551,9 @@ public: { return trace_unsupported_by_check_vcol_func_processor(func_name()); } + + //psergey-todo: this is ugly: + virtual void setup_window_func(THD *thd, Window_spec *window_spec) {} }; diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index cdc4e26dfc8..a472d928181 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -40,6 +40,30 @@ void Item_window_func::setup_partition_border_check(THD *thd) Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); partition_fields.push_back(tmp); } + window_func->setup_window_func(thd, window_spec); +} + + +void Item_sum_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: move this into Item_window_func? */ + for (ORDER *curr = window_spec->order_list.first; curr; curr=curr->next) { + Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); + orderby_fields.push_back(tmp); + } + clear(); +} + + +bool Item_sum_rank::add() +{ + row_number++; + if (test_if_group_changed(orderby_fields) > -1) + { + /* Row value changed */ + cur_rank= row_number; + } + return false; } @@ -48,6 +72,7 @@ void Item_window_func::advance_window() { int changed = test_if_group_changed(partition_fields); if (changed > -1) { + /* Next partition */ window_func->clear(); } window_func->add(); diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 94f783f93e3..231c82c5131 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -9,6 +9,7 @@ class Window_spec; /* ROW_NUMBER() OVER (...) + @detail - This is a Window function (not just an aggregate) - It can be computed by doing one pass over select output, provided the output is sorted according to the window definition. @@ -26,7 +27,7 @@ class Item_sum_row_number: public Item_sum_int Item_sum_row_number(THD *thd) : Item_sum_int(thd), count(0) {} - enum Sumfunctype sum_func () const + enum Sumfunctype sum_func() const { return ROW_NUMBER_FUNC; } @@ -42,29 +43,71 @@ class Item_sum_row_number: public Item_sum_int /* RANK() OVER (...) Windowing function + @detail - This is a Window function (not just an aggregate) - It can be computed by doing one pass over select output, provided the output is sorted according to the window definition. + + The function is defined as: + + "The rank of row R is defined as 1 (one) plus the number of rows that + precede R and are not peers of R" + + "This implies that if two or more rows are not distinct with respect to + the window ordering, then there will be one or more" + + @todo: failure to overload val_int() causes infinite mutual recursion like + this: + + #7505 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364 + #7506 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707 + #7507 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721 + #7508 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364 + #7509 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707 + #7510 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721 + #7511 0x0000555555e6b411 in Item_window_func::val_real (this=0x7fff5000d870) at sql/item_windowfunc.h:291 + #7512 0x0000555555ce1f40 in Item::save_in_field (this=0x7fff5000d870, field=0x7fff50012be0, no_conversions=true) at sql/item.cc:5843 + #7513 0x00005555559c2c54 in Item_result_field::save_in_result_field (this=0x7fff5000d870, no_conversions=true) at sql/item.h:2280 + #7514 0x0000555555aeb6bf in copy_funcs (func_ptr=0x7fff500126c8, thd=0x55555ab77458) at sql/sql_select.cc:23077 + #7515 0x0000555555ae2d01 in end_write (join=0x7fff5000f230, join_tab=0x7fff50010728, end_of_records=false) at sql/sql_select.cc:19520 + #7516 0x0000555555adffc1 in evaluate_join_record (join=0x7fff5000f230, join_tab=0x7fff500103e0, error=0) at sql/sql_select.cc:18388 + #7517 0x0000555555adf8b6 in sub_select (join=0x7fff5000f230, join_tab=0x7fff500103e0, end_of_records=false) at sql/sql_select.cc:18163 + + is this normal? Can it happen with other val_XXX functions? + Should we use another way to prevent this by forcing + Item_window_func::val_real() to return NULL at phase #1? */ class Item_sum_rank: public Item_sum_int { - longlong rank; + longlong row_number; // just ROW_NUMBER() + longlong cur_rank; // current value - /*TODO: implementation is currently missing */ + List orderby_fields; +public: void clear() { - // This is called on next partition + /* This is called on partition start */ + cur_rank= 1; + row_number= 0; } - bool add() - { - return false; - } - void update_field() {} - public: + bool add(); + + longlong val_int() + { + return cur_rank; + } + + void update_field() {} + /* + void reset_field(); + TODO: ^^ what does this do ? It is not called ever? + */ + +public: Item_sum_rank(THD *thd) - : Item_sum_int(thd), rank(0) {} + : Item_sum_int(thd) {} enum Sumfunctype sum_func () const { @@ -76,17 +119,29 @@ class Item_sum_rank: public Item_sum_int return "rank"; } + void setup_window_func(THD *thd, Window_spec *window_spec); }; /* RANK() OVER (...) Windowing function + @detail - This is a Window function (not just an aggregate) - It can be computed by doing one pass over select output, provided the output is sorted according to the window definition. + + The function is defined as: + + "If DENSE_RANK is specified, then the rank of row R is defined as the + number of rows preceding and including R that are distinct with respect + to the window ordering" + + "This implies that there are no gaps in the sequential rank numbering of + rows in each window partition." */ + class Item_sum_dense_rank: public Item_sum_int { longlong dense_rank; @@ -111,6 +166,18 @@ class Item_sum_dense_rank: public Item_sum_int }; + +/* + @detail + "The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is + defined to be the RANK of R and NR is defined to be the number of rows in + the window partition of R." + + Computation of this function requires two passes: + - First pass to find #rows in the partition + - Second pass to compute rank of current row and the value of the function +*/ + class Item_sum_percent_rank: public Item_sum_num { longlong rank; @@ -140,6 +207,18 @@ class Item_sum_percent_rank: public Item_sum_num }; + +/* + @detail + "The relative rank of a row R is defined as NP/NR, where + - NP is defined to be the number of rows preceding or peer with R in the + window ordering of the window partition of R + - NR is defined to be the number of rows in the window partition of R. + + Just like with Item_sum_percent_rank, compuation of this function requires + two passes. +*/ + class Item_sum_cume_dist: public Item_sum_num { longlong count; @@ -172,7 +251,6 @@ class Item_sum_cume_dist: public Item_sum_num class Item_window_func : public Item_result_field { -private: Item_sum *window_func; LEX_STRING *window_name; Window_spec *window_spec; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 103f3aab1a3..c606bb98b40 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -22341,6 +22341,11 @@ int test_if_item_cache_changed(List &list) } +/* + @return + -1 - Group not changed + value>=0 - Number of the component where the group changed +*/ static int test_if_group_changed(List &list) diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 1dbd7053dd1..3060cc4011c 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -286,6 +286,8 @@ bool JOIN::process_window_functions(List *curr_fields_list) return true; item_win->setup_partition_border_check(thd); + // TODO: somehow, setup_sortkey_check here (either directly here + // or in the item. int err; TABLE *tbl= *table; @@ -295,12 +297,12 @@ bool JOIN::process_window_functions(List *curr_fields_list) /* This will cause window function to compute its value for the - current row : + current row : */ item_win->advance_window(); - /* - Put the new value into temptable's field + /* + Put the new value into temptable's field TODO: Should this use item_win->update_field() call? Regular aggegate function implementations seem to implement it. */