diff --git a/mysql-test/r/win_percent_cume.result b/mysql-test/r/win_percent_cume.result new file mode 100644 index 00000000000..d38c95c9ea7 --- /dev/null +++ b/mysql-test/r/win_percent_cume.result @@ -0,0 +1,62 @@ +create table t1 ( +pk int primary key, +a int, +b int +); +insert into t1 values +( 1 , 0, 10), +( 2 , 0, 10), +( 3 , 1, 10), +( 4 , 1, 10), +( 8 , 2, 10), +( 5 , 2, 20), +( 6 , 2, 20), +( 7 , 2, 20), +( 9 , 4, 20), +(10 , 4, 20); +select pk, a, b, +percent_rank() over (order by a), +cume_dist() over (order by a) +from t1; +pk a b percent_rank() over (order by a) cume_dist() over (order by a) +1 0 10 0.0000000000 0.2000000000 +2 0 10 0.0000000000 0.2000000000 +3 1 10 0.2222222222 0.4000000000 +4 1 10 0.2222222222 0.4000000000 +8 2 10 0.4444444444 0.8000000000 +5 2 20 0.4444444444 0.8000000000 +6 2 20 0.4444444444 0.8000000000 +7 2 20 0.4444444444 0.8000000000 +9 4 20 0.8888888889 1.0000000000 +10 4 20 0.8888888889 1.0000000000 +select pk, a, b, +percent_rank() over (order by pk), +cume_dist() over (order by pk) +from t1 order by pk; +pk a b percent_rank() over (order by pk) cume_dist() over (order by pk) +1 0 10 0.0000000000 0.1000000000 +2 0 10 0.1111111111 0.2000000000 +3 1 10 0.2222222222 0.3000000000 +4 1 10 0.3333333333 0.4000000000 +5 2 20 0.4444444444 0.5000000000 +6 2 20 0.5555555556 0.6000000000 +7 2 20 0.6666666667 0.7000000000 +8 2 10 0.7777777778 0.8000000000 +9 4 20 0.8888888889 0.9000000000 +10 4 20 1.0000000000 1.0000000000 +select pk, a, b, +percent_rank() over (partition by a order by a), +cume_dist() over (partition by a order by a) +from t1; +pk a b percent_rank() over (partition by a order by a) cume_dist() over (partition by a order by a) +1 0 10 0.0000000000 1.0000000000 +2 0 10 0.0000000000 1.0000000000 +3 1 10 0.0000000000 1.0000000000 +4 1 10 0.0000000000 1.0000000000 +8 2 10 0.0000000000 1.0000000000 +5 2 20 0.0000000000 1.0000000000 +6 2 20 0.0000000000 1.0000000000 +7 2 20 0.0000000000 1.0000000000 +9 4 20 0.0000000000 1.0000000000 +10 4 20 0.0000000000 1.0000000000 +drop table t1; diff --git a/mysql-test/t/win_percent_cume.test b/mysql-test/t/win_percent_cume.test new file mode 100644 index 00000000000..b851185cb32 --- /dev/null +++ b/mysql-test/t/win_percent_cume.test @@ -0,0 +1,36 @@ +create table t1 ( + pk int primary key, + a int, + b int +); + + +insert into t1 values +( 1 , 0, 10), +( 2 , 0, 10), +( 3 , 1, 10), +( 4 , 1, 10), +( 8 , 2, 10), +( 5 , 2, 20), +( 6 , 2, 20), +( 7 , 2, 20), +( 9 , 4, 20), +(10 , 4, 20); + +select pk, a, b, + percent_rank() over (order by a), + cume_dist() over (order by a) +from t1; + +select pk, a, b, + percent_rank() over (order by pk), + cume_dist() over (order by pk) +from t1 order by pk; + +select pk, a, b, + percent_rank() over (partition by a order by a), + cume_dist() over (partition by a order by a) +from t1; + +drop table t1; + diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 9695698bbd9..59b739767c8 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -317,12 +317,18 @@ class Item_context NOTE: All two pass window functions need to implement this interface. */ -class Item_sum_window_with_context : public Item_sum_num, - public Item_context +class Item_sum_window_with_row_count : public Item_sum_num { public: - Item_sum_window_with_context(THD *thd) - : Item_sum_num(thd), Item_context() {} + Item_sum_window_with_row_count(THD *thd) : Item_sum_num(thd), + partition_row_count_(0){} + + void set_row_count(ulonglong count) { partition_row_count_ = count; } + + protected: + longlong get_row_count() { return partition_row_count_; } + private: + ulonglong partition_row_count_; }; /* @@ -336,12 +342,11 @@ class Item_sum_window_with_context : public Item_sum_num, This is held within the row_count context. - Second pass to compute rank of current row and the value of the function */ -class Item_sum_percent_rank: public Item_sum_window_with_context, - public Window_context_row_count +class Item_sum_percent_rank: public Item_sum_window_with_row_count { public: Item_sum_percent_rank(THD *thd) - : Item_sum_window_with_context(thd), cur_rank(1) {} + : Item_sum_window_with_row_count(thd), cur_rank(1) {} longlong val_int() { @@ -359,14 +364,9 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, We can not get the real value without knowing the number of rows in the partition. Don't divide by 0. */ - if (!get_context_()) - { - // Calling this kind of function with a context makes no sense. - DBUG_ASSERT(0); - return 0; - } + ulonglong partition_rows = get_row_count(); + null_value= partition_rows > 0 ? false : true; - longlong partition_rows = get_context_()->get_field_context(result_field); return partition_rows > 1 ? static_cast(cur_rank - 1) / (partition_rows - 1) : 0; } @@ -381,25 +381,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, return "percent_rank"; } - bool create_window_context() - { - // TODO-cvicentiu: Currently this means we must make sure to delete - // the window context. We can potentially allocate this on the THD memroot. - // At the same time, this is only necessary for a small portion of the - // query execution and it does not make sense to keep it for all of it. - context_ = new Window_context_row_count(); - if (context_ == NULL) - return true; - return false; - } - - void delete_window_context() - { - if (context_) - delete get_context_(); - context_ = NULL; - } - void update_field() {} void clear() @@ -428,13 +409,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, void cleanup() { peer_tracker.cleanup(); - Item_sum_window_with_context::cleanup(); - } - - /* Helper function so that we don't cast the context every time. */ - Window_context_row_count* get_context_() - { - return static_cast(context_); } }; @@ -448,27 +422,62 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, window ordering of the window partition of R - NR is defined to be the number of rows in the window partition of R. - Just like with Item_sum_percent_rank, compuation of this function requires + Just like with Item_sum_percent_rank, computation of this function requires two passes. */ -class Item_sum_cume_dist: public Item_sum_percent_rank +class Item_sum_cume_dist: public Item_sum_window_with_row_count { public: - Item_sum_cume_dist(THD *thd) - : Item_sum_percent_rank(thd) {} + Item_sum_cume_dist(THD *thd) : Item_sum_window_with_row_count(thd), + current_row_count_(0) {} - double val_real() { return 0; } + double val_real() + { + if (get_row_count() == 0) + { + null_value= true; + return 0; + } + ulonglong partition_row_count= get_row_count(); + null_value= false; + return static_cast(current_row_count_) / partition_row_count; + } + + bool add() + { + current_row_count_++; + return false; + } enum Sumfunctype sum_func () const { return CUME_DIST_FUNC; } + void clear() + { + current_row_count_= 0; + set_row_count(0); + } + const char*func_name() const { return "cume_dist"; } + + void update_field() {} + enum Item_result result_type () const { return REAL_RESULT; } + enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; } + + void fix_length_and_dec() + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + } + + private: + ulonglong current_row_count_; }; @@ -499,11 +508,11 @@ public: force_return_blank(true), read_value_from_result_field(false) {} - Item_sum *window_func() { return (Item_sum *) args[0]; } + Item_sum *window_func() const { return (Item_sum *) args[0]; } void update_used_tables(); - bool is_frame_prohibited() + bool is_frame_prohibited() const { switch (window_func()->sum_func()) { case Item_sum::ROW_NUMBER_FUNC: @@ -517,7 +526,28 @@ public: } } - bool is_order_list_mandatory() + bool requires_partition_size() const + { + switch (window_func()->sum_func()) { + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + return true; + default: + return false; + } + } + + bool requires_peer_size() const + { + switch (window_func()->sum_func()) { + case Item_sum::CUME_DIST_FUNC: + return true; + default: + return false; + } + } + + bool is_order_list_mandatory() const { switch (window_func()->sum_func()) { case Item_sum::RANK_FUNC: diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 875930f13e0..3b99beeb8c6 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -865,15 +865,18 @@ public: } }; + /* UNBOUNDED FOLLOWING frame bound */ class Frame_unbounded_following : public Frame_cursor { - Table_read_cursor cursor; +protected: + Table_read_cursor cursor; Group_bound_tracker bound_tracker; + public: void init(THD *thd, READ_RECORD *info, SQL_I_List *partition_list, SQL_I_List *order_list) @@ -910,6 +913,35 @@ public: }; +class Frame_unbounded_following_set_count : public Frame_unbounded_following +{ + void next_partition(longlong rownum, Item_sum* item) + { + ulonglong num_rows_in_partition= 0; + if (!rownum) + { + /* Read the first row */ + if (cursor.get_next()) + return; + } + num_rows_in_partition++; + + /* Remember which partition we are in */ + bound_tracker.check_if_next_group(); + /* Walk to the end of the partition, find how many rows there are. */ + while (!cursor.get_next()) + { + if (bound_tracker.check_if_next_group()) + break; + num_rows_in_partition++; + } + + Item_sum_window_with_row_count* item_with_row_count = + static_cast(item); + item_with_row_count->set_row_count(num_rows_in_partition); + } +}; + ///////////////////////////////////////////////////////////////////////////// // ROWS-type frame bounds ///////////////////////////////////////////////////////////////////////////// @@ -1212,6 +1244,47 @@ Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound) return NULL; } +void add_extra_frame_cursors(List *cursors, + const Item_sum *window_func) +{ + switch (window_func->sum_func()) + { + case Item_sum::CUME_DIST_FUNC: + cursors->push_back(new Frame_unbounded_preceding); + cursors->push_back(new Frame_range_current_row_bottom); + break; + default: + cursors->push_back(new Frame_unbounded_preceding); + cursors->push_back(new Frame_rows_current_row_bottom); + } +} + +List get_window_func_required_cursors( + const Item_window_func* item_win) +{ + List result; + + if (item_win->requires_partition_size()) + result.push_back(new Frame_unbounded_following_set_count); + + /* + If it is not a regular window function that follows frame specifications, + specific cursors are required. + */ + if (item_win->is_frame_prohibited()) + { + add_extra_frame_cursors(&result, item_win->window_func()); + return result; + } + + /* A regular window function follows the frame specification. */ + result.push_back(get_frame_cursor(item_win->window_spec->window_frame, + false)); + result.push_back(get_frame_cursor(item_win->window_spec->window_frame, + true)); + + return result; +} /* Streamed window function computation with window frames. @@ -1254,21 +1327,20 @@ bool compute_window_func_with_frames(Item_window_func *item_win, { THD *thd= current_thd; int err= 0; - Frame_cursor *top_bound; - Frame_cursor *bottom_bound; Item_sum *sum_func= item_win->window_func(); /* This algorithm doesn't support DISTINCT aggregator */ sum_func->set_aggregator(Aggregator::SIMPLE_AGGREGATOR); - - Window_frame *window_frame= item_win->window_spec->window_frame; - top_bound= get_frame_cursor(window_frame, true); - bottom_bound= get_frame_cursor(window_frame, false); - - top_bound->init(thd, info, item_win->window_spec->partition_list, - item_win->window_spec->order_list); - bottom_bound->init(thd, info, item_win->window_spec->partition_list, - item_win->window_spec->order_list); + + List cursors= get_window_func_required_cursors(item_win); + + List_iterator_fast it(cursors); + Frame_cursor *c; + while((c= it++)) + { + c->init(thd, info, item_win->window_spec->partition_list, + item_win->window_spec->order_list); + } bool is_error= false; longlong rownum= 0; @@ -1293,24 +1365,28 @@ bool compute_window_func_with_frames(Item_window_func *item_win, pre_XXX functions assume that tbl->record[0] contains current_row, and they may not change it. */ - bottom_bound->pre_next_partition(rownum, sum_func); - top_bound->pre_next_partition(rownum, sum_func); + it.rewind(); + while ((c= it++)) + c->pre_next_partition(rownum, sum_func); /* We move bottom_bound first, because we want rows to be added into the aggregate before top_bound attempts to remove them. */ - bottom_bound->next_partition(rownum, sum_func); - top_bound->next_partition(rownum, sum_func); + it.rewind(); + while ((c= it++)) + c->next_partition(rownum, sum_func); } else { /* Again, both pre_XXX function can find current_row in tbl->record[0] */ - bottom_bound->pre_next_row(sum_func); - top_bound->pre_next_row(sum_func); + it.rewind(); + while ((c= it++)) + c->pre_next_row(sum_func); /* These make no assumptions about tbl->record[0] and may change it */ - bottom_bound->next_row(sum_func); - top_bound->next_row(sum_func); + it.rewind(); + while ((c= it++)) + c->next_row(sum_func); } rownum++; @@ -1331,115 +1407,11 @@ bool compute_window_func_with_frames(Item_window_func *item_win, } my_free(rowid_buf); - delete top_bound; - delete bottom_bound; + cursors.delete_elements(); return is_error? true: false; } -bool compute_two_pass_window_functions(Item_window_func *item_win, - TABLE *table, READ_RECORD *info) -{ - /* Perform first pass. */ - - // TODO-cvicentiu why not initialize the record for when we need, _in_ - // this function. - READ_RECORD *info2= new READ_RECORD(); - int err; - bool is_error = false; - bool first_row= true; - clone_read_record(info, info2); - Item_sum_window_with_context *window_func= - static_cast(item_win->window_func()); - uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0)); - - is_error= window_func->create_window_context(); - /* Unable to allocate a new context. */ - if (is_error) - return true; - - Window_context *context = window_func->get_window_context(); - /* - The two pass algorithm is as follows: - We have a sorted table according to the partition and order by clauses. - 1. Scan through the table till we reach a partition boundary. - 2. For each row that we scan, add it to the context. - 3. Once the partition boundary is met, do a second scan through the - current partition and use the context information to compute the value for - the window function for that partition. - 4. Reset the context. - 5. Repeat from 1 till end of table. - */ - - bool done = false; - longlong rows_in_current_partition = 0; - // TODO handle end of table updating. - while (!done) - { - - if ((err= info->read_record(info))) - { - done = true; - } - - bool partition_changed= done || item_win->check_if_partition_changed(); - // The first time we always have a partition changed. Ignore it. - if (first_row) - { - partition_changed= false; - first_row= false; - } - - if (partition_changed) - { - /* - We are now looking at the first row for the next partition, or at the - end of the table. Either way, we must remember this position for when - we finish doing the second pass. - */ - table->file->position(table->record[0]); - memcpy(rowid_buf, table->file->ref, table->file->ref_length); - - for (longlong row_number = 0; row_number < rows_in_current_partition; - row_number++) - { - if ((err= info2->read_record(info2))) - { - is_error= true; - break; - } - window_func->add(); - // Save the window function into the table. - item_win->save_in_field(item_win->result_field, true); - err= table->file->ha_update_row(table->record[1], table->record[0]); - if (err && err != HA_ERR_RECORD_IS_THE_SAME) - { - is_error= true; - break; - } - } - - if (is_error) - break; - - rows_in_current_partition= 0; - window_func->clear(); - context->reset(); - - // Return to the beginning of the new partition. - table->file->ha_rnd_pos(table->record[0], rowid_buf); - } - rows_in_current_partition++; - context->add_field_to_context(item_win->result_field); - } - - window_func->delete_window_context(); - delete info2; - my_free(rowid_buf); - return is_error; -} - - /* Make a list that is a concation of two lists of ORDER elements */ static ORDER* concat_order_lists(MEM_ROOT *mem_root, ORDER *list1, ORDER *list2) @@ -1502,16 +1474,12 @@ bool Window_func_runner::setup(THD *thd) compute_func= compute_window_func_values; break; } - case Item_sum::PERCENT_RANK_FUNC: - case Item_sum::CUME_DIST_FUNC: - { - compute_func= compute_two_pass_window_functions; - break; - } case Item_sum::COUNT_FUNC: case Item_sum::SUM_BIT_FUNC: case Item_sum::SUM_FUNC: case Item_sum::AVG_FUNC: + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: { /* Frame-aware window function computation. It does one pass, but diff --git a/sql/sql_window.h b/sql/sql_window.h index 042d978431f..dc58821a56c 100644 --- a/sql/sql_window.h +++ b/sql/sql_window.h @@ -150,7 +150,7 @@ typedef bool (*window_compute_func_t)(Item_window_func *item_win, Currently, we make a spearate filesort() call for each window function. */ -class Window_func_runner : public Sql_alloc +class Window_func_runner : public Sql_alloc { Item_window_func *win_func; /* Window function can be computed over this sorting */